// RedPanda-CPP/RedPandaIDE/parser/cpptokenizer.h

/*
 * Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#ifndef CPPTOKENIZER_H
#define CPPTOKENIZER_H

#include <QObject>
#include "parserutils.h"

/**
 * @brief Tokenizer for C++ source text.
 *
 * tokenize() receives the source as a list of lines; the resulting tokens are
 * read back through operator[] and tokenCount(). A separate queue of lambda
 * positions is exposed through lambdasCount(), indexOfFirstLambda() and
 * removeFirstLambda().
 *
 * The class is non-copyable (copy constructor and copy assignment are deleted).
 * Most member functions are only declared here; their behaviour is defined in
 * the accompanying implementation file.
 */
class CppTokenizer
{
    /// Category passed to addToken() / reported by getNextToken().
    enum class TokenType {
        Normal,
        LeftBrace,
        RightBrace,
        LeftParenthesis,
        RightParenthesis,
        LeftBracket,
        RightBracket,
        Assignment,
        LambdaCaptures
    };

public:
    /// A single token produced by the tokenizer.
    struct Token {
      QString text;    ///< Token text.
      int line;        ///< Line the token belongs to.
      int matchIndex;  ///< NOTE(review): presumably the index of the matching
                       ///< bracket/brace/parenthesis token - confirm in the .cpp.
    };
    using PToken = std::shared_ptr<Token>;
    using TokenList = QVector<PToken>;

    explicit CppTokenizer();
    CppTokenizer(const CppTokenizer&)=delete;
    CppTokenizer& operator=(const CppTokenizer&)=delete;

    void clear();
    void tokenize(const QStringList& buffer);
    void dumpTokens(const QString& fileName);

    /**
     * @brief Access the token at position @p i.
     * @param i Index into the token list; must satisfy 0 <= i < tokenCount().
     *          No bounds check is performed here.
     */
    const PToken& operator[](int i) const {
        return mTokenList[i];
    }

    /// Number of tokens currently stored.
    int tokenCount() const {
        return mTokenList.count();
    }

    /// True when @p ch is an underscore or a letter (per QChar::isLetter()).
    static bool isIdentChar(const QChar& ch) {
        return ch=='_' || ch.isLetter();
    }

    /// Number of entries left in the lambda queue.
    int lambdasCount() const {
        return mLambdas.count();
    }

    /**
     * @brief First entry of the lambda queue.
     * @return The front element of the queue, or -1 when the queue is empty.
     *
     * The emptiness guard exists because calling front() on an empty QList
     * violates its precondition.
     */
    int indexOfFirstLambda() const {
        if (mLambdas.isEmpty())
            return -1;
        return mLambdas.front();
    }

    /// Drop the first entry of the lambda queue; does nothing when it is empty.
    void removeFirstLambda() {
        if (mLambdas.isEmpty())
            return;
        mLambdas.pop_front();
    }

private:
    // The functions below are defined in the implementation file.
    void addToken(const QString& sText, int iLine, TokenType tokenType);
    void advance();
    void countLines();
    PToken getToken(int index);

    QString getForInit();
    QString getNextToken(
            TokenType *pTokenType);
    QString getNumber();
    QString getPreprocessor();
    QString getWord();
    bool isArguments();
    bool isForInit();
    bool isNumber();
    bool isPreprocessor();
    bool isWord();
    void simplify(QString& output);
    void simplifyArgs(QString& output);
//    void skipAssignment();
    void skipDoubleQuotes();
    void skipPair(const QChar& cStart, const QChar cEnd);
    void skipParenthesis();
    bool skipAngleBracketPair();
    void skipRawString();
    void skipSingleQuote();
    void skipSplitLine();
    void skipTemplateArgs();
    void skipToEOL();
    void skipToNextToken();
    bool openFile(const QString& fileName);

    /// True for identifier characters (see isIdentChar()) plus '*', '&' and '~'.
    static bool isLetterChar(const QChar& ch) {
        // '_' is already accepted by isIdentChar(), so it is not re-tested here.
        return isIdentChar(ch)
                    || ch == '*'
                    || ch == '&'
                    || ch == '~';
    }

    /**
     * @brief True for 'A'-'F', 'a'-'f', 'x' and 'L'.
     *
     * Decimal digits are NOT accepted here; see isDigitChar().
     * NOTE(review): 'x' and 'L' presumably cover the "0x" prefix and the long
     * suffix of numeric literals - confirm against getNumber()/isNumber().
     */
    static bool isHexChar(const QChar& ch) {
        return (ch >= 'A' && ch<='F')
                || (ch>='a' && ch<='f')
                || ch == 'x'
                || ch == 'L';
    }

    /// True for the ASCII digits '0'-'9'.
    static bool isDigitChar(const QChar& ch) {
        return (ch>='0' && ch<='9');
    }

    /// True for a space or a horizontal tab.
    static bool isSpaceChar(const QChar& ch) {
        return (ch == ' ' || ch == '\t');
    }

    /// True for a line feed or a carriage return.
    static bool isLineChar(const QChar& ch) {
        return (ch=='\n' || ch=='\r');
    }

    /// True for any character with a code in the range 1..32 (NUL excluded).
    static bool isBlankChar(const QChar& ch) {
        return (ch<=32) && (ch>0);
    }

//    static bool isOperatorChar(const QChar& ch) {
//        switch (ch.unicode()) {
//        case '+':
//        case '-':
//        case '/':
//        case '*':
//        case '[':
//        case ']':
//        case '=':
//        case '%':
//        case '!':
//        case '&':
//        case '|':
//        case '>':
//        case '<':
//        case '^':
//            return true;
//        default:
//            return false;
//        }
//    }

    /**
     * @brief Compare the character range [wordStart, wordEnd) with @p text.
     * @param wordStart Pointer to the first character of the word.
     * @param wordEnd   Pointer one past the last character of the word.
     * @param text      String to compare against.
     * @return true when the range holds exactly the characters of @p text.
     */
    static bool currentWordEquals(QChar* wordStart, QChar *wordEnd, const QString& text) {
        QString currentWord(wordStart, wordEnd-wordStart);
        return currentWord == text;
    }

private:
    QStringList mBuffer;
    QString mBufferStr;
    // The raw pointers and the line counter get default member initializers so
    // that a freshly constructed tokenizer never holds indeterminate values.
    QChar* mStart{nullptr};
    QChar* mCurrent{nullptr};
    QChar* mLineCount{nullptr}; // NOTE(review): a pointer despite its name; presumably
                                // the position up to which countLines() has counted - confirm.
    int mCurrentLine{0};
    QString mLastToken;
    TokenList mTokenList;
    QList<int> mLambdas;        // queue read by indexOfFirstLambda()/removeFirstLambda()
    QVector<int> mUnmatchedBraces; // stack of indices for unmatched '{'
    QVector<int> mUnmatchedBrackets; // stack of indices for unmatched '['
    QVector<int> mUnmatchedParenthesis;// stack of indices for unmatched '('
};

/// Shared-ownership handle to a CppTokenizer.
using PCppTokenizer = std::shared_ptr<CppTokenizer>;

#endif // CPPTOKENIZER_H