RedPanda-CPP/RedPandaIDE/parser/cpptokenizer.h

/*
 * Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#ifndef CPPTOKENIZER_H
#define CPPTOKENIZER_H

#include <QObject>
#include "parserutils.h"

class CppTokenizer
{
    enum class TokenType {
        Normal,
        LeftBrace,
        RightBrace,
        LeftParenthesis,
        RightParenthesis,
        LeftBracket,
        RightBracket,
        Assignment,
        LambdaCaptures
    };

public:
    struct Token {
      QString text;
      int line;
      int matchIndex;
    };
    using PToken = std::shared_ptr<Token>;
    using TokenList = QVector<PToken>;
    explicit CppTokenizer();

    void clear();
    void tokenize(const QStringList& buffer);
    void dumpTokens(const QString& fileName);
    const PToken& operator[](int i) const {
        return mTokenList[i];
    }
    int tokenCount() const {
        return mTokenList.count();
    }
    static bool isIdentChar(const QChar& ch) {
            return ch=='_' || ch.isLetter() ;
    }
    int lambdasCount() const {
        return mLambdas.count();
    }

    int indexOfFirstLambda() const {
        return mLambdas.front();
    }
    void removeFirstLambda() {
        mLambdas.pop_front();
    }

private:
    void addToken(const QString& sText, int iLine, TokenType tokenType);
    void advance();
    void countLines();
    PToken getToken(int index);

    QString getForInit();
    QString getNextToken(
            TokenType *pTokenType);
    QString getNumber();
    QString getPreprocessor();
    QString getWord(
            bool bSkipParenthesis);
    bool isArguments();
    bool isForInit();
    bool isNumber();
    bool isPreprocessor();
    bool isWord();
    void simplify(QString& output);
    void simplifyArgs(QString& output);
    void skipAssignment();
    void skipDoubleQuotes();
    void skipPair(const QChar& cStart, const QChar cEnd);
    void skipParenthesis();
    bool skipAngleBracketPair();
    void skipRawString();
    void skipSingleQuote();
    void skipSplitLine();
    void skipTemplateArgs();
    void skipToEOL();
    void skipToNextToken();
    bool openFile(const QString& fileName);
    static bool isLetterChar(const QChar& ch) {
        return isIdentChar(ch)
                    || ch == '_'
                    || ch == '*'
                    || ch == '&'
                    || ch == '~';
    }
    static bool isHexChar(const QChar& ch) {
        return (ch >= 'A' && ch<='F')
                || (ch>='a' && ch<='f')
                || ch == 'x'
                || ch == 'L';
    }
    static bool isDigitChar(const QChar& ch) {
        return (ch>='0' && ch<='9');
    }

    static bool isSpaceChar(const QChar& ch) {
        return (ch == ' ' || ch == '\t');
    }

    static bool isLineChar(const QChar& ch) {
        return (ch=='\n' || ch=='\r');
    }

    static bool isBlankChar(const QChar& ch) {
        return (ch<=32) && (ch>0);
    }

    static bool isOperatorChar(const QChar& ch) {
        switch (ch.unicode()) {
        case '+':
        case '-':
        case '/':
        case '*':
        case '[':
        case ']':
        case '=':
        case '%':
        case '!':
        case '&':
        case '|':
        case '>':
        case '<':
        case '^':
            return true;
        default:
            return false;
        }
    }

    static bool currentWordEquals(QChar* wordStart, QChar *wordEnd, const QString& text) {
        QString currentWord(wordStart, wordEnd-wordStart);
        return currentWord == text;
    }

private:
    QStringList mBuffer;
    QString mBufferStr;
    QChar* mStart;
    QChar* mCurrent;
    QChar* mLineCount;
    int mCurrentLine;
    QString mLastToken;
    TokenList mTokenList;
    QList<int> mLambdas;
    QVector<int> mUnmatchedBraces; // stack of indices for unmatched '{'
    QVector<int> mUnmatchedBrackets; // stack of indices for unmatched '['
    QVector<int> mUnmatchedParenthesis;// stack of indices for unmatched '('
};

using PCppTokenizer = std::shared_ptr<CppTokenizer>;

#endif // CPPTOKENIZER_H