/*
 * Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
2021-08-13 22:53:26 +08:00
|
|
|
#ifndef CPPTOKENIZER_H
|
|
|
|
#define CPPTOKENIZER_H
|
|
|
|
|
|
|
|
#include <QObject>
|
|
|
|
#include "parserutils.h"
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
class CppTokenizer
|
2021-08-13 22:53:26 +08:00
|
|
|
{
|
2022-10-31 19:37:24 +08:00
|
|
|
enum class TokenType {
|
|
|
|
Normal,
|
|
|
|
LeftBrace,
|
|
|
|
RightBrace,
|
|
|
|
LeftParenthesis,
|
|
|
|
RightParenthesis,
|
|
|
|
LeftBracket,
|
|
|
|
RightBracket,
|
|
|
|
Assignment,
|
2022-11-02 22:48:25 +08:00
|
|
|
LambdaCaptures
|
2022-10-31 19:37:24 +08:00
|
|
|
};
|
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
public:
|
|
|
|
struct Token {
|
|
|
|
QString text;
|
|
|
|
int line;
|
2022-10-31 19:37:24 +08:00
|
|
|
int matchIndex;
|
2021-08-13 22:53:26 +08:00
|
|
|
};
|
|
|
|
using PToken = std::shared_ptr<Token>;
|
|
|
|
using TokenList = QVector<PToken>;
|
2021-08-14 22:52:37 +08:00
|
|
|
explicit CppTokenizer();
|
2021-08-13 22:53:26 +08:00
|
|
|
|
2022-10-22 10:59:39 +08:00
|
|
|
void clear();
|
2021-08-14 18:55:42 +08:00
|
|
|
void tokenize(const QStringList& buffer);
|
|
|
|
void dumpTokens(const QString& fileName);
|
2021-08-15 16:49:37 +08:00
|
|
|
const TokenList& tokens();
|
|
|
|
PToken operator[](int i);
|
|
|
|
int tokenCount();
|
2022-01-12 20:59:28 +08:00
|
|
|
bool isIdentChar(const QChar& ch);
|
2022-11-04 23:44:11 +08:00
|
|
|
int lambdasCount() const;
|
|
|
|
int indexOfFirstLambda() const;
|
|
|
|
void removeFirstLambda();
|
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
private:
|
2022-10-31 19:37:24 +08:00
|
|
|
void addToken(const QString& sText, int iLine, TokenType tokenType);
|
2021-08-14 12:33:02 +08:00
|
|
|
void advance();
|
2021-08-13 22:53:26 +08:00
|
|
|
void countLines();
|
|
|
|
PToken getToken(int index);
|
2021-08-14 12:33:02 +08:00
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
QString getForInit();
|
2021-08-14 12:33:02 +08:00
|
|
|
QString getNextToken(
|
2022-11-16 09:38:55 +08:00
|
|
|
TokenType *pTokenType);
|
2021-08-13 22:53:26 +08:00
|
|
|
QString getNumber();
|
|
|
|
QString getPreprocessor();
|
|
|
|
QString getWord(
|
2022-11-16 09:38:55 +08:00
|
|
|
bool bSkipParenthesis);
|
2021-08-13 22:53:26 +08:00
|
|
|
bool isArguments();
|
|
|
|
bool isForInit();
|
2021-08-14 12:33:02 +08:00
|
|
|
bool isNumber();
|
|
|
|
bool isPreprocessor();
|
|
|
|
bool isWord();
|
2021-08-13 22:53:26 +08:00
|
|
|
void simplify(QString& output);
|
|
|
|
void simplifyArgs(QString& output);
|
2021-08-14 12:33:02 +08:00
|
|
|
void skipAssignment();
|
|
|
|
void skipDoubleQuotes();
|
2022-11-04 23:44:11 +08:00
|
|
|
void skipPair(const QChar& cStart, const QChar cEnd);
|
2022-11-12 12:14:19 +08:00
|
|
|
void skipParenthesis();
|
2022-11-04 20:27:35 +08:00
|
|
|
bool skipAngleBracketPair();
|
2021-08-14 12:33:02 +08:00
|
|
|
void skipRawString();
|
|
|
|
void skipSingleQuote();
|
|
|
|
void skipSplitLine();
|
|
|
|
void skipTemplateArgs();
|
|
|
|
void skipToEOL();
|
|
|
|
void skipToNextToken();
|
2021-08-13 22:53:26 +08:00
|
|
|
bool openFile(const QString& fileName);
|
2021-08-14 12:33:02 +08:00
|
|
|
bool isLetterChar(const QChar& ch);
|
|
|
|
bool isHexChar(const QChar& ch);
|
|
|
|
bool isDigitChar(const QChar& ch);
|
|
|
|
bool isSpaceChar(const QChar& ch);
|
|
|
|
bool isLineChar(const QChar& ch);
|
|
|
|
bool isBlankChar(const QChar& ch);
|
|
|
|
bool isOperatorChar(const QChar& ch);
|
|
|
|
|
|
|
|
bool currentWordEquals(QChar* wordStart, QChar *wordEnd, const QString& text);
|
2021-08-13 22:53:26 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
QStringList mBuffer;
|
|
|
|
QString mBufferStr;
|
|
|
|
QChar* mStart;
|
|
|
|
QChar* mCurrent;
|
|
|
|
QChar* mLineCount;
|
|
|
|
int mCurrentLine;
|
|
|
|
QString mLastToken;
|
|
|
|
TokenList mTokenList;
|
2022-11-04 23:44:11 +08:00
|
|
|
QList<int> mLambdas;
|
|
|
|
QVector<int> mUnmatchedBraces; // stack of indices for unmatched '{'
|
|
|
|
QVector<int> mUnmatchedBrackets; // stack of indices for unmatched '['
|
|
|
|
QVector<int> mUnmatchedParenthesis;// stack of indices for unmatched '('
|
2021-08-13 22:53:26 +08:00
|
|
|
};
|
|
|
|
|
2022-10-18 12:24:59 +08:00
|
|
|
// Shared-ownership handle (std::shared_ptr) to a CppTokenizer.
using PCppTokenizer = std::shared_ptr<CppTokenizer>;
|
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
#endif // CPPTOKENIZER_H
|