2021-12-26 23:18:28 +08:00
|
|
|
/*
 * Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
|
2021-08-13 22:53:26 +08:00
|
|
|
#ifndef CPPTOKENIZER_H
|
|
|
|
#define CPPTOKENIZER_H
|
|
|
|
|
|
|
|
#include <QObject>
|
|
|
|
#include "parserutils.h"
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
class CppTokenizer
|
2021-08-13 22:53:26 +08:00
|
|
|
{
|
2022-10-31 19:37:24 +08:00
|
|
|
enum class TokenType {
|
|
|
|
Normal,
|
|
|
|
LeftBrace,
|
|
|
|
RightBrace,
|
|
|
|
LeftParenthesis,
|
|
|
|
RightParenthesis,
|
|
|
|
LeftBracket,
|
|
|
|
RightBracket,
|
|
|
|
Assignment,
|
2022-11-02 22:48:25 +08:00
|
|
|
LambdaCaptures
|
2022-10-31 19:37:24 +08:00
|
|
|
};
|
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
public:
|
|
|
|
struct Token {
|
|
|
|
QString text;
|
|
|
|
int line;
|
2022-10-31 19:37:24 +08:00
|
|
|
int matchIndex;
|
2021-08-13 22:53:26 +08:00
|
|
|
};
|
|
|
|
using PToken = std::shared_ptr<Token>;
|
|
|
|
using TokenList = QVector<PToken>;
|
2021-08-14 22:52:37 +08:00
|
|
|
explicit CppTokenizer();
|
2023-01-12 12:07:22 +08:00
|
|
|
CppTokenizer(const CppTokenizer&)=delete;
|
|
|
|
CppTokenizer& operator=(const CppTokenizer&)=delete;
|
2021-08-13 22:53:26 +08:00
|
|
|
|
2022-10-22 10:59:39 +08:00
|
|
|
void clear();
|
2021-08-14 18:55:42 +08:00
|
|
|
void tokenize(const QStringList& buffer);
|
|
|
|
void dumpTokens(const QString& fileName);
|
2022-12-27 14:29:49 +08:00
|
|
|
const PToken& operator[](int i) const {
|
|
|
|
return mTokenList[i];
|
|
|
|
}
|
|
|
|
int tokenCount() const {
|
|
|
|
return mTokenList.count();
|
|
|
|
}
|
|
|
|
static bool isIdentChar(const QChar& ch) {
|
|
|
|
return ch=='_' || ch.isLetter() ;
|
|
|
|
}
|
|
|
|
int lambdasCount() const {
|
|
|
|
return mLambdas.count();
|
|
|
|
}
|
|
|
|
|
|
|
|
int indexOfFirstLambda() const {
|
|
|
|
return mLambdas.front();
|
|
|
|
}
|
|
|
|
void removeFirstLambda() {
|
|
|
|
mLambdas.pop_front();
|
|
|
|
}
|
2022-11-04 23:44:11 +08:00
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
private:
|
2022-10-31 19:37:24 +08:00
|
|
|
void addToken(const QString& sText, int iLine, TokenType tokenType);
|
2021-08-14 12:33:02 +08:00
|
|
|
void advance();
|
2021-08-13 22:53:26 +08:00
|
|
|
void countLines();
|
|
|
|
PToken getToken(int index);
|
2021-08-14 12:33:02 +08:00
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
QString getForInit();
|
2021-08-14 12:33:02 +08:00
|
|
|
QString getNextToken(
|
2022-11-16 09:38:55 +08:00
|
|
|
TokenType *pTokenType);
|
2021-08-13 22:53:26 +08:00
|
|
|
QString getNumber();
|
|
|
|
QString getPreprocessor();
|
2023-02-06 14:04:38 +08:00
|
|
|
QString getWord();
|
2021-08-13 22:53:26 +08:00
|
|
|
bool isArguments();
|
|
|
|
bool isForInit();
|
2021-08-14 12:33:02 +08:00
|
|
|
bool isNumber();
|
|
|
|
bool isPreprocessor();
|
|
|
|
bool isWord();
|
2021-08-13 22:53:26 +08:00
|
|
|
void simplify(QString& output);
|
|
|
|
void simplifyArgs(QString& output);
|
2023-02-06 14:04:38 +08:00
|
|
|
// void skipAssignment();
|
2021-08-14 12:33:02 +08:00
|
|
|
void skipDoubleQuotes();
|
2022-11-04 23:44:11 +08:00
|
|
|
void skipPair(const QChar& cStart, const QChar cEnd);
|
2022-11-12 12:14:19 +08:00
|
|
|
void skipParenthesis();
|
2022-11-04 20:27:35 +08:00
|
|
|
bool skipAngleBracketPair();
|
2021-08-14 12:33:02 +08:00
|
|
|
void skipRawString();
|
|
|
|
void skipSingleQuote();
|
|
|
|
void skipSplitLine();
|
|
|
|
void skipTemplateArgs();
|
|
|
|
void skipToEOL();
|
|
|
|
void skipToNextToken();
|
2021-08-13 22:53:26 +08:00
|
|
|
bool openFile(const QString& fileName);
|
2022-12-27 14:29:49 +08:00
|
|
|
static bool isLetterChar(const QChar& ch) {
|
|
|
|
return isIdentChar(ch)
|
|
|
|
|| ch == '_'
|
|
|
|
|| ch == '*'
|
|
|
|
|| ch == '&'
|
|
|
|
|| ch == '~';
|
|
|
|
}
|
|
|
|
static bool isHexChar(const QChar& ch) {
|
|
|
|
return (ch >= 'A' && ch<='F')
|
|
|
|
|| (ch>='a' && ch<='f')
|
|
|
|
|| ch == 'x'
|
|
|
|
|| ch == 'L';
|
|
|
|
}
|
|
|
|
static bool isDigitChar(const QChar& ch) {
|
|
|
|
return (ch>='0' && ch<='9');
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isSpaceChar(const QChar& ch) {
|
|
|
|
return (ch == ' ' || ch == '\t');
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isLineChar(const QChar& ch) {
|
|
|
|
return (ch=='\n' || ch=='\r');
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool isBlankChar(const QChar& ch) {
|
|
|
|
return (ch<=32) && (ch>0);
|
|
|
|
}
|
|
|
|
|
2023-02-06 14:04:38 +08:00
|
|
|
// static bool isOperatorChar(const QChar& ch) {
|
|
|
|
// switch (ch.unicode()) {
|
|
|
|
// case '+':
|
|
|
|
// case '-':
|
|
|
|
// case '/':
|
|
|
|
// case '*':
|
|
|
|
// case '[':
|
|
|
|
// case ']':
|
|
|
|
// case '=':
|
|
|
|
// case '%':
|
|
|
|
// case '!':
|
|
|
|
// case '&':
|
|
|
|
// case '|':
|
|
|
|
// case '>':
|
|
|
|
// case '<':
|
|
|
|
// case '^':
|
|
|
|
// return true;
|
|
|
|
// default:
|
|
|
|
// return false;
|
|
|
|
// }
|
|
|
|
// }
|
2021-08-14 12:33:02 +08:00
|
|
|
|
2022-12-27 14:29:49 +08:00
|
|
|
static bool currentWordEquals(QChar* wordStart, QChar *wordEnd, const QString& text) {
|
|
|
|
QString currentWord(wordStart, wordEnd-wordStart);
|
|
|
|
return currentWord == text;
|
|
|
|
}
|
2021-08-13 22:53:26 +08:00
|
|
|
|
|
|
|
private:
|
|
|
|
QStringList mBuffer;
|
|
|
|
QString mBufferStr;
|
|
|
|
QChar* mStart;
|
|
|
|
QChar* mCurrent;
|
|
|
|
QChar* mLineCount;
|
|
|
|
int mCurrentLine;
|
|
|
|
QString mLastToken;
|
|
|
|
TokenList mTokenList;
|
2022-11-04 23:44:11 +08:00
|
|
|
QList<int> mLambdas;
|
|
|
|
QVector<int> mUnmatchedBraces; // stack of indices for unmatched '{'
|
|
|
|
QVector<int> mUnmatchedBrackets; // stack of indices for unmatched '['
|
|
|
|
QVector<int> mUnmatchedParenthesis;// stack of indices for unmatched '('
|
2021-08-13 22:53:26 +08:00
|
|
|
};
|
|
|
|
|
2022-10-18 12:24:59 +08:00
|
|
|
/// Shared-ownership handle to a CppTokenizer.
using PCppTokenizer = std::shared_ptr<CppTokenizer>;
|
|
|
|
|
2021-08-13 22:53:26 +08:00
|
|
|
#endif // CPPTOKENIZER_H
|