2021-12-26 23:18:28 +08:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com)
|
|
|
|
*
|
|
|
|
* This program is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
*/
|
2021-08-13 22:53:26 +08:00
|
|
|
#include "cpptokenizer.h"
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
#include <QFile>
|
|
|
|
#include <QTextStream>
|
2022-11-02 22:48:25 +08:00
|
|
|
#include <QDebug>
|
2021-08-14 22:52:37 +08:00
|
|
|
|
|
|
|
// Constructs a tokenizer. No work is done here; tokenize() resets all
// scanning state (via clear()) before every run.
CppTokenizer::CppTokenizer()
{
    // intentionally empty
}
|
|
|
|
|
2022-10-22 10:59:39 +08:00
|
|
|
void CppTokenizer::clear()
|
2021-08-14 22:52:37 +08:00
|
|
|
{
|
|
|
|
mTokenList.clear();
|
|
|
|
mBuffer.clear();
|
|
|
|
mBufferStr.clear();
|
2022-10-18 12:24:59 +08:00
|
|
|
mLastToken.clear();
|
2022-10-31 19:37:24 +08:00
|
|
|
mUnmatchedBraces.clear();
|
|
|
|
mUnmatchedBrackets.clear();
|
|
|
|
mUnmatchedParenthesis.clear();
|
2022-11-04 23:44:11 +08:00
|
|
|
mLambdas.clear();
|
2021-08-14 22:52:37 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void CppTokenizer::tokenize(const QStringList &buffer)
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
2022-10-22 10:59:39 +08:00
|
|
|
clear();
|
2021-08-14 22:52:37 +08:00
|
|
|
|
2021-08-14 18:55:42 +08:00
|
|
|
mBuffer = buffer;
|
|
|
|
if (mBuffer.isEmpty())
|
|
|
|
return;
|
|
|
|
mBufferStr = mBuffer[0];
|
|
|
|
for (int i=1;i<mBuffer.size();i++) {
|
|
|
|
mBufferStr+='\n';
|
|
|
|
mBufferStr+=mBuffer[i];
|
|
|
|
}
|
|
|
|
mStart = mBufferStr.data();
|
|
|
|
mCurrent = mStart;
|
|
|
|
mLineCount = mStart;
|
|
|
|
QString s = "";
|
|
|
|
mCurrentLine = 1;
|
2022-10-31 19:37:24 +08:00
|
|
|
|
|
|
|
TokenType tokenType;
|
2021-08-14 18:55:42 +08:00
|
|
|
while (true) {
|
|
|
|
mLastToken = s;
|
2022-11-16 09:38:55 +08:00
|
|
|
s = getNextToken(&tokenType);
|
2021-08-14 18:55:42 +08:00
|
|
|
simplify(s);
|
|
|
|
if (s.isEmpty())
|
|
|
|
break;
|
|
|
|
else
|
2022-10-31 19:37:24 +08:00
|
|
|
addToken(s,mCurrentLine,tokenType);
|
|
|
|
}
|
|
|
|
while (!mUnmatchedBraces.isEmpty()) {
|
2022-11-01 00:01:46 +08:00
|
|
|
addToken("}",mCurrentLine,TokenType::RightBrace);
|
2022-10-31 19:37:24 +08:00
|
|
|
}
|
|
|
|
while (!mUnmatchedBrackets.isEmpty()) {
|
2022-11-02 10:42:55 +08:00
|
|
|
addToken("]",mCurrentLine,TokenType::RightBracket);
|
2022-10-31 19:37:24 +08:00
|
|
|
}
|
|
|
|
while (!mUnmatchedParenthesis.isEmpty()) {
|
2022-11-02 10:42:55 +08:00
|
|
|
addToken(")",mCurrentLine,TokenType::RightParenthesis);
|
2021-08-14 18:55:42 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::dumpTokens(const QString &fileName)
|
|
|
|
{
|
|
|
|
QFile file(fileName);
|
|
|
|
|
|
|
|
if (file.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
|
|
|
|
QTextStream stream(&file);
|
2021-08-29 10:14:07 +08:00
|
|
|
foreach (const PToken& token,mTokenList) {
|
2022-10-31 19:37:24 +08:00
|
|
|
stream<<QString("%1,%2,%3").arg(token->line).arg(token->text).arg(token->matchIndex)
|
2022-07-24 11:19:11 +08:00
|
|
|
#if QT_VERSION >= QT_VERSION_CHECK(5,15,0)
|
2022-07-04 11:39:06 +08:00
|
|
|
<<Qt::endl;
|
|
|
|
#else
|
|
|
|
<<endl;
|
|
|
|
#endif
|
2021-08-14 22:52:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-31 19:37:24 +08:00
|
|
|
void CppTokenizer::addToken(const QString &sText, int iLine, TokenType tokenType)
|
2021-08-13 22:53:26 +08:00
|
|
|
{
|
|
|
|
PToken token = std::make_shared<Token>();
|
|
|
|
token->text = sText;
|
|
|
|
token->line = iLine;
|
2022-11-05 08:42:54 +08:00
|
|
|
#ifdef Q_DEBUG
|
2022-11-04 20:27:35 +08:00
|
|
|
token->matchIndex = 1000000000;
|
2022-11-05 08:42:54 +08:00
|
|
|
#endif
|
2022-10-31 19:37:24 +08:00
|
|
|
switch(tokenType) {
|
|
|
|
case TokenType::LeftBrace:
|
|
|
|
token->matchIndex=-1;
|
|
|
|
mUnmatchedBraces.push_back(mTokenList.count());
|
|
|
|
break;
|
|
|
|
case TokenType::RightBrace:
|
|
|
|
if (mUnmatchedBraces.isEmpty()) {
|
|
|
|
token->matchIndex=-1;
|
|
|
|
} else {
|
|
|
|
token->matchIndex = mUnmatchedBraces.last();
|
|
|
|
mTokenList[token->matchIndex]->matchIndex=mTokenList.count();
|
|
|
|
mUnmatchedBraces.pop_back();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenType::LeftBracket:
|
|
|
|
token->matchIndex=-1;
|
|
|
|
mUnmatchedBrackets.push_back(mTokenList.count());
|
|
|
|
break;
|
|
|
|
case TokenType::RightBracket:
|
|
|
|
if (mUnmatchedBrackets.isEmpty()) {
|
|
|
|
token->matchIndex=-1;
|
|
|
|
} else {
|
|
|
|
token->matchIndex = mUnmatchedBrackets.last();
|
|
|
|
mTokenList[token->matchIndex]->matchIndex=mTokenList.count();
|
|
|
|
mUnmatchedBrackets.pop_back();
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case TokenType::LeftParenthesis:
|
|
|
|
token->matchIndex=-1;
|
|
|
|
mUnmatchedParenthesis.push_back(mTokenList.count());
|
|
|
|
break;
|
|
|
|
case TokenType::RightParenthesis:
|
|
|
|
if (mUnmatchedParenthesis.isEmpty()) {
|
|
|
|
token->matchIndex=-1;
|
|
|
|
} else {
|
|
|
|
token->matchIndex = mUnmatchedParenthesis.last();
|
|
|
|
mTokenList[token->matchIndex]->matchIndex=mTokenList.count();
|
|
|
|
mUnmatchedParenthesis.pop_back();
|
|
|
|
}
|
|
|
|
break;
|
2022-11-04 23:44:11 +08:00
|
|
|
case TokenType::LambdaCaptures:
|
|
|
|
mLambdas.push_back(mTokenList.count());
|
2022-10-31 19:37:24 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2021-08-13 22:53:26 +08:00
|
|
|
mTokenList.append(token);
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::countLines()
|
2021-08-13 22:53:26 +08:00
|
|
|
{
|
2021-08-16 23:17:48 +08:00
|
|
|
while ((*mLineCount != 0) && (mLineCount < mCurrent)) {
|
2021-08-13 22:53:26 +08:00
|
|
|
if (*mLineCount == '\n')
|
|
|
|
mCurrentLine ++;
|
|
|
|
mLineCount++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
// Handles the parenthesized header of a `for` statement; mCurrent is
// expected to be on its '('.
//
// Tokens of the header are added directly to the token list until the first
// ";", ")" or "(" token (or end of input). ':' does not stop the scan, as
// it appears in range-based for loops. Afterwards the cursor is rewound to
// the '(' and the whole parenthesized group is skipped.
// Always returns an empty string.
QString CppTokenizer::getForInit()
{
    QChar* openParen = mCurrent;

    // Step into the init statement
    mCurrent++;

    TokenType tokenType;
    bool finished = false;
    while (!finished) {
        QString token = getNextToken(&tokenType);
        simplify(token);
        if (token.isEmpty()) {
            // end of input
            finished = true;
        } else {
            addToken(token,mCurrentLine,tokenType);
            finished = (token == ";") || (token == ")") || (token == "(");
        }
    }

    // Skip to end of for loop header
    mCurrent = openParen;
    skipPair('(', ')');
    return "";
}
|
|
|
|
|
2022-11-16 09:38:55 +08:00
|
|
|
QString CppTokenizer::getNextToken(TokenType *pTokenType)
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
QString result;
|
|
|
|
bool done = false;
|
2022-10-31 19:37:24 +08:00
|
|
|
*pTokenType=TokenType::Normal;
|
2021-08-14 12:33:02 +08:00
|
|
|
while (true) {
|
|
|
|
skipToNextToken();
|
2021-08-16 23:17:48 +08:00
|
|
|
if (*mCurrent == 0)
|
2021-08-14 12:33:02 +08:00
|
|
|
break;
|
|
|
|
if (isPreprocessor()) {
|
|
|
|
countLines();
|
|
|
|
result = getPreprocessor(); // don't count preprocessor lines
|
|
|
|
if (result.startsWith("#include")) { // if we find
|
|
|
|
int delimPos = result.lastIndexOf(':');
|
|
|
|
if (delimPos >= 0) {
|
|
|
|
bool ok;
|
2021-08-29 10:14:07 +08:00
|
|
|
mCurrentLine = result.midRef(delimPos+1).toInt(&ok)-1; // fCurrLine is 0 based
|
2021-08-14 12:33:02 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
done = (result != "");
|
|
|
|
} else if (isForInit()) {
|
|
|
|
countLines();
|
|
|
|
result = getForInit();
|
|
|
|
done = (result != "");
|
2022-10-31 19:37:24 +08:00
|
|
|
// } else if (isArguments()) {
|
|
|
|
// countLines();
|
|
|
|
// result = getArguments();
|
|
|
|
// done = (result != "");
|
2021-08-14 12:33:02 +08:00
|
|
|
} else if (isWord()) {
|
|
|
|
countLines();
|
2023-02-06 14:04:38 +08:00
|
|
|
result = getWord();
|
2022-11-02 13:38:26 +08:00
|
|
|
// if (result=="noexcept" || result == "throw") {
|
|
|
|
// result="";
|
|
|
|
// if (*mCurrent=='(')
|
|
|
|
// skipPair('(',')');
|
|
|
|
// }
|
2021-08-14 12:33:02 +08:00
|
|
|
done = (result != "");
|
|
|
|
} else if (isNumber()) {
|
|
|
|
countLines();
|
|
|
|
result = getNumber();
|
|
|
|
done = (result != "");
|
|
|
|
} else {
|
|
|
|
switch((*mCurrent).unicode()) {
|
2021-08-16 23:17:48 +08:00
|
|
|
case 0:
|
2021-08-14 12:33:02 +08:00
|
|
|
done = true;
|
|
|
|
break;
|
2022-03-15 21:33:27 +08:00
|
|
|
case ':':
|
|
|
|
if (*(mCurrent + 1) == ':') {
|
|
|
|
countLines();
|
|
|
|
mCurrent+=2;
|
2022-11-02 10:42:55 +08:00
|
|
|
result = "::";
|
2022-11-02 22:48:25 +08:00
|
|
|
skipToNextToken();
|
2022-03-15 21:33:27 +08:00
|
|
|
// Append next token to this one
|
2023-02-06 14:04:38 +08:00
|
|
|
// if (isIdentChar(*mCurrent))
|
|
|
|
// result+=getWord(true);
|
2022-03-15 21:33:27 +08:00
|
|
|
done = true;
|
|
|
|
} else {
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2022-03-15 21:33:27 +08:00
|
|
|
done = true;
|
|
|
|
}
|
|
|
|
break;
|
2021-08-14 12:33:02 +08:00
|
|
|
case '{':
|
2022-10-31 19:37:24 +08:00
|
|
|
*pTokenType=TokenType::LeftBrace;
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2022-10-31 19:37:24 +08:00
|
|
|
done = true;
|
|
|
|
break;
|
2021-08-14 12:33:02 +08:00
|
|
|
case '}':
|
2022-10-31 19:37:24 +08:00
|
|
|
*pTokenType=TokenType::RightBrace;
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2022-10-31 19:37:24 +08:00
|
|
|
done = true;
|
|
|
|
break;
|
|
|
|
case '(':
|
|
|
|
*pTokenType=TokenType::LeftParenthesis;
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2022-10-31 19:37:24 +08:00
|
|
|
done = true;
|
|
|
|
break;
|
2022-11-02 22:48:25 +08:00
|
|
|
case '[':
|
2022-11-04 20:27:35 +08:00
|
|
|
if (*(mCurrent+1)!='[') {
|
|
|
|
*pTokenType=TokenType::LambdaCaptures;
|
|
|
|
countLines();
|
|
|
|
QChar* backup=mCurrent;
|
|
|
|
skipPair('[',']');
|
|
|
|
result = QString(backup,mCurrent-backup);
|
|
|
|
done = true;
|
|
|
|
} else {
|
|
|
|
skipPair('[',']'); // attribute, skipit
|
|
|
|
}
|
2022-11-02 22:48:25 +08:00
|
|
|
break;
|
2022-10-31 19:37:24 +08:00
|
|
|
case ')':
|
|
|
|
*pTokenType=TokenType::RightParenthesis;
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2022-10-31 19:37:24 +08:00
|
|
|
done = true;
|
|
|
|
break;
|
2023-02-06 14:04:38 +08:00
|
|
|
case '.':
|
2021-08-14 12:33:02 +08:00
|
|
|
case ';':
|
2022-03-15 21:33:27 +08:00
|
|
|
case ',': //just return the brace or the ';'
|
2021-08-14 12:33:02 +08:00
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2021-08-14 12:33:02 +08:00
|
|
|
done = true;
|
|
|
|
break;
|
|
|
|
case '>': // keep stream operators
|
|
|
|
if (*(mCurrent + 1) == '>') {
|
2023-02-06 14:04:38 +08:00
|
|
|
countLines();
|
|
|
|
result = ">>";
|
|
|
|
mCurrent+=2;
|
|
|
|
done = true;
|
|
|
|
} else {
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
|
|
|
mCurrent++;
|
|
|
|
done = true;
|
|
|
|
} break;
|
2021-08-14 12:33:02 +08:00
|
|
|
case '<':
|
|
|
|
if (*(mCurrent + 1) == '<') {
|
|
|
|
countLines();
|
|
|
|
result = "<<";
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent+=2;
|
|
|
|
done = true;
|
2023-02-06 14:04:38 +08:00
|
|
|
} else {
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
|
|
|
mCurrent++;
|
|
|
|
done = true;
|
|
|
|
}
|
2022-11-05 16:17:46 +08:00
|
|
|
break;
|
|
|
|
case '=': {
|
2022-11-05 18:58:15 +08:00
|
|
|
if (*(mCurrent+1)=='=') {
|
|
|
|
// skip '=='
|
2023-02-06 14:04:38 +08:00
|
|
|
countLines();
|
|
|
|
result = "==";
|
|
|
|
mCurrent+=2;
|
|
|
|
done = true;
|
2022-11-05 18:58:15 +08:00
|
|
|
} else {
|
2022-11-05 16:17:46 +08:00
|
|
|
countLines();
|
2023-02-06 14:04:38 +08:00
|
|
|
mCurrent++;
|
2022-11-05 18:58:15 +08:00
|
|
|
result = "=";
|
2021-08-14 12:33:02 +08:00
|
|
|
done = true;
|
2022-11-05 18:58:15 +08:00
|
|
|
}
|
|
|
|
break;
|
2022-11-05 16:17:46 +08:00
|
|
|
}
|
2021-08-14 12:33:02 +08:00
|
|
|
break;
|
2022-11-05 18:58:15 +08:00
|
|
|
case '!':
|
|
|
|
if (*(mCurrent+1)=='=') {
|
2023-02-06 14:04:38 +08:00
|
|
|
countLines();
|
|
|
|
result = "!=";
|
|
|
|
mCurrent+=2;
|
|
|
|
done = true;
|
|
|
|
} else {
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
|
|
|
mCurrent++;
|
|
|
|
done = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
if (*(mCurrent + 1) == '=') {
|
|
|
|
countLines();
|
|
|
|
result = "-=";
|
|
|
|
mCurrent+=2;
|
|
|
|
done = true;
|
|
|
|
} else if (*(mCurrent + 1) == '>') {
|
|
|
|
countLines();
|
|
|
|
mCurrent+=2;
|
|
|
|
result = "->";
|
|
|
|
done = true;
|
|
|
|
} else {
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 18:58:15 +08:00
|
|
|
mCurrent++;
|
2023-02-06 14:04:38 +08:00
|
|
|
done = true;
|
|
|
|
}
|
2022-11-05 18:58:15 +08:00
|
|
|
break;
|
|
|
|
case '/':
|
|
|
|
case '%':
|
|
|
|
case '&':
|
|
|
|
case '*':
|
|
|
|
case '|':
|
|
|
|
case '+':
|
|
|
|
case '~':
|
2023-05-24 13:42:46 +08:00
|
|
|
case '^':
|
2022-11-05 18:58:15 +08:00
|
|
|
if (*(mCurrent + 1) == '=') {
|
2023-02-06 14:04:38 +08:00
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
|
|
|
result += "=";
|
|
|
|
mCurrent+=2;
|
|
|
|
done = true;
|
|
|
|
} else {
|
|
|
|
countLines();
|
|
|
|
result = *mCurrent;
|
2022-11-05 18:58:15 +08:00
|
|
|
mCurrent++;
|
2023-02-06 14:04:38 +08:00
|
|
|
done = true;
|
|
|
|
}
|
2022-11-05 18:58:15 +08:00
|
|
|
break;
|
2021-08-14 12:33:02 +08:00
|
|
|
default:
|
|
|
|
advance();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (done)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
// Reads a numeric literal starting at mCurrent and returns its text.
//
// The literal must start with a digit; after that any run of digit or hex
// characters is consumed. If the character following the run is '.', it is
// appended to the returned text, but mCurrent is left on the '.'.
// NOTE(review): because the '.' is not consumed, the next scan will see it
// again as a separate token - confirm the parser relies on this.
// Returns an empty string when mCurrent is not on a digit.
QString CppTokenizer::getNumber()
{
    QChar* numberStart = mCurrent;

    if (isDigitChar(*mCurrent)) {
        do {
            mCurrent++;
        } while (isDigitChar(*mCurrent) || isHexChar(*mCurrent));
    }

    if (numberStart == mCurrent)
        return QString();

    QString number(numberStart, mCurrent-numberStart);
    if (*mCurrent=='.') // keep '.' for decimal
        number += *mCurrent;
    return number;
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
// Returns the text from mCurrent up to where skipToEOL() stops, and leaves
// mCurrent at that position.
QString CppTokenizer::getPreprocessor()
{
    QChar* const directiveStart = mCurrent;
    skipToEOL();
    const auto length = mCurrent - directiveStart;
    return QString(directiveStart, length);
}
|
|
|
|
|
2023-02-06 14:04:38 +08:00
|
|
|
// Reads a word (identifier / keyword) starting at the next non-blank
// character and returns it, possibly extended:
//  - for the word "template", a following '<' ... '>' group is skipped and
//    not included in the result;
//  - for any other word except "operator", a following well-formed
//    '<' ... '>' group is appended verbatim, or, if the word is followed by
//    '[', all consecutive '[' ... ']' groups are appended and the result is
//    normalized with simplifyArgs();
//  - "operator" is returned as is.
// Whitespace after the word (and after appended groups) is skipped.
QString CppTokenizer::getWord()
{
    // Skip spaces
    skipToNextToken();

    // The first character is taken unconditionally; the rest of the word is
    // any run of identifier or digit characters.
    QChar* wordStart = mCurrent;
    mCurrent++;
    while (isIdentChar(*mCurrent) || isDigitChar(*mCurrent))
        mCurrent++;

    QString result;
    if (wordStart != mCurrent)
        result = QString(wordStart, mCurrent-wordStart);
    if (result.isEmpty())
        return result;

    const bool isTemplateKeyword = (result == "template");

    // Skip whitespace
    skipToNextToken();

    if (result == "operator")
        return result;

    if (*mCurrent == '<') {
        // Skip template contents, but keep template variable types
        QChar* argsStart = mCurrent;
        if (isTemplateKeyword) {
            skipTemplateArgs();
        } else if (skipAngleBracketPair()) {
            result += QString(argsStart, mCurrent-argsStart);
            skipToNextToken();
        }
    } else if (*mCurrent == '[') {
        // Append array stuff; loop because of multi-dimension arrays
        do {
            QChar* dimStart = mCurrent;
            skipPair('[', ']');
            result += QString(dimStart, mCurrent-dimStart);
            simplifyArgs(result);
            skipToNextToken();
        } while (*mCurrent == '[');
    }
    return result;
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
bool CppTokenizer::isArguments()
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
return *mCurrent == '(';
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
bool CppTokenizer::isForInit()
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
return (*mCurrent == '(') && (mLastToken == "for");
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
bool CppTokenizer::isNumber()
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
return isDigitChar(*mCurrent);
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
bool CppTokenizer::isPreprocessor()
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
return *mCurrent=='#';
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
bool CppTokenizer::isWord()
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
2023-02-25 12:23:45 +08:00
|
|
|
bool result = isIdentChar(*mCurrent);
|
2021-08-14 12:33:02 +08:00
|
|
|
if (result && (*(mCurrent+1) == '"'))
|
|
|
|
result = false;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
// Normalizes a token in place: every line-break character (as decided by
// isLineChar) is removed, then leading and trailing whitespace is trimmed.
void CppTokenizer::simplify(QString &output)
{
    QString kept;
    for (int i = 0; i < output.size(); ++i) {
        const QChar ch = output.at(i);
        if (isLineChar(ch))
            continue;
        kept += ch;
    }
    output = kept.trimmed();
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
// Compacts whitespace in `output` in place.
//
// The string is trimmed, then rebuilt so that:
//  - whitespace at the very start or right after '(' is dropped;
//  - whitespace directly before ',', '(' or ')' is dropped;
//  - any other run of whitespace collapses to a single ' '.
void CppTokenizer::simplifyArgs(QString &output)
{
    QString compacted;
    bool pendingSpace = false;   // whitespace seen since the last kept char
    bool atGroupStart = true;    // at string start or just after '('

    const QString trimmed = output.trimmed();
    for (const QChar& ch : trimmed) {
        if (isSpaceChar(ch)) {
            if (!atGroupStart)
                pendingSpace = true;
            continue;
        }
        const bool isDelimiter = (ch==',') || (ch=='(') || (ch==')');
        if (!isDelimiter && pendingSpace)
            compacted += " ";
        compacted += ch;
        pendingSpace = false;
        atGroupStart = (ch=='(');
    }
    output = compacted;
}
|
|
|
|
|
2023-02-06 14:04:38 +08:00
|
|
|
//void CppTokenizer::skipAssignment()
|
|
|
|
//{
|
|
|
|
// while (true) {
|
|
|
|
// switch ((*mCurrent).unicode()) {
|
|
|
|
// case '(': skipPair('(', ')');
|
|
|
|
// break;
|
|
|
|
// case '"': skipDoubleQuotes();
|
|
|
|
// break;
|
|
|
|
// case '\'': skipSingleQuote();
|
|
|
|
// break;
|
|
|
|
// case '{': skipPair('{', '}'); // support struct initializers
|
|
|
|
// break;
|
|
|
|
// case '/':
|
|
|
|
// mCurrent++;
|
|
|
|
// break;
|
|
|
|
// default:
|
|
|
|
// if ((*mCurrent == 'R') && (*(mCurrent+1) == '"'))
|
|
|
|
// skipRawString();
|
|
|
|
// else
|
|
|
|
// mCurrent++;
|
|
|
|
// }
|
|
|
|
// if (*mCurrent == ','
|
|
|
|
// || *mCurrent ==';'
|
|
|
|
// || *mCurrent ==')'
|
|
|
|
// || *mCurrent =='}'
|
|
|
|
// || *mCurrent ==0)
|
|
|
|
// break;
|
|
|
|
// }
|
|
|
|
//}
|
2021-08-14 12:33:02 +08:00
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipDoubleQuotes()
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
mCurrent++;
|
2021-08-16 23:17:48 +08:00
|
|
|
while (!(*mCurrent=='"' || *mCurrent == 0)) {
|
2021-08-14 12:33:02 +08:00
|
|
|
if (*mCurrent == '\\')
|
|
|
|
mCurrent+=2; // skip escaped char
|
|
|
|
else
|
|
|
|
mCurrent++;
|
|
|
|
}
|
2021-08-16 23:17:48 +08:00
|
|
|
if (*mCurrent!=0) {
|
2021-08-14 12:33:02 +08:00
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-05 08:42:54 +08:00
|
|
|
void CppTokenizer::skipPair(const QChar &cStart, const QChar cEnd)
|
2021-08-14 12:33:02 +08:00
|
|
|
{
|
|
|
|
mCurrent++;
|
2021-08-16 23:17:48 +08:00
|
|
|
while (*mCurrent != 0) {
|
2022-11-04 23:44:11 +08:00
|
|
|
if (*mCurrent == '(') {
|
2022-11-05 08:42:54 +08:00
|
|
|
skipPair('(', ')');
|
2022-11-04 23:44:11 +08:00
|
|
|
} else if (*mCurrent == '[') {
|
2022-11-05 08:42:54 +08:00
|
|
|
skipPair('[', ']');
|
2022-11-04 23:44:11 +08:00
|
|
|
} else if (*mCurrent == '{') {
|
2022-11-05 08:42:54 +08:00
|
|
|
skipPair('{', '}');
|
2021-08-14 12:59:54 +08:00
|
|
|
} else if (*mCurrent == cStart) {
|
2022-11-04 23:44:11 +08:00
|
|
|
skipPair(cStart, cEnd);
|
2021-08-14 12:59:54 +08:00
|
|
|
} else if (*mCurrent == cEnd) {
|
|
|
|
mCurrent++; // skip over end
|
|
|
|
break;
|
|
|
|
} else if ((*mCurrent == 'R') && (*(mCurrent+1) == '"')) {
|
|
|
|
if (cStart != '\'' && cStart!='\"')
|
|
|
|
skipRawString(); // don't do it inside AnsiString!
|
|
|
|
else
|
|
|
|
mCurrent++;
|
|
|
|
} else if (*mCurrent == '"') {
|
|
|
|
if (cStart != '\'' && cStart!='\"')
|
|
|
|
skipDoubleQuotes(); // don't do it inside AnsiString!
|
|
|
|
else
|
|
|
|
mCurrent++;
|
|
|
|
} else if (*mCurrent == '\'') {
|
|
|
|
if (cStart != '\'' && cStart!='\"')
|
|
|
|
skipSingleQuote(); // don't do it inside AnsiString!
|
|
|
|
else
|
|
|
|
mCurrent++;
|
|
|
|
} else {
|
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
}
|
2021-08-14 12:33:02 +08:00
|
|
|
}
|
|
|
|
|
2022-11-04 20:27:35 +08:00
|
|
|
bool CppTokenizer::skipAngleBracketPair()
|
|
|
|
{
|
|
|
|
QChar* backup=mCurrent;
|
2022-11-05 08:42:54 +08:00
|
|
|
QVector<QChar> stack;
|
2022-11-04 20:27:35 +08:00
|
|
|
while (*mCurrent != '\0') {
|
|
|
|
switch((*mCurrent).unicode()) {
|
|
|
|
case '<':
|
|
|
|
case '(':
|
|
|
|
case '[':
|
2022-11-05 08:42:54 +08:00
|
|
|
stack.push_back(*mCurrent);
|
2022-11-04 20:27:35 +08:00
|
|
|
break;
|
|
|
|
case ')':
|
2022-11-05 08:42:54 +08:00
|
|
|
while (!stack.isEmpty() && stack.back()!='(') {
|
|
|
|
stack.pop_back();
|
|
|
|
}
|
|
|
|
//pop up '('
|
|
|
|
if (stack.isEmpty()) {
|
|
|
|
mCurrent=backup;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
stack.pop_back();
|
|
|
|
break;
|
2022-11-04 20:27:35 +08:00
|
|
|
case ']':
|
2022-11-05 08:42:54 +08:00
|
|
|
while (!stack.isEmpty() && stack.back()!='[')
|
|
|
|
stack.pop_back();
|
|
|
|
//pop up '['
|
|
|
|
if (stack.isEmpty()) {
|
2022-11-04 20:27:35 +08:00
|
|
|
mCurrent=backup;
|
|
|
|
return false;
|
|
|
|
}
|
2022-11-05 08:42:54 +08:00
|
|
|
stack.pop_back();
|
2022-11-04 20:27:35 +08:00
|
|
|
break;
|
|
|
|
case '>':
|
2022-11-05 08:42:54 +08:00
|
|
|
if (stack.back()=='<')
|
|
|
|
stack.pop_back();
|
|
|
|
if (stack.isEmpty()) {
|
|
|
|
mCurrent++;
|
2022-11-04 20:27:35 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '{':
|
|
|
|
case '}':
|
|
|
|
case ';':
|
|
|
|
case '"':
|
|
|
|
case '\'':
|
|
|
|
mCurrent=backup;
|
|
|
|
return false;
|
|
|
|
case '-':
|
|
|
|
if (*(mCurrent+1)=='>') {
|
|
|
|
mCurrent=backup;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case '.':
|
|
|
|
if (*(mCurrent+1)!='.') {
|
|
|
|
mCurrent=backup;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
// skip
|
|
|
|
while (*(mCurrent+1)=='.')
|
|
|
|
mCurrent++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
mCurrent=backup;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipRawString()
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
|
|
|
mCurrent++; //skip R
|
|
|
|
bool noEscape = false;
|
|
|
|
while(true) {
|
|
|
|
mCurrent++;
|
|
|
|
switch(mCurrent->unicode()) {
|
|
|
|
case '(':
|
|
|
|
noEscape = true;
|
|
|
|
break;
|
|
|
|
case ')':
|
|
|
|
noEscape = false;
|
|
|
|
break;
|
|
|
|
}
|
2021-08-16 23:17:48 +08:00
|
|
|
if (*mCurrent == 0)
|
2021-08-14 18:55:42 +08:00
|
|
|
break;
|
|
|
|
if ((*mCurrent == '"') && !noEscape)
|
|
|
|
break;
|
|
|
|
}
|
2021-08-16 23:17:48 +08:00
|
|
|
if (*mCurrent!=0)
|
2021-08-14 18:55:42 +08:00
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipSingleQuote()
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
|
|
|
mCurrent++;
|
2021-08-16 23:17:48 +08:00
|
|
|
while (!(*mCurrent=='\'' || *mCurrent == 0)) {
|
2021-08-14 18:55:42 +08:00
|
|
|
if (*mCurrent == '\\')
|
|
|
|
mCurrent+=2; // skip escaped char
|
|
|
|
else
|
|
|
|
mCurrent++;
|
|
|
|
}
|
2021-08-16 23:17:48 +08:00
|
|
|
if (*mCurrent!=0) {
|
2021-08-14 18:55:42 +08:00
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipSplitLine()
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
|
|
|
mCurrent++; // skip '\'
|
|
|
|
while ( isLineChar(*mCurrent)) // skip newline
|
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipTemplateArgs()
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
|
|
|
if (*mCurrent != '<')
|
|
|
|
return;
|
|
|
|
|
2022-11-12 12:14:19 +08:00
|
|
|
if (skipAngleBracketPair())
|
|
|
|
return;
|
|
|
|
QChar* lastBracketPos = mCurrent;
|
|
|
|
bool shouldExit=false;
|
|
|
|
while (true) {
|
|
|
|
switch(mCurrent->unicode()) {
|
|
|
|
case '\0':
|
|
|
|
case ';':
|
|
|
|
case '}':
|
|
|
|
case '{':
|
|
|
|
shouldExit=true;
|
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
lastBracketPos = mCurrent;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (shouldExit)
|
|
|
|
break;
|
|
|
|
mCurrent++;
|
|
|
|
}
|
|
|
|
if (*lastBracketPos=='>')
|
|
|
|
mCurrent = lastBracketPos+1; //skip '>';
|
2021-08-14 18:55:42 +08:00
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipToEOL()
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
|
|
|
while (true) {
|
2021-08-16 23:17:48 +08:00
|
|
|
while (!isLineChar(*mCurrent) && (*mCurrent!=0)) {
|
2021-08-14 18:55:42 +08:00
|
|
|
mCurrent++;
|
|
|
|
}
|
2021-08-16 23:17:48 +08:00
|
|
|
if (*mCurrent==0)
|
2021-08-14 18:55:42 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
bool splitLine = (*(mCurrent - 1) == '\\');
|
|
|
|
|
|
|
|
while (isLineChar(*mCurrent))
|
|
|
|
mCurrent++;
|
|
|
|
|
2021-08-16 23:17:48 +08:00
|
|
|
if (!splitLine || *mCurrent==0)
|
2021-08-14 18:55:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::skipToNextToken()
|
2021-08-14 18:55:42 +08:00
|
|
|
{
|
|
|
|
while (isSpaceChar(*mCurrent) || isLineChar(*mCurrent))
|
2022-11-05 16:17:46 +08:00
|
|
|
mCurrent++;
|
2021-08-14 18:55:42 +08:00
|
|
|
}
|
|
|
|
|
2021-08-14 22:52:37 +08:00
|
|
|
void CppTokenizer::advance()
|
2021-08-13 22:53:26 +08:00
|
|
|
{
|
|
|
|
switch(mCurrent->unicode()) {
|
2022-11-05 08:42:54 +08:00
|
|
|
case '\"':
|
|
|
|
skipDoubleQuotes();
|
2021-08-13 22:53:26 +08:00
|
|
|
break;
|
2022-11-05 08:42:54 +08:00
|
|
|
case '\'':
|
|
|
|
skipSingleQuote();
|
2021-08-13 22:53:26 +08:00
|
|
|
break;
|
|
|
|
case '\\':
|
|
|
|
if (isLineChar(*(mCurrent + 1)))
|
|
|
|
skipSplitLine();
|
|
|
|
else
|
|
|
|
mCurrent++;
|
|
|
|
break;
|
2022-11-05 18:58:15 +08:00
|
|
|
case 'R':
|
|
|
|
if (*(mCurrent+1) == '"')
|
2021-08-13 22:53:26 +08:00
|
|
|
skipRawString();
|
|
|
|
else
|
|
|
|
mCurrent++;
|
2022-11-05 18:58:15 +08:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
mCurrent++;
|
2021-08-13 22:53:26 +08:00
|
|
|
}
|
|
|
|
}
|