work save: cpp parser tokenizer

This commit is contained in:
royqh1979@gmail.com 2021-08-13 22:53:26 +08:00
parent 34dd3eb474
commit a169306e86
4 changed files with 201 additions and 1 deletions

View File

@ -17,6 +17,7 @@ SOURCES += \
compiler/filecompiler.cpp \ compiler/filecompiler.cpp \
compiler/stdincompiler.cpp \ compiler/stdincompiler.cpp \
parser/cpppreprocessor.cpp \ parser/cpppreprocessor.cpp \
parser/cpptokenizer.cpp \
parser/parserutils.cpp \ parser/parserutils.cpp \
parser/statementmodel.cpp \ parser/statementmodel.cpp \
qsynedit/Search.cpp \ qsynedit/Search.cpp \
@ -77,6 +78,7 @@ HEADERS += \
compiler/filecompiler.h \ compiler/filecompiler.h \
compiler/stdincompiler.h \ compiler/stdincompiler.h \
parser/cpppreprocessor.h \ parser/cpppreprocessor.h \
parser/cpptokenizer.h \
parser/parserutils.h \ parser/parserutils.h \
parser/statementmodel.h \ parser/statementmodel.h \
qsynedit/Search.h \ qsynedit/Search.h \

View File

@ -0,0 +1,131 @@
#include "cpptokenizer.h"
// Constructs the tokenizer. The only work done here is handing `parent`
// to the QObject base class; no tokenizer state is set up in this body.
cpptokenizer::cpptokenizer(QObject *parent)
    : QObject(parent)
{
}
void cpptokenizer::addToken(const QString &sText, int iLine)
{
PToken token = std::make_shared<Token>();
token->text = sText;
token->line = iLine;
mTokenList.append(token);
}
// Moves the mLineCount cursor forward until it reaches mCurrent (or a
// terminating '\0'), incrementing mCurrentLine for every '\n' passed.
void cpptokenizer::countLines()
{
    for (; (*mLineCount != '\0') && (mLineCount < mCurrent); ++mLineCount) {
        if (*mLineCount == '\n') {
            ++mCurrentLine;
        }
    }
}
// Extracts the text covered by skipPair('(', ')') starting at mCurrent,
// passes it through simplifyArgs(), and returns it.
//
// If the cursor then sits on '.' or "->", everything up to the next
// '\0', '(', ';', '{', '}', ')', line character or space character is
// skipped as well. Finally the cursor is moved on with skipToNextToken().
QString cpptokenizer::getArguments()
{
    QChar* const begin = mCurrent;
    skipPair('(', ')');
    QString args(begin, mCurrent - begin);
    simplifyArgs(args);

    const bool memberAccessFollows =
            (*mCurrent == '.')
            || ((*mCurrent == '-') && (*(mCurrent + 1) == '>'));
    if (memberAccessFollows) {
        // Step over the member-access chain that trails the argument list.
        for (;;) {
            if (*mCurrent == '\0' || *mCurrent == '(' || *mCurrent == ';')
                break;
            if (*mCurrent == '{' || *mCurrent == '}' || *mCurrent == ')')
                break;
            if (isLineChar(*mCurrent) || isSpaceChar(*mCurrent))
                break;
            ++mCurrent;
        }
    }

    skipToNextToken();
    return args;
}
// Tokenizes the init part of a `for (...)` header.
//
// Starting one character past mCurrent, tokens are fetched with
// getNextToken(true, true, false), simplified, and added to the token
// list until an empty token, ";" or ":" is produced (":" ends the init
// part of a for-each loop). Afterwards the cursor is rewound to where it
// started and skipPair('(', ')') is applied from there.
//
// Always returns an empty string; the tokens are delivered via addToken().
QString cpptokenizer::getForInit()
{
    QChar* const headerStart = mCurrent;

    // Step into the init statement
    ++mCurrent;

    for (;;) {
        QString token = getNextToken(true, true, false);
        simplify(token);
        if (token.isEmpty())
            break;              // nothing more to read
        addToken(token, mCurrentLine);
        if (token == ";" || token == ":")
            break;              // end of the init part
    }

    // Rewind, then skip the whole parenthesised header
    mCurrent = headerStart;
    skipPair('(', ')');
    return "";
}
// Reads a numeric literal starting at mCurrent.
//
// If the cursor is on a digit character, it is moved with advance() for
// as long as it stays on digit or hex characters. The consumed text is
// returned; when the character following it is '.', that '.' is appended
// to the result (the cursor itself is not moved past the '.' here).
//
// Returns an empty string when the cursor was not on a digit character.
QString cpptokenizer::getNumber()
{
    QChar* offset = mCurrent;
    if (isDigitChar(*mCurrent)) {
        while (isDigitChar(*mCurrent) || isHexChar(*mCurrent)) {
            advance();
        }
    }
    QString result;
    // Fixed: compare against the mCurrent member; the previous code named
    // a non-existent `mpCurrent` and did not compile.
    if (offset != mCurrent) {
        result = QString(offset, mCurrent - offset);
        if (*mCurrent == '.') // keep '.' for decimal
            result += *mCurrent;
    }
    return result;
}
// Moves mCurrent forward by one lexical step.
//
// Depending on the character under the cursor, one of the skip helpers
// is called:
//   '"'                      -> skipDoubleQuotes()
//   '\''                     -> skipSingleQuote()
//   '='                      -> skipAssignment()
//   one of / & * ! | + - ~   -> skipAssignment() when followed by '='
//   '\\'                     -> skipSplitLine() when followed by a line char
//   'R'                      -> skipRawString() when followed by '"'
// In every other situation the cursor simply moves one character forward.
void cpptokenizer::advance()
{
    switch (mCurrent->unicode()) {
    case '\"':
        skipDoubleQuotes();
        return;
    case '\'':
        skipSingleQuote();
        return;
    case '=':
        skipAssignment();
        return;
    case '/':
    case '&':
    case '*':
    case '!':
    case '|':
    case '+':
    case '-':
    case '~':
        if (*(mCurrent + 1) == '=') {
            skipAssignment();
            return;
        }
        break;
    case '\\':
        if (isLineChar(*(mCurrent + 1))) {
            skipSplitLine();
            return;
        }
        break;
    case 'R':
        if (*(mCurrent + 1) == '"') {
            skipRawString();
            return;
        }
        break;
    default:
        break;
    }
    // No helper applied: plain single-character step.
    ++mCurrent;
}

View File

@ -0,0 +1,67 @@
#ifndef CPPTOKENIZER_H
#define CPPTOKENIZER_H
#include <QObject>
#include "parserutils.h"
/**
 * Tokenizer for C++ source text.
 *
 * The tokenizer walks a character buffer through raw QChar cursors
 * (mStart / mCurrent / mLineCount) and collects the tokens it finds,
 * each with a line number, in mTokenList.
 *
 * The cursor and counter members carry default initializers so that a
 * freshly constructed object never holds indeterminate pointers or
 * counters.
 */
class cpptokenizer : public QObject
{
    Q_OBJECT
public:
    // A single token: its text and the line number recorded for it.
    struct Token {
        QString text;
        int line;
    };
    using PToken = std::shared_ptr<Token>;
    using TokenList = QVector<PToken>;

    explicit cpptokenizer(QObject *parent = nullptr);
signals:
private:
    // Appends a Token{sText, iLine} to mTokenList.
    void addToken(const QString& sText, int iLine);
    // Advances mLineCount up to mCurrent, bumping mCurrentLine per '\n'.
    void countLines();
    PToken getToken(int index);

    // Cursor-skipping helpers.
    void skipSplitLine();
    void skipToNextToken();
    void skipDoubleQuotes();
    void skipRawString();
    void skipSingleQuote();
    void skipPair(const QChar& cStart, const QChar cEnd, QSet<QChar> failChars = QSet<QChar>());
    void skipAssignment();
    void skipTemplateArgs();

    // Token extractors: each returns the text of the construct at mCurrent.
    QString getArguments();
    QString getForInit();   // always returns ""; emits tokens via addToken()
    QString getNumber();
    QString getPreprocessor();
    QString getWord(
            bool bSkipParenthesis = false,
            bool bSkipArray = false,
            bool bSkipBlock = false);

    // Classification predicates.
    bool isWord();
    bool isNumber();
    bool isPreprocessor();
    bool isArguments();
    bool isForInit();

    QString getNextToken(
            bool bSkipParenthesis = false,
            bool bSkipArray = false,
            bool bSkipBlock = false);
    void simplify(QString& output);
    void simplifyArgs(QString& output);
    // Moves mCurrent one lexical step forward.
    void advance();
    bool openFile(const QString& fileName);
private:
    QStringList mBuffer;
    QString mBufferStr;
    QChar* mStart = nullptr;      // NOTE(review): presumably start of the buffer being scanned - confirm
    QChar* mCurrent = nullptr;    // current scan position
    QChar* mLineCount = nullptr;  // position up to which lines have been counted (see countLines)
    int mCurrentLine = 0;         // line counter, incremented by countLines()
    QString mLastToken;
    int mEnd = 0;
    TokenList mTokenList;         // tokens collected via addToken()
    QString mFilename;
};
#endif // CPPTOKENIZER_H

View File

@ -6,6 +6,7 @@
#include <functional> #include <functional>
#include <QString> #include <QString>
#include <QRect> #include <QRect>
#include <QStringList>
class QByteArray; class QByteArray;
class QString; class QString;
@ -138,5 +139,4 @@ finally(F&& f) noexcept
return final_action<typename std::remove_cv<typename std::remove_reference<F>::type>::type>( return final_action<typename std::remove_cv<typename std::remove_reference<F>::type>::type>(
std::forward<F>(f)); std::forward<F>(f));
} }
#endif // UTILS_H #endif // UTILS_H