2021-04-06 23:10:57 +08:00
|
|
|
#include "utils.h"
|
|
|
|
#include <QByteArray>
|
|
|
|
#include <QString>
|
|
|
|
#include <QTextCodec>
|
|
|
|
|
2021-04-08 10:29:21 +08:00
|
|
|
/**
 * @brief Guess the text encoding of a raw byte buffer.
 *
 * The buffer is scanned once from start to end:
 *  - if it begins with the bytes EF BB BF, ENCODING_UTF8_BOM is returned;
 *  - if every byte is below 0x80, ENCODING_ASCII is returned
 *    (this includes an empty buffer);
 *  - if the whole buffer is made of structurally well-formed UTF-8 sequences
 *    of 1 to 4 bytes, ENCODING_UTF8 is returned;
 *  - otherwise ENCODING_SYSTEM_DEFAULT is returned.
 *
 * Only the structure of each sequence (lead byte followed by the right number
 * of 10xxxxxx continuation bytes) is checked; overlong forms and encoded
 * surrogates are not rejected.
 *
 * @param content raw bytes to inspect
 * @return one of the ENCODING_* constants described above
 */
const QByteArray GetFileEncodingType(const QByteArray& content){
    const int size = content.length();

    // A leading EF BB BF is the UTF-8 byte order mark.
    if (size >= 3
            && static_cast<unsigned char>(content[0]) == 0xEF
            && static_cast<unsigned char>(content[1]) == 0xBB
            && static_cast<unsigned char>(content[2]) == 0xBF) {
        return ENCODING_UTF8_BOM;
    }

    bool allAscii = true;
    int pos = 0;
    while (pos < size) {
        const unsigned char lead = static_cast<unsigned char>(content[pos]);
        if (lead < 0x80) {
            ++pos; // plain ASCII byte
            continue;
        }

        // Number of continuation bytes the lead byte announces.
        int trailing = 0;
        if (lead < 0xC0) {
            // 0x80..0xBF is a continuation byte and cannot start a sequence.
            return ENCODING_SYSTEM_DEFAULT;
        } else if (lead < 0xE0) {
            trailing = 1; // 2-byte sequence
        } else if (lead < 0xF0) {
            trailing = 2; // 3-byte sequence
        } else if (lead < 0xF5) {
            // 4-byte sequence (U+10000..U+10FFFF). Previously these were
            // rejected, so valid UTF-8 text containing e.g. emoji was
            // misdetected as the system default encoding.
            trailing = 3;
        } else {
            // 0xF5..0xFF never appear in UTF-8.
            return ENCODING_SYSTEM_DEFAULT;
        }

        // The sequence must not run past the end of the buffer.
        if (size - pos <= trailing) {
            return ENCODING_SYSTEM_DEFAULT;
        }

        // Every continuation byte must look like 10xxxxxx.
        for (int k = 1; k <= trailing; ++k) {
            const unsigned char cont = static_cast<unsigned char>(content[pos + k]);
            if ((cont & 0xC0) != 0x80) {
                return ENCODING_SYSTEM_DEFAULT;
            }
        }

        allAscii = false;
        pos += trailing + 1;
    }

    if (allAscii) {
        return ENCODING_ASCII;
    }
    return ENCODING_UTF8;
}
|
|
|
|
|
2021-04-07 21:13:15 +08:00
|
|
|
bool isTextAllAscii(const QString& text) {
|
|
|
|
for (QChar c:text) {
|
|
|
|
if (c.unicode()>127) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2021-04-06 23:10:57 +08:00
|
|
|
}
|
2021-04-07 21:13:15 +08:00
|
|
|
|
|
|
|
|