Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ControllerScriptInterfaceLegacy: Windows 1252 fix #14108

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1060,67 +1060,69 @@ QByteArray ControllerScriptInterfaceLegacy::convertCharset(
using enum Charset;
switch (targetCharset) {
case ASCII:
return convertCharsetInternal(QStringLiteral("US-ASCII"), value);
return convertCharsetInternal(QLatin1String("US-ASCII"), value);
case UTF_8:
return convertCharsetInternal(QStringLiteral("UTF-8"), value);
return convertCharsetInternal(QLatin1String("UTF-8"), value);
case UTF_16LE:
return convertCharsetInternal(QStringLiteral("UTF-16LE"), value);
return convertCharsetInternal(QLatin1String("UTF-16LE"), value);
case UTF_16BE:
return convertCharsetInternal(QStringLiteral("UTF-16BE"), value);
return convertCharsetInternal(QLatin1String("UTF-16BE"), value);
case UTF_32LE:
return convertCharsetInternal(QStringLiteral("UTF-32LE"), value);
return convertCharsetInternal(QLatin1String("UTF-32LE"), value);
case UTF_32BE:
return convertCharsetInternal(QStringLiteral("UTF-32BE"), value);
return convertCharsetInternal(QLatin1String("UTF-32BE"), value);
case CentralEurope:
return convertCharsetInternal(QStringLiteral("windows-1250"), value);
return convertCharsetInternal(QLatin1String("windows-1250"), value);
case Cyrillic:
return convertCharsetInternal(QStringLiteral("windows-1251"), value);
case Latin1:
return convertCharsetInternal(QStringLiteral("windows-1252"), value);
return convertCharsetInternal(QLatin1String("windows-1251"), value);
case WesternEurope:
return convertCharsetInternal(QLatin1String("windows-1252"), value);
case Greek:
return convertCharsetInternal(QStringLiteral("windows-1253"), value);
return convertCharsetInternal(QLatin1String("windows-1253"), value);
case Turkish:
return convertCharsetInternal(QStringLiteral("windows-1254"), value);
return convertCharsetInternal(QLatin1String("windows-1254"), value);
case Hebrew:
return convertCharsetInternal(QStringLiteral("windows-1255"), value);
return convertCharsetInternal(QLatin1String("windows-1255"), value);
case Arabic:
return convertCharsetInternal(QStringLiteral("windows-1256"), value);
return convertCharsetInternal(QLatin1String("windows-1256"), value);
case Baltic:
return convertCharsetInternal(QStringLiteral("windows-1257"), value);
return convertCharsetInternal(QLatin1String("windows-1257"), value);
case Vietnamese:
return convertCharsetInternal(QStringLiteral("windows-1258"), value);
return convertCharsetInternal(QLatin1String("windows-1258"), value);
case Latin9:
return convertCharsetInternal(QStringLiteral("ISO-8859-15"), value);
return convertCharsetInternal(QLatin1String("ISO-8859-15"), value);
case Shift_JIS:
return convertCharsetInternal(QStringLiteral("Shift_JIS"), value);
return convertCharsetInternal(QLatin1String("Shift_JIS"), value);
case EUC_JP:
return convertCharsetInternal(QStringLiteral("EUC-JP"), value);
return convertCharsetInternal(QLatin1String("EUC-JP"), value);
case EUC_KR:
return convertCharsetInternal(QStringLiteral("EUC-KR"), value);
return convertCharsetInternal(QLatin1String("EUC-KR"), value);
case Big5_HKSCS:
return convertCharsetInternal(QStringLiteral("Big5-HKSCS"), value);
return convertCharsetInternal(QLatin1String("Big5-HKSCS"), value);
case KOI8_U:
return convertCharsetInternal(QStringLiteral("KOI8-U"), value);
return convertCharsetInternal(QLatin1String("KOI8-U"), value);
case UCS2:
return convertCharsetInternal(QStringLiteral("ISO-10646-UCS-2"), value);
return convertCharsetInternal(QLatin1String("ISO-10646-UCS-2"), value);
case SCSU:
return convertCharsetInternal(QStringLiteral("SCSU"), value);
return convertCharsetInternal(QLatin1String("SCSU"), value);
case BOCU_1:
return convertCharsetInternal(QStringLiteral("BOCU-1"), value);
return convertCharsetInternal(QLatin1String("BOCU-1"), value);
case CESU_8:
return convertCharsetInternal(QStringLiteral("CESU-8"), value);
return convertCharsetInternal(QLatin1String("CESU-8"), value);
case Latin1:
return convertCharsetInternal(QLatin1String("ISO-8859-1"), value);
}

m_pScriptEngineLegacy->logOrThrowError(QStringLiteral("Unknown charset specified"));
return QByteArray();
}

QByteArray ControllerScriptInterfaceLegacy::convertCharsetInternal(
const QString& targetCharset, const QString& value) {
QLatin1String targetCharset, const QString& value) {
#if QT_VERSION >= QT_VERSION_CHECK(6, 8, 0)
QAnyStringView encoderName = QAnyStringView(targetCharset);
#else
QByteArray encoderNameArray = targetCharset.toUtf8();
const char* encoderName = encoderNameArray.constData();
const char* encoderName = targetCharset.data();
#endif
QStringEncoder fromUtf16 = QStringEncoder(encoderName);
if (!fromUtf16.isValid()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class ControllerScriptInterfaceLegacy : public QObject {
UTF_32BE,
CentralEurope,
Cyrillic,
Latin1,
WesternEurope,
Greek,
Turkish,
Hebrew,
Expand All @@ -45,7 +45,8 @@ class ControllerScriptInterfaceLegacy : public QObject {
UCS2,
SCSU,
BOCU_1,
CESU_8
CESU_8,
Latin1
};
Q_ENUM(Charset)

Expand Down Expand Up @@ -122,7 +123,7 @@ class ControllerScriptInterfaceLegacy : public QObject {
const QJSValue& callback,
bool skipSuperseded = false);

QByteArray convertCharsetInternal(const QString& targetCharset, const QString& value);
QByteArray convertCharsetInternal(QLatin1String targetCharset, const QString& value);

QHash<ConfigKey, ControlObjectScript*> m_controlCache;
ControlObjectScript* getControlObjectScript(const QString& group, const QString& name);
Expand Down
84 changes: 67 additions & 17 deletions src/test/controllerscriptenginelegacy_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,22 +661,47 @@ TEST_F(ControllerScriptEngineLegacyTest, connectionExecutesWithCorrectThisObject
EXPECT_DOUBLE_EQ(1.0, pass->get());
}


#if QT_VERSION >= QT_VERSION_CHECK(6, 5, 0) // Latin9 is available from Qt 6.5
TEST_F(ControllerScriptEngineLegacyTest, convertCharsetCorrectValueStringCharset) {
const auto result = evaluate(
"engine.convertCharset(engine.Charset.Latin9, 'Hello!')");
"engine.convertCharset(engine.Charset.Latin9, 'Hello!')");

EXPECT_EQ(qjsvalue_cast<QByteArray>(result),
QByteArrayView::fromArray({'\x48', '\x65', '\x6c', '\x6c', '\x6f', '\x21'}));
QByteArrayView::fromArray({'\x48',
'\x65',
'\x6c',
'\x6c',
'\x6f',
'\x21',
'\x20',
'\xA4'}));
}

TEST_F(ControllerScriptEngineLegacyTest, convertCharsetUnsupportedChars) {
auto result = qjsvalue_cast<QByteArray>(
evaluate("engine.convertCharset(engine.Charset.Latin9, 'مايأ نامز')"));
evaluate("engine.convertCharset(engine.Charset.Latin9, 'مايأ نامز')"));
char sub = '\x1A'; // ASCII/Latin9 SUB character
EXPECT_EQ(result,
QByteArrayView::fromArray(
{sub, sub, sub, sub, '\x20', sub, sub, sub, sub}));
{sub, sub, sub, sub, '\x20', sub, sub, sub, sub, '\x20', sub}));
}
#endif

// Converting to Latin1: the ASCII part and '¤' (byte 0xA4) are expected to be
// encoded as single bytes, while '€' is expected to come back as the
// replacement character Qt emits for characters it cannot encode.
TEST_F(ControllerScriptEngineLegacyTest, convertCharsetLatin1Eur) {
    constexpr char kReplacement = '?'; // used by Qt for substitution
    // "Hello! " followed by '¤' and the replacement for '€'.
    const char expectedBytes[] = {
            'H', 'e', 'l', 'l', 'o', '!', ' ', '\xA4', kReplacement};

    const auto converted = evaluate(
            "engine.convertCharset(engine.Charset.Latin1, 'Hello! ¤€')");

    EXPECT_EQ(qjsvalue_cast<QByteArray>(converted),
            QByteArrayView::fromArray(expectedBytes));
}

TEST_F(ControllerScriptEngineLegacyTest, convertCharsetMultiByteEncoding) {
Expand All @@ -703,24 +728,26 @@ TEST_F(ControllerScriptEngineLegacyTest, convertCharsetMultiByteEncoding) {
'\x06'}));
}

#define COMPLICATEDSTRINGLITERAL "Hello, 世界! שלום! こんにちは! 안녕하세요! 😊"
#define COMPLICATEDSTRINGLITERAL "Hello, 世界! שלום! こんにちは! 안녕하세요! 😊"

static int convertedCharsetForString(ControllerScriptInterfaceLegacy::Charset charset) {
// the expected length after conversion of COMPLICATEDSTRINGLITERAL
using enum ControllerScriptInterfaceLegacy::Charset;

#if QT_VERSION >= QT_VERSION_CHECK(6, 4, 0)
switch (charset) {
case UTF_8:
return 63;
return 67;
case UTF_16LE:
case UTF_16BE:
return 66;
return 70;
case UTF_32LE:
case UTF_32BE:
return 128;
return 136;
case ASCII:
case CentralEurope:
case Cyrillic:
case Latin1:
case WesternEurope:
case Greek:
case Turkish:
case Hebrew:
Expand All @@ -729,21 +756,43 @@ static int convertedCharsetForString(ControllerScriptInterfaceLegacy::Charset ch
case Vietnamese:
case Latin9:
case KOI8_U:
return 32;
case Shift_JIS:
return 34;
case Latin1:
// Latin1 is handled by Qt internally and 😊 becomes "??"
return 35;
case EUC_JP:
return 53;
case Shift_JIS:
case EUC_KR:
case Big5_HKSCS:
return 49;
return 52;
case UCS2:
return 68;
return 72;
case SCSU:
return 51;
return 55;
case BOCU_1:
return 53;
return 56;
case CESU_8:
return 65;
return 69;
}
#else
// Qt < 6.4 only supports these conversions
switch (charset) {
case UTF_8:
return 67;
case UTF_16LE:
case UTF_16BE:
return 70;
case UTF_32LE:
case UTF_32BE:
return 136;
case Latin1:
return 35;
default:
return 0;
}
#endif

// unreachable, but gtest does not offer a way to assert this here.
// returning 0 will almost certainly also result in a failure.
return 0;
Expand All @@ -763,6 +812,7 @@ TEST_F(ControllerScriptEngineLegacyTest, convertCharsetAllCharset) {
"'" COMPLICATEDSTRINGLITERAL "')")
.arg(key);
auto result = qjsvalue_cast<QByteArray>(evaluate(source));
qDebug() << result;
EXPECT_EQ(result.size(), convertedCharsetForString(enumValue))
<< "Unexpected length of converted string for encoding: '"
<< key.toStdString() << "'";
Expand Down
Loading