Skip to content

Commit

Permalink
Improve natural sort algorithm
Browse files Browse the repository at this point in the history
1. Use a proper case-folding function instead of `toLower()`.
2. Use locale-aware comparison instead of comparing Unicode code points.
   Now `a` comes before `A`, which matches the result from QCollator. A nice side effect
   is that it now properly compares locale-specific characters (for example `C`, `Č`).
3. Improve testing. Now the test is runnable and stable on all platforms.

PR  qbittorrent#20208.
  • Loading branch information
Chocobo1 authored Jan 8, 2024
1 parent e69f857 commit 5b3b56c
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 34 deletions.
8 changes: 3 additions & 5 deletions src/base/utils/compare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
#include <QChar>
#include <QString>

#ifndef QBT_USE_QCOLLATOR
int Utils::Compare::naturalCompare(const QString &left, const QString &right, const Qt::CaseSensitivity caseSensitivity)
{
// Return value <0: `left` is smaller than `right`
Expand All @@ -45,8 +44,8 @@ int Utils::Compare::naturalCompare(const QString &left, const QString &right, co
if ((posL == left.size()) || (posR == right.size()))
return (left.size() - right.size()); // when a shorter string is another string's prefix, shorter string place before longer string

const QChar leftChar = (caseSensitivity == Qt::CaseSensitive) ? left[posL] : left[posL].toLower();
const QChar rightChar = (caseSensitivity == Qt::CaseSensitive) ? right[posR] : right[posR].toLower();
const QChar leftChar = (caseSensitivity == Qt::CaseSensitive) ? left[posL] : left[posL].toCaseFolded();
const QChar rightChar = (caseSensitivity == Qt::CaseSensitive) ? right[posR] : right[posR].toCaseFolded();
// Compare only non-digits.
// Numbers should be compared as a whole
// otherwise the string->int conversion can yield a wrong value
Expand Down Expand Up @@ -89,8 +88,7 @@ int Utils::Compare::naturalCompare(const QString &left, const QString &right, co
}
else
{
return (leftChar.unicode() - rightChar.unicode());
return QString::localeAwareCompare(leftChar, rightChar);
}
}
}
#endif
29 changes: 14 additions & 15 deletions src/base/utils/compare.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,24 +34,35 @@
// for QT_FEATURE_xxx, see: https://wiki.qt.io/Qt5_Build_System#How_to
#include <QtCore/private/qtcore-config_p.h>

#ifndef QBT_USE_QCOLLATOR
// macOS and Windows support 'case sensitivity' and 'numeric mode' natively
// https://github.com/qt/qtbase/blob/6.0/src/corelib/CMakeLists.txt#L777-L793
// https://github.com/qt/qtbase/blob/6.0/src/corelib/text/qcollator_macx.cpp#L74-L77
// https://github.com/qt/qtbase/blob/6.0/src/corelib/text/qcollator_win.cpp#L72-L78
#if ((QT_FEATURE_icu == 1) || defined(Q_OS_MACOS) || defined(Q_OS_WIN))
#define QBT_USE_QCOLLATOR
#define QBT_USE_QCOLLATOR 1
#include <QCollator>
#else
#define QBT_USE_QCOLLATOR 0
#endif
#endif

class QString;

namespace Utils::Compare
{
#ifdef QBT_USE_QCOLLATOR
int naturalCompare(const QString &left, const QString &right, Qt::CaseSensitivity caseSensitivity);

template <Qt::CaseSensitivity caseSensitivity>
class NaturalCompare
{
public:
#if (QBT_USE_QCOLLATOR == 0)
int operator()(const QString &left, const QString &right) const
{
    // Fallback path (no QCollator): delegate to the hand-written natural
    // comparison, forwarding the case sensitivity chosen at compile time.
    const int ordering = naturalCompare(left, right, caseSensitivity);
    return ordering;
}
#else
NaturalCompare()
{
m_collator.setNumericMode(true);
Expand All @@ -65,20 +76,8 @@ namespace Utils::Compare

private:
QCollator m_collator;
};
#else
int naturalCompare(const QString &left, const QString &right, Qt::CaseSensitivity caseSensitivity);

template <Qt::CaseSensitivity caseSensitivity>
class NaturalCompare
{
public:
int operator()(const QString &left, const QString &right) const
{
return naturalCompare(left, right, caseSensitivity);
}
};
#endif
};

template <Qt::CaseSensitivity caseSensitivity>
class NaturalLessThan
Expand Down
39 changes: 25 additions & 14 deletions test/testutilscompare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@
* exception statement from your version.
*/

#include <tuple>

#include <QLocale>
#include <QObject>
#include <QTest>

#include "base/global.h"

// only test qbt's own implementation, not QCollator
#define QBT_USE_QCOLLATOR 0
#include "base/utils/compare.h"

#ifndef QBT_USE_QCOLLATOR // only test qbt own implementation, not QCollator
namespace
{
enum class CompareResult
Expand All @@ -59,8 +60,8 @@ namespace
{u"a"_s, u""_s, CompareResult::Greater, CompareResult::Greater},

{u"a"_s, u"a"_s, CompareResult::Equal, CompareResult::Equal},
{u"A"_s, u"a"_s, CompareResult::Equal, CompareResult::Less}, // ascii code of 'A' is smaller than 'a'
{u"a"_s, u"A"_s, CompareResult::Equal, CompareResult::Greater},
{u"A"_s, u"a"_s, CompareResult::Equal, CompareResult::Greater},
{u"a"_s, u"A"_s, CompareResult::Equal, CompareResult::Less},

{u"0"_s, u"0"_s, CompareResult::Equal, CompareResult::Equal},
{u"1"_s, u"0"_s, CompareResult::Greater, CompareResult::Greater},
Expand All @@ -71,17 +72,17 @@ namespace
{u"😁"_s, u"😀"_s, CompareResult::Greater, CompareResult::Greater},

{u"a1"_s, u"a1"_s, CompareResult::Equal, CompareResult::Equal},
{u"A1"_s, u"a1"_s, CompareResult::Equal, CompareResult::Less},
{u"a1"_s, u"A1"_s, CompareResult::Equal, CompareResult::Greater},
{u"A1"_s, u"a1"_s, CompareResult::Equal, CompareResult::Greater},
{u"a1"_s, u"A1"_s, CompareResult::Equal, CompareResult::Less},

{u"a1"_s, u"a2"_s, CompareResult::Less, CompareResult::Less},
{u"A1"_s, u"a2"_s, CompareResult::Less, CompareResult::Less},
{u"a1"_s, u"A2"_s, CompareResult::Less, CompareResult::Greater},
{u"A1"_s, u"a2"_s, CompareResult::Less, CompareResult::Greater},
{u"a1"_s, u"A2"_s, CompareResult::Less, CompareResult::Less},
{u"A1"_s, u"A2"_s, CompareResult::Less, CompareResult::Less},

{u"abc100"_s, u"abc99"_s, CompareResult::Greater, CompareResult::Greater},
{u"ABC100"_s, u"abc99"_s, CompareResult::Greater, CompareResult::Less},
{u"abc100"_s, u"ABC99"_s, CompareResult::Greater, CompareResult::Greater},
{u"ABC100"_s, u"abc99"_s, CompareResult::Greater, CompareResult::Greater},
{u"abc100"_s, u"ABC99"_s, CompareResult::Greater, CompareResult::Less},
{u"ABC100"_s, u"ABC99"_s, CompareResult::Greater, CompareResult::Greater},

{u"100abc"_s, u"99abc"_s, CompareResult::Greater, CompareResult::Greater},
Expand Down Expand Up @@ -135,7 +136,6 @@ namespace
}
}
}
#endif

class TestUtilsCompare final : public QObject
{
Expand All @@ -145,8 +145,20 @@ class TestUtilsCompare final : public QObject
public:
TestUtilsCompare() = default;

#ifndef QBT_USE_QCOLLATOR // only test qbt own implementation, not QCollator
private slots:
void initTestCase() const
{
// Test will fail if ran with `C` locale. This is because `C` locale compare chars by code points
// and doesn't take account of human expectations
QLocale::setDefault(QLocale::English);
}

void cleanupTestCase() const
{
// restore global state
QLocale::setDefault(QLocale::system());
}

void testNaturalCompareCaseInsensitive() const
{
const Utils::Compare::NaturalCompare<Qt::CaseInsensitive> cmp;
Expand Down Expand Up @@ -178,7 +190,6 @@ private slots:
for (const TestData &data : testData)
testLessThan(data, cmp(data.lhs, data.rhs), data.caseSensitiveResult);
}
#endif
};

QTEST_APPLESS_MAIN(TestUtilsCompare)
Expand Down

0 comments on commit 5b3b56c

Please sign in to comment.