From c51a870815c6f22fc17c21ee1910570a23952daa Mon Sep 17 00:00:00 2001 From: Esteban C Borsani Date: Mon, 30 Dec 2024 22:59:00 -0300 Subject: [PATCH] Fix casefold set (#151) --- regex.nimble | 2 +- src/regex/exptransformation.nim | 7 +++---- tests/tests_misc.nim | 11 ++++++++--- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/regex.nimble b/regex.nimble index efbf546..92328dd 100644 --- a/regex.nimble +++ b/regex.nimble @@ -8,7 +8,7 @@ srcDir = "src" skipDirs = @["tests", "bench", "docs"] requires "nim >= 1.6.0" -requires "unicodedb >= 0.13.1" +requires "unicodedb >= 0.13.2" template execTest(lang, target: static string) = doAssert lang in ["c", "js"] diff --git a/src/regex/exptransformation.nim b/src/regex/exptransformation.nim index 060965c..52666ae 100644 --- a/src/regex/exptransformation.nim +++ b/src/regex/exptransformation.nim @@ -183,13 +183,12 @@ func applyFlag(n: var Node, f: Flag) = n.cp = n.cp.simpleCaseFold # todo: apply recursevely to # shorthands of reInSet/reNotSet (i.e: [:ascii:]) - # XXX add all casefolds that map to the cp instead of swapCase if n.kind in {reInSet, reNotSet}: var cps = newSeq[Rune]() for cp in items n.cps: - let cp2 = cp.swapCase() - if cp != cp2: - cps.add cp2 + for cp2 in cp.resolveCaseFold: + if cp != cp2: + cps.add cp2 n.cps.add cps for sl in n.ranges[0 .. ^1]: let cpa = sl.a.swapCase() diff --git a/tests/tests_misc.nim b/tests/tests_misc.nim index e0d8989..5aef928 100644 --- a/tests/tests_misc.nim +++ b/tests/tests_misc.nim @@ -710,9 +710,14 @@ test "rebar": check findAllBounds("s", re2(r"ſ", {regexCaseless})) == @[0 .. 0] check findAllBounds("ſ", re2(r"S", {regexCaseless})) == @[0 .. 1] check findAllBounds("S", re2(r"ſ", {regexCaseless})) == @[0 .. 0] - # XXX fix - #check match("s", re2(r"[ſ]", {regexCaseless})) - #check match("ſ", re2(r"[s]", {regexCaseless})) + check match("s", re2(r"[ſ]", {regexCaseless})) + check match("ſ", re2(r"[s]", {regexCaseless})) + check match("S", re2(r"[ſ]", {regexCaseless})) + check match("ſ", re2(r"[S]", {regexCaseless})) + check findAllBounds("ſ", re2(r"[s]", {regexCaseless})) == @[0 .. 1] + check findAllBounds("s", re2(r"[ſ]", {regexCaseless})) == @[0 .. 0] + check findAllBounds("ſ", re2(r"[S]", {regexCaseless})) == @[0 .. 1] + check findAllBounds("S", re2(r"[ſ]", {regexCaseless})) == @[0 .. 0] check match("a", re2(r"A", {regexCaseless})) check match("A", re2(r"a", {regexCaseless})) check match("@", re2(r"@", {regexCaseless}))