diff --git a/src/regex.nim b/src/regex.nim index 2165da3..4738964 100644 --- a/src/regex.nim +++ b/src/regex.nim @@ -319,7 +319,7 @@ This flag makes ascii mode ``(?-u)`` the default. :test: let flags = {regexArbitraryBytes} doAssert match("\xff", re2(r"\xff", flags)) - #doAssert match("\xf8\xa1\xa1\xa1\xa1", re2(r".+", flags)) + doAssert match("\xf8\xa1\xa1\xa1\xa1", re2(r".+", flags)) Beware of (un)expected behaviour when mixin UTF-8 characters. @@ -1581,6 +1581,7 @@ when isMainModule: doAssert match("aΪ", re2"\w(?<=a)Ϊ") doAssert match("Ϊb", re2"\w(?<=Ϊ)b") doAssert match("弢Ⓐ", re2"\w(?<=弢)Ⓐ") + doAssert match("弢", re2"(?-u).+") block: # Follows Nim re's behaviour doAssert match("abc", re2"(?<=a)bc", m, start = 1) doAssert(not match("abc", re2"(?<=x)bc", m, start = 1)) diff --git a/src/regex/exptransformation.nim b/src/regex/exptransformation.nim index a549436..21b6bdc 100644 --- a/src/regex/exptransformation.nim +++ b/src/regex/exptransformation.nim @@ -118,10 +118,6 @@ func toAsciiKind(k: NodeKind): NodeKind = reNotDigitAscii of reNotWhiteSpace: reNotWhiteSpaceAscii - of reAny: - reAnyAscii - of reAnyNL: - reAnyNLAscii else: k diff --git a/src/regex/nfamacro.nim b/src/regex/nfamacro.nim index a4535f8..83f6d97 100644 --- a/src/regex/nfamacro.nim +++ b/src/regex/nfamacro.nim @@ -120,12 +120,8 @@ func genMatch(c: NimNode, n: Node): NimNode = quote do: not `whiteSpaceMatch` of reAny: quote do: `c` != '\L'.ord - of reAnyAscii: - quote do: `c` <= 128 and `c` != '\L'.ord of reAnyNL: quote do: true - of reAnyNlAscii: - quote do: `c` <= 128 of reCharCI: let cp2Lit = newLit n.cp.swapCase().int32 quote do: `c` == `cpLit` or `c` == `cp2Lit` diff --git a/src/regex/nodematch.nim b/src/regex/nodematch.nim index 2bda7d8..f4cfc12 100644 --- a/src/regex/nodematch.nim +++ b/src/regex/nodematch.nim @@ -95,10 +95,6 @@ func isDigitAscii(r: Rune): bool {.inline.} = else: false -func isAnyAscii(r: Rune): bool {.inline.} = - (r.int <= int8.high and - r != lineBreakRune) - # todo: can not use unicodeplus due to # https://github.com/nim-lang/Nim/issues/7059 func swapCase*(r: Rune): Rune = @@ -160,10 +156,6 @@ func match*(n: Node, r: Rune): bool {.inline.} = not r.isWhiteSpaceAscii() of reNotUCC: r.unicodeCategory() notin n.cc - of reAnyAscii: - r.isAnyAscii() - of reAnyNLAscii: - r.isAnyAscii() or r == lineBreakRune else: assert n.kind == reChar n.cp == r diff --git a/src/regex/types.nim b/src/regex/types.nim index 6542146..b0b293e 100644 --- a/src/regex/types.nim +++ b/src/regex/types.nim @@ -84,8 +84,6 @@ type reNotAlphaNumAscii, # \W ascii only reNotDigitAscii, # \D ascii only reNotWhiteSpaceAscii, # \S ascii only - reAnyAscii, # . ascii only - reAnyNlAscii, # . new-line ascii only reInSet, # [abc] reNotSet, # [^abc] reLookahead, # (?=...) @@ -261,9 +259,7 @@ const reWhiteSpaceAscii, reNotAlphaNumAscii, reNotDigitAscii, - reNotWhiteSpaceAscii, - reAnyAscii, - reAnyNLAscii} + reNotWhiteSpaceAscii} repetitionKind* = { reZeroOrMore, reOneOrMore, @@ -310,7 +306,7 @@ func `$`*(n: Node): string = of reNotDigit, reNotDigitAscii: r"\D" of reNotWhiteSpace, reNotWhiteSpaceAscii: r"\S" of reNotUCC: r"\PN" - of reAny, reAnyNl, reAnyAscii, reAnyNlAscii: "." + of reAny, reAnyNl: "." of reInSet, reNotSet: var str = "" str.add '[' diff --git a/tests/tests2.nim b/tests/tests2.nim index 22b4e67..0be6a29 100644 --- a/tests/tests2.nim +++ b/tests/tests2.nim @@ -1232,6 +1232,8 @@ test "tflags": check(not "Ǝ".isMatch(re2"(?-u)[\w]")) check(not "\t".isMatch(re2"(?-u)[\w]")) check "ƎƎ".isMatch(re2"(?-u)[^\w](?u)\w") + check isMatch("弢", re2"(?u).+") + check isMatch("弢", re2"(?-u).+") check "a".isMatch(re2"(?x)a") check "a".isMatch(re2"(?x)a ") @@ -3156,9 +3158,9 @@ when not defined(js) or NimMajor >= 2: check match("abcd", re2(r"(?-su).{4}", flags)) check match("abcd", re2(r"(?s-u).{4}", flags)) check match("abcd", re2(r"(?u-s).{4}", flags)) - #check match("弢", re2(r".{4}", flags)) # XXX should match + check match("弢", re2(r".{4}", flags)) check match("弢", re2(r"(?u).{4}", flags)) - check(not match("弢", re2(r"(?-u).{4}", flags))) + check match("弢", re2(r"(?-u).{4}", flags)) check(not match("\n", re2(r".", flags))) check match("\n", re2(r"(?s).", flags)) check(not match("\n", re2(r"(?u).", flags)))