Skip to content

Commit

Permalink
Fix dot to match anything in ascii mode (#135)
Browse files Browse the repository at this point in the history
fix dot to match anything in ascii mode
  • Loading branch information
nitely authored Jan 5, 2024
1 parent 77aa186 commit 2d0f491
Show file tree
Hide file tree
Showing 6 changed files with 8 additions and 25 deletions.
3 changes: 2 additions & 1 deletion src/regex.nim
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ This flag makes ascii mode ``(?-u)`` the default.
:test:
let flags = {regexArbitraryBytes}
doAssert match("\xff", re2(r"\xff", flags))
#doAssert match("\xf8\xa1\xa1\xa1\xa1", re2(r".+", flags))
doAssert match("\xf8\xa1\xa1\xa1\xa1", re2(r".+", flags))
Beware of (un)expected behaviour when mixing UTF-8 characters.
Expand Down Expand Up @@ -1581,6 +1581,7 @@ when isMainModule:
doAssert match("", re2"\w(?<=a)Ϊ")
doAssert match("Ϊb", re2"\w(?<=Ϊ)b")
doAssert match("弢Ⓐ", re2"\w(?<=弢)Ⓐ")
doAssert match("弢", re2"(?-u).+")
block: # Follows Nim re's behaviour
doAssert match("abc", re2"(?<=a)bc", m, start = 1)
doAssert(not match("abc", re2"(?<=x)bc", m, start = 1))
Expand Down
4 changes: 0 additions & 4 deletions src/regex/exptransformation.nim
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,6 @@ func toAsciiKind(k: NodeKind): NodeKind =
reNotDigitAscii
of reNotWhiteSpace:
reNotWhiteSpaceAscii
of reAny:
reAnyAscii
of reAnyNL:
reAnyNLAscii
else:
k

Expand Down
4 changes: 0 additions & 4 deletions src/regex/nfamacro.nim
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,8 @@ func genMatch(c: NimNode, n: Node): NimNode =
quote do: not `whiteSpaceMatch`
of reAny:
quote do: `c` != '\L'.ord
of reAnyAscii:
quote do: `c` <= 128 and `c` != '\L'.ord
of reAnyNL:
quote do: true
of reAnyNlAscii:
quote do: `c` <= 128
of reCharCI:
let cp2Lit = newLit n.cp.swapCase().int32
quote do: `c` == `cpLit` or `c` == `cp2Lit`
Expand Down
8 changes: 0 additions & 8 deletions src/regex/nodematch.nim
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,6 @@ func isDigitAscii(r: Rune): bool {.inline.} =
else:
false

# Reports whether `r` is matched by the ASCII-only "any character" class:
# its integer value must be at most `int8.high` (127) and it must not be
# `lineBreakRune`.
func isAnyAscii(r: Rune): bool {.inline.} =
(r.int <= int8.high and
r != lineBreakRune)

# todo: can not use unicodeplus due to
# https://github.com/nim-lang/Nim/issues/7059
func swapCase*(r: Rune): Rune =
Expand Down Expand Up @@ -160,10 +156,6 @@ func match*(n: Node, r: Rune): bool {.inline.} =
not r.isWhiteSpaceAscii()
of reNotUCC:
r.unicodeCategory() notin n.cc
of reAnyAscii:
r.isAnyAscii()
of reAnyNLAscii:
r.isAnyAscii() or r == lineBreakRune
else:
assert n.kind == reChar
n.cp == r
8 changes: 2 additions & 6 deletions src/regex/types.nim
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ type
reNotAlphaNumAscii, # \W ascii only
reNotDigitAscii, # \D ascii only
reNotWhiteSpaceAscii, # \S ascii only
reAnyAscii, # . ascii only
reAnyNlAscii, # . new-line ascii only
reInSet, # [abc]
reNotSet, # [^abc]
reLookahead, # (?=...)
Expand Down Expand Up @@ -261,9 +259,7 @@ const
reWhiteSpaceAscii,
reNotAlphaNumAscii,
reNotDigitAscii,
reNotWhiteSpaceAscii,
reAnyAscii,
reAnyNLAscii}
reNotWhiteSpaceAscii}
repetitionKind* = {
reZeroOrMore,
reOneOrMore,
Expand Down Expand Up @@ -310,7 +306,7 @@ func `$`*(n: Node): string =
of reNotDigit, reNotDigitAscii: r"\D"
of reNotWhiteSpace, reNotWhiteSpaceAscii: r"\S"
of reNotUCC: r"\PN"
of reAny, reAnyNl, reAnyAscii, reAnyNlAscii: "."
of reAny, reAnyNl: "."
of reInSet, reNotSet:
var str = ""
str.add '['
Expand Down
6 changes: 4 additions & 2 deletions tests/tests2.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1232,6 +1232,8 @@ test "tflags":
check(not "Ǝ".isMatch(re2"(?-u)[\w]"))
check(not "\t".isMatch(re2"(?-u)[\w]"))
check "ƎƎ".isMatch(re2"(?-u)[^\w](?u)\w")
check isMatch("弢", re2"(?u).+")
check isMatch("弢", re2"(?-u).+")

check "a".isMatch(re2"(?x)a")
check "a".isMatch(re2"(?x)a ")
Expand Down Expand Up @@ -3156,9 +3158,9 @@ when not defined(js) or NimMajor >= 2:
check match("abcd", re2(r"(?-su).{4}", flags))
check match("abcd", re2(r"(?s-u).{4}", flags))
check match("abcd", re2(r"(?u-s).{4}", flags))
#check match("弢", re2(r".{4}", flags)) # XXX should match
check match("弢", re2(r".{4}", flags))
check match("弢", re2(r"(?u).{4}", flags))
check(not match("弢", re2(r"(?-u).{4}", flags)))
check match("弢", re2(r"(?-u).{4}", flags))
check(not match("\n", re2(r".", flags)))
check match("\n", re2(r"(?s).", flags))
check(not match("\n", re2(r"(?u).", flags)))
Expand Down

0 comments on commit 2d0f491

Please sign in to comment.