Skip to content

Commit

Permalink
test: Fix regex syntax errors
Browse files Browse the repository at this point in the history
The regular expressions in normalize.py and pathological_tests.py are
missing a lot of backslashes -- in an ordinary (non-raw) Python string
literal you need two backslashes in the source file to get one in the string.

Signed-off-by: Keith Packard <[email protected]>
  • Loading branch information
keith-packard committed Jan 6, 2025
1 parent 80d687b commit 2cd7a1a
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
4 changes: 2 additions & 2 deletions test/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class HTMLParseError(Exception):
# Normalization code, adapted from
# https://github.com/karlcow/markdown-testsuite/
significant_attrs = ["alt", "href", "src", "title"]
whitespace_re = re.compile('\s+')
whitespace_re = re.compile('\\s+')
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
Expand Down Expand Up @@ -176,7 +176,7 @@ def normalize_html(html):
'\u2200&amp;&gt;&lt;&quot;'
"""
html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
html_chunk_re = re.compile("(\\<!\\[CDATA\\[.*?\\]\\]\\>|\\<[^>]*\\>|[^<]+)")
try:
parser = MyHTMLParser()
# We work around HTMLParser's limitations parsing CDATA
Expand Down
20 changes: 10 additions & 10 deletions test/pathological_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def badhash(ref):

document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)

return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))
return document, re.compile("(<p>\\[%s\\]</p>\n){%d}" % (bad_key, COUNT-1))

allowed_failures = {"many references": True}

Expand All @@ -48,10 +48,10 @@ def badhash(ref):
re.compile("(_a ){64999}_a")),
"many link closers with no openers":
(("a]" * 65000),
re.compile("(a\]){65000}")),
re.compile("(a\\]){65000}")),
"many link openers with no closers":
(("[a" * 65000),
re.compile("(\[a){65000}")),
re.compile("(\\[a){65000}")),
"mismatched openers and closers":
(("*a_ " * 50000),
re.compile("([*]a[_] ){49999}[*]a_")),
Expand All @@ -60,19 +60,19 @@ def badhash(ref):
re.compile("a[*][*]b(c[*] ){49999}c[*]")),
"link openers and emph closers":
(("[ a_" * 50000),
re.compile("(\[ a_){50000}")),
re.compile("(\\[ a_){50000}")),
"pattern [ (]( repeated":
(("[ (](" * 80000),
re.compile("(\[ \(\]\(){80000}")),
re.compile("(\\[ \\(\\]\\(){80000}")),
"pattern ![[]() repeated":
("![[]()" * 160000,
re.compile("(!\[<a href=\"\"></a>){160000}")),
re.compile("(!\\[<a href=\"\"></a>){160000}")),
"hard link/emph case":
("**x [a*b**c*](d)",
re.compile("\\*\\*x <a href=\"d\">a<em>b\\*\\*c</em></a>")),
"nested brackets":
(("[" * 50000) + "a" + ("]" * 50000),
re.compile("\[{50000}a\]{50000}")),
re.compile("\\[{50000}a\\]{50000}")),
"nested block quotes":
((("> " * 50000) + "a"),
re.compile("(<blockquote>\n){50000}")),
Expand All @@ -87,13 +87,13 @@ def badhash(ref):
re.compile("^<p>[e`]*</p>\n$")),
"unclosed links A":
("[a](<b" * 30000,
re.compile("(\[a\]\(&lt;b){30000}")),
re.compile("(\\[a\\]\\(&lt;b){30000}")),
"unclosed links B":
("[a](b" * 30000,
re.compile("(\[a\]\(b){30000}")),
re.compile("(\\[a\\]\\(b){30000}")),
"unclosed <!--":
("</" + "<!--" * 300000,
re.compile("\&lt;\/(\&lt;!--){300000}")),
re.compile("\\&lt;\\/(\\&lt;!--){300000}")),
"tables":
("aaa\rbbb\n-\v\n" * 30000,
re.compile("^<p>aaa</p>\n<table>\n<thead>\n<tr>\n<th>bbb</th>\n</tr>\n</thead>\n<tbody>\n(<tr>\n<td>aaa</td>\n</tr>\n<tr>\n<td>bbb</td>\n</tr>\n<tr>\n<td>-\x0b</td>\n</tr>\n){29999}</tbody>\n</table>\n$")),
Expand Down

0 comments on commit 2cd7a1a

Please sign in to comment.