From 22f653dde9b7d086742a480abbe6e80d87f8b0aa Mon Sep 17 00:00:00 2001 From: git5test <475285+Xadeck@users.noreply.github.com> Date: Wed, 18 Dec 2019 09:10:30 +0100 Subject: [PATCH] Made cmark re-entrant by storing special chars data per parser object --- src/blocks.c | 20 +++++++++++++++++-- src/inlines.c | 53 +++++++++++++++++---------------------------------- src/inlines.h | 4 ++-- src/parser.h | 2 ++ 4 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 53e882f19..1eab1e005 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -109,6 +109,20 @@ static void cmark_parser_dispose(cmark_parser *parser) { cmark_map_free(parser->refmap); } +// "\r\n\\`&_*[]syntax_extensions; cmark_llist *saved_inline_exts = parser->inline_syntax_extensions; @@ -132,6 +146,8 @@ static void cmark_parser_reset(cmark_parser *parser) { parser->syntax_extensions = saved_exts; parser->inline_syntax_extensions = saved_inline_exts; parser->options = saved_options; + + memcpy(parser->SPECIAL_CHARS, SPECIAL_CHARS, sizeof(SPECIAL_CHARS)); } cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { @@ -416,9 +432,9 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) { for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) { unsigned char c = (unsigned char)(size_t)tmp_char->data; if (add) - cmark_inlines_add_special_character(c, ext->emphasis); + cmark_inlines_add_special_character(parser, c, ext->emphasis); else - cmark_inlines_remove_special_character(c, ext->emphasis); + cmark_inlines_remove_special_character(parser, c, ext->emphasis); } } } diff --git a/src/inlines.c b/src/inlines.c index c21430bde..79ecc99a2 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -57,9 +57,6 @@ typedef struct subject{ bool scanned_for_backticks; } subject; -// Extensions may populate this. -static int8_t SKIP_CHARS[256]; - static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); } @@ -71,7 +68,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, cmark_chunk *buffer, cmark_map *refmap); -static bufsize_t subject_find_special_char(subject *subj, int options); +static bufsize_t subject_find_special_char(cmark_parser* parser, subject *subj, int options); // Create an inline with a literal string value. static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t, @@ -384,7 +381,7 @@ static cmark_node *handle_backticks(subject *subj, int options) { // Scan ***, **, or * and return number scanned, or 0. // Advances position. -static int scan_delims(subject *subj, unsigned char c, bool *can_open, +static int scan_delims(cmark_parser* parser, subject *subj, unsigned char c, bool *can_open, bool *can_close) { int numdelims = 0; bufsize_t before_char_pos, after_char_pos; @@ -398,12 +395,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, } else { before_char_pos = subj->pos - 1; // walk back to the beginning of the UTF_8 sequence: - while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { + while ((peek_at(subj, before_char_pos) >> 6 == 2 || parser->SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) { before_char_pos -= 1; } len = cmark_utf8proc_iterate(subj->input.data + before_char_pos, subj->pos - before_char_pos, &before_char); - if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) { + if (len == -1 || (before_char < 256 && parser->SKIP_CHARS[(unsigned char) before_char])) { before_char = 10; } } @@ -422,12 +419,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, after_char = 10; } else { after_char_pos = subj->pos; - while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { + while (parser->SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) { after_char_pos += 1; } len = cmark_utf8proc_iterate(subj->input.data + after_char_pos, subj->input.len - after_char_pos, &after_char); - if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) { + if (len == -1 || (after_char < 256 && parser->SKIP_CHARS[(unsigned char) after_char])) { after_char = 10; } } @@ -528,13 +525,13 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) { } // Assumes the subject has a c at the current position. -static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { +static cmark_node *handle_delim(cmark_parser* parser, subject *subj, unsigned char c, bool smart) { bufsize_t numdelims; cmark_node *inl_text; bool can_open, can_close; cmark_chunk contents; - numdelims = scan_delims(subj, c, &can_open, &can_close); + numdelims = scan_delims(parser, subj, c, &can_open, &can_close); if (c == '\'' && smart) { contents = cmark_chunk_literal(RIGHTSINGLEQUOTE); @@ -1224,20 +1221,6 @@ static cmark_node *handle_newline(subject *subj) { } } -// "\r\n\\`&_*[]pos + 1; while (n < subj->input.len) { - if (SPECIAL_CHARS[subj->input.data[n]]) + if (parser->SPECIAL_CHARS[subj->input.data[n]]) return n; if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]]) return n; @@ -1267,16 +1250,16 @@ static bufsize_t subject_find_special_char(subject *subj, int options) { return subj->input.len; } -void cmark_inlines_add_special_character(unsigned char c, bool emphasis) { - SPECIAL_CHARS[c] = 1; +void cmark_inlines_add_special_character(cmark_parser* parser, unsigned char c, bool emphasis) { + parser->SPECIAL_CHARS[c] = 1; if (emphasis) - SKIP_CHARS[c] = 1; + parser->SKIP_CHARS[c] = 1; } -void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) { - SPECIAL_CHARS[c] = 0; +void cmark_inlines_remove_special_character(cmark_parser* parser, unsigned char c, bool emphasis) { + parser->SPECIAL_CHARS[c] = 0; if (emphasis) - SKIP_CHARS[c] = 0; + parser->SKIP_CHARS[c] = 0; } static cmark_node *try_extensions(cmark_parser *parser, @@ -1329,7 +1312,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, case '_': case '\'': case '"': - new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0); + new_inl = handle_delim(parser, subj, c, (options & CMARK_OPT_SMART) != 0); break; case '-': new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0); @@ -1360,7 +1343,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent, if (new_inl != NULL) break; - endpos = subject_find_special_char(subj, options); + endpos = subject_find_special_char(parser, subj, options); contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); startpos = subj->pos; subj->pos = endpos; diff --git a/src/inlines.h b/src/inlines.h index 7dd91bf52..8091cca84 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -19,8 +19,8 @@ void cmark_parse_inlines(cmark_parser *parser, bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input, cmark_map *refmap); -void cmark_inlines_add_special_character(unsigned char c, bool emphasis); -void cmark_inlines_remove_special_character(unsigned char c, bool emphasis); +void cmark_inlines_add_special_character(cmark_parser* parser, unsigned char, bool emphasis); +void cmark_inlines_remove_special_character(cmark_parser* parser, unsigned char, bool emphasis); #ifdef __cplusplus } diff --git a/src/parser.h b/src/parser.h index 245580b85..9e29e9388 100644 --- a/src/parser.h +++ b/src/parser.h @@ -49,6 +49,8 @@ struct cmark_parser { cmark_llist *syntax_extensions; cmark_llist *inline_syntax_extensions; cmark_ispunct_func backslash_ispunct; + int8_t SPECIAL_CHARS[256]; + int8_t SKIP_CHARS[256]; }; #ifdef __cplusplus