Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Made cmark re-entrant by storing special chars data per parser object #176

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions src/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,20 @@ static void cmark_parser_dispose(cmark_parser *parser) {
cmark_map_free(parser->refmap);
}

// "\r\n\\`&_*[]<!"
static const int8_t SPECIAL_CHARS[256] = {
    // Pristine per-byte flag table: 1 marks a byte that is special to the
    // inline scanner, every byte not listed here is implicitly 0.
    // cmark_parser_reset() copies this template into parser->SPECIAL_CHARS,
    // so per-parser modifications never touch the shared defaults.
    ['\n'] = 1, // line feed
    ['\r'] = 1, // carriage return
    ['!'] = 1,
    ['&'] = 1,
    ['*'] = 1,
    ['<'] = 1,
    ['['] = 1,
    ['\\'] = 1,
    [']'] = 1,
    ['_'] = 1,
    ['`'] = 1,
};

static void cmark_parser_reset(cmark_parser *parser) {
cmark_llist *saved_exts = parser->syntax_extensions;
cmark_llist *saved_inline_exts = parser->inline_syntax_extensions;
Expand All @@ -132,6 +146,8 @@ static void cmark_parser_reset(cmark_parser *parser) {
parser->syntax_extensions = saved_exts;
parser->inline_syntax_extensions = saved_inline_exts;
parser->options = saved_options;

memcpy(parser->SPECIAL_CHARS, SPECIAL_CHARS, sizeof(SPECIAL_CHARS));
}

cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
Expand Down Expand Up @@ -416,9 +432,9 @@ void cmark_manage_extensions_special_characters(cmark_parser *parser, int add) {
for (tmp_char = ext->special_inline_chars; tmp_char; tmp_char=tmp_char->next) {
unsigned char c = (unsigned char)(size_t)tmp_char->data;
if (add)
cmark_inlines_add_special_character(c, ext->emphasis);
cmark_inlines_add_special_character(parser, c, ext->emphasis);
else
cmark_inlines_remove_special_character(c, ext->emphasis);
cmark_inlines_remove_special_character(parser, c, ext->emphasis);
}
}
}
Expand Down
53 changes: 18 additions & 35 deletions src/inlines.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ typedef struct subject{
bool scanned_for_backticks;
} subject;

// Extensions may populate this.
// Per-byte flag table with static storage, so every entry starts at 0.
// cmark_inlines_add_special_character() sets an entry and
// cmark_inlines_remove_special_character() clears it, in both cases only
// when their `emphasis` argument is true. scan_delims() reads the table
// while locating the characters on either side of a delimiter run: flagged
// bytes are stepped over, and a flagged neighbour is treated as a newline.
static int8_t SKIP_CHARS[256];

// Reports whether `c` is a line feed or a carriage return.
static CMARK_INLINE bool S_is_line_end_char(char c) {
  switch (c) {
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
Expand All @@ -71,7 +68,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,

static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e,
cmark_chunk *buffer, cmark_map *refmap);
static bufsize_t subject_find_special_char(subject *subj, int options);
static bufsize_t subject_find_special_char(cmark_parser* parser, subject *subj, int options);

// Create an inline with a literal string value.
static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t,
Expand Down Expand Up @@ -384,7 +381,7 @@ static cmark_node *handle_backticks(subject *subj, int options) {

// Scan ***, **, or * and return number scanned, or 0.
// Advances position.
static int scan_delims(subject *subj, unsigned char c, bool *can_open,
static int scan_delims(cmark_parser* parser, subject *subj, unsigned char c, bool *can_open,
bool *can_close) {
int numdelims = 0;
bufsize_t before_char_pos, after_char_pos;
Expand All @@ -398,12 +395,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
} else {
before_char_pos = subj->pos - 1;
// walk back to the beginning of the UTF_8 sequence:
while ((peek_at(subj, before_char_pos) >> 6 == 2 || SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
while ((peek_at(subj, before_char_pos) >> 6 == 2 || parser->SKIP_CHARS[peek_at(subj, before_char_pos)]) && before_char_pos > 0) {
before_char_pos -= 1;
}
len = cmark_utf8proc_iterate(subj->input.data + before_char_pos,
subj->pos - before_char_pos, &before_char);
if (len == -1 || (before_char < 256 && SKIP_CHARS[(unsigned char) before_char])) {
if (len == -1 || (before_char < 256 && parser->SKIP_CHARS[(unsigned char) before_char])) {
before_char = 10;
}
}
Expand All @@ -422,12 +419,12 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,
after_char = 10;
} else {
after_char_pos = subj->pos;
while (SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
while (parser->SKIP_CHARS[peek_at(subj, after_char_pos)] && after_char_pos < subj->input.len) {
after_char_pos += 1;
}
len = cmark_utf8proc_iterate(subj->input.data + after_char_pos,
subj->input.len - after_char_pos, &after_char);
if (len == -1 || (after_char < 256 && SKIP_CHARS[(unsigned char) after_char])) {
if (len == -1 || (after_char < 256 && parser->SKIP_CHARS[(unsigned char) after_char])) {
after_char = 10;
}
}
Expand Down Expand Up @@ -528,13 +525,13 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
}

// Assumes the subject has a c at the current position.
static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
static cmark_node *handle_delim(cmark_parser* parser, subject *subj, unsigned char c, bool smart) {
bufsize_t numdelims;
cmark_node *inl_text;
bool can_open, can_close;
cmark_chunk contents;

numdelims = scan_delims(subj, c, &can_open, &can_close);
numdelims = scan_delims(parser, subj, c, &can_open, &can_close);

if (c == '\'' && smart) {
contents = cmark_chunk_literal(RIGHTSINGLEQUOTE);
Expand Down Expand Up @@ -1224,20 +1221,6 @@ static cmark_node *handle_newline(subject *subj) {
}
}

// "\r\n\\`&_*[]<!"
static int8_t SPECIAL_CHARS[256] = {
    // Per-byte flags consulted by subject_find_special_char(): a nonzero
    // entry makes the scan stop at that byte. Bytes not listed start at 0.
    // The table is mutable: cmark_inlines_add_special_character() and
    // cmark_inlines_remove_special_character() set and clear entries.
    ['\r'] = 1,
    ['\n'] = 1,
    ['\\'] = 1,
    ['`'] = 1,
    ['&'] = 1,
    ['_'] = 1,
    ['*'] = 1,
    ['['] = 1,
    [']'] = 1,
    ['<'] = 1,
    ['!'] = 1,
};

// " ' . -
static char SMART_PUNCT_CHARS[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
Expand All @@ -1253,11 +1236,11 @@ static char SMART_PUNCT_CHARS[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

static bufsize_t subject_find_special_char(subject *subj, int options) {
static bufsize_t subject_find_special_char(cmark_parser* parser, subject *subj, int options) {
bufsize_t n = subj->pos + 1;

while (n < subj->input.len) {
if (SPECIAL_CHARS[subj->input.data[n]])
if (parser->SPECIAL_CHARS[subj->input.data[n]])
return n;
if (options & CMARK_OPT_SMART && SMART_PUNCT_CHARS[subj->input.data[n]])
return n;
Expand All @@ -1267,16 +1250,16 @@ static bufsize_t subject_find_special_char(subject *subj, int options) {
return subj->input.len;
}

void cmark_inlines_add_special_character(unsigned char c, bool emphasis) {
SPECIAL_CHARS[c] = 1;
void cmark_inlines_add_special_character(cmark_parser* parser, unsigned char c, bool emphasis) {
parser->SPECIAL_CHARS[c] = 1;
if (emphasis)
SKIP_CHARS[c] = 1;
parser->SKIP_CHARS[c] = 1;
}

void cmark_inlines_remove_special_character(unsigned char c, bool emphasis) {
SPECIAL_CHARS[c] = 0;
void cmark_inlines_remove_special_character(cmark_parser* parser, unsigned char c, bool emphasis) {
parser->SPECIAL_CHARS[c] = 0;
if (emphasis)
SKIP_CHARS[c] = 0;
parser->SKIP_CHARS[c] = 0;
}

static cmark_node *try_extensions(cmark_parser *parser,
Expand Down Expand Up @@ -1329,7 +1312,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
case '_':
case '\'':
case '"':
new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
new_inl = handle_delim(parser, subj, c, (options & CMARK_OPT_SMART) != 0);
break;
case '-':
new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
Expand Down Expand Up @@ -1360,7 +1343,7 @@ static int parse_inline(cmark_parser *parser, subject *subj, cmark_node *parent,
if (new_inl != NULL)
break;

endpos = subject_find_special_char(subj, options);
endpos = subject_find_special_char(parser, subj, options);
contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
startpos = subj->pos;
subj->pos = endpos;
Expand Down
4 changes: 2 additions & 2 deletions src/inlines.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ void cmark_parse_inlines(cmark_parser *parser,
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,
cmark_map *refmap);

void cmark_inlines_add_special_character(unsigned char c, bool emphasis);
void cmark_inlines_remove_special_character(unsigned char c, bool emphasis);
// Flags byte `c` as special in `parser`'s own SPECIAL_CHARS table; when
// `emphasis` is true the byte is also flagged in the parser's SKIP_CHARS table.
void cmark_inlines_add_special_character(cmark_parser *parser, unsigned char c, bool emphasis);
// Clears the special flag for byte `c` in `parser`'s SPECIAL_CHARS table; when
// `emphasis` is true the byte's SKIP_CHARS flag is cleared as well.
void cmark_inlines_remove_special_character(cmark_parser *parser, unsigned char c, bool emphasis);

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ struct cmark_parser {
cmark_llist *syntax_extensions;
cmark_llist *inline_syntax_extensions;
cmark_ispunct_func backslash_ispunct;
int8_t SPECIAL_CHARS[256];
int8_t SKIP_CHARS[256];
};

#ifdef __cplusplus
Expand Down