diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0cb653014..b75c0c736 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,6 +18,7 @@ set(HEADERS houdini.h cmark_ctype.h render.h + source_map.h ) set(LIBRARY_SOURCES cmark.c @@ -40,6 +41,7 @@ set(LIBRARY_SOURCES houdini_html_e.c houdini_html_u.c cmark_ctype.c + source_map.c ${HEADERS} ) diff --git a/src/blocks.c b/src/blocks.c index 301178362..4abe7d3c4 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -28,6 +28,10 @@ #define MIN(x, y) ((x < y) ? x : y) #endif +#ifndef MAX +#define MAX(x, y) ((x > y) ? x : y) +#endif + #define peek_at(i, n) (i)->data[n] static bool S_last_line_blank(const cmark_node *node) { @@ -95,6 +99,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->error_code = CMARK_ERR_NONE; parser->total_bytes = 0; parser->line_number = 0; + parser->line_offset = 0; parser->offset = 0; parser->column = 0; parser->first_nonspace = 0; @@ -106,6 +111,9 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->options = options; parser->last_buffer_ended_with_cr = false; + if (options & CMARK_OPT_SOURCEPOS) + parser->source_map = source_map_new(mem); + return parser; } @@ -118,6 +126,7 @@ void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); + source_map_free(parser->source_map); cmark_reference_map_free(parser->refmap); mem->free(parser); } @@ -257,18 +266,28 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { switch (S_type(b)) { case CMARK_NODE_PARAGRAPH: + source_map_start_cursor(parser->source_map, parser->last_paragraph_extent); while (cmark_strbuf_at(node_content, 0) == '[' && (pos = cmark_parse_reference_inline(parser->mem, node_content, - parser->refmap, parser->root))) { - + parser->refmap, b, + parser->source_map))) { + source_map_start_cursor(parser->source_map, + source_map_get_cursor(parser->source_map)); 
cmark_strbuf_drop(node_content, pos); } + + while (parser->last_paragraph_extent != source_map_get_cursor(parser->source_map)) { + if (parser->last_paragraph_extent->node == b) { + parser->last_paragraph_extent->node = parser->root; + } + parser->last_paragraph_extent = parser->last_paragraph_extent->next; + } + if (is_blank(node_content, 0)) { // remove blank node (former reference def) cmark_node_free(b); } break; - case CMARK_NODE_CODE_BLOCK: if (!b->as.code.fenced) { // indented code remove_trailing_blank_lines(node_content); @@ -363,21 +382,36 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, // Walk through node and all children, recursively, parsing // string content into inline content where appropriate. -static void process_inlines(cmark_mem *mem, cmark_node *root, - cmark_reference_map *refmap, int options) { - cmark_iter *iter = cmark_iter_new(root); +static void process_inlines(cmark_parser *parser) { + cmark_iter *iter = cmark_iter_new(parser->root); cmark_node *cur; cmark_event_type ev_type; + cmark_source_extent *cur_extent = NULL; + + cur_extent = source_map_get_head(parser->source_map); while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { if (contains_inlines(S_type(cur))) { - cmark_parse_inlines(mem, cur, refmap, options); + while (cur_extent && cur_extent->node != cur) { + cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, cur, parser->line_offset); + } + + if (parser->source_map) + assert(cur_extent); + + source_map_start_cursor(parser->source_map, cur_extent); + + cmark_parse_inlines(parser->mem, cur, parser->refmap, parser->options, parser->source_map, parser->line_offset); } } } + while (cur_extent) { + cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, NULL, parser->line_offset); + } + cmark_iter_free(iter); } @@ -484,7 +518,10 @@ static cmark_node 
*finalize_document(cmark_parser *parser) { } finalize(parser, parser->root); - process_inlines(parser->mem, parser->root, parser->refmap, parser->options); + + process_inlines(parser); + + assert(source_map_check(parser->source_map, parser->line_offset)); return parser->root; } @@ -526,6 +563,7 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) { static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; + const unsigned char *skipped; static const uint8_t repl[] = {239, 191, 189}; if (parser->error_code) { @@ -550,6 +588,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, while (buffer < end) { const unsigned char *eol; bufsize_t chunk_len; + bufsize_t linebuf_size = 0; bool process = false; for (eol = buffer; eol < end; ++eol) { if (S_is_line_end_char(*eol)) { @@ -567,6 +606,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, chunk_len = (eol - buffer); if (process) { if (parser->linebuf.size > 0) { + linebuf_size = cmark_strbuf_len(&parser->linebuf); cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); cmark_strbuf_clear(&parser->linebuf); @@ -585,6 +625,8 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, } buffer += chunk_len; + skipped = buffer; + if (buffer < end) { if (*buffer == '\0') { // skip over NULL @@ -600,6 +642,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, buffer++; } } + chunk_len += buffer - skipped; + chunk_len += linebuf_size; + + if (process) + parser->line_offset += chunk_len; } } @@ -659,11 +706,13 @@ static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) { // indicates a number of columns; otherwise, a number of bytes. 
// If advancing a certain number of columns partially consumes // a tab character, parser->partially_consumed_tab is set to true. -static void S_advance_offset(cmark_parser *parser, cmark_chunk *input, - bufsize_t count, bool columns) { +static void S_advance_offset(cmark_parser *parser, cmark_node *container, cmark_extent_type type, + cmark_chunk *input, bufsize_t count, bool columns) { char c; int chars_to_tab; int chars_to_advance; + int initial_pos = parser->offset + parser->line_offset; + while (count > 0 && (c = peek_at(input, parser->offset))) { if (c == '\t') { chars_to_tab = TAB_STOP - (parser->column % TAB_STOP); @@ -686,6 +735,8 @@ static void S_advance_offset(cmark_parser *parser, cmark_chunk *input, count -= 1; } } + + source_map_append_extent(parser->source_map, initial_pos, parser->offset + parser->line_offset, container, type); } static bool S_last_child_is_open(cmark_node *container) { @@ -693,7 +744,7 @@ static bool S_last_child_is_open(cmark_node *container) { (container->last_child->flags & CMARK_NODE__OPEN); } -static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) { +static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input, cmark_node *container) { bool res = false; bufsize_t matched = 0; @@ -701,10 +752,10 @@ static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) { parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>'; if (matched) { - S_advance_offset(parser, input, parser->indent + 1, true); + S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->indent + 1, true); if (S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, input, 1, true); + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true); } res = true; @@ -718,7 +769,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input, if (parser->indent >= container->as.list.marker_offset + container->as.list.padding) { - 
S_advance_offset(parser, input, container->as.list.marker_offset + + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, container->as.list.marker_offset + container->as.list.padding, true); res = true; @@ -726,7 +777,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input, // if container->first_child is NULL, then the opening line // of the list item was blank after the list marker; in this // case, we are done with the list item. - S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, false); res = true; } @@ -740,10 +791,10 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input, if (!container->as.code.fenced) { // indented if (parser->indent >= CODE_INDENT) { - S_advance_offset(parser, input, CODE_INDENT, true); + S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true); res = true; } else if (parser->blank) { - S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, false); res = true; } @@ -759,14 +810,15 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input, // closing fence - and since we're at // the end of a line, we can stop processing it: *should_continue = false; - S_advance_offset(parser, input, matched, false); + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace, false); + S_advance_offset(parser, container, CMARK_EXTENT_CLOSER, input, parser->offset + matched, false); parser->current = finalize(parser, container); } else { // skip opt. 
spaces of fence parser->offset int i = container->as.code.fence_offset; while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, input, 1, true); + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true); i--; } res = true; @@ -823,7 +875,7 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, switch (cont_type) { case CMARK_NODE_BLOCK_QUOTE: - if (!parse_block_quote_prefix(parser, input)) + if (!parse_block_quote_prefix(parser, input, container)) goto done; break; case CMARK_NODE_ITEM: @@ -883,29 +935,26 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, indented = parser->indent >= CODE_INDENT; if (!indented && peek_at(input, parser->first_nonspace) == '>') { + *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE, + parser->first_nonspace + 1); - bufsize_t blockquote_startpos = parser->first_nonspace; - - S_advance_offset(parser, input, + S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, parser->first_nonspace + 1 - parser->offset, false); // optional following character if (S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, input, 1, true); + S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true); } - *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE, - blockquote_startpos + 1); } else if (!indented && (matched = scan_atx_heading_start( input, parser->first_nonspace))) { bufsize_t hashpos; int level = 0; - bufsize_t heading_startpos = parser->first_nonspace; - S_advance_offset(parser, input, + *container = add_child(parser, *container, CMARK_NODE_HEADING, + parser->first_nonspace + 1); + S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, parser->first_nonspace + matched - parser->offset, false); - *container = add_child(parser, *container, CMARK_NODE_HEADING, - heading_startpos + 1); hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace); @@ 
-927,7 +976,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); (*container)->as.code.info = cmark_chunk_literal(""); - S_advance_offset(parser, input, + S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, parser->first_nonspace + matched - parser->offset, false); @@ -947,14 +996,14 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->type = (uint16_t)CMARK_NODE_HEADING; (*container)->as.heading.level = lev; (*container)->as.heading.setext = true; - S_advance_offset(parser, input, input->len - 1 - parser->offset, false); + S_advance_offset(parser, *container, CMARK_EXTENT_CLOSER, input, input->len - 1 - parser->offset, false); } else if (!indented && !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) && (matched = scan_thematic_break(input, parser->first_nonspace))) { // it's only now that we know the line is not part of a setext heading: *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); - S_advance_offset(parser, input, input->len - 1 - parser->offset, false); + S_advance_offset(parser, *container, CMARK_EXTENT_CONTENT, input, input->len - 1 - parser->offset, false); } else if ((!indented || cont_type == CMARK_NODE_LIST) && (matched = parse_list_marker( parser->mem, input, parser->first_nonspace, @@ -964,6 +1013,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // spaces indent, as long as the list container is still open. 
cmark_node *list = NULL; cmark_node *item = NULL; + cmark_source_extent *save_source_map_tail; int i = 0; if (cont_type != CMARK_NODE_LIST || @@ -980,17 +1030,18 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, item = *container; // compute padding: - S_advance_offset(parser, input, + S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, parser->first_nonspace + matched - parser->offset, false); save_partially_consumed_tab = parser->partially_consumed_tab; save_offset = parser->offset; save_column = parser->column; + save_source_map_tail = source_map_get_tail(parser->source_map); while (parser->column - save_column <= 5 && S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, input, 1, true); + S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true); } i = parser->column - save_column; @@ -1000,9 +1051,14 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, data->padding = matched + 1; parser->offset = save_offset; parser->column = save_column; + if (save_source_map_tail) { + cmark_source_extent *tmp_extent; + for (tmp_extent = save_source_map_tail->next; tmp_extent; tmp_extent = source_map_free_extent(parser->source_map, tmp_extent)); + } + parser->partially_consumed_tab = save_partially_consumed_tab; if (i > 0) { - S_advance_offset(parser, input, 1, true); + S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true); } } else { data->padding = matched + i; @@ -1021,7 +1077,6 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, parser->mem->free(data); } else if (indented && !maybe_lazy && !parser->blank) { - S_advance_offset(parser, input, CODE_INDENT, true); *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK, parser->offset + 1); (*container)->as.code.fenced = false; @@ -1030,6 +1085,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_offset = 0; 
(*container)->as.code.info = cmark_chunk_literal(""); + S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true); } else { break; } @@ -1094,6 +1150,11 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } if (S_type(container) == CMARK_NODE_CODE_BLOCK) { + source_map_append_extent(parser->source_map, + parser->offset + parser->line_offset, + parser->line_offset + input->len, + container, + CMARK_EXTENT_CONTENT); add_line(container, input, parser); } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) { add_line(container, input, parser); @@ -1130,26 +1191,48 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, break; } + source_map_append_extent(parser->source_map, + parser->offset + parser->line_offset, + parser->line_offset + input->len, + container, + CMARK_EXTENT_CONTENT); + if (matches_end_condition) { container = finalize(parser, container); assert(parser->current != NULL); } } else if (parser->blank) { - // ??? 
do nothing + source_map_append_extent(parser->source_map, + parser->line_offset + parser->offset, + parser->line_offset + input->len, + container, + CMARK_EXTENT_BLANK); } else if (accepts_lines(S_type(container))) { + bufsize_t initial_len = input->len; + bool chopped = false; + if (S_type(container) == CMARK_NODE_HEADING && container->as.heading.setext == false) { chop_trailing_hashtags(input); + chopped = true; } - S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, false); add_line(container, input, parser); + + if (chopped) + source_map_append_extent(parser->source_map, + MAX(parser->line_offset + parser->offset, parser->line_offset + input->len), + parser->line_offset + initial_len, + container, + CMARK_EXTENT_CLOSER); } else { // create paragraph container for line container = add_child(parser, container, CMARK_NODE_PARAGRAPH, parser->first_nonspace + 1); - S_advance_offset(parser, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->first_nonspace - parser->offset, false); + parser->last_paragraph_extent = source_map_get_tail(parser->source_map); add_line(container, input, parser); } @@ -1211,6 +1294,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer, cmark_node *cmark_parser_finish(cmark_parser *parser) { if (parser->linebuf.size) { S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); + parser->line_offset += parser->linebuf.size; cmark_strbuf_clear(&parser->linebuf); } @@ -1235,6 +1319,12 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { return parser->root; } +cmark_source_extent * +cmark_parser_get_first_source_extent(cmark_parser *parser) +{ + return source_map_get_head(parser->source_map); +} + cmark_err_type cmark_parser_get_error(cmark_parser *parser) { return parser->error_code; } diff --git 
a/src/cmark.h b/src/cmark.h index 389bc3814..cc1e7f527 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -66,6 +66,21 @@ typedef enum { CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, } cmark_node_type; +typedef enum { /* kind of source text covered by one extent of the source map */ + CMARK_EXTENT_NONE, + CMARK_EXTENT_OPENER, /* opening syntax of a node, e.g. the '>' of a block quote or the opening delimiters of emphasis */ + CMARK_EXTENT_CLOSER, /* closing syntax of a node, e.g. a closing code fence or the closing delimiters of emphasis */ + CMARK_EXTENT_BLANK, /* whitespace that carries no content, e.g. the optional space after '>' or a blank line */ + CMARK_EXTENT_CONTENT, /* source text that makes up the content of a node, e.g. a code block line or inline text */ + CMARK_EXTENT_PUNCTUATION, /* structural punctuation of links and reference definitions, e.g. the closing ']' */ + CMARK_EXTENT_LINK_DESTINATION, /* destination of an inline link */ + CMARK_EXTENT_LINK_TITLE, /* title of an inline link */ + CMARK_EXTENT_LINK_LABEL, /* label that follows the link text of a reference-style link */ + CMARK_EXTENT_REFERENCE_DESTINATION, /* destination of a link reference definition */ + CMARK_EXTENT_REFERENCE_LABEL, /* label of a link reference definition */ + CMARK_EXTENT_REFERENCE_TITLE, /* title of a link reference definition */ +} cmark_extent_type; + /* For backwards compatibility: */ #define CMARK_NODE_HEADER CMARK_NODE_HEADING #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK @@ -93,6 +108,7 @@ typedef enum { typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; +typedef struct cmark_source_extent cmark_source_extent; /** * ## Custom memory allocator support @@ -504,6 +520,11 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); CMARK_EXPORT cmark_node *cmark_parser_finish(cmark_parser *parser); +/** Return a pointer to the first extent of the parser's source map + */ +CMARK_EXPORT +cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser); + /** Parse a CommonMark document in 'buffer' of length 'len'. * Returns a pointer to a tree of nodes. The memory allocated for * the node tree should be released using 'cmark_node_free' @@ -515,10 +536,44 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); /** Parse a CommonMark document in file 'f', returning a pointer to * a tree of nodes. The memory allocated for the node tree should be * released using 'cmark_node_free' when it is no longer needed. + * Returns NULL on error. 
*/ CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f, int options); +/** + * ## Source map API + */ + +/** Return the index, in bytes, of the start of this extent. */ +CMARK_EXPORT +size_t cmark_source_extent_get_start(cmark_source_extent *extent); + +/** Return the index, in bytes, of the stop of this extent. This + * index is not included in the extent. */ +CMARK_EXPORT +size_t cmark_source_extent_get_stop(cmark_source_extent *extent); + +/** Return the extent immediately following 'extent'. */ +CMARK_EXPORT +cmark_source_extent *cmark_source_extent_get_next(cmark_source_extent *extent); + +/** Return the extent immediately preceding 'extent'. */ +CMARK_EXPORT +cmark_source_extent *cmark_source_extent_get_previous(cmark_source_extent *extent); + +/** Return the node 'extent' maps to. */ +CMARK_EXPORT +cmark_node *cmark_source_extent_get_node(cmark_source_extent *extent); + +/** Return the type of 'extent'. */ +CMARK_EXPORT +cmark_extent_type cmark_source_extent_get_type(cmark_source_extent *extent); + +/** Return a string representation of the type of 'extent'. */ +CMARK_EXPORT +const char *cmark_source_extent_get_type_string(cmark_source_extent *extent); + /** * ## Rendering */ diff --git a/src/inlines.c b/src/inlines.c index fccdd9104..d2378b53f 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -13,6 +13,10 @@ #include "scanners.h" #include "inlines.h" +#ifndef MIN +#define MIN(x, y) ((x < y) ? 
x : y) +#endif + static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; static const char *ELLIPSES = "\xE2\x80\xA6"; @@ -40,6 +44,7 @@ typedef struct delimiter { unsigned char delim_char; bool can_open; bool can_close; + cmark_source_extent *extent; } delimiter; typedef struct bracket { @@ -50,6 +55,7 @@ typedef struct bracket { bool image; bool active; bool bracket_after; + cmark_source_extent *extent; } bracket; typedef struct { @@ -61,6 +67,7 @@ typedef struct { bracket *last_bracket; bufsize_t backticks[MAXBACKTICKS + 1]; bool scanned_for_backticks; + cmark_source_map *source_map; } subject; static CMARK_INLINE bool S_is_line_end_char(char c) { @@ -73,7 +80,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, static int parse_inline(subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, - cmark_reference_map *refmap); + cmark_reference_map *refmap, cmark_source_map *source_map); static bufsize_t subject_find_special_char(subject *subj, int options); // Create an inline with a literal string value. 
@@ -149,7 +156,7 @@ static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url, } static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, - cmark_reference_map *refmap) { + cmark_reference_map *refmap, cmark_source_map *source_map) { int i; e->mem = mem; e->input.data = buffer->ptr; @@ -159,6 +166,7 @@ static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, e->refmap = refmap; e->last_delim = NULL; e->last_bracket = NULL; + e->source_map = source_map; for (i = 0; i <= MAXBACKTICKS; i++) { e->backticks[i] = 0; } @@ -406,6 +414,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, if (delim->previous != NULL) { delim->previous->next = delim; } + delim->extent = NULL; subj->last_delim = delim; } @@ -421,11 +430,12 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) { b->previous_delimiter = subj->last_delim; b->position = subj->pos; b->bracket_after = false; + b->extent = NULL; subj->last_bracket = b; } // Assumes the subject has a c at the current position. 
-static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { +static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart, bool *pushed) { bufsize_t numdelims; cmark_node *inl_text; bool can_open, can_close; @@ -446,6 +456,9 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) { push_delimiter(subj, c, can_open, can_close, inl_text); + *pushed = true; + } else { + *pushed = false; } return inl_text; @@ -612,6 +625,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, bufsize_t opener_num_chars = opener_inl->as.literal.len; bufsize_t closer_num_chars = closer_inl->as.literal.len; cmark_node *tmp, *tmpnext, *emph; + cmark_source_extent *tmp_extent = NULL; // calculate the actual number of characters used from this closer if (closer_num_chars < 3 || opener_num_chars < 3) { @@ -647,9 +661,30 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, } cmark_node_insert_after(opener_inl, emph); + if (subj->source_map) { + tmp_extent = closer->extent->prev; + + source_map_insert_extent(subj->source_map, + opener->extent, + opener->extent->stop - use_delims, + opener->extent->stop, + emph, + CMARK_EXTENT_OPENER); + opener->extent->stop -= use_delims; + + source_map_insert_extent(subj->source_map, + tmp_extent, + closer->extent->start, + closer->extent->start + use_delims, + emph, + CMARK_EXTENT_CLOSER); + closer->extent->start += use_delims; + } + // if opener has 0 characters, remove it and its associated inline if (opener_num_chars == 0) { cmark_node_free(opener_inl); + source_map_free_extent(subj->source_map, opener->extent); remove_delimiter(subj, opener); } @@ -659,6 +694,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, cmark_node_free(closer_inl); // remove closer from list tmp_delim = closer->next; + source_map_free_extent(subj->source_map, closer->extent); remove_delimiter(subj, closer); 
closer = tmp_delim; } @@ -883,6 +919,8 @@ static cmark_node *handle_close_bracket(subject *subj) { int found_label; cmark_node *tmp, *tmpnext; bool is_image; + bool is_inline = false; + bool is_shortcut = false; advance(subj); // advance past ] initial_pos = subj->pos; @@ -933,6 +971,7 @@ static cmark_node *handle_close_bracket(subject *subj) { title = cmark_clean_title(subj->mem, &title_chunk); cmark_chunk_free(subj->mem, &url_chunk); cmark_chunk_free(subj->mem, &title_chunk); + is_inline = true; goto match; } else { @@ -955,6 +994,7 @@ static cmark_node *handle_close_bracket(subject *subj) { cmark_chunk_free(subj->mem, &raw_label); raw_label = cmark_chunk_dup(&subj->input, opener->position, initial_pos - opener->position - 1); + is_shortcut = true; found_label = true; } @@ -984,6 +1024,31 @@ static cmark_node *handle_close_bracket(subject *subj) { cmark_node_insert_before(opener->inl_text, inl); // Add link text: tmp = opener->inl_text->next; + + if (subj->source_map) { + assert(opener->extent); + + opener->extent->node = inl; + opener->extent->type = CMARK_EXTENT_OPENER; + } + + source_map_splice_extent(subj->source_map, initial_pos - 1, initial_pos, inl, CMARK_EXTENT_PUNCTUATION); + if (is_inline) { + source_map_splice_extent(subj->source_map, after_link_text_pos, starturl, inl, CMARK_EXTENT_PUNCTUATION); + source_map_splice_extent(subj->source_map, starturl, endurl, inl, CMARK_EXTENT_LINK_DESTINATION); + if (endtitle != starttitle) { + source_map_splice_extent(subj->source_map, endurl, starttitle, inl, CMARK_EXTENT_BLANK); + source_map_splice_extent(subj->source_map, starttitle, endtitle, inl, CMARK_EXTENT_LINK_TITLE); + source_map_splice_extent(subj->source_map, endtitle, subj->pos, inl, CMARK_EXTENT_PUNCTUATION); + } else { + source_map_splice_extent(subj->source_map, endurl, subj->pos, inl, CMARK_EXTENT_PUNCTUATION); + } + } else if (!is_shortcut) { + source_map_splice_extent(subj->source_map, initial_pos, initial_pos + 1, inl, CMARK_EXTENT_PUNCTUATION); + 
source_map_splice_extent(subj->source_map, initial_pos + 1, subj->pos - 1, inl, CMARK_EXTENT_LINK_LABEL); + source_map_splice_extent(subj->source_map, subj->pos - 1, subj->pos, inl, CMARK_EXTENT_PUNCTUATION); + } + while (tmp) { tmpnext = tmp->next; cmark_node_append_child(inl, tmp); @@ -1087,6 +1152,11 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { cmark_chunk contents; unsigned char c; bufsize_t endpos; + bufsize_t startpos = subj->pos; + bufsize_t trimmed_spaces = 0; + bool add_extent_to_last_bracket = false; + bool add_extent_to_last_delimiter = false; + c = peek_char(subj); if (c == 0) { return 0; @@ -1095,6 +1165,8 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { case '\r': case '\n': new_inl = handle_newline(subj); + if (new_inl->type == CMARK_NODE_LINEBREAK) + startpos -= 2; break; case '`': new_inl = handle_backticks(subj); @@ -1112,7 +1184,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { case '_': case '\'': case '"': - new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0); + new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0, &add_extent_to_last_delimiter); break; case '-': new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0); @@ -1124,6 +1196,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { advance(subj); new_inl = make_str(subj->mem, cmark_chunk_literal("[")); push_bracket(subj, false, new_inl); + add_extent_to_last_bracket = true; break; case ']': new_inl = handle_close_bracket(subj); @@ -1134,6 +1207,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { advance(subj); new_inl = make_str(subj->mem, cmark_chunk_literal("![")); push_bracket(subj, true, new_inl); + add_extent_to_last_bracket = true; } else { new_inl = make_str(subj->mem, cmark_chunk_literal("!")); } @@ -1145,12 +1219,24 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { // if we're at 
a newline, strip trailing spaces. if (S_is_line_end_char(peek_char(subj))) { + bufsize_t initial_size = contents.len; cmark_chunk_rtrim(&contents); + trimmed_spaces = initial_size - contents.len; } new_inl = make_str(subj->mem, contents); } + if (new_inl != NULL) { + cmark_source_extent *extent; + + extent = source_map_splice_extent(subj->source_map, startpos, subj->pos - trimmed_spaces, new_inl, CMARK_EXTENT_CONTENT); + + if (add_extent_to_last_bracket) + subj->last_bracket->extent = extent; + else if (add_extent_to_last_delimiter) + subj->last_delim->extent = extent; + cmark_node_append_child(parent, new_inl); } @@ -1159,9 +1245,11 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { // Parse inlines from parent's string_content, adding as children of parent. extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options) { + cmark_reference_map *refmap, int options, + cmark_source_map *source_map, bufsize_t total_length) { subject subj; - subject_from_buf(mem, &subj, &parent->content, refmap); + subject_from_buf(mem, &subj, &parent->content, refmap, source_map); + bufsize_t initial_len = subj.input.len; cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(&subj, parent, options)) @@ -1175,6 +1263,14 @@ extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, while (subj.last_bracket) { pop_bracket(&subj); } + + if (source_map) + source_map_insert_extent(source_map, + source_map->cursor, + source_map->cursor->stop, + MIN(source_map->cursor->stop + initial_len - subj.input.len, total_length), + parent, + CMARK_EXTENT_BLANK); } // Parse zero or more space characters, including at most one newline. @@ -1191,24 +1287,29 @@ static void spnl(subject *subj) { // after reference is parsed. 
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_reference_map *refmap, - cmark_node *root) { + cmark_node *container, + cmark_source_map *source_map) { subject subj; + cmark_node *reference = cmark_node_new(CMARK_NODE_REFERENCE); + cmark_reference *ref; cmark_chunk lab; cmark_chunk url; cmark_chunk title; bufsize_t matchlen = 0; - bufsize_t beforetitle; - cmark_reference *ref; - cmark_node *reference = cmark_node_new(CMARK_NODE_REFERENCE); + bufsize_t starttitle, endtitle; + bufsize_t endlabel; + bufsize_t starturl, endurl; - subject_from_buf(mem, &subj, input, NULL); + subject_from_buf(mem, &subj, input, NULL, source_map); // parse label: if (!link_label(&subj, &lab) || lab.len == 0) goto nomatch; + endlabel = subj.pos - 1; + // colon: if (peek_char(&subj) == ':') { advance(&subj); @@ -1218,6 +1319,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, // parse link url: spnl(&subj); + starturl = subj.pos; matchlen = manual_scan_link_url(&subj.input, subj.pos); if (matchlen > 0) { url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); @@ -1227,22 +1329,29 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, } // parse optional link_title - beforetitle = subj.pos; + endurl = subj.pos; spnl(&subj); + starttitle = subj.pos; matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { - subj.pos = beforetitle; + subj.pos = endurl; + starttitle = endurl; + endtitle = endurl; title = cmark_chunk_literal(""); } + endtitle = subj.pos; + // parse final spaces and newline: skip_spaces(&subj); if (!skip_line_end(&subj)) { if (matchlen) { // try rewinding before title - subj.pos = beforetitle; + subj.pos = endurl; + starttitle = endurl; + endtitle = endurl; skip_spaces(&subj); title = cmark_chunk_literal(""); if (!skip_line_end(&subj)) { @@ -1259,11 +1368,21 @@ bufsize_t 
cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, cmark_chunk_set_cstr(mem, &reference->as.reference.label, (char *) ref->label); cmark_chunk_set_cstr(mem, &reference->as.reference.url, cmark_chunk_to_cstr(mem, &ref->url)); cmark_chunk_set_cstr(mem, &reference->as.reference.title, cmark_chunk_to_cstr(mem, &ref->title)); - cmark_node_append_child(root, reference); + cmark_node_insert_before(container, reference); cmark_reference_add(refmap, ref); } + // Mark the extents of the reference + source_map_splice_extent(source_map, 0, 1, reference, CMARK_EXTENT_OPENER); + source_map_splice_extent(source_map, 1, endlabel, reference, CMARK_EXTENT_REFERENCE_LABEL); + source_map_splice_extent(source_map, endlabel, endlabel + 2, reference, CMARK_EXTENT_PUNCTUATION); + source_map_splice_extent(source_map, endlabel + 2, starturl, reference, CMARK_EXTENT_BLANK); + source_map_splice_extent(source_map, starturl, endurl, reference, CMARK_EXTENT_REFERENCE_DESTINATION); + source_map_splice_extent(source_map, endurl, starttitle, reference, CMARK_EXTENT_BLANK); + source_map_splice_extent(source_map, starttitle, endtitle, reference, CMARK_EXTENT_REFERENCE_TITLE); + source_map_splice_extent(source_map, endtitle, subj.pos, reference, CMARK_EXTENT_BLANK); + return subj.pos; nomatch: diff --git a/src/inlines.h b/src/inlines.h index a09a75940..ee85b87de 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -1,6 +1,10 @@ #ifndef CMARK_INLINES_H #define CMARK_INLINES_H +#include "chunk.h" +#include "references.h" +#include "source_map.h" + #ifdef __cplusplus extern "C" { #endif @@ -9,11 +13,13 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options); + cmark_reference_map *refmap, int options, + cmark_source_map *source_map, bufsize_t total_length); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, 
cmark_strbuf *input, cmark_reference_map *refmap, - cmark_node *root); + cmark_node *container, + cmark_source_map *source_map); #ifdef __cplusplus } diff --git a/src/parser.h b/src/parser.h index ec8c9b889..7b4fdbc9b 100644 --- a/src/parser.h +++ b/src/parser.h @@ -6,6 +6,7 @@ #include "node.h" #include "buffer.h" #include "memory.h" +#include "source_map.h" #ifdef __cplusplus extern "C" { @@ -30,9 +31,12 @@ struct cmark_parser { bool partially_consumed_tab; cmark_strbuf curline; bufsize_t last_line_length; + bufsize_t line_offset; cmark_strbuf linebuf; int options; bool last_buffer_ended_with_cr; + cmark_source_map *source_map; + cmark_source_extent *last_paragraph_extent; }; #ifdef __cplusplus diff --git a/src/source_map.c b/src/source_map.c new file mode 100644 index 000000000..754c5bb6c --- /dev/null +++ b/src/source_map.c @@ -0,0 +1,344 @@ +#include <assert.h> + +#include "source_map.h" + +cmark_source_map * +source_map_new(cmark_mem *mem) +{ + cmark_source_map *res = (cmark_source_map *) mem->calloc(1, sizeof(cmark_source_map)); + res->mem = mem; + return res; +} + +void +source_map_free(cmark_source_map *self) +{ + if (!self) + return; + + cmark_source_extent *tmp; + for (tmp = self->head; tmp; tmp = source_map_free_extent(self, tmp)); + self->mem->free(self); +} + +cmark_source_extent * +source_map_append_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) +{ + if (!self) + return NULL; + + assert (start <= stop); + assert (!self->tail || self->tail->stop <= start); + + cmark_source_extent *res = (cmark_source_extent *) self->mem->calloc(1, sizeof(cmark_source_extent)); + + res->start = start; + res->stop = stop; + res->node = node; + res->type = type; + + res->next = NULL; + res->prev = self->tail; + + if (!self->head) + self->head = res; + else + self->tail->next = res; + + self->tail = res; + + return res; +} + +cmark_source_extent * +source_map_insert_extent(cmark_source_map *self, 
cmark_source_extent *previous, + bufsize_t start, bufsize_t stop, cmark_node *node, cmark_extent_type type) +{ + if (!self) + return NULL; + + if (start == stop) + return previous; + + cmark_source_extent *extent = (cmark_source_extent *) self->mem->calloc(1, sizeof(cmark_source_extent)); + + extent->start = start; + extent->stop = stop; + extent->node = node; + extent->type = type; + extent->next = previous->next; + extent->prev = previous; + previous->next = extent; + + if (extent->next) + extent->next->prev = extent; + else + self->tail = extent; + + return extent; +} + +cmark_source_extent * +source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent) +{ + if (!self) + return NULL; + + cmark_source_extent *next = extent->next; + + if (extent->prev) + extent->prev->next = next; + + if (extent->next) + extent->next->prev = extent->prev; + + if (extent == self->tail) + self->tail = extent->prev; + + if (extent == self->head) + self->head = extent->next; + + if (extent == self->cursor) { + self->cursor = extent->prev; + } + + if (extent == self->next_cursor) { + self->next_cursor = extent->next; + } + + self->mem->free(extent); + + return next; +} + +cmark_source_extent * +source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, + cmark_node *root, cmark_node *target_node, bufsize_t total_length) +{ + cmark_source_extent *res; + + if (!self) + return NULL; + + while (extent->next && extent->start == extent->stop) { + extent = source_map_free_extent(self, extent); + if (extent->node == target_node) + return extent; + } + + if (extent->next) { + res = source_map_insert_extent(self, + extent, + extent->stop, + extent->next->start, + root, + CMARK_EXTENT_BLANK)->next; + } else { + res = source_map_insert_extent(self, + extent, + extent->stop, + total_length, + root, + CMARK_EXTENT_BLANK)->next; + } + + if (extent->start == extent->stop) + source_map_free_extent(self, extent); + + return res; +} + +cmark_source_extent * 
+source_map_splice_extent(cmark_source_map *self, bufsize_t start, bufsize_t stop, + cmark_node *node, cmark_extent_type type) +{ + if (!self) + return NULL; + + if (!self->next_cursor) { + self->cursor = source_map_insert_extent(self, + self->cursor, + start + self->cursor_offset, + stop + self->cursor_offset, node, type); + + return self->cursor; + } else if (start + self->cursor_offset < self->next_cursor->start && + stop + self->cursor_offset <= self->next_cursor->start) { + self->cursor = source_map_insert_extent(self, + self->cursor, + start + self->cursor_offset, + stop + self->cursor_offset, node, type); + + return self->cursor; + } else if (start + self->cursor_offset < self->next_cursor->start) { + bufsize_t new_start = self->next_cursor->start - self->cursor_offset; + + self->cursor = source_map_insert_extent(self, + self->cursor, + start + self->cursor_offset, + self->next_cursor->start, + node, type); + + if (new_start == stop) + return self->cursor; + + start = new_start; + } + + while (self->next_cursor && start + self->cursor_offset >= self->next_cursor->start) { + self->cursor_offset += self->next_cursor->stop - self->next_cursor->start; + self->cursor = self->cursor->next; + self->next_cursor = self->cursor->next; + } + + return source_map_splice_extent(self, start, stop, node, type); +} + +bool +source_map_start_cursor(cmark_source_map *self, cmark_source_extent *cursor) +{ + if (!self) + return false; + + self->cursor = cursor ? 
cursor : self->head; + + if (!self->cursor) + return false; + + self->next_cursor = self->cursor->next; + self->cursor_offset = self->cursor->stop; + + return true; +} + +void +source_map_pretty_print(cmark_source_map *self) { + cmark_source_extent *tmp; + + if (!self) + return; + + for (tmp = self->head; tmp; tmp = tmp->next) { + printf ("%d:%d - %s, %s (%p)\n", tmp->start, tmp->stop, + cmark_node_get_type_string(tmp->node), + cmark_source_extent_get_type_string(tmp), + (void *) tmp->node); + } +} + +bool +source_map_check(cmark_source_map *self, bufsize_t total_length) +{ + bufsize_t last_stop = 0; + cmark_source_extent *tmp; + + if (!self) + return true; + + for (tmp = self->head; tmp; tmp = tmp->next) { + if (tmp->start != last_stop) { + return false; + } if (tmp->start == tmp->stop) + return false; + last_stop = tmp->stop; + } + + if (last_stop != total_length) + return false; + + return true; +} + +cmark_source_extent * +source_map_get_cursor(cmark_source_map *self) +{ + if (!self) + return NULL; + + return self->cursor; +} + +cmark_source_extent * +source_map_get_head(cmark_source_map *self) { + if (!self) + return NULL; + + return self->head; +} + +cmark_source_extent * +source_map_get_tail(cmark_source_map *self) +{ + if (!self) + return NULL; + + return self->tail; +} + +size_t +cmark_source_extent_get_start(cmark_source_extent *extent) +{ + return extent->start; +} + +size_t +cmark_source_extent_get_stop(cmark_source_extent *extent) +{ + return extent->stop; +} + +cmark_node * +cmark_source_extent_get_node(cmark_source_extent *extent) +{ + return extent->node; +} + +cmark_source_extent * +cmark_source_extent_get_next(cmark_source_extent *extent) +{ + return extent->next; +} + +cmark_source_extent * +cmark_source_extent_get_previous(cmark_source_extent *extent) +{ + return extent->prev; +} + +cmark_extent_type +cmark_source_extent_get_type(cmark_source_extent *extent) +{ + return extent->type; +} + +const char * 
+cmark_source_extent_get_type_string(cmark_source_extent *extent) +{ + switch (extent->type) { + case CMARK_EXTENT_NONE: + return "unknown"; + case CMARK_EXTENT_OPENER: + return "opener"; + case CMARK_EXTENT_CLOSER: + return "closer"; + case CMARK_EXTENT_BLANK: + return "blank"; + case CMARK_EXTENT_CONTENT: + return "content"; + case CMARK_EXTENT_PUNCTUATION: + return "punctuation"; + case CMARK_EXTENT_LINK_DESTINATION: + return "link_destination"; + case CMARK_EXTENT_LINK_TITLE: + return "link_title"; + case CMARK_EXTENT_LINK_LABEL: + return "link_label"; + case CMARK_EXTENT_REFERENCE_DESTINATION: + return "reference_destination"; + case CMARK_EXTENT_REFERENCE_LABEL: + return "reference_label"; + case CMARK_EXTENT_REFERENCE_TITLE: + return "reference_title"; + } + return "unknown"; +} diff --git a/src/source_map.h b/src/source_map.h new file mode 100644 index 000000000..de13f8ed7 --- /dev/null +++ b/src/source_map.h @@ -0,0 +1,74 @@ +#ifndef CMARK_SOURCE_MAP_H +#define CMARK_SOURCE_MAP_H + +#include "cmark.h" +#include "config.h" +#include "buffer.h" + +typedef struct _cmark_source_map +{ + cmark_source_extent *head; + cmark_source_extent *tail; + cmark_source_extent *cursor; + cmark_source_extent *next_cursor; + bufsize_t cursor_offset; + cmark_mem *mem; +} cmark_source_map; + +struct cmark_source_extent +{ + bufsize_t start; + bufsize_t stop; + struct cmark_source_extent *next; + struct cmark_source_extent *prev; + cmark_node *node; + cmark_extent_type type; +}; + +cmark_source_map * source_map_new (cmark_mem *mem); + +void source_map_free (cmark_source_map *self); + +bool source_map_check (cmark_source_map *self, + bufsize_t total_length); + +void source_map_pretty_print (cmark_source_map *self); + +cmark_source_extent * source_map_append_extent(cmark_source_map *self, + bufsize_t start, + bufsize_t stop, + cmark_node *node, + cmark_extent_type type); + +cmark_source_extent * source_map_insert_extent(cmark_source_map *self, + cmark_source_extent *previous, + 
bufsize_t start, + bufsize_t stop, + cmark_node *node, + cmark_extent_type type); + +cmark_source_extent * source_map_free_extent (cmark_source_map *self, + cmark_source_extent *extent); + +cmark_source_extent * source_map_stitch_extent(cmark_source_map *self, + cmark_source_extent *extent, + cmark_node *root, + cmark_node *target_node, + bufsize_t total_length); + +cmark_source_extent * source_map_splice_extent(cmark_source_map *self, + bufsize_t start, + bufsize_t stop, + cmark_node *node, + cmark_extent_type type); + +cmark_source_extent * source_map_get_cursor (cmark_source_map *self); + +cmark_source_extent * source_map_get_head (cmark_source_map *self); + +cmark_source_extent * source_map_get_tail (cmark_source_map *self); + +bool source_map_start_cursor (cmark_source_map *self, + cmark_source_extent *cursor); + +#endif diff --git a/test/cmark.py b/test/cmark.py index f4ff5765b..fd35d54bb 100644 --- a/test/cmark.py +++ b/test/cmark.py @@ -6,6 +6,8 @@ import platform import os +OPT_SOURCEPOS = 1 << 1 + def pipe_through_prog(prog, text): p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE) [result, err] = p1.communicate(input=text.encode('utf-8')) @@ -29,7 +31,8 @@ def to_commonmark(lib, text): render_commonmark = lib.cmark_render_commonmark render_commonmark.restype = c_char_p render_commonmark.argtypes = [c_void_p, c_int, c_int] - node = parse_document(textbytes, textlen, 0) + # We want tests to go through the source map code + node = parse_document(textbytes, textlen, OPT_SOURCEPOS) if node is None: raise Exception("parse_document failed") result = render_commonmark(node, 0, 0).decode('utf-8')