diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0cb653014..b75c0c736 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,6 +18,7 @@ set(HEADERS
   houdini.h
   cmark_ctype.h
   render.h
+  source_map.h
   )
 set(LIBRARY_SOURCES
   cmark.c
@@ -40,6 +41,7 @@ set(LIBRARY_SOURCES
   houdini_html_e.c
   houdini_html_u.c
   cmark_ctype.c
+  source_map.c
   ${HEADERS}
   )
 
diff --git a/src/blocks.c b/src/blocks.c
index 301178362..4abe7d3c4 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -28,6 +28,10 @@
 #define MIN(x, y) ((x < y) ? x : y)
 #endif
 
+#ifndef MAX // NOTE: function-like macro; each argument may be evaluated twice
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#endif
+
 #define peek_at(i, n) (i)->data[n]
 
 static bool S_last_line_blank(const cmark_node *node) {
@@ -95,6 +99,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
   parser->error_code = CMARK_ERR_NONE;
   parser->total_bytes = 0;
   parser->line_number = 0;
+  parser->line_offset = 0;
   parser->offset = 0;
   parser->column = 0;
   parser->first_nonspace = 0;
@@ -106,6 +111,9 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
   parser->options = options;
   parser->last_buffer_ended_with_cr = false;
 
+  if (options & CMARK_OPT_SOURCEPOS)
+    parser->source_map = source_map_new(mem);
+
   return parser;
 }
 
@@ -118,6 +126,7 @@ void cmark_parser_free(cmark_parser *parser) {
   cmark_mem *mem = parser->mem;
   cmark_strbuf_free(&parser->curline);
   cmark_strbuf_free(&parser->linebuf);
+  source_map_free(parser->source_map);
   cmark_reference_map_free(parser->refmap);
   mem->free(parser);
 }
@@ -257,18 +266,28 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
 
   switch (S_type(b)) {
   case CMARK_NODE_PARAGRAPH:
+    source_map_start_cursor(parser->source_map, parser->last_paragraph_extent);
     while (cmark_strbuf_at(node_content, 0) == '[' &&
            (pos = cmark_parse_reference_inline(parser->mem, node_content,
-                                               parser->refmap, parser->root))) {
-
+                                               parser->refmap, b,
+                                               parser->source_map))) {
+      source_map_start_cursor(parser->source_map,
+                              source_map_get_cursor(parser->source_map));
       cmark_strbuf_drop(node_content, pos);
     }
+
+    while (parser->last_paragraph_extent != source_map_get_cursor(parser->source_map)) {
+      if (parser->last_paragraph_extent->node == b) {
+        parser->last_paragraph_extent->node = parser->root;
+      }
+      parser->last_paragraph_extent = parser->last_paragraph_extent->next;
+    }
+
     if (is_blank(node_content, 0)) {
       // remove blank node (former reference def)
       cmark_node_free(b);
     }
     break;
-
   case CMARK_NODE_CODE_BLOCK:
     if (!b->as.code.fenced) { // indented code
       remove_trailing_blank_lines(node_content);
@@ -363,21 +382,36 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
 
 // Walk through node and all children, recursively, parsing
 // string content into inline content where appropriate.
-static void process_inlines(cmark_mem *mem, cmark_node *root,
-                            cmark_reference_map *refmap, int options) {
-  cmark_iter *iter = cmark_iter_new(root);
+static void process_inlines(cmark_parser *parser) {
+  cmark_iter *iter = cmark_iter_new(parser->root);
   cmark_node *cur;
   cmark_event_type ev_type;
+  cmark_source_extent *cur_extent = NULL;
+
+  cur_extent = source_map_get_head(parser->source_map);
 
   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
     cur = cmark_iter_get_node(iter);
     if (ev_type == CMARK_EVENT_ENTER) {
       if (contains_inlines(S_type(cur))) {
-        cmark_parse_inlines(mem, cur, refmap, options);
+        while (cur_extent && cur_extent->node != cur) {
+          cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, cur, parser->line_offset);
+        }
+
+        if (parser->source_map)
+          assert(cur_extent);
+
+        source_map_start_cursor(parser->source_map, cur_extent);
+
+        cmark_parse_inlines(parser->mem, cur, parser->refmap, parser->options, parser->source_map, parser->line_offset);
       }
     }
   }
 
+  while (cur_extent) {
+    cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, NULL, parser->line_offset);
+  }
+
   cmark_iter_free(iter);
 }
 
@@ -484,7 +518,10 @@ static cmark_node *finalize_document(cmark_parser *parser) {
   }
 
   finalize(parser, parser->root);
-  process_inlines(parser->mem, parser->root, parser->refmap, parser->options);
+
+  process_inlines(parser);
+
+  assert(source_map_check(parser->source_map, parser->line_offset));
 
   return parser->root;
 }
@@ -526,6 +563,7 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) {
 static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
                           size_t len, bool eof) {
   const unsigned char *end = buffer + len;
+  const unsigned char *skipped;
   static const uint8_t repl[] = {239, 191, 189};
 
   if (parser->error_code) {
@@ -550,6 +588,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
   while (buffer < end) {
     const unsigned char *eol;
     bufsize_t chunk_len;
+    bufsize_t linebuf_size = 0;
     bool process = false;
     for (eol = buffer; eol < end; ++eol) {
       if (S_is_line_end_char(*eol)) {
@@ -567,6 +606,7 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
     chunk_len = (eol - buffer);
     if (process) {
       if (parser->linebuf.size > 0) {
+        linebuf_size = cmark_strbuf_len(&parser->linebuf);
         cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
         S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
         cmark_strbuf_clear(&parser->linebuf);
@@ -585,6 +625,8 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
     }
 
     buffer += chunk_len;
+    skipped = buffer;
+
     if (buffer < end) {
       if (*buffer == '\0') {
         // skip over NULL
@@ -600,6 +642,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
           buffer++;
       }
     }
+    chunk_len += buffer - skipped; // bytes skipped above (NUL or line ending)
+    chunk_len += linebuf_size;     // bytes already held in linebuf before this chunk
+
+    if (process)
+      parser->line_offset += chunk_len; // only advance once the line was processed
   }
 }
 
@@ -659,11 +706,13 @@ static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
 // indicates a number of columns; otherwise, a number of bytes.
 // If advancing a certain number of columns partially consumes
 // a tab character, parser->partially_consumed_tab is set to true.
-static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
-                             bufsize_t count, bool columns) {
+static void S_advance_offset(cmark_parser *parser, cmark_node *container, cmark_extent_type type,
+                             cmark_chunk *input, bufsize_t count, bool columns) {
   char c;
   int chars_to_tab;
   int chars_to_advance;
+  int initial_pos = parser->offset + parser->line_offset;
+
   while (count > 0 && (c = peek_at(input, parser->offset))) {
     if (c == '\t') {
       chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
@@ -686,6 +735,8 @@ static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
       count -= 1;
     }
   }
+
+  source_map_append_extent(parser->source_map, initial_pos, parser->offset + parser->line_offset, container, type);
 }
 
 static bool S_last_child_is_open(cmark_node *container) {
@@ -693,7 +744,7 @@ static bool S_last_child_is_open(cmark_node *container) {
          (container->last_child->flags & CMARK_NODE__OPEN);
 }
 
-static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
+static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input, cmark_node *container) {
   bool res = false;
   bufsize_t matched = 0;
 
@@ -701,10 +752,10 @@ static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
       parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
   if (matched) {
 
-    S_advance_offset(parser, input, parser->indent + 1, true);
+    S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->indent + 1, true);
 
     if (S_is_space_or_tab(peek_at(input, parser->offset))) {
-      S_advance_offset(parser, input, 1, true);
+      S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true);
     }
 
     res = true;
@@ -718,7 +769,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
 
   if (parser->indent >=
       container->as.list.marker_offset + container->as.list.padding) {
-    S_advance_offset(parser, input, container->as.list.marker_offset +
+    S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, container->as.list.marker_offset +
                                         container->as.list.padding,
                      true);
     res = true;
@@ -726,7 +777,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
     // if container->first_child is NULL, then the opening line
     // of the list item was blank after the list marker; in this
     // case, we are done with the list item.
-    S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
+    S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset,
                      false);
     res = true;
   }
@@ -740,10 +791,10 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
 
   if (!container->as.code.fenced) { // indented
     if (parser->indent >= CODE_INDENT) {
-      S_advance_offset(parser, input, CODE_INDENT, true);
+      S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true);
       res = true;
     } else if (parser->blank) {
-      S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
+      S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset,
                        false);
       res = true;
     }
@@ -759,14 +810,15 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
       // closing fence - and since we're at
       // the end of a line, we can stop processing it:
       *should_continue = false;
-      S_advance_offset(parser, input, matched, false);
+      S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, false); // indentation before the fence
+      S_advance_offset(parser, container, CMARK_EXTENT_CLOSER, input, matched, false); // the closing fence characters
       parser->current = finalize(parser, container);
     } else {
       // skip opt. spaces of fence parser->offset
       int i = container->as.code.fence_offset;
 
       while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
-        S_advance_offset(parser, input, 1, true);
+        S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true);
         i--;
       }
       res = true;
@@ -823,7 +875,7 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
 
     switch (cont_type) {
     case CMARK_NODE_BLOCK_QUOTE:
-      if (!parse_block_quote_prefix(parser, input))
+      if (!parse_block_quote_prefix(parser, input, container))
         goto done;
       break;
     case CMARK_NODE_ITEM:
@@ -883,29 +935,26 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
     indented = parser->indent >= CODE_INDENT;
 
     if (!indented && peek_at(input, parser->first_nonspace) == '>') {
+      *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
+                             parser->first_nonspace + 1);
 
-      bufsize_t blockquote_startpos = parser->first_nonspace;
-
-      S_advance_offset(parser, input,
+      S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
                        parser->first_nonspace + 1 - parser->offset, false);
       // optional following character
       if (S_is_space_or_tab(peek_at(input, parser->offset))) {
-        S_advance_offset(parser, input, 1, true);
+        S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true);
       }
-      *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
-                             blockquote_startpos + 1);
 
     } else if (!indented && (matched = scan_atx_heading_start(
                                  input, parser->first_nonspace))) {
       bufsize_t hashpos;
       int level = 0;
-      bufsize_t heading_startpos = parser->first_nonspace;
 
-      S_advance_offset(parser, input,
+      *container = add_child(parser, *container, CMARK_NODE_HEADING,
+                             parser->first_nonspace + 1);
+      S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
-      *container = add_child(parser, *container, CMARK_NODE_HEADING,
-                             heading_startpos + 1);
 
       hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
 
@@ -927,7 +976,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       (*container)->as.code.fence_offset =
           (int8_t)(parser->first_nonspace - parser->offset);
       (*container)->as.code.info = cmark_chunk_literal("");
-      S_advance_offset(parser, input,
+      S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
 
@@ -947,14 +996,14 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       (*container)->type = (uint16_t)CMARK_NODE_HEADING;
       (*container)->as.heading.level = lev;
       (*container)->as.heading.setext = true;
-      S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
+      S_advance_offset(parser, *container, CMARK_EXTENT_CLOSER, input, input->len - 1 - parser->offset, false);
     } else if (!indented &&
                !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
                (matched = scan_thematic_break(input, parser->first_nonspace))) {
       // it's only now that we know the line is not part of a setext heading:
       *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
                              parser->first_nonspace + 1);
-      S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
+      S_advance_offset(parser, *container, CMARK_EXTENT_CONTENT, input, input->len - 1 - parser->offset, false);
     } else if ((!indented || cont_type == CMARK_NODE_LIST) &&
                (matched = parse_list_marker(
                     parser->mem, input, parser->first_nonspace,
@@ -964,6 +1013,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       // spaces indent, as long as the list container is still open.
       cmark_node *list = NULL;
       cmark_node *item = NULL;
+      cmark_source_extent *save_source_map_tail;
       int i = 0;
 
       if (cont_type != CMARK_NODE_LIST ||
@@ -980,17 +1030,18 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       item = *container;
 
       // compute padding:
-      S_advance_offset(parser, input,
+      S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
                        parser->first_nonspace + matched - parser->offset,
                        false);
 
       save_partially_consumed_tab = parser->partially_consumed_tab;
       save_offset = parser->offset;
       save_column = parser->column;
+      save_source_map_tail = source_map_get_tail(parser->source_map);
 
       while (parser->column - save_column <= 5 &&
              S_is_space_or_tab(peek_at(input, parser->offset))) {
-        S_advance_offset(parser, input, 1, true);
+        S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true);
       }
 
       i = parser->column - save_column;
@@ -1000,9 +1051,14 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
         data->padding = matched + 1;
         parser->offset = save_offset;
         parser->column = save_column;
+        if (save_source_map_tail) {
+          cmark_source_extent *tmp_extent;
+          for (tmp_extent = save_source_map_tail->next; tmp_extent; tmp_extent = source_map_free_extent(parser->source_map, tmp_extent));
+        }
+
         parser->partially_consumed_tab = save_partially_consumed_tab;
         if (i > 0) {
-          S_advance_offset(parser, input, 1, true);
+          S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true);
         }
       } else {
         data->padding = matched + i;
@@ -1021,7 +1077,6 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
 
       parser->mem->free(data);
     } else if (indented && !maybe_lazy && !parser->blank) {
-      S_advance_offset(parser, input, CODE_INDENT, true);
       *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
                              parser->offset + 1);
       (*container)->as.code.fenced = false;
@@ -1030,6 +1085,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
       (*container)->as.code.fence_offset = 0;
       (*container)->as.code.info = cmark_chunk_literal("");
 
+      S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true);
     } else {
       break;
     }
@@ -1094,6 +1150,11 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
     }
 
     if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
+      source_map_append_extent(parser->source_map,
+                               parser->offset + parser->line_offset,
+                               parser->line_offset + input->len,
+                               container,
+                               CMARK_EXTENT_CONTENT);
       add_line(container, input, parser);
     } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
       add_line(container, input, parser);
@@ -1130,26 +1191,48 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
         break;
       }
 
+      source_map_append_extent(parser->source_map,
+                               parser->offset + parser->line_offset,
+                               parser->line_offset + input->len,
+                               container,
+                               CMARK_EXTENT_CONTENT);
+
       if (matches_end_condition) {
         container = finalize(parser, container);
         assert(parser->current != NULL);
       }
     } else if (parser->blank) {
-      // ??? do nothing
+      source_map_append_extent(parser->source_map,
+                               parser->line_offset + parser->offset,
+                               parser->line_offset + input->len,
+                               container,
+                               CMARK_EXTENT_BLANK);
     } else if (accepts_lines(S_type(container))) {
+      bufsize_t initial_len = input->len;
+      bool chopped = false;
+
       if (S_type(container) == CMARK_NODE_HEADING &&
           container->as.heading.setext == false) {
         chop_trailing_hashtags(input);
+        chopped = true;
       }
-      S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
+      S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset,
                        false);
       add_line(container, input, parser);
+
+      if (chopped)
+        source_map_append_extent(parser->source_map,
+                                 MAX(parser->line_offset + parser->offset, parser->line_offset + input->len),
+                                 parser->line_offset + initial_len,
+                                 container,
+                                 CMARK_EXTENT_CLOSER);
     } else {
       // create paragraph container for line
       container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
                             parser->first_nonspace + 1);
-      S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
+      S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->first_nonspace - parser->offset,
                        false);
+      parser->last_paragraph_extent = source_map_get_tail(parser->source_map);
       add_line(container, input, parser);
     }
 
@@ -1211,6 +1294,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
 cmark_node *cmark_parser_finish(cmark_parser *parser) {
   if (parser->linebuf.size) {
     S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
+    parser->line_offset += parser->linebuf.size;
     cmark_strbuf_clear(&parser->linebuf);
   }
 
@@ -1235,6 +1319,12 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
   return parser->root;
 }
 
+// Public accessor: forwards to source_map_get_head() for this parser's map.
+cmark_source_extent *
+cmark_parser_get_first_source_extent(cmark_parser *parser) {
+  return source_map_get_head(parser->source_map);
+}
+
 cmark_err_type cmark_parser_get_error(cmark_parser *parser) {
   return parser->error_code;
 }
diff --git a/src/cmark.h b/src/cmark.h
index 389bc3814..cc1e7f527 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -66,6 +66,21 @@ typedef enum {
   CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
 } cmark_node_type;
 
+typedef enum { /* Kind of source span recorded by a source-map extent. */
+  CMARK_EXTENT_NONE,
+  CMARK_EXTENT_OPENER, /* e.g. '>' marker, ATX '#'s, opening fence, list marker */
+  CMARK_EXTENT_CLOSER, /* e.g. closing fence, setext underline, trailing ATX '#'s */
+  CMARK_EXTENT_BLANK, /* whitespace: blank lines, padding after a marker */
+  CMARK_EXTENT_CONTENT, /* e.g. code block / HTML block lines, thematic break */
+  CMARK_EXTENT_PUNCTUATION, /* e.g. the brackets and parentheses of a link */
+  CMARK_EXTENT_LINK_DESTINATION, /* destination of an inline link */
+  CMARK_EXTENT_LINK_TITLE, /* title of an inline link */
+  CMARK_EXTENT_LINK_LABEL, /* label of a full reference link */
+  CMARK_EXTENT_REFERENCE_DESTINATION,
+  CMARK_EXTENT_REFERENCE_LABEL,
+  CMARK_EXTENT_REFERENCE_TITLE,
+} cmark_extent_type;
+
 /* For backwards compatibility: */
 #define CMARK_NODE_HEADER CMARK_NODE_HEADING
 #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
@@ -93,6 +108,7 @@ typedef enum {
 typedef struct cmark_node cmark_node;
 typedef struct cmark_parser cmark_parser;
 typedef struct cmark_iter cmark_iter;
+typedef struct cmark_source_extent cmark_source_extent;
 
 /**
  * ## Custom memory allocator support
@@ -504,6 +520,11 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
 CMARK_EXPORT
 cmark_node *cmark_parser_finish(cmark_parser *parser);
 
+/** Return a pointer to the first extent of the parser's source map
+ */
+CMARK_EXPORT
+cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
+
 /** Parse a CommonMark document in 'buffer' of length 'len'.
  * Returns a pointer to a tree of nodes.  The memory allocated for
  * the node tree should be released using 'cmark_node_free'
@@ -515,10 +536,44 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
 /** Parse a CommonMark document in file 'f', returning a pointer to
  * a tree of nodes.  The memory allocated for the node tree should be
  * released using 'cmark_node_free' when it is no longer needed.
+ * Returns NULL on error.
  */
 CMARK_EXPORT
 cmark_node *cmark_parse_file(FILE *f, int options);
 
+/**
+ * ## Source map API
+ */
+
+/** Return the index, in bytes, of the start of 'extent'.
+ */
+CMARK_EXPORT
+size_t cmark_source_extent_get_start(cmark_source_extent *extent);
+
+/** Return the index, in bytes, of the stop of 'extent'. This
+ * index is not included in the extent.
+ */
+CMARK_EXPORT
+size_t cmark_source_extent_get_stop(cmark_source_extent *extent);
+
+/** Return the extent immediately following 'extent'. */
+CMARK_EXPORT
+cmark_source_extent *cmark_source_extent_get_next(cmark_source_extent *extent);
+
+/** Return the extent immediately preceding 'extent'. */
+CMARK_EXPORT
+cmark_source_extent *cmark_source_extent_get_previous(cmark_source_extent *extent);
+
+/** Return the node 'extent' maps to. */
+CMARK_EXPORT
+cmark_node *cmark_source_extent_get_node(cmark_source_extent *extent);
+
+/** Return the type of 'extent'. */
+CMARK_EXPORT
+cmark_extent_type cmark_source_extent_get_type(cmark_source_extent *extent);
+/** Return a string representation of the type of 'extent'. */
+CMARK_EXPORT
+const char *cmark_source_extent_get_type_string(cmark_source_extent *extent);
+
 /**
  * ## Rendering
  */
diff --git a/src/inlines.c b/src/inlines.c
index fccdd9104..d2378b53f 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -13,6 +13,10 @@
 #include "scanners.h"
 #include "inlines.h"
 
+#ifndef MIN // NOTE: function-like macro; each argument may be evaluated twice
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#endif
+
 static const char *EMDASH = "\xE2\x80\x94";
 static const char *ENDASH = "\xE2\x80\x93";
 static const char *ELLIPSES = "\xE2\x80\xA6";
@@ -40,6 +44,7 @@ typedef struct delimiter {
   unsigned char delim_char;
   bool can_open;
   bool can_close;
+  cmark_source_extent *extent;
 } delimiter;
 
 typedef struct bracket {
@@ -50,6 +55,7 @@ typedef struct bracket {
   bool image;
   bool active;
   bool bracket_after;
+  cmark_source_extent *extent;
 } bracket;
 
 typedef struct {
@@ -61,6 +67,7 @@ typedef struct {
   bracket *last_bracket;
   bufsize_t backticks[MAXBACKTICKS + 1];
   bool scanned_for_backticks;
+  cmark_source_map *source_map;
 } subject;
 
 static CMARK_INLINE bool S_is_line_end_char(char c) {
@@ -73,7 +80,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
 static int parse_inline(subject *subj, cmark_node *parent, int options);
 
 static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
-                             cmark_reference_map *refmap);
+                             cmark_reference_map *refmap, cmark_source_map *source_map);
 static bufsize_t subject_find_special_char(subject *subj, int options);
 
 // Create an inline with a literal string value.
@@ -149,7 +156,7 @@ static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url,
 }
 
 static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
-                             cmark_reference_map *refmap) {
+                             cmark_reference_map *refmap, cmark_source_map *source_map) {
   int i;
   e->mem = mem;
   e->input.data = buffer->ptr;
@@ -159,6 +166,7 @@ static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
   e->refmap = refmap;
   e->last_delim = NULL;
   e->last_bracket = NULL;
+  e->source_map = source_map;
   for (i = 0; i <= MAXBACKTICKS; i++) {
     e->backticks[i] = 0;
   }
@@ -406,6 +414,7 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
   if (delim->previous != NULL) {
     delim->previous->next = delim;
   }
+  delim->extent = NULL;
   subj->last_delim = delim;
 }
 
@@ -421,11 +430,12 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
   b->previous_delimiter = subj->last_delim;
   b->position = subj->pos;
   b->bracket_after = false;
+  b->extent = NULL;
   subj->last_bracket = b;
 }
 
 // Assumes the subject has a c at the current position.
-static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
+static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart, bool *pushed) {
   bufsize_t numdelims;
   cmark_node *inl_text;
   bool can_open, can_close;
@@ -446,6 +456,9 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
 
   if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
     push_delimiter(subj, c, can_open, can_close, inl_text);
+    *pushed = true;
+  } else {
+    *pushed = false;
   }
 
   return inl_text;
@@ -612,6 +625,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
   bufsize_t opener_num_chars = opener_inl->as.literal.len;
   bufsize_t closer_num_chars = closer_inl->as.literal.len;
   cmark_node *tmp, *tmpnext, *emph;
+  cmark_source_extent *tmp_extent = NULL;
 
   // calculate the actual number of characters used from this closer
   if (closer_num_chars < 3 || opener_num_chars < 3) {
@@ -647,9 +661,30 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
   }
   cmark_node_insert_after(opener_inl, emph);
 
+  if (subj->source_map) {
+    tmp_extent = closer->extent->prev;
+
+    source_map_insert_extent(subj->source_map,
+                             opener->extent,
+                             opener->extent->stop - use_delims,
+                             opener->extent->stop,
+                             emph,
+                             CMARK_EXTENT_OPENER);
+    opener->extent->stop -= use_delims;
+
+    source_map_insert_extent(subj->source_map,
+                             tmp_extent,
+                             closer->extent->start,
+                             closer->extent->start + use_delims,
+                             emph,
+                             CMARK_EXTENT_CLOSER);
+    closer->extent->start += use_delims;
+  }
+
   // if opener has 0 characters, remove it and its associated inline
   if (opener_num_chars == 0) {
     cmark_node_free(opener_inl);
+    source_map_free_extent(subj->source_map, opener->extent);
     remove_delimiter(subj, opener);
   }
 
@@ -659,6 +694,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
     cmark_node_free(closer_inl);
     // remove closer from list
     tmp_delim = closer->next;
+    source_map_free_extent(subj->source_map, closer->extent);
     remove_delimiter(subj, closer);
     closer = tmp_delim;
   }
@@ -883,6 +919,8 @@ static cmark_node *handle_close_bracket(subject *subj) {
   int found_label;
   cmark_node *tmp, *tmpnext;
   bool is_image;
+  bool is_inline = false;
+  bool is_shortcut = false;
 
   advance(subj); // advance past ]
   initial_pos = subj->pos;
@@ -933,6 +971,7 @@ static cmark_node *handle_close_bracket(subject *subj) {
       title = cmark_clean_title(subj->mem, &title_chunk);
       cmark_chunk_free(subj->mem, &url_chunk);
       cmark_chunk_free(subj->mem, &title_chunk);
+      is_inline = true;
       goto match;
 
     } else {
@@ -955,6 +994,7 @@ static cmark_node *handle_close_bracket(subject *subj) {
     cmark_chunk_free(subj->mem, &raw_label);
     raw_label = cmark_chunk_dup(&subj->input, opener->position,
                                 initial_pos - opener->position - 1);
+    is_shortcut = true;
     found_label = true;
   }
 
@@ -984,6 +1024,31 @@ static cmark_node *handle_close_bracket(subject *subj) {
   cmark_node_insert_before(opener->inl_text, inl);
   // Add link text:
   tmp = opener->inl_text->next;
+
+  if (subj->source_map) {
+    assert(opener->extent);
+
+    opener->extent->node = inl;
+    opener->extent->type = CMARK_EXTENT_OPENER;
+  }
+
+  source_map_splice_extent(subj->source_map, initial_pos - 1, initial_pos, inl, CMARK_EXTENT_PUNCTUATION);
+  if (is_inline) {
+    source_map_splice_extent(subj->source_map, after_link_text_pos, starturl, inl, CMARK_EXTENT_PUNCTUATION);
+    source_map_splice_extent(subj->source_map, starturl, endurl, inl, CMARK_EXTENT_LINK_DESTINATION);
+    if (endtitle != starttitle) {
+      source_map_splice_extent(subj->source_map, endurl, starttitle, inl, CMARK_EXTENT_BLANK);
+      source_map_splice_extent(subj->source_map, starttitle, endtitle, inl, CMARK_EXTENT_LINK_TITLE);
+      source_map_splice_extent(subj->source_map, endtitle, subj->pos, inl, CMARK_EXTENT_PUNCTUATION);
+    } else {
+      source_map_splice_extent(subj->source_map, endurl, subj->pos, inl, CMARK_EXTENT_PUNCTUATION);
+    }
+  } else if (!is_shortcut) {
+    source_map_splice_extent(subj->source_map, initial_pos, initial_pos + 1, inl, CMARK_EXTENT_PUNCTUATION);
+    source_map_splice_extent(subj->source_map, initial_pos + 1, subj->pos - 1, inl, CMARK_EXTENT_LINK_LABEL);
+    source_map_splice_extent(subj->source_map, subj->pos - 1, subj->pos, inl, CMARK_EXTENT_PUNCTUATION);
+  }
+
   while (tmp) {
     tmpnext = tmp->next;
     cmark_node_append_child(inl, tmp);
@@ -1087,6 +1152,11 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
   cmark_chunk contents;
   unsigned char c;
   bufsize_t endpos;
+	bufsize_t startpos = subj->pos;
+  bufsize_t trimmed_spaces = 0;
+  bool add_extent_to_last_bracket = false;
+  bool add_extent_to_last_delimiter = false;
+
   c = peek_char(subj);
   if (c == 0) {
     return 0;
@@ -1095,6 +1165,8 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
   case '\r':
   case '\n':
     new_inl = handle_newline(subj);
+    if (new_inl->type == CMARK_NODE_LINEBREAK)
+      startpos -= 2;
     break;
   case '`':
     new_inl = handle_backticks(subj);
@@ -1112,7 +1184,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
   case '_':
   case '\'':
   case '"':
-    new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
+    new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0, &add_extent_to_last_delimiter);
     break;
   case '-':
     new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
@@ -1124,6 +1196,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
     advance(subj);
     new_inl = make_str(subj->mem, cmark_chunk_literal("["));
     push_bracket(subj, false, new_inl);
+    add_extent_to_last_bracket = true;
     break;
   case ']':
     new_inl = handle_close_bracket(subj);
@@ -1134,6 +1207,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
       advance(subj);
       new_inl = make_str(subj->mem, cmark_chunk_literal("!["));
       push_bracket(subj, true, new_inl);
+      add_extent_to_last_bracket = true;
     } else {
       new_inl = make_str(subj->mem, cmark_chunk_literal("!"));
     }
@@ -1145,12 +1219,24 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
 
     // if we're at a newline, strip trailing spaces.
     if (S_is_line_end_char(peek_char(subj))) {
+      bufsize_t initial_size = contents.len;
       cmark_chunk_rtrim(&contents);
+      trimmed_spaces = initial_size - contents.len;
     }
 
     new_inl = make_str(subj->mem, contents);
   }
+
   if (new_inl != NULL) {
+    cmark_source_extent *extent;
+
+    extent = source_map_splice_extent(subj->source_map, startpos, subj->pos - trimmed_spaces, new_inl, CMARK_EXTENT_CONTENT);
+
+    if (add_extent_to_last_bracket)
+      subj->last_bracket->extent = extent;
+    else if (add_extent_to_last_delimiter)
+      subj->last_delim->extent = extent;
+
     cmark_node_append_child(parent, new_inl);
   }
 
@@ -1159,9 +1245,11 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
 
 // Parse inlines from parent's string_content, adding as children of parent.
 extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
-                                cmark_reference_map *refmap, int options) {
+                                cmark_reference_map *refmap, int options,
+                                cmark_source_map *source_map, bufsize_t total_length) {
   subject subj;
-  subject_from_buf(mem, &subj, &parent->content, refmap);
+  subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
+  bufsize_t initial_len = subj.input.len;
   cmark_chunk_rtrim(&subj.input);
 
   while (!is_eof(&subj) && parse_inline(&subj, parent, options))
@@ -1175,6 +1263,14 @@ extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
   while (subj.last_bracket) {
     pop_bracket(&subj);
   }
+
+  if (source_map)
+    source_map_insert_extent(source_map,
+                             source_map->cursor,
+                             source_map->cursor->stop,
+                             MIN(source_map->cursor->stop + initial_len - subj.input.len, total_length),
+                             parent,
+                             CMARK_EXTENT_BLANK);
 }
 
 // Parse zero or more space characters, including at most one newline.
@@ -1191,24 +1287,29 @@ static void spnl(subject *subj) {
 // after reference is parsed.
 bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
                                        cmark_reference_map *refmap,
-                                       cmark_node *root) {
+                                       cmark_node *container,
+                                       cmark_source_map *source_map) {
   subject subj;
+  cmark_node *reference = cmark_node_new(CMARK_NODE_REFERENCE);
+  cmark_reference *ref;
 
   cmark_chunk lab;
   cmark_chunk url;
   cmark_chunk title;
 
   bufsize_t matchlen = 0;
-  bufsize_t beforetitle;
-  cmark_reference *ref;
-  cmark_node *reference = cmark_node_new(CMARK_NODE_REFERENCE);
+  bufsize_t starttitle, endtitle;
+  bufsize_t endlabel;
+  bufsize_t starturl, endurl;
 
-  subject_from_buf(mem, &subj, input, NULL);
+  subject_from_buf(mem, &subj, input, NULL, source_map);
 
   // parse label:
   if (!link_label(&subj, &lab) || lab.len == 0)
     goto nomatch;
 
+  endlabel = subj.pos - 1;
+
   // colon:
   if (peek_char(&subj) == ':') {
     advance(&subj);
@@ -1218,6 +1319,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
 
   // parse link url:
   spnl(&subj);
+  starturl = subj.pos;
   matchlen = manual_scan_link_url(&subj.input, subj.pos);
   if (matchlen > 0) {
     url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
@@ -1227,22 +1329,29 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
   }
 
   // parse optional link_title
-  beforetitle = subj.pos;
+  endurl = subj.pos;
   spnl(&subj);
+  starttitle = subj.pos;
   matchlen = scan_link_title(&subj.input, subj.pos);
   if (matchlen) {
     title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
     subj.pos += matchlen;
   } else {
-    subj.pos = beforetitle;
+    subj.pos = endurl;
+    starttitle = endurl;
+    endtitle = endurl;
     title = cmark_chunk_literal("");
   }
 
+  endtitle = subj.pos;
+
   // parse final spaces and newline:
   skip_spaces(&subj);
   if (!skip_line_end(&subj)) {
     if (matchlen) { // try rewinding before title
-      subj.pos = beforetitle;
+      subj.pos = endurl;
+      starttitle = endurl;
+      endtitle = endurl;
       skip_spaces(&subj);
       title = cmark_chunk_literal("");
       if (!skip_line_end(&subj)) {
@@ -1259,11 +1368,21 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
     cmark_chunk_set_cstr(mem, &reference->as.reference.label, (char *) ref->label);
     cmark_chunk_set_cstr(mem, &reference->as.reference.url, cmark_chunk_to_cstr(mem, &ref->url));
     cmark_chunk_set_cstr(mem, &reference->as.reference.title, cmark_chunk_to_cstr(mem, &ref->title));
-    cmark_node_append_child(root, reference);
+    cmark_node_insert_before(container, reference);
 
     cmark_reference_add(refmap, ref);
   }
 
+  // Mark the extents of the reference
+  source_map_splice_extent(source_map, 0, 1, reference, CMARK_EXTENT_OPENER);
+  source_map_splice_extent(source_map, 1, endlabel, reference, CMARK_EXTENT_REFERENCE_LABEL);
+  source_map_splice_extent(source_map, endlabel, endlabel + 2, reference, CMARK_EXTENT_PUNCTUATION);
+  source_map_splice_extent(source_map, endlabel + 2, starturl, reference, CMARK_EXTENT_BLANK);
+  source_map_splice_extent(source_map, starturl, endurl, reference, CMARK_EXTENT_REFERENCE_DESTINATION);
+  source_map_splice_extent(source_map, endurl, starttitle, reference, CMARK_EXTENT_BLANK);
+  source_map_splice_extent(source_map, starttitle, endtitle, reference, CMARK_EXTENT_REFERENCE_TITLE);
+  source_map_splice_extent(source_map, endtitle, subj.pos, reference, CMARK_EXTENT_BLANK);
+
   return subj.pos;
 
 nomatch:
diff --git a/src/inlines.h b/src/inlines.h
index a09a75940..ee85b87de 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -1,6 +1,10 @@
 #ifndef CMARK_INLINES_H
 #define CMARK_INLINES_H
 
+#include "chunk.h"
+#include "references.h"
+#include "source_map.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -9,11 +13,13 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
 cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
 
 void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
-                         cmark_reference_map *refmap, int options);
+                         cmark_reference_map *refmap, int options,
+                         cmark_source_map *source_map, bufsize_t total_length);
 
 bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
                                        cmark_reference_map *refmap,
-                                       cmark_node *root);
+                                       cmark_node *container,
+                                       cmark_source_map *source_map);
 
 #ifdef __cplusplus
 }
diff --git a/src/parser.h b/src/parser.h
index ec8c9b889..7b4fdbc9b 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -6,6 +6,7 @@
 #include "node.h"
 #include "buffer.h"
 #include "memory.h"
+#include "source_map.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -30,9 +31,12 @@ struct cmark_parser {
   bool partially_consumed_tab;
   cmark_strbuf curline;
   bufsize_t last_line_length;
+  bufsize_t line_offset;
   cmark_strbuf linebuf;
   int options;
   bool last_buffer_ended_with_cr;
+  cmark_source_map *source_map;
+  cmark_source_extent *last_paragraph_extent;
 };
 
 #ifdef __cplusplus
diff --git a/src/source_map.c b/src/source_map.c
new file mode 100644
index 000000000..754c5bb6c
--- /dev/null
+++ b/src/source_map.c
@@ -0,0 +1,344 @@
+#include <assert.h>
+
+#include "source_map.h"
+
+// Allocate a source map with `mem`'s calloc and store `mem` in it, so that
+// later extent allocations and frees go through the same allocator.
+cmark_source_map *source_map_new(cmark_mem *mem) {
+  cmark_source_map *map = (cmark_source_map *)mem->calloc(1, sizeof(*map));
+  map->mem = mem;
+  return map;
+}
+
+// Free every extent in the list and then the map itself, all through the
+// map's own allocator. Passing NULL is allowed and does nothing.
+void source_map_free(cmark_source_map *self) {
+  if (!self)
+    return;
+  cmark_source_extent *extent = self->head;
+  while (extent)
+    extent = source_map_free_extent(self, extent);
+  self->mem->free(self);
+}
+
+// Create an extent for `node` running from `start` to `stop` and make it the
+// new tail of the list. Returns that extent, or NULL when `self` is NULL.
+cmark_source_extent *source_map_append_extent(
+    cmark_source_map *self, bufsize_t start, bufsize_t stop, cmark_node *node,
+    cmark_extent_type type) {
+  if (!self)
+    return NULL;
+
+  // The list stays ordered: no backwards range, nothing before the tail ends.
+  assert(start <= stop);
+  assert(!self->tail || self->tail->stop <= start);
+
+  cmark_source_extent *extent =
+      (cmark_source_extent *)self->mem->calloc(1, sizeof(*extent));
+  extent->start = start;
+  extent->stop = stop;
+  extent->node = node;
+  extent->type = type;
+  extent->prev = self->tail;
+  extent->next = NULL;
+
+  if (self->head)
+    self->tail->next = extent;
+  else
+    self->head = extent;
+  self->tail = extent;
+  return extent;
+}
+
+// Insert an extent for `node` spanning `start`..`stop` right after `previous`
+// (at the head of the list when `previous` is NULL). An empty span allocates
+// nothing and returns `previous`; a NULL `self` returns NULL.
+cmark_source_extent *source_map_insert_extent(
+    cmark_source_map *self, cmark_source_extent *previous, bufsize_t start,
+    bufsize_t stop, cmark_node *node, cmark_extent_type type) {
+  if (!self)
+    return NULL;
+  if (start == stop)
+    return previous;
+
+  // Slot that must point at the new extent; never dereferences NULL `previous`.
+  cmark_source_extent **link = previous ? &previous->next : &self->head;
+  cmark_source_extent *extent = (cmark_source_extent *)self->mem->calloc(1, sizeof(*extent));
+  extent->start = start;
+  extent->stop = stop;
+  extent->node = node;
+  extent->type = type;
+  extent->prev = previous;
+  extent->next = *link;
+  *link = extent;
+  if (extent->next)
+    extent->next->prev = extent;
+  else
+    self->tail = extent;
+  return extent;
+}
+
+// Unlink `extent` from the list and free it. head/tail are repaired, and a
+// cursor or next_cursor that pointed at it moves to its predecessor or
+// successor respectively. Returns the extent that followed it (NULL if it was
+// last, or if `self` is NULL).
+cmark_source_extent *source_map_free_extent(cmark_source_map *self,
+                                            cmark_source_extent *extent) {
+  if (!self)
+    return NULL;
+
+  cmark_source_extent *before = extent->prev;
+  cmark_source_extent *after = extent->next;
+
+  // Bridge the neighbours over the extent being removed.
+  if (before)
+    before->next = after;
+  if (after)
+    after->prev = before;
+
+  // Keep the map's own pointers off the memory about to be released.
+  if (self->tail == extent)
+    self->tail = before;
+  if (self->head == extent)
+    self->head = after;
+
+  if (self->cursor == extent)
+    self->cursor = before;
+  if (self->next_cursor == extent)
+    self->next_cursor = after;
+
+  self->mem->free(extent);
+  return after;
+}
+
+// Fill the hole after `extent`. Empty extents at the front are freed first;
+// if that walk lands on an extent owned by `target_node`, it is returned as
+// is. Otherwise a BLANK extent for `root` is inserted from extent->stop up to
+// the next extent's start (or `total_length` when there is none) and the
+// extent that originally followed is returned (NULL at the end of the list).
+cmark_source_extent *source_map_stitch_extent(cmark_source_map *self,
+                                              cmark_source_extent *extent,
+                                              cmark_node *root,
+                                              cmark_node *target_node,
+                                              bufsize_t total_length) {
+  if (!self)
+    return NULL;
+
+  // Drop zero-length extents, but only while one more extent follows, and
+  // stop as soon as the walk reaches an extent belonging to `target_node`.
+  while (extent->next && extent->start == extent->stop) {
+    extent = source_map_free_extent(self, extent);
+    if (extent->node == target_node)
+      return extent;
+  }
+
+  // The filler ends where the next extent begins, or at the end of input.
+  bufsize_t gap_end = extent->next ? extent->next->start : total_length;
+
+  // insert_extent returns `extent` itself for an empty gap and the new filler
+  // otherwise; in both cases ->next is the extent that originally followed.
+  cmark_source_extent *following =
+      source_map_insert_extent(self, extent, extent->stop, gap_end, root,
+                               CMARK_EXTENT_BLANK)->next;
+
+  // Only reachable for a trailing empty extent (the loop needs a successor).
+  if (extent->start == extent->stop)
+    source_map_free_extent(self, extent);
+
+  return following;
+}
+
+// Attribute the span `start`..`stop` to `node` by inserting extents after the
+// cursor. Both positions are shifted by cursor_offset first. A span that would
+// cross the extent at next_cursor is cut at that extent's start and resumed
+// behind it, so one call can insert several pieces. Returns the cursor after
+// the last insertion, or NULL when `self` is NULL.
+cmark_source_extent *source_map_splice_extent(cmark_source_map *self,
+                                              bufsize_t start, bufsize_t stop,
+                                              cmark_node *node,
+                                              cmark_extent_type type) {
+  if (!self)
+    return NULL;
+
+  for (;;) {
+    cmark_source_extent *barrier = self->next_cursor;
+    bufsize_t from = start + self->cursor_offset;
+    bufsize_t to = stop + self->cursor_offset;
+
+    // Nothing in the way: the whole (remaining) span fits before the barrier.
+    if (!barrier || (from < barrier->start && to <= barrier->start)) {
+      self->cursor =
+          source_map_insert_extent(self, self->cursor, from, to, node, type);
+      return self->cursor;
+    }
+
+    // The span starts before the barrier but crosses it: emit the leading
+    // part now and keep the remainder for the next round.
+    if (from < barrier->start) {
+      bufsize_t resume = barrier->start - self->cursor_offset;
+      self->cursor = source_map_insert_extent(self, self->cursor, from,
+                                              barrier->start, node, type);
+      if (resume == stop)
+        return self->cursor;
+      start = resume;
+    }
+
+    // Step over every existing extent that begins at or before `start`,
+    // adding its length to the offset applied to later positions.
+    while (self->next_cursor &&
+           start + self->cursor_offset >= self->next_cursor->start) {
+      self->cursor_offset += self->next_cursor->stop - self->next_cursor->start;
+      self->cursor = self->cursor->next;
+      self->next_cursor = self->cursor->next;
+    }
+  }
+}
+
+// Position the splice cursor on `cursor`, or on the list head when `cursor`
+// is NULL, and derive next_cursor and cursor_offset from that extent. Returns
+// false when `self` is NULL or there is no extent to stand on.
+bool source_map_start_cursor(cmark_source_map *self,
+                             cmark_source_extent *cursor) {
+  if (!self)
+    return false;
+
+  cmark_source_extent *origin = cursor ? cursor : self->head;
+  self->cursor = origin;
+  if (!origin)
+    return false;
+  self->next_cursor = origin->next;
+  self->cursor_offset = origin->stop;
+  return true;
+}
+
+// Debugging aid: print one line per extent to stdout, in list order, showing
+// its range, the owning node's type, the extent type and the node address.
+void source_map_pretty_print(cmark_source_map *self) {
+  if (!self)
+    return;
+
+  cmark_source_extent *extent = self->head;
+  while (extent) {
+    printf("%d:%d - %s, %s (%p)\n", extent->start, extent->stop,
+           cmark_node_get_type_string(extent->node),
+           cmark_source_extent_get_type_string(extent), (void *)extent->node);
+    extent = extent->next;
+  }
+}
+
+// Verify that the extents tile the input exactly: the first starts at 0,
+// each starts where its predecessor stopped, none is empty, and the last
+// stops at `total_length`. A NULL map is considered valid.
+bool source_map_check(cmark_source_map *self, bufsize_t total_length) {
+  if (!self)
+    return true;
+
+  bufsize_t expected_start = 0;
+  cmark_source_extent *extent;
+
+  for (extent = self->head; extent; extent = extent->next) {
+    // A gap or an overlap with the previous extent.
+    if (extent->start != expected_start)
+      return false;
+    // Zero-length extents are not allowed either.
+    if (extent->start == extent->stop)
+      return false;
+    expected_start = extent->stop;
+  }
+
+  return expected_start == total_length;
+}
+
+// Accessor for the extent the splice cursor currently rests on. Yields NULL
+// when `self` is NULL.
+cmark_source_extent *source_map_get_cursor(cmark_source_map *self) {
+  cmark_source_extent *cursor = NULL;
+  if (self)
+    cursor = self->cursor;
+  return cursor;
+}
+
+// Head of the extent list, or NULL when `self` is NULL.
+cmark_source_extent *source_map_get_head(cmark_source_map *self) {
+  cmark_source_extent *first = NULL;
+  if (self)
+    first = self->head;
+  return first;
+}
+
+// Tail of the extent list (the extent source_map_append_extent links a new
+// one after), or NULL when `self` is NULL.
+cmark_source_extent *source_map_get_tail(cmark_source_map *self) {
+  cmark_source_extent *last = NULL;
+  if (self)
+    last = self->tail;
+  return last;
+}
+
+// The extent's `start` field, converted from bufsize_t to size_t.
+// `extent` is dereferenced unconditionally and must not be NULL.
+size_t cmark_source_extent_get_start(cmark_source_extent *extent) {
+  return (size_t)extent->start;
+}
+
+// The extent's `stop` field, converted from bufsize_t to size_t.
+// `extent` is dereferenced unconditionally and must not be NULL.
+size_t cmark_source_extent_get_stop(cmark_source_extent *extent) {
+  return (size_t)extent->stop;
+}
+
+// The node stored in the extent when it was created or last reassigned.
+// `extent` is dereferenced unconditionally and must not be NULL.
+cmark_node *cmark_source_extent_get_node(cmark_source_extent *extent) {
+  return extent->node;
+}
+
+// The extent linked after this one, or NULL if its `next` link is NULL.
+// `extent` is dereferenced unconditionally and must not be NULL.
+cmark_source_extent *cmark_source_extent_get_next(cmark_source_extent *extent) {
+  return extent->next;
+}
+
+// The extent linked before this one, or NULL if its `prev` link is NULL.
+// `extent` is dereferenced unconditionally and must not be NULL.
+cmark_source_extent *cmark_source_extent_get_previous(cmark_source_extent *extent) {
+  return extent->prev;
+}
+
+// The cmark_extent_type value stored in the extent.
+// `extent` is dereferenced unconditionally and must not be NULL.
+cmark_extent_type cmark_source_extent_get_type(cmark_source_extent *extent) {
+  return extent->type;
+}
+
+// Name of the extent's type as a constant lowercase string. CMARK_EXTENT_NONE
+// and any value without a case below both come back as "unknown".
+const char *cmark_source_extent_get_type_string(cmark_source_extent *extent) {
+  switch (extent->type) {
+  case CMARK_EXTENT_OPENER:
+    return "opener";
+  case CMARK_EXTENT_CLOSER:
+    return "closer";
+  case CMARK_EXTENT_BLANK:
+    return "blank";
+  case CMARK_EXTENT_CONTENT:
+    return "content";
+  case CMARK_EXTENT_PUNCTUATION:
+    return "punctuation";
+  case CMARK_EXTENT_LINK_DESTINATION:
+    return "link_destination";
+  case CMARK_EXTENT_LINK_TITLE:
+    return "link_title";
+  case CMARK_EXTENT_LINK_LABEL:
+    return "link_label";
+  case CMARK_EXTENT_REFERENCE_DESTINATION:
+    return "reference_destination";
+  case CMARK_EXTENT_REFERENCE_LABEL:
+    return "reference_label";
+  case CMARK_EXTENT_REFERENCE_TITLE:
+    return "reference_title";
+  case CMARK_EXTENT_NONE:
+    break;
+  }
+  return "unknown";
+}
diff --git a/src/source_map.h b/src/source_map.h
new file mode 100644
index 000000000..de13f8ed7
--- /dev/null
+++ b/src/source_map.h
@@ -0,0 +1,74 @@
+#ifndef CMARK_SOURCE_MAP_H
+#define CMARK_SOURCE_MAP_H
+
+#include "cmark.h"
+#include "config.h"
+#include "buffer.h"
+
+typedef struct _cmark_source_map // doubly linked list of extents + splice state
+{
+  cmark_source_extent *head;        // first extent; list walks start here
+  cmark_source_extent *tail;        // last extent; source_map_append_extent links after it
+  cmark_source_extent *cursor;      // extent after which source_map_splice_extent inserts
+  cmark_source_extent *next_cursor; // first pre-existing extent ahead of the cursor; splices are cut at its start
+  bufsize_t cursor_offset;          // added to the start/stop passed to source_map_splice_extent
+  cmark_mem *mem;                   // allocator used for the map and for every extent
+} cmark_source_map;
+
+struct cmark_source_extent // one range of the input, attributed to a node
+{
+  bufsize_t start;                  // where the range begins; source_map_check wants it equal to the previous stop
+  bufsize_t stop;                   // where the range ends; start == stop means an empty extent
+  struct cmark_source_extent *next; // following extent in the list, NULL at the tail
+  struct cmark_source_extent *prev; // preceding extent in the list, NULL at the head
+  cmark_node *node;                 // node this range is attributed to
+  cmark_extent_type type;           // role of the range (see cmark_source_extent_get_type_string)
+};
+
+cmark_source_map    * source_map_new          (cmark_mem *mem);
+
+void                  source_map_free         (cmark_source_map *self);
+
+bool                  source_map_check        (cmark_source_map *self,
+                                               bufsize_t total_length);
+
+void                  source_map_pretty_print (cmark_source_map *self);
+
+cmark_source_extent * source_map_append_extent(cmark_source_map *self,
+                                               bufsize_t start,
+                                               bufsize_t stop,
+                                               cmark_node *node,
+                                               cmark_extent_type type);
+
+cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
+                                               cmark_source_extent *previous,
+                                               bufsize_t start,
+                                               bufsize_t stop,
+                                               cmark_node *node,
+                                               cmark_extent_type type);
+
+cmark_source_extent * source_map_free_extent  (cmark_source_map *self,
+                                               cmark_source_extent *extent);
+
+cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
+                                               cmark_source_extent *extent,
+                                               cmark_node *root,
+                                               cmark_node *target_node,
+                                               bufsize_t total_length);
+
+cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
+                                               bufsize_t start,
+                                               bufsize_t stop,
+                                               cmark_node *node,
+                                               cmark_extent_type type);
+
+cmark_source_extent * source_map_get_cursor   (cmark_source_map *self);
+
+cmark_source_extent * source_map_get_head     (cmark_source_map *self);
+
+cmark_source_extent * source_map_get_tail     (cmark_source_map *self);
+
+bool                  source_map_start_cursor (cmark_source_map *self,
+                                               cmark_source_extent *cursor);
+
+#endif
diff --git a/test/cmark.py b/test/cmark.py
index f4ff5765b..fd35d54bb 100644
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -6,6 +6,8 @@
 import platform
 import os
 
+OPT_SOURCEPOS = 1 << 1
+
 def pipe_through_prog(prog, text):
     p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
     [result, err] = p1.communicate(input=text.encode('utf-8'))
@@ -29,7 +31,8 @@ def to_commonmark(lib, text):
     render_commonmark = lib.cmark_render_commonmark
     render_commonmark.restype = c_char_p
     render_commonmark.argtypes = [c_void_p, c_int, c_int]
-    node = parse_document(textbytes, textlen, 0)
+    # We want tests to go through the source map code
+    node = parse_document(textbytes, textlen, OPT_SOURCEPOS)
     if node is None:
       raise Exception("parse_document failed")
     result = render_commonmark(node, 0, 0).decode('utf-8')