From d160cdd7e312bfdb437cbc689708a2f4f8cfad32 Mon Sep 17 00:00:00 2001 From: Robert Atkey Date: Sun, 30 Oct 2022 17:11:55 +0000 Subject: [PATCH 01/11] Support for GitHub-Flavoured Markdown tables support more github flavoury tables fix HTML generation for tables Switch to using slices for table lines --- src/ast.ml | 1 + src/ast_block.ml | 15 +++++++++ src/block_parser.ml | 76 +++++++++++++++++++++++++++++++++++++++++++++ src/html.ml | 44 ++++++++++++++++++++++++++ src/parser.ml | 30 +++++++++++++++--- src/sexp.ml | 15 ++++++++- src/strSlice.ml | 22 +++++++++++++ src/strSlice.mli | 14 +++++++++ src/toc.ml | 2 +- 9 files changed, 213 insertions(+), 6 deletions(-) diff --git a/src/ast.ml b/src/ast.ml index ef24a02a..7c1515f1 100644 --- a/src/ast.ml +++ b/src/ast.ml @@ -1,6 +1,7 @@ module Impl = struct include Ast_inline include Ast_block.List_types + include Ast_block.Table_alignments include Ast_block.WithInline type attributes = (string * string) list diff --git a/src/ast_block.ml b/src/ast_block.ml index cd79939b..413d40fc 100644 --- a/src/ast_block.ml +++ b/src/ast_block.ml @@ -20,8 +20,18 @@ module List_types = struct | Tight end +module Table_alignments = struct + type cell_alignment = + | Default + | Left + | Centre + | Right +end + open List_types +open Table_alignments + module Make (C : BlockContent) = struct type 'attr def_elt = { term : 'attr C.t @@ -41,6 +51,7 @@ module Make (C : BlockContent) = struct | Code_block of 'attr * string * string | Html_block of 'attr * string | Definition_list of 'attr * 'attr def_elt list + | Table of 'attr * ('attr C.t * cell_alignment) list * 'attr C.t list list end module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct @@ -62,6 +73,10 @@ module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct Definition_list (attr, List.map f l) | Code_block (attr, label, code) -> Code_block (attr, label, code) | Html_block (attr, x) -> Html_block (attr, x) + | Table (attr, headers, rows) -> + Table (attr, + List.map (fun (header, alignment) -> (f header, alignment)) headers, + List.map (List.map f) rows) end module Mapper = MakeMapper (StringContent) (InlineContent) diff --git a/src/block_parser.ml b/src/block_parser.ml index e47c4f58..9424190c 100644 --- a/src/block_parser.ml +++ b/src/block_parser.ml @@ -22,6 +22,8 @@ module Pre = struct | Rindented_code of string list | Rhtml of Parser.html_kind * string list | Rdef_list of string * string list + | Rtable_header of StrSlice.t list * string + | Rtable of (string * cell_alignment) list * string list list | Rempty and t = @@ -74,6 +76,12 @@ module Pre = struct let rec loop = function "" :: l -> loop l | _ as l -> l in Code_block ([], "", concat (loop l)) :: blocks | Rhtml (_, l) -> Html_block ([], concat l) :: blocks + | Rtable_header (_header, line) -> + (* FIXME: this will only ever get called on the very last + line. Should it do the link definitions? *) + close link_defs { blocks; next = Rparagraph [ line ] } + | Rtable (header, rows) -> + Table ([], header, List.rev rows) :: blocks | Rempty -> blocks and finish link_defs state = List.rev (close link_defs state) @@ -81,6 +89,48 @@ module Pre = struct let empty = { blocks = []; next = Rempty } let classify_line s = Parser.parse s + let classify_delimiter s = + let left, s = + match StrSlice.head s with + | Some ':' -> true, StrSlice.drop 1 s + | _ -> false, s + in + let right, s = + match StrSlice.last s with + | Some ':' -> true, StrSlice.drop_last s + | _ -> false, s + in + if StrSlice.exists (fun c -> c <> '-') s then + None + else + match left, right with + | true, true -> Some Centre + | true, false -> Some Left + | false, true -> Some Right + | false, false -> Some Default + + let match_table_headers headers delimiters = + let rec loop processed = function + | [], [] -> + Some (List.rev processed) + | header::headers, line::delimiters -> + (match classify_delimiter line with + | None -> None + | Some alignment -> + loop ((StrSlice.to_string header, alignment)::processed) (headers, delimiters)) + | [], _::_ | _::_, [] -> + None + in + loop [] (headers, delimiters) + + let rec match_row_length l1 l2 = match l1, l2 with + | [], _ -> + [] + | l1, [] -> + List.init (List.length l1) (fun _ -> "") + | _::l1, x::l2 -> + StrSlice.to_string x :: match_row_length l1 l2 + let rec process link_defs { blocks; next } s = let process = process link_defs in let close = close link_defs in @@ -105,6 +155,8 @@ module Pre = struct } | Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _) -> { blocks; next = Rparagraph [ StrSlice.to_string s ] } + | Rempty, Ltable_line items -> + { blocks; next = Rtable_header (items, StrSlice.to_string s) } | Rparagraph [ h ], Ldef_list def -> { blocks; next = Rdef_list (h, [ def ]) } | Rdef_list (term, defs), Ldef_list def -> @@ -152,6 +204,30 @@ module Pre = struct } | Rdef_list _, _ -> process { blocks = close { blocks; next }; next = Rempty } s + | Rtable_header (headers, line), Ltable_line items -> + (match match_table_headers headers items with + | Some headers -> + (* Makes sure that there are the same number of delimiters + as headers. See + https://github.github.com/gfm/#example-203 *) + { blocks; next = Rtable (headers, []) } + | None -> + (* Reinterpret the previous line as the start of a + paragraph. *) + process { blocks; next = Rparagraph [ line ] } s) + | Rtable_header (_, line), _ -> + (* If we only have a potential header, and the current line + doesn't look like a table delimiter, then reinterpret the + previous line as the start of a paragraph. *) + process { blocks; next = Rparagraph [ line ] } s + | Rtable (header, rows), Ltable_line row -> + (* Make sure the number of items in the row is consistent with + the headers and the rest of the rows. See + https://github.github.com/gfm/#example-204 *) + let row = match_row_length header row in + { blocks; next = Rtable (header, row::rows) } + | Rtable _, _ -> + process { blocks = close { blocks; next }; next = Rempty } s | Rindented_code lines, Lindented_code s -> { blocks; next = Rindented_code (StrSlice.to_string s :: lines) } | Rindented_code lines, Lempty -> diff --git a/src/html.ml b/src/html.ml index 3a9ff803..f2bca1df 100644 --- a/src/html.ml +++ b/src/html.ml @@ -20,6 +20,9 @@ let concat t1 t2 = let concat_map f l = List.fold_left (fun accu x -> concat accu (f x)) Null l +let concat_map2 f l1 l2 = + List.fold_left2 (fun accu x y -> concat accu (f x y)) Null l1 l2 + (* only convert when "necessary" *) let htmlentities s = let b = Buffer.create (String.length s) in @@ -128,6 +131,37 @@ and inline = function | Image (attr, { label; destination; title }) -> img label destination title attr +let table_header headers = + elt Block "thead" [] + (Some + (elt Block "tr" [] + (Some + (concat_map + (fun (header, _alignment) -> + elt Inline "th" [] (Some (inline header))) + headers)))) + +let table_body headers rows = + elt Block "tbody" [] + (Some + (concat_map + (fun row -> + elt Block "tr" [] + (Some + (concat_map2 + (fun (_, alignment) cell -> + let attrs = match alignment with + | Default -> [] + (* FIXME: or align="blah" ?? *) + | Left -> ["style", "text-align: left;"] + | Right -> ["style", "text-align: right;"] + | Centre -> ["style", "text-align: centre;"] + in + elt Block "td" attrs (Some (inline cell))) + headers + row))) + rows)) + let rec block = function | Blockquote (attr, q) -> elt Block "blockquote" attr (Some (concat nl (concat_map block q))) @@ -177,6 +211,16 @@ let rec block = function (concat_map (fun s -> elt Block "dd" [] (Some (inline s))) defs) in elt Block "dl" attr (Some (concat_map f l)) + | Table (attr, headers, []) -> + elt Block "table" attr + (Some + (table_header headers)) + | Table (attr, headers, rows) -> + elt Block "table" attr + (Some + (concat + (table_header headers) + (table_body headers rows))) let of_doc doc = concat_map block doc diff --git a/src/parser.ml b/src/parser.ml index 2d4c01a9..a7d7d436 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -208,6 +208,7 @@ type t = | Llist_item of list_type * int * StrSlice.t | Lparagraph | Ldef_list of string + | Ltable_line of StrSlice.t list (* drop up to 3 spaces, returning the number of spaces dropped and the remainder of the string *) let sp3 s = @@ -755,6 +756,26 @@ let indented_code ind s = if indent s + ind < 4 then raise Fail; Lindented_code (StrSlice.offset (4 - ind) s) +(* A sequence of cell contents separated by unescaped '|' + characters. *) +let table_row pipe_prefix s = + let rec loop items s = + match StrSlice.index_unescaped '|' s with + | None -> + if StrSlice.for_all is_whitespace s then + items + else + s::items + | Some i -> + let item = StrSlice.take_n i s in + loop (item::items) (StrSlice.drop (i+1) s) + in + let items = loop [] s in + if not pipe_prefix && List.length items <= 1 then + raise Fail + else + Ltable_line (List.rev_map StrSlice.trim items) + let parse s0 = let ind, s = sp3 s0 in match StrSlice.head s with @@ -764,16 +785,17 @@ let parse s0 = Lblockquote s | Some '=' -> setext_heading s | Some '-' -> - (setext_heading ||| thematic_break ||| unordered_list_item ind) s + (setext_heading ||| thematic_break ||| unordered_list_item ind ||| table_row false) s | Some '_' -> thematic_break s | Some '#' -> atx_heading s | Some ('~' | '`') -> fenced_code ind s | Some '<' -> raw_html s | Some '*' -> (thematic_break ||| unordered_list_item ind) s | Some '+' -> unordered_list_item ind s - | Some '0' .. '9' -> ordered_list_item ind s - | Some ':' -> def_list s - | Some _ -> (blank ||| indented_code ind) s + | Some '0' .. '9' -> (ordered_list_item ind ||| table_row false) s + | Some ':' -> (def_list ||| table_row false) s + | Some '|' -> table_row true (StrSlice.tail s) + | Some _ -> (blank ||| indented_code ind ||| table_row false) s | None -> Lempty let parse s = try parse s with Fail -> Lparagraph diff --git a/src/sexp.ml b/src/sexp.ml index 20583831..95cb6427 100644 --- a/src/sexp.ml +++ b/src/sexp.ml @@ -15,13 +15,21 @@ and inline = function | Text (_, s) -> Atom s | Emph (_, il) -> List [ Atom "emph"; inline il ] | Strong (_, il) -> List [ Atom "strong"; inline il ] - | Code _ -> Atom "code" + | Code _ -> Atom "code" (* FIXME: this seems broken? *) | Hard_break _ -> Atom "hard-break" | Soft_break _ -> Atom "soft-break" | Link (_, def) -> List [ Atom "url"; link def ] | Html (_, s) -> List [ Atom "html"; Atom s ] | Image _ -> Atom "img" +let table_header (header, alignment) = + List [ inline header + ; (match alignment with + | Default -> Atom "default" + | Left -> Atom "left" + | Centre -> Atom "centre" + | Right -> Atom "right") ] + let rec block = function | Paragraph (_, x) -> List [ Atom "paragraph"; inline x ] | List (_, _, _, bls) -> @@ -44,6 +52,11 @@ let rec block = function List [ inline elt.term; List (List.map inline elt.defs) ]) l) ] + | Table (_, headers, rows) -> + List [ Atom "table" + ; List (List.map table_header headers) + ; List (List.map (fun row -> List (List.map inline row)) rows) + ] let create ast = List (List.map block ast) diff --git a/src/strSlice.ml b/src/strSlice.ml index 4fb3429e..e7e7f856 100644 --- a/src/strSlice.ml +++ b/src/strSlice.ml @@ -63,6 +63,11 @@ let take n s = in loop n s +let take_n n s = + if n < 0 then invalid_arg "take_n"; + let len = min n s.len in + { s with len } + let drop n s = if n < 0 then invalid_arg "drop"; (* len should not be reduced below 0, as strings cannot have a negative length *) @@ -118,6 +123,16 @@ let split_at f s = (* assert ("aaa" = to_string before); *) (* assert ("" = to_string rest) *) +let index_unescaped c s = + let rec loop idx = + if idx = s.off+s.len then None + else if s.base.[idx] = c && (idx = s.off || s.base.[idx] <> '\\') then + Some (idx-s.off) + else + loop (idx+1) + in + loop s.off + let exists f s = let rec loop s i = if i >= s.len then false @@ -142,3 +157,10 @@ let fold_left f init s = (* let s = of_string "abcde" in *) (* assert (fold_left (fun _ n -> n + 1) 0 s = 5); *) (* assert (fold_left (fun c s -> String.make 2 c ^ s) "" s = "eeddccbbaa") *) + +let trim s = + let is_whitespace = function + | ' ' | '\t' | '\010' .. '\013' -> true + | _ -> false + in + drop_while is_whitespace (drop_last_while is_whitespace s) diff --git a/src/strSlice.mli b/src/strSlice.mli index 47d072e8..66820ec4 100644 --- a/src/strSlice.mli +++ b/src/strSlice.mli @@ -13,6 +13,12 @@ val index : (char -> bool) -> t -> int option (** [index c s] is [Some i] where [i] is the index of the character in [s] for which [f] is first true, or [None] if [f] holds for no characters in [s]. *) +val index_unescaped : char -> t -> int option +(** [index_unescaped c s] is [Some i] where [i] index of the first + occurrence of the character [c] in [s] that is not preceeded by a + backslash ['\\'], or [None] if [c] does not occur in [s] or all + occurrences are preceeded by backslashes. *) + val print : Format.formatter -> t -> unit val head : t -> char option val tail : t -> t @@ -30,6 +36,10 @@ val drop_last : t -> t val take : int -> t -> char list (** [take n s] is a list of the first [n] characters of [s] *) +val take_n : int -> t -> t +(** [take_n n s] returns the slice consisting of the first [n] + characters of [s]. *) + val drop : int -> t -> t (** [drop n s] is [s] with the first [n] characters dropped *) @@ -69,3 +79,7 @@ val exists : (char -> bool) -> t -> bool val is_empty : t -> bool val get_offset : t -> int val sub : len:int -> t -> t + +val trim : t -> t +(** [trim s] returns the slice that skips any whitespace at the start + or the end of [s]. *) diff --git a/src/toc.ml b/src/toc.ml index 2a649d6f..f9627623 100644 --- a/src/toc.ml +++ b/src/toc.ml @@ -26,7 +26,7 @@ let headers = | Blockquote (_, blocks) -> loop blocks | List (_, _, _, block_lists) -> List.iter loop block_lists | Paragraph _ | Thematic_break _ | Html_block _ | Definition_list _ - | Code_block _ -> + | Code_block _ | Table _ -> ()) blocks in From fb795d55954e049392d713f0786835ff27d33881 Mon Sep 17 00:00:00 2001 From: Robert Atkey Date: Fri, 4 Nov 2022 17:13:55 +0000 Subject: [PATCH 02/11] Improve parsing for GFM-style tables - Ignore `|` characters within code spans for the purposes of spotting table cell delimiters. Correctly allow for code spans containing `` ` `` characters. - Disallow tables with no columns. - Allow rows with no `|` characters, including rows that look like setext headings (sequences of `=` or `-`) --- src/block_parser.ml | 48 +++++++++++++++++++++++++-------------------- src/parser.ml | 19 +++++++++--------- src/strSlice.ml | 26 ++++++++++++++++-------- src/strSlice.mli | 4 ++-- 4 files changed, 57 insertions(+), 40 deletions(-) diff --git a/src/block_parser.ml b/src/block_parser.ml index 9424190c..5737eb77 100644 --- a/src/block_parser.ml +++ b/src/block_parser.ml @@ -153,7 +153,7 @@ module Pre = struct { blocks ; next = Rlist (kind, Tight, false, indent, [], process empty s) } - | Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _) -> + | Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _ | Ltable_line []) -> { blocks; next = Rparagraph [ StrSlice.to_string s ] } | Rempty, Ltable_line items -> { blocks; next = Rtable_header (items, StrSlice.to_string s) } @@ -205,29 +205,35 @@ module Pre = struct | Rdef_list _, _ -> process { blocks = close { blocks; next }; next = Rempty } s | Rtable_header (headers, line), Ltable_line items -> - (match match_table_headers headers items with - | Some headers -> - (* Makes sure that there are the same number of delimiters - as headers. See - https://github.github.com/gfm/#example-203 *) - { blocks; next = Rtable (headers, []) } - | None -> - (* Reinterpret the previous line as the start of a - paragraph. *) - process { blocks; next = Rparagraph [ line ] } s) + (match match_table_headers headers items with + | Some headers -> + (* Makes sure that there are the same number of delimiters + as headers. See + https://github.github.com/gfm/#example-203 *) + { blocks; next = Rtable (headers, []) } + | None -> + (* Reinterpret the previous line as the start of a + paragraph. *) + process { blocks; next = Rparagraph [ line ] } s) | Rtable_header (_, line), _ -> - (* If we only have a potential header, and the current line - doesn't look like a table delimiter, then reinterpret the - previous line as the start of a paragraph. *) - process { blocks; next = Rparagraph [ line ] } s + (* If we only have a potential header, and the current line + doesn't look like a table delimiter, then reinterpret the + previous line as the start of a paragraph. *) + process { blocks; next = Rparagraph [ line ] } s | Rtable (header, rows), Ltable_line row -> - (* Make sure the number of items in the row is consistent with - the headers and the rest of the rows. See - https://github.github.com/gfm/#example-204 *) - let row = match_row_length header row in - { blocks; next = Rtable (header, row::rows) } + (* Make sure the number of items in the row is consistent with + the headers and the rest of the rows. See + https://github.github.com/gfm/#example-204 *) + let row = match_row_length header row in + { blocks; next = Rtable (header, row::rows) } + | Rtable (header, rows), (Lparagraph | Lsetext_heading _) -> + (* Treat a contiguous line after a table as a row, even if it + doesn't contain any '|' + characters. https://github.github.com/gfm/#example-202 *) + let row = match_row_length header [s] in + { blocks; next = Rtable (header, row::rows) } | Rtable _, _ -> - process { blocks = close { blocks; next }; next = Rempty } s + process { blocks = close { blocks; next }; next = Rempty } s | Rindented_code lines, Lindented_code s -> { blocks; next = Rindented_code (StrSlice.to_string s :: lines) } | Rindented_code lines, Lempty -> diff --git a/src/parser.ml b/src/parser.ml index a7d7d436..d996f2af 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -759,22 +759,23 @@ let indented_code ind s = (* A sequence of cell contents separated by unescaped '|' characters. *) let table_row pipe_prefix s = - let rec loop items s = + let rec loop items seen_pipe s = match StrSlice.index_unescaped '|' s with | None -> if StrSlice.for_all is_whitespace s then - items + items, seen_pipe else - s::items + s::items, false | Some i -> let item = StrSlice.take_n i s in - loop (item::items) (StrSlice.drop (i+1) s) + loop (item::items) true (StrSlice.drop (i+1) s) in - let items = loop [] s in - if not pipe_prefix && List.length items <= 1 then - raise Fail - else + let items, terminating_pipe = loop [] pipe_prefix s in + match pipe_prefix, items, terminating_pipe with + | true, _, _ | _, _::_, true | _, _::_::_, _ -> Ltable_line (List.rev_map StrSlice.trim items) + | _ -> + raise Fail let parse s0 = let ind, s = sp3 s0 in @@ -783,7 +784,7 @@ let parse s0 = let s = StrSlice.offset 1 s in let s = if indent s > 0 then StrSlice.offset 1 s else s in Lblockquote s - | Some '=' -> setext_heading s + | Some '=' -> (setext_heading ||| table_row false) s | Some '-' -> (setext_heading ||| thematic_break ||| unordered_list_item ind ||| table_row false) s | Some '_' -> thematic_break s diff --git a/src/strSlice.ml b/src/strSlice.ml index e7e7f856..110e3202 100644 --- a/src/strSlice.ml +++ b/src/strSlice.ml @@ -123,15 +123,25 @@ let split_at f s = (* assert ("aaa" = to_string before); *) (* assert ("" = to_string rest) *) -let index_unescaped c s = - let rec loop idx = - if idx = s.off+s.len then None - else if s.base.[idx] = c && (idx = s.off || s.base.[idx] <> '\\') then - Some (idx-s.off) - else - loop (idx+1) +let index_unescaped sep s = + let rec loop idx state = + if idx = s.off+s.len then None (* If we get here and we're inside a verbatim span, what to do? *) + else match state, s.base.[idx] with + | `normal, '\\' -> loop (idx+1) `escape + | `normal, '`' -> loop (idx+1) (`verbatim_open 1) + | `normal, c when c = sep -> Some (idx-s.off) + | `normal, _ -> loop (idx+1) `normal + | `escape, _ -> loop (idx+1) `normal + | `verbatim_open n, '`' -> loop (idx+1) (`verbatim_open (n+1)) + | `verbatim_open n, _ -> loop (idx+1) (`within_verbatim n) + | `within_verbatim 1, '`' -> loop (idx+1) `normal + | `within_verbatim n, '`' -> loop (idx+1) (`verbatim_close (n,n-1)) + | `within_verbatim n, _ -> loop (idx+1) (`within_verbatim n) + | `verbatim_close (_, 1), '`' -> loop (idx+1) `normal + | `verbatim_close (n, k), '`' -> loop (idx+1) (`verbatim_close (n,k-1)) + | `verbatim_close (n, _), _ -> loop (idx+1) (`within_verbatim n) in - loop s.off + loop s.off `normal let exists f s = let rec loop s i = diff --git a/src/strSlice.mli b/src/strSlice.mli index 66820ec4..a28ca3bb 100644 --- a/src/strSlice.mli +++ b/src/strSlice.mli @@ -16,8 +16,8 @@ val index : (char -> bool) -> t -> int option val index_unescaped : char -> t -> int option (** [index_unescaped c s] is [Some i] where [i] index of the first occurrence of the character [c] in [s] that is not preceeded by a - backslash ['\\'], or [None] if [c] does not occur in [s] or all - occurrences are preceeded by backslashes. *) + backslash ['\\'] and not within a verbatim inline, or [None] if + there is no such [c] in [s]. *) val print : Format.formatter -> t -> unit val head : t -> char option From 433824dbe2d9856ad82b6ffe5c46755e61b61f70 Mon Sep 17 00:00:00 2001 From: Robert Atkey Date: Fri, 4 Nov 2022 17:17:41 +0000 Subject: [PATCH 03/11] Specialised HTML output for tables - Add a new kind of `element_type` to the HTML trees for the table elements `table`, `thead`, `tbody`, and `tr`. - This makes the HTML output match newline placement used the GFM specification. --- src/html.ml | 39 +++++++++++++++++++++------------------ src/html.mli | 1 + 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/html.ml b/src/html.ml index f2bca1df..f5f4653d 100644 --- a/src/html.ml +++ b/src/html.ml @@ -3,6 +3,7 @@ open Ast.Impl type element_type = | Inline | Block + | Table type t = | Element of element_type * string * attributes * t option @@ -53,14 +54,15 @@ let rec add_to_buffer buf = function | Element (eltype, name, attrs, Some c) -> Printf.bprintf buf - "<%s%a>%a" + "<%s%a>%s%a%s" name add_attrs_to_buffer attrs + (match eltype with Table -> "\n" | _ -> "") add_to_buffer c - name; - if eltype = Block then Buffer.add_char buf '\n' + name + (match eltype with Table | Block -> "\n" | _ -> "") | Text s -> Buffer.add_string buf (htmlentities s) | Raw s -> Buffer.add_string buf s | Null -> () @@ -131,32 +133,33 @@ and inline = function | Image (attr, { label; destination; title }) -> img label destination title attr +let alignment_attributes = function + | Default -> [] + | Left -> ["align", "left"] + | Right -> ["align", "right"] + | Centre -> ["align", "center"] + let table_header headers = - elt Block "thead" [] + elt Table "thead" [] (Some - (elt Block "tr" [] + (elt Table "tr" [] (Some (concat_map - (fun (header, _alignment) -> - elt Inline "th" [] (Some (inline header))) + (fun (header, alignment) -> + let attrs = alignment_attributes alignment in + elt Block "th" attrs (Some (inline header))) headers)))) let table_body headers rows = - elt Block "tbody" [] + elt Table "tbody" [] (Some (concat_map (fun row -> - elt Block "tr" [] + elt Table "tr" [] (Some (concat_map2 (fun (_, alignment) cell -> - let attrs = match alignment with - | Default -> [] - (* FIXME: or align="blah" ?? *) - | Left -> ["style", "text-align: left;"] - | Right -> ["style", "text-align: right;"] - | Centre -> ["style", "text-align: centre;"] - in + let attrs = alignment_attributes alignment in elt Block "td" attrs (Some (inline cell))) headers row))) @@ -212,11 +215,11 @@ let rec block = function in elt Block "dl" attr (Some (concat_map f l)) | Table (attr, headers, []) -> - elt Block "table" attr + elt Table "table" attr (Some (table_header headers)) | Table (attr, headers, rows) -> - elt Block "table" attr + elt Table "table" attr (Some (concat (table_header headers) diff --git a/src/html.mli b/src/html.mli index 1926e896..da27c659 100644 --- a/src/html.mli +++ b/src/html.mli @@ -3,6 +3,7 @@ open Ast.Impl type element_type = | Inline | Block + | Table type t = | Element of element_type * string * attributes * t option From d3b171a5a4450a7faec3de7b358d050d1f271c7a Mon Sep 17 00:00:00 2001 From: Robert Atkey Date: Fri, 4 Nov 2022 17:19:16 +0000 Subject: [PATCH 04/11] Tests for the GFM-style tables support New tests for the GFM-style table support, in the same style as the existing tests, comparing HTML output. 1. Excerpt the GFM specification's table examples, with one exception in the treatment of escape characters inside code spans (noted in the test file). 2. Twelve additional tests for various corner cases. --- tests/dune | 1 + tests/dune.inc | 182 ++++++++++++++++++++++++- tests/extra_table_tests.md | 267 +++++++++++++++++++++++++++++++++++++ tests/gfm_table_spec.md | 217 ++++++++++++++++++++++++++++++ 4 files changed, 666 insertions(+), 1 deletion(-) create mode 100644 tests/extra_table_tests.md create mode 100644 tests/gfm_table_spec.md diff --git a/tests/dune b/tests/dune index e08ee265..7b4a81a1 100644 --- a/tests/dune +++ b/tests/dune @@ -15,6 +15,7 @@ (with-stdout-to dune.inc.new (run ./extract_tests.exe -write-dune-file %{dep:spec.txt} + %{dep:gfm_table_spec.md} %{dep:extra_table_tests.md} %{dep:attributes.md} %{dep:def_list.md}))) (include dune.inc) diff --git a/tests/dune.inc b/tests/dune.inc index c57b94ae..5996df97 100644 --- a/tests/dune.inc +++ b/tests/dune.inc @@ -1,5 +1,5 @@ (rule - (deps spec.txt attributes.md def_list.md) + (deps spec.txt gfm_table_spec.md extra_table_tests.md attributes.md def_list.md) (targets spec-001.md spec-001.html spec-002.md spec-002.html @@ -653,6 +653,26 @@ spec-650.md spec-650.html spec-651.md spec-651.html spec-652.md spec-652.html + gfm_table_spec-001.md gfm_table_spec-001.html + gfm_table_spec-002.md gfm_table_spec-002.html + gfm_table_spec-003.md gfm_table_spec-003.html + gfm_table_spec-004.md gfm_table_spec-004.html + gfm_table_spec-005.md gfm_table_spec-005.html + gfm_table_spec-006.md gfm_table_spec-006.html + gfm_table_spec-007.md gfm_table_spec-007.html + gfm_table_spec-008.md gfm_table_spec-008.html + extra_table_tests-001.md extra_table_tests-001.html + extra_table_tests-002.md extra_table_tests-002.html + extra_table_tests-003.md extra_table_tests-003.html + extra_table_tests-004.md extra_table_tests-004.html + extra_table_tests-005.md extra_table_tests-005.html + extra_table_tests-006.md extra_table_tests-006.html + extra_table_tests-007.md extra_table_tests-007.html + extra_table_tests-008.md extra_table_tests-008.html + extra_table_tests-009.md extra_table_tests-009.html + extra_table_tests-010.md extra_table_tests-010.html + extra_table_tests-011.md extra_table_tests-011.html + extra_table_tests-012.md extra_table_tests-012.html attributes-001.md attributes-001.html attributes-002.md attributes-002.html attributes-003.md attributes-003.html @@ -4582,6 +4602,146 @@ (rule (alias spec-652) (action (diff spec-652.html spec-652.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-001.html.new + (run ./omd.exe %{dep:gfm_table_spec-001.md})))) +(rule + (alias gfm_table_spec-001) + (action (diff gfm_table_spec-001.html gfm_table_spec-001.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-002.html.new + (run ./omd.exe %{dep:gfm_table_spec-002.md})))) +(rule + (alias gfm_table_spec-002) + (action (diff gfm_table_spec-002.html gfm_table_spec-002.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-003.html.new + (run ./omd.exe %{dep:gfm_table_spec-003.md})))) +(rule + (alias gfm_table_spec-003) + (action (diff gfm_table_spec-003.html gfm_table_spec-003.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-004.html.new + (run ./omd.exe %{dep:gfm_table_spec-004.md})))) +(rule + (alias gfm_table_spec-004) + (action (diff gfm_table_spec-004.html gfm_table_spec-004.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-005.html.new + (run ./omd.exe %{dep:gfm_table_spec-005.md})))) +(rule + (alias gfm_table_spec-005) + (action (diff gfm_table_spec-005.html gfm_table_spec-005.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-006.html.new + (run ./omd.exe %{dep:gfm_table_spec-006.md})))) +(rule + (alias gfm_table_spec-006) + (action (diff gfm_table_spec-006.html gfm_table_spec-006.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-007.html.new + (run ./omd.exe %{dep:gfm_table_spec-007.md})))) +(rule + (alias gfm_table_spec-007) + (action (diff gfm_table_spec-007.html gfm_table_spec-007.html.new))) +(rule + (action + (with-stdout-to gfm_table_spec-008.html.new + (run ./omd.exe %{dep:gfm_table_spec-008.md})))) +(rule + (alias gfm_table_spec-008) + (action (diff gfm_table_spec-008.html gfm_table_spec-008.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-001.html.new + (run ./omd.exe %{dep:extra_table_tests-001.md})))) +(rule + (alias extra_table_tests-001) + (action (diff extra_table_tests-001.html extra_table_tests-001.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-002.html.new + (run ./omd.exe %{dep:extra_table_tests-002.md})))) +(rule + (alias extra_table_tests-002) + (action (diff extra_table_tests-002.html extra_table_tests-002.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-003.html.new + (run ./omd.exe %{dep:extra_table_tests-003.md})))) +(rule + (alias extra_table_tests-003) + (action (diff extra_table_tests-003.html extra_table_tests-003.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-004.html.new + (run ./omd.exe %{dep:extra_table_tests-004.md})))) +(rule + (alias extra_table_tests-004) + (action (diff extra_table_tests-004.html extra_table_tests-004.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-005.html.new + (run ./omd.exe %{dep:extra_table_tests-005.md})))) +(rule + (alias extra_table_tests-005) + (action (diff extra_table_tests-005.html extra_table_tests-005.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-006.html.new + (run ./omd.exe %{dep:extra_table_tests-006.md})))) +(rule + (alias extra_table_tests-006) + (action (diff extra_table_tests-006.html extra_table_tests-006.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-007.html.new + (run ./omd.exe %{dep:extra_table_tests-007.md})))) +(rule + (alias extra_table_tests-007) + (action (diff extra_table_tests-007.html extra_table_tests-007.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-008.html.new + (run ./omd.exe %{dep:extra_table_tests-008.md})))) +(rule + (alias extra_table_tests-008) + (action (diff extra_table_tests-008.html extra_table_tests-008.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-009.html.new + (run ./omd.exe %{dep:extra_table_tests-009.md})))) +(rule + (alias extra_table_tests-009) + (action (diff extra_table_tests-009.html extra_table_tests-009.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-010.html.new + (run ./omd.exe %{dep:extra_table_tests-010.md})))) +(rule + (alias extra_table_tests-010) + (action (diff extra_table_tests-010.html extra_table_tests-010.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-011.html.new + (run ./omd.exe %{dep:extra_table_tests-011.md})))) +(rule + (alias extra_table_tests-011) + (action (diff extra_table_tests-011.html extra_table_tests-011.html.new))) +(rule + (action + (with-stdout-to extra_table_tests-012.html.new + (run ./omd.exe %{dep:extra_table_tests-012.md})))) +(rule + (alias extra_table_tests-012) + (action (diff extra_table_tests-012.html extra_table_tests-012.html.new))) (rule (action (with-stdout-to attributes-001.html.new @@ -5349,6 +5509,26 @@ (alias spec-650) (alias spec-651) (alias spec-652) + (alias gfm_table_spec-001) + (alias gfm_table_spec-002) + (alias gfm_table_spec-003) + (alias gfm_table_spec-004) + (alias gfm_table_spec-005) + (alias gfm_table_spec-006) + (alias gfm_table_spec-007) + (alias gfm_table_spec-008) + (alias extra_table_tests-001) + (alias extra_table_tests-002) + (alias extra_table_tests-003) + (alias extra_table_tests-004) + (alias extra_table_tests-005) + (alias extra_table_tests-006) + (alias extra_table_tests-007) + (alias extra_table_tests-008) + (alias extra_table_tests-009) + (alias extra_table_tests-010) + (alias extra_table_tests-011) + (alias extra_table_tests-012) (alias attributes-001) (alias attributes-002) (alias attributes-003) diff --git a/tests/extra_table_tests.md b/tests/extra_table_tests.md new file mode 100644 index 00000000..42f432d7 --- /dev/null +++ b/tests/extra_table_tests.md @@ -0,0 +1,267 @@ +## Additional Table Tests + +Complete table + +```````````````````````````````` example +| abc | def | **ghi** | +|:----|:-----:|----------:| +| 1 | 2 | [link][0] | +| 3 | 4 | `code` | +| 5 | `6` | \| `|` | + +[0]: https://example.com +. + + + + + + + + + + + + + + + + + + + + + + + + + +
abcdefghi
12link
34code
56| |
+```````````````````````````````` + + +Not a table (no delimiter) + +```````````````````````````````` example +| abc | +| def | +. +

| abc | +| def |

+```````````````````````````````` + +Too few columns in a row gets expanded + +```````````````````````````````` example +| a | b | +|---|---| +| 1 | +. + + + + + + + + + + + + + +
ab
1
+```````````````````````````````` + +Table with no columns not allowed + +````````````````````````````````example +| +| +. +

| +|

+```````````````````````````````` + +Minimal table 1 + +```````````````````````````````` example +h| +-| +. + + + + + + +
h
+```````````````````````````````` + +Minimal table 2 + +```````````````````````````````` example +|h +|- +. + + + + + + +
h
+```````````````````````````````` + +Minimal table 3 + +```````````````````````````````` example +|| +|| +. + + + + + + +
+```````````````````````````````` + +Escaped `|` characters + +```````````````````````````````` example +\||\| +-|- +| +. + + + + + + + + + + + + + +
||
+```````````````````````````````` + +`|` characters inside code spans without escaping + +```````````````````````````````` example +abc | `|` | def +----|-----|------------- +ghi | | `` `| ``jkl +. + + + + + + + + + + + + + + + +
abc|def
ghi`|jkl
+```````````````````````````````` + +Cells starting with numbers + +```````````````````````````````` example +0 | 1 +--|-- +3 | 4 +. + + + + + + + + + + + + + +
01
34
+```````````````````````````````` + +Setext headings or cells? A setext heading marker isn't a start of a +new block, so it gets treated as if it were a single element row. + +```````````````````````````````` example += | b +--|-- += +. + + + + + + + + + + + + + +
=b
=
+```````````````````````````````` + +Tables in a list + +```````````````````````````````` example +1. abc | def + ----|---- + 1 | 2 + +2. | abc | def | + |-----|-----| + | 1. | 2. | +. +
    +
  1. + + + + + + + + + + + + +
    abcdef
    12
  2. +
  3. + + + + + + + + + + + + +
    abcdef
    1.2.
  4. +
+```````````````````````````````` diff --git a/tests/gfm_table_spec.md b/tests/gfm_table_spec.md new file mode 100644 index 00000000..bf9b870c --- /dev/null +++ b/tests/gfm_table_spec.md @@ -0,0 +1,217 @@ +*Note on the Omd implementation:* Table specification excerpted from +[this commit][0] in the GitHub-Flavored Markdown repository. There is +one alteration (noted below), where the GFM spec appears to contradict +the CommonMark spec (and itself) on the treatment of escape characters +in code spans. + +[0]: https://github.com/github/cmark-gfm/blob/6a6e335709ef68cf2c616eeaf61b09ed4c654669/test/spec.txt + +## Tables (extension) + +GFM enables the `table` extension, where an additional leaf block type is +available. + +A [table](@) is an arrangement of data with rows and columns, consisting of a +single header row, a [delimiter row] separating the header from the data, and +zero or more data rows. + +Each row consists of cells containing arbitrary text, in which [inlines] are +parsed, separated by pipes (`|`). A leading and trailing pipe is also +recommended for clarity of reading, and if there's otherwise parsing ambiguity. +Spaces between pipes and cell content are trimmed. Block-level elements cannot +be inserted in a table. + +The [delimiter row](@) consists of cells whose only content are hyphens (`-`), +and optionally, a leading or trailing colon (`:`), or both, to indicate left, +right, or center alignment respectively. + +```````````````````````````````` example table +| foo | bar | +| --- | --- | +| baz | bim | +. + + + + + + + + + + + + + +
foobar
bazbim
+```````````````````````````````` + +Cells in one column don't need to match length, though it's easier to read if +they are. Likewise, use of leading and trailing pipes may be inconsistent: + +```````````````````````````````` example table +| abc | defghi | +:-: | -----------: +bar | baz +. + + + + + + + + + + + + + +
abcdefghi
barbaz
+```````````````````````````````` + +Include a pipe in a cell's content by escaping it, including inside other +inline spans: + +**Omd alteration**: the escape `\` in the code span is preserved here, +in accordance with the rules for code spans in the CommonMark and GFM +specs. + +```````````````````````````````` example table +| f\|oo | +| ------ | +| b `\|` az | +| b **\|** im | +. + + + + + + + + + + + + + + +
f|oo
b \| az
b | im
+```````````````````````````````` + +The table is broken at the first empty line, or beginning of another +block-level structure: + +```````````````````````````````` example table +| abc | def | +| --- | --- | +| bar | baz | +> bar +. + + + + + + + + + + + + + +
abcdef
barbaz
+
+

bar

+
+```````````````````````````````` + +```````````````````````````````` example table +| abc | def | +| --- | --- | +| bar | baz | +bar + +bar +. + + + + + + + + + + + + + + + + + +
abcdef
barbaz
bar
+

bar

+```````````````````````````````` + +The header row must match the [delimiter row] in the number of cells. If not, +a table will not be recognized: + +```````````````````````````````` example table +| abc | def | +| --- | +| bar | +. +

| abc | def | +| --- | +| bar |

+```````````````````````````````` + +The remainder of the table's rows may vary in the number of cells. If there +are a number of cells fewer than the number of cells in the header row, empty +cells are inserted. If there are greater, the excess is ignored: + +```````````````````````````````` example table +| abc | def | +| --- | --- | +| bar | +| bar | baz | boo | +. + + + + + + + + + + + + + + + + + +
abcdef
bar
barbaz
+```````````````````````````````` + +If there are no rows in the body, no `` is generated in HTML output: + +```````````````````````````````` example table +| abc | def | +| --- | --- | +. + + + + + + + +
abcdef
+```````````````````````````````` From 8ccb1e8a20ac29272a6fefda82b3d7d705ecf54e Mon Sep 17 00:00:00 2001 From: Robert Atkey Date: Fri, 4 Nov 2022 20:21:06 +0000 Subject: [PATCH 05/11] Apply ocamlformat --- src/ast_block.ml | 8 +++--- src/block_parser.ml | 68 +++++++++++++++++++++------------------------ src/html.ml | 40 ++++++++++++++++---------- src/parser.ml | 25 +++++++++-------- src/sexp.ml | 23 ++++++++------- src/strSlice.ml | 33 ++++++++++++---------- tests/dune | 4 +-- 7 files changed, 107 insertions(+), 94 deletions(-) diff --git a/src/ast_block.ml b/src/ast_block.ml index 413d40fc..f21f8b81 100644 --- a/src/ast_block.ml +++ b/src/ast_block.ml @@ -29,7 +29,6 @@ module Table_alignments = struct end open List_types - open Table_alignments module Make (C : BlockContent) = struct @@ -74,9 +73,10 @@ module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct | Code_block (attr, label, code) -> Code_block (attr, label, code) | Html_block (attr, x) -> Html_block (attr, x) | Table (attr, headers, rows) -> - Table (attr, - List.map (fun (header, alignment) -> (f header, alignment)) headers, - List.map (List.map f) rows) + Table + ( attr + , List.map (fun (header, alignment) -> (f header, alignment)) headers + , List.map (List.map f) rows ) end module Mapper = MakeMapper (StringContent) (InlineContent) diff --git a/src/block_parser.ml b/src/block_parser.ml index 5737eb77..6aa42940 100644 --- a/src/block_parser.ml +++ b/src/block_parser.ml @@ -77,11 +77,10 @@ module Pre = struct Code_block ([], "", concat (loop l)) :: blocks | Rhtml (_, l) -> Html_block ([], concat l) :: blocks | Rtable_header (_header, line) -> - (* FIXME: this will only ever get called on the very last - line. Should it do the link definitions? *) - close link_defs { blocks; next = Rparagraph [ line ] } - | Rtable (header, rows) -> - Table ([], header, List.rev rows) :: blocks + (* FIXME: this will only ever get called on the very last + line. Should it do the link definitions? *) + close link_defs { blocks; next = Rparagraph [ line ] } + | Rtable (header, rows) -> Table ([], header, List.rev rows) :: blocks | Rempty -> blocks and finish link_defs state = List.rev (close link_defs state) @@ -92,44 +91,41 @@ module Pre = struct let classify_delimiter s = let left, s = match StrSlice.head s with - | Some ':' -> true, StrSlice.drop 1 s - | _ -> false, s + | Some ':' -> (true, StrSlice.drop 1 s) + | _ -> (false, s) in let right, s = match StrSlice.last s with - | Some ':' -> true, StrSlice.drop_last s - | _ -> false, s + | Some ':' -> (true, StrSlice.drop_last s) + | _ -> (false, s) in - if StrSlice.exists (fun c -> c <> '-') s then - None + if StrSlice.exists (fun c -> c <> '-') s then None else - match left, right with - | true, true -> Some Centre - | true, false -> Some Left - | false, true -> Some Right + match (left, right) with + | true, true -> Some Centre + | true, false -> Some Left + | false, true -> Some Right | false, false -> Some Default let match_table_headers headers delimiters = let rec loop processed = function - | [], [] -> - Some (List.rev processed) - | header::headers, line::delimiters -> - (match classify_delimiter line with + | [], [] -> Some (List.rev processed) + | header :: headers, line :: delimiters -> ( + match classify_delimiter line with | None -> None | Some alignment -> - loop ((StrSlice.to_string header, alignment)::processed) (headers, delimiters)) - | [], _::_ | _::_, [] -> - None + loop + ((StrSlice.to_string header, alignment) :: processed) + (headers, delimiters)) + | [], _ :: _ | _ :: _, [] -> None in loop [] (headers, delimiters) - let rec match_row_length l1 l2 = match l1, l2 with - | [], _ -> - [] - | l1, [] -> - List.init (List.length l1) (fun _ -> "") - | _::l1, x::l2 -> - StrSlice.to_string x :: match_row_length l1 l2 + let rec match_row_length l1 l2 = + match (l1, l2) with + | [], _ -> [] + | l1, [] -> List.init (List.length l1) (fun _ -> "") + | _ :: l1, x :: l2 -> StrSlice.to_string x :: match_row_length l1 l2 let rec process link_defs { blocks; next } s = let process = process link_defs in @@ -204,14 +200,14 @@ module Pre = struct } | Rdef_list _, _ -> process { blocks = close { blocks; next }; next = Rempty } s - | Rtable_header (headers, line), Ltable_line items -> - (match match_table_headers headers items with - | Some headers -> + | Rtable_header (headers, line), Ltable_line items -> ( + match match_table_headers headers items with + | Some headers -> (* Makes sure that there are the same number of delimiters as headers. See https://github.github.com/gfm/#example-203 *) { blocks; next = Rtable (headers, []) } - | None -> + | None -> (* Reinterpret the previous line as the start of a paragraph. *) process { blocks; next = Rparagraph [ line ] } s) @@ -225,13 +221,13 @@ module Pre = struct the headers and the rest of the rows. See https://github.github.com/gfm/#example-204 *) let row = match_row_length header row in - { blocks; next = Rtable (header, row::rows) } + { blocks; next = Rtable (header, row :: rows) } | Rtable (header, rows), (Lparagraph | Lsetext_heading _) -> (* Treat a contiguous line after a table as a row, even if it doesn't contain any '|' characters. https://github.github.com/gfm/#example-202 *) - let row = match_row_length header [s] in - { blocks; next = Rtable (header, row::rows) } + let row = match_row_length header [ s ] in + { blocks; next = Rtable (header, row :: rows) } | Rtable _, _ -> process { blocks = close { blocks; next }; next = Rempty } s | Rindented_code lines, Lindented_code s -> diff --git a/src/html.ml b/src/html.ml index f5f4653d..1dbe23ad 100644 --- a/src/html.ml +++ b/src/html.ml @@ -135,14 +135,20 @@ and inline = function let alignment_attributes = function | Default -> [] - | Left -> ["align", "left"] - | Right -> ["align", "right"] - | Centre -> ["align", "center"] + | Left -> [ ("align", "left") ] + | Right -> [ ("align", "right") ] + | Centre -> [ ("align", "center") ] let table_header headers = - elt Table "thead" [] + elt + Table + "thead" + [] (Some - (elt Table "tr" [] + (elt + Table + "tr" + [] (Some (concat_map (fun (header, alignment) -> @@ -151,11 +157,17 @@ let table_header headers = headers)))) let table_body headers rows = - elt Table "tbody" [] + elt + Table + "tbody" + [] (Some (concat_map (fun row -> - elt Table "tr" [] + elt + Table + "tr" + [] (Some (concat_map2 (fun (_, alignment) cell -> @@ -215,15 +227,13 @@ let rec block = function in elt Block "dl" attr (Some (concat_map f l)) | Table (attr, headers, []) -> - elt Table "table" attr - (Some - (table_header headers)) + elt Table "table" attr (Some (table_header headers)) | Table (attr, headers, rows) -> - elt Table "table" attr - (Some - (concat - (table_header headers) - (table_body headers rows))) + elt + Table + "table" + attr + (Some (concat (table_header headers) (table_body headers rows))) let of_doc doc = concat_map block doc diff --git a/src/parser.ml b/src/parser.ml index d996f2af..ed55cc53 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -762,20 +762,17 @@ let table_row pipe_prefix s = let rec loop items seen_pipe s = match StrSlice.index_unescaped '|' s with | None -> - if StrSlice.for_all is_whitespace s then - items, seen_pipe - else - s::items, false + if StrSlice.for_all is_whitespace s then (items, seen_pipe) + else (s :: items, false) | Some i -> - let item = StrSlice.take_n i s in - loop (item::items) true (StrSlice.drop (i+1) s) + let item = StrSlice.take_n i s in + loop (item :: items) true (StrSlice.drop (i + 1) s) in let items, terminating_pipe = loop [] pipe_prefix s in - match pipe_prefix, items, terminating_pipe with - | true, _, _ | _, _::_, true | _, _::_::_, _ -> - Ltable_line (List.rev_map StrSlice.trim items) - | _ -> - raise Fail + match (pipe_prefix, items, terminating_pipe) with + | true, _, _ | _, _ :: _, true | _, _ :: _ :: _, _ -> + Ltable_line (List.rev_map StrSlice.trim items) + | _ -> raise Fail let parse s0 = let ind, s = sp3 s0 in @@ -786,7 +783,11 @@ let parse s0 = Lblockquote s | Some '=' -> (setext_heading ||| table_row false) s | Some '-' -> - (setext_heading ||| thematic_break ||| unordered_list_item ind ||| table_row false) s + (setext_heading + ||| thematic_break + ||| unordered_list_item ind + ||| table_row false) + s | Some '_' -> thematic_break s | Some '#' -> atx_heading s | Some ('~' | '`') -> fenced_code ind s diff --git a/src/sexp.ml b/src/sexp.ml index 95cb6427..f84a3300 100644 --- a/src/sexp.ml +++ b/src/sexp.ml @@ -23,12 +23,14 @@ and inline = function | Image _ -> Atom "img" let table_header (header, alignment) = - List [ inline header - ; (match alignment with - | Default -> Atom "default" - | Left -> Atom "left" - | Centre -> Atom "centre" - | Right -> Atom "right") ] + List + [ inline header + ; (match alignment with + | Default -> Atom "default" + | Left -> Atom "left" + | Centre -> Atom "centre" + | Right -> Atom "right") + ] let rec block = function | Paragraph (_, x) -> List [ Atom "paragraph"; inline x ] @@ -53,10 +55,11 @@ let rec block = function l) ] | Table (_, headers, rows) -> - List [ Atom "table" - ; List (List.map table_header headers) - ; List (List.map (fun row -> List (List.map inline row)) rows) - ] + List + [ Atom "table" + ; List (List.map table_header headers) + ; List (List.map (fun row -> List (List.map inline row)) rows) + ] let create ast = List (List.map block ast) diff --git a/src/strSlice.ml b/src/strSlice.ml index 110e3202..232dad81 100644 --- a/src/strSlice.ml +++ b/src/strSlice.ml @@ -125,21 +125,24 @@ let split_at f s = let index_unescaped sep s = let rec loop idx state = - if idx = s.off+s.len then None (* If we get here and we're inside a verbatim span, what to do? *) - else match state, s.base.[idx] with - | `normal, '\\' -> loop (idx+1) `escape - | `normal, '`' -> loop (idx+1) (`verbatim_open 1) - | `normal, c when c = sep -> Some (idx-s.off) - | `normal, _ -> loop (idx+1) `normal - | `escape, _ -> loop (idx+1) `normal - | `verbatim_open n, '`' -> loop (idx+1) (`verbatim_open (n+1)) - | `verbatim_open n, _ -> loop (idx+1) (`within_verbatim n) - | `within_verbatim 1, '`' -> loop (idx+1) `normal - | `within_verbatim n, '`' -> loop (idx+1) (`verbatim_close (n,n-1)) - | `within_verbatim n, _ -> loop (idx+1) (`within_verbatim n) - | `verbatim_close (_, 1), '`' -> loop (idx+1) `normal - | `verbatim_close (n, k), '`' -> loop (idx+1) (`verbatim_close (n,k-1)) - | `verbatim_close (n, _), _ -> loop (idx+1) (`within_verbatim n) + if idx = s.off + s.len then None + (* If we get here and we're inside a verbatim span, what to do? *) + else + match (state, s.base.[idx]) with + | `normal, '\\' -> loop (idx + 1) `escape + | `normal, '`' -> loop (idx + 1) (`verbatim_open 1) + | `normal, c when c = sep -> Some (idx - s.off) + | `normal, _ -> loop (idx + 1) `normal + | `escape, _ -> loop (idx + 1) `normal + | `verbatim_open n, '`' -> loop (idx + 1) (`verbatim_open (n + 1)) + | `verbatim_open n, _ -> loop (idx + 1) (`within_verbatim n) + | `within_verbatim 1, '`' -> loop (idx + 1) `normal + | `within_verbatim n, '`' -> loop (idx + 1) (`verbatim_close (n, n - 1)) + | `within_verbatim n, _ -> loop (idx + 1) (`within_verbatim n) + | `verbatim_close (_, 1), '`' -> loop (idx + 1) `normal + | `verbatim_close (n, k), '`' -> + loop (idx + 1) (`verbatim_close (n, k - 1)) + | `verbatim_close (n, _), _ -> loop (idx + 1) (`within_verbatim n) in loop s.off `normal diff --git a/tests/dune b/tests/dune index 7b4a81a1..8f8e7ccf 100644 --- a/tests/dune +++ b/tests/dune @@ -15,8 +15,8 @@ (with-stdout-to dune.inc.new (run ./extract_tests.exe -write-dune-file %{dep:spec.txt} - %{dep:gfm_table_spec.md} %{dep:extra_table_tests.md} - %{dep:attributes.md} %{dep:def_list.md}))) + %{dep:gfm_table_spec.md} %{dep:extra_table_tests.md} %{dep:attributes.md} + %{dep:def_list.md}))) (include dune.inc) From c6a298a3aedb9f158950df35a6c9d1d1da93ef9f Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 15:02:44 -0500 Subject: [PATCH 06/11] Add comments for table parts --- src/ast_block.ml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ast_block.ml b/src/ast_block.ml index f21f8b81..3ca545d9 100644 --- a/src/ast_block.ml +++ b/src/ast_block.ml @@ -51,6 +51,8 @@ module Make (C : BlockContent) = struct | Html_block of 'attr * string | Definition_list of 'attr * 'attr def_elt list | Table of 'attr * ('attr C.t * cell_alignment) list * 'attr C.t list list + (** A table is represented by a header row, which is a list of pairs of + header cells and alignments, and a list of rows *) end module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct From 10a4bd0d71b19e56c071674a7cba9be1ced406be Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 15:04:41 -0500 Subject: [PATCH 07/11] Use labeled arg for pipe_prefix argument --- src/parser.ml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/parser.ml b/src/parser.ml index ed55cc53..f290504b 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -758,7 +758,7 @@ let indented_code ind s = (* A sequence of cell contents separated by unescaped '|' characters. *) -let table_row pipe_prefix s = +let table_row ~pipe_prefix s = let rec loop items seen_pipe s = match StrSlice.index_unescaped '|' s with | None -> @@ -781,12 +781,12 @@ let parse s0 = let s = StrSlice.offset 1 s in let s = if indent s > 0 then StrSlice.offset 1 s else s in Lblockquote s - | Some '=' -> (setext_heading ||| table_row false) s + | Some '=' -> (setext_heading ||| table_row ~pipe_prefix:false) s | Some '-' -> (setext_heading ||| thematic_break ||| unordered_list_item ind - ||| table_row false) + ||| table_row ~pipe_prefix:false) s | Some '_' -> thematic_break s | Some '#' -> atx_heading s @@ -794,10 +794,10 @@ let parse s0 = | Some '<' -> raw_html s | Some '*' -> (thematic_break ||| unordered_list_item ind) s | Some '+' -> unordered_list_item ind s - | Some '0' .. '9' -> (ordered_list_item ind ||| table_row false) s - | Some ':' -> (def_list ||| table_row false) s - | Some '|' -> table_row true (StrSlice.tail s) - | Some _ -> (blank ||| indented_code ind ||| table_row false) s + | Some '0' .. '9' -> (ordered_list_item ind ||| table_row ~pipe_prefix:false) s + | Some ':' -> (def_list ||| table_row ~pipe_prefix:false) s + | Some '|' -> table_row ~pipe_prefix:true (StrSlice.tail s) + | Some _ -> (blank ||| indented_code ind ||| table_row ~pipe_prefix:false) s | None -> Lempty let parse s = try parse s with Fail -> Lparagraph From 77e791040b34c2d019953d839549a550b5ed2103 Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 15:07:24 -0500 Subject: [PATCH 08/11] Rename take_n to take_prefix --- src/parser.ml | 2 +- src/strSlice.ml | 4 ++-- src/strSlice.mli | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.ml b/src/parser.ml index f290504b..2837611c 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -765,7 +765,7 @@ let table_row ~pipe_prefix s = if StrSlice.for_all is_whitespace s then (items, seen_pipe) else (s :: items, false) | Some i -> - let item = StrSlice.take_n i s in + let item = StrSlice.take_prefix i s in loop (item :: items) true (StrSlice.drop (i + 1) s) in let items, terminating_pipe = loop [] pipe_prefix s in diff --git a/src/strSlice.ml b/src/strSlice.ml index 232dad81..01a9e79a 100644 --- a/src/strSlice.ml +++ b/src/strSlice.ml @@ -63,8 +63,8 @@ let take n s = in loop n s -let take_n n s = - if n < 0 then invalid_arg "take_n"; +let take_prefix n s = + if n < 0 then invalid_arg "take_prefix"; let len = min n s.len in { s with len } diff --git a/src/strSlice.mli b/src/strSlice.mli index a28ca3bb..ca865f42 100644 --- a/src/strSlice.mli +++ b/src/strSlice.mli @@ -36,8 +36,8 @@ val drop_last : t -> t val take : int -> t -> char list (** [take n s] is a list of the first [n] characters of [s] *) -val take_n : int -> t -> t -(** [take_n n s] returns the slice consisting of the first [n] +val take_prefix : int -> t -> t +(** [take_prefix n s] is the slice consisting of the first [n] characters of [s]. *) val drop : int -> t -> t From 16700ca84e600f88c8cefe061656958eddd5d6e2 Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 15:08:19 -0500 Subject: [PATCH 09/11] Add missing word --- src/strSlice.mli | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/strSlice.mli b/src/strSlice.mli index ca865f42..648df93a 100644 --- a/src/strSlice.mli +++ b/src/strSlice.mli @@ -14,7 +14,7 @@ val index : (char -> bool) -> t -> int option which [f] is first true, or [None] if [f] holds for no characters in [s]. *) val index_unescaped : char -> t -> int option -(** [index_unescaped c s] is [Some i] where [i] index of the first +(** [index_unescaped c s] is [Some i] where [i] is index of the first occurrence of the character [c] in [s] that is not preceeded by a backslash ['\\'] and not within a verbatim inline, or [None] if there is no such [c] in [s]. *) From 543418eeca3399329fe26be8c00a2e1a3f9231e4 Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 15:12:26 -0500 Subject: [PATCH 10/11] Add changelog entry and thank you --- CHANGES.md | 2 ++ README.md | 1 + 2 files changed, 3 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index b863036f..3166ebd0 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ unreleased ---------- +- Support GitHub-Flavoured Markdown tables (#292, @bobatkey) + - Update parser to support CommonMark Spec 0.30 (#266, @SquidDev) - Preserve the order of input files in the HTML output to stdout (#258, diff --git a/README.md b/README.md index c270d99f..8c7c2a28 100644 --- a/README.md +++ b/README.md @@ -111,4 +111,5 @@ Special thanks for feedback and contributions to this project goes out to: - [Nicolás Ojeda Bär](https://github.com/nojb) - [Raphael Sousa Santos](https://sonologi.co/) - [Corentin Leruth](https://github.com/tatchi) +- [Bob Atkey](https://bentnib.org/) - *please insert your name here if you believe you've been forgotten* From 168f3e32357f269f3b14cc28aa454788deff7f51 Mon Sep 17 00:00:00 2001 From: Shon Feder Date: Sun, 20 Nov 2022 15:24:12 -0500 Subject: [PATCH 11/11] Fix formatting --- src/parser.ml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/parser.ml b/src/parser.ml index 2837611c..5fbf669a 100644 --- a/src/parser.ml +++ b/src/parser.ml @@ -794,7 +794,8 @@ let parse s0 = | Some '<' -> raw_html s | Some '*' -> (thematic_break ||| unordered_list_item ind) s | Some '+' -> unordered_list_item ind s - | Some '0' .. '9' -> (ordered_list_item ind ||| table_row ~pipe_prefix:false) s + | Some '0' .. '9' -> + (ordered_list_item ind ||| table_row ~pipe_prefix:false) s | Some ':' -> (def_list ||| table_row ~pipe_prefix:false) s | Some '|' -> table_row ~pipe_prefix:true (StrSlice.tail s) | Some _ -> (blank ||| indented_code ind ||| table_row ~pipe_prefix:false) s