Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for GitHub-Flavoured Markdown tables #292

Merged
merged 11 commits into from
Nov 20, 2022
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
unreleased
----------

- Support GitHub-Flavoured Markdown tables (#292, @bobatkey)

- Update parser to support CommonMark Spec 0.30 (#266, @SquidDev)

- Preserve the order of input files in the HTML output to stdout (#258,
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,5 @@ Special thanks for feedback and contributions to this project goes out to:
- [Nicolás Ojeda Bär](https://github.com/nojb)
- [Raphael Sousa Santos](https://sonologi.co/)
- [Corentin Leruth](https://github.com/tatchi)
- [Bob Atkey](https://bentnib.org/)
- *please insert your name here if you believe you've been forgotten*
1 change: 1 addition & 0 deletions src/ast.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module Impl = struct
include Ast_inline
include Ast_block.List_types
include Ast_block.Table_alignments
include Ast_block.WithInline

type attributes = (string * string) list
Expand Down
17 changes: 17 additions & 0 deletions src/ast_block.ml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,16 @@ module List_types = struct
| Tight
end

module Table_alignments = struct
type cell_alignment =
| Default
| Left
| Centre
| Right
end

open List_types
open Table_alignments

module Make (C : BlockContent) = struct
type 'attr def_elt =
Expand All @@ -41,6 +50,9 @@ module Make (C : BlockContent) = struct
| Code_block of 'attr * string * string
| Html_block of 'attr * string
| Definition_list of 'attr * 'attr def_elt list
| Table of 'attr * ('attr C.t * cell_alignment) list * 'attr C.t list list
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if it's worth making the last two fields an inline record to identify for readers the header from the rows... not sure if that's worth it, but at least a comment to explain would be helpful.

(** A table is represented by a header row, which is a list of pairs of
header cells and alignments, and a list of rows *)
end

module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct
Expand All @@ -62,6 +74,11 @@ module MakeMapper (Src : BlockContent) (Dst : BlockContent) = struct
Definition_list (attr, List.map f l)
| Code_block (attr, label, code) -> Code_block (attr, label, code)
| Html_block (attr, x) -> Html_block (attr, x)
| Table (attr, headers, rows) ->
Table
( attr
, List.map (fun (header, alignment) -> (f header, alignment)) headers
, List.map (List.map f) rows )
end

module Mapper = MakeMapper (StringContent) (InlineContent)
Expand Down
80 changes: 79 additions & 1 deletion src/block_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ module Pre = struct
| Rindented_code of string list
| Rhtml of Parser.html_kind * string list
| Rdef_list of string * string list
| Rtable_header of StrSlice.t list * string
| Rtable of (string * cell_alignment) list * string list list
| Rempty

and t =
Expand Down Expand Up @@ -74,13 +76,57 @@ module Pre = struct
let rec loop = function "" :: l -> loop l | _ as l -> l in
Code_block ([], "", concat (loop l)) :: blocks
| Rhtml (_, l) -> Html_block ([], concat l) :: blocks
| Rtable_header (_header, line) ->
(* FIXME: this will only ever get called on the very last
line. Should it do the link definitions? *)
close link_defs { blocks; next = Rparagraph [ line ] }
| Rtable (header, rows) -> Table ([], header, List.rev rows) :: blocks
| Rempty -> blocks

and finish link_defs state = List.rev (close link_defs state)

let empty = { blocks = []; next = Rempty }
let classify_line s = Parser.parse s

let classify_delimiter s =
let left, s =
match StrSlice.head s with
| Some ':' -> (true, StrSlice.drop 1 s)
| _ -> (false, s)
in
let right, s =
match StrSlice.last s with
| Some ':' -> (true, StrSlice.drop_last s)
| _ -> (false, s)
in
if StrSlice.exists (fun c -> c <> '-') s then None
else
match (left, right) with
| true, true -> Some Centre
| true, false -> Some Left
| false, true -> Some Right
| false, false -> Some Default

let match_table_headers headers delimiters =
let rec loop processed = function
| [], [] -> Some (List.rev processed)
| header :: headers, line :: delimiters -> (
match classify_delimiter line with
| None -> None
| Some alignment ->
loop
((StrSlice.to_string header, alignment) :: processed)
(headers, delimiters))
| [], _ :: _ | _ :: _, [] -> None
in
loop [] (headers, delimiters)

let rec match_row_length l1 l2 =
match (l1, l2) with
| [], _ -> []
| l1, [] -> List.init (List.length l1) (fun _ -> "")
| _ :: l1, x :: l2 -> StrSlice.to_string x :: match_row_length l1 l2

let rec process link_defs { blocks; next } s =
let process = process link_defs in
let close = close link_defs in
Expand All @@ -103,8 +149,10 @@ module Pre = struct
{ blocks
; next = Rlist (kind, Tight, false, indent, [], process empty s)
}
| Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _) ->
| Rempty, (Lsetext_heading _ | Lparagraph | Ldef_list _ | Ltable_line []) ->
{ blocks; next = Rparagraph [ StrSlice.to_string s ] }
| Rempty, Ltable_line items ->
{ blocks; next = Rtable_header (items, StrSlice.to_string s) }
| Rparagraph [ h ], Ldef_list def ->
{ blocks; next = Rdef_list (h, [ def ]) }
| Rdef_list (term, defs), Ldef_list def ->
Expand Down Expand Up @@ -152,6 +200,36 @@ module Pre = struct
}
| Rdef_list _, _ ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rtable_header (headers, line), Ltable_line items -> (
match match_table_headers headers items with
| Some headers ->
(* Makes sure that there are the same number of delimiters
as headers. See
https://github.github.com/gfm/#example-203 *)
Comment on lines +206 to +208
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks very much for the helpful comments and references!

{ blocks; next = Rtable (headers, []) }
| None ->
(* Reinterpret the previous line as the start of a
paragraph. *)
process { blocks; next = Rparagraph [ line ] } s)
| Rtable_header (_, line), _ ->
(* If we only have a potential header, and the current line
doesn't look like a table delimiter, then reinterpret the
previous line as the start of a paragraph. *)
process { blocks; next = Rparagraph [ line ] } s
| Rtable (header, rows), Ltable_line row ->
(* Make sure the number of items in the row is consistent with
the headers and the rest of the rows. See
https://github.github.com/gfm/#example-204 *)
let row = match_row_length header row in
{ blocks; next = Rtable (header, row :: rows) }
| Rtable (header, rows), (Lparagraph | Lsetext_heading _) ->
(* Treat a contiguous line after a table as a row, even if it
doesn't contain any '|'
characters. https://github.github.com/gfm/#example-202 *)
let row = match_row_length header [ s ] in
{ blocks; next = Rtable (header, row :: rows) }
| Rtable _, _ ->
process { blocks = close { blocks; next }; next = Rempty } s
| Rindented_code lines, Lindented_code s ->
{ blocks; next = Rindented_code (StrSlice.to_string s :: lines) }
| Rindented_code lines, Lempty ->
Expand Down
63 changes: 60 additions & 3 deletions src/html.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ open Ast.Impl
type element_type =
| Inline
| Block
| Table

type t =
| Element of element_type * string * attributes * t option
Expand All @@ -20,6 +21,9 @@ let concat t1 t2 =

let concat_map f l = List.fold_left (fun accu x -> concat accu (f x)) Null l

let concat_map2 f l1 l2 =
List.fold_left2 (fun accu x y -> concat accu (f x y)) Null l1 l2

(* only convert when "necessary" *)
let htmlentities s =
let b = Buffer.create (String.length s) in
Expand Down Expand Up @@ -50,14 +54,15 @@ let rec add_to_buffer buf = function
| Element (eltype, name, attrs, Some c) ->
Printf.bprintf
buf
"<%s%a>%a</%s>"
"<%s%a>%s%a</%s>%s"
name
add_attrs_to_buffer
attrs
(match eltype with Table -> "\n" | _ -> "")
add_to_buffer
c
name;
if eltype = Block then Buffer.add_char buf '\n'
name
(match eltype with Table | Block -> "\n" | _ -> "")
| Text s -> Buffer.add_string buf (htmlentities s)
| Raw s -> Buffer.add_string buf s
| Null -> ()
Expand Down Expand Up @@ -128,6 +133,50 @@ and inline = function
| Image (attr, { label; destination; title }) ->
img label destination title attr

let alignment_attributes = function
| Default -> []
| Left -> [ ("align", "left") ]
| Right -> [ ("align", "right") ]
| Centre -> [ ("align", "center") ]

let table_header headers =
elt
Table
"thead"
[]
(Some
(elt
Table
"tr"
[]
(Some
(concat_map
(fun (header, alignment) ->
let attrs = alignment_attributes alignment in
elt Block "th" attrs (Some (inline header)))
headers))))

let table_body headers rows =
elt
Table
"tbody"
[]
(Some
(concat_map
(fun row ->
elt
Table
"tr"
[]
(Some
(concat_map2
(fun (_, alignment) cell ->
let attrs = alignment_attributes alignment in
elt Block "td" attrs (Some (inline cell)))
headers
row)))
rows))

let rec block = function
| Blockquote (attr, q) ->
elt Block "blockquote" attr (Some (concat nl (concat_map block q)))
Expand Down Expand Up @@ -177,6 +226,14 @@ let rec block = function
(concat_map (fun s -> elt Block "dd" [] (Some (inline s))) defs)
in
elt Block "dl" attr (Some (concat_map f l))
| Table (attr, headers, []) ->
elt Table "table" attr (Some (table_header headers))
| Table (attr, headers, rows) ->
elt
Table
"table"
attr
(Some (concat (table_header headers) (table_body headers rows)))

let of_doc doc = concat_map block doc

Expand Down
1 change: 1 addition & 0 deletions src/html.mli
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ open Ast.Impl
type element_type =
| Inline
| Block
| Table

type t =
| Element of element_type * string * attributes * t option
Expand Down
35 changes: 30 additions & 5 deletions src/parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ type t =
| Llist_item of list_type * int * StrSlice.t
| Lparagraph
| Ldef_list of string
| Ltable_line of StrSlice.t list

(* drop up to 3 spaces, returning the number of spaces dropped and the remainder of the string *)
let sp3 s =
Expand Down Expand Up @@ -755,25 +756,49 @@ let indented_code ind s =
if indent s + ind < 4 then raise Fail;
Lindented_code (StrSlice.offset (4 - ind) s)

(* A sequence of cell contents separated by unescaped '|'
characters. *)
let table_row ~pipe_prefix s =
let rec loop items seen_pipe s =
match StrSlice.index_unescaped '|' s with
| None ->
if StrSlice.for_all is_whitespace s then (items, seen_pipe)
else (s :: items, false)
| Some i ->
let item = StrSlice.take_prefix i s in
loop (item :: items) true (StrSlice.drop (i + 1) s)
in
let items, terminating_pipe = loop [] pipe_prefix s in
match (pipe_prefix, items, terminating_pipe) with
| true, _, _ | _, _ :: _, true | _, _ :: _ :: _, _ ->
Ltable_line (List.rev_map StrSlice.trim items)
| _ -> raise Fail

let parse s0 =
let ind, s = sp3 s0 in
match StrSlice.head s with
| Some '>' ->
let s = StrSlice.offset 1 s in
let s = if indent s > 0 then StrSlice.offset 1 s else s in
Lblockquote s
| Some '=' -> setext_heading s
| Some '=' -> (setext_heading ||| table_row ~pipe_prefix:false) s
| Some '-' ->
(setext_heading ||| thematic_break ||| unordered_list_item ind) s
(setext_heading
||| thematic_break
||| unordered_list_item ind
||| table_row ~pipe_prefix:false)
s
| Some '_' -> thematic_break s
| Some '#' -> atx_heading s
| Some ('~' | '`') -> fenced_code ind s
| Some '<' -> raw_html s
| Some '*' -> (thematic_break ||| unordered_list_item ind) s
| Some '+' -> unordered_list_item ind s
| Some '0' .. '9' -> ordered_list_item ind s
| Some ':' -> def_list s
| Some _ -> (blank ||| indented_code ind) s
| Some '0' .. '9' ->
(ordered_list_item ind ||| table_row ~pipe_prefix:false) s
| Some ':' -> (def_list ||| table_row ~pipe_prefix:false) s
| Some '|' -> table_row ~pipe_prefix:true (StrSlice.tail s)
| Some _ -> (blank ||| indented_code ind ||| table_row ~pipe_prefix:false) s
| None -> Lempty

let parse s = try parse s with Fail -> Lparagraph
Expand Down
18 changes: 17 additions & 1 deletion src/sexp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,23 @@ and inline = function
| Text (_, s) -> Atom s
| Emph (_, il) -> List [ Atom "emph"; inline il ]
| Strong (_, il) -> List [ Atom "strong"; inline il ]
| Code _ -> Atom "code"
| Code _ -> Atom "code" (* FIXME: this seems broken? *)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noted in #293. Thanks!

| Hard_break _ -> Atom "hard-break"
| Soft_break _ -> Atom "soft-break"
| Link (_, def) -> List [ Atom "url"; link def ]
| Html (_, s) -> List [ Atom "html"; Atom s ]
| Image _ -> Atom "img"

let table_header (header, alignment) =
List
[ inline header
; (match alignment with
| Default -> Atom "default"
| Left -> Atom "left"
| Centre -> Atom "centre"
| Right -> Atom "right")
]

let rec block = function
| Paragraph (_, x) -> List [ Atom "paragraph"; inline x ]
| List (_, _, _, bls) ->
Expand All @@ -44,6 +54,12 @@ let rec block = function
List [ inline elt.term; List (List.map inline elt.defs) ])
l)
]
| Table (_, headers, rows) ->
List
[ Atom "table"
; List (List.map table_header headers)
; List (List.map (fun row -> List (List.map inline row)) rows)
]

let create ast = List (List.map block ast)

Expand Down
Loading