diff --git a/.gitignore b/.gitignore index f06221ce..104c935a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +_opam/ _build/ .merlin *.install diff --git a/bin/dune b/bin/dune index 4325564f..37716d76 100644 --- a/bin/dune +++ b/bin/dune @@ -1,4 +1,5 @@ (executable (name main) (public_name omd) + (package omd) (libraries omd)) diff --git a/dune-project b/dune-project index c8e8a0d7..148d0117 100644 --- a/dune-project +++ b/dune-project @@ -21,3 +21,13 @@ Additionally, OMD implements a few Github markdown features, an extension mechanism, and some other features. Note that the opam package installs both the OMD library and the command line tool `omd`.") (tags (org:ocamllabs org:mirage))) + +(package + (name omd-tyxml) + (synopsis "A library to convert OMD's markdown representation to Tyxml") + (description + "This optional library enables users of OMD to convert values of type Omd.doc, +representing parsed markdown, into values of type Tyxml.Html.t, which provides +statically correct represenations of HTML.") + (tags (org:ocamllabs org:mirage)) + (depends omd tyxml (lambdasoup :with-test))) diff --git a/omd-tyxml.opam b/omd-tyxml.opam new file mode 100644 index 00000000..15483a9c --- /dev/null +++ b/omd-tyxml.opam @@ -0,0 +1,37 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +version: "2.0.0" +synopsis: "A library to convert OMD's markdown representation to Tyxml" +description: """ +This optional library enables users of OMD to convert values of type Omd.doc, +representing parsed markdown, into values of type Tyxml.Html.t, which provides +statically correct represenations of HTML.""" +authors: [ + "Philippe Wang " + "Nicolás Ojeda Bär " +] +license: "ISC" +tags: ["org:ocamllabs" "org:mirage"] +homepage: "https://github.com/ocaml/omd" +bug-reports: "https://github.com/ocaml/omd/issues" +depends: [ + "dune" {>= "2.5"} + "omd" + "tyxml" + "lambdasoup" {with-test} +] +build: [ + ["dune" "subst"] {pinned} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/ocaml/omd.git" diff --git a/omd_tyxml/dune b/omd_tyxml/dune new file mode 100644 index 00000000..e44b8630 --- /dev/null +++ b/omd_tyxml/dune @@ -0,0 +1,3 @@ +(library + (name omd_tyxml) + (libraries omd tyxml)) diff --git a/omd_tyxml/omd_tyxml.ml b/omd_tyxml/omd_tyxml.ml new file mode 100644 index 00000000..4929b4ce --- /dev/null +++ b/omd_tyxml/omd_tyxml.ml @@ -0,0 +1,177 @@ +open Tyxml + +(** TODO move into Omd if we don't replace the html module with this one *) + +(** [cons_opt opt_x xs] is (x :: xs) if [opt_x] is [Some x] or else just [xs].*) +let cons_opt : 'a option -> 'a list -> 'a list = + fun x_opt xs -> + match x_opt with + | None -> xs + | Some x -> x :: xs + +(** TODO move into Omd if we don't replace the html module with this one *) + +(** [inline_to_plain_text il] is a string with just the textual content + of the the inline term [il]. All semantic and formatting nodes are ignored. + + This is intended for use internally, for converting inline elements which + do not support any markup, such as image labels. *) +let inline_to_plain_text : Omd.inline -> string = + fun il -> + let buf = Buffer.create 1024 in + let rec go {Omd.il_desc; _} = match il_desc with + | Concat xs -> List.iter go xs + | Emph t | Strong t -> go t + | Link l | Image l -> go l.label + | Hard_break | Soft_break -> () + | Code s | Html s | Text s -> Buffer.add_string buf s + in + go il; + Buffer.contents buf + +let of_omd_attributes attrs = + List.map (fun (a, v) -> Html.Unsafe.string_attrib a v) attrs + + +(* INLINE CONVERSION *) + +(* NOTE: The unfortunate duplication of inline handlers seems to be necessary + to get the Tyxml types constructed correctly. However, if you know how to + simplify, please help! *) +(* TODO Support verified html (instead of using Html.Unsafe.data) ?*) +let rec of_inline : Omd.inline -> Html_types.phrasing Html.elt list = + fun {il_attributes; il_desc} -> + let attrs = of_omd_attributes il_attributes in + match il_desc with + | Html raw -> Html.Unsafe.[data raw] + | Code c -> Html.[code ~a:attrs [txt c]] + | Emph e -> Html.[em ~a:attrs (of_inline e)] + | Hard_break -> Html.[br ~a:attrs ()] + | Soft_break -> Html.[txt "\n"] + | Strong s -> Html.[strong ~a:attrs (of_inline s)] + | Text t -> Html.[txt t] + | Concat ls -> List.concat_map of_inline ls + | Link l -> [of_link attrs l] + | Image img -> [(of_img attrs img :> Html_types.phrasing Html.elt)] + +and of_def_term : Omd.inline -> Html_types.dt_content Html.elt list = + fun {il_desc; il_attributes} -> + let attrs = of_omd_attributes il_attributes in + match il_desc with + | Html raw -> Html.Unsafe.[data raw] + | Code c -> Html.[code ~a:attrs [txt c]] + | Emph e -> Html.[em ~a:attrs (of_inline e)] + | Hard_break -> Html.[br ~a:attrs ()] + | Soft_break -> Html.[txt "\n"] + | Strong s -> Html.[strong ~a:attrs (of_inline s)] + | Text t -> Html.[txt t] + | Concat ls -> List.concat_map of_def_term ls + | Link l -> [(of_link attrs l :> Html_types.dt_content Html.elt)] + | Image img -> [(of_img attrs img :> Html_types.dt_content Html.elt)] + +and of_link_label : Omd.inline -> Html_types.phrasing_without_interactive Html.elt list = + fun {il_desc; il_attributes} -> + let attrs = of_omd_attributes il_attributes in + match il_desc with + | Code c -> Html.[code ~a:attrs [txt c]] + | Emph e -> Html.[em ~a:attrs (of_link_label e)] + | Strong s -> Html.[strong ~a:attrs (of_link_label s)] + | Text t -> Html.[txt t] + | Concat ls -> List.concat_map of_link_label ls + | Image img -> [(of_img attrs img :> Html_types.phrasing_without_interactive Html.elt)] + (* We ignore any elements that shouldn't be included in link labels. *) + | _ -> [] + +and of_link attrs (l : Omd.link) = + let escaped_url = Omd.Internal.escape_uri l.destination in + let attrs = + let url = Html.a_href escaped_url in + let title = Option.map Html.a_title l.title in + (* The url goes before the title to match the order in the spec.txt *) + url :: cons_opt (title) attrs + in + Html.(a ~a:attrs (of_link_label l.label)) + +and of_img attrs (img : Omd.link) = + let escaped_url = Omd.Internal.escape_uri img.destination in + let attrs = cons_opt (Option.map Html.a_title img.title) attrs in + let alt = inline_to_plain_text img.label in + Html.(img ~src:escaped_url ~alt ~a:attrs ()) + + +(* BLOCK CONVERSION *) + +let of_heading n attrs content = + let ctr = + let open Html in + match n with + | 1 -> h1 + | 2 -> h2 + | 3 -> h3 + | 4 -> h4 + | 5 -> h5 + | 6 -> h6 + | _ -> p (* See ATX Headings in the tests/spec.txt *) + in + ctr ~a:attrs (of_inline content) + +let of_code_block src attrs content = + let src_attr = match src with + | "" -> [] + | _ -> [Html.a_class ["language-" ^ src]] + in + Html.(pre ~a:attrs [code ~a:src_attr [txt content]]) + +let rec of_list (typ : Omd.list_type) (spacing : Omd.list_spacing) items = + let of_list_block (bl : Omd.block) : Html_types.li_content Html.elt list = + match bl.bl_desc, spacing with + | Paragraph il, Tight -> (of_def_term il :> Html_types.li_content_fun Html.elt list) + | _ -> [of_block bl] + in + let to_list_item i = List.concat_map of_list_block i |> Html.li in + let to_list_element = + match typ with + | Ordered (start, _) -> Html.ol ~a:(if start <> 1 then [Html.a_start start] else []) + | Bullet _ -> Html.ul ~a:[] + in + items + |> List.map to_list_item + |> to_list_element + +and of_definition_list defs = + let entry ({term; defs} : Omd.def_elt) = + (* "The term — word or phrase — defined in a definition." *) + let definiendum = Html.dt (of_def_term term) in + (* "The words or phrases that define the definiendum in a definition." *) + let definientia = List.map (fun d -> Html.dd (of_def_term d)) defs in + definiendum :: definientia + in + Html.dl (List.concat_map entry defs) + +and of_block : Omd.block -> Html_types.flow5 Html.elt = + fun block -> + let attrs = of_omd_attributes block.bl_attributes in + match block.bl_desc with + | Paragraph content -> Html.p (of_inline content) + | Blockquote content -> Html.blockquote (List.map of_block content) + | Thematic_break -> Html.hr () + | Html_block html -> Html.Unsafe.data html + | List (typ, spacing, items) -> of_list typ spacing items + | Heading (n, content) -> of_heading n attrs content + | Code_block (src, code) -> of_code_block src attrs code + | Definition_list content -> of_definition_list content + + +(* API *) + +let of_fragment : Omd.doc -> Html_types.flow5 Html.elt list = + fun omd -> List.map of_block omd + +let of_doc ?(title="") : Omd.doc -> Tyxml.Html.doc = + fun omd -> + let title' = title in + let body' = of_fragment omd in + let open Html in + html + (head (title (txt title')) []) + (body body') diff --git a/omd_tyxml/omd_tyxml.mli b/omd_tyxml/omd_tyxml.mli new file mode 100644 index 00000000..3b892eb9 --- /dev/null +++ b/omd_tyxml/omd_tyxml.mli @@ -0,0 +1,13 @@ +(** Convert values of type {!type:Omd.doc} to values of type {!type:Tyxml.Html.doc} *) + +(** [of_doc doc] is a {{:https://ocsigen.org/tyxml} Tyxml} document + representation markdown data [doc] as statically validated + {{:https://ocsigen.org/tyxml/latest/api/Html_sigs.T#TYPEdoc} HTML document}. *) +val of_doc : ?title:string -> Omd.doc -> Tyxml.Html.doc + +(** [of_fragment omd] is a {{:https://ocsigen.org/tyxml} Tyxml} representation + of the + {{:https://www.w3.org/TR/2011/WD-html5-20110525/content-models.html#flow-content} + flow} elements corresponding to the a given [omd]. This is useful when [omd] + is a fragment rather than a standalone document. *) +val of_fragment : Omd.doc -> Html_types.flow5 Tyxml.Html.elt list diff --git a/src/html.mli b/src/html.mli index de086f74..dfa2f11f 100644 --- a/src/html.mli +++ b/src/html.mli @@ -11,6 +11,8 @@ type t = | Null | Concat of t * t +val escape_uri : string -> string + val of_doc : block list -> t val to_string : t -> string diff --git a/src/omd.ml b/src/omd.ml index 4cb606ed..750b99b9 100644 --- a/src/omd.ml +++ b/src/omd.ml @@ -25,3 +25,7 @@ let to_html doc = let to_sexp ast = Format.asprintf "@[%a@]@." Sexp.print (Sexp.create ast) + +module Internal = struct + let escape_uri = Html.escape_uri +end diff --git a/src/omd.mli b/src/omd.mli index 2d781c6f..bd65d6dd 100644 --- a/src/omd.mli +++ b/src/omd.mli @@ -68,3 +68,9 @@ val of_string: string -> doc val to_html: doc -> string val to_sexp: doc -> string + +(* TODO rm if we can integrate Tyxml into main Omd package *) +(** Values for internal usage *) +module Internal : sig + val escape_uri : string -> string +end diff --git a/tests/common.ml b/tests/common.ml new file mode 100644 index 00000000..af8f2abc --- /dev/null +++ b/tests/common.ml @@ -0,0 +1,4 @@ +let normalize_html s = + String.trim s + |> Soup.parse + |> Soup.pretty_print diff --git a/tests/dune b/tests/dune index e0df1d93..6478096a 100644 --- a/tests/dune +++ b/tests/dune @@ -1,8 +1,15 @@ (executable (name extract_tests) - (libraries str) + (libraries str common) (modules extract_tests)) +; Code shared between various parts of the testing apartus +(library + (name common) + (libraries lambdasoup) + (modules common)) + +; Generate and run tests for the core omd package (rule (with-stdout-to dune.inc.new @@ -15,9 +22,11 @@ (executable (name omd) - (libraries str omd) + (libraries str omd omd_tyxml tyxml common) (modules omd)) +; Generate the rules for diff-based tests (rule (alias gen) - (action (diff dune.inc dune.inc.new))) + (action + (diff dune.inc dune.inc.new))) diff --git a/tests/extract_tests.ml b/tests/extract_tests.ml index 757d2cc5..66eae659 100644 --- a/tests/extract_tests.ml +++ b/tests/extract_tests.ml @@ -1,27 +1,28 @@ (* Extract test cases from Spec *) +(* TODO Remove if we can use Tyxml for html generation *) let disabled = [ - 164; - 175; - 184; - 185; - 334; - 353; - 410; - 411; - 414; - 415; - 416; - 428; - 468; - 469; - 486; - 516; - 536; - 570; - 519; - 591; + (* 164; + * 175; + * 184; + * 185; + * 334; + * 353; + * 410; + * 411; + * 414; + * 415; + * 416; + * 428; + * 468; + * 469; + * 486; + * 516; + * 536; + * 570; + * 519; + * 591; *) ] let with_open_in fn f = @@ -75,7 +76,7 @@ let parse_test_spec filename = let rec get_html () = let line = input_line ic in if begins_with line test_delim then - let html = Buffer.contents buf in + let html = Buffer.contents buf |> Common.normalize_html in {filename; example; markdown; html} else begin add_line buf line; @@ -117,20 +118,13 @@ let write_dune_file test_specs tests = "@[(alias@ (name runtest)@ @[(deps%t)@])@]@." (fun ppf -> List.iter (pp ppf) tests) -let li_begin_re = Str.regexp_string "
  • \n" -let li_end_re = Str.regexp_string "\n
  • " - -let normalize_html s = - Str.global_replace li_end_re "" - (Str.global_replace li_begin_re "
  • " s) - let generate_test_files tests = let f {filename; example; markdown; html} = let base = Filename.remove_extension filename in with_open_out (Printf.sprintf "%s-%03d.md" base example) (fun oc -> output_string oc markdown); with_open_out (Printf.sprintf "%s-%03d.html" base example) - (fun oc -> output_string oc (normalize_html html)) + (fun oc -> output_string oc html) in List.iter f tests diff --git a/tests/omd.ml b/tests/omd.ml index e3bf48d4..0694a7a6 100644 --- a/tests/omd.ml +++ b/tests/omd.ml @@ -1,15 +1,32 @@ -let li_begin_re = Str.regexp_string "
  • \n" -let li_end_re = Str.regexp_string "\n
  • " - -let normalize_html s = - Str.global_replace li_end_re "" - (Str.global_replace li_begin_re "
  • " s) - let with_open_in fn f = let ic = open_in fn in Fun.protect ~finally:(fun () -> close_in_noerr ic) (fun () -> f ic) +(* FIXME: Resolve preferred backend *) + +let replacements = + [ Str.regexp_string "
    ", "
    \n" + ; Str.regexp_string "
    \n
     Omd_tyxml.of_fragment
    +  |> List.map tyxml_elt_to_string
    +  |> String.concat ""
    +
    +let denormalize_html str =
    +  List.fold_left (fun s (re, rep) -> Str.global_replace re rep s) str replacements
    +
     let () =
       with_open_in Sys.argv.(1) @@ fun ic ->
    -  print_string (normalize_html (Omd.to_html (Omd.of_channel ic)))
    +  ic
    +  |> Omd.of_channel
    +  |> html_of_omd
    +  |> Common.normalize_html
    +  |> denormalize_html
    +  |> print_string