diff --git a/html2/parser_states.cpp b/html2/parser_states.cpp index 86851521..7f8c83a7 100644 --- a/html2/parser_states.cpp +++ b/html2/parser_states.cpp @@ -274,6 +274,7 @@ std::optional BeforeHead::process(IActions &a, html2::Token const } // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead +// TODO(robinlinden): Template nonsense. std::optional InHead::process(IActions &a, html2::Token const &token) { if (is_boring_whitespace(token)) { // TODO(robinlinden): Should be inserting characters, but our last @@ -286,19 +287,30 @@ std::optional InHead::process(IActions &a, html2::Token const &to return {}; } + if (std::holds_alternative(token)) { + // Parse error. + return {}; + } + if (auto const *start = std::get_if(&token)) { auto const &name = start->tag_name; - // These branches won't be the same once we're more spec-complete. - // NOLINTNEXTLINE(bugprone-branch-clone) + if (name == "html") { + InBody{}.process(a, token); + return {}; + } + if (name == "base" || name == "basefont" || name == "bgsound" || name == "link") { a.insert_element_for(*start); a.pop_current_node(); + // TODO(robinlinden): Acknowledge the token's self-closing flag, if it is set. return {}; } if (name == "meta") { a.insert_element_for(*start); a.pop_current_node(); + // TODO(robinlinden): Acknowledge the token's self-closing flag, if it is set. + // TODO(robinlinden): Active speculative HTML parser nonsense. return {}; } @@ -306,16 +318,17 @@ std::optional InHead::process(IActions &a, html2::Token const &to return generic_rcdata_parse(a, *start); } + if ((name == "noscript" && a.scripting()) || name == "noframes" || name == "style") { + return generic_raw_text_parse(a, *start); + } + if (name == "noscript" && !a.scripting()) { a.insert_element_for(*start); return InHeadNoscript{}; } - if (name == "style") { - return generic_raw_text_parse(a, *start); - } - if (name == "script") { + // TODO(robinlinden): A lot of things. See spec. a.insert_element_for(*start); a.set_tokenizer_state(html2::State::ScriptData); a.store_original_insertion_mode(InHead{}); @@ -327,6 +340,13 @@ std::optional InHead::process(IActions &a, html2::Token const &to a.pop_current_node(); return AfterHead{}; } + + if (end->tag_name == "body" || end->tag_name == "html" || end->tag_name == "br") { + // Fall through to "anything else." + } else { + // Parse error. + return {}; + } } assert(a.current_node_name() == "head"); diff --git a/html2/parser_states_test.cpp b/html2/parser_states_test.cpp index 05ce7249..c6585ada 100644 --- a/html2/parser_states_test.cpp +++ b/html2/parser_states_test.cpp @@ -171,6 +171,23 @@ void in_head_tests() { expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head", {}, {dom::Element{"meta"}}}}}); }); + etest::test("InHead: doctype", [] { + auto res = parse("", {}); + expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}}); + }); + + etest::test("InHead: end tag parse error", [] { + auto res = parse("

", {}); + expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}}); + }); + + etest::test("InHead: html attributes are reparented", [] { + auto res = parse("", {}); + auto const &head = std::get(res.document.html().children.at(0)); + expect_eq(res.document.html().attributes, dom::AttrMap{{"foo", "bar"}, {"hello", "world"}}); + expect_eq(head, dom::Element{"head"}); + }); + etest::test("InHead: base, basefont, bgsound, link", [] { auto res = parse(" ", {});