diff --git a/html2/parser_states.cpp b/html2/parser_states.cpp
index 86851521..7f8c83a7 100644
--- a/html2/parser_states.cpp
+++ b/html2/parser_states.cpp
@@ -274,6 +274,7 @@ std::optional BeforeHead::process(IActions &a, html2::Token const
}
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
+// TODO(robinlinden): Handle "template" start and end tags.
std::optional InHead::process(IActions &a, html2::Token const &token) {
if (is_boring_whitespace(token)) {
// TODO(robinlinden): Should be inserting characters, but our last
@@ -286,19 +287,30 @@ std::optional InHead::process(IActions &a, html2::Token const &to
return {};
}
+ if (std::holds_alternative(token)) {
+ // Parse error.
+ return {};
+ }
+
if (auto const *start = std::get_if(&token)) {
auto const &name = start->tag_name;
- // These branches won't be the same once we're more spec-complete.
- // NOLINTNEXTLINE(bugprone-branch-clone)
+ if (name == "html") {
+ InBody{}.process(a, token);
+ return {};
+ }
+
if (name == "base" || name == "basefont" || name == "bgsound" || name == "link") {
a.insert_element_for(*start);
a.pop_current_node();
+ // TODO(robinlinden): Acknowledge the token's self-closing flag, if it is set.
return {};
}
if (name == "meta") {
a.insert_element_for(*start);
a.pop_current_node();
+ // TODO(robinlinden): Acknowledge the token's self-closing flag, if it is set.
+ // TODO(robinlinden): If the active speculative HTML parser is null, change the encoding based on charset/http-equiv.
return {};
}
@@ -306,16 +318,17 @@ std::optional InHead::process(IActions &a, html2::Token const &to
return generic_rcdata_parse(a, *start);
}
+ if ((name == "noscript" && a.scripting()) || name == "noframes" || name == "style") {
+ return generic_raw_text_parse(a, *start);
+ }
+
if (name == "noscript" && !a.scripting()) {
a.insert_element_for(*start);
return InHeadNoscript{};
}
- if (name == "style") {
- return generic_raw_text_parse(a, *start);
- }
-
if (name == "script") {
+ // TODO(robinlinden): A lot of things. See spec.
a.insert_element_for(*start);
a.set_tokenizer_state(html2::State::ScriptData);
a.store_original_insertion_mode(InHead{});
@@ -327,6 +340,13 @@ std::optional InHead::process(IActions &a, html2::Token const &to
a.pop_current_node();
return AfterHead{};
}
+
+ if (end->tag_name == "body" || end->tag_name == "html" || end->tag_name == "br") {
+ // Fall through to "anything else."
+ } else {
+ // Parse error.
+ return {};
+ }
}
assert(a.current_node_name() == "head");
diff --git a/html2/parser_states_test.cpp b/html2/parser_states_test.cpp
index 05ce7249..c6585ada 100644
--- a/html2/parser_states_test.cpp
+++ b/html2/parser_states_test.cpp
@@ -171,6 +171,23 @@ void in_head_tests() {
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head", {}, {dom::Element{"meta"}}}}});
});
+ etest::test("InHead: doctype", [] {
+ auto res = parse("", {});
+ expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
+ });
+
+ etest::test("InHead: end tag parse error", [] {
+ auto res = parse("
", {});
+ expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
+ });
+
+ etest::test("InHead: html attributes are reparented", [] {
+ auto res = parse("", {});
+ auto const &head = std::get(res.document.html().children.at(0));
+ expect_eq(res.document.html().attributes, dom::AttrMap{{"foo", "bar"}, {"hello", "world"}});
+ expect_eq(head, dom::Element{"head"});
+ });
+
etest::test("InHead: base, basefont, bgsound, link", [] {
auto res = parse(" ", {});