Skip to content

Commit

Permalink
html2: Improve spec-compliance of the InHead parser state
Browse files: browse the repository at this point in the history
  • Loading branch information
robinlinden committed Nov 2, 2023
1 parent 458fefe commit de33140
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 6 deletions.
32 changes: 26 additions & 6 deletions html2/parser_states.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ std::optional<InsertionMode> BeforeHead::process(IActions &a, html2::Token const
}

// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
// TODO(robinlinden): Template nonsense.
std::optional<InsertionMode> InHead::process(IActions &a, html2::Token const &token) {
if (is_boring_whitespace(token)) {
// TODO(robinlinden): Should be inserting characters, but our last
Expand All @@ -286,36 +287,48 @@ std::optional<InsertionMode> InHead::process(IActions &a, html2::Token const &to
return {};
}

if (std::holds_alternative<html2::DoctypeToken>(token)) {
// Parse error.
return {};
}

if (auto const *start = std::get_if<html2::StartTagToken>(&token)) {
auto const &name = start->tag_name;
// These branches won't be the same once we're more spec-complete.
// NOLINTNEXTLINE(bugprone-branch-clone)
if (name == "html") {
InBody{}.process(a, token);
return {};
}

if (name == "base" || name == "basefont" || name == "bgsound" || name == "link") {
a.insert_element_for(*start);
a.pop_current_node();
// TODO(robinlinden): Acknowledge the token's self-closing flag, if it is set.
return {};
}

if (name == "meta") {
a.insert_element_for(*start);
a.pop_current_node();
// TODO(robinlinden): Acknowledge the token's self-closing flag, if it is set.
// TODO(robinlinden): Active speculative HTML parser nonsense.
return {};
}

if (name == "title") {
return generic_rcdata_parse(a, *start);
}

if ((name == "noscript" && a.scripting()) || name == "noframes" || name == "style") {
return generic_raw_text_parse(a, *start);
}

if (name == "noscript" && !a.scripting()) {
a.insert_element_for(*start);
return InHeadNoscript{};
}

if (name == "style") {
return generic_raw_text_parse(a, *start);
}

if (name == "script") {
// TODO(robinlinden): A lot of things. See spec.
a.insert_element_for(*start);
a.set_tokenizer_state(html2::State::ScriptData);
a.store_original_insertion_mode(InHead{});
Expand All @@ -327,6 +340,13 @@ std::optional<InsertionMode> InHead::process(IActions &a, html2::Token const &to
a.pop_current_node();
return AfterHead{};
}

if (end->tag_name == "body" || end->tag_name == "html" || end->tag_name == "br") {
// Fall through to "anything else."
} else {
// Parse error.
return {};
}
}

assert(a.current_node_name() == "head");
Expand Down
17 changes: 17 additions & 0 deletions html2/parser_states_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,23 @@ void in_head_tests() {
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head", {}, {dom::Element{"meta"}}}}});
});

// A DOCTYPE appearing after <head> has been opened is expected to leave the
// tree untouched: the parsed document is just an <html> element containing an
// empty <head>.
etest::test("InHead: doctype", [] {
auto res = parse("<head><!doctype HTML>", {});
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
});

// A stray </p> end tag inside <head> is expected to be dropped: no element is
// created for it and <head> stays empty.
etest::test("InHead: end tag parse error", [] {
auto res = parse("<head></p>", {});
expect_eq(res.document.html(), dom::Element{"html", {}, {dom::Element{"head"}}});
});

// A second <html> start tag encountered while in <head> is expected to merge
// its attributes onto the existing root element rather than create a new node:
// "foo" keeps its original value ("bar"), the previously-absent "hello" is
// added, and <head> itself remains empty.
etest::test("InHead: html attributes are reparented", [] {
auto res = parse("<html foo=bar><head><html foo=baz hello=world>", {});
// The first child of the root is expected to be the <head> element.
auto const &head = std::get<dom::Element>(res.document.html().children.at(0));
expect_eq(res.document.html().attributes, dom::AttrMap{{"foo", "bar"}, {"hello", "world"}});
expect_eq(head, dom::Element{"head"});
});

etest::test("InHead: base, basefont, bgsound, link", [] {
auto res = parse("<base> <basefont> <bgsound> <link>", {});

Expand Down

0 comments on commit de33140

Please sign in to comment.