Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

css2: Support tokenizing %-tokens #1153

Merged
merged 3 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions css2/token.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
// SPDX-FileCopyrightText: 2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2024-2025 Robin Lindén <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause

Expand Down Expand Up @@ -69,7 +69,7 @@ struct NumberToken {
};

struct PercentageToken {
std::variant<int, double> data{};
std::variant<std::int32_t, double> data{};
[[nodiscard]] bool operator==(PercentageToken const &) const = default;

[[nodiscard]] constexpr bool is_integer() const { return std::holds_alternative<int>(data); }
Expand Down
42 changes: 32 additions & 10 deletions css2/tokenizer.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2021-2025 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause
Expand All @@ -18,6 +18,7 @@
#include <limits>
#include <optional>
#include <string>
#include <string_view>
#include <system_error>
#include <tuple>
#include <utility>
Expand All @@ -42,6 +43,21 @@ constexpr bool is_digit(std::optional<char> c) {

} // namespace

// Returns the enumerator name of the given ParseError as text. Any value that
// doesn't correspond to one of the cases below yields "Unknown parse error".
std::string_view to_string(ParseError e) {
    std::string_view name = "Unknown parse error";

    // Intentionally no default label: that keeps the switch checkable for
    // enumerators that haven't been given a name here.
    switch (e) {
        case ParseError::EofInComment:
            name = "EofInComment";
            break;
        case ParseError::EofInEscapeSequence:
            name = "EofInEscapeSequence";
            break;
        case ParseError::EofInString:
            name = "EofInString";
            break;
        case ParseError::NewlineInString:
            name = "NewlineInString";
            break;
    }

    return name;
}

void Tokenizer::run() {
while (true) {
switch (state_) {
Expand Down Expand Up @@ -77,8 +93,7 @@ void Tokenizer::run() {
continue;
case '+': {
if (inputs_starts_number(*c)) {
auto number = consume_number(*c);
emit(NumberToken{number});
emit(consume_a_numeric_token(*c));
} else {
emit(DelimToken{'+'});
}
Expand All @@ -89,8 +104,7 @@ void Tokenizer::run() {
continue;
case '-': {
if (inputs_starts_number(*c)) {
auto number = consume_number(*c);
emit(NumberToken{number});
emit(consume_a_numeric_token(*c));
continue;
}

Expand All @@ -110,8 +124,7 @@ void Tokenizer::run() {
}
case '.': {
if (auto next_input = peek_input(0); is_digit(next_input)) {
auto number = consume_number(*c);
emit(NumberToken{number});
emit(consume_a_numeric_token(*c));
continue;
}

Expand Down Expand Up @@ -146,9 +159,7 @@ void Tokenizer::run() {
case '7':
case '8':
case '9': {
// TODO(robinlinden): https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token
auto number = consume_number(*c);
emit(NumberToken{number});
emit(consume_a_numeric_token(*c));
continue;
}
default:
Expand Down Expand Up @@ -527,4 +538,15 @@ std::string Tokenizer::consume_an_escaped_code_point() {
return std::string{*c};
}

// Consumes a number beginning with first_byte and wraps it in a token. When the
// number is immediately followed by '%', that character is consumed as well and
// a PercentageToken is produced; otherwise the result is a NumberToken.
Token Tokenizer::consume_a_numeric_token(char first_byte) {
    // TODO(robinlinden): https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token
    auto value = consume_number(first_byte);

    // Common case first: nothing special after the number.
    if (peek_input(0) != '%') {
        return NumberToken{value};
    }

    // Swallow the '%' so it isn't tokenized again on its own.
    std::ignore = consume_next_input_character();
    return PercentageToken{value};
}

} // namespace css2
5 changes: 4 additions & 1 deletion css2/tokenizer.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2021-2025 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause
Expand Down Expand Up @@ -39,6 +39,8 @@ enum class ParseError : std::uint8_t {
NewlineInString,
};

std::string_view to_string(ParseError);

class Tokenizer {
public:
Tokenizer(std::string_view input, std::function<void(Token &&)> on_emit, std::function<void(ParseError)> on_error)
Expand Down Expand Up @@ -70,6 +72,7 @@ class Tokenizer {

std::variant<std::int32_t, double> consume_number(char first_byte);
std::string consume_an_escaped_code_point();
Token consume_a_numeric_token(char first_byte);
};

} // namespace css2
Expand Down
61 changes: 58 additions & 3 deletions css2/tokenizer_test.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2021-2025 Robin Lindén <[email protected]>
// SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
//
// SPDX-License-Identifier: BSD-2-Clause
Expand All @@ -12,6 +12,7 @@
#include <cstdint>
#include <limits>
#include <source_location>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
Expand All @@ -27,8 +28,25 @@ constexpr char const *kReplacementCharacter = "\xef\xbf\xbd";
class TokenizerOutput {
public:
// On destruction, reports a failure through `a` (attributed to `loc`) if any
// tokens or errors are still left in `tokens` / `errors`.
~TokenizerOutput() {
// NOTE(review): the two expect() calls below and the if-blocks that follow
// check the same conditions; this view appears to show both sides of the
// diff - confirm only one form is present in the real file.
a.expect(tokens.empty(), "Not all tokens were handled", loc);
a.expect(errors.empty(), "Not all errors were handled", loc);
if (!tokens.empty()) {
// Build a message listing every leftover token, one per line.
std::stringstream ss;
ss << "Not all tokens were handled. Unhandled:\n";
for (auto const &t : tokens) {
ss << "* " << to_string(t) << '\n';
}

a.expectation_failure(ss.view(), loc);
}

if (!errors.empty()) {
// Same as above, but for leftover parse errors.
std::stringstream ss;
ss << "Not all errors were handled. Unhandled:\n";
for (auto e : errors) {
ss << "* " << to_string(e) << '\n';
}

a.expectation_failure(ss.view(), loc);
}
}

etest::IActions &a;
Expand Down Expand Up @@ -68,6 +86,23 @@ void expect_error(

int main() {
etest::Suite s{};

// Every ParseError from the first to the last enumerator must have its own
// message, and the first value past the last enumerator must hit the fallback.
s.add_test("to_string(ParseError)", [](etest::IActions &a) {
    static constexpr auto kFirstError = ParseError::EofInComment;
    static constexpr auto kLastError = ParseError::NewlineInString;

    auto error = static_cast<int>(kFirstError);
    // The walk below assumes the enumerators start at 0 and are contiguous.
    a.expect_eq(error, 0);

    while (error <= static_cast<int>(kLastError)) {
        a.expect(to_string(static_cast<ParseError>(error)) != "Unknown parse error",
                std::to_string(error) + " is missing an error message");
        error += 1;
    }

    // The loop exits with `error` already one past kLastError, so probe that
    // value directly. Probing `error + 1` would skip it and fail to notice an
    // enumerator that was added with a message but without updating kLastError.
    a.expect_eq(to_string(static_cast<ParseError>(error)), "Unknown parse error");
});

s.add_test("delimiter", [](etest::IActions &a) {
auto output = run_tokenizer(a, "?");

Expand Down Expand Up @@ -404,6 +439,26 @@ int main() {
expect_token(output, NumberToken{.data = 1});
});

// An integer directly followed by '%' becomes a PercentageToken holding that integer.
s.add_test("percentage: integer", [](etest::IActions &a) {
auto output = run_tokenizer(a, "13%");
expect_token(output, PercentageToken{.data = 13});
});

// The input value is larger than the biggest std::int32_t; the test expects
// the token to hold exactly std::int32_t's maximum.
s.add_test("percentage: large", [](etest::IActions &a) {
auto output = run_tokenizer(a, "12147483647%");
expect_token(output, PercentageToken{std::numeric_limits<std::int32_t>::max()});
});

// Mirror of the case above for a value below the smallest std::int32_t.
s.add_test("percentage: large negative", [](etest::IActions &a) {
auto output = run_tokenizer(a, "-12147483648%");
expect_token(output, PercentageToken{std::numeric_limits<std::int32_t>::min()});
});

// A number with a fractional part keeps its value as a double in the token.
s.add_test("percentage: number", [](etest::IActions &a) {
auto output = run_tokenizer(a, "13.25%");
expect_token(output, PercentageToken{.data = 13.25});
});

s.add_test("plus: delim", [](etest::IActions &a) {
auto output = run_tokenizer(a, "+hello");
expect_token(output, DelimToken{'+'});
Expand Down