robinlinden · robinlinden · Jan 15, 2025 · Jan 12, 2025 · Jan 12, 2025 · Jan 12, 2025
diff --git a/css2/token.h b/css2/token.h
@@ -1,5 +1,5 @@
 // SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
-// SPDX-FileCopyrightText: 2024 Robin Lindén <[email protected]>
+// SPDX-FileCopyrightText: 2024-2025 Robin Lindén <[email protected]>
 //
 // SPDX-License-Identifier: BSD-2-Clause
 
@@ -69,7 +69,7 @@ struct NumberToken {
 };
 
 struct PercentageToken {
-    std::variant<int, double> data{};
+    std::variant<std::int32_t, double> data{};
     [[nodiscard]] bool operator==(PercentageToken const &) const = default;
 
     [[nodiscard]] constexpr bool is_integer() const { return std::holds_alternative<int>(data); }

diff --git a/css2/tokenizer.cpp b/css2/tokenizer.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <[email protected]>
+// SPDX-FileCopyrightText: 2021-2025 Robin Lindén <[email protected]>
 // SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
 //
 // SPDX-License-Identifier: BSD-2-Clause
@@ -18,6 +18,7 @@
 #include <limits>
 #include <optional>
 #include <string>
+#include <string_view>
 #include <system_error>
 #include <tuple>
 #include <utility>
@@ -42,6 +43,21 @@ constexpr bool is_digit(std::optional<char> c) {
 
 } // namespace
 
+// Returns the enumerator name of the given ParseError as text.
+// Values that match none of the cases below yield "Unknown parse error".
+std::string_view to_string(ParseError e) {
+    // Start from the fallback; a matching case overwrites it.
+    std::string_view name{"Unknown parse error"};
+    switch (e) {
+        case ParseError::EofInComment: name = "EofInComment"; break;
+        case ParseError::EofInEscapeSequence: name = "EofInEscapeSequence"; break;
+        case ParseError::EofInString: name = "EofInString"; break;
+        case ParseError::NewlineInString: name = "NewlineInString"; break;
+    }
+
+    return name;
+}
+
 void Tokenizer::run() {
     while (true) {
         switch (state_) {
@@ -77,8 +93,7 @@ void Tokenizer::run() {
                         continue;
                     case '+': {
                         if (inputs_starts_number(*c)) {
-                            auto number = consume_number(*c);
-                            emit(NumberToken{number});
+                            emit(consume_a_numeric_token(*c));
                         } else {
                             emit(DelimToken{'+'});
                         }
@@ -89,8 +104,7 @@ void Tokenizer::run() {
                         continue;
                     case '-': {
                         if (inputs_starts_number(*c)) {
-                            auto number = consume_number(*c);
-                            emit(NumberToken{number});
+                            emit(consume_a_numeric_token(*c));
                             continue;
                         }
 
@@ -110,8 +124,7 @@ void Tokenizer::run() {
                     }
                     case '.': {
                         if (auto next_input = peek_input(0); is_digit(next_input)) {
-                            auto number = consume_number(*c);
-                            emit(NumberToken{number});
+                            emit(consume_a_numeric_token(*c));
                             continue;
                         }
 
@@ -146,9 +159,7 @@ void Tokenizer::run() {
                     case '7':
                     case '8':
                     case '9': {
-                        // TODO(robinlinden): https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token
-                        auto number = consume_number(*c);
-                        emit(NumberToken{number});
+                        emit(consume_a_numeric_token(*c));
                         continue;
                     }
                     default:
@@ -527,4 +538,15 @@ std::string Tokenizer::consume_an_escaped_code_point() {
     return std::string{*c};
 }
 
+// Consumes a number; if a '%' follows directly, consumes it too and yields a PercentageToken.
+// TODO(robinlinden): https://www.w3.org/TR/css-syntax-3/#consume-a-numeric-token
+Token Tokenizer::consume_a_numeric_token(char first_byte) {
+    auto value = consume_number(first_byte);
+    if (peek_input(0) != '%') {
+        return NumberToken{value};
+    }
+    std::ignore = consume_next_input_character();
+    return PercentageToken{value};
+}
+
 } // namespace css2
diff --git a/css2/tokenizer.h b/css2/tokenizer.h
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <[email protected]>
+// SPDX-FileCopyrightText: 2021-2025 Robin Lindén <[email protected]>
 // SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
 //
 // SPDX-License-Identifier: BSD-2-Clause
@@ -39,6 +39,8 @@ enum class ParseError : std::uint8_t {
     NewlineInString,
 };
 
+std::string_view to_string(ParseError);
+
 class Tokenizer {
 public:
     Tokenizer(std::string_view input, std::function<void(Token &&)> on_emit, std::function<void(ParseError)> on_error)
@@ -70,6 +72,7 @@ class Tokenizer {
 
     std::variant<std::int32_t, double> consume_number(char first_byte);
     std::string consume_an_escaped_code_point();
+    Token consume_a_numeric_token(char first_byte);
 };
 
 } // namespace css2

diff --git a/css2/tokenizer_test.cpp b/css2/tokenizer_test.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2021-2024 Robin Lindén <[email protected]>
+// SPDX-FileCopyrightText: 2021-2025 Robin Lindén <[email protected]>
 // SPDX-FileCopyrightText: 2022 Mikael Larsson <[email protected]>
 //
 // SPDX-License-Identifier: BSD-2-Clause
@@ -12,6 +12,7 @@
 #include <cstdint>
 #include <limits>
 #include <source_location>
+#include <sstream>
 #include <string>
 #include <string_view>
 #include <utility>
@@ -27,8 +28,25 @@ constexpr char const *kReplacementCharacter = "\xef\xbf\xbd";
 class TokenizerOutput {
 public:
     ~TokenizerOutput() {
-        a.expect(tokens.empty(), "Not all tokens were handled", loc);
-        a.expect(errors.empty(), "Not all errors were handled", loc);
+        // Reports an expectation failure for anything left in a container when
+        // this object is destroyed, listing every leftover via to_string.
+        auto report_leftovers = [this](std::string_view header, auto const &leftovers) {
+            if (leftovers.empty()) {
+                return;
+            }
+
+            std::stringstream message;
+            message << header;
+            for (auto const &leftover : leftovers) {
+                message << "* " << to_string(leftover) << '\n';
+            }
+
+            a.expectation_failure(message.view(), loc);
+        };
+
+        // Tokens are checked before errors.
+        report_leftovers("Not all tokens were handled. Unhandled:\n", tokens);
+        report_leftovers("Not all errors were handled. Unhandled:\n", errors);
     }
 
     etest::IActions &a;
@@ -68,6 +86,23 @@ void expect_error(
 
 int main() {
     etest::Suite s{};
+
+    s.add_test("to_string(ParseError)", [](etest::IActions &a) {
+        static constexpr auto kFirstError = ParseError::EofInComment;
+        static constexpr auto kLastError = ParseError::NewlineInString;
+
+        auto error = static_cast<int>(kFirstError);
+        a.expect_eq(error, 0);
+        // Every value in [kFirstError, kLastError] must map to a dedicated name.
+        while (error <= static_cast<int>(kLastError)) {
+            a.expect(to_string(static_cast<ParseError>(error)) != "Unknown parse error",
+                    std::to_string(error) + " is missing an error message");
+            error += 1;
+        }
+        // The loop leaves error at kLastError + 1; if that value has a name, kLastError is stale.
+        a.expect_eq(to_string(static_cast<ParseError>(error)), "Unknown parse error");
+    });
+
     s.add_test("delimiter", [](etest::IActions &a) {
         auto output = run_tokenizer(a, "?");
 
@@ -404,6 +439,26 @@ int main() {
         expect_token(output, NumberToken{.data = 1});
     });
 
+    s.add_test("percentage: integer", [](etest::IActions &a) {
+        auto result = run_tokenizer(a, "13%");
+        expect_token(result, PercentageToken{.data = 13});
+    });
+
+    s.add_test("percentage: large", [](etest::IActions &a) {
+        auto result = run_tokenizer(a, "12147483647%"); // Above the int32 maximum.
+        expect_token(result, PercentageToken{.data = std::numeric_limits<std::int32_t>::max()});
+    });
+
+    s.add_test("percentage: large negative", [](etest::IActions &a) {
+        auto result = run_tokenizer(a, "-12147483648%"); // Below the int32 minimum.
+        expect_token(result, PercentageToken{.data = std::numeric_limits<std::int32_t>::min()});
+    });
+
+    s.add_test("percentage: number", [](etest::IActions &a) {
+        auto result = run_tokenizer(a, "13.25%");
+        expect_token(result, PercentageToken{.data = 13.25});
+    });
+
     s.add_test("plus: delim", [](etest::IActions &a) {
         auto output = run_tokenizer(a, "+hello");
         expect_token(output, DelimToken{'+'});