From f7812e35a6d4a1160ecc7e5f94185787676efa5c Mon Sep 17 00:00:00 2001 From: cakevm <cakevm@proton.me> Date: Fri, 10 Jan 2025 20:11:39 +0100 Subject: [PATCH] Report error for invalid hex (#14) --- CHANGELOG.md | 1 + crates/lexer/src/lib.rs | 13 +++++++------ crates/lexer/tests/constant.rs | 14 ++++++++++++++ crates/utils/src/error.rs | 8 ++++++++ 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eedf29..22ad1a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ## [Unreleased] - Use latest stable Rust version 1.84 +- Report error for invalid hex literals `0x0x` ## [1.0.1] - 2025-01-10 - Validate that a constant hex literal is not longer than 32 bytes diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs index a8a1948..aa866d6 100644 --- a/crates/lexer/src/lib.rs +++ b/crates/lexer/src/lib.rs @@ -444,17 +444,18 @@ impl<'a> Lexer<'a> { fn eat_hex_digit(&mut self, initial_char: char) -> TokenResult { let (integer_str, mut start, end) = self.eat_while(Some(initial_char), |ch| ch.is_ascii_hexdigit() | (ch == 'x')); + if integer_str.matches('x').count() != 1 { + return Err(LexicalError::new( + LexicalErrorKind::InvalidHexLiteral(integer_str.clone()), + self.source.relative_span_by_pos(start, end), + )); + } - // TODO: check for sure that we have a correct hex string, eg. 0x56 and not 0x56x34 let kind = if self.context == Context::CodeTableBody { // In codetables, the bytecode provided is of arbitrary length. We pass // the code as an Ident, and it is appended to the end of the runtime // bytecode in codegen. - if &integer_str[0..2] == "0x" { - TokenKind::Ident(integer_str[2..].to_owned()) - } else { - TokenKind::Ident(integer_str) - } + TokenKind::Ident(integer_str[2..].to_owned()) } else { // Only max 32 Bytes is allowed for hex string 0x. 2 + 64 = 66 characters if integer_str.len() > 66 { diff --git a/crates/lexer/tests/constant.rs b/crates/lexer/tests/constant.rs index 7f4a5dd..b970e98 100644 --- a/crates/lexer/tests/constant.rs +++ b/crates/lexer/tests/constant.rs @@ -18,3 +18,17 @@ fn constant_hex_literal_too_long() { } panic!("Error did not occurred") } + +#[test] +fn constant_invalid_hex_literal() { + let source = "#define constant TEST = 0x0x"; + let flattened_source = FullFileSource { source, file: None, spans: vec![] }; + let lexer = Lexer::new(flattened_source); + for tok in lexer { + if tok.is_err() { + assert_eq!(tok.unwrap_err().kind, LexicalErrorKind::InvalidHexLiteral("0x0x".to_string())); + return; + } + } + panic!("Error did not occurred") +} diff --git a/crates/utils/src/error.rs b/crates/utils/src/error.rs index 298f5ec..9724327 100644 --- a/crates/utils/src/error.rs +++ b/crates/utils/src/error.rs @@ -96,6 +96,8 @@ pub enum LexicalErrorKind { InvalidCharacter(char), /// Invalid hex literal HexLiteralTooLong(String), + /// Invalid Hex Literal + InvalidHexLiteral(String), /// Invalid Array Size /// String param expected to be usize parsable InvalidArraySize(String), @@ -123,6 +125,9 @@ impl<W: Write> Report<W> for LexicalError { LexicalErrorKind::HexLiteralTooLong(str) => { write!(f.out, "Hex literal has more than 32 bytes '{str}'") } + LexicalErrorKind::InvalidHexLiteral(str) => { + write!(f.out, "Invalid Hex literal '{str}'") + } } } } @@ -296,6 +301,9 @@ impl fmt::Display for CompilerError { LexicalErrorKind::HexLiteralTooLong(h) => { write!(f, "\nError: Hex literal has more than 32 bytes: \"{}\" {}{}\n", h, le.span.identifier(), le.span.source_seg()) } + LexicalErrorKind::InvalidHexLiteral(h) => { + write!(f, "\nError: Invalid Hex literal: \"{}\" {}{}\n", h, le.span.identifier(), le.span.source_seg()) + } }, CompilerError::FileUnpackError(ue) => match ue { UnpackError::InvalidDirectory(id) => {