From 25cd61da2bb85d3dd2160f90249d46c0e3ac6df7 Mon Sep 17 00:00:00 2001
From: cakevm
Date: Fri, 10 Jan 2025 20:47:46 +0100
Subject: [PATCH] Improve lexer performance by removing some clones

---
 CHANGELOG.md            |  1 +
 crates/lexer/src/lib.rs | 34 ++++++++++++++--------------------
 2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 22ad1a4..d20ab3d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@
 ## [Unreleased]
 - Use latest stable Rust version 1.84
 - Report error for invalid hex literals `0x0x`
+- Improve lexer performance by 1-2% by removing unnecessary cloning
 
 ## [1.0.1] - 2025-01-10
 - Validate that a constant hex literal is not longer than 32 bytes
diff --git a/crates/lexer/src/lib.rs b/crates/lexer/src/lib.rs
index aa866d6..80a4da6 100644
--- a/crates/lexer/src/lib.rs
+++ b/crates/lexer/src/lib.rs
@@ -3,11 +3,8 @@ use huff_neo_utils::prelude::*;
 use lazy_static::lazy_static;
 use regex::Regex;
 use std::collections::HashMap;
-use std::{
-    iter::{Peekable, Zip},
-    ops::RangeFrom,
-    str::Chars,
-};
+use std::iter::Enumerate;
+use std::{iter::Peekable, str::Chars};
 
 lazy_static! {
     static ref TOKEN: HashMap = HashMap::from_iter(vec![
@@ -62,7 +59,7 @@ pub enum Context {
 pub struct Lexer<'a> {
     /// The source code as peekable chars.
     /// WARN: SHOULD NEVER BE MODIFIED!
-    pub chars: Peekable<Zip<Chars<'a>, RangeFrom<usize>>>,
+    pub chars: Peekable<Enumerate<Chars<'a>>>,
     position: usize,
     /// The previous lexed Token.
     /// NOTE: Cannot be a whitespace.
@@ -80,8 +77,7 @@ pub type TokenResult = Result;
 impl<'a> Lexer<'a> {
     pub fn new(source: FullFileSource<'a>) -> Self {
         Lexer {
-            // We zip with the character index here to ensure the first char has index 0
-            chars: source.source.chars().zip(0..).peekable(),
+            chars: source.source.chars().enumerate().peekable(),
             position: 0,
             lookback: None,
             eof: false,
@@ -92,15 +88,14 @@ impl<'a> Lexer<'a> {
 
     /// Consumes the next character
    pub fn consume(&mut self) -> Option<char> {
-        let (c, index) = self.chars.next()?;
+        let (index, c) = self.chars.next()?;
         self.position = index;
         Some(c)
     }
 
     /// Try to peek at the next character from the source
    pub fn peek(&mut self) -> Option<char> {
-        //self.chars.peek().copied()
-        self.chars.peek().map(|(c, _)| *c)
+        self.chars.peek().map(|(_, c)| *c)
     }
 
     fn next_token(&mut self) -> TokenResult {
@@ -170,15 +165,14 @@ impl<'a> Lexer<'a> {
             let keys = [TokenKind::Define, TokenKind::Include];
             for kind in keys.into_iter() {
                 let key = kind.to_string();
-                let peeked = word.clone();
-                if key == peeked {
+                if key == word {
                     found_kind = Some(kind);
                     break;
                 }
             }
 
-            if let Some(kind) = &found_kind {
-                Ok(kind.clone().into_token_with_span(self.source.relative_span_by_pos(start, end)))
+            if let Some(kind) = found_kind {
+                Ok(kind.into_token_with_span(self.source.relative_span_by_pos(start, end)))
             } else if self.context == Context::Global && self.peek().unwrap() == '[' {
                 Ok(TokenKind::Pound.into_token_with_span(self.source.relative_span_by_pos(self.position, self.position)))
             } else {
@@ -319,8 +313,8 @@ impl<'a> Lexer<'a> {
                 }
             }
 
-            let kind = if let Some(kind) = &found_kind {
-                kind.clone()
+            let kind = if let Some(kind) = found_kind {
+                kind
             } else if self.context == Context::MacroBody && BuiltinFunctionKind::try_from(&word).is_ok() {
                 TokenKind::BuiltinFunction(word)
             } else {
@@ -408,8 +402,8 @@ impl<'a> Lexer<'a> {
         let start = self.position;
 
         // This function is only called when we want to continue consuming a character of the same
-        // type. For example, we see a digit and we want to consume the whole integer
-        // Therefore, the current character which triggered this function will need to be appended
+        // type. For example, we see a digit, and we want to consume the whole integer.
+        // Therefore, the current character which triggered this function will need to be appended.
         let mut word = String::new();
         if let Some(init_char) = initial_char {
             word.push(init_char)
@@ -486,7 +480,7 @@ impl<'a> Lexer<'a> {
 
     /// Checks the previous token kind against the input.
     pub fn checked_lookback(&self, kind: TokenKind) -> bool {
-        self.lookback.clone().and_then(|t| if t.kind == kind { Some(true) } else { None }).is_some()
+        self.lookback.as_ref().and_then(|t| if t.kind == kind { Some(true) } else { None }).is_some()
     }
 
     /// Check if a given keyword follows the keyword rules in the `source`. If not, it is a
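
For readers who want to try the patterns outside the crate, below is a minimal, self-contained sketch of what the diff does: driving the character stream with enumerate() instead of zip(0..), peeking by borrowing the (index, char) pair, and checking the previous token through Option::as_ref() rather than cloning it. MiniLexer and Tok are hypothetical stand-ins, not types from huff-neo; only the iterator and Option idioms are taken from the patch.

use std::iter::{Enumerate, Peekable};
use std::str::Chars;

// Hypothetical stand-in for the crate's Token type; not the real definition.
#[derive(Debug)]
struct Tok {
    kind: char,
}

struct MiniLexer<'a> {
    // enumerate() yields (index, char), replacing the zip(0..) pattern that
    // produced (char, index) and required the extra Zip/RangeFrom imports.
    chars: Peekable<Enumerate<Chars<'a>>>,
    position: usize,
    lookback: Option<Tok>,
}

impl<'a> MiniLexer<'a> {
    fn new(source: &'a str) -> Self {
        Self { chars: source.chars().enumerate().peekable(), position: 0, lookback: None }
    }

    // Consume the next character and record its index, as in the patched consume().
    fn consume(&mut self) -> Option<char> {
        let (index, c) = self.chars.next()?;
        self.position = index;
        Some(c)
    }

    // Peek by borrowing the (index, char) pair and copying only the char.
    fn peek(&mut self) -> Option<char> {
        self.chars.peek().map(|(_, c)| *c)
    }

    // Borrow the lookback token with as_ref() instead of cloning it,
    // mirroring the change to checked_lookback().
    fn checked_lookback(&self, kind: char) -> bool {
        self.lookback.as_ref().map(|t| t.kind == kind).unwrap_or(false)
    }
}

fn main() {
    let mut lexer = MiniLexer::new("abc");
    assert_eq!(lexer.consume(), Some('a'));
    assert_eq!(lexer.peek(), Some('b'));
    lexer.lookback = Some(Tok { kind: 'a' });
    assert!(lexer.checked_lookback('a'));
    assert!(!lexer.checked_lookback('b'));
    println!("position after first consume: {}", lexer.position);
}

The measurable win cited in the CHANGELOG comes from dropping the per-token String and Token clones on the hot path; the switch to enumerate() produces the same zero-based indices as zip(0..) and mainly simplifies the iterator type.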