From 7eb25731786f5cc2ca794c2bac0350acc5473603 Mon Sep 17 00:00:00 2001 From: Boshen Date: Mon, 8 Jan 2024 12:37:20 +0800 Subject: [PATCH] refactor(parser): parse BigInt lazily (#1924) This PR partially fixes #1803 and is part of #1880. BigInt is removed from the `Token` value, so that the token size can be reduced once we have removed all the variants. `Token` is now also `Copy`, which removes all the `clone` and `drop` calls. This yields a 5% performance improvement for the parser. --- crates/oxc_parser/src/cursor.rs | 2 +- crates/oxc_parser/src/js/expression.rs | 11 ++++++----- crates/oxc_parser/src/lexer/mod.rs | 19 +++++++++++-------- crates/oxc_parser/src/lexer/token.rs | 14 +++----------- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/crates/oxc_parser/src/cursor.rs b/crates/oxc_parser/src/cursor.rs index 02f377e200db0..ff4dbe798de14 100644 --- a/crates/oxc_parser/src/cursor.rs +++ b/crates/oxc_parser/src/cursor.rs @@ -236,7 +236,7 @@ impl<'a> Parser<'a> { pub(crate) fn checkpoint(&self) -> ParserCheckpoint<'a> { ParserCheckpoint { lexer: self.lexer.checkpoint(), - cur_token: self.token.clone(), + cur_token: self.token, prev_span_end: self.prev_token_end, errors_pos: self.errors.len(), } diff --git a/crates/oxc_parser/src/js/expression.rs b/crates/oxc_parser/src/js/expression.rs index b0dd4c0184fa1..72594a12e08c8 100644 --- a/crates/oxc_parser/src/js/expression.rs +++ b/crates/oxc_parser/src/js/expression.rs @@ -17,6 +17,7 @@ use super::{ }; use crate::{ diagnostics, + lexer::parse_big_int, lexer::{Kind, TokenValue}, list::SeparatedList, Context, Parser, @@ -306,12 +307,12 @@ impl<'a> Parser<'a> { Kind::Hex => BigintBase::Hex, _ => return Err(self.unexpected()), }; - let value = match self.cur_kind() { - kind if kind.is_number() => self.cur_token().value.as_bigint(), - _ => return Err(self.unexpected()), - }; + let token = self.cur_token(); + let src = self.cur_src().strip_suffix('n').unwrap(); + let value = parse_big_int(src, token.kind) + 
.map_err(|err| diagnostics::InvalidNumber(err, token.span()))?; self.bump_any(); - Ok(BigintLiteral { span: self.end_span(span), value, base }) + Ok(self.ast.bigint_literal(self.end_span(span), value, base)) } pub(crate) fn parse_literal_regexp(&mut self) -> Result { diff --git a/crates/oxc_parser/src/lexer/mod.rs b/crates/oxc_parser/src/lexer/mod.rs index 7933e6379a8d8..1b016f9101645 100644 --- a/crates/oxc_parser/src/lexer/mod.rs +++ b/crates/oxc_parser/src/lexer/mod.rs @@ -26,9 +26,9 @@ use oxc_syntax::{ }; pub use token::{RegExp, Token, TokenValue}; -pub use self::kind::Kind; +pub use self::{kind::Kind, number::parse_big_int}; use self::{ - number::{parse_big_int, parse_float, parse_int}, + number::{parse_float, parse_int}, string_builder::AutoCow, trivia_builder::TriviaBuilder, }; @@ -105,7 +105,7 @@ impl<'a> Lexer<'a> { pub fn checkpoint(&self) -> LexerCheckpoint<'a> { LexerCheckpoint { chars: self.current.chars.clone(), - token: self.current.token.clone(), + token: self.current.token, errors_pos: self.errors.len(), } } @@ -178,7 +178,9 @@ impl<'a> Lexer<'a> { self.current.token.kind = kind; self.current.token.end = self.offset(); debug_assert!(self.current.token.start <= self.current.token.end); - std::mem::take(&mut self.current.token) + let token = self.current.token; + self.current.token = Token::default(); + token } /// Re-tokenize the current `/` or `/=` and return `RegExp` @@ -299,10 +301,11 @@ impl<'a> Lexer<'a> { fn set_numeric_value(&mut self, kind: Kind, src: &'a str) { let value = match kind { Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => { - src.strip_suffix('n').map_or_else( - || parse_int(src, kind).map(TokenValue::Number), - |src| parse_big_int(src, kind).map(TokenValue::BigInt), - ) + if src.ends_with('n') { + // BigInt is parsed lazily in the parser + return; + } + parse_int(src, kind).map(TokenValue::Number) } Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => { parse_float(src).map(TokenValue::Number) diff 
--git a/crates/oxc_parser/src/lexer/token.rs b/crates/oxc_parser/src/lexer/token.rs index f986e538a3e9b..b7ea930395794 100644 --- a/crates/oxc_parser/src/lexer/token.rs +++ b/crates/oxc_parser/src/lexer/token.rs @@ -5,7 +5,7 @@ use oxc_span::Span; use super::kind::Kind; -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone, Copy, Default)] pub struct Token<'a> { /// Token Kind pub kind: Kind, @@ -38,16 +38,15 @@ impl<'a> Token<'a> { } } -#[derive(Debug, Clone)] +#[derive(Debug, Copy, Clone)] pub enum TokenValue<'a> { None, Number(f64), - BigInt(num_bigint::BigInt), String(&'a str), RegExp(RegExp<'a>), } -#[derive(Debug, Clone)] +#[derive(Debug, Copy, Clone)] pub struct RegExp<'a> { pub pattern: &'a str, pub flags: RegExpFlags, @@ -67,13 +66,6 @@ impl<'a> TokenValue<'a> { } } - pub fn as_bigint(&self) -> num_bigint::BigInt { - match self { - Self::BigInt(s) => s.clone(), - _ => unreachable!("expected bigint!"), - } - } - pub fn as_regex(&self) -> &RegExp<'a> { match self { Self::RegExp(regex) => regex,