refactor(parser): parse BigInt lazily (#1924)
This PR partially fixes #1803 and is part of #1880.

BigInt is removed from the `Token` value, so that the token size can be
reduced once all of the variants are removed.

`Token` is now also `Copy`, which removes all the `clone` and `drop`
calls.

This yields a 5% performance improvement for the parser.
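
A minimal sketch of the size/`Copy` argument (the types below are simplified stand-ins, not the real oxc definitions, and exact byte counts are platform-dependent): an enum carrying an owned `num_bigint::BigInt` is both larger and impossible to mark `Copy`, so every token hand-off needs `clone`/`drop`, while the slimmed-down value is plain data.

```rust
// Hypothetical stand-ins for illustration only.
use std::mem::size_of;

// With an owned BigInt variant: larger, and `Copy` cannot be derived.
#[derive(Debug, Clone)]
enum FatValue<'a> {
    None,
    Number(f64),
    BigInt(num_bigint::BigInt), // owns a heap allocation
    String(&'a str),
}

// Without it: plain data only, so the enum (and a token containing it)
// can derive `Copy`.
#[derive(Debug, Clone, Copy)]
enum SlimValue<'a> {
    None,
    Number(f64),
    String(&'a str),
}

fn main() {
    println!("with BigInt:    {} bytes", size_of::<FatValue<'static>>());
    println!("without BigInt: {} bytes", size_of::<SlimValue<'static>>());

    let v = SlimValue::Number(1.0);
    let w = v; // bitwise copy: `v` stays usable, no clone()/drop() involved
    assert!(matches!((v, w), (SlimValue::Number(_), SlimValue::Number(_))));
}
```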
Boshen authored Jan 8, 2024
1 parent 149f53e commit 7eb2573
Showing 4 changed files with 21 additions and 25 deletions.
2 changes: 1 addition & 1 deletion crates/oxc_parser/src/cursor.rs
@@ -236,7 +236,7 @@ impl<'a> Parser<'a> {
     pub(crate) fn checkpoint(&self) -> ParserCheckpoint<'a> {
         ParserCheckpoint {
             lexer: self.lexer.checkpoint(),
-            cur_token: self.token.clone(),
+            cur_token: self.token,
             prev_span_end: self.prev_token_end,
             errors_pos: self.errors.len(),
         }
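
For illustration, a simplified sketch of the checkpoint/rewind pattern this touches (hypothetical minimal types; the real `ParserCheckpoint` also captures the lexer state and error count): with a `Copy` token, taking a checkpoint is a plain field copy rather than a clone that may allocate when the value holds a `BigInt`.

```rust
#[derive(Debug, Clone, Copy, Default, PartialEq)]
struct Token {
    start: u32,
    end: u32,
}

struct Parser {
    token: Token,
    prev_token_end: u32,
}

struct ParserCheckpoint {
    cur_token: Token,
    prev_span_end: u32,
}

impl Parser {
    fn checkpoint(&self) -> ParserCheckpoint {
        ParserCheckpoint {
            cur_token: self.token, // plain copy, no `.clone()`
            prev_span_end: self.prev_token_end,
        }
    }

    fn rewind(&mut self, checkpoint: ParserCheckpoint) {
        self.token = checkpoint.cur_token;
        self.prev_token_end = checkpoint.prev_span_end;
    }
}

fn main() {
    let mut p = Parser { token: Token { start: 0, end: 3 }, prev_token_end: 0 };
    let cp = p.checkpoint();
    p.token = Token { start: 4, end: 7 }; // speculative parse advances...
    p.rewind(cp);                         // ...then backtracks cheaply
    assert_eq!(p.token, Token { start: 0, end: 3 });
}
```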
11 changes: 6 additions & 5 deletions crates/oxc_parser/src/js/expression.rs
@@ -17,6 +17,7 @@ use super::{
 };
 use crate::{
     diagnostics,
+    lexer::parse_big_int,
     lexer::{Kind, TokenValue},
     list::SeparatedList,
     Context, Parser,
@@ -306,12 +307,12 @@ impl<'a> Parser<'a> {
             Kind::Hex => BigintBase::Hex,
             _ => return Err(self.unexpected()),
         };
-        let value = match self.cur_kind() {
-            kind if kind.is_number() => self.cur_token().value.as_bigint(),
-            _ => return Err(self.unexpected()),
-        };
+        let token = self.cur_token();
+        let src = self.cur_src().strip_suffix('n').unwrap();
+        let value = parse_big_int(src, token.kind)
+            .map_err(|err| diagnostics::InvalidNumber(err, token.span()))?;
         self.bump_any();
-        Ok(BigintLiteral { span: self.end_span(span), value, base })
+        Ok(self.ast.bigint_literal(self.end_span(span), value, base))
     }

     pub(crate) fn parse_literal_regexp(&mut self) -> Result<RegExpLiteral> {
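
A rough standalone sketch of the lazy BigInt path (`parse_big_int_sketch` is a hypothetical helper name; the real `parse_big_int` lives in the lexer's number module, takes the token `Kind` instead of re-inspecting the prefix, and is not shown in this diff): strip the trailing `n`, pick the radix from the literal prefix, and let `num_bigint` do the work only when a BigInt literal is actually turned into an AST node.

```rust
// Hypothetical helper, not the oxc API.
use num_bigint::BigInt;

fn parse_big_int_sketch(src: &str) -> Option<BigInt> {
    // `src` is the raw literal text, e.g. "0xffn" or "123n".
    let digits = src.strip_suffix('n')?;
    let (radix, digits) = match digits.get(..2) {
        Some("0b") | Some("0B") => (2, &digits[2..]),
        Some("0o") | Some("0O") => (8, &digits[2..]),
        Some("0x") | Some("0X") => (16, &digits[2..]),
        _ => (10, digits),
    };
    // A real implementation would also handle numeric separators ('_').
    BigInt::parse_bytes(digits.as_bytes(), radix)
}

fn main() {
    assert_eq!(parse_big_int_sketch("0xffn"), Some(BigInt::from(255)));
    assert_eq!(parse_big_int_sketch("123n"), Some(BigInt::from(123)));
}
```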
19 changes: 11 additions & 8 deletions crates/oxc_parser/src/lexer/mod.rs
@@ -26,9 +26,9 @@ use oxc_syntax::{
 };
 pub use token::{RegExp, Token, TokenValue};

-pub use self::kind::Kind;
+pub use self::{kind::Kind, number::parse_big_int};
 use self::{
-    number::{parse_big_int, parse_float, parse_int},
+    number::{parse_float, parse_int},
     string_builder::AutoCow,
     trivia_builder::TriviaBuilder,
 };
@@ -105,7 +105,7 @@ impl<'a> Lexer<'a> {
     pub fn checkpoint(&self) -> LexerCheckpoint<'a> {
         LexerCheckpoint {
             chars: self.current.chars.clone(),
-            token: self.current.token.clone(),
+            token: self.current.token,
             errors_pos: self.errors.len(),
         }
     }
@@ -178,7 +178,9 @@ impl<'a> Lexer<'a> {
         self.current.token.kind = kind;
         self.current.token.end = self.offset();
         debug_assert!(self.current.token.start <= self.current.token.end);
-        std::mem::take(&mut self.current.token)
+        let token = self.current.token;
+        self.current.token = Token::default();
+        token
     }

     /// Re-tokenize the current `/` or `/=` and return `RegExp`
@@ -299,10 +301,11 @@ impl<'a> Lexer<'a> {
     fn set_numeric_value(&mut self, kind: Kind, src: &'a str) {
         let value = match kind {
             Kind::Decimal | Kind::Binary | Kind::Octal | Kind::Hex => {
-                src.strip_suffix('n').map_or_else(
-                    || parse_int(src, kind).map(TokenValue::Number),
-                    |src| parse_big_int(src, kind).map(TokenValue::BigInt),
-                )
+                if src.ends_with('n') {
+                    // BigInt is parsed lazily in the parser
+                    return;
+                }
+                parse_int(src, kind).map(TokenValue::Number)
             }
             Kind::Float | Kind::PositiveExponential | Kind::NegativeExponential => {
                 parse_float(src).map(TokenValue::Number)
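
A small sketch of the `std::mem::take` replacement above (simplified `Token`, not the real one): for a `Copy + Default` type the explicit copy-then-reset is equivalent to `mem::take`, and it spells out that returning the token is now a bitwise copy with no move or drop involved.

```rust
#[derive(Debug, Clone, Copy, Default, PartialEq)]
struct Token {
    start: u32,
    end: u32,
}

fn main() {
    let mut current = Token { start: 5, end: 9 };

    // What `next_token` now does: copy out, then reset the slot.
    let token = current;
    current = Token::default();
    assert_eq!(token, Token { start: 5, end: 9 });
    assert_eq!(current, Token::default());

    // `std::mem::take` is equivalent for a `Copy + Default` type; the
    // explicit form just makes the bitwise copy obvious.
    let mut current2 = Token { start: 5, end: 9 };
    let token2 = std::mem::take(&mut current2);
    assert_eq!(token2, token);
    assert_eq!(current2, Token::default());
}
```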
14 changes: 3 additions & 11 deletions crates/oxc_parser/src/lexer/token.rs
@@ -5,7 +5,7 @@ use oxc_span::Span;

 use super::kind::Kind;

-#[derive(Debug, Clone, Default)]
+#[derive(Debug, Clone, Copy, Default)]
 pub struct Token<'a> {
     /// Token Kind
     pub kind: Kind,
@@ -38,16 +38,15 @@ impl<'a> Token<'a> {
     }
 }

-#[derive(Debug, Clone)]
+#[derive(Debug, Copy, Clone)]
 pub enum TokenValue<'a> {
     None,
     Number(f64),
-    BigInt(num_bigint::BigInt),
     String(&'a str),
     RegExp(RegExp<'a>),
 }

-#[derive(Debug, Clone)]
+#[derive(Debug, Copy, Clone)]
 pub struct RegExp<'a> {
     pub pattern: &'a str,
     pub flags: RegExpFlags,
@@ -67,13 +66,6 @@ impl<'a> TokenValue<'a> {
         }
     }

-    pub fn as_bigint(&self) -> num_bigint::BigInt {
-        match self {
-            Self::BigInt(s) => s.clone(),
-            _ => unreachable!("expected bigint!"),
-        }
-    }
-
     pub fn as_regex(&self) -> &RegExp<'a> {
         match self {
             Self::RegExp(regex) => regex,
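
A sketch of the derive chain implied by this file (simplified types; `RegExpFlags` here is a stand-in for the real bitflags type): `Copy` can only be derived when every field and variant is `Copy`, which is why the derives are added bottom-up to `RegExp`, `TokenValue`, and `Token` once the owned `BigInt` variant is gone.

```rust
#[derive(Debug, Clone, Copy)]
struct RegExpFlags(u8); // stand-in for the real flags type

#[derive(Debug, Clone, Copy)]
struct RegExp<'a> {
    pattern: &'a str,
    flags: RegExpFlags,
}

#[derive(Debug, Clone, Copy)]
enum TokenValue<'a> {
    None,
    Number(f64),
    String(&'a str),
    RegExp(RegExp<'a>),
    // An owned num_bigint::BigInt variant here would block the derive,
    // because BigInt is not `Copy`.
}

#[derive(Debug, Clone, Copy)]
struct Token<'a> {
    start: u32,
    end: u32,
    value: TokenValue<'a>,
}

fn main() {
    let token = Token {
        start: 0,
        end: 5,
        value: TokenValue::RegExp(RegExp { pattern: "a+b", flags: RegExpFlags(0) }),
    };
    let snapshot = token; // the whole nested structure is a bitwise copy
    assert_eq!(token.start, snapshot.start);
}
```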
