Skip to content

Commit

Permalink
Merge pull request #764 from epage/ws-comment-newline
Browse files Browse the repository at this point in the history
perf(parse): Reduce overhead from "trivia"
  • Loading branch information
epage authored Jul 29, 2024
2 parents c27507e + cc0c6c8 commit 400a6c5
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 51 deletions.
21 changes: 9 additions & 12 deletions crates/benchmarks/examples/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ impl Args {
let mut parser = Parser::Document;

let mut args = lexopt::Parser::from_env();
let mut data = toml_benchmarks::MANIFESTS
.iter()
.find(|d| d.name() == "1-medium")
.unwrap();
let mut data_name = "1-medium".to_owned();
while let Some(arg) = args.next()? {
match arg {
Long("parser") => {
Expand All @@ -59,18 +56,18 @@ impl Args {
};
}
Long("manifest") => {
let name = args.value()?;
data = toml_benchmarks::MANIFESTS
.iter()
.find(|d| d.name() == name)
.ok_or_else(|| lexopt::Error::UnexpectedValue {
option: "manifest".to_owned(),
value: name.clone(),
})?;
data_name = args.value()?.string()?;
}
_ => return Err(arg.unexpected()),
}
}
let data = toml_benchmarks::MANIFESTS
.iter()
.find(|d| d.name() == data_name)
.ok_or_else(|| lexopt::Error::UnexpectedValue {
option: "manifest".to_owned(),
value: data_name.into(),
})?;

Ok(Self {
parser,
Expand Down
2 changes: 1 addition & 1 deletion crates/toml_edit/src/parser/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ fn ml_literal_body<'i>(input: &mut Input<'i>) -> PResult<&'i str> {

// mll-content = mll-char / newline
fn mll_content(input: &mut Input<'_>) -> PResult<u8> {
alt((one_of(MLL_CHAR), newline)).parse_next(input)
alt((one_of(MLL_CHAR), newline.value(b'\n'))).parse_next(input)
}

// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
Expand Down
63 changes: 29 additions & 34 deletions crates/toml_edit/src/parser/trivia.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use std::ops::RangeInclusive;

use winnow::combinator::alt;
use winnow::combinator::empty;
use winnow::combinator::eof;
use winnow::combinator::fail;
use winnow::combinator::opt;
use winnow::combinator::peek;
use winnow::combinator::repeat;
use winnow::combinator::terminated;
use winnow::prelude::*;
use winnow::token::any;
use winnow::token::one_of;
use winnow::token::take_while;

Expand Down Expand Up @@ -50,69 +54,60 @@ pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
pub(crate) const COMMENT_START_SYMBOL: u8 = b'#';

// comment = comment-start-symbol *non-eol
pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
pub(crate) fn comment(input: &mut Input<'_>) -> PResult<()> {
(COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
.recognize()
.void()
.parse_next(input)
}

// newline = ( %x0A / ; LF
// %x0D.0A ) ; CRLF
pub(crate) fn newline(input: &mut Input<'_>) -> PResult<u8> {
alt((
one_of(LF).value(b'\n'),
(one_of(CR), one_of(LF)).value(b'\n'),
))
pub(crate) fn newline(input: &mut Input<'_>) -> PResult<()> {
dispatch! {any;
b'\n' => empty,
b'\r' => one_of(LF).void(),
_ => fail,
}
.parse_next(input)
}
pub(crate) const LF: u8 = b'\n';
pub(crate) const CR: u8 = b'\r';

// ws-newline = *( wschar / newline )
pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
pub(crate) fn ws_newline(input: &mut Input<'_>) -> PResult<()> {
repeat(
0..,
alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
)
.map(|()| ())
.recognize()
.map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") })
.parse_next(input)
}

// ws-newlines = newline *( wschar / newline )
pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
(newline, ws_newline)
.recognize()
.map(|b| unsafe {
from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII")
})
.parse_next(input)
pub(crate) fn ws_newlines(input: &mut Input<'_>) -> PResult<()> {
(newline, ws_newline).void().parse_next(input)
}

// note: this rule is not present in the original grammar
// ws-comment-newline = *( ws-newline-nonempty / comment )
pub(crate) fn ws_comment_newline<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> {
repeat(
0..,
alt((
repeat(
1..,
alt((take_while(1.., WSCHAR), newline.value(&b"\n"[..]))),
)
.map(|()| ()),
comment.void(),
)),
)
.map(|()| ())
.recognize()
pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> PResult<()> {
let _ = ws.parse_next(input)?;

dispatch! {opt(peek(any));
Some(b'#') => (comment, newline, ws_comment_newline).void(),
Some(b'\n') => (newline, ws_comment_newline).void(),
Some(b'\r') => (newline, ws_comment_newline).void(),
_ => empty,
}
.parse_next(input)
}

// note: this rule is not present in the original grammar
// line-ending = newline / eof
pub(crate) fn line_ending<'i>(input: &mut Input<'i>) -> PResult<&'i str> {
alt((newline.value("\n"), eof.value(""))).parse_next(input)
pub(crate) fn line_ending(input: &mut Input<'_>) -> PResult<()> {
alt((newline.value("\n"), eof.value("")))
.void()
.parse_next(input)
}

// note: this rule is not present in the original grammar
Expand Down Expand Up @@ -151,7 +146,7 @@ mod test {
];
for input in inputs {
dbg!(input);
let parsed = ws_comment_newline.parse(new_input(input));
let parsed = ws_comment_newline.recognize().parse(new_input(input));
assert!(parsed.is_ok(), "{:?}", parsed);
let parsed = parsed.unwrap();
assert_eq!(parsed, input.as_bytes());
Expand Down
7 changes: 3 additions & 4 deletions crates/toml_edit/tests/testsuite/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,13 +309,12 @@ TOML parse error at line 1, column 1
bad!(
"a = [ \r ]",
str![[r#"
TOML parse error at line 1, column 7
TOML parse error at line 1, column 8
|
1 | a = [
]
| ^
invalid array
expected `]`
| ^
"#]]
);
Expand Down

0 comments on commit 400a6c5

Please sign in to comment.