From ffdb4bef98530f0e0d8fa6ba4c2821a39cdb0814 Mon Sep 17 00:00:00 2001 From: jyn Date: Fri, 18 Oct 2024 10:21:54 -0400 Subject: [PATCH] Add a new `ParserExtra::State: Inspector` trait This allows using imperative concrete syntax tree parsers like `rowan` and `cstree`. In particular those libraries want to know every token that is parsed, and need to know when chumsky backtracks and reparses the same tokens again. This adds the following new API surface: ```rust pub trait Inspector<'a, I: Input<'a>> { type SaveMarker: Copy + Clone; fn on_token(&mut self, token: &I::Token); fn on_save<'parse>(&self, offset: I::Offset) -> Self::SaveMarker; fn on_rewind<'parse>(&mut self, marker: Marker<'a, 'parse, I, Self::SaveMarker>); } pub struct SimpleState<T>(pub T); impl<'a, T, I: Input<'a>> Inspector<'a, I> for SimpleState<T>; impl<T> DerefMut for SimpleState<T>; impl<T> From<T> for SimpleState<T>; ``` and additionally now requires `ParserExtra::State: Inspector`. --- src/combinator.rs | 4 ++- src/extra.rs | 7 +++-- src/input.rs | 49 +++++++++++++++++++++++++-------- src/lib.rs | 33 ++++++++++++---------- src/recorder.rs | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 133 insertions(+), 30 deletions(-) create mode 100644 src/recorder.rs diff --git a/src/combinator.rs b/src/combinator.rs index 9cd07183..2753c88d 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -5,6 +5,8 @@ //! Although it's *sometimes* useful to be able to name their type, most of these parsers are much easier to work with //! when accessed through their respective methods on [`Parser`]. +use recorder::Inspector; + use super::*; /// The type of a lazy parser. 
@@ -1195,7 +1197,7 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, O, extra::Full>, - State: 'a + Clone, + State: 'a + Clone + Inspector<'a, I>, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { diff --git a/src/extra.rs b/src/extra.rs index 30d83c4a..63ab9af1 100644 --- a/src/extra.rs +++ b/src/extra.rs @@ -1,6 +1,9 @@ //! Generic error, state and context types for parsers //! Useful for custom allocation, error handling, context-specific parsers, and more. +use recorder::Inspector; +pub use recorder::SimpleState; + use super::*; type DefaultErr = EmptyErr; @@ -26,7 +29,7 @@ where /// the actual progress of the parser - for that, use [`Self::Context`]. /// /// For examples of using this type, see [`Parser::map_with`] or [`Parser::foldl_with`]. - type State: 'a; + type State: Inspector<'a, I> + 'a; /// Context used for parser configuration. This is used to provide context-sensitive parsing of *input*. /// Context-sensitive parsing in chumsky is always left-hand sensitive - context for the parse must originate /// from an earlier point in the stream than the parser relying on it. This can affect the output of a parser, @@ -61,7 +64,7 @@ impl<'a, I, E, S, C> ParserExtra<'a, I> for Full where I: Input<'a>, E: Error<'a, I> + 'a, - S: 'a, + S: Inspector<'a, I> + 'a, C: 'a, { type Error = E; diff --git a/src/input.rs b/src/input.rs index ae09b5c5..574658da 100644 --- a/src/input.rs +++ b/src/input.rs @@ -5,6 +5,8 @@ //! [`Input`] is the primary trait used to feed input data into a chumsky parser. You can create them in a number of //! ways: from strings, slices, arrays, etc. +use recorder::Inspector; + pub use crate::stream::{BoxedExactSizeStream, BoxedStream, Stream}; use super::*; @@ -894,13 +896,14 @@ impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput { /// Represents a location in an input that can be rewound to. /// /// Markers can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`]. 
-pub struct Marker<'a, 'parse, I: Input<'a>> { +pub struct Marker<'a, 'parse, I: Input<'a>, C> { pub(crate) offset: I::Offset, pub(crate) err_count: usize, + pub(crate) user_checkpoint: C, phantom: PhantomData &'parse ()>, // Invariance } -impl<'a, 'parse, I: Input<'a>> Marker<'a, 'parse, I> { +impl<'a, 'parse, I: Input<'a>, C> Marker<'a, 'parse, I, C> { /// Get the [`Offset`] that this marker corresponds to. pub fn offset(self) -> Offset<'a, 'parse, I> { Offset { @@ -908,13 +911,23 @@ impl<'a, 'parse, I: Input<'a>> Marker<'a, 'parse, I> { phantom: PhantomData, } } + + /// Get the [`SaveMarker`][Inspector::SaveMarker] that this marker corresponds to. + pub fn ext_checkpoint(self) -> C { + self.user_checkpoint + } } -impl<'a, I: Input<'a>> Copy for Marker<'a, '_, I> {} -impl<'a, I: Input<'a>> Clone for Marker<'a, '_, I> { +impl<'a, I: Input<'a>, C: Copy> Copy for Marker<'a, '_, I, C> {} +impl<'a, I: Input<'a>, C: Clone> Clone for Marker<'a, '_, I, C> { #[inline(always)] fn clone(&self) -> Self { - *self + Self { + user_checkpoint: self.user_checkpoint.clone(), + offset: self.offset, + err_count: self.err_count, + phantom: PhantomData, + } } } @@ -1102,7 +1115,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> ) -> O where 'parse: 'sub_parse, - S: 'a, + S: 'a + Inspector<'a, I>, { let mut new_inp = InputRef { input: self.input, @@ -1158,10 +1171,11 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// /// You can rewind back to this state later with [`InputRef::rewind`]. 
#[inline(always)] - pub fn save(&self) -> Marker<'a, 'parse, I> { + pub fn save(&self) -> Marker<'a, 'parse, I, >::SaveMarker> { Marker { offset: self.offset, err_count: self.errors.secondary.len(), + user_checkpoint: self.state.on_save(self.offset), phantom: PhantomData, } } @@ -1170,9 +1184,13 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// /// You can create a marker with which to perform rewinding using [`InputRef::save`]. #[inline(always)] - pub fn rewind(&mut self, marker: Marker<'a, 'parse, I>) { + pub fn rewind( + &mut self, + marker: Marker<'a, 'parse, I, >::SaveMarker>, + ) { self.errors.secondary.truncate(marker.err_count); self.offset = marker.offset; + self.state.on_rewind(marker); } /// Get a mutable reference to the state associated with the current parse. @@ -1199,9 +1217,10 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> loop { // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next(self.offset) }; - if token.filter(&mut f).is_none() { + if token.as_ref().filter(|&t| f(t)).is_none() { break; } else { + token.inspect(|t| self.state.on_token(t)); self.offset = offset; } } @@ -1215,14 +1234,20 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next(self.offset) }; self.offset = offset; - (self.offset, token) + ( + self.offset, + token.inspect(|t| self.state.on_token(t.borrow())), + ) } #[inline(always)] pub(crate) fn next_maybe_inner(&mut self) -> (I::Offset, Option) { // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next_maybe(self.offset) }; - let r = (self.offset, token); + let r = ( + self.offset, + token.inspect(|t| self.state.on_token(Borrow::borrow(t))), + ); self.offset = offset; r } @@ -1235,7 +1260,7 @@ impl<'a, 
'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next_ref(self.offset) }; self.offset = offset; - (self.offset, token) + (self.offset, token.inspect(|t| self.state.on_token(t))) } /// Attempt to parse this input using the given parser. diff --git a/src/lib.rs b/src/lib.rs index ecb961d5..ff01042a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,7 @@ pub mod number; pub mod pratt; pub mod primitive; mod private; +pub mod recorder; pub mod recovery; pub mod recursive; #[cfg(feature = "regex")] @@ -537,7 +538,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// #[derive(Copy, Clone)] /// pub struct Ident(Spur); /// - /// let ident = text::ascii::ident::<_, _, extra::Full, Rodeo, ()>>() + /// let ident = text::ascii::ident::<_, _, extra::Full, extra::SimpleState, ()>>() /// .map_with(|ident, e| Ident(e.state().get_or_intern(ident))) /// .padded() /// .repeated() @@ -546,7 +547,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// /// // Test out parser /// - /// let mut interner = Rodeo::new(); + /// let mut interner = extra::SimpleState(Rodeo::new()); /// /// match ident.parse_with_state("hello", &mut interner).into_result() { /// Ok(idents) => { @@ -1534,16 +1535,16 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ## General /// /// ``` - /// # use chumsky::{prelude::*, error::Simple}; - /// let int = text::int::<_, _, extra::Full, i32, ()>>(10) + /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState}; + /// let int = text::int::<_, _, extra::Full, SimpleState, ()>>(10) /// .from_str() /// .unwrapped(); /// /// let sum = int /// .clone() - /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * *e.state()); + /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * **e.state()); 
/// - /// let mut multiplier = 2i32; + /// let mut multiplier = SimpleState(2i32); /// assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134)); /// assert_eq!(sum.parse_with_state("6", &mut multiplier).into_result(), Ok(6)); /// ``` @@ -1571,7 +1572,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// type NodeArena = SlotMap; /// /// // Now, define our parser - /// let int = text::int::<&str, _, extra::Full, NodeArena, ()>>(10) + /// let int = text::int::<&str, _, extra::Full, extra::SimpleState, ()>>(10) /// .padded() /// .map_with(|s, e| /// // Return the ID of the new integer node @@ -1587,7 +1588,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ); /// /// // Test our parser - /// let mut arena = NodeArena::default(); + /// let mut arena = extra::SimpleState(NodeArena::default()); /// let four_plus_eight = sum.parse_with_state("4 + 8", &mut arena).unwrap(); /// if let Expr::Add(a, b) = arena[four_plus_eight] { /// assert_eq!(arena[a], Expr::Int(4)); @@ -2445,8 +2446,8 @@ where /// # Examples /// /// ``` - /// # use chumsky::{prelude::*, error::Simple}; - /// let int = text::int::<_, _, extra::Full, i32, ()>>(10) + /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState}; + /// let int = text::int::<_, _, extra::Full, SimpleState, ()>>(10) /// .from_str() /// .unwrapped(); /// @@ -2454,12 +2455,12 @@ where /// .or(just('-').to(-1)) /// .repeated() /// .foldr_with(int, |a, b, e| { - /// *e.state() += 1; + /// **e.state() += 1; /// a * b /// }); /// /// // Test our parser - /// let mut folds = 0i32; + /// let mut folds = SimpleState(0i32); /// assert_eq!(signed.parse_with_state("3", &mut folds).into_result(), Ok(3)); /// assert_eq!(signed.parse_with_state("-17", &mut folds).into_result(), Ok(-17)); /// assert_eq!(signed.parse_with_state("--+-+-5", &mut folds).into_result(), Ok(5)); @@ -3175,10 +3176,12 @@ mod tests { #[should_panic] 
#[cfg(debug_assertions)] fn debug_assert_foldl_with() { - let mut state = 100; - empty::<&str, extra::Full>() + use extra::SimpleState; + + let state = 100; + empty::<&str, extra::Full, ()>>() .foldl_with(empty().to(()).repeated(), |_, _, _| ()) - .parse_with_state("a+b+c", &mut state); + .parse_with_state("a+b+c", &mut state.into()); } #[test] diff --git a/src/recorder.rs b/src/recorder.rs new file mode 100644 index 00000000..b7d5babc --- /dev/null +++ b/src/recorder.rs @@ -0,0 +1,70 @@ +//! Parser extensions that inspect the input without modifying it. +//! +//! *"Only one man stood and watched the sky, stood with terrible sadness in his eyes +//! and rubber bungs in his ears. He knew exactly what was happening and had known +//! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night +//! beside his pillow and woken him with a start."* +use crate::{input::Marker, Input}; +use core::ops::{Deref, DerefMut}; + +#[allow(unused)] // for intra-doc links +use crate::Parser; + +/// A type that receives event hooks when certain parsing actions occur. +/// +/// If you don't need to receive event hooks, use [`SimpleState`]. +pub trait Inspector<'a, I: Input<'a>> { + /// A type the Inspector can use to revert to a previous state. + /// + /// For implementation reasons, this is required to be `Copy + Clone`. + type SaveMarker: Copy + Clone; + + /// This function is called when a new token is read from the input stream. + // impl note: this should be called only when `self.offset` is updated, not when we only peek at the next token. + fn on_token(&mut self, token: &I::Token); + /// This function is called when a combinator saves the current state of the parse. + fn on_save<'parse>(&self, offset: I::Offset) -> Self::SaveMarker; + /// This function is called when a combinator rewinds to an earlier state of the parser. 
+ /// + /// You can use [`Marker::ext_checkpoint`] to get back the [`SaveMarker`][Self::SaveMarker] + /// you originally created in [`on_save`][Self::on_save]. + fn on_rewind<'parse>(&mut self, marker: Marker<'a, 'parse, I, Self::SaveMarker>); +} + +impl<'a, I: Input<'a>> Inspector<'a, I> for () { + type SaveMarker = (); + fn on_token(&mut self, _: &>::Token) {} + fn on_save<'parse>(&self, _: >::Offset) -> Self::SaveMarker {} + fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self>) {} +} + +/// A state type that should be accessible directly from `parser.state()` and has no special behavior. +/// +/// This wrapper implements the [`Inspector`] trait for you so you don't have to. +pub struct SimpleState(pub T); +impl<'a, T, I: Input<'a>> Inspector<'a, I> for SimpleState { + type SaveMarker = (); + fn on_token(&mut self, _: &>::Token) {} + fn on_save<'parse>(&self, _: >::Offset) -> Self::SaveMarker {} + fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self::SaveMarker>) {} +} + +impl Deref for SimpleState { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for SimpleState { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl From for SimpleState { + fn from(value: T) -> Self { + Self(value) + } +}