diff --git a/src/combinator.rs b/src/combinator.rs index 9cd07183..2753c88d 100644 --- a/src/combinator.rs +++ b/src/combinator.rs @@ -5,6 +5,8 @@ //! Although it's *sometimes* useful to be able to name their type, most of these parsers are much easier to work with //! when accessed through their respective methods on [`Parser`]. +use recorder::Inspector; + use super::*; /// The type of a lazy parser. @@ -1195,7 +1197,7 @@ where I: Input<'a>, E: ParserExtra<'a, I>, A: Parser<'a, I, O, extra::Full>, - State: 'a + Clone, + State: 'a + Clone + Inspector<'a, I>, { #[inline(always)] fn go(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult { diff --git a/src/extra.rs b/src/extra.rs index 30d83c4a..63ab9af1 100644 --- a/src/extra.rs +++ b/src/extra.rs @@ -1,6 +1,9 @@ //! Generic error, state and context types for parsers //! Useful for custom allocation, error handling, context-specific parsers, and more. +use recorder::Inspector; +pub use recorder::SimpleState; + use super::*; type DefaultErr = EmptyErr; @@ -26,7 +29,7 @@ where /// the actual progress of the parser - for that, use [`Self::Context`]. /// /// For examples of using this type, see [`Parser::map_with`] or [`Parser::foldl_with`]. - type State: 'a; + type State: Inspector<'a, I> + 'a; /// Context used for parser configuration. This is used to provide context-sensitive parsing of *input*. /// Context-sensitive parsing in chumsky is always left-hand sensitive - context for the parse must originate /// from an earlier point in the stream than the parser relying on it. This can affect the output of a parser, @@ -61,7 +64,7 @@ impl<'a, I, E, S, C> ParserExtra<'a, I> for Full where I: Input<'a>, E: Error<'a, I> + 'a, - S: 'a, + S: Inspector<'a, I> + 'a, C: 'a, { type Error = E; diff --git a/src/input.rs b/src/input.rs index ae09b5c5..574658da 100644 --- a/src/input.rs +++ b/src/input.rs @@ -5,6 +5,8 @@ //! [`Input`] is the primary trait used to feed input data into a chumsky parser. 
You can create them in a number of //! ways: from strings, slices, arrays, etc. +use recorder::Inspector; + pub use crate::stream::{BoxedExactSizeStream, BoxedStream, Stream}; use super::*; @@ -894,13 +896,14 @@ impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput { /// Represents a location in an input that can be rewound to. /// /// Markers can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`]. -pub struct Marker<'a, 'parse, I: Input<'a>> { +pub struct Marker<'a, 'parse, I: Input<'a>, C> { pub(crate) offset: I::Offset, pub(crate) err_count: usize, + pub(crate) user_checkpoint: C, phantom: PhantomData &'parse ()>, // Invariance } -impl<'a, 'parse, I: Input<'a>> Marker<'a, 'parse, I> { +impl<'a, 'parse, I: Input<'a>, C> Marker<'a, 'parse, I, C> { /// Get the [`Offset`] that this marker corresponds to. pub fn offset(self) -> Offset<'a, 'parse, I> { Offset { @@ -908,13 +911,23 @@ impl<'a, 'parse, I: Input<'a>> Marker<'a, 'parse, I> { phantom: PhantomData, } } + + /// Get the [`SaveMarker`][Inspector::SaveMarker] that this marker corresponds to. + pub fn ext_checkpoint(self) -> C { + self.user_checkpoint + } } -impl<'a, I: Input<'a>> Copy for Marker<'a, '_, I> {} -impl<'a, I: Input<'a>> Clone for Marker<'a, '_, I> { +impl<'a, I: Input<'a>, C: Copy> Copy for Marker<'a, '_, I, C> {} +impl<'a, I: Input<'a>, C: Clone> Clone for Marker<'a, '_, I, C> { #[inline(always)] fn clone(&self) -> Self { - *self + Self { + user_checkpoint: self.user_checkpoint.clone(), + offset: self.offset, + err_count: self.err_count, + phantom: PhantomData, + } } } @@ -1102,7 +1115,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> ) -> O where 'parse: 'sub_parse, - S: 'a, + S: 'a + Inspector<'a, I>, { let mut new_inp = InputRef { input: self.input, @@ -1158,10 +1171,11 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// /// You can rewind back to this state later with [`InputRef::rewind`].
#[inline(always)] - pub fn save(&self) -> Marker<'a, 'parse, I> { + pub fn save(&self) -> Marker<'a, 'parse, I, >::SaveMarker> { Marker { offset: self.offset, err_count: self.errors.secondary.len(), + user_checkpoint: self.state.on_save(self.offset), phantom: PhantomData, } } @@ -1170,9 +1184,13 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> /// /// You can create a marker with which to perform rewinding using [`InputRef::save`]. #[inline(always)] - pub fn rewind(&mut self, marker: Marker<'a, 'parse, I>) { + pub fn rewind( + &mut self, + marker: Marker<'a, 'parse, I, >::SaveMarker>, + ) { self.errors.secondary.truncate(marker.err_count); self.offset = marker.offset; + self.state.on_rewind(marker); } /// Get a mutable reference to the state associated with the current parse. @@ -1199,9 +1217,10 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> loop { // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next(self.offset) }; - if token.filter(&mut f).is_none() { + if token.as_ref().filter(|&t| f(t)).is_none() { break; } else { + token.inspect(|t| self.state.on_token(t)); self.offset = offset; } } @@ -1215,14 +1234,20 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next(self.offset) }; self.offset = offset; - (self.offset, token) + ( + self.offset, + token.inspect(|t| self.state.on_token(t.borrow())), + ) } #[inline(always)] pub(crate) fn next_maybe_inner(&mut self) -> (I::Offset, Option) { // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next_maybe(self.offset) }; - let r = (self.offset, token); + let r = ( + self.offset, + token.inspect(|t| self.state.on_token(Borrow::borrow(t))), + ); self.offset = offset; r } @@ -1235,7 +1260,7 @@ impl<'a, 
'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E> // SAFETY: offset was generated by previous call to `Input::next` let (offset, token) = unsafe { self.input.next_ref(self.offset) }; self.offset = offset; - (self.offset, token) + (self.offset, token.inspect(|t| self.state.on_token(t))) } /// Attempt to parse this input using the given parser. diff --git a/src/lib.rs b/src/lib.rs index ecb961d5..ff01042a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,6 +76,7 @@ pub mod number; pub mod pratt; pub mod primitive; mod private; +pub mod recorder; pub mod recovery; pub mod recursive; #[cfg(feature = "regex")] @@ -537,7 +538,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// #[derive(Copy, Clone)] /// pub struct Ident(Spur); /// - /// let ident = text::ascii::ident::<_, _, extra::Full, Rodeo, ()>>() + /// let ident = text::ascii::ident::<_, _, extra::Full, extra::SimpleState, ()>>() /// .map_with(|ident, e| Ident(e.state().get_or_intern(ident))) /// .padded() /// .repeated() @@ -546,7 +547,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// /// // Test out parser /// - /// let mut interner = Rodeo::new(); + /// let mut interner = extra::SimpleState(Rodeo::new()); /// /// match ident.parse_with_state("hello", &mut interner).into_result() { /// Ok(idents) => { @@ -1534,16 +1535,16 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ## General /// /// ``` - /// # use chumsky::{prelude::*, error::Simple}; - /// let int = text::int::<_, _, extra::Full, i32, ()>>(10) + /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState}; + /// let int = text::int::<_, _, extra::Full, SimpleState, ()>>(10) /// .from_str() /// .unwrapped(); /// /// let sum = int /// .clone() - /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * *e.state()); + /// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * **e.state()); 
/// - /// let mut multiplier = 2i32; + /// let mut multiplier = SimpleState(2i32); /// assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134)); /// assert_eq!(sum.parse_with_state("6", &mut multiplier).into_result(), Ok(6)); /// ``` @@ -1571,7 +1572,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// type NodeArena = SlotMap; /// /// // Now, define our parser - /// let int = text::int::<&str, _, extra::Full, NodeArena, ()>>(10) + /// let int = text::int::<&str, _, extra::Full, extra::SimpleState, ()>>(10) /// .padded() /// .map_with(|s, e| /// // Return the ID of the new integer node @@ -1587,7 +1588,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>: /// ); /// /// // Test our parser - /// let mut arena = NodeArena::default(); + /// let mut arena = extra::SimpleState(NodeArena::default()); /// let four_plus_eight = sum.parse_with_state("4 + 8", &mut arena).unwrap(); /// if let Expr::Add(a, b) = arena[four_plus_eight] { /// assert_eq!(arena[a], Expr::Int(4)); @@ -2445,8 +2446,8 @@ where /// # Examples /// /// ``` - /// # use chumsky::{prelude::*, error::Simple}; - /// let int = text::int::<_, _, extra::Full, i32, ()>>(10) + /// # use chumsky::{prelude::*, error::Simple, extra::SimpleState}; + /// let int = text::int::<_, _, extra::Full, SimpleState, ()>>(10) /// .from_str() /// .unwrapped(); /// @@ -2454,12 +2455,12 @@ where /// .or(just('-').to(-1)) /// .repeated() /// .foldr_with(int, |a, b, e| { - /// *e.state() += 1; + /// **e.state() += 1; /// a * b /// }); /// /// // Test our parser - /// let mut folds = 0i32; + /// let mut folds = SimpleState(0i32); /// assert_eq!(signed.parse_with_state("3", &mut folds).into_result(), Ok(3)); /// assert_eq!(signed.parse_with_state("-17", &mut folds).into_result(), Ok(-17)); /// assert_eq!(signed.parse_with_state("--+-+-5", &mut folds).into_result(), Ok(5)); @@ -3175,10 +3176,12 @@ mod tests { #[should_panic] 
#[cfg(debug_assertions)] fn debug_assert_foldl_with() { - let mut state = 100; - empty::<&str, extra::Full>() + use extra::SimpleState; + + let state = 100; + empty::<&str, extra::Full, ()>>() .foldl_with(empty().to(()).repeated(), |_, _, _| ()) - .parse_with_state("a+b+c", &mut state); + .parse_with_state("a+b+c", &mut state.into()); } #[test] diff --git a/src/recorder.rs b/src/recorder.rs new file mode 100644 index 00000000..b7d5babc --- /dev/null +++ b/src/recorder.rs @@ -0,0 +1,70 @@ +//! Parser extensions that inspect the input without modifying it. +//! +//! *"Only one man stood and watched the sky, stood with terrible sadness in his eyes +//! and rubber bungs in his ears. He knew exactly what was happening and had known +//! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night +//! beside his pillow and woken him with a start."* +use crate::{input::Marker, Input}; +use core::ops::{Deref, DerefMut}; + +#[allow(unused)] // for intra-doc links +use crate::Parser; + +/// A type that receives event hooks when certain parsing actions occur. +/// +/// If you don't need to receive event hooks, use [`SimpleState`]. +pub trait Inspector<'a, I: Input<'a>> { + /// A type the Inspector can use to revert to a previous state. + /// + /// For implementation reasons, this is required to be `Copy + Clone`. + type SaveMarker: Copy + Clone; + + /// This function is called when a new token is read from the input stream. + // impl note: this should be called only when `self.offset` is updated, not when we only peek at the next token. + fn on_token(&mut self, token: &I::Token); + /// This function is called when a combinator saves the current state of the parse. + fn on_save<'parse>(&self, offset: I::Offset) -> Self::SaveMarker; + /// This function is called when a combinator rewinds to an earlier state of the parser.
+ /// + /// You can use [`Marker::ext_checkpoint`] to get back the [`SaveMarker`][Self::SaveMarker] + /// you originally created in [`on_save`][Self::on_save]. + fn on_rewind<'parse>(&mut self, marker: Marker<'a, 'parse, I, Self::SaveMarker>); +} + +impl<'a, I: Input<'a>> Inspector<'a, I> for () { + type SaveMarker = (); + fn on_token(&mut self, _: &>::Token) {} + fn on_save<'parse>(&self, _: >::Offset) -> Self::SaveMarker {} + fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self>) {} +} + +/// A state type that should be accessible directly from `parser.state()` and has no special behavior. +/// +/// This wrapper implements the [`Inspector`] trait for you so you don't have to. +pub struct SimpleState(pub T); +impl<'a, T, I: Input<'a>> Inspector<'a, I> for SimpleState { + type SaveMarker = (); + fn on_token(&mut self, _: &>::Token) {} + fn on_save<'parse>(&self, _: >::Offset) -> Self::SaveMarker {} + fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self::SaveMarker>) {} +} + +impl Deref for SimpleState { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for SimpleState { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl From for SimpleState { + fn from(value: T) -> Self { + Self(value) + } +}