Skip to content

Commit

Permalink
Add a new ParserExtra::State: Inspector trait
Browse files Browse the repository at this point in the history
This allows using imperative concrete syntax tree parsers like `rowan` and `cstree`.
In particular those libraries want to know every token that is parsed, and need to know when chumsky backtracks and reparses the same tokens again.

This adds the following new API surface:
```rust
pub trait Inspector<'a, I: Input<'a>> {
    type SaveMarker: Copy + Clone;
    fn on_token(&mut self, token: &I::Token);
    fn on_save<'parse>(&self, offset: I::Offset) -> Self::SaveMarker;
    fn on_rewind<'parse>(&mut self, marker: Marker<'a, 'parse, I, Self::SaveMarker>);
}
pub struct SimpleState<T>(pub T);
impl<'a, T, I: Input<'a>> Inspector<'a, I> for SimpleState<T>;
impl<T> DerefMut<Target = T> for SimpleState<T>;
impl<T> From<T> for SimpleState<T>;
```
and additionally now requires `ParserExtra::State: Inspector`.
  • Loading branch information
jyn514 committed Oct 19, 2024
1 parent f91f895 commit ffdb4be
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 30 deletions.
4 changes: 3 additions & 1 deletion src/combinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
//! Although it's *sometimes* useful to be able to name their type, most of these parsers are much easier to work with
//! when accessed through their respective methods on [`Parser`].
use recorder::Inspector;

use super::*;

/// The type of a lazy parser.
Expand Down Expand Up @@ -1195,7 +1197,7 @@ where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: Parser<'a, I, O, extra::Full<E::Error, State, E::Context>>,
State: 'a + Clone,
State: 'a + Clone + Inspector<'a, I>,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O> {
Expand Down
7 changes: 5 additions & 2 deletions src/extra.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Generic error, state and context types for parsers
//! Useful for custom allocation, error handling, context-specific parsers, and more.
use recorder::Inspector;
pub use recorder::SimpleState;

use super::*;

type DefaultErr = EmptyErr;
Expand All @@ -26,7 +29,7 @@ where
/// the actual progress of the parser - for that, use [`Self::Context`].
///
/// For examples of using this type, see [`Parser::map_with`] or [`Parser::foldl_with`].
type State: 'a;
type State: Inspector<'a, I> + 'a;
/// Context used for parser configuration. This is used to provide context-sensitive parsing of *input*.
/// Context-sensitive parsing in chumsky is always left-hand sensitive - context for the parse must originate
/// from an earlier point in the stream than the parser relying on it. This can affect the output of a parser,
Expand Down Expand Up @@ -61,7 +64,7 @@ impl<'a, I, E, S, C> ParserExtra<'a, I> for Full<E, S, C>
where
I: Input<'a>,
E: Error<'a, I> + 'a,
S: 'a,
S: Inspector<'a, I> + 'a,
C: 'a,
{
type Error = E;
Expand Down
49 changes: 37 additions & 12 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
//! [`Input`] is the primary trait used to feed input data into a chumsky parser. You can create them in a number of
//! ways: from strings, slices, arrays, etc.
use recorder::Inspector;

pub use crate::stream::{BoxedExactSizeStream, BoxedStream, Stream};

use super::*;
Expand Down Expand Up @@ -894,27 +896,38 @@ impl<'a, R: Read + Seek + 'a> ValueInput<'a> for IoInput<R> {
/// Represents a location in an input that can be rewound to.
///
/// Markers can be created with [`InputRef::save`] and rewound to with [`InputRef::rewind`].
pub struct Marker<'a, 'parse, I: Input<'a>> {
pub struct Marker<'a, 'parse, I: Input<'a>, C> {
pub(crate) offset: I::Offset,
pub(crate) err_count: usize,
pub(crate) user_checkpoint: C,
phantom: PhantomData<fn(&'parse ()) -> &'parse ()>, // Invariance
}

impl<'a, 'parse, I: Input<'a>> Marker<'a, 'parse, I> {
impl<'a, 'parse, I: Input<'a>, C> Marker<'a, 'parse, I, C> {
/// Get the [`Offset`] that this marker corresponds to.
pub fn offset(self) -> Offset<'a, 'parse, I> {
Offset {
offset: self.offset,
phantom: PhantomData,
}
}

/// Get the [`SaveMarker`][Inspector::SaveMarker] that this marker corresponds to.
pub fn ext_checkpoint(self) -> C {
self.user_checkpoint
}
}

impl<'a, I: Input<'a>> Copy for Marker<'a, '_, I> {}
impl<'a, I: Input<'a>> Clone for Marker<'a, '_, I> {
impl<'a, I: Input<'a>, C: Copy> Copy for Marker<'a, '_, I, C> {}
impl<'a, I: Input<'a>, C: Clone> Clone for Marker<'a, '_, I, C> {
#[inline(always)]
fn clone(&self) -> Self {
*self
Self {
user_checkpoint: self.user_checkpoint.clone(),
offset: self.offset,
err_count: self.err_count,
phantom: PhantomData,
}
}
}

Expand Down Expand Up @@ -1102,7 +1115,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
) -> O
where
'parse: 'sub_parse,
S: 'a,
S: 'a + Inspector<'a, I>,
{
let mut new_inp = InputRef {
input: self.input,
Expand Down Expand Up @@ -1158,10 +1171,11 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
///
/// You can rewind back to this state later with [`InputRef::rewind`].
#[inline(always)]
pub fn save(&self) -> Marker<'a, 'parse, I> {
pub fn save(&self) -> Marker<'a, 'parse, I, <E::State as Inspector<'a, I>>::SaveMarker> {
Marker {
offset: self.offset,
err_count: self.errors.secondary.len(),
user_checkpoint: self.state.on_save(self.offset),
phantom: PhantomData,
}
}
Expand All @@ -1170,9 +1184,13 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
///
/// You can create a marker with which to perform rewinding using [`InputRef::save`].
#[inline(always)]
pub fn rewind(&mut self, marker: Marker<'a, 'parse, I>) {
pub fn rewind(
&mut self,
marker: Marker<'a, 'parse, I, <E::State as Inspector<'a, I>>::SaveMarker>,
) {
self.errors.secondary.truncate(marker.err_count);
self.offset = marker.offset;
self.state.on_rewind(marker);
}

/// Get a mutable reference to the state associated with the current parse.
Expand All @@ -1199,9 +1217,10 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
loop {
// SAFETY: offset was generated by previous call to `Input::next`
let (offset, token) = unsafe { self.input.next(self.offset) };
if token.filter(&mut f).is_none() {
if token.as_ref().filter(|&t| f(t)).is_none() {
break;
} else {
token.inspect(|t| self.state.on_token(t));
self.offset = offset;
}
}
Expand All @@ -1215,14 +1234,20 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
// SAFETY: offset was generated by previous call to `Input::next`
let (offset, token) = unsafe { self.input.next(self.offset) };
self.offset = offset;
(self.offset, token)
(
self.offset,
token.inspect(|t| self.state.on_token(t.borrow())),
)
}

#[inline(always)]
pub(crate) fn next_maybe_inner(&mut self) -> (I::Offset, Option<I::TokenMaybe>) {
// SAFETY: offset was generated by previous call to `Input::next`
let (offset, token) = unsafe { self.input.next_maybe(self.offset) };
let r = (self.offset, token);
let r = (
self.offset,
token.inspect(|t| self.state.on_token(Borrow::borrow(t))),
);
self.offset = offset;
r
}
Expand All @@ -1235,7 +1260,7 @@ impl<'a, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> InputRef<'a, 'parse, I, E>
// SAFETY: offset was generated by previous call to `Input::next`
let (offset, token) = unsafe { self.input.next_ref(self.offset) };
self.offset = offset;
(self.offset, token)
(self.offset, token.inspect(|t| self.state.on_token(t)))
}

/// Attempt to parse this input using the given parser.
Expand Down
33 changes: 18 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ pub mod number;
pub mod pratt;
pub mod primitive;
mod private;
pub mod recorder;
pub mod recovery;
pub mod recursive;
#[cfg(feature = "regex")]
Expand Down Expand Up @@ -537,7 +538,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// #[derive(Copy, Clone)]
/// pub struct Ident(Spur);
///
/// let ident = text::ascii::ident::<_, _, extra::Full<Simple<char>, Rodeo, ()>>()
/// let ident = text::ascii::ident::<_, _, extra::Full<Simple<char>, extra::SimpleState<Rodeo>, ()>>()
/// .map_with(|ident, e| Ident(e.state().get_or_intern(ident)))
/// .padded()
/// .repeated()
Expand All @@ -546,7 +547,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
///
/// // Test out parser
///
/// let mut interner = Rodeo::new();
/// let mut interner = extra::SimpleState(Rodeo::new());
///
/// match ident.parse_with_state("hello", &mut interner).into_result() {
/// Ok(idents) => {
Expand Down Expand Up @@ -1534,16 +1535,16 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// ## General
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// let int = text::int::<_, _, extra::Full<Simple<char>, i32, ()>>(10)
/// # use chumsky::{prelude::*, error::Simple, extra::SimpleState};
/// let int = text::int::<_, _, extra::Full<Simple<char>, SimpleState<i32>, ()>>(10)
/// .from_str()
/// .unwrapped();
///
/// let sum = int
/// .clone()
/// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * *e.state());
/// .foldl_with(just('+').ignore_then(int).repeated(), |a, b, e| (a + b) * **e.state());
///
/// let mut multiplier = 2i32;
/// let mut multiplier = SimpleState(2i32);
/// assert_eq!(sum.parse_with_state("1+12+3+9", &mut multiplier).into_result(), Ok(134));
/// assert_eq!(sum.parse_with_state("6", &mut multiplier).into_result(), Ok(6));
/// ```
Expand Down Expand Up @@ -1571,7 +1572,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// type NodeArena = SlotMap<NodeId, Expr>;
///
/// // Now, define our parser
/// let int = text::int::<&str, _, extra::Full<Simple<char>, NodeArena, ()>>(10)
/// let int = text::int::<&str, _, extra::Full<Simple<char>, extra::SimpleState<NodeArena>, ()>>(10)
/// .padded()
/// .map_with(|s, e|
/// // Return the ID of the new integer node
Expand All @@ -1587,7 +1588,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// );
///
/// // Test our parser
/// let mut arena = NodeArena::default();
/// let mut arena = extra::SimpleState(NodeArena::default());
/// let four_plus_eight = sum.parse_with_state("4 + 8", &mut arena).unwrap();
/// if let Expr::Add(a, b) = arena[four_plus_eight] {
/// assert_eq!(arena[a], Expr::Int(4));
Expand Down Expand Up @@ -2445,21 +2446,21 @@ where
/// # Examples
///
/// ```
/// # use chumsky::{prelude::*, error::Simple};
/// let int = text::int::<_, _, extra::Full<Simple<char>, i32, ()>>(10)
/// # use chumsky::{prelude::*, error::Simple, extra::SimpleState};
/// let int = text::int::<_, _, extra::Full<Simple<char>, SimpleState<i32>, ()>>(10)
/// .from_str()
/// .unwrapped();
///
/// let signed = just('+').to(1)
/// .or(just('-').to(-1))
/// .repeated()
/// .foldr_with(int, |a, b, e| {
/// *e.state() += 1;
/// **e.state() += 1;
/// a * b
/// });
///
/// // Test our parser
/// let mut folds = 0i32;
/// let mut folds = SimpleState(0i32);
/// assert_eq!(signed.parse_with_state("3", &mut folds).into_result(), Ok(3));
/// assert_eq!(signed.parse_with_state("-17", &mut folds).into_result(), Ok(-17));
/// assert_eq!(signed.parse_with_state("--+-+-5", &mut folds).into_result(), Ok(5));
Expand Down Expand Up @@ -3175,10 +3176,12 @@ mod tests {
#[should_panic]
#[cfg(debug_assertions)]
fn debug_assert_foldl_with() {
let mut state = 100;
empty::<&str, extra::Full<EmptyErr, i32, ()>>()
use extra::SimpleState;

let state = 100;
empty::<&str, extra::Full<EmptyErr, SimpleState<i32>, ()>>()
.foldl_with(empty().to(()).repeated(), |_, _, _| ())
.parse_with_state("a+b+c", &mut state);
.parse_with_state("a+b+c", &mut state.into());
}

#[test]
Expand Down
70 changes: 70 additions & 0 deletions src/recorder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//! Parser extensions that inspect the input without modifying it.
//!
//! *"Only one man stood and watched the sky, stood with terrible sadness in his eyes
//! and rubber bungs in his ears. He knew exactly what was happening and had known
//! ever since his Sub-Etha Sens-O-Matic had started winking in the dead of night
//! beside his pillow and woken him with a start."*
use crate::{input::Marker, Input};
use core::ops::{Deref, DerefMut};

#[allow(unused)] // for intra-doc links
use crate::Parser;

/// A type that receives event hooks when certain parsing actions occur.
///
/// The hooks report every consumed token, every save point, and every rewind.
///
/// If you don't need to receive event hooks, use [`SimpleState`].
pub trait Inspector<'a, I: Input<'a>> {
    /// A type the inspector can use to revert to a previous state.
    ///
    /// For implementation reasons, this is required to be `Copy + Clone`.
    type SaveMarker: Copy + Clone;

    /// This function is called when a new token is read from the input stream.
    // impl note: this should be called only when `self.offset` is updated, not when we only peek at the next token.
    fn on_token(&mut self, token: &I::Token);
    /// This function is called when a combinator saves the current state of the parse.
    ///
    /// `offset` is the input offset being saved. The returned value is handed back
    /// to [`on_rewind`][Self::on_rewind] inside the [`Marker`].
    fn on_save<'parse>(&self, offset: I::Offset) -> Self::SaveMarker;
    /// This function is called when a combinator rewinds to an earlier state of the parser.
    ///
    /// You can use [`Marker::ext_checkpoint`] to get back the [`SaveMarker`][Self::SaveMarker]
    /// you originally created in [`on_save`][Self::on_save].
    fn on_rewind<'parse>(&mut self, marker: Marker<'a, 'parse, I, Self::SaveMarker>);
}

/// The unit type is the no-op [`Inspector`]: it ignores every token, produces `()` as its
/// save marker, and does nothing on rewind.
impl<'a, I: Input<'a>> Inspector<'a, I> for () {
    type SaveMarker = ();
    // Tokens are not recorded.
    fn on_token(&mut self, _: &<I as Input<'a>>::Token) {}
    // There is no state to snapshot, so the marker carries no information.
    fn on_save<'parse>(&self, _: <I as Input<'a>>::Offset) -> Self::SaveMarker {}
    // The marker payload is written as `Self::SaveMarker` (not `Self`) so the signature mirrors
    // the trait declaration instead of relying on both happening to be `()`.
    fn on_rewind<'parse>(&mut self, _: Marker<'a, 'parse, I, Self::SaveMarker>) {}
}

/// A state type that should be accessible directly from `parser.state()` and has no special behavior.
///
/// This wrapper implements the [`Inspector`] trait for you so you don't have to.
/// The wrapped value is public and can be reached through field `.0`.
///
/// The common standard traits are derived (each only when `T` implements it), so wrapping a
/// value does not take away capabilities such as `Clone` that state-aware combinators require.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SimpleState<T>(pub T);
// `SimpleState` opts out of every hook: tokens are ignored, saving yields the unit marker,
// and rewinding leaves the wrapped value untouched.
impl<'a, T, I: Input<'a>> Inspector<'a, I> for SimpleState<T> {
    type SaveMarker = ();

    fn on_token(&mut self, _token: &I::Token) {}

    fn on_save<'parse>(&self, _offset: I::Offset) -> Self::SaveMarker {}

    fn on_rewind<'parse>(&mut self, _marker: Marker<'a, 'parse, I, Self::SaveMarker>) {}
}

impl<T> Deref for SimpleState<T> {
type Target = T;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> DerefMut for SimpleState<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

impl<T> From<T> for SimpleState<T> {
fn from(value: T) -> Self {
Self(value)
}
}

0 comments on commit ffdb4be

Please sign in to comment.