From e7de8b77b63e1ee889d98cb6dfc2994178f23e6d Mon Sep 17 00:00:00 2001 From: Olle Fredriksson Date: Tue, 15 Mar 2016 20:38:42 +0100 Subject: [PATCH] Change Terminal to take an `a -> Maybe b` And rename and deprecate some operators --- CHANGELOG.md | 5 +++++ Text/Earley.hs | 4 +++- Text/Earley/Derived.hs | 40 +++++++++++++++++++++++++++++++++------ Text/Earley/Grammar.hs | 28 +++++++++++++++------------ Text/Earley/Internal.hs | 8 ++++---- bench/BenchAll.hs | 12 ++++++------ examples/Expr.hs | 6 +++--- examples/Expr2.hs | 2 +- examples/Infinite.hs | 2 +- examples/Mixfix.hs | 4 ++-- examples/VeryAmbiguous.hs | 2 +- tests/Expr.hs | 12 ++++++------ tests/InlineAlts.hs | 4 ++-- tests/Issue11.hs | 2 +- tests/Mixfix.hs | 4 ++-- tests/Optional.hs | 12 ++++++------ tests/ReversedWords.hs | 2 +- tests/VeryAmbiguous.hs | 2 +- 18 files changed, 95 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66e4ae1..31c028d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Unreleased - Add `IsString Prod` instance +- Change the signature of `Terminal` to take a function `a -> Maybe b`, and add a new operator `terminal` +- Move `satisfy` to the `Derived` module +- Add the `token`, `namedToken`, and `list` operators +- Deprecate the `symbol`, `namedSymbol`, and `word` operators (use the above instead) +- Add the `listLike` operator # 0.10.1.0 diff --git a/Text/Earley.hs b/Text/Earley.hs index 5cd825a..970848b 100644 --- a/Text/Earley.hs +++ b/Text/Earley.hs @@ -1,8 +1,10 @@ -- | Parsing all context-free grammars using Earley's algorithm. module Text.Earley ( -- * Context-free grammars - Prod, satisfy, (), Grammar, rule + Prod, terminal, (), Grammar, rule , -- * Derived operators + satisfy, token, namedToken, list, listLike + , -- * Deprecated operators symbol, namedSymbol, word , -- * Parsing Report(..), Result(..), parser, allParses, fullParses diff --git a/Text/Earley/Derived.hs b/Text/Earley/Derived.hs index 16f14b9..7fcf13c 100644 --- a/Text/Earley/Derived.hs +++ b/Text/Earley/Derived.hs @@ -1,18 +1,46 @@ -- | Derived operators. module Text.Earley.Derived where import Control.Applicative hiding (many) +import Data.ListLike(ListLike) +import qualified Data.ListLike as ListLike import Text.Earley.Grammar +-- | Match a token that satisfies the given predicate. Returns the matched +-- token. +{-# INLINE satisfy #-} +satisfy :: (t -> Bool) -> Prod r e t t +satisfy p = Terminal f $ Pure id + where + f t | p t = Just t + f _ = Nothing + -- | Match a single token. -symbol :: Eq t => t -> Prod r e t t -symbol x = satisfy (== x) +token :: Eq t => t -> Prod r e t t +token x = satisfy (== x) -- | Match a single token and give it the name of the token. -namedSymbol :: Eq t => t -> Prod r t t t -namedSymbol x = symbol x x +namedToken :: Eq t => t -> Prod r t t t +namedToken x = token x x -- | Match a list of tokens in sequence. -{-# INLINE word #-} +{-# INLINE list #-} +list :: Eq t => [t] -> Prod r e t [t] +list = foldr (liftA2 (:) . satisfy . (==)) (pure []) + +-- | Match a 'ListLike' of tokens in sequence. +{-# INLINE listLike #-} +listLike :: (Eq t, ListLike i t) => i -> Prod r e t i +listLike = ListLike.foldr (liftA2 ListLike.cons . satisfy . (==)) (pure ListLike.empty) + +{-# DEPRECATED symbol "Use `token` instead" #-} +symbol :: Eq t => t -> Prod r e t t +symbol = token + +{-# DEPRECATED namedSymbol "Use `namedToken` instead" #-} +namedSymbol :: Eq t => t -> Prod r e t t +namedSymbol = token + +{-# DEPRECATED word "Use `list` or `listLike` instead" #-} word :: Eq t => [t] -> Prod r e t [t] -word = foldr (liftA2 (:) . satisfy . (==)) (pure []) +word = list diff --git a/Text/Earley/Grammar.hs b/Text/Earley/Grammar.hs index c0ae2c1..d184497 100644 --- a/Text/Earley/Grammar.hs +++ b/Text/Earley/Grammar.hs @@ -2,7 +2,7 @@ {-# LANGUAGE CPP, GADTs, RankNTypes #-} module Text.Earley.Grammar ( Prod(..) - , satisfy + , terminal , () , alts , Grammar(..) @@ -44,7 +44,7 @@ infixr 0 -- 'Functor', 'Applicative', and 'Alternative'. data Prod r e t a where -- Applicative. - Terminal :: !(t -> Bool) -> !(Prod r e t (t -> b)) -> Prod r e t b + Terminal :: !(t -> Maybe a) -> !(Prod r e t (a -> b)) -> Prod r e t b NonTerminal :: !(r e t a) -> !(Prod r e t (a -> b)) -> Prod r e t b Pure :: a -> Prod r e t a -- Monoid/Alternative. We have to special-case 'many' (though it can be done @@ -54,10 +54,10 @@ data Prod r e t a where -- Error reporting. Named :: !(Prod r e t a) -> e -> Prod r e t a --- | Match a token that satisfies the given predicate. Returns the matched token. -{-# INLINE satisfy #-} -satisfy :: (t -> Bool) -> Prod r e t t -satisfy p = Terminal p $ Pure id +-- | Match a token for which the given predicate returns @Just a@, +-- and return the @a@. +terminal :: (t -> Maybe a) -> Prod r e t a +terminal p = Terminal p $ Pure id -- | A named production (used for reporting expected things). () :: Prod r e t a -> e -> Prod r e t a @@ -106,17 +106,21 @@ instance Alternative (Prod r e t) where many (Alts [] _) = pure [] many p = Many p $ Pure id some p = (:) <$> p <*> many p - + -- | String literals can be interpreted as 'Terminal's --- that match that string. --- +-- that match that string. +-- -- >>> :set -XOverloadedStrings -- >>> import Data.Text (Text) -- >>> let determiner = "the" <|> "a" <|> "an" :: Prod r e Text Text --- +-- instance (IsString t, Eq t, a ~ t) => IsString (Prod r e t a) where - fromString s = satisfy (== fromString s) - {-# INLINE fromString #-} + fromString s = Terminal f $ Pure id + where + fs = fromString s + f t | t == fs = Just fs + f _ = Nothing + {-# INLINE fromString #-} -- | A context-free grammar. -- diff --git a/Text/Earley/Internal.hs b/Text/Earley/Internal.hs index 5c31d17..174b376 100644 --- a/Text/Earley/Internal.hs +++ b/Text/Earley/Internal.hs @@ -239,10 +239,10 @@ parse [] env = do parse (st:ss) env = case st of Final res -> parse ss env {results = unResults res : results env} State pr args pos scont -> case pr of - Terminal f p -> case safeHead $ input env of - Just t | f t -> parse ss env {next = State p (args . ($ t)) Previous scont - : next env} - _ -> parse ss env + Terminal f p -> case safeHead (input env) >>= f of + Just a -> parse ss env {next = State p (args . ($ a)) Previous scont + : next env} + Nothing -> parse ss env NonTerminal r p -> do rkref <- readSTRef $ ruleConts r ks <- readSTRef rkref diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index deac732..19603c4 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -47,14 +47,14 @@ treeSum n = let a = n `div` 2 -- will be at least 1 expr :: Grammar r (Prod r String Token Expr) expr = mdo - x1 <- rule $ Add <$> x1 <* namedSymbol "+" <*> x2 + x1 <- rule $ Add <$> x1 <* namedToken "+" <*> x2 <|> x2 "sum" - x2 <- rule $ Mul <$> x2 <* namedSymbol "*" <*> x3 + x2 <- rule $ Mul <$> x2 <* namedToken "*" <*> x3 <|> x3 "product" x3 <- rule $ Var <$> (satisfy isIdent "identifier") - <|> namedSymbol "(" *> x1 <* namedSymbol ")" + <|> namedToken "(" *> x1 <* namedToken ")" return x1 isIdent :: String -> Bool @@ -68,9 +68,9 @@ sepBy1 p op = mdo expr' :: Grammar r (Prod r String Token Expr) expr' = mdo - let var = Var <$> satisfy isIdent <|> symbol "(" *> mul <* symbol ")" - mul <- fmap (foldl1 Mul) <$> add `sepBy1` symbol "*" - add <- fmap (foldl1 Add) <$> var `sepBy1` symbol "+" + let var = Var <$> satisfy isIdent <|> token "(" *> mul <* token ")" + mul <- fmap (foldl1 Mul) <$> add `sepBy1` token "*" + add <- fmap (foldl1 Add) <$> var `sepBy1` token "+" return mul parseEarley :: [Token] -> Maybe Expr diff --git a/examples/Expr.hs b/examples/Expr.hs index 442ea0a..db9eae8 100644 --- a/examples/Expr.hs +++ b/examples/Expr.hs @@ -12,14 +12,14 @@ data Expr expr :: Grammar r (Prod r String String Expr) expr = mdo - x1 <- rule $ Add <$> x1 <* namedSymbol "+" <*> x2 + x1 <- rule $ Add <$> x1 <* namedToken "+" <*> x2 <|> x2 "sum" - x2 <- rule $ Mul <$> x2 <* namedSymbol "*" <*> x3 + x2 <- rule $ Mul <$> x2 <* namedToken "*" <*> x3 <|> x3 "product" x3 <- rule $ Var <$> (satisfy ident "identifier") - <|> namedSymbol "(" *> x1 <* namedSymbol ")" + <|> namedToken "(" *> x1 <* namedToken ")" return x1 where ident (x:_) = isAlpha x diff --git a/examples/Expr2.hs b/examples/Expr2.hs index d4b4439..3853cee 100644 --- a/examples/Expr2.hs +++ b/examples/Expr2.hs @@ -19,7 +19,7 @@ grammar = mdo let token :: Prod r String Char a -> Prod r String Char a token p = whitespace *> p - sym x = token $ symbol x [x] + sym x = token $ token x [x] ident = token $ (:) <$> satisfy isAlpha <*> many (satisfy isAlphaNum) "identifier" num = token $ some (satisfy isDigit) "number" diff --git a/examples/Infinite.hs b/examples/Infinite.hs index ae44abd..5426b51 100644 --- a/examples/Infinite.hs +++ b/examples/Infinite.hs @@ -6,7 +6,7 @@ import Text.Earley grammar :: Grammar r (Prod r () Char [Maybe Char]) grammar = mdo as <- rule $ pure [] - <|> (:) <$> optional (symbol 'a') <*> as + <|> (:) <$> optional (token 'a') <*> as return as -- This grammar has an infinite number of results. We can still recognise the diff --git a/examples/Mixfix.hs b/examples/Mixfix.hs index a4e9095..ce90da1 100644 --- a/examples/Mixfix.hs +++ b/examples/Mixfix.hs @@ -34,13 +34,13 @@ grammar = mdo ident <- rule $ (V . pure . Just) <$> satisfy (not . (`HS.member` mixfixParts)) "identifier" atom <- rule $ ident - <|> namedSymbol "(" *> expr <* namedSymbol ")" + <|> namedToken "(" *> expr <* namedToken ")" normalApp <- rule $ atom <|> App <$> atom <*> some atom expr <- mixfixExpression table normalApp (App . V) return expr where - table = map (map $ first $ map $ fmap namedSymbol) identTable + table = map (map $ first $ map $ fmap namedToken) identTable mixfixParts = HS.fromList [s | xs <- identTable , (ys, _) <- xs , Just s <- ys] `mappend` HS.fromList ["(", ")"] diff --git a/examples/VeryAmbiguous.hs b/examples/VeryAmbiguous.hs index d2ed62e..bbc3787 100644 --- a/examples/VeryAmbiguous.hs +++ b/examples/VeryAmbiguous.hs @@ -5,7 +5,7 @@ import Text.Earley g :: Grammar r (Prod r Char Char ()) g = mdo - s <- rule $ () <$ symbol 'b' + s <- rule $ () <$ token 'b' <|> () <$ s <* s <|> () <$ s <* s <* s 's' diff --git a/tests/Expr.hs b/tests/Expr.hs index 3599c47..cc209eb 100644 --- a/tests/Expr.hs +++ b/tests/Expr.hs @@ -43,14 +43,14 @@ instance Arbitrary Expr where expr :: Grammar r (Prod r String String Expr) expr = mdo - x1 <- rule $ Add <$> x1 <* namedSymbol "+" <*> x2 + x1 <- rule $ Add <$> x1 <* namedToken "+" <*> x2 <|> x2 "sum" - x2 <- rule $ Mul <$> x2 <* namedSymbol "*" <*> x3 + x2 <- rule $ Mul <$> x2 <* namedToken "*" <*> x3 <|> x3 "product" x3 <- rule $ Var <$> (satisfy ident "identifier") - <|> namedSymbol "(" *> x1 <* namedSymbol ")" + <|> namedToken "(" *> x1 <* namedToken ")" return x1 where ident (x:_) = isAlpha x @@ -58,14 +58,14 @@ expr = mdo ambiguousExpr :: Grammar r (Prod r String String Expr) ambiguousExpr = mdo - x1 <- rule $ Add <$> x1 <* namedSymbol "+" <*> x1 + x1 <- rule $ Add <$> x1 <* namedToken "+" <*> x1 <|> x2 "sum" - x2 <- rule $ Mul <$> x2 <* namedSymbol "*" <*> x2 + x2 <- rule $ Mul <$> x2 <* namedToken "*" <*> x2 <|> x3 "product" x3 <- rule $ Var <$> (satisfy ident "identifier") - <|> namedSymbol "(" *> x1 <* namedSymbol ")" + <|> namedToken "(" *> x1 <* namedToken ")" return x1 where ident (x:_) = isAlpha x diff --git a/tests/InlineAlts.hs b/tests/InlineAlts.hs index ffb9b2b..b9312a4 100644 --- a/tests/InlineAlts.hs +++ b/tests/InlineAlts.hs @@ -16,11 +16,11 @@ tests = testGroup "Inline alternatives" inlineAlts :: Grammar r (Prod r Char Char String) inlineAlts = mdo p <- rule $ pure [] - <|> (:) <$> (namedSymbol 'a' <|> namedSymbol 'b') <*> p + <|> (:) <$> (namedToken 'a' <|> namedToken 'b') <*> p return p nonInlineAlts :: Grammar r (Prod r Char Char String) nonInlineAlts = mdo - ab <- rule $ namedSymbol 'a' <|> namedSymbol 'b' + ab <- rule $ namedToken 'a' <|> namedToken 'b' p <- rule $ pure [] <|> (:) <$> ab <*> p return p diff --git a/tests/Issue11.hs b/tests/Issue11.hs index 9017f0a..88fcffc 100644 --- a/tests/Issue11.hs +++ b/tests/Issue11.hs @@ -64,7 +64,7 @@ grammar a = mdo atomicExpr <- rule $ Var <$> satisfy (/= "+") expr <- mixfixExpression - [[([Just (symbol "+"), Nothing, Nothing], a)]] + [[([Just (token "+"), Nothing, Nothing], a)]] atomicExpr (\x y -> case (x,y) of ([Just "+", Nothing, Nothing], [e1,e2]) -> Plus e1 e2 diff --git a/tests/Mixfix.hs b/tests/Mixfix.hs index 5e47d42..a882f4b 100644 --- a/tests/Mixfix.hs +++ b/tests/Mixfix.hs @@ -31,10 +31,10 @@ data MixfixExpr = Ident (Holey String) | App (Holey String) [MixfixExpr] mixfixGrammar :: Grammar r (Prod r String String MixfixExpr) mixfixGrammar = mixfixExpression table - (Ident . pure . Just <$> namedSymbol "x") + (Ident . pure . Just <$> namedToken "x") App where - hident = map (fmap symbol) + hident = map (fmap token) table = [ [(hident ifthenelse, RightAssoc)] , [(hident prefix, RightAssoc)] diff --git a/tests/Optional.hs b/tests/Optional.hs index fd4cdec..6cb348d 100644 --- a/tests/Optional.hs +++ b/tests/Optional.hs @@ -21,23 +21,23 @@ tests = testGroup "Optional" fullParses (parser optionalRule) "ab" @?= (,) [(Just 'a', 'b')] Report {position = 2, expected = "", unconsumed = ""} , HU.testCase "Without continuation Nothing" $ - fullParses (parser $ return $ optional $ namedSymbol 'a') "" + fullParses (parser $ return $ optional $ namedToken 'a') "" @?= (,) [Nothing] Report {position = 0, expected = "a", unconsumed = ""} , HU.testCase "Without continuation Just" $ - fullParses (parser $ return $ optional $ namedSymbol 'a') "a" + fullParses (parser $ return $ optional $ namedToken 'a') "a" @?= (,) [Just 'a'] Report {position = 1, expected = "", unconsumed = ""} , HU.testCase "Using rules without continuation Nothing" $ - fullParses (parser $ rule $ optional $ namedSymbol 'a') "" + fullParses (parser $ rule $ optional $ namedToken 'a') "" @?= (,) [Nothing] Report {position = 0, expected = "a", unconsumed = ""} , HU.testCase "Using rules without continuation Just" $ - fullParses (parser $ rule $ optional $ namedSymbol 'a') "a" + fullParses (parser $ rule $ optional $ namedToken 'a') "a" @?= (,) [Just 'a'] Report {position = 1, expected = "", unconsumed = ""} ] optional_ :: Prod r Char Char (Maybe Char, Char) -optional_ = (,) <$> optional (namedSymbol 'a') <*> namedSymbol 'b' +optional_ = (,) <$> optional (namedToken 'a') <*> namedToken 'b' optionalRule :: Grammar r (Prod r Char Char (Maybe Char, Char)) optionalRule = mdo - test <- rule $ (,) <$> optional (namedSymbol 'a') <*> namedSymbol 'b' + test <- rule $ (,) <$> optional (namedToken 'a') <*> namedToken 'b' return test diff --git a/tests/ReversedWords.hs b/tests/ReversedWords.hs index c1ca87e..e77f4b9 100644 --- a/tests/ReversedWords.hs +++ b/tests/ReversedWords.hs @@ -6,7 +6,7 @@ import Test.Tasty.HUnit as HU import Text.Earley someWords :: Grammar r (Prod r () Char [String]) -someWords = return $ flip (:) <$> (map reverse <$> some (word "word")) <*> word "stop" +someWords = return $ flip (:) <$> (map reverse <$> some (list "word")) <*> list "stop" tests :: TestTree tests = testGroup "Unit Tests" diff --git a/tests/VeryAmbiguous.hs b/tests/VeryAmbiguous.hs index b4ddfd6..806579d 100644 --- a/tests/VeryAmbiguous.hs +++ b/tests/VeryAmbiguous.hs @@ -17,7 +17,7 @@ tests = testGroup "Very ambiguous" veryAmbiguous :: Grammar r (Prod r Char Char ()) veryAmbiguous = mdo - s <- rule $ () <$ symbol 'b' + s <- rule $ () <$ token 'b' <|> () <$ s <* s <|> () <$ s <* s <* s 's'