Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP Disambiguation production #63

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Earley.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@ executable earley-infinite
default-language: Haskell2010
build-depends: base, Earley

executable earley-disambiguate
if !flag(examples)
buildable: False
main-is: Disambiguate.hs
ghc-options: -Wall
hs-source-dirs: examples
default-language: Haskell2010
build-depends: base, Earley, containers

benchmark bench
type: exitcode-stdio-1.0
hs-source-dirs: . bench
Expand Down
2 changes: 1 addition & 1 deletion Text/Earley.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-- | Parsing all context-free grammars using Earley's algorithm.
module Text.Earley
( -- * Context-free grammars
Prod, terminal, (<?>), constraint, Grammar, rule
Prod, terminal, (<?>), constraint, disambiguate, Grammar, rule
, -- * Derived operators
satisfy, token, namedToken, anyToken, list, listLike, matches
, -- * Parsing
Expand Down
11 changes: 11 additions & 0 deletions Text/Earley/Grammar.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module Text.Earley.Grammar
, terminal
, (<?>)
, constraint
, disambiguate
, alts
, Grammar(..)
, rule
Expand Down Expand Up @@ -57,6 +58,8 @@ data Prod r e t a where
Named :: !(Prod r e t a) -> e -> Prod r e t a
-- Non-context-free extension: conditioning on the parsed output.
Constraint :: !(Prod r e t a) -> (a -> Bool) -> Prod r e t a
--
Disamb :: !(Prod r e t a) -> !(Prod r e t ([a] -> [b])) -> Prod r e t b

-- | Match a token for which the given predicate returns @Just a@,
-- and return the @a@.
Expand Down Expand Up @@ -88,6 +91,7 @@ instance Functor (Prod r e t) where
fmap f (Alts as p) = Alts as $ fmap (f .) p
fmap f (Many p q) = Many p $ fmap (f .) q
fmap f (Named p n) = Named (fmap f p) n
fmap f (Disamb p d) = Disamb p (fmap (fmap (fmap f)) d)

-- | Smart constructor for alternatives.
alts :: [Prod r e t a] -> Prod r e t (a -> b) -> Prod r e t b
Expand All @@ -110,6 +114,7 @@ instance Applicative (Prod r e t) where
Alts as p <*> q = alts as $ flip <$> p <*> q
Many a p <*> q = Many a $ flip <$> p <*> q
Named p n <*> q = Named (p <*> q) n
Disamb p d <*> q = Disamb p ((\a b c -> fmap ($ b) (a c)) <$> d <*> q)

instance Alternative (Prod r e t) where
empty = Alts [] $ pure id
Expand Down Expand Up @@ -177,6 +182,12 @@ instance MonadFix (Grammar r) where
rule :: Prod r e t a -> Grammar r (Prod r e t a)
rule p = RuleBind p return

-- | Create a non-terminal which is able to disambiguate possible parses
disambiguate :: ([a] -> b) -> Prod r e t a -> Grammar r (Prod r e t b)
disambiguate d p = do
r <- rule p
pure $ Disamb r (Pure (pure . d))

-- | Run a grammar, given an action to perform on productions to be turned into
-- non-terminals.
runGrammar :: MonadFix m
Expand Down
127 changes: 102 additions & 25 deletions Text/Earley/Parser/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types, RecursiveDo #-}
{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types, RecursiveDo, LambdaCase #-}
-- | This module exposes the internals of the package: its API may change
-- independently of the PVP-compliant version number.
module Text.Earley.Parser.Internal where
Expand All @@ -14,6 +14,10 @@ import Text.Earley.Grammar
import Data.Monoid
#endif
import Data.Semigroup
import Control.Category (Category)
import qualified Control.Category as C
import Data.Traversable (for)
import Data.Functor.Contravariant (contramap)

-------------------------------------------------------------------------------
-- * Concrete rules and productions
Expand Down Expand Up @@ -44,6 +48,10 @@ prodNulls prod = case prod of
Many a p -> prodNulls (pure [] <|> pure <$> a) <**> prodNulls p
Named p _ -> prodNulls p
Constraint p _ -> prodNulls p
Disamb p d -> Results $ do
ps <- unResults $ prodNulls p
ds <- unResults $ prodNulls d
pure $ ($ ps) =<< ds

-- | Remove (some) nulls from a production
removeNulls :: ProdR s r e t a -> ProdR s r e t a
Expand All @@ -56,6 +64,7 @@ removeNulls prod = case prod of
Many {} -> prod
Named p n -> Named (removeNulls p) n
Constraint p n -> Constraint (removeNulls p) n
Disamb p d -> Disamb (removeNulls p) d

type ProdR s r e t a = Prod (Rule s r) e t a

Expand Down Expand Up @@ -108,20 +117,47 @@ data BirthPos
-- | An Earley state with result type @a@.
data State s r e t a where
State :: !(ProdR s r e t a)
-> !(a -> Results s b)
-> !(ResultsCont s a b)
-> !BirthPos
-> !(Conts s r e t b c)
-> State s r e t c
Final :: !(Results s a) -> State s r e t a

newtype ResultsCont s a b = ResultsCont {unResultsCont :: [a] -> Results s b}
deriving(Functor)

instance Category (ResultsCont s) where
id = ResultsCont (Results . pure)
ResultsCont f . ResultsCont g = ResultsCont $ \xs -> Results $ do
ys <- unResults (g xs)
unResults (f ys)

resultArr' :: ([a] -> [b]) -> ResultsCont s a b
resultArr' f = ResultsCont (Results . pure . f)

resultArrs :: [a -> b] -> ResultsCont s a b
resultArrs f = ResultsCont (Results . pure . liftA2 ($) f)

resultArrs' :: [[a] -> [b]] -> ResultsCont s a b
resultArrs' f = ResultsCont (Results . pure . go)
where go x = ($ x) =<< f

resultBind :: ResultsCont s a b -> Results s a -> Results s b
resultBind (ResultsCont f) (Results x) = Results $ do
y <- x
unResults (f y)

manyResults :: [a] -> Results s a
manyResults = Results. pure

-- | A continuation accepting an @a@ and producing a @b@.
data Cont s r e t a b where
Cont :: !(a -> Results s b)
-> !(ProdR s r e t (b -> c))
-> !(c -> Results s d)
Cont :: !(ResultsCont s a b)
-> !(ProdR s r e t ([b] -> [c]))
-> !(ResultsCont s c d)
-> !(Conts s r e t d e')
-> Cont s r e t a e'
FinalCont :: (a -> Results s c) -> Cont s r e t a c
FinalCont :: ResultsCont s a c -> Cont s r e t a c

data Conts s r e t a c = Conts
{ conts :: !(STRef s [Cont s r e t a c])
Expand All @@ -131,13 +167,18 @@ data Conts s r e t a c = Conts
newConts :: STRef s [Cont s r e t a c] -> ST s (Conts s r e t a c)
newConts r = Conts r <$> newSTRef Nothing

contraMapCont :: (b -> Results s a) -> Cont s r e t a c -> Cont s r e t b c
contraMapCont f (Cont g p args cs) = Cont (f >=> g) p args cs
contraMapCont f (FinalCont args) = FinalCont (f >=> args)
contraMapCont :: ResultsCont s b a -> Cont s r e t a c -> Cont s r e t b c
contraMapCont f (Cont g p args cs) = Cont (f >>> g) p args cs
contraMapCont f (FinalCont args) = FinalCont (f >>> args)

contToState :: BirthPos -> Results s a -> Cont s r e t a c -> State s r e t c
contToState pos r (Cont g p args cs) = State p (\f -> r >>= g >>= args . f) pos cs
contToState _ r (FinalCont args) = Final $ r >>= args
contToState pos r (Cont g p args cs) =
State
p
(ResultsCont $ \f -> (args <<< resultArrs' f <<< g) `resultBind` r)
pos
cs
contToState _ r (FinalCont args) = Final $ resultBind args r

-- | Strings of non-ambiguous continuations can be optimised by removing
-- indirections.
Expand All @@ -146,7 +187,7 @@ simplifyCont Conts {conts = cont} = readSTRef cont >>= go False
where
go !_ [Cont g (Pure f) args cont'] = do
ks' <- simplifyCont cont'
go True $ map (contraMapCont $ g >=> args . f) ks'
go True $ map (contraMapCont $ args <<< resultArr' f <<< g) ks'
go True ks = do
writeSTRef cont ks
return ks
Expand All @@ -157,7 +198,7 @@ simplifyCont Conts {conts = cont} = readSTRef cont >>= go False
-------------------------------------------------------------------------------
-- | Given a grammar, construct an initial state.
initialState :: ProdR s a e t a -> ST s (State s a e t a)
initialState p = State p pure Previous <$> (newConts =<< newSTRef [FinalCont pure])
initialState p = State p C.id Previous <$> (newConts =<< newSTRef [FinalCont C.id])

-------------------------------------------------------------------------------
-- * Parsing
Expand Down Expand Up @@ -218,7 +259,8 @@ emptyParseEnv i = ParseEnv
-> ST s (Result s e [t] a) #-}
-- | The internal parsing routine
parse :: ListLike i t
=> [State s a e t a] -- ^ States to process at this position
=> [State s a e t a]
-- ^ States to process at this position, S(k) in the nomenclature
-> ParseEnv s e i t a
-> ST s (Result s e i a)
parse [] env@ParseEnv {results = [], next = []} = do
Expand All @@ -239,25 +281,38 @@ parse [] env = do
parse (st:ss) env = case st of
Final res -> parse ss env {results = unResults res : results env}
State pr args pos scont -> case pr of
-- Scanning operation
Terminal f p -> case ListLike.uncons (input env) >>= f . fst of
Just a -> parse ss env {next = State p (args . ($ a)) Previous scont
-- We have a state S(k) of the form (X → α • a β, j)
-- and thus add (X → α a • β, j) to S(k+1)
-- In our case, advancing the dot past a terminal means applying the
-- results of that terminal to the input of the continuation
-- TODO: applying args to a single 'a' here is not correct in the
-- presence of disambiguation
Just a -> parse ss env {next = State p (args <<< resultArrs [($ a)]) Previous scont
: next env}
Nothing -> parse ss env
-- Prediction operation
-- For every state in S(k) of the form (X → α • Y β, j)...
NonTerminal r p -> do
rkref <- readSTRef $ ruleConts r
ks <- readSTRef rkref
writeSTRef rkref (Cont pure p args scont : ks)
writeSTRef rkref (Cont C.id (fmap fmap p) args scont : ks)
ns <- unResults $ ruleNulls r
-- ...add (Y → • γ, k) to S(k) for every production in the grammar with Y
-- on the left-hand side (Y → γ).
let addNullState
| null ns = id
| otherwise = (:)
$ State p (\f -> Results (pure $ map f ns) >>= args) pos scont
$ State p (ResultsCont $ \f -> args `resultBind` manyResults (liftA2 ($) f ns)) pos scont
if null ks then do -- The rule has not been expanded at this position.
st' <- State (ruleProd r) pure Current <$> newConts rkref
st' <- State (ruleProd r) C.id Current <$> newConts rkref
parse (addNullState $ st' : ss)
env {reset = resetConts r >> reset env}
else -- The rule has already been expanded at this position.
parse (addNullState ss) env
-- Completion operation
-- For every state in S(k) of the form (Y → γ •, j)...
Pure a
-- Skip following continuations that stem from the current position; such
-- continuations are handled separately.
Expand All @@ -267,31 +322,53 @@ parse (st:ss) env = case st of
masref <- readSTRef argsRef
case masref of
Just asref -> do -- The continuation has already been followed at this position.
modifySTRef asref $ mappend $ args a
-- TODO: Applying args to a single a is incorrect in the presence
-- of disambiguation
modifySTRef asref $ mappend $ unResultsCont args [a]
parse ss env
Nothing -> do -- It hasn't.
asref <- newSTRef $ args a
-- ...find all states in S(j) of the form (X → α • Y β, i) and add
-- (X → α Y • β, i) to S(k).
-- In this implementation, advancing the dot requires applying 'a'
-- here to the continuation.
-- TODO: Applying args to a single a is incorrect in the presence
-- of disambiguation
asref <- newSTRef $ unResultsCont args [a]
writeSTRef argsRef $ Just asref
ks <- simplifyCont scont
res <- lazyResults $ unResults =<< readSTRef asref
let kstates = map (contToState pos res) ks
parse (kstates ++ ss)
env {reset = writeSTRef argsRef Nothing >> reset env}
-- We need to add p with a continuation which takes into account 'd'
Disamb p (Pure d) -> do
parse (State p (args <<< resultArr' d) pos scont : ss) env
Disamb p d -> do
scont' <- newConts =<< newSTRef [Cont C.id d args scont]
parse (State p C.id Previous scont' : ss) env
-- For every alternative, add a state for that production all pointing to
-- the same continuation.
Alts as (Pure f) -> do
let args' = args . f
-- TODO: is resultArrs safe in the presence of disambiguation
let args' = args <<< resultArrs [f]
sts = [State a args' pos scont | a <- as]
parse (sts ++ ss) env
Alts as p -> do
scont' <- newConts =<< newSTRef [Cont pure p args scont]
let sts = [State a pure Previous scont' | a <- as]
scont' <- newConts =<< newSTRef [Cont C.id (fmap fmap p) args scont]
let sts = [State a C.id Previous scont' | a <- as]
parse (sts ++ ss) env
-- Rustle up a left-recursive non-terminal and add it to the states to be
-- processed next.
Many p q -> mdo
r <- mkRule $ pure [] <|> (:) <$> p <*> NonTerminal r (Pure id)
parse (State (NonTerminal r q) args pos scont : ss) env
-- Insert a state for the named production, but add the name to the list of
-- names for this position
Named pr' n -> parse (State pr' args pos scont : ss)
env {names = n : names env}
Constraint pr' c -> parse (State pr' (test >=> args) pos scont : ss) env
where test x = if c x then return x else empty
-- Insert a state whose continuation filters any results
Constraint pr' c -> parse (State pr' (args <<< test) pos scont : ss) env
where test = resultArr' (filter c)

type Parser e i a = forall s. i -> ST s (Result s e i a)

Expand Down
68 changes: 68 additions & 0 deletions examples/Disambiguate.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RecursiveDo #-}

import Control.Applicative
import Data.Char
import Data.Foldable (traverse_)
import Data.Tree
import System.Environment
import Text.Earley
import Text.Earley.Grammar

type Expr = Tree String
pattern Add, Mul :: Tree String -> Tree String -> Tree String
pattern Add x y = Node "+" [x, y]
pattern Mul x y = Node "*" [x, y]
pattern Var :: a -> Tree a
pattern Var n = Node n []
pattern Amb :: [Tree String] -> Tree String
pattern Amb xs = Node "Ambiguous" xs

expr :: Grammar r (Prod r String String Expr)
expr = mdo
let exprProd = disambiguate $ \case
[x] -> x
xs -> Amb xs
e <-
exprProd $
Add <$> e <* namedToken "+" <*> e
<|> Mul <$> e <* namedToken "*" <*> e
<|> Var <$> satisfy ident
<|> namedToken "(" *> e <* namedToken "("
return e
where
ident (x : _) = isAlpha x
ident _ = False

-- λ> :main "A + B * C * G"
-- Ambiguous
-- ├╴ ((A+B)*(C*G))
-- ├╴ *
-- │ ├╴ Ambiguous
-- │ │ ├╴ ((A+B)*C)
-- │ │ └╴ (A+(B*C))
-- │ └╴ G
-- └╴ +
-- ├╴ A
-- └╴ Ambiguous
-- ├╴ (B*(C*G))
-- └╴ ((B*C)*G)
main :: IO ()
main = do
x : _ <- getArgs
let (ps, r) = fullParses (parser expr) (words x)
traverse_ (putStrLn . drawTree . simplifyTree) ps
print r

-- | render non-ambiguous expressions on one line to make the printed tree
-- smaller
simplifyTree :: Expr -> Expr
simplifyTree =
foldTree
( \op -> \case
[Node n [], Node m []] | op /= "Ambiguous" -> Node (parens (n <> op <> m)) []
ns -> Node op ns
)
where
parens x = "(" <> x <> ")"