From 39992649e455e172749fde9d0f12635589ac39be Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Fri, 12 May 2023 18:03:31 +0800 Subject: [PATCH 1/6] Add some comments and make ResultsCont a newtype with a Category instance --- Text/Earley/Parser/Internal.hs | 97 ++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 22 deletions(-) diff --git a/Text/Earley/Parser/Internal.hs b/Text/Earley/Parser/Internal.hs index 98dc43f..fcb2fbc 100644 --- a/Text/Earley/Parser/Internal.hs +++ b/Text/Earley/Parser/Internal.hs @@ -14,6 +14,8 @@ import Text.Earley.Grammar import Data.Monoid #endif import Data.Semigroup +import Control.Category (Category) +import qualified Control.Category as C ------------------------------------------------------------------------------- -- * Concrete rules and productions @@ -108,20 +110,43 @@ data BirthPos -- | An Earley state with result type @a@. data State s r e t a where State :: !(ProdR s r e t a) - -> !(a -> Results s b) + -> !(ResultsCont s a b) -> !BirthPos -> !(Conts s r e t b c) -> State s r e t c Final :: !(Results s a) -> State s r e t a +newtype ResultsCont s a b = ResultsCont {unResultsCont :: a -> Results s b} + deriving(Functor) + +instance Applicative (ResultsCont s a) where + pure = ResultsCont . const . pure + ResultsCont f <*> ResultsCont x = ResultsCont (\a -> f a <*> x a) + +instance Monad (ResultsCont s a) where + ResultsCont x >>= k = ResultsCont (\a -> ($ a) . unResultsCont . k =<< x a) + +instance Category (ResultsCont s) where + id = ResultsCont pure + ResultsCont f . ResultsCont g = ResultsCont (f <=< g) + +resultArr :: (a -> b) -> ResultsCont s a b +resultArr f = ResultsCont (pure . f) + +resultBind :: ResultsCont s a b -> Results s a -> Results s b +resultBind (ResultsCont f) x = f =<< x + +manyResults :: [a] -> Results s a +manyResults = Results. pure + -- | A continuation accepting an @a@ and producing a @b@. data Cont s r e t a b where - Cont :: !(a -> Results s b) + Cont :: !(ResultsCont s a b) -> !(ProdR s r e t (b -> c)) - -> !(c -> Results s d) + -> !(ResultsCont s c d) -> !(Conts s r e t d e') -> Cont s r e t a e' - FinalCont :: (a -> Results s c) -> Cont s r e t a c + FinalCont :: ResultsCont s a c -> Cont s r e t a c data Conts s r e t a c = Conts { conts :: !(STRef s [Cont s r e t a c]) @@ -131,13 +156,18 @@ data Conts s r e t a c = Conts newConts :: STRef s [Cont s r e t a c] -> ST s (Conts s r e t a c) newConts r = Conts r <$> newSTRef Nothing -contraMapCont :: (b -> Results s a) -> Cont s r e t a c -> Cont s r e t b c -contraMapCont f (Cont g p args cs) = Cont (f >=> g) p args cs -contraMapCont f (FinalCont args) = FinalCont (f >=> args) +contraMapCont :: ResultsCont s b a -> Cont s r e t a c -> Cont s r e t b c +contraMapCont f (Cont g p args cs) = Cont (f >>> g) p args cs +contraMapCont f (FinalCont args) = FinalCont (f >>> args) contToState :: BirthPos -> Results s a -> Cont s r e t a c -> State s r e t c -contToState pos r (Cont g p args cs) = State p (\f -> r >>= g >>= args . f) pos cs -contToState _ r (FinalCont args) = Final $ r >>= args +contToState pos r (Cont g p args cs) = + State + p + (ResultsCont $ \f -> unResultsCont (args <<< resultArr f <<< g) =<< r) + pos + cs +contToState _ r (FinalCont args) = Final $ resultBind args r -- | Strings of non-ambiguous continuations can be optimised by removing -- indirections. @@ -146,7 +176,7 @@ simplifyCont Conts {conts = cont} = readSTRef cont >>= go False where go !_ [Cont g (Pure f) args cont'] = do ks' <- simplifyCont cont' - go True $ map (contraMapCont $ g >=> args . f) ks' + go True $ map (contraMapCont $ args <<< resultArr f <<< g) ks' go True ks = do writeSTRef cont ks return ks @@ -157,7 +187,7 @@ simplifyCont Conts {conts = cont} = readSTRef cont >>= go False ------------------------------------------------------------------------------- -- | Given a grammar, construct an initial state. initialState :: ProdR s a e t a -> ST s (State s a e t a) -initialState p = State p pure Previous <$> (newConts =<< newSTRef [FinalCont pure]) +initialState p = State p C.id Previous <$> (newConts =<< newSTRef [FinalCont C.id]) ------------------------------------------------------------------------------- -- * Parsing @@ -218,7 +248,8 @@ emptyParseEnv i = ParseEnv -> ST s (Result s e [t] a) #-} -- | The internal parsing routine parse :: ListLike i t - => [State s a e t a] -- ^ States to process at this position + => [State s a e t a] + -- ^ States to process at this position, S(k) in the nomenclature -> ParseEnv s e i t a -> ST s (Result s e i a) parse [] env@ParseEnv {results = [], next = []} = do @@ -239,25 +270,36 @@ parse [] env = do parse (st:ss) env = case st of Final res -> parse ss env {results = unResults res : results env} State pr args pos scont -> case pr of + -- Scanning operation Terminal f p -> case ListLike.uncons (input env) >>= f . fst of - Just a -> parse ss env {next = State p (args . ($ a)) Previous scont + -- We have a state S(k) of the form (X → α • a β, j) + -- and thus add (X → α a • β, j) to S(k+1) + -- In our case, advancing the dot past a terminal means applying the + -- results of that terminal to the input of the continuation + Just a -> parse ss env {next = State p (args <<< resultArr ($ a)) Previous scont : next env} Nothing -> parse ss env + -- Prediction operation + -- For every state in S(k) of the form (X → α • Y β, j)... NonTerminal r p -> do rkref <- readSTRef $ ruleConts r ks <- readSTRef rkref - writeSTRef rkref (Cont pure p args scont : ks) + writeSTRef rkref (Cont C.id p args scont : ks) ns <- unResults $ ruleNulls r + -- ...add (Y → • γ, k) to S(k) for every production in the grammar with Y + -- on the left-hand side (Y → γ). let addNullState | null ns = id | otherwise = (:) - $ State p (\f -> Results (pure $ map f ns) >>= args) pos scont + $ State p (ResultsCont $ \f -> args `resultBind` manyResults (map f ns)) pos scont if null ks then do -- The rule has not been expanded at this position. - st' <- State (ruleProd r) pure Current <$> newConts rkref + st' <- State (ruleProd r) C.id Current <$> newConts rkref parse (addNullState $ st' : ss) env {reset = resetConts r >> reset env} else -- The rule has already been expanded at this position. parse (addNullState ss) env + -- Completion operation + -- For every state in S(k) of the form (Y → γ •, j)... Pure a -- Skip following continuations that stem from the current position; such -- continuations are handled separately. @@ -267,30 +309,41 @@ parse (st:ss) env = case st of masref <- readSTRef argsRef case masref of Just asref -> do -- The continuation has already been followed at this position. - modifySTRef asref $ mappend $ args a + modifySTRef asref $ mappend $ unResultsCont args a parse ss env Nothing -> do -- It hasn't. - asref <- newSTRef $ args a + -- ...find all states in S(j) of the form (X → α • Y β, i) and add + -- (X → α Y • β, i) to S(k). + -- In this implementation, advancing the dot requires applying 'a' + -- here to the continuation. + asref <- newSTRef $ unResultsCont args a writeSTRef argsRef $ Just asref ks <- simplifyCont scont res <- lazyResults $ unResults =<< readSTRef asref let kstates = map (contToState pos res) ks parse (kstates ++ ss) env {reset = writeSTRef argsRef Nothing >> reset env} + -- For every alternative, add a state for that production all pointing to + -- the same continuation. Alts as (Pure f) -> do - let args' = args . f + let args' = args <<< ResultsCont (\x -> pure (f x)) sts = [State a args' pos scont | a <- as] parse (sts ++ ss) env Alts as p -> do - scont' <- newConts =<< newSTRef [Cont pure p args scont] - let sts = [State a pure Previous scont' | a <- as] + scont' <- newConts =<< newSTRef [Cont C.id p args scont] + let sts = [State a C.id Previous scont' | a <- as] parse (sts ++ ss) env + -- Rustle up a left-recursive non-terminal and add it to the states to be + -- processed next. Many p q -> mdo r <- mkRule $ pure [] <|> (:) <$> p <*> NonTerminal r (Pure id) parse (State (NonTerminal r q) args pos scont : ss) env + -- Insert a state for the named production, but add the name to the list of + -- names for this position Named pr' n -> parse (State pr' args pos scont : ss) env {names = n : names env} - Constraint pr' c -> parse (State pr' (test >=> args) pos scont : ss) env + -- Insert a state whose continuation filters any results + Constraint pr' c -> parse (State pr' (ResultsCont test >>> args) pos scont : ss) env where test x = if c x then return x else empty type Parser e i a = forall s. i -> ST s (Result s e i a) From 74d0c9aa8171b3d54bebf627e2e67397c899bd8c Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Sat, 13 May 2023 11:50:22 +0800 Subject: [PATCH 2/6] Add Disamb production Unhandled in parse at the moment --- Text/Earley/Grammar.hs | 8 ++++++++ Text/Earley/Parser/Internal.hs | 8 +++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Text/Earley/Grammar.hs b/Text/Earley/Grammar.hs index abd68ce..4c3c0ce 100644 --- a/Text/Earley/Grammar.hs +++ b/Text/Earley/Grammar.hs @@ -5,6 +5,7 @@ module Text.Earley.Grammar , terminal , () , constraint + , disambiguate , alts , Grammar(..) , rule @@ -57,6 +58,8 @@ data Prod r e t a where Named :: !(Prod r e t a) -> e -> Prod r e t a -- Non-context-free extension: conditioning on the parsed output. Constraint :: !(Prod r e t a) -> (a -> Bool) -> Prod r e t a + -- + Disamb :: !(Prod r e t a) -> !(Prod r e t ([a] -> [b])) -> Prod r e t b -- | Match a token for which the given predicate returns @Just a@, -- and return the @a@. @@ -71,6 +74,9 @@ terminal p = Terminal p $ Pure id constraint :: (a -> Bool) -> Prod r e t a -> Prod r e t a constraint = flip Constraint +disambiguate :: ([a] -> [b]) -> Prod r e t a -> Prod r e t b +disambiguate d = flip Disamb (Pure d) + -- | Lifted instance: @(<>) = 'liftA2' ('<>')@ instance Semigroup a => Semigroup (Prod r e t a) where (<>) = liftA2 (Data.Semigroup.<>) @@ -88,6 +94,7 @@ instance Functor (Prod r e t) where fmap f (Alts as p) = Alts as $ fmap (f .) p fmap f (Many p q) = Many p $ fmap (f .) q fmap f (Named p n) = Named (fmap f p) n + fmap f (Disamb p d) = Disamb p (fmap (fmap (fmap f)) d) -- | Smart constructor for alternatives. alts :: [Prod r e t a] -> Prod r e t (a -> b) -> Prod r e t b @@ -110,6 +117,7 @@ instance Applicative (Prod r e t) where Alts as p <*> q = alts as $ flip <$> p <*> q Many a p <*> q = Many a $ flip <$> p <*> q Named p n <*> q = Named (p <*> q) n + Disamb p d <*> q = Disamb p ((\a b c -> fmap ($ b) (a c)) <$> d <*> q) instance Alternative (Prod r e t) where empty = Alts [] $ pure id diff --git a/Text/Earley/Parser/Internal.hs b/Text/Earley/Parser/Internal.hs index fcb2fbc..7d031a1 100644 --- a/Text/Earley/Parser/Internal.hs +++ b/Text/Earley/Parser/Internal.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types, RecursiveDo #-} +{-# LANGUAGE CPP, BangPatterns, DeriveFunctor, GADTs, Rank2Types, RecursiveDo, LambdaCase #-} -- | This module exposes the internals of the package: its API may change -- independently of the PVP-compliant version number. module Text.Earley.Parser.Internal where @@ -16,6 +16,7 @@ import Data.Monoid import Data.Semigroup import Control.Category (Category) import qualified Control.Category as C +import Data.Traversable (for) ------------------------------------------------------------------------------- -- * Concrete rules and productions @@ -46,6 +47,10 @@ prodNulls prod = case prod of Many a p -> prodNulls (pure [] <|> pure <$> a) <**> prodNulls p Named p _ -> prodNulls p Constraint p _ -> prodNulls p + Disamb p d -> Results $ do + ps <- unResults $ prodNulls p + ds <- unResults $ prodNulls d + pure $ ($ ps) =<< ds -- | Remove (some) nulls from a production removeNulls :: ProdR s r e t a -> ProdR s r e t a @@ -58,6 +63,7 @@ removeNulls prod = case prod of Many {} -> prod Named p n -> Named (removeNulls p) n Constraint p n -> Constraint (removeNulls p) n + Disamb p d -> Disamb (removeNulls p) d type ProdR s r e t a = Prod (Rule s r) e t a From 886eec4cb2bd5fca6d3d627c1d054c244a3d5273 Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Sat, 13 May 2023 11:53:10 +0800 Subject: [PATCH 3/6] Make ResultsCont operate over lists --- Text/Earley/Parser/Internal.hs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Text/Earley/Parser/Internal.hs b/Text/Earley/Parser/Internal.hs index 7d031a1..1ceb61f 100644 --- a/Text/Earley/Parser/Internal.hs +++ b/Text/Earley/Parser/Internal.hs @@ -122,25 +122,29 @@ data State s r e t a where -> State s r e t c Final :: !(Results s a) -> State s r e t a -newtype ResultsCont s a b = ResultsCont {unResultsCont :: a -> Results s b} +newtype ResultsCont s a b = ResultsCont {unResultsCont :: [a] -> Results s b} deriving(Functor) -instance Applicative (ResultsCont s a) where - pure = ResultsCont . const . pure - ResultsCont f <*> ResultsCont x = ResultsCont (\a -> f a <*> x a) +instance Category (ResultsCont s) where + id = ResultsCont (Results . pure) + ResultsCont f . ResultsCont g = ResultsCont $ \xs -> Results $ do + ys <- unResults (g xs) + unResults (f ys) -instance Monad (ResultsCont s a) where - ResultsCont x >>= k = ResultsCont (\a -> ($ a) . unResultsCont . k =<< x a) +resultArr' :: ([a] -> [b]) -> ResultsCont s a b +resultArr' f = ResultsCont (Results . pure . f) -instance Category (ResultsCont s) where - id = ResultsCont pure - ResultsCont f . ResultsCont g = ResultsCont (f <=< g) +resultArrs :: [a -> b] -> ResultsCont s a b +resultArrs f = ResultsCont (Results . pure . liftA2 ($) f) -resultArr :: (a -> b) -> ResultsCont s a b -resultArr f = ResultsCont (pure . f) +resultArrs' :: [[a] -> [b]] -> ResultsCont s a b +resultArrs' f = ResultsCont (Results . pure . go) + where go x = ($ x) =<< f resultBind :: ResultsCont s a b -> Results s a -> Results s b -resultBind (ResultsCont f) x = f =<< x +resultBind (ResultsCont f) (Results x) = Results $ do + y <- x + unResults (f y) manyResults :: [a] -> Results s a manyResults = Results. pure From ec21f6fe0c5ec064d7f06126b9f4eaa7349f5504 Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Sat, 13 May 2023 11:54:08 +0800 Subject: [PATCH 4/6] Add disambiguating nonterminal --- Text/Earley/Grammar.hs | 9 +++++--- Text/Earley/Parser/Internal.hs | 38 +++++++++++++++++++++++----------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/Text/Earley/Grammar.hs b/Text/Earley/Grammar.hs index 4c3c0ce..285232e 100644 --- a/Text/Earley/Grammar.hs +++ b/Text/Earley/Grammar.hs @@ -74,9 +74,6 @@ terminal p = Terminal p $ Pure id constraint :: (a -> Bool) -> Prod r e t a -> Prod r e t a constraint = flip Constraint -disambiguate :: ([a] -> [b]) -> Prod r e t a -> Prod r e t b -disambiguate d = flip Disamb (Pure d) - -- | Lifted instance: @(<>) = 'liftA2' ('<>')@ instance Semigroup a => Semigroup (Prod r e t a) where (<>) = liftA2 (Data.Semigroup.<>) @@ -185,6 +182,12 @@ instance MonadFix (Grammar r) where rule :: Prod r e t a -> Grammar r (Prod r e t a) rule p = RuleBind p return +-- | Create a non-terminal which is able to disambiguate possible parses +disambiguate :: ([a] -> b) -> Prod r e t a -> Grammar r (Prod r e t b) +disambiguate d p = do + r <- rule p + pure $ Disamb r (Pure (pure . d)) + -- | Run a grammar, given an action to perform on productions to be turned into -- non-terminals. runGrammar :: MonadFix m diff --git a/Text/Earley/Parser/Internal.hs b/Text/Earley/Parser/Internal.hs index 1ceb61f..be790a0 100644 --- a/Text/Earley/Parser/Internal.hs +++ b/Text/Earley/Parser/Internal.hs @@ -17,6 +17,7 @@ import Data.Semigroup import Control.Category (Category) import qualified Control.Category as C import Data.Traversable (for) +import Data.Functor.Contravariant (contramap) ------------------------------------------------------------------------------- -- * Concrete rules and productions @@ -152,7 +153,7 @@ manyResults = Results. pure -- | A continuation accepting an @a@ and producing a @b@. data Cont s r e t a b where Cont :: !(ResultsCont s a b) - -> !(ProdR s r e t (b -> c)) + -> !(ProdR s r e t ([b] -> [c])) -> !(ResultsCont s c d) -> !(Conts s r e t d e') -> Cont s r e t a e' @@ -174,7 +175,7 @@ contToState :: BirthPos -> Results s a -> Cont s r e t a c -> State s r e t c contToState pos r (Cont g p args cs) = State p - (ResultsCont $ \f -> unResultsCont (args <<< resultArr f <<< g) =<< r) + (ResultsCont $ \f -> (args <<< resultArrs' f <<< g) `resultBind` r) pos cs contToState _ r (FinalCont args) = Final $ resultBind args r @@ -186,7 +187,7 @@ simplifyCont Conts {conts = cont} = readSTRef cont >>= go False where go !_ [Cont g (Pure f) args cont'] = do ks' <- simplifyCont cont' - go True $ map (contraMapCont $ args <<< resultArr f <<< g) ks' + go True $ map (contraMapCont $ args <<< resultArr' f <<< g) ks' go True ks = do writeSTRef cont ks return ks @@ -286,7 +287,9 @@ parse (st:ss) env = case st of -- and thus add (X → α a • β, j) to S(k+1) -- In our case, advancing the dot past a terminal means applying the -- results of that terminal to the input of the continuation - Just a -> parse ss env {next = State p (args <<< resultArr ($ a)) Previous scont + -- TODO: applying args to a single 'a' here is not correct in the + -- presence of disambiguation + Just a -> parse ss env {next = State p (args <<< resultArrs [($ a)]) Previous scont : next env} Nothing -> parse ss env -- Prediction operation @@ -294,14 +297,14 @@ parse (st:ss) env = case st of NonTerminal r p -> do rkref <- readSTRef $ ruleConts r ks <- readSTRef rkref - writeSTRef rkref (Cont C.id p args scont : ks) + writeSTRef rkref (Cont C.id (fmap fmap p) args scont : ks) ns <- unResults $ ruleNulls r -- ...add (Y → • γ, k) to S(k) for every production in the grammar with Y -- on the left-hand side (Y → γ). let addNullState | null ns = id | otherwise = (:) - $ State p (ResultsCont $ \f -> args `resultBind` manyResults (map f ns)) pos scont + $ State p (ResultsCont $ \f -> args `resultBind` manyResults (liftA2 ($) f ns)) pos scont if null ks then do -- The rule has not been expanded at this position. st' <- State (ruleProd r) C.id Current <$> newConts rkref parse (addNullState $ st' : ss) @@ -319,28 +322,39 @@ parse (st:ss) env = case st of masref <- readSTRef argsRef case masref of Just asref -> do -- The continuation has already been followed at this position. - modifySTRef asref $ mappend $ unResultsCont args a + -- TODO: Applying args to a single a is incorrect in the presence + -- of disambiguation + modifySTRef asref $ mappend $ unResultsCont args [a] parse ss env Nothing -> do -- It hasn't. -- ...find all states in S(j) of the form (X → α • Y β, i) and add -- (X → α Y • β, i) to S(k). -- In this implementation, advancing the dot requires applying 'a' -- here to the continuation. - asref <- newSTRef $ unResultsCont args a + -- TODO: Applying args to a single a is incorrect in the presence + -- of disambiguation + asref <- newSTRef $ unResultsCont args [a] writeSTRef argsRef $ Just asref ks <- simplifyCont scont res <- lazyResults $ unResults =<< readSTRef asref let kstates = map (contToState pos res) ks parse (kstates ++ ss) env {reset = writeSTRef argsRef Nothing >> reset env} + -- We need to add p with a continuation which takes into account 'd' + Disamb p (Pure d) -> do + parse (State p (args <<< resultArr' d) pos scont : ss) env + Disamb p d -> do + scont' <- newConts =<< newSTRef [Cont C.id d args scont] + parse (State p C.id Previous scont' : ss) env -- For every alternative, add a state for that production all pointing to -- the same continuation. Alts as (Pure f) -> do - let args' = args <<< ResultsCont (\x -> pure (f x)) + -- TODO: is resultArrs safe in the presence of disambiguation + let args' = args <<< resultArrs [f] sts = [State a args' pos scont | a <- as] parse (sts ++ ss) env Alts as p -> do - scont' <- newConts =<< newSTRef [Cont C.id p args scont] + scont' <- newConts =<< newSTRef [Cont C.id (fmap fmap p) args scont] let sts = [State a C.id Previous scont' | a <- as] parse (sts ++ ss) env -- Rustle up a left-recursive non-terminal and add it to the states to be @@ -353,8 +367,8 @@ parse (st:ss) env = case st of Named pr' n -> parse (State pr' args pos scont : ss) env {names = n : names env} -- Insert a state whose continuation filters any results - Constraint pr' c -> parse (State pr' (ResultsCont test >>> args) pos scont : ss) env - where test x = if c x then return x else empty + Constraint pr' c -> parse (State pr' (args <<< test) pos scont : ss) env + where test = resultArr' (filter c) type Parser e i a = forall s. i -> ST s (Result s e i a) From 14f37633c2f7073f27c54fdf2c8217ff6ad33f47 Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Sat, 13 May 2023 13:28:35 +0800 Subject: [PATCH 5/6] Add disambiguation example --- Earley.cabal | 9 ++++++ examples/Disambiguate.hs | 68 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 examples/Disambiguate.hs diff --git a/Earley.cabal b/Earley.cabal index 25f502b..fb6eec5 100644 --- a/Earley.cabal +++ b/Earley.cabal @@ -117,6 +117,15 @@ executable earley-infinite default-language: Haskell2010 build-depends: base, Earley +executable earley-disambiguate + if !flag(examples) + buildable: False + main-is: Disambiguate.hs + ghc-options: -Wall + hs-source-dirs: examples + default-language: Haskell2010 + build-depends: base, Earley, containers + benchmark bench type: exitcode-stdio-1.0 hs-source-dirs: . bench diff --git a/examples/Disambiguate.hs b/examples/Disambiguate.hs new file mode 100644 index 0000000..8fc4118 --- /dev/null +++ b/examples/Disambiguate.hs @@ -0,0 +1,68 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE RecursiveDo #-} + +import Control.Applicative +import Data.Char +import Data.Foldable (traverse_) +import Data.Tree +import System.Environment +import Text.Earley +import Text.Earley.Grammar + +type Expr = Tree String +pattern Add, Mul :: Tree String -> Tree String -> Tree String +pattern Add x y = Node "+" [x, y] +pattern Mul x y = Node "*" [x, y] +pattern Var :: a -> Tree a +pattern Var n = Node n [] +pattern Amb :: [Tree String] -> Tree String +pattern Amb xs = Node "Ambiguous" xs + +expr :: Grammar r (Prod r String String Expr) +expr = mdo + let exprProd = disambiguate $ \case + [x] -> x + xs -> Amb xs + e <- + exprProd $ + Add <$> e <* namedToken "+" <*> e + <|> Mul <$> e <* namedToken "*" <*> e + <|> Var <$> satisfy ident + <|> namedToken "(" *> e <* namedToken "(" + return e + where + ident (x : _) = isAlpha x + ident _ = False + +-- λ> :main "A + B * C * G" +-- Ambiguous +-- ├╴ ((A+B)*(C*G)) +-- ├╴ * +-- │ ├╴ Ambiguous +-- │ │ ├╴ ((A+B)*C) +-- │ │ └╴ (A+(B*C)) +-- │ └╴ G +-- └╴ + +-- ├╴ A +-- └╴ Ambiguous +-- ├╴ (B*(C*G)) +-- └╴ ((B*C)*G) +main :: IO () +main = do + x : _ <- getArgs + let (ps, r) = fullParses (parser expr) (words x) + traverse_ (putStrLn . drawTree . simplifyTree) ps + print r + +-- | render non-ambiguous expressions on one line to make the printed tree +-- smaller +simplifyTree :: Expr -> Expr +simplifyTree = + foldTree + ( \op -> \case + [Node n [], Node m []] | op /= "Ambiguous" -> Node (parens (n <> op <> m)) [] + ns -> Node op ns + ) + where + parens x = "(" <> x <> ")" From 82771fe732d8d7bf70cf01b73b146fd35c79545f Mon Sep 17 00:00:00 2001 From: Ellie Hermaszewska Date: Sun, 14 May 2023 14:40:05 +0800 Subject: [PATCH 6/6] export disambiguate from Text.Earley --- Text/Earley.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Text/Earley.hs b/Text/Earley.hs index 7fe66f1..8a73945 100644 --- a/Text/Earley.hs +++ b/Text/Earley.hs @@ -1,7 +1,7 @@ -- | Parsing all context-free grammars using Earley's algorithm. module Text.Earley ( -- * Context-free grammars - Prod, terminal, (), constraint, Grammar, rule + Prod, terminal, (), constraint, disambiguate, Grammar, rule , -- * Derived operators satisfy, token, namedToken, anyToken, list, listLike, matches , -- * Parsing