diff --git a/.vitepress/config.mts b/.vitepress/config.mts index b759ddc..e55d683 100644 --- a/.vitepress/config.mts +++ b/.vitepress/config.mts @@ -38,6 +38,7 @@ export default defineConfig({ { text: '08: Haskell Types', link: '/lectures/lecture08'}, { text: '09: Type Classes', link: '/lectures/lecture09'}, { text: '10: IO & Monads', link: '/lectures/lecture10'}, + { text: '11: Monadic Parsing', link: '/lectures/lecture11'}, ] }, diff --git a/lectures/Parser.hs b/lectures/Parser.hs new file mode 100644 index 0000000..1d1fa18 --- /dev/null +++ b/lectures/Parser.hs @@ -0,0 +1,58 @@ +module Parser where + +import Control.Applicative +import Data.Char + +newtype Parser a = P { parse :: String -> Maybe (a, String) } + +instance Functor Parser where + -- fmap :: (a -> b) -> Parser a -> Parser b + fmap f p = P (\inp -> case parse p inp of + Nothing -> Nothing + Just (v,out) -> Just (f v, out)) + +instance Applicative Parser where + -- (<*>) :: Parser (a -> b) -> Parser a -> Parser b + pg <*> px = P (\inp -> case parse pg inp of + Nothing -> Nothing + Just (g,out) -> parse (fmap g px) out) + pure v = P (\inp -> Just (v,inp)) + +instance Monad Parser where + -- (>>=) :: Parser a -> (a -> Parser b) -> Parser b + p >>= f = P (\inp -> case parse p inp of + Nothing -> Nothing + Just (v,out) -> parse (f v) out) + +instance Alternative Parser where + -- empty :: Parser a + empty = P (\_ -> Nothing) + -- (<|>) :: Parser a -> Parser a -> Parser a + p <|> q = P (\inp -> case parse p inp of + Nothing -> parse q inp + Just (v,out) -> Just (v,out)) + -- many p = some p <|> pure [] + -- some p = (:) <$> p <*> many p + +-- Parsers +item :: Parser Char +item = P (\inp -> case inp of + "" -> Nothing + (x:xs) -> Just (x,xs)) + +sat :: (Char -> Bool) -> Parser Char +sat pr = item >>= \x -> if pr x then return x + else empty + +alphaNum :: Parser Char +alphaNum = sat isAlphaNum + +char :: Char -> Parser Char +char c = sat (== c) + +string :: String -> Parser String +string "" = pure "" +string s@(x:xs) = char x *> string xs *> pure s + +sep :: Parser () +sep = some (sat isSpace) *> pure () diff --git a/lectures/index.md b/lectures/index.md index 11dc4e9..c604f6c 100644 --- a/lectures/index.md +++ b/lectures/index.md @@ -82,6 +82,11 @@ We discuss some more examples of type classes, most importantly `Functor`s. [Slides](https://github.com/aicenter/FUP/blob/main/lectures/lecture10.pdf). [Log](https://github.com/aicenter/FUP/blob/main/lectures/lecture10.hs). +## [Lecture 11](lecture11): Monadic Parsing + +[Slides](https://github.com/aicenter/FUP/blob/main/lectures/lecture11.pdf). +[Log](https://github.com/aicenter/FUP/blob/main/lectures/lecture11.hs). + ## Old recorded lectures diff --git a/lectures/lecture06.md b/lectures/lecture06.md index 8627a60..97d30d5 100644 --- a/lectures/lecture06.md +++ b/lectures/lecture06.md @@ -2,7 +2,7 @@ outline: deep --- -## Introduction +# Lambda Calculus Lambda calculus is a model of computation. It was introduced in the 1930s by Alonzo Church. He was trying to develop formal foundations for mathematics. His attempts were unsuccessful, as several of diff --git a/lectures/lecture08.md b/lectures/lecture08.md index 2201340..3e9963b 100644 --- a/lectures/lecture08.md +++ b/lectures/lecture08.md @@ -1,6 +1,8 @@ -# Lecture 8 - Haskell Types +--- +outline: deep +--- -## Haskell's type system +# Haskell's type system - _**Strong**_: Haskell *guarantees* that your program does not have any type-level errors. Strong also means that it will not do automatic type coercion (i.e. casting/type conversion). diff --git a/lectures/lecture09.md b/lectures/lecture09.md index 9c86bba..de45609 100644 --- a/lectures/lecture09.md +++ b/lectures/lecture09.md @@ -1,4 +1,4 @@ -# Lecture 9 - Typeclasses +# Typeclasses During the last lecture we introduced [ad-hoc polymorphism](lecture08#polymorphism) via *typeclasses*. diff --git a/lectures/lecture11.hs b/lectures/lecture11.hs new file mode 100644 index 0000000..659d159 --- /dev/null +++ b/lectures/lecture11.hs @@ -0,0 +1,142 @@ +-- Lecture 11 +import Control.Applicative +import Data.Char +import Data.List +import Parser + +-- Example Maybe as applicative functor + +validateLength :: Int -> String -> Maybe String +validateLength maxLen s = if (length s) > maxLen + then Nothing + else Just s + +newtype Name = Name String deriving (Eq, Show) +newtype Address = Address String deriving (Eq, Show) + +mkName :: String -> Maybe Name +mkName s = Name <$> validateLength 12 s + +mkAddress :: String -> Maybe Address +mkAddress a = Address <$> validateLength 25 a + +data Person = Person Name Address deriving (Eq, Show) + +mkPerson :: String -> String -> Maybe Person +mkPerson n a = Person <$> mkName n <*> mkAddress a +-- It can be done also via monadic instance of Maybe. +{- +mkPerson n a = do name <- mkName n + addr <- mkAddress a + return $ Person name addr +-} + +-- definability of <*> in terms of >>= +(<<*>>) :: Monad m => m (a -> b) -> m a -> m b +x <<*>> y = x >>= \f -> fmap f y +infixl 4 <<*>> + +(<<*) :: Applicative f => f a -> f b -> f a +x <<* y = (\u -> (\_ -> u)) <$> x <*> y +infixl 4 <<* + +(*>>) :: Applicative f => f a -> f b -> f b +x *>> y = (\_ -> id) <$> x <*> y +infixl 4 *>> + +-- Monadic parsing + +data Expr a = Val a + | Var String + | Add [Expr a] + | Mul [Expr a] deriving Eq + +instance Show a => Show (Expr a) where + show (Val c) = show c + show (Var s) = s + show (Add es) = "(" ++ intercalate " + " (map show es) ++ ")" + show (Mul es) = "(" ++ intercalate " * " (map show es) ++ ")" + +{- + -> * * + -> + | + | + | + + -> * + -> "." + | + -> "-" + | + + + -> "(" ("+" )+ ")" + -> "(" ("*" )+ ")" +-} + +var :: Parser (Expr a) +-- var = fmap Var $ (:) <$> sat isLower <*> many alphaNum +var = do x <- sat isLower + xs <- many alphaNum + return $ Var (x:xs) + +digit :: Parser Char +digit = sat isDigit + +nat :: Parser String +nat = some digit + +int :: Parser String +-- int = (:) <$> char '-' <*> nat <|> nat +int = do char '-' + xs <- nat + return ('-':xs) + <|> nat + +float :: Parser Float +-- float = fmap read $ +-- join <$> int +-- <*> (char '.' *> nat) +-- <|> int +-- where join s1 s2 = s1 ++ "." ++ s2 +float = do xs <- int + char '.' + ys <- nat + return $ read (xs ++ "." ++ ys) + <|> read <$> int + +space :: Parser () +space = many (sat isSpace) *> pure () + +token :: Parser a -> Parser a +-- token p = space *> p <* space +token p = do space + x <- p + space + return x + +val :: Parser (Expr Float) +val = Val <$> float + +expr :: Parser (Expr Float) +expr = token (var <|> val <|> op '+' <|> op '*') + +opCons :: Char -> [Expr a] -> Expr a +opCons '+' = Add +opCons '*' = Mul +opCons c = error $ show c ++ " is unknown op" + +op :: Char -> Parser (Expr Float) +-- op c = fmap (opCons c) $ +-- (:) <$> (char '(' *> expr) +-- <*> some (char c *> expr) +-- <* char ')' +op c = do char '(' + e <- expr + es <- some (char c >> expr) + char ')' + return $ opCons c (e:es) + +readExpr :: String -> Maybe (Expr Float) +readExpr s = case parse expr s of + Just (e,"") -> Just e + Just (e,_) -> Nothing + _ -> Nothing diff --git a/lectures/lecture11.md b/lectures/lecture11.md new file mode 100644 index 0000000..1160305 --- /dev/null +++ b/lectures/lecture11.md @@ -0,0 +1,495 @@ +--- +outline: deep +--- + +# Monadic Parsing + +In this lecture we will use our newly gained knowledge about functors and monads to build parsers +(this lecture is the foundation for your last [homework](/homework/hw04)). + +## Applicative + +Before we get into the actual parser implementation we need to introduce one more useful and very +common typeclass: `Applicative`s. Imagine we want to add two `Maybe` values. Naively trying to add +them of course does not work: +```haskell +𝝺> (Just 2) + (Just 3) +:1:1: error: [GHC-39999] + • No instance for ‘Num (Maybe Integer)’ arising ... +``` +Using `fmap` will also not get us the desired result: +```haskell +fmap (+) (Just 2) :: Num a => Maybe (a -> a) +``` +Because we do not have a way of working with the resulting `Maybe (a -> a)`. +`Applicative` functors allow us to do exactly this! +```haskell +class Functor f => Applicative f where + (<*>) :: f (a -> b) -> f a -> f b + pure a -> f a +``` +The operator `<*>` accepts a boxed function and applies it to a boxed value, so we can use the +output of our `fmap` expression from before: +```haskell +𝝺> fmap (+) (Just 2) <*> Just 3 +Just 5 + +-- we can write the same with <$>, the fmap operator: +𝝺> (+) <$> (Just 2) <*> Just 3 +Just 5 +``` + +::: tip Exercise +Can you see why `<*>` now allows use to lift _**n-ary**_ functions? Hint: currying! +::: + +The implementation of `Applicative` for `Maybe` is pretty simple (as usual for `Maybe`). +```haskell +instance Applicative Maybe where + pure :: a -> Maybe a + pure = Just + + (<*>) :: Maybe (a -> b) -> Maybe a -> Maybe b + Nothing <*> _ = Nothing + _ <*> Nothing = Nothing + Just f <*> Just a = Just (f a) +``` + +For lists we have two options to implement the interface. We can either pair up a list of functions +with a list of values, or we can apply each function to each value (cartesian product). +Haskell chooses the latter: +```haskell +instance Applicative [] where + pure :: a -> [a] + pure x = [x] + + (<*>) :: [a -> b] -> [a] -> [b] + fs <*> xs = [f x | f <- fs, x <- xs] +``` + +such that the `[]` applicative lets us compute e.g. products of lists of functions +```haskell +𝝺> (,) <$> [1,2,3] <*> ['a','b','c'] +[ (1,'a'), (1,'b'), (1,'c') +, (2,'a'), (2,'b'), (2,'c') +, (3,'a'), (3,'b'), (3,'c') ] +``` + +### Example: Validation + +Let's say we want to create validated data structures, such as a `Person`: +```haskell +newtype Name = Name String deriving (Eq, Show) +newtype Address = Address String deriving (Eq, Show) + +data Person = Person Name Address deriving (Eq, Show) +``` +where `Name` and `Address` are type aliases for `String`. + +::: details What is `newtype`? +The `newtype` keyword can be used to define data types with exactly one constructor and exactly one +field, so its similar to a type alias, just that you can define instances for it (and it has some +slightly different lazy/strict evaluation properties than `data`). Small summary is e.g. +[here](https://stackoverflow.com/q/2649305). +::: + +For example, the constructor `Name` accepts a `String` and produces a `Name`, but for our +validation we want `Maybe Name`s. This is a familiar case for which we can use `fmap`: +```haskell +validateLength :: Int -> String -> Maybe String +validateLength maxLen s = if (length s) > maxLen + then Nothing + else Just s + +mkName :: String -> Maybe Name +mkName s = Name <$> validateLength 12 s + +mkAddress :: String -> Maybe Address +mkAddress a = Address <$> validateLength 25 a +``` + +Once we try to construct validated `Person`s we have to deal with the problem that `Person` accepts +`Name` and `Address`, but not their boxed maybe values. This is exactly the use case for the +applicative functor: +```haskell +mkPerson :: String -> String -> Maybe Person +mkPerson n a = Person <$> mkName n <*> mkAddress a +``` + +::: details Monadic `mkPerson` +The same function as above can also be implemented with the monadic instance of `Maybe` and the `do` +notation, but it is a bit longer: +```haskell +mkPerson :: String -> String -> Maybe Person +mkPerson n a = do name <- mkName n + addr <- mkAddress a + return $ Person name addr +``` +::: + + +## Monadic Parsing + + +A *parser* is a program that takes an input string and converts it into a data structure containing +all the information that was encoded in the input string. Most commonly, a parser can accept a source +code file and convert it into an AST (abstract syntax tree). + +In this lecture we will build a parser that accepts strings of the form +``` +(4.0 * (5 + 7 + x)) +``` +which represent numeric expressions composed of only addition and multiplication. +For simplicity we will assume that all expressions are appropriately parenthesized, so that we do +not have to worry about operator precedence. + +In Haskell, we could represent an AST data structure with an `Expr a` defined like: +```haskell +data Expr a = Val a + | Var String + | Add [Expr a] + | Mul [Expr a] deriving Eq +``` + +The example string above would be converted into the AST: +```haskell +Mul [Val 4, Add [Val 5, Val 7, Var "x"]] +``` + + +The language grammar of the expression we consider is given below. An expression `` is +composed of variables ``, values ``, additions `` or multiplications `` where each +of those four can be surrounded by an arbitrary amount of spaces `*`. Variables start with a +``case letter and then can have an arbitrary number of characters/numbers in their name. + +Values can either be either integers or floating point numbers. Integers `` are defined by an +optional `-` in the front and *at least one* digit (represented by `+`). Finally, `` or +`` are parenthesized expressions with `+`/`*`. +``` + -> * * + -> + | + | + | + + -> * + -> "." + | + -> "-" + | + + + -> "(" ("+" )+ ")" + -> "(" ("*" )+ ")" +``` + + +### The `Parser` type + +Before we start implementing our parsers we need a good data structure that will represent them. +Essentially, we have three things that our parser must be able to do, which have to be reflected in +the type we define: + +1. Parsers accept `String`s and produce trees as output. +2. Parsers can fail. +3. Parsers can return un-parsed strings as partial results. + +With can start with 1.) and define a parser as a type that contains a function which accepts a +string and returns `Expr a`. +```haskell +newtype Parser a = P (String -> Expr a) +``` + +Next, we can include 3.) the un-parsed/rest string: +```haskell +newtype Parser a = P (String -> (Expr a, String)) +``` + +Finally, parsers can fail, so we get: +```haskell +newtype Parser a = P (String -> Maybe (Expr a, String)) +``` + +To conveniently access the function contained in a parser, we give it a name and arrive at the final +type definition: +```haskell +newtype Parser a = P { parse :: String -> Maybe (Expr a, String)} +``` + +With this datatype, and by leveraging what we know about functors, monads, and applicatives, we will +end up with a very nice way of constructing parsers that looks almost like the grammar we defined in +the beginning: +```haskell +-- -> | | | + +expr :: Parser (Expr Float) +expr = token (var <|> val <|> op '+' <|> op '*') +``` + +But let's start much simpler. Our first parser will just parse a single, arbitrary character. +A `Char` parser always succedes as long as the given string is not empty. We will call this parser +`item`, because it parses an arbitrary item from the input string. To define this parser we use the +data constructor `P`, pass a function accepting a string, and return a `Maybe (Char, String)`: +```haskell +newtype Parser a = P { parse :: String -> Maybe (Expr a, String)} + +item :: Parser Char +item = P (\input -> case input of + "" -> Nothing + (x:xs) -> Just (x, xs)) +``` + +In order to construct more complicated parsers we just have to manipulate the `parse` function +contained in `P` (to make it do the more complicated things). The function `parse` is hidden inside our new +parser type, so this is where the typeclasses `Functor`, `Applicative`, and `Monad` will come in +handy. Hence, we will make `Parser`s instances of those before starting with implementing the actual +language grammar. + +#### `Functor Parser` + +Let's say we want to make sure that the first character in a string is a `'c'`. Then we just need to +compose the function inside the `item` parser with `==c`, which we can do my *lifting* `==c` via +`fmap`: +```haskell +instance Functor Parser where + fmap :: (a -> b) -> Parser a -> Parser b + fmap f p = P (\input -> + case parse p input of + Nothing -> Nothing + Just (v,out) -> Just (f v, out)) + +𝝺> parse (fmap (=='c') item) "cde" +Just (True, "de") +``` +In this case `fmap` runs a given parser `p`, and, if it succedes, applies the function `f` to the +resulting value that was parsed. + +#### `Applicative Parser` + +To lift n-ary functions we already know that we will need `Applicative`. +In our parser context, `<*>` will accept a parser who's parse function returns itself a function, +i.e. an `Expr (a -> b)`: +```haskell +parse :: String -> Maybe (Expr (a -> b), String) +``` +If we unpack that function we can use `fmap` to lift it to the second input which is again a parser: +```haskell +instance Applicative Parser where + (<*>) :: Parser (a -> b) -> Parser a -> Parser b + pg <*> px = P (\inp -> case parse pg inp of + Nothing -> Nothing + Just (g,out) -> parse (fmap g px) out) + + pure :: a -> Parser a + pure v = P (\inp -> Just (v,inp)) + +𝝺> parse ((/=) <$> item <*> item) "abc" +Just (True, "c") + +𝝺> parse ((/=) <$> item <*> item) "aac" +Just (True, "c") +``` + + +#### `Monad Parser` + +Most important is probably the monad instance, which will let us sequence multiple small parsers +to combine them to more complex ones. Bind (`>>=`) accepts a parser, and a function that again +returns a parser. We can use this, for example, to make sure that a parsed character is an `a`: +```haskell +instance Monad Parser where + (>>=) :: Parser a -> (a -> Parser b) -> Parser b + p >>= f = P (\inp -> case parse p inp of + Nothing -> Nothing + Just (v,out) -> parse (f v) out) + +𝝺> parse (item >>= \c -> if c == 'a' then item else return ' ') "abc" +Just ('b', "c") + +𝝺> parse (item >>= \c -> if c == 'a' then item else return ' ') "abc" +Just (' ', "bc") +``` + + +#### `Alternative Parser` + + +Another very convenient type class we will use is `Alternative`. It allows us to try different +parsers and use the result of the parser that succeedes (this is essentially the `|` operator in the +grammar). +```haskell +instance Alternative Parser where + empty :: Parser a + empty = P (\_ -> Nothing) + + (<|>) :: Parser a -> Parser a -> Parser a + p <|> q = P (\inp -> case parse p inp of + Nothing -> parse q inp + Just (v,out) -> Just (v,out)) + + -- you get for free: (we will need them in the next section) + -- many :: Parser a -> Parser [a] + -- many p = some p <|> pure [] + + -- some :: Parser a -> Parser [a] + -- some p = (:) <$> p <*> many p + +𝝺> parse (item <|> return 'x') "" +Just ('x', "") +``` + + +#### Parser building blocks + +With the implementations of the necessary typeclasses we can finally start building some basic +parsers. First we can start with a parser that simply checks if a condition is true: +```haskell +sat :: (Char -> Bool) -> Parser Char +sat pr = item >>= \x -> if pr x then return x + else empty +``` + +::: details do-notation +You can also use do-notation to define the `sat` parser: +```haskell +sat :: (Char -> Bool) -> Parser Char +sat pr = do x <- item + if pr x then return x + else empty +``` +A good way to figure out how do-notation works for a new type is by assuming that the left of `<-` +will contain whatever the type `a` is that you are boxing, so in our case of `Parser a`, we will +have whatever the parser is parsing in `a`. Which makes sense!;) +::: + +Next, we can build two more very simple building block parsers: +```haskell +alphaNum :: Parser Char +alpahNum = sat isAlphaNum + +char :: Char -> Parser Char +char c = sat (==c) +``` +`alphaNum` makes sure that a character is alpha-numeric, `char c` checks if a character is equal to +`c`. + +Note that the resulting characters are still part of the parser output (not `True` like in the +beginning of the lecture): +```haskell +𝝺> parse (char 'a') "agwehj" +Just ('a',"gwehj") +``` + +Another useful thing to do is parse larger strings instead of single characters. We can do this by +sequencing multiple `char c` parsers. +```haskell +string :: String -> Parser String +string [] = return [] +string (x:xs) = char x + >> string xs + >> return (x:xs) +``` + +We can use the `some` and `many` functions that come with the `Alternative` typeclass to +parse zero or more (`many`, corresponds to `*` in the grammar) or one or more (`some`, corresponds +to `+`): +```haskell +𝝺> parse (some (string "abc")) "abcabc_gwehj" +Just (["abc", "abc"], "_gwehj") +``` + + +### Parsing numeric expressions + +Finally, we can start implementing our language grammar. This part is what we have worked towards +during the whole lecture today. You will see that all the work in setting things up will pay off: +The parsers we define new look almost like the grammar itself! + +For variables we just have to compose a parser making sure we have a lower first letter and a bunch +of alpha-numeric characters: +```haskell +-- -> * + +var :: Parser (Expr a) +var = do x <- sat isLower + xs <- many alphaNum + return $ Var (x:xs) +``` + +To parse values we start with integers, which are either positive or negative `some digit` parsers. +Then we can parse `float`s with the help of `int <|>`. The `float :: Parser Float` also `read`s +the parsed string into an actual `Float`. +```haskell +-- -> "-" + | + +-- -> "." + | + +digit :: Parser Char +digit = sat isDigit + +nat :: Parser String +nat = some digit + +int :: Parser String +int = do char '-' + xs <- nat + return ('-':xs) + <|> nat + + +float :: Parser Float +float = do xs <- int + char '.' + ys <- nat + return $ read (xs ++ "." ++ ys) + <|> read <$> int + +val :: Parser (Expr Float) +val = Val <$> float +``` + +Full expressions can be surrounded by spaces, so we can define a `token` parser that discards an +arbitrary number of spaces: +```haskell +space :: Parser () +space = many (sat isSpace) *> pure () + +token :: Parser a -> Parser a +token p = do space + x <- p + space + return x + +expr :: Parser (Expr Float) +expr = token (var <|> val <|> op '+' <|> op '*') +``` +The above `expr` parser is the final implementation of our grammar. The only thing that we have not +defined yet are the `op` parsers: +```haskell +-- -> "(" ("+" )+ ")" +-- -> "(" ("*" )+ ")" + +opCons :: Char -> [Expr a] -> Expr a +opCons '+' = Add +opCons '*' = Mul +opCons c = error $ show c ++ " is unknown op" + +op :: Char -> Parser (Expr Float) +op c = do char '(' + e <- expr + es <- some (char c >> expr) + char ')' + return $ opCons c (e:es) + +𝝺> parse (op '*') "(2 * 3 * 4) asdfasdf" +Just ((2.0 * 3.0 * 4.0)," asdfasdf") +``` + +Finally, we initialize the parsing. The only thing we need to do is call our `expr` parser and make +sure that the remaining string is empty: +```haskell +readExpr :: String -> Maybe (Expr Float) +readExpr s = case parse expr s of + Just (e,"") -> Just e + Just (e,_) -> Nothing + _ -> Nothing + +𝝺> readExpr "(4 * (5 + 7 + x2) * y2)" +Just (4.0 * (5.0 + 7.0 + x2) * y2) +```