This repository has been archived by the owner on Apr 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathLec14Live.hs
309 lines (217 loc) · 6.57 KB
/
Lec14Live.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
module Lec14Live where
import Data.Char (isDigit, digitToInt)
import Data.List (foldl')
{- Lecture 14 : PARSER COMBINATORS I
This lecture and the next are about writing parsers.
Parsers turn "flat" sequences of characters (or, more low level,
bytes):
"(1+1)*2"
which is really a sequence of characters:
'(', '1', '+', '1', ')', '*', '2'
into structured representations like:
Mul (Add (Num 1) (Num 1)) (Num 2)
-}
{- Almost every program does some parsing:
- Configuration files
- Input data (CSV, JSON, XML, JPG, PNG, ...)
- Network protocols (HTTP, SMTP, DNS, ...)
- Compilers and interpreters parse programs
Often, there's a standardised library to do it for you, but
occasionally you'll need to write your own. -}
{- Part I : Writing Parsers, a first attempt
How should we write parsers in Haskell? One way to go about this is
to have the following two thoughts:
- Parsers take input that is a list of 'Char's, i.e., a 'String'.
- Parsers can either fail to parse, because they received invalid
input, or succeed, in which case they should return the structured
representation.
Thinking like this leads to the following definition ('version 1'): -}
-- | Version 1 of the parser type: look at the whole input 'String' and
-- either fail with 'Nothing' or succeed with 'Just' a result.
type Parser_v1 a = String -> Maybe a

-- | Recognise exactly the strings @"True"@ and @"False"@. Every other
-- input, including one with extra trailing characters, is rejected.
parseBool_v1 :: Parser_v1 Bool
parseBool_v1 input = lookup input table
  where
    table = [("True", True), ("False", False)]
-- How to parse *two* booleans?
--
-- TrueTrue
--
-- (True, True)
--
-- TrueFalse
--
-- (True, False)
-- How can we fix this?
{- Part II : Parsing with leftovers -}
-- | The Parser type: a wrapped function from the input 'String' to
-- either 'Nothing' (the parse failed) or 'Just' a pair of the leftover,
-- not-yet-consumed input and the parsed value.
newtype Parser a = MkParser (String -> Maybe (String, a))
-- runParser
-- | Unwrap a 'Parser' and apply its underlying function to an input,
-- giving 'Nothing' on failure or 'Just' (leftover input, result).
runParser :: Parser a -> String -> Maybe (String, a)
runParser (MkParser parseFn) = parseFn
-- parseBool_v2
-- | Parse the prefix @True@ or @False@ off the front of the input and
-- hand back whatever follows it as the leftover input.
parseBool_v2 :: Parser Bool
parseBool_v2 = MkParser step
  where
    step ('T':'r':'u':'e':rest)     = Just (rest, True)
    step ('F':'a':'l':'s':'e':rest) = Just (rest, False)
    step _                          = Nothing
-- parsePair
-- | Run two parsers one after the other, feeding the leftover input of
-- the first into the second, and pair up their results. Fails as soon
-- as either parser fails.
parsePair :: Parser a -> Parser b -> Parser (a,b)
parsePair p1 p2 =
  MkParser $ \input -> do
    -- This 'do' block is in the 'Maybe' monad, so a 'Nothing' from
    -- either parser short-circuits the whole pair.
    (afterFirst, a)  <- runParser p1 input
    (afterSecond, b) <- runParser p2 afterFirst
    Just (afterSecond, (a, b))
-- parsePairOfBools
-- Monad?
-- | Sequencing for parsers: 'return' succeeds without consuming any
-- input, and '>>=' runs one parser and then passes its result and its
-- leftover input on to the next.
instance Monad Parser where
  -- return :: a -> Parser a
  return a = MkParser $ \input -> Just (input, a)

  -- >>= :: Parser a -> (a -> Parser b) -> Parser b
  p1 >>= k =
    MkParser $ \input -> do
      -- Inner 'do' is in the 'Maybe' monad: if p1 fails, so does the
      -- whole parser, without ever running the continuation.
      (leftover, a) <- runParser p1 input
      runParser (k a) leftover
-- parsePairOfBools_v2
-- | Parse two booleans written back to back, e.g. @TrueFalse@, into a
-- pair.
parsePairOfBools_v2 :: Parser (Bool, Bool)
parsePairOfBools_v2 = parsePair parseBool_v2 parseBool_v2
{- Part III : Building Parsers
We'll build more complex parsers from simple ones. -}
-- parseChar
-- | Consume and return the first character of the input; fails when the
-- input is empty.
parseChar :: Parser Char
parseChar = MkParser nextChar
  where
    nextChar []         = Nothing
    nextChar (c : rest) = Just (rest, c)
-- | Consume the first two characters of the input and return them as a
-- pair; fails if fewer than two characters are available.
parseTwoChars :: Parser (Char, Char)
parseTwoChars = (,) <$> parseChar <*> parseChar
-- failParse
-- | The parser that fails on every input, consuming nothing.
failParse :: Parser a
failParse = MkParser (const Nothing)
-- parseLiteralChar
-- | Consume one character and succeed only if it equals the expected
-- one; fails on a mismatch or on empty input.
parseLiteralChar :: Char -> Parser ()
parseLiteralChar expected = parseChar >>= check
  where
    check got
      | got == expected = return ()
      | otherwise       = failParse
-- parseLiteral
-- | Recognise an exact string at the front of the input by matching its
-- characters one at a time, left to right. The empty string always
-- matches and consumes nothing.
parseLiteral :: String -> Parser ()
parseLiteral = foldr step (return ())
  where
    -- Match one character, then whatever is left of the literal.
    step c rest = parseLiteralChar c >> rest
-- parseBool_v3
-- | Parse @True@ or @False@ by reading the first character and then
-- requiring the rest of the matching keyword.
parseBool_v3 :: Parser Bool
parseBool_v3 = parseChar >>= afterFirstChar
  where
    afterFirstChar 'T' = parseLiteral "rue" >> return True
    afterFirstChar 'F' = parseLiteral "alse" >> return False
    afterFirstChar _   = failParse
-- | Ordered choice: try the first parser, and only if it fails run the
-- second one on the same, original input. When the first parser
-- succeeds, the second is never tried.
orElse :: Parser a -> Parser a -> Parser a
orElse p1 p2 =
  MkParser $ \input ->
    maybe (runParser p2 input) Just (runParser p1 input)
-- | Parse the keyword @True@ or, failing that, the keyword @False@.
parseBool :: Parser Bool
parseBool = parseTrue `orElse` parseFalse
  where
    parseTrue  = parseLiteral "True" >> return True
    parseFalse = parseLiteral "False" >> return False
-- orderedDemo
-- | Demonstrates that 'orElse' is ordered. Because @for@ is a prefix of
-- @forty@ and is tried first, the input @"forty"@ is recognised as the
-- keyword with @"ty"@ left over; the second alternative can never
-- succeed.
orderedDemo :: Parser String
orderedDemo = keywordFor `orElse` numberForty
  where
    keywordFor  = parseLiteral "for" >> return "The keyword 'for'"
    numberForty = parseLiteral "forty" >> return "The number 40"
{- We now have the following basic 'Parser's and ways of combining them:
1. The 'parseChar' 'Parser' for parsing single characters.
2. The monad interface 'return' and '>>=' for parsers that do
nothing and sequence two parsers.
3. The 'failParse' parser that always fails.
4. The 'orElse' parser that tries one parser, and if that fails
tries another.
With these basic parsers, we have built 'parseLiteral' that
recognises literal strings.
With these basic functions, we no longer have to write anything
involving 'MkParser' directly! -}
-- zeroOrMore
-- | Apply a parser repeatedly until it fails, collecting the results in
-- order. Never fails itself: if the parser fails straight away the
-- result is the empty list.
--
-- Note: a parser that can succeed without consuming input makes this
-- recurse forever.
zeroOrMore :: Parser a -> Parser [a]
zeroOrMore p = oneThenMore `orElse` return []
  where
    -- One successful parse followed by zero or more further ones.
    oneThenMore = (:) <$> p <*> zeroOrMore p
-- <list-of-ps> ::= <p> <list-of-ps>
-- | /* empty */
-- | Run @zeroOrMore parseBool@ twice in a row. The first run is greedy
-- and stops only where 'parseBool' fails, so the second run starts at
-- that same point and always yields the empty list.
twoBoolLists :: Parser ([Bool],[Bool])
twoBoolLists =
  zeroOrMore parseBool >>= \bools1 ->
  zeroOrMore parseBool >>= \bools2 ->
  return (bools1, bools2)
{- Example: Parsing Integers
Want to parse strings like
"1", "100", "123", "-90"
into their numerical representations:
1, 100, 123, -90
-}
-- | Consume a single decimal digit (@'0'@ to @'9'@) and return its
-- numeric value; fails on any other character or on empty input.
parseDigit :: Parser Int
parseDigit =
  do c <- parseChar
     -- 'isDigit' accepts only the ASCII digits, so 'digitToInt' is
     -- never applied to a hexadecimal letter here.
     if isDigit c
       then return (digitToInt c)
       else failParse
-- | Parse one or more decimal digits into the number they denote, e.g.
-- @"123"@ becomes 123. Fails if the input does not start with a digit.
--
-- NOTE(review): the result is an 'Int', so a digit string too long for
-- 'Int' is not detected as an error.
parsePositiveNumber :: Parser Int
parsePositiveNumber =
  do firstDigit <- parseDigit
     moreDigits <- zeroOrMore parseDigit
     -- Strict left fold: shift the running total one decimal place and
     -- add the next digit.
     return (foldl' appendDigit firstDigit moreDigits)
  where
    appendDigit total digit = total * 10 + digit
-- | Parse an optional leading minus sign, returning the multiplier it
-- stands for: -1 if a @-@ was consumed, otherwise 1 with no input
-- consumed.
parseSign :: Parser Int
parseSign = negative `orElse` return 1
  where
    negative = parseLiteralChar '-' >> return (-1)
-- | Parse a decimal integer with an optional leading @-@, e.g. @"-90"@
-- becomes -90.
parseInt :: Parser Int
parseInt = (*) <$> parseSign <*> parsePositiveNumber
-- parseInt
-- These two type class instances are needed for the 'Monad' instance
-- defined above to be allowed. They will be explained in Lecture 16.
-- | Applicative interface for parsers. 'pure' is defined directly
-- rather than as @pure = return@, so it does not depend on how the
-- 'Monad' instance defines 'return'.
instance Applicative Parser where
  -- Succeed with the given value without consuming any input.
  pure a = MkParser (\input -> Just (input, a))

  -- Run the function parser, then the argument parser on the leftover
  -- input, and apply the one result to the other.
  mf <*> ma =
    mf >>= \f ->
    ma >>= \a ->
    pure (f a)
-- | Apply a function to the result of a parser, leaving what it
-- consumes and whether it succeeds unchanged.
instance Functor Parser where
  fmap f p =
    MkParser $ \input ->
      case runParser p input of
        Nothing        -> Nothing
        Just (rest, a) -> Just (rest, f a)