-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.ml
72 lines (60 loc) · 1.92 KB
/
lexer.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
open Parser
exception Unexpected_char
exception Unclosed_block_comment
let dec_digit = [%sedlex.regexp? '0'..'9']
let hex_digit = [%sedlex.regexp? '0'..'9' | 'A'..'F' | 'a'..'f']
let dec_number = [%sedlex.regexp? Plus dec_digit]
let hex_number = [%sedlex.regexp? "0x", Plus hex_digit]
let name_start = [%sedlex.regexp? 'a'..'z' | 'A'..'Z']
let name_continue = [%sedlex.regexp? '-' | '_' | 'a'..'z' | 'A'..'Z' | '0'..'9']
let name = [%sedlex.regexp? name_start, Star name_continue]
let rec block_comment lexbuf level =
match%sedlex lexbuf with
| "/-" -> (block_comment [@tailcall]) lexbuf (level + 1)
| "-/" -> if level = 0 then () else (block_comment [@tailcall]) lexbuf (level - 1)
| any -> (block_comment [@tailcall]) lexbuf level
| eof -> raise Unclosed_block_comment
| _ -> raise Unexpected_char
let rec token lexbuf =
match%sedlex lexbuf with
| ' ' | '\t' | '\n' -> (token [@tailcall]) lexbuf
| "--", Star (Compl '\n'), '\n' -> (token [@tailcall]) lexbuf
| "/-" -> block_comment lexbuf 0; (token [@tailcall]) lexbuf
| dec_number -> INT (Sedlexing.Latin1.lexeme lexbuf)
| hex_number -> INT (Sedlexing.Latin1.lexeme lexbuf)
| "def" -> KEYWORD_DEF
| "else" -> KEYWORD_ELSE
| "format" -> KEYWORD_FORMAT
| "if" -> KEYWORD_IF
| "let" -> KEYWORD_LET
| "then" -> KEYWORD_THEN
| "type" -> KEYWORD_TYPE
| name -> NAME (Sedlexing.Latin1.lexeme lexbuf)
| "&" -> AMPERSAND
| "*" -> ASTERISK
| "!" -> BANG
| "!=" -> BANG_EQUALS
| "^" -> CARET
| ":" -> COLON
| ":=" -> COLON_EQUALS
| "=" -> EQUALS
| ">" -> GREATER
| ">=" -> GREATER_EQUAL
| ">>" -> GREATER_GREATER
(* | "," -> COMMA *)
| "/" -> FORWARD_SLASH
| "." -> FULL_STOP
| "-" -> HYPHEN
| "<-" -> LESS_HYPHEN
| "<" -> LESS
| "<=" -> LESS_EQUAL
| "<<" -> LESS_LESS
| "|" -> PIPE
| "+" -> PLUS
| ";" -> SEMI
| '{' -> LBRACE
| '}' -> RBRACE
| '(' -> LPAREN
| ')' -> RPAREN
| eof -> END
| _ -> raise Unexpected_char