forked from drobertadams/GraphGen
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLexer.py
121 lines (114 loc) · 4.82 KB
/
Lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from Token import TokenTypes
from Token import Token
class Lexer(object):
    """
    Lexer for the graph productions parser.

    Splits an input string into Token objects, handing out one token per
    call to nextToken(). Whitespace and '#' comments (running to the end of
    the line) are skipped. The lexer tracks the line number and the
    character number within the line so that errors can report a position.
    """

    #--------------------------------------------------------------------------
    def __init__(self, str):
        """Constructor.

        str is the input to the lexer. (The parameter name shadows the
        builtin; it is kept unchanged so existing callers keep working.)
        """
        self.input = str    # input string
        self.p = 0          # index of current character within self.input
        self.lineNum = 1    # current line number
        self.charNum = 1    # current character number within the line

        # Initialize the current character (self.c). An empty input starts
        # out at EOF.
        if len(str) != 0:
            self.c = self.input[self.p]
        else:
            self.c = TokenTypes.EOF

    #--------------------------------------------------------------------------
    def nextToken(self):
        """Return the next Token in the input stream, ignoring whitespace.

        Recognized tokens: ';' ',' '{' '}' '->' '=' '==>', NUMBER (a run of
        digits), the reserved words 'configuration' and 'productions', and
        ID (a letter followed by letters, digits or '_'). '#' starts a
        comment that is skipped up to the end of the line.

        Returns an EOF token once the input is exhausted.
        Raises SyntaxError (via _error) when the input cannot form a token.
        """
        while self.c != TokenTypes.EOF:
            if self.c in (' ', '\t', '\n', '\r'):
                self._consume()

            elif self.c == ';':
                self._consume()
                return Token(TokenTypes.SEMICOLON, ';')

            elif self.c == ',':
                self._consume()
                return Token(TokenTypes.COMMA, ',')

            elif self.c == '{':
                self._consume()
                return Token(TokenTypes.LBRACE, '{')

            elif self.c == '}':
                self._consume()
                return Token(TokenTypes.RBRACE, '}')

            elif self.c == '-':
                # '->' is an ARROW, '-' followed by anything else is invalid.
                self._consume()
                if self.c != '>':
                    self._error()
                self._consume()
                return Token(TokenTypes.ARROW, '->')

            elif self.c == '=':
                # '==>' is a DOUBLEARROW, '==' followed by anything else is
                # invalid. '=' followed by anything but a '=' is simply an
                # EQUALS.
                self._consume()
                if self.c != '=':
                    return Token(TokenTypes.EQUALS, '=')
                self._consume()
                if self.c != '>':
                    self._error()
                self._consume()
                return Token(TokenTypes.DOUBLEARROW, '==>')

            elif self.c == '#':
                # Comment: discard everything up to (not including) the
                # end-of-line; the newline itself is skipped as whitespace
                # on the next pass through the loop.
                self._takeWhile(lambda ch: ch != '\n')

            elif self.c.isdigit():
                # Consume all contiguous digits and turn them into a NUMBER.
                lexeme = self._takeWhile(lambda ch: ch.isdigit())
                return Token(TokenTypes.NUMBER, lexeme)

            elif self.c.isalpha():
                # Consume all contiguous alpha, digits, or _ characters, then
                # check to see if we recognize it as a reserved word.
                lexeme = self._takeWhile(
                    lambda ch: ch.isalpha() or ch.isdigit() or ch == '_')
                if lexeme == 'configuration':
                    return Token(TokenTypes.CONFIGURATION, lexeme)
                if lexeme == 'productions':
                    return Token(TokenTypes.PRODUCTIONS, lexeme)
                return Token(TokenTypes.ID, lexeme)

            else:
                # Every other character is invalid.
                self._error()

        return Token(TokenTypes.EOF, "<EOF>")

    #--------------------------------------------------------------------------
    # PRIVATE METHODS - These aren't the methods you're looking for.
    #--------------------------------------------------------------------------
    def _takeWhile(self, accept):
        """Consume characters for as long as accept(self.c) is true (and the
        input is not exhausted) and return the consumed text.
        """
        start = self.p
        while self.c != TokenTypes.EOF and accept(self.c):
            self._consume()
        # _consume() advances self.p by exactly one per character, so the
        # consumed text is one contiguous slice of the input.
        return self.input[start:self.p]

    #--------------------------------------------------------------------------
    def _consume(self):
        """Advance to the next character of input, or EOF."""
        # Update line number and character number. A line ends at '\n', or
        # at a '\r' that is NOT immediately followed by '\n'; this way a
        # Windows "\r\n" pair counts as a single line break instead of two.
        following = self.input[self.p + 1:self.p + 2]   # '' at end of input
        if self.c == '\n' or (self.c == '\r' and following != '\n'):
            self.lineNum = self.lineNum + 1
            self.charNum = 1
        else:
            self.charNum = self.charNum + 1

        # Go to the next character.
        self.p += 1
        if self.p >= len(self.input):
            self.c = TokenTypes.EOF
        else:
            self.c = self.input[self.p]

    #--------------------------------------------------------------------------
    def _error(self):
        """Raises a SyntaxError indicating that the current character is
        invalid, or that the input ended in the middle of a token.
        """
        # At end of input self.c holds the EOF sentinel rather than a
        # character, so it must not be formatted with %c.
        if self.p >= len(self.input):
            raise SyntaxError("Unexpected end of input at [%d,%d]." %
                              (self.lineNum, self.charNum))
        raise SyntaxError("Invalid character %c at [%d,%d]." %
                          (self.c, self.lineNum, self.charNum))