-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
154 lines (123 loc) · 4.66 KB
/
parser.py
File metadata and controls
154 lines (123 loc) · 4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from dataclasses import dataclass
from abc import ABC
from tokens import Token, TokenType, UNARY_OP_TOKENS
from cerrors import CSyntaxError
class Expression(ABC):
    """Common base class for every expression node in the syntax tree.

    It declares no members of its own; it exists so that expression nodes
    share one type that can be used in annotations and ``isinstance`` checks.
    """
@dataclass
class ConstantInt(Expression):
    """Expression node holding a single integer constant."""

    # The integer carried by this node.
    value: int
@dataclass
class UnaryOp(Expression):
    """Expression node for a unary operator applied to one inner expression.

    The class defines its own ``__init__`` (so ``@dataclass`` does not generate
    one) in order to validate the operator. Note that the constructor
    parameter is called ``operand`` while the stored field is ``inner_expr``.
    """

    # Token type of the operator; checked against UNARY_OP_TOKENS on construction.
    operator: TokenType
    # The expression the operator is applied to.
    inner_expr: Expression

    def __init__(self, operator: TokenType, operand: Expression):
        """Store the operator and its operand after validating the operator.

        Raises:
            ValueError: if ``operator`` is not contained in ``UNARY_OP_TOKENS``.
        """
        is_unary = operator in UNARY_OP_TOKENS
        if not is_unary:
            raise ValueError(f"Invalid token type for unary operation {operator}.")
        self.operator, self.inner_expr = operator, operand
@dataclass
class Statement:
    """Statement node wrapping exactly one expression."""

    # The expression carried by this statement.
    expr: Expression
@dataclass
class Parameter:
    """One entry of a function's parameter list."""

    # Token giving the parameter's type.
    type: Token
    # Parameter name as a plain string.
    identifier: str
class Function:
    """Function node: a name plus its statements and parameters."""

    def __init__(
        self,
        name: str,
        statements: list[Statement] | None = None,
        parameters: list[Parameter] | None = None,
    ) -> None:
        """Create a function node.

        ``statements`` and ``parameters`` are stored as given when they are
        non-empty; ``None`` or an empty list is replaced by a fresh empty list.
        """
        self.name = name
        self.statements: list[Statement] = statements if statements else []
        self.parameters: list[Parameter] = parameters if parameters else []
class Program:
    """Root node of the syntax tree: the list of parsed functions."""

    def __init__(self, functions: list[Function] | None = None) -> None:
        """Store ``functions``; ``None`` or an empty list becomes a fresh empty list."""
        self.functions: list[Function] = functions if functions else []
class Parser:
    """Hand-written recursive-descent parser that builds a ``Program`` from tokens.

    The parser holds the token list and a cursor ``idx`` pointing at the next
    unconsumed token. The ``parse_*`` methods advance the cursor as they go.
    """

    def __init__(self, tokens: list[Token]):
        self.tokens = tokens
        self.idx = 0

    def get(self, n: int = 0) -> Token:
        """Return the token ``n`` positions ahead of the cursor without consuming it.

        Raises:
            IndexError: if that position lies past the end of the token list.
        """
        return self.tokens[self.idx + n]

    def get_safe(self, n: int = 0) -> Token | None:
        """Like ``get``, but return ``None`` when the position is out of range."""
        position = self.idx + n
        # The lower bound matters too: a negative position would otherwise
        # wrap around and silently return a token from the end of the list.
        if 0 <= position < len(self.tokens):
            return self.get(n)
        return None

    def check_sequence(self, *token_types: TokenType) -> bool:
        """Return True if the upcoming tokens have exactly these types, in order.

        Nothing is consumed. Running out of tokens before every requested type
        has been compared counts as a mismatch.
        """
        for offset, expected in enumerate(token_types):
            token = self.get_safe(offset)
            if token is None or token.token_type != expected:
                return False
        return True

    def consume(self) -> Token:
        """Return the token under the cursor and advance the cursor by one.

        Raises:
            IndexError: if the cursor is already past the end of the token list.
        """
        token = self.get()
        self.idx += 1
        return token

    def expect(self, token_type: TokenType) -> Token:
        """Consume one token and return it if it has the given type.

        The token is consumed even when it does not match.

        Raises:
            CSyntaxError: built from the expected type and the offending token
                when the types differ.
        """
        token = self.consume()
        if token.token_type != token_type:
            raise CSyntaxError(token_type, token)
        return token

    def parse_program(self) -> Program:
        """Parse the whole token list and return the resulting ``Program``."""
        return Program(self.parse_functions())

    def parse_functions(self) -> list[Function]:
        """Parse function definitions until the EOF token or the end of the list.

        Each function is expected to look like
        ``int NAME ( [void] ) <one token> <statements> }``.

        Raises:
            CSyntaxError: if a function header or the closing brace is malformed.
        """
        functions: list[Function] = []
        while (token := self.get_safe()) is not None and token.token_type != TokenType.EOF:
            # Header. expect() raises on a mismatch, so malformed input is
            # reported instead of leaving the cursor stuck in this loop.
            _ = self.expect(TokenType.KW_INT)
            name = self.expect(TokenType.IDENTIFIER).lexeme
            _ = self.expect(TokenType.OPEN_PAREN)

            parameters: list[Parameter] = []
            if self.check_sequence(TokenType.KW_VOID):
                # `void` is recorded as a single parameter with an empty name.
                parameters.append(Parameter(self.consume(), ""))

            _ = self.expect(TokenType.CLOSE_PAREN)
            # The token after `)` (the one opening the body) is skipped without
            # being checked. NOTE(review): validate it once the matching token
            # type is confirmed.
            self.idx += 1

            statements = self.parse_statements()
            # Pass the parsed parameters along so they are not thrown away.
            function = Function(name, statements, parameters)
            _ = self.expect(TokenType.CLOSE_BRACE)
            functions.append(function)
        return functions

    def parse_statements(self) -> list[Statement]:
        """Parse consecutive ``return <expression> ;`` statements.

        Stops, without consuming, at the first token that is not ``return``.
        """
        statements: list[Statement] = []
        while self.get().token_type == TokenType.KW_RETURN:
            self.idx += 1  # step over the `return` keyword
            statement = Statement(self.parse_expression())
            _ = self.expect(TokenType.SEMICOLON)
            statements.append(statement)
        return statements

    def parse_expression(self) -> Expression:
        """Parse one expression starting at the cursor.

        Handles a unary operator followed by an expression, a parenthesised
        expression, and otherwise treats the token's lexeme as an integer.

        Raises:
            CSyntaxError: if a parenthesised expression is not closed by ``)``.
            ValueError: if the fallback token's lexeme is not a valid integer.
        """
        token = self.consume()
        if token.token_type in UNARY_OP_TOKENS:
            inner_expr = self.parse_expression()
            return UnaryOp(token.token_type, inner_expr)
        if token.token_type == TokenType.OPEN_PAREN:
            inner_expr = self.parse_expression()
            _ = self.expect(TokenType.CLOSE_PAREN)
            return inner_expr
        # NOTE(review): the token type is not checked here, so any other token
        # reaches int() and surfaces as ValueError rather than CSyntaxError.
        return ConstantInt(int(token.lexeme))
def pretty_print(node: Program | Function | Statement | Expression, indent: int = 0) -> None:
padding = " " * indent
if isinstance(node, Program):
print(f"\n{padding}Program begins:")
for function in node.functions:
pretty_print(function, indent + 2)
elif isinstance(node, Function):
print(f"{padding}Function - {node.name}")
for statement in node.statements:
pretty_print(statement, indent + 2)
elif isinstance(node, Statement):
print(f"{padding}Statement - {node.expr}")
# Hack! A statement can only have one expr right now.
pretty_print(node.expr, indent + 2)
else: # Always an expression by this point
print(f"{padding}Expr value - {node}")