Skip to content

Commit a1f38a6

Browse files
author
DigitalCodeCrafter
committed
added better error recovery
1 parent e6eef41 commit a1f38a6

6 files changed

Lines changed: 372 additions & 129 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
/target
22
/kasm_programs
3+
/kep_programs

src/compiler.rs

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,15 @@
11

2-
// lexer [MVP]
3-
// parser [MVP]
4-
// encoder [ ]
2+
// [X] lexer
3+
// [/] parser
4+
// [ ] expander
5+
// [ ] resolver
6+
// [ ] validator
7+
// [ ] type checker
8+
// [ ] lowerer
9+
// [ ] optimizer
10+
// [ ] code generator
11+
12+
use std::path::Path;
513

614
mod lexer;
715
mod parser;
@@ -12,32 +20,36 @@ type CResult<T> = Result<T, CompilerError>;
1220
pub enum CompilerError {
1321
Error { text: String },
1422

15-
LexerUnexpectedChar { line: usize, col: usize, c: char },
16-
LexerUnterminatedString { line: usize, col: usize },
23+
LexError(Vec<lexer::LexError>),
24+
ParseError(Vec<parser::ParseError>),
1725

18-
ParseError(Vec<parser::ParseError>)
26+
IoError(std::io::Error),
1927
}
2028
impl std::fmt::Display for CompilerError {
2129
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2230
use CompilerError::*;
2331
match self {
2432
Error { text } => write!(f, "Error: {}", text),
25-
26-
LexerUnexpectedChar { line, col, c } => write!(f, "Error: Unexpected Char {} at line {}, column {}", c, line, col),
27-
LexerUnterminatedString { line, col } => write!(f, "Error: Unterminated String starting at line {}, column {}", line, col),
28-
33+
34+
LexError(err) => write!(f, "Error: {:?}", err),
2935
ParseError(err) => write!(f, "Error: {:?}", err),
36+
37+
IoError(e) => write!(f, "{}", e),
3038
}
3139
}
3240
}
3341
impl std::error::Error for CompilerError {}
3442

35-
pub fn compile(input: &str) -> Result<(), CompilerError> {
36-
let mut lexer = lexer::Lexer::new(input);
37-
let tokens = lexer.lex_all()?;
43+
trait ToCompileResult<T> {
44+
fn into_cresult(self) -> CResult<T>;
45+
}
46+
47+
48+
/// Runs the compiler front end on the source file at `file_path`.
///
/// The file is lexed and the resulting tokens are handed to the parser.
/// A failure to open or read the file is returned as
/// `CompilerError::IoError`; lexer and parser failures are converted with
/// `into_cresult`.
///
/// # Panics
///
/// The stages after parsing are not written yet, so a successful parse
/// reaches `todo!()` and panics.
pub fn compile(file_path: impl AsRef<Path>) -> Result<(), CompilerError> {
    // The outer result carries I/O failures, the inner one carries lex errors.
    let lexed = lexer::lex_file(file_path).map_err(CompilerError::IoError)?;
    let tokens = lexed.into_cresult()?;

    let mut parser = parser::Parser::new(tokens);
    let root = parser.parse_program().into_cresult()?;

    todo!()
}

src/compiler/ast.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@ pub enum NodeKind {
2323
TupleIndexExpression { tuple: NodeId , index: i32 },
2424
PathExpression { segments: Vec<NodeId> },
2525
PathSegment { ident: String },
26-
26+
ErrorExpr,
27+
2728
// Statements
2829
LetStmt { name: String, mutable: bool, ty: Option<TypeId>, value: Option<NodeId> },
2930
ExprStmt { expr: NodeId },
31+
EmptyStmt,
3032

3133
// Items
3234
Function { public: bool, name: String, params: Vec<(String, TypeId)>, return_type: Option<TypeId>, body: NodeId },
@@ -110,4 +112,5 @@ pub enum TypeKind {
110112
params: Vec<TypeId>,
111113
ret: Option<TypeId>,
112114
},
115+
ErrorType,
113116
}

src/compiler/expander.rs

Whitespace-only changes.

src/compiler/lexer.rs

Lines changed: 114 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,23 @@
1-
use crate::compiler::{ast::{Pos, Span}, CResult, CompilerError};
1+
// Source text -> Tokens
22

3+
use std::{fs::File, io::Read, path::Path};
4+
use crate::compiler::{ast::{Pos, Span}, CompilerError, ToCompileResult};
35

46
const CASE_SENSITIVITY: bool = true;
57

8+
#[derive(Debug, Clone)]
9+
pub struct LexError {
10+
pub span: Span,
11+
pub message: String,
12+
}
13+
14+
impl<T> ToCompileResult<T> for Result<T, Vec<LexError>> {
15+
fn into_cresult(self) -> Result<T, super::CompilerError> {
16+
self.map_err(|err| CompilerError::LexError(err))
17+
}
18+
}
19+
20+
621
#[derive(Debug, Clone, PartialEq)]
722
pub enum TokenKind {
823
// Identifiers and Literals
@@ -63,6 +78,9 @@ pub enum TokenKind {
6378
Dot, // .
6479

6580
// Misc
81+
Invalid(String),
82+
UnterminatedString(String),
83+
6684
EOF,
6785
}
6886

@@ -72,9 +90,19 @@ pub struct Token {
7290
pub span: Span,
7391
}
7492

93+
/// Reads the file at `path` and lexes its whole contents.
///
/// The outer `io::Result` is `Err` when the file cannot be opened or read as
/// a string. The inner result is whatever `Lexer::lex_all` returns for the
/// file's text.
pub fn lex_file(path: impl AsRef<Path>) -> std::io::Result<Result<Vec<Token>, Vec<LexError>>> {
    let mut contents = String::new();
    File::open(path)?.read_to_string(&mut contents)?;

    Ok(Lexer::new(&contents).lex_all())
}
101+
75102
pub struct Lexer {
76103
src: Vec<char>,
77104
pos: usize,
105+
errors: Vec<LexError>,
78106
line: usize,
79107
col: usize,
80108
}
@@ -83,35 +111,40 @@ impl Lexer {
83111
Self {
84112
src: input.chars().collect(),
85113
pos: 0,
114+
errors: Vec::new(),
86115
line: 1,
87116
col: 1,
88117
}
89118
}
90119

91-
pub fn lex_all(&mut self) -> CResult<Vec<Token>> {
120+
pub fn lex_all(&mut self) -> Result<Vec<Token>, Vec<LexError>> {
92121
let mut tokens = Vec::new();
93122
loop {
94-
let tok = self.next_token()?;
123+
let tok = self.next_token();
95124
if matches!(tok.kind, TokenKind::EOF) {
96125
tokens.push(tok); break;
97126
} else {
98127
tokens.push(tok);
99128
}
100129
}
101-
Ok(tokens)
130+
if self.errors.is_empty() {
131+
Ok(tokens)
132+
} else {
133+
Err(self.errors.clone())
134+
}
102135
}
103136

104-
pub fn next_token(&mut self) -> CResult<Token> {
137+
pub fn next_token(&mut self) -> Token {
105138
self.skip_whitespace_and_comment();
106139

107140
let start_line = self.line;
108141
let start_col = self.col;
109142

110143
let Some(c) = self.peek() else {
111-
return Ok(self.make_token(TokenKind::EOF, start_line, start_col));
144+
return self.make_token(TokenKind::EOF, start_line, start_col);
112145
};
113146

114-
Ok(if c.is_alphabetic() {
147+
if c.is_alphabetic() {
115148
self.lex_identifier_or_keyword(start_line, start_col)
116149
} else if c.is_ascii_digit() {
117150
self.lex_number(start_line, start_col)
@@ -120,14 +153,18 @@ impl Lexer {
120153
} else if c == '-' && self.peek_ahead(1).map_or(false, |n| n.is_ascii_digit()) {
121154
self.lex_number(start_line, start_col)
122155
} else if c == '"' {
123-
self.lex_string(start_line, start_col)?
156+
self.lex_string(start_line, start_col)
124157
} else {
125-
self.lex_symbol(start_line, start_col)?
126-
})
158+
self.lex_symbol(start_line, start_col)
159+
}
127160
}
128161

129162
fn make_token(&self, kind: TokenKind, line: usize, col: usize) -> Token {
130-
Token { kind, span: Span { start: Pos { line, col }, end: Pos { line: self.line, col: self.col } }}
163+
Token { kind, span: self.make_span(line, col) }
164+
}
165+
166+
fn make_span(&self, line: usize, col: usize) -> Span {
167+
Span { start: Pos { line, col }, end: Pos { line: self.line, col: self.col } }
131168
}
132169

133170
// --------- Iteration ---------
@@ -174,11 +211,25 @@ impl Lexer {
174211
}
175212
}
176213
Some('/') if self.peek_ahead(1) == Some('*') => {
177-
self.advance(); self.advance(); // consume "/*"
178-
while !(self.peek() == Some('*') && self.peek_ahead(1) == Some('/')) {
179-
if self.advance().is_none() { break; }
214+
let start_line = self.line;
215+
let start_col = self.col;
216+
let mut block_count = 0;
217+
loop {
218+
if self.peek() == Some('/') && self.peek_ahead(1) == Some('*') {
219+
self.advance(); self.advance(); // consume "/*"
220+
block_count += 1;
221+
}
222+
if self.peek() == Some('*') && self.peek_ahead(1) == Some('/') {
223+
self.advance(); self.advance(); // consume "*/"
224+
block_count -= 1;
225+
}
226+
if block_count <= 0 { break; }
227+
228+
if self.advance().is_none() {
229+
self.error(self.make_span(start_line, start_col), "Unterminated block comment");
230+
break;
231+
}
180232
}
181-
self.advance(); self.advance(); // consume "*/"
182233
}
183234
_ => break,
184235
}
@@ -323,13 +374,12 @@ impl Lexer {
323374

324375
// --------- Strings ---------
325376

326-
fn lex_string(&mut self, line: usize, col: usize) -> CResult<Token> {
377+
fn lex_string(&mut self, line: usize, col: usize) -> Token {
327378
self.advance(); // consume '"'
328379
let mut s = String::new();
329380

330381
loop {
331382
match self.advance() {
332-
None => return Err(CompilerError::LexerUnterminatedString { line, col }),
333383
Some('"') => break,
334384
Some('\\') => {
335385
if let Some(escaped) = self.advance() {
@@ -346,15 +396,19 @@ impl Lexer {
346396
}
347397
}
348398
Some(c) => s.push(c),
399+
None => {
400+
self.error(self.make_span(line, col), "Unterminated string literal");
401+
return self.make_token(TokenKind::UnterminatedString(s), line, col);
402+
}
349403
}
350404
}
351-
352-
Ok(self.make_token(TokenKind::String(s), line, col))
405+
406+
self.make_token(TokenKind::String(s), line, col)
353407
}
354408

355409
// --------- Symbols & Operators ---------
356410

357-
fn lex_symbol(&mut self, line: usize, col: usize) -> CResult<Token> {
411+
fn lex_symbol(&mut self, line: usize, col: usize) -> Token {
358412
use TokenKind::*;
359413
let c = self.advance().unwrap();
360414

@@ -390,10 +444,22 @@ impl Lexer {
390444
':' => Colon,
391445
'.' => Dot,
392446
'_' => Underscore,
393-
_ => return Err(CompilerError::LexerUnexpectedChar { line, col, c }),
447+
c => {
448+
self.error(self.make_span(line, col), format!("Unexpected character '{}'", c));
449+
Invalid(c.to_string())
450+
}
394451
};
395452

396-
Ok(self.make_token(kind, line, col))
453+
self.make_token(kind, line, col)
454+
}
455+
456+
// --------- Errors ---------
457+
458+
fn error(&mut self, span: Span, message: impl Into<String>) {
459+
self.errors.push(LexError {
460+
span,
461+
message: message.into(),
462+
});
397463
}
398464
}
399465

@@ -417,12 +483,13 @@ mod tests {
417483
let mut lexer = Lexer::new(src);
418484
let mut i = 0;
419485
loop {
420-
let tok = lexer.next_token().unwrap();
486+
let tok = lexer.next_token();
421487
println!("{:?}", tok);
422488
assert_eq!(tok.kind, expected[i]);
423489
if matches!(tok.kind, TokenKind::EOF) { break; }
424490
i += 1;
425491
}
492+
assert!(lexer.errors.is_empty(), "ERRORS: {:?}", lexer.errors);
426493
}
427494

428495
#[test]
@@ -438,11 +505,34 @@ mod tests {
438505
let mut lexer = Lexer::new(src);
439506
let mut i = 0;
440507
loop {
441-
let tok = lexer.next_token().unwrap();
508+
let tok = lexer.next_token();
509+
println!("{:?}", tok);
510+
assert_eq!(tok.kind, expected[i]);
511+
if matches!(tok.kind, TokenKind::EOF) { break; }
512+
i += 1;
513+
}
514+
assert!(lexer.errors.is_empty(), "ERRORS: {:?}", lexer.errors);
515+
}
516+
517+
#[test]
518+
fn test_error() {
519+
let src = r#"
520+
° "adsjd
521+
"#;
522+
523+
let expected = vec![
524+
TokenKind::Invalid("°".into()), TokenKind::UnterminatedString("adsjd\n ".into()), TokenKind::EOF
525+
];
526+
let mut lexer = Lexer::new(src);
527+
let mut i = 0;
528+
loop {
529+
let tok = lexer.next_token();
442530
println!("{:?}", tok);
443531
assert_eq!(tok.kind, expected[i]);
444532
if matches!(tok.kind, TokenKind::EOF) { break; }
445533
i += 1;
446534
}
535+
assert_eq!(lexer.errors.len(), 2, "Error length mismatch: {:?}", lexer.errors);
536+
println!("Errors: {:?}", lexer.errors)
447537
}
448538
}

0 commit comments

Comments
 (0)