From 815869606771b0117dd7be657232ec6dfab136c4 Mon Sep 17 00:00:00 2001 From: Mel Date: Wed, 9 Jul 2025 22:15:10 +0200 Subject: Rudimentary semi-recoverable parser panic mode Signed-off-by: Mel --- boot/parse.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 8 deletions(-) (limited to 'boot/parse.c') diff --git a/boot/parse.c b/boot/parse.c index aeb4d88..adf434b 100644 --- a/boot/parse.c +++ b/boot/parse.c @@ -278,21 +278,77 @@ parser_unglue(struct Parser* p) void parser_panic(struct Parser* p) { - // TODO: find all places where panic can stop + // NOTE: this is still very non-robust and naive and will not work in many cases, + // and swallow errors that should be reported. + // it also basically destroys the interim tree state, so when it is used, + // we should not expect the tree to be in a valid state to continue + // compilation after the parse phase. + // for a simple bootstrapping compiler, this is okay, + // but for the final compiler, this should be replaced with a way more sophisticated system. + // an idea i had for future implementations is to start a "pseudo-parse" pass that tries + // recognizing familiar valid patterns to return control to the parser once it finds a valid + // pattern. for (;;) { struct Token token = parser_peek(p); switch (token.kind) { - case TOKEN_NEWLINE: + // if we're panicking and see an opening brace, bracket or parenthesis, + // we assume it's part of the broken statement and + // consume until we find its matching closing delimiter. 
+ case TOKEN_SQUARE_OPEN: + case TOKEN_ROUND_OPEN: + case TOKEN_CURLY_OPEN: { + parser_next(p); // consume '{' '(' or '[' + enum Token_Kind open_kind = token.kind; + enum Token_Kind close_kind; + switch (open_kind) { + case TOKEN_SQUARE_OPEN: + close_kind = TOKEN_SQUARE_CLOSE; + break; + case TOKEN_ROUND_OPEN: + close_kind = TOKEN_ROUND_CLOSE; + break; + case TOKEN_CURLY_OPEN: + close_kind = TOKEN_CURLY_CLOSE; + break; + default: + unreachable(); + } + + int balance = 1; + while (balance > 0 && !parser_reached_end(p)) { + struct Token t = parser_next(p); + if (t.kind == open_kind) balance++; + if (t.kind == close_kind) balance--; + } + + break; + } + + // Check if the next token is a good place to resume. + case TOKEN_WORD_FUN: + case TOKEN_WORD_LET: + case TOKEN_WORD_VAR: + case TOKEN_WORD_IF: + case TOKEN_WORD_FOR: + case TOKEN_WORD_WHILE: + case TOKEN_WORD_RETURN: + case TOKEN_WORD_TYPE: + case TOKEN_WORD_CLASS: + case TOKEN_WORD_VARIANT: + case TOKEN_PIPE: + case TOKEN_END_OF_FILE: - parser_next(p); - goto panic_end; + // continue normal parsing + return; + default: - parser_next(p); + // no synchronization point found yet :( + break; } - } -panic_end: - return; + token = parser_next(p); + if (token.kind == TOKEN_NEWLINE) { return; } + } } struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error); -- cgit 1.4.1