about summary refs log tree commit diff
path: root/boot/parse.c
diff options
context:
space:
mode:
author: Mel <mel@rnrd.eu> 2025-07-09 04:46:58 +0200
committer: Mel <mel@rnrd.eu> 2025-07-09 04:46:58 +0200
commit: efa510e6b58ce13c53e94f13a5be0007240e9dcc (patch)
tree: 7bcebe71e6d90d7e0004be1f975e5028b3c1b530 /boot/parse.c
parent: 3620cabd1d722e4acc761c7278aa44aba902006c (diff)
download: catskill-efa510e6b58ce13c53e94f13a5be0007240e9dcc.tar.zst
catskill-efa510e6b58ce13c53e94f13a5be0007240e9dcc.zip
Display human-readable, informative parser error messages w/ source snippet
Signed-off-by: Mel <mel@rnrd.eu>
Diffstat (limited to 'boot/parse.c')
-rw-r--r-- boot/parse.c 228
1 files changed, 177 insertions, 51 deletions
diff --git a/boot/parse.c b/boot/parse.c
index 264cf45..6daca8f 100644
--- a/boot/parse.c
+++ b/boot/parse.c
@@ -21,43 +21,23 @@
     parse;                       \
     if (!parser_error_is_none(error)) return (ret){ 0 };
 
-struct Parser_Error
-{
-    enum Parser_Error_Kind
-    {
-        PARSER_ERROR_NONE,
-        PARSER_ERROR_UNEXPECTED_TOKEN,
-        PARSER_ERROR_UNEXPECTED_EOF,
-        PARSER_ERROR_EXPECTED_STATEMENT_END,
-        PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION,
-        PARSER_ERROR_EXPECTED_TYPE,
-        PARSER_ERROR_EXPECTED_PRAGMA,
-    } kind;
-    // TODO: add span to error
-};
-
-struct Parser_Error
-parser_error(enum Parser_Error_Kind kind)
+enum Parser_Error_Kind
 {
-    return (struct Parser_Error){ kind };
-}
-
-struct Parser_Error
-parser_error_none(void)
-{
-    return parser_error(PARSER_ERROR_NONE);
-}
+    PARSER_ERROR_NONE,
+    PARSER_ERROR_UNEXPECTED_TOKEN,
+    PARSER_ERROR_UNEXPECTED_EOF,
 
-bool
-parser_error_is_none(const struct Parser_Error* error)
-{
-    return error->kind == PARSER_ERROR_NONE;
-}
+    PARSER_ERROR_EXPECTED_STATEMENT_END,
+    PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION,
+    PARSER_ERROR_EXPECTED_TYPE,
+    PARSER_ERROR_EXPECTED_PRAGMA,
+    PARSER_ERROR_EXPECTED_PRAGMA_ARGUMENT
+};
 
 const ascii*
-parser_error_to_string(const struct Parser_Error* error)
+parser_error_kind_to_string(enum Parser_Error_Kind error_kind)
 {
-    switch (error->kind) {
+    switch (error_kind) {
     case PARSER_ERROR_NONE:
         return "none";
     case PARSER_ERROR_UNEXPECTED_TOKEN:
@@ -72,22 +52,127 @@ parser_error_to_string(const struct Parser_Error* error)
         return "expected type";
     case PARSER_ERROR_EXPECTED_PRAGMA:
         return "expected pragma";
+    case PARSER_ERROR_EXPECTED_PRAGMA_ARGUMENT:
+        return "expected pragma argument";
     default:
         return "unknown error";
     }
 }
 
+struct Parser_Error
+{
+    // `kind` is the top-level error; `subkind` is the more specific error it wraps,
+    // or PARSER_ERROR_NONE when there is none (see parser_error_wrap).
+    // e.g. kind: EXPECTED_PRAGMA, subkind: UNEXPECTED_TOKEN(integer)
+    enum Parser_Error_Kind kind, subkind;
+
+    struct Token cause; // token the error was raised on; its location and span drive the display.
+};
+
+void
+parser_error(struct Parser_Error* error, enum Parser_Error_Kind kind, struct Token token)
+{
+    *error = (struct Parser_Error){ .kind = kind, .subkind = PARSER_ERROR_NONE, .cause = token }; // replaces *error; no subkind
+}
+
+void
+parser_error_wrap(struct Parser_Error* error, enum Parser_Error_Kind super_kind)
+{
+    *error = (struct Parser_Error){
+        .kind = super_kind, // the wrapping, more general error becomes the top-level kind
+        .subkind = error->kind, // the previous kind is demoted; any previous subkind is overwritten
+        .cause = error->cause, // the offending token is carried over unchanged
+    };
+}
+
+void
+parser_error_none(struct Parser_Error* error)
+{
+    parser_error(error, PARSER_ERROR_NONE, token_none()); // reset *error to the no-error state
+}
+
+bool
+parser_error_is_none(const struct Parser_Error* error)
+{
+    return error->kind == PARSER_ERROR_NONE; // only `kind` is consulted, not `subkind`
+}
+
+struct Span
+token_span_to_line_span(struct Span span, struct String source)
+{
+    Pos line_start = span.start + 1, line_end = span.end - 1;
+    // widen the token span to its surrounding line: walk `line_start` backwards and
+    // `line_end` forwards until a newline is read or a bound of `source` is reached.
+    ascii c;
+    if (line_start > 0) {
+        do {
+            c = string_at(source, line_start - 1); // first read is at span.start
+            line_start--;
+        } while (line_start > 0 && c != '\n'); // on a newline, line_start is the position it was read at
+    }
+    if (line_end < string_length(source)) {
+        do {
+            c = string_at(source, line_end + 1); // NOTE(review): may read position == length; confirm string_at allows it
+            line_end++;
+        } while (line_end < string_length(source) && c != '\n');
+    }
+
+    return span_new(line_start, line_end);
+}
+
+// print a human-readable report for `error` to stderr: a `path:line:column:` header, the
+// error kind (plus subkind, if set) and the offending token kind, then the source line with
+// carets under the token. does nothing when `error` is none.
+// TODO: move this display infrastructure out of the parser so other stages can reuse it.
+void
+parser_error_display(const struct Parser_Error* error, struct Source_File source_file)
+{
+    if (parser_error_is_none(error)) return;
+
+    uint line = error->cause.location.line; // NOTE(review): printed with %lu below; confirm uint is unsigned long
+    uint column = error->cause.location.column;
+
+    struct Token cause = error->cause;
+
+    fprintf(stderr, ANSI_WHITE "%s:%lu:%lu:\n", source_file.path.data, line, column);
+    fprintf(stderr, ANSI_BOLD ANSI_RED "error: " ANSI_WHITE);
+    if (error->subkind != PARSER_ERROR_NONE) {
+        fprintf(stderr, "%s: %s ", parser_error_kind_to_string(error->kind),
+                parser_error_kind_to_string(error->subkind));
+    } else {
+        fprintf(stderr, "%s ", parser_error_kind_to_string(error->kind));
+    }
+    // TODO: display tokens nicely
+    fprintf(stderr, "'%s' :(\n", token_kind_to_string(cause.kind));
+
+    struct Span line_span = token_span_to_line_span(cause.span, source_file.source);
+
+    ascii* source_line = source_file.source.data + line_span.start; // points into the source buffer, no copy
+    int source_line_length = line_span.end - line_span.start;
+
+    fprintf(stderr, ANSI_WHITE ANSI_NO_BOLD "%lu| %.*s\n", line, source_line_length, source_line);
+    fprintf(stderr, ANSI_RED "%*s", (int)column + 3 - 1, " "); // +3 = width of the `N| ` gutter; exact only for one-digit line numbers
+    for (uint w = 0; w < span_length(cause.span); w++) { fprintf(stderr, "^"); } // one caret per position in the token span
+    fprintf(stderr, "\n" ANSI_RESET);
+}
+
 struct Parser
 {
     struct Lexer* lexer;
     struct Token lookahead[PARSER_LOOKAHEAD];
+
+    struct Source_File source_file; // handed to parser_error_display for the path and source snippet.
+
+    bool had_errors; // set by parser_handle_error; returned by parser_do_your_thing.
 };
 
 void
-parser_new(struct Parser* p, struct Lexer* lexer)
+parser_new(struct Parser* p, struct Lexer* lexer, struct Source_File source_file)
 {
     p->lexer = lexer;
     memset(p->lookahead, 0, sizeof(p->lookahead));
+    p->source_file = source_file;
+    p->had_errors = false;
 }
 
 bool
@@ -137,7 +222,7 @@ parser_need(struct Parser* p, enum Token_Kind kind, struct Parser_Error* error)
 {
     struct Token token = parser_next(p);
     if (!token_is(&token, kind)) {
-        *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN);
+        parser_error(error, PARSER_ERROR_UNEXPECTED_TOKEN, token);
         return token_none();
     }
     return token;
@@ -196,6 +281,28 @@ parser_unglue(struct Parser* p)
     while (parser_probe(p, TOKEN_NEWLINE)) parser_next(p);
 }
 
+// error recovery: discard tokens up to and including the next "synchronization" point
+// (currently a newline or end-of-file token), so one error does not cascade into others.
+void
+parser_panic(struct Parser* p)
+{
+    // TODO: find all places where panic can stop
+    for (;;) {
+        struct Token token = parser_peek(p);
+        switch (token.kind) {
+        case TOKEN_NEWLINE:
+        case TOKEN_END_OF_FILE:
+            parser_next(p); // the synchronization token itself is consumed as well
+            goto panic_end;
+        default:
+            parser_next(p); // any other token is skipped
+        }
+    }
+
+panic_end:
+    return;
+}
+
 struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error);
 struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error);
 
@@ -204,7 +311,7 @@ parser_end_statement(struct Parser* p, struct Parser_Error* error)
 {
     struct Token token = parser_peek(p);
     if (!token_ends_statement(&token)) {
-        *error = parser_error(PARSER_ERROR_EXPECTED_STATEMENT_END);
+        parser_error(error, PARSER_ERROR_EXPECTED_STATEMENT_END, token);
         return;
     }
     parser_next(p);
@@ -516,9 +623,9 @@ parser_node_type_array_or_map(struct Parser* p, struct Parser_Error* error)
 {
     struct Token open_token = CHECK(parser_need(p, TOKEN_SQUARE_OPEN, error));
 
-    struct Type_Node* element_or_key_type = CHECK(parser_node_type(p, error));
+    struct Type_Node* element_or_key_type = parser_node_type(p, error);
     if (!element_or_key_type) {
-        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        parser_error_wrap(error, PARSER_ERROR_EXPECTED_TYPE);
         return nil;
     }
 
@@ -529,9 +636,9 @@ parser_node_type_array_or_map(struct Parser* p, struct Parser_Error* error)
         parser_next(p); // consume the assignment token
 
         struct Type_Node* key_type = element_or_key_type;
-        struct Type_Node* value_type = CHECK(parser_node_type(p, error));
+        struct Type_Node* value_type = parser_node_type(p, error);
         if (!value_type) {
-            *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+            parser_error_wrap(error, PARSER_ERROR_EXPECTED_TYPE);
             return nil;
         }
 
@@ -559,7 +666,7 @@ parser_node_type_reference(struct Parser* p, struct Parser_Error* error)
 
     struct Type_Node* referenced_type = CHECK(parser_node_type(p, error));
     if (!referenced_type) {
-        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        parser_error(error, PARSER_ERROR_EXPECTED_TYPE, ampersand_token);
         return nil;
     }
 
@@ -591,7 +698,7 @@ parser_node_type_inner(struct Parser* p, struct Parser_Error* error)
     case TOKEN_AMPERSAND:
         return parser_node_type_reference(p, error);
     default:
-        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        parser_error(error, PARSER_ERROR_UNEXPECTED_TOKEN, token);
         return nil;
     }
 }
@@ -625,7 +732,7 @@ parser_pragma_node(struct Parser* p, struct Parser_Error* error)
         struct Token pragma_token = CHECK(parser_need(p, TOKEN_NAME, error));
         enum Pragma_Type pragma_type = pragma_type_from_string(pragma_token.value.name);
         if (!pragma_type) {
-            *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA);
+            parser_error(error, PARSER_ERROR_EXPECTED_PRAGMA, pragma_token);
             return nil;
         }
 
@@ -660,7 +767,7 @@ parser_pragma_node(struct Parser* p, struct Parser_Error* error)
                 argument->value.name_or_string = v->name;
                 break;
             default:
-                *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN);
+                parser_error(error, PARSER_ERROR_EXPECTED_PRAGMA_ARGUMENT, token);
                 return nil;
             }
             argument_index++;
@@ -813,7 +920,7 @@ parser_expression_primary(struct Parser* p, struct Parser_Error* error)
     case TOKEN_WORD_CLASS:
         return parser_expression_type(p, error);
     default:
-        *error = parser_error(PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION);
+        parser_error(error, PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION, token);
         return nil;
     }
 }
@@ -1290,7 +1397,7 @@ parser_statement_pragma(struct Parser* p, struct Parser_Error* error)
 
     struct Pragma_Node* pragma_node = parser_pragma_node(p, error);
     if (!parser_error_is_none(error)) {
-        *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA);
+        parser_error_wrap(error, PARSER_ERROR_EXPECTED_PRAGMA);
         return nil;
     }
 
@@ -1348,19 +1455,36 @@ parser_statement(struct Parser* p, struct Parser_Error* error)
     return statement_new(STATEMENT_EXPRESSION, value, span, expression->location);
 }
 
+void
+parser_handle_error(struct Parser* p, struct Parser_Error* error)
+{
+    if (parser_error_is_none(error)) return; // nothing to report
+
+    p->had_errors = true; // remembered so the parse can report failure at the end
+    parser_error_display(error, p->source_file);
+    parser_error_none(error); // clear it so the caller can keep using the same error object
+    parser_panic(p); // skip ahead to the next synchronization point before parsing resumes
+}
+
 // parse the lexer tokens into a single AST.
 // note: it was either `parser_parse` or this. :)
-struct Tree
-parser_do_your_thing(struct Parser* p, struct Parser_Error* error)
+int
+parser_do_your_thing(struct Parser* p, struct Tree* tree)
 {
+    struct Parser_Error error;
+    parser_error_none(&error);
+
     struct Statement* head = nil;
 
     struct Statement* current = nil;
     while (!parser_reached_end(p)) {
-        struct Statement* next = CHECK_RETURN(parser_statement(p, error), struct Tree);
+        struct Statement* next = parser_statement(p, &error);
+        parser_handle_error(p, &error);
+
         if (!next) break; // on eof
 
-        CHECK_RETURN(parser_end_statement(p, error), struct Tree);
+        parser_end_statement(p, &error);
+        parser_handle_error(p, &error);
 
         if (current) {
             current->next = next;
@@ -1370,8 +1494,10 @@ parser_do_your_thing(struct Parser* p, struct Parser_Error* error)
         current = next;
     }
 
-    *error = parser_error_none();
-    return (struct Tree){ head };
+    parser_error_none(&error);
+    *tree = (struct Tree){ head };
+
+    return p->had_errors;
 }
 
 #undef CHECK