diff options
Diffstat (limited to 'boot')
| -rw-r--r-- | boot/catboot.c | 29 | ||||
| -rw-r--r-- | boot/common.c | 8 | ||||
| -rw-r--r-- | boot/lex.c | 25 | ||||
| -rw-r--r-- | boot/parse.c | 228 |
4 files changed, 228 insertions, 62 deletions
diff --git a/boot/catboot.c b/boot/catboot.c index 53b2bed..ed5831e 100644 --- a/boot/catboot.c +++ b/boot/catboot.c @@ -95,24 +95,23 @@ debug_lex_pass(struct String source) } integer -debug_parse_pass(struct String source) +debug_parse_pass(struct Source_File source_file) { struct Lexer lexer; - lexer_new(&lexer, source); + lexer_new(&lexer, source_file.source); struct Parser parser; - parser_new(&parser, &lexer); + parser_new(&parser, &lexer, source_file); - struct Parser_Error parser_error = parser_error_none(); - struct Tree tree = parser_do_your_thing(&parser, &parser_error); - if (!parser_error_is_none(&parser_error)) { - fprintf(stderr, "parser error: %s\n", parser_error_to_string(&parser_error)); - return 1; + struct Tree tree; + int parser_result = parser_do_your_thing(&parser, &tree); + if (parser_result != 0) { + fprintf(stderr, "parser finished with errors\n"); } tree_printer(&tree); - return 0; + return parser_result; } enum Command_Result @@ -155,8 +154,12 @@ enum Command_Result test_parse_command(struct Command_Arguments* arguments) { struct String source = read_file(arguments->input); + struct Source_File source_file = { + .source = source, + .path = string_from_static_c_string(arguments->input), + }; - if (debug_parse_pass(source)) return COMMAND_FAIL; + if (debug_parse_pass(source_file)) return COMMAND_FAIL; return COMMAND_OK; } @@ -165,10 +168,14 @@ enum Command_Result default_command(struct Command_Arguments* arguments) { struct String source = read_file(arguments->input); + struct Source_File source_file = { + .source = source, + .path = string_from_static_c_string(arguments->input), + }; debug_lex_pass(source); printf("\n"); - if (debug_parse_pass(source)) return COMMAND_FAIL; + if (debug_parse_pass(source_file)) return COMMAND_FAIL; printf("\n"); return COMMAND_OK; diff --git a/boot/common.c b/boot/common.c index a072f60..1835cd7 100644 --- a/boot/common.c +++ b/boot/common.c @@ -249,6 +249,14 @@ string_array_at(const struct String_Array* array, uint index) struct String str = array.strings[0]; \ for (uint cursor = 0; cursor < array.count; str = array.strings[++cursor]) +// a source file given to the compiler. +struct Source_File +{ + struct String source; + // path to the source file, relative to the current working directory. + struct String path; +}; + // single iteration of the CRC32 checksum algorithm // described in POSIX. // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html diff --git a/boot/lex.c b/boot/lex.c index c705f26..2efb33d 100644 --- a/boot/lex.c +++ b/boot/lex.c @@ -31,6 +31,12 @@ span_new(Pos start, Pos end) } struct Span +span_empty(void) +{ + return (struct Span){ 0, 0 }; +} + +struct Span span_width(Pos start, uint width) { return (struct Span){ .start = start, .end = start + width }; @@ -74,6 +80,13 @@ span_is_empty(struct Span span) return span_equals(span, (struct Span){ 0, 0 }); } +uint +span_length(struct Span span) +{ + uint length = span.end - span.start; + return length == 0 ? 1 : length; +} + // a cursor position placed within a text file. struct Cursor { @@ -81,6 +94,18 @@ struct Cursor uint column; }; +struct Cursor +cursor_new(uint line, uint column) +{ + return (struct Cursor){ .line = line, .column = column }; +} + +struct Cursor +cursor_empty(void) +{ + return cursor_new(0, 0); +} + // what kind of token is it? enum Token_Kind { diff --git a/boot/parse.c b/boot/parse.c index 264cf45..6daca8f 100644 --- a/boot/parse.c +++ b/boot/parse.c @@ -21,43 +21,23 @@ parse; \ if (!parser_error_is_none(error)) return (ret){ 0 }; -struct Parser_Error -{ - enum Parser_Error_Kind - { - PARSER_ERROR_NONE, - PARSER_ERROR_UNEXPECTED_TOKEN, - PARSER_ERROR_UNEXPECTED_EOF, - PARSER_ERROR_EXPECTED_STATEMENT_END, - PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION, - PARSER_ERROR_EXPECTED_TYPE, - PARSER_ERROR_EXPECTED_PRAGMA, - } kind; - // TODO: add span to error -}; - -struct Parser_Error -parser_error(enum Parser_Error_Kind kind) +enum Parser_Error_Kind { - return (struct Parser_Error){ kind }; -} - -struct Parser_Error -parser_error_none(void) -{ - return parser_error(PARSER_ERROR_NONE); -} + PARSER_ERROR_NONE, + PARSER_ERROR_UNEXPECTED_TOKEN, + PARSER_ERROR_UNEXPECTED_EOF, -bool -parser_error_is_none(const struct Parser_Error* error) -{ - return error->kind == PARSER_ERROR_NONE; -} + PARSER_ERROR_EXPECTED_STATEMENT_END, + PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION, + PARSER_ERROR_EXPECTED_TYPE, + PARSER_ERROR_EXPECTED_PRAGMA, + PARSER_ERROR_EXPECTED_PRAGMA_ARGUMENT +}; const ascii* -parser_error_to_string(const struct Parser_Error* error) +parser_error_kind_to_string(enum Parser_Error_Kind error_kind) { - switch (error->kind) { + switch (error_kind) { case PARSER_ERROR_NONE: return "none"; case PARSER_ERROR_UNEXPECTED_TOKEN: @@ -72,22 +52,127 @@ parser_error_to_string(const struct Parser_Error* error) return "expected type"; case PARSER_ERROR_EXPECTED_PRAGMA: return "expected pragma"; + case PARSER_ERROR_EXPECTED_PRAGMA_ARGUMENT: + return "expected pragma argument"; default: return "unknown error"; } } +struct Parser_Error +{ + // kind is the top-level error type, + // with subkind being a more specific error. + // i.e. kind: EXPECTED_PRAGMA, subkind: UNEXPECTED_TOKEN(integer) + enum Parser_Error_Kind kind, subkind; + + struct Token cause; +}; + +void +parser_error(struct Parser_Error* error, enum Parser_Error_Kind kind, struct Token token) +{ + *error = (struct Parser_Error){ .kind = kind, .subkind = PARSER_ERROR_NONE, .cause = token }; +} + +void +parser_error_wrap(struct Parser_Error* error, enum Parser_Error_Kind super_kind) +{ + *error = (struct Parser_Error){ + .kind = super_kind, + .subkind = error->kind, + .cause = error->cause, + }; +} + +void +parser_error_none(struct Parser_Error* error) +{ + parser_error(error, PARSER_ERROR_NONE, token_none()); +} + +bool +parser_error_is_none(const struct Parser_Error* error) +{ + return error->kind == PARSER_ERROR_NONE; +} + +struct Span +token_span_to_line_span(struct Span span, struct String source) +{ + Pos line_start = span.start + 1, line_end = span.end - 1; + // expand `line_start` to start from the beginning of the line, + // and `line_end` to end at the end of the line. + ascii c; + if (line_start > 0) { + do { + c = string_at(source, line_start - 1); + line_start--; + } while (line_start > 0 && c != '\n'); + } + if (line_end < string_length(source)) { + do { + c = string_at(source, line_end + 1); + line_end++; + } while (line_end < string_length(source) && c != '\n'); + } + + return span_new(line_start, line_end); +} + +// print out nice, human-readable error message +// pointing to the location of the error in the source file. +// TODO: bring out the infrastructure for displaying errors in the same format +// outside of the parser, so that it can be used in other places. +void +parser_error_display(const struct Parser_Error* error, struct Source_File source_file) +{ + if (parser_error_is_none(error)) return; + + uint line = error->cause.location.line; + uint column = error->cause.location.column; + + struct Token cause = error->cause; + + fprintf(stderr, ANSI_WHITE "%s:%lu:%lu:\n", source_file.path.data, line, column); + fprintf(stderr, ANSI_BOLD ANSI_RED "error: " ANSI_WHITE); + if (error->subkind != PARSER_ERROR_NONE) { + fprintf(stderr, "%s: %s ", parser_error_kind_to_string(error->kind), + parser_error_kind_to_string(error->subkind)); + } else { + fprintf(stderr, "%s ", parser_error_kind_to_string(error->kind)); + } + // TODO: display tokens nicely + fprintf(stderr, "'%s' :(\n", token_kind_to_string(cause.kind)); + + struct Span line_span = token_span_to_line_span(cause.span, source_file.source); + + ascii* source_line = source_file.source.data + line_span.start; + int source_line_length = line_span.end - line_span.start; + + fprintf(stderr, ANSI_WHITE ANSI_NO_BOLD "%lu| %.*s\n", line, source_line_length, source_line); + fprintf(stderr, ANSI_RED "%*s", (int)column + 3 - 1, " "); // +3 for the line number and space + for (uint w = 0; w < span_length(cause.span); w++) { fprintf(stderr, "^"); } + fprintf(stderr, "\n" ANSI_RESET); +} + struct Parser { struct Lexer* lexer; struct Token lookahead[PARSER_LOOKAHEAD]; + + struct Source_File source_file; + + bool had_errors; }; void -parser_new(struct Parser* p, struct Lexer* lexer) +parser_new(struct Parser* p, struct Lexer* lexer, struct Source_File source_file) { p->lexer = lexer; memset(p->lookahead, 0, sizeof(p->lookahead)); + p->source_file = source_file; + p->had_errors = false; } bool @@ -137,7 +222,7 @@ parser_need(struct Parser* p, enum Token_Kind kind, struct Parser_Error* error) { struct Token token = parser_next(p); if (!token_is(&token, kind)) { - *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN); + parser_error(error, PARSER_ERROR_UNEXPECTED_TOKEN, token); return token_none(); } return token; @@ -196,6 +281,28 @@ parser_unglue(struct Parser* p) while (parser_probe(p, TOKEN_NEWLINE)) parser_next(p); } +// discard tokens until a "synchronization" point is found. +// used to avoid errors cascading from one part of the code to another. +void +parser_panic(struct Parser* p) +{ + // TODO: find all places where panic can stop + for (;;) { + struct Token token = parser_peek(p); + switch (token.kind) { + case TOKEN_NEWLINE: + case TOKEN_END_OF_FILE: + parser_next(p); + goto panic_end; + default: + parser_next(p); + } + } + +panic_end: + return; +} + struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error); struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error); @@ -204,7 +311,7 @@ parser_end_statement(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_peek(p); if (!token_ends_statement(&token)) { - *error = parser_error(PARSER_ERROR_EXPECTED_STATEMENT_END); + parser_error(error, PARSER_ERROR_EXPECTED_STATEMENT_END, token); return; } parser_next(p); @@ -516,9 +623,9 @@ parser_node_type_array_or_map(struct Parser* p, struct Parser_Error* error) { struct Token open_token = CHECK(parser_need(p, TOKEN_SQUARE_OPEN, error)); - struct Type_Node* element_or_key_type = CHECK(parser_node_type(p, error)); + struct Type_Node* element_or_key_type = parser_node_type(p, error); if (!element_or_key_type) { - *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); + parser_error_wrap(error, PARSER_ERROR_EXPECTED_TYPE); return nil; } @@ -529,9 +636,9 @@ parser_node_type_array_or_map(struct Parser* p, struct Parser_Error* error) parser_next(p); // consume the assignment token struct Type_Node* key_type = element_or_key_type; - struct Type_Node* value_type = CHECK(parser_node_type(p, error)); + struct Type_Node* value_type = parser_node_type(p, error); if (!value_type) { - *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); + parser_error_wrap(error, PARSER_ERROR_EXPECTED_TYPE); return nil; } @@ -559,7 +666,7 @@ parser_node_type_reference(struct Parser* p, struct Parser_Error* error) struct Type_Node* referenced_type = CHECK(parser_node_type(p, error)); if (!referenced_type) { - *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); + parser_error(error, PARSER_ERROR_EXPECTED_TYPE, ampersand_token); return nil; } @@ -591,7 +698,7 @@ parser_node_type_inner(struct Parser* p, struct Parser_Error* error) case TOKEN_AMPERSAND: return parser_node_type_reference(p, error); default: - *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); + parser_error(error, PARSER_ERROR_UNEXPECTED_TOKEN, token); return nil; } } @@ -625,7 +732,7 @@ parser_pragma_node(struct Parser* p, struct Parser_Error* error) struct Token pragma_token = CHECK(parser_need(p, TOKEN_NAME, error)); enum Pragma_Type pragma_type = pragma_type_from_string(pragma_token.value.name); if (!pragma_type) { - *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA); + parser_error(error, PARSER_ERROR_EXPECTED_PRAGMA, pragma_token); return nil; } @@ -660,7 +767,7 @@ parser_pragma_node(struct Parser* p, struct Parser_Error* error) argument->value.name_or_string = v->name; break; default: - *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN); + parser_error(error, PARSER_ERROR_EXPECTED_PRAGMA_ARGUMENT, token); return nil; } argument_index++; @@ -813,7 +920,7 @@ parser_expression_primary(struct Parser* p, struct Parser_Error* error) case TOKEN_WORD_CLASS: return parser_expression_type(p, error); default: - *error = parser_error(PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION); + parser_error(error, PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION, token); return nil; } } @@ -1290,7 +1397,7 @@ parser_statement_pragma(struct Parser* p, struct Parser_Error* error) struct Pragma_Node* pragma_node = parser_pragma_node(p, error); if (!parser_error_is_none(error)) { - *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA); + parser_error_wrap(error, PARSER_ERROR_EXPECTED_PRAGMA); return nil; } @@ -1348,19 +1455,36 @@ parser_statement(struct Parser* p, struct Parser_Error* error) return statement_new(STATEMENT_EXPRESSION, value, span, expression->location); } +void +parser_handle_error(struct Parser* p, struct Parser_Error* error) +{ + if (parser_error_is_none(error)) return; + + p->had_errors = true; + parser_error_display(error, p->source_file); + parser_error_none(error); + parser_panic(p); +} + // parse the lexer tokens into a single AST. // note: it was either `parser_parse` or this. :) -struct Tree -parser_do_your_thing(struct Parser* p, struct Parser_Error* error) +int +parser_do_your_thing(struct Parser* p, struct Tree* tree) { + struct Parser_Error error; + parser_error_none(&error); + struct Statement* head = nil; struct Statement* current = nil; while (!parser_reached_end(p)) { - struct Statement* next = CHECK_RETURN(parser_statement(p, error), struct Tree); + struct Statement* next = parser_statement(p, &error); + parser_handle_error(p, &error); + if (!next) break; // on eof - CHECK_RETURN(parser_end_statement(p, error), struct Tree); + parser_end_statement(p, &error); + parser_handle_error(p, &error); if (current) { current->next = next; @@ -1370,8 +1494,10 @@ parser_do_your_thing(struct Parser* p, struct Parser_Error* error) current = next; } - *error = parser_error_none(); - return (struct Tree){ head }; + parser_error_none(&error); + *tree = (struct Tree){ head }; + + return p->had_errors; } #undef CHECK |
