From a422f9aead499a526179ba2df2aff1aa44fe48d6 Mon Sep 17 00:00:00 2001
From: Mel
Date: Sat, 31 May 2025 03:29:51 +0200
Subject: Keyword-less variable declaration parsing

Signed-off-by: Mel
---
 boot/common.c | 41 ++++++++++++++++++++++++++++++
 boot/lex.c    | 14 +++++++++++
 boot/parse.c  | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 boot/tree.c   | 50 ++++++++++++++++++++++++++++++++++--
 4 files changed, 179 insertions(+), 7 deletions(-)

(limited to 'boot')

diff --git a/boot/common.c b/boot/common.c
index d814538..65dc780 100644
--- a/boot/common.c
+++ b/boot/common.c
@@ -166,6 +166,47 @@ string_print(struct String s)
     printf("%.*s", (int32)s.length, s.data);
 }
 
+#define STRING_ARRAY_MAX 8
+
+// a fixed-capacity array of strings, holding at most `STRING_ARRAY_MAX`.
+// if we ever need more strings, just bump `STRING_ARRAY_MAX` up.
+struct String_Array
+{
+    struct String strings[STRING_ARRAY_MAX];
+    uint count; // how many slots of `strings` are in use.
+};
+
+// returns a string array with no strings; every slot is zeroed.
+struct String_Array
+string_array_new(void)
+{
+    return (struct String_Array){ 0 };
+}
+
+// appends a string to the array; returns false (changing nothing) once it is full.
+bool
+string_array_add(struct String_Array* array, struct String string)
+{
+    if (array->count >= STRING_ARRAY_MAX) return false;
+
+    array->strings[array->count++] = string;
+    return true;
+}
+
+// returns the string at `index`, which must be below `array->count`.
+struct String
+string_array_at(const struct String_Array* array, uint index)
+{
+    check(index < array->count, "index out of bounds");
+    return array->strings[index];
+}
+
+// loops over `array` with index `cursor`; declares `str` in the enclosing scope
+// and only ever loads it from slots below `array.count`.
+#define STRING_ARRAY_FOR_EACH(cursor, str, array) \
+    struct String str = { 0 }; \
+    for (uint cursor = 0; cursor < (array).count && (str = (array).strings[cursor], true); ++cursor)
+
 // single iteration of the CRC32 checksum algorithm
 // described in POSIX.
 // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html
diff --git a/boot/lex.c b/boot/lex.c
index c66d160..37eabcd 100644
--- a/boot/lex.c
+++ b/boot/lex.c
@@ -52,6 +52,20 @@ span_expand(struct Span span, integer by)
     };
 }
 
+// check if two spans have the same start and the same end.
+bool
+span_equals(struct Span a, struct Span b)
+{
+    return a.start == b.start && a.end == b.end;
+}
+
+// check if a span is the zero span `{ 0, 0 }`.
+bool
+span_is_empty(struct Span span)
+{
+    return span_equals(span, (struct Span){ 0, 0 });
+}
+
 // a cursor position placed within a text file.
 struct Cursor
 {
diff --git a/boot/parse.c b/boot/parse.c
index b009656..4fc6107 100644
--- a/boot/parse.c
+++ b/boot/parse.c
@@ -18,6 +18,7 @@ struct Parser_Error
         PARSER_ERROR_UNEXPECTED_EOF,
         PARSER_ERROR_EXPECTED_STATEMENT_END,
         PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION,
+        PARSER_ERROR_EXPECTED_TYPE,
     } kind;
     // TODO: add span to error
 };
@@ -54,6 +55,8 @@ parser_error_to_string(const struct Parser_Error* error)
         return "expected statement end";
     case PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION:
         return "expected primary expression";
+    case PARSER_ERROR_EXPECTED_TYPE:
+        return "expected type";
     default:
         return "unknown error";
     }
@@ -161,6 +164,25 @@ parser_probe(struct Parser* p, enum Token_Kind kind)
 struct Expression*
 parser_expression(struct Parser* p, struct Parser_Error* error);
 
+// parses a type, which for now is a single name token.
+// when `parser_need` yields an empty token, reports "expected type" and returns a zeroed node.
+struct Type_Node
+parser_node_type(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = parser_need(p, TOKEN_NAME, error);
+    if (token_is_empty(&token)) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        return (struct Type_Node){ 0 };
+    }
+
+    // in the future, we might want to support more complex types.
+    return (struct Type_Node){
+        .name = token.value.name,
+        .span = token.span,
+        .location = token.location,
+    };
+}
+
 struct Expression*
 parser_expression_primary_name(struct Parser* p, struct Parser_Error* error)
 {
@@ -424,6 +446,42 @@ parser_expression(struct Parser* p, struct Parser_Error* error)
     return parser_expression_binary_operation(p, error);
 }
 
+// parses a declaration such as `x uint = 123` or `me, them Obj = create()`:
+// comma-separated names, then a type, `=` and an initializer expression.
+struct Statement*
+parser_statement_declaration(struct Parser* p, struct Parser_Error* error)
+{
+    struct String_Array names = string_array_new();
+    struct Span span = { 0 };
+    struct Cursor location = parser_peek(p).location;
+
+    for (;;) {
+        struct Token name_token = parser_need(p, TOKEN_NAME, error);
+        if (!parser_error_is_none(error)) return nil;
+        span = span_is_empty(span) ? name_token.span : span_merge(span, name_token.span);
+        // NOTE(review): the result is ignored, so names past `STRING_ARRAY_MAX` are dropped.
+        string_array_add(&names, name_token.value.name);
+        // a name directly after a name is taken as the type; a comma is skipped.
+        struct Token next = parser_peek(p);
+        if (next.kind == TOKEN_NAME) break;
+        if (next.kind == TOKEN_COMMA) parser_next(p);
+    }
+
+    // for now, type is always required.
+    struct Type_Node type = CHECK(parser_node_type(p, error));
+    CHECK(parser_need(p, TOKEN_ASSIGN, error));
+    struct Expression* initializer = CHECK(parser_expression(p, error));
+
+    span = span_merge(span, initializer->span);
+    union Statement_Value value = { .declaration = {
+        .names = names,
+        .has_type = true,
+        .type = type,
+        .initializer = initializer,
+    } };
+    return statement_new(STATEMENT_DECLARATION, value, span, location);
+}
+
 void
 parser_end_statement(struct Parser* p, struct Parser_Error* error)
 {
@@ -438,21 +496,31 @@ parser_end_statement(struct Parser* p, struct Parser_Error* error)
 struct Statement*
 parser_statement(struct Parser* p, struct Parser_Error* error)
 {
-    // skip empty statements.
     struct Token token = parser_peek(p);
+
+    // skip empty statements.
     if (token_ends_statement(&token)) {
         parser_next(p);
         return nil;
     }
 
-    // TODO: no statements for now, just go straight to expressions.
-    struct Expression* expression = CHECK(parser_expression(p, error));
+    if (token.kind == TOKEN_NAME) {
+        // NOTE: these can be a variable declaration:
+        //   x uint = 123
+        //   me, them Obj = create()
+        // otherwise without a type, it is counted as an assignment:
+        //   a = "hi!"
+
+        struct Token next_token = parser_peek_further(p);
+        if (next_token.kind == TOKEN_COMMA || next_token.kind == TOKEN_NAME)
+            return parser_statement_declaration(p, error);
+    }
 
-    CHECK(parser_end_statement(p, error));
+    struct Expression* expression = CHECK(parser_expression(p, error));
 
     // expand by one byte to include the statement terminator.
     struct Span span = span_expand(expression->span, 1);
-    union Statement_Value value = { .expression = expression };
+    union Statement_Value value = { .expression.inner = expression };
     return statement_new(STATEMENT_EXPRESSION, value, span, expression->location);
 }
 
@@ -468,6 +536,9 @@ parser_do_your_thing(struct Parser* p, struct Parser_Error* error)
         struct Statement* next = parser_statement(p, error);
         if (!parser_error_is_none(error)) return (struct Tree){ nil };
 
+        parser_end_statement(p, error);
+        if (!parser_error_is_none(error)) return (struct Tree){ nil };
+
         if (current) {
             current->next = next;
         } else {
diff --git a/boot/tree.c b/boot/tree.c
index 593103a..1d88743 100644
--- a/boot/tree.c
+++ b/boot/tree.c
@@ -323,6 +323,19 @@ binary_operation_to_string(enum Binary_Operation operation)
     }
 }
 
+// nodes are parts of the syntax tree that are reused often
+// and in different places.
+
+// a type node represents a type in the syntax tree.
+// currently, we only support types that are simple names.
+struct Type_Node
+{
+    // note: we could also just include the token here i think?
+    struct String name;
+    struct Span span;
+    struct Cursor location;
+};
+
 enum Expression_Kind
 {
     EXPRESSION_NONE,
@@ -524,11 +537,27 @@ enum Statement_Kind
 {
     STATEMENT_NONE,
     STATEMENT_EXPRESSION,
+    STATEMENT_DECLARATION, // declares one or more variables.
+};
+
+struct Statement_Value_Expression
+{
+    struct Expression* inner; // the expression that forms the statement.
+};
+
+struct Statement_Value_Declaration
+{
+    struct String_Array names; // the names of the variables being declared.
+    struct Expression* initializer; // the expression to initialize the variable with.
+
+    bool has_type; // whether the declaration has a type.
+    struct Type_Node type; // the type of the variable, if any.
 };
 
 union Statement_Value
 {
-    struct Expression* expression;
+    struct Statement_Value_Expression expression;
+    struct Statement_Value_Declaration declaration;
 };
 
 struct Statement
@@ -571,10 +600,27 @@ statement_print(const struct Statement* statement)
         printf("none");
         break;
     case STATEMENT_EXPRESSION: {
-        const struct Expression* expression = statement->value.expression;
+        const struct Expression* expression = statement->value.expression.inner;
         expression_print(expression);
         break;
     }
+    case STATEMENT_DECLARATION: {
+        printf("(declaration ");
+        // print with an explicit length, like `string_print`; `data` is not assumed NUL-terminated.
+        STRING_ARRAY_FOR_EACH(i, name, statement->value.declaration.names) {
+            printf("%.*s ", (int32)name.length, name.data);
+        }
+        if (statement->value.declaration.has_type) {
+            printf("(type %.*s) ", (int32)statement->value.declaration.type.name.length, statement->value.declaration.type.name.data);
+        }
+        if (statement->value.declaration.initializer) {
+            printf("(initializer ");
+            expression_print(statement->value.declaration.initializer);
+            printf(")");
+        }
+        printf(")");
+        break;
+    }
     default:
         failure("unexpected statement kind passed to `statement_print`");
         break;
-- 
cgit 1.4.1