From 4eba2d39dc40775bbd18020593e46d1a367db357 Mon Sep 17 00:00:00 2001 From: Mel Date: Sat, 14 Jun 2025 01:08:51 +0200 Subject: Bare-declaration parsing and `for` and `while` for loops Signed-off-by: Mel --- boot/lex.c | 10 ++--- boot/parse.c | 130 ++++++++++++++++++++++++++++++++++++----------------------- boot/tree.c | 116 +++++++++++++++++++++++++++++++++++----------------- 3 files changed, 163 insertions(+), 93 deletions(-) diff --git a/boot/lex.c b/boot/lex.c index f09e5e3..84657fa 100644 --- a/boot/lex.c +++ b/boot/lex.c @@ -90,7 +90,7 @@ enum Token_Kind TOKEN_WORD_IF, TOKEN_WORD_ELSE, TOKEN_WORD_FOR, - TOKEN_WORD_LOOP, + TOKEN_WORD_WHILE, TOKEN_WORD_BREAK, TOKEN_WORD_CONTINUE, TOKEN_WORD_DEFER, @@ -187,8 +187,8 @@ token_kind_to_string(enum Token_Kind kind) return "WORD_ELSE"; case TOKEN_WORD_FOR: return "WORD_FOR"; - case TOKEN_WORD_LOOP: - return "WORD_LOOP"; + case TOKEN_WORD_WHILE: + return "WORD_WHILE"; case TOKEN_WORD_BREAK: return "WORD_BREAK"; case TOKEN_WORD_CONTINUE: @@ -788,8 +788,8 @@ lexer_word_from_name(struct Lexer* l, struct String word_or_name) return TOKEN_WORD_ELSE; case 2652874405: // "for" return TOKEN_WORD_FOR; - case 1637870694: // "loop" - return TOKEN_WORD_LOOP; + case 1327426133: // "while" + return TOKEN_WORD_WHILE; case 1007193266: // "break" return TOKEN_WORD_BREAK; case 1827824793: // "continue" diff --git a/boot/parse.c b/boot/parse.c index 54461fa..3da213a 100644 --- a/boot/parse.c +++ b/boot/parse.c @@ -275,6 +275,41 @@ parser_function_header_node(struct Parser* p, struct Parser_Error* error) return header; } +struct Bare_Declaration_Node +parser_bare_declaration_node(struct Parser* p, struct Parser_Error* error) +{ + struct String_Array names = string_array_new(); + + struct Span span = { 0 }; + struct Cursor location = parser_peek(p).location; + for (;;) { + struct Token name_token = + CHECK_RETURN(parser_need(p, TOKEN_NAME, error), struct Bare_Declaration_Node); + + span = span_is_empty(span) ? 
name_token.span : span_merge(span, name_token.span); + string_array_add(&names, name_token.value.name); + + struct Token next = parser_peek(p); + if (token_can_begin_type(&next)) break; + if (next.kind == TOKEN_COMMA) parser_next(p); + } + + // for now, type is always required. + struct Type_Node* type = CHECK_RETURN(parser_node_type(p, error), struct Bare_Declaration_Node); + CHECK_RETURN(parser_need(p, TOKEN_ASSIGN, error), struct Bare_Declaration_Node); + struct Expression* initializer = + CHECK_RETURN(parser_expression(p, error), struct Bare_Declaration_Node); + + return (struct Bare_Declaration_Node){ + .names = names, + .initializer = initializer, + .type = type, + + .span = span, + .location = location, + }; +} + struct Type_Node* parser_node_type_name(struct Parser* p, struct Parser_Error* error) { @@ -787,37 +822,16 @@ parser_statement_declaration(struct Parser* p, struct Parser_Error* error) statement_declaration_kind_from_token(&declaration_token); check(declaration_kind, "expected valid declaration token"); - struct String_Array names = string_array_new(); + struct Bare_Declaration_Node inner = CHECK(parser_bare_declaration_node(p, error)); - struct Span span = { 0 }; - struct Cursor location = parser_peek(p).location; - for (;;) { - struct Token name_token = parser_need(p, TOKEN_NAME, error); - if (!parser_error_is_none(error)) return nil; - - span = span_is_empty(span) ? name_token.span : span_merge(span, name_token.span); - string_array_add(&names, name_token.value.name); - - struct Token next = parser_peek(p); - if (next.kind == TOKEN_NAME) break; - if (next.kind == TOKEN_COMMA) parser_next(p); - } - - // for now, type is always required. 
- struct Type_Node* type = CHECK(parser_node_type(p, error)); - CHECK(parser_need(p, TOKEN_ASSIGN, error)); - struct Expression* initializer = CHECK(parser_expression(p, error)); - - span = span_merge(span, initializer->span); + struct Span span = span_merge(declaration_token.span, inner.span); union Statement_Value value = { .declaration = { .kind = declaration_kind, - .names = names, - .type = type, - .initializer = initializer, + .inner = inner, }, }; - return statement_new(STATEMENT_DECLARATION, value, span, location); + return statement_new(STATEMENT_DECLARATION, value, span, declaration_token.location); } struct Statement* @@ -870,44 +884,35 @@ parser_statement_conditional(struct Parser* p, struct Parser_Error* error) } struct Statement* -parser_statement_loop(struct Parser* p, struct Parser_Error* error) +parser_statement_for(struct Parser* p, struct Parser_Error* error) { struct Token for_token = CHECK(parser_need(p, TOKEN_WORD_FOR, error)); // these are the possible for loop variants: - // * `for var name String = collection {}`, as iteration over container - // * `for check() {}`, as a simple while-style loop - // * `for var i u8 = 0, i < 10, i++ {}`, as a c-style semi-semi loop - // * `for {}`, as an infinite loop + // * `for name String = collection {}`, as iteration over container + // * `for i u8 = 0, i < 10, i++ {}`, as a c-style semi-semi loop - struct Statement* declaration = nil; - struct Expression *condition = nil, *iteration = nil; + // a declaration without a signifier like `var` or `let`. + struct Bare_Declaration_Node declaration = CHECK(parser_bare_declaration_node(p, error)); + enum Statement_Loop_Style style = STATEMENT_LOOP_STYLE_FOR_EACH; - // c-style or iterator-style - struct Token next = parser_peek(p); - // TODO: i do not like the `for var` combination, it is too verbose. - // it would be beneficial to bring back `while`, so that declarations without `var` - // are no longer ambiguous. 
- if (token_can_begin_declaration(&next)) { - declaration = CHECK(parser_statement_declaration(p, error)); - - // c-style - if (parser_probe(p, TOKEN_COMMA)) { - parser_next(p); - condition = CHECK(parser_expression(p, error)); - CHECK(parser_need(p, TOKEN_COMMA, error)); + // c-style semi-semi loop. + struct Expression *condition = nil, *iteration = nil; + if (parser_probe(p, TOKEN_COMMA)) { + parser_next(p); + condition = CHECK(parser_expression(p, error)); + CHECK(parser_need(p, TOKEN_COMMA, error)); + iteration = CHECK(parser_expression(p, error)); - iteration = CHECK(parser_expression(p, error)); - } + style = STATEMENT_LOOP_STYLE_C; } - // while-style - if (!token_is(&next, TOKEN_CURLY_OPEN)) condition = CHECK(parser_expression(p, error)); - struct Block_Node body = CHECK(parser_block_node(p, error)); + struct Span span = span_merge(for_token.span, body.span); union Statement_Value value = { .loop = { + .style = style, .declaration = declaration, .condition = condition, .iteration = iteration, @@ -917,6 +922,27 @@ parser_statement_loop(struct Parser* p, struct Parser_Error* error) return statement_new(STATEMENT_LOOP, value, span, for_token.location); } +struct Statement* +parser_statement_while(struct Parser* p, struct Parser_Error* error) +{ + struct Token while_token = CHECK(parser_need(p, TOKEN_WORD_WHILE, error)); + + enum Statement_Loop_Style style = STATEMENT_LOOP_STYLE_ENDLESS; + struct Expression* condition = nil; + if (!parser_probe(p, TOKEN_CURLY_OPEN)) { + condition = CHECK(parser_expression(p, error)); + style = STATEMENT_LOOP_STYLE_WHILE; + } + + struct Block_Node body = CHECK(parser_block_node(p, error)); + + struct Span span = span_merge(while_token.span, body.span); + union Statement_Value value = { + .loop = { .style = style, .condition = condition, .body = body } + }; + return statement_new(STATEMENT_LOOP, value, span, while_token.location); +} + struct Statement* parser_statement_block(struct Parser* p, struct Parser_Error* error) { @@ 
-1011,7 +1037,9 @@ parser_statement(struct Parser* p, struct Parser_Error* error) case TOKEN_WORD_IF: return parser_statement_conditional(p, error); case TOKEN_WORD_FOR: - return parser_statement_loop(p, error); + return parser_statement_for(p, error); + case TOKEN_WORD_WHILE: + return parser_statement_while(p, error); case TOKEN_CURLY_OPEN: return parser_statement_block(p, error); case TOKEN_WORD_RETURN: diff --git a/boot/tree.c b/boot/tree.c index dff0858..adee06a 100644 --- a/boot/tree.c +++ b/boot/tree.c @@ -408,6 +408,22 @@ struct Function_Header_Node void function_header_node_print(const struct Function_Header_Node* header); +// a declaration of a variable, constant, or other binding, without a mutability +// signifier, like `let` or `var`. +// the mutability is determined by some outside context, where +// a bare declaration in a for-loop, for example, is always mutable. +struct Bare_Declaration_Node +{ + struct String_Array names; + struct Expression* initializer; + struct Type_Node* type; + + struct Span span; + struct Cursor location; +}; + +void bare_declaration_node_print(const struct Bare_Declaration_Node* declaration); + enum Type_Node_Type { TYPE_NODE_NONE, @@ -936,9 +952,7 @@ statement_declaration_kind_from_token(const struct Token* token) struct Statement_Value_Declaration { enum Statement_Declaration_Kind kind; - struct String_Array names; - struct Expression* initializer; - struct Type_Node* type; + struct Bare_Declaration_Node inner; }; struct Statement_Value_Block @@ -959,13 +973,20 @@ struct Statement_Value_Conditional uint condition_count; }; +enum Statement_Loop_Style { + STATEMENT_LOOP_STYLE_NONE, + STATEMENT_LOOP_STYLE_C, // for i int = 0, i < 10, ++i {} + STATEMENT_LOOP_STYLE_FOR_EACH, // for x Obj = list {} + STATEMENT_LOOP_STYLE_WHILE, // while true {} + STATEMENT_LOOP_STYLE_ENDLESS, // while {} +}; + +// stands for both `for` and `while` loops. struct Statement_Value_Loop { - // exists for iterator-style + c-style loops. 
- struct Statement* declaration; - // exists for all loop types, except for infinite loops. + enum Statement_Loop_Style style; + struct Bare_Declaration_Node declaration; struct Expression* condition; - // exists for c-style loops. struct Expression* iteration; struct Block_Node body; @@ -1033,6 +1054,27 @@ block_node_print(const struct Block_Node* block) printf(")"); } +void +bare_declaration_node_print(const struct Bare_Declaration_Node* declaration) +{ + printf("(declaration "); + STRING_ARRAY_FOR_EACH(i, name, declaration->names) + { + printf("%s ", name.data); + } + if (!type_node_is_none(declaration->type)) { + type_node_print(declaration->type); + printf(" "); + } + + if (declaration->initializer) { + printf("(initializer "); + expression_print(declaration->initializer); + printf(")"); + } + printf(")"); +} + struct Statement* statement_new( enum Statement_Kind kind, union Statement_Value value, struct Span span, struct Cursor location) @@ -1063,24 +1105,12 @@ statement_print(const struct Statement* statement) break; } case STATEMENT_DECLARATION: { - printf("(declaration "); if (statement->value.declaration.kind == STATEMENT_DECLARATION_VARIABLE) - printf("variable "); + printf("(variable "); else if (statement->value.declaration.kind == STATEMENT_DECLARATION_CONSTANT) - printf("constant "); + printf("(constant "); - STRING_ARRAY_FOR_EACH(i, name, statement->value.declaration.names) - { - printf("%s ", name.data); - } - if (type_node_is_none(statement->value.declaration.type)) - type_node_print(statement->value.declaration.type); - - if (statement->value.declaration.initializer) { - printf("(initializer "); - expression_print(statement->value.declaration.initializer); - printf(")"); - } + bare_declaration_node_print(&statement->value.declaration.inner); printf(")"); break; } @@ -1109,25 +1139,37 @@ statement_print(const struct Statement* statement) } case STATEMENT_LOOP: { printf("(loop "); - if (statement->value.loop.declaration) { - printf("(declaration 
"); - statement_print(statement->value.loop.declaration); - printf(") "); - } - if (statement->value.loop.condition) { - printf("(condition "); - expression_print(statement->value.loop.condition); - printf(") "); - } - - if (statement->value.loop.iteration) { - printf("(iteration "); - expression_print(statement->value.loop.iteration); - printf(") "); + switch (statement->value.loop.style) { + case STATEMENT_LOOP_STYLE_C: + printf("c-style "); + bare_declaration_node_print(&statement->value.loop.declaration); + printf(" (condition "); + expression_print(statement->value.loop.condition); + printf(") (iteration "); + expression_print(statement->value.loop.iteration); + printf(") "); + break; + case STATEMENT_LOOP_STYLE_FOR_EACH: + printf("for-each "); + bare_declaration_node_print(&statement->value.loop.declaration); + printf(" "); + break; + case STATEMENT_LOOP_STYLE_WHILE: + printf("while (condition "); + expression_print(statement->value.loop.condition); + printf(") "); + break; + case STATEMENT_LOOP_STYLE_ENDLESS: + printf("endless "); + break; + default: + failure("unexpected loop style in `statement_print`"); + break; } block_node_print(&statement->value.loop.body); + printf(")"); break; } case STATEMENT_RETURN: { -- cgit 1.4.1