about summary refs log tree commit diff
path: root/boot
diff options
context:
space:
mode:
authorMel <mel@rnrd.eu>2025-06-14 01:08:51 +0200
committerMel <mel@rnrd.eu>2025-06-14 01:08:51 +0200
commit4eba2d39dc40775bbd18020593e46d1a367db357 (patch)
tree300afc647d56436d461c1242fd29f24afb626ecb /boot
parent5f4f2c7c87053eeda22a32b0f944f39f27c1e522 (diff)
downloadcatskill-4eba2d39dc40775bbd18020593e46d1a367db357.tar.zst
catskill-4eba2d39dc40775bbd18020593e46d1a367db357.zip
Bare-declaration parsing and `for` and `while` for loops
Signed-off-by: Mel <mel@rnrd.eu>
Diffstat (limited to 'boot')
-rw-r--r--boot/lex.c10
-rw-r--r--boot/parse.c130
-rw-r--r--boot/tree.c116
3 files changed, 163 insertions, 93 deletions
diff --git a/boot/lex.c b/boot/lex.c
index f09e5e3..84657fa 100644
--- a/boot/lex.c
+++ b/boot/lex.c
@@ -90,7 +90,7 @@ enum Token_Kind
     TOKEN_WORD_IF,
     TOKEN_WORD_ELSE,
     TOKEN_WORD_FOR,
-    TOKEN_WORD_LOOP,
+    TOKEN_WORD_WHILE,
     TOKEN_WORD_BREAK,
     TOKEN_WORD_CONTINUE,
     TOKEN_WORD_DEFER,
@@ -187,8 +187,8 @@ token_kind_to_string(enum Token_Kind kind)
         return "WORD_ELSE";
     case TOKEN_WORD_FOR:
         return "WORD_FOR";
-    case TOKEN_WORD_LOOP:
-        return "WORD_LOOP";
+    case TOKEN_WORD_WHILE:
+        return "WORD_WHILE";
     case TOKEN_WORD_BREAK:
         return "WORD_BREAK";
     case TOKEN_WORD_CONTINUE:
@@ -788,8 +788,8 @@ lexer_word_from_name(struct Lexer* l, struct String word_or_name)
         return TOKEN_WORD_ELSE;
     case 2652874405: // "for"
         return TOKEN_WORD_FOR;
-    case 1637870694: // "loop"
-        return TOKEN_WORD_LOOP;
+    case 1327426133: // "while"
+        return TOKEN_WORD_WHILE;
     case 1007193266: // "break"
         return TOKEN_WORD_BREAK;
     case 1827824793: // "continue"
diff --git a/boot/parse.c b/boot/parse.c
index 54461fa..3da213a 100644
--- a/boot/parse.c
+++ b/boot/parse.c
@@ -275,6 +275,41 @@ parser_function_header_node(struct Parser* p, struct Parser_Error* error)
     return header;
 }
 
+struct Bare_Declaration_Node
+parser_bare_declaration_node(struct Parser* p, struct Parser_Error* error)
+{
+    struct String_Array names = string_array_new();
+
+    struct Span span = { 0 };
+    struct Cursor location = parser_peek(p).location;
+    for (;;) {
+        struct Token name_token =
+            CHECK_RETURN(parser_need(p, TOKEN_NAME, error), struct Bare_Declaration_Node);
+
+        span = span_is_empty(span) ? name_token.span : span_merge(span, name_token.span);
+        string_array_add(&names, name_token.value.name);
+
+        struct Token next = parser_peek(p);
+        if (token_can_begin_type(&next)) break;
+        if (next.kind == TOKEN_COMMA) parser_next(p);
+    }
+
+    // for now, type is always required.
+    struct Type_Node* type = CHECK_RETURN(parser_node_type(p, error), struct Bare_Declaration_Node);
+    CHECK_RETURN(parser_need(p, TOKEN_ASSIGN, error), struct Bare_Declaration_Node);
+    struct Expression* initializer =
+        CHECK_RETURN(parser_expression(p, error), struct Bare_Declaration_Node);
+
+    return (struct Bare_Declaration_Node){
+        .names = names,
+        .initializer = initializer,
+        .type = type,
+
+        .span = span,
+        .location = location,
+    };
+}
+
 struct Type_Node*
 parser_node_type_name(struct Parser* p, struct Parser_Error* error)
 {
@@ -787,37 +822,16 @@ parser_statement_declaration(struct Parser* p, struct Parser_Error* error)
         statement_declaration_kind_from_token(&declaration_token);
     check(declaration_kind, "expected valid declaration token");
 
-    struct String_Array names = string_array_new();
+    struct Bare_Declaration_Node inner = CHECK(parser_bare_declaration_node(p, error));
 
-    struct Span span = { 0 };
-    struct Cursor location = parser_peek(p).location;
-    for (;;) {
-        struct Token name_token = parser_need(p, TOKEN_NAME, error);
-        if (!parser_error_is_none(error)) return nil;
-
-        span = span_is_empty(span) ? name_token.span : span_merge(span, name_token.span);
-        string_array_add(&names, name_token.value.name);
-
-        struct Token next = parser_peek(p);
-        if (next.kind == TOKEN_NAME) break;
-        if (next.kind == TOKEN_COMMA) parser_next(p);
-    }
-
-    // for now, type is always required.
-    struct Type_Node* type = CHECK(parser_node_type(p, error));
-    CHECK(parser_need(p, TOKEN_ASSIGN, error));
-    struct Expression* initializer = CHECK(parser_expression(p, error));
-
-    span = span_merge(span, initializer->span);
+    struct Span span = span_merge(declaration_token.span, inner.span);
     union Statement_Value value = {
         .declaration = {
             .kind = declaration_kind,
-            .names = names,
-            .type = type,
-            .initializer = initializer,
+            .inner = inner,
         },
     };
-    return statement_new(STATEMENT_DECLARATION, value, span, location);
+    return statement_new(STATEMENT_DECLARATION, value, span, declaration_token.location);
 }
 
 struct Statement*
@@ -870,44 +884,35 @@ parser_statement_conditional(struct Parser* p, struct Parser_Error* error)
 }
 
 struct Statement*
-parser_statement_loop(struct Parser* p, struct Parser_Error* error)
+parser_statement_for(struct Parser* p, struct Parser_Error* error)
 {
     struct Token for_token = CHECK(parser_need(p, TOKEN_WORD_FOR, error));
 
     // these are the possible for loop variants:
-    // * `for var name String = collection {}`, as iteration over container
-    // * `for check() {}`, as a simple while-style loop
-    // * `for var i u8 = 0, i < 10, i++ {}`, as a c-style semi-semi loop
-    // * `for {}`, as an infinite loop
+    // * `for name String = collection {}`, as iteration over container
+    // * `for i u8 = 0, i < 10, i++ {}`, as a c-style semi-semi loop
 
-    struct Statement* declaration = nil;
-    struct Expression *condition = nil, *iteration = nil;
+    // a declaration without a signifier like `var` or `let`.
+    struct Bare_Declaration_Node declaration = CHECK(parser_bare_declaration_node(p, error));
+    enum Statement_Loop_Style style = STATEMENT_LOOP_STYLE_FOR_EACH;
 
-    // c-style or iterator-style
-    struct Token next = parser_peek(p);
-    // TODO: i do not like the `for var` combination, it is too verbose.
-    // it would be beneficial to bring back `while`, so that declarations without `var`
-    // are no longer ambiguous.
-    if (token_can_begin_declaration(&next)) {
-        declaration = CHECK(parser_statement_declaration(p, error));
-
-        // c-style
-        if (parser_probe(p, TOKEN_COMMA)) {
-            parser_next(p);
-            condition = CHECK(parser_expression(p, error));
-            CHECK(parser_need(p, TOKEN_COMMA, error));
+    // c-style semi-semi loop.
+    struct Expression *condition = nil, *iteration = nil;
+    if (parser_probe(p, TOKEN_COMMA)) {
+        parser_next(p);
+        condition = CHECK(parser_expression(p, error));
+        CHECK(parser_need(p, TOKEN_COMMA, error));
+        iteration = CHECK(parser_expression(p, error));
 
-            iteration = CHECK(parser_expression(p, error));
-        }
+        style = STATEMENT_LOOP_STYLE_C;
     }
 
-    // while-style
-    if (!token_is(&next, TOKEN_CURLY_OPEN)) condition = CHECK(parser_expression(p, error));
-
     struct Block_Node body = CHECK(parser_block_node(p, error));
+
     struct Span span = span_merge(for_token.span, body.span);
     union Statement_Value value = {
         .loop = {
+            .style = style,
             .declaration = declaration,
             .condition = condition,
             .iteration = iteration,
@@ -918,6 +923,27 @@ parser_statement_loop(struct Parser* p, struct Parser_Error* error)
 }
 
 struct Statement*
+parser_statement_while(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token while_token = CHECK(parser_need(p, TOKEN_WORD_WHILE, error));
+
+    enum Statement_Loop_Style style = STATEMENT_LOOP_STYLE_ENDLESS;
+    struct Expression* condition = nil;
+    if (!parser_probe(p, TOKEN_CURLY_OPEN)) {
+        condition = CHECK(parser_expression(p, error));
+        style = STATEMENT_LOOP_STYLE_WHILE;
+    }
+
+    struct Block_Node body = CHECK(parser_block_node(p, error));
+
+    struct Span span = span_merge(while_token.span, body.span);
+    union Statement_Value value = {
+        .loop = { .style = style, .condition = condition, .body = body }
+    };
+    return statement_new(STATEMENT_LOOP, value, span, while_token.location);
+}
+
+struct Statement*
 parser_statement_block(struct Parser* p, struct Parser_Error* error)
 {
     struct Block_Node block = CHECK(parser_block_node(p, error));
@@ -1011,7 +1037,9 @@ parser_statement(struct Parser* p, struct Parser_Error* error)
     case TOKEN_WORD_IF:
         return parser_statement_conditional(p, error);
     case TOKEN_WORD_FOR:
-        return parser_statement_loop(p, error);
+        return parser_statement_for(p, error);
+    case TOKEN_WORD_WHILE:
+        return parser_statement_while(p, error);
     case TOKEN_CURLY_OPEN:
         return parser_statement_block(p, error);
     case TOKEN_WORD_RETURN:
diff --git a/boot/tree.c b/boot/tree.c
index dff0858..adee06a 100644
--- a/boot/tree.c
+++ b/boot/tree.c
@@ -408,6 +408,22 @@ struct Function_Header_Node
 
 void function_header_node_print(const struct Function_Header_Node* header);
 
+// a declaration of a variable, constant, or other binding, without a mutability
+// signifier, like `let` or `var`.
+// the mutability is determined by the surrounding context:
+// a bare declaration in a for-loop, for example, is always mutable.
+struct Bare_Declaration_Node
+{
+    struct String_Array names;
+    struct Expression* initializer;
+    struct Type_Node* type;
+
+    struct Span span;
+    struct Cursor location;
+};
+
+void bare_declaration_node_print(const struct Bare_Declaration_Node* declaration);
+
 enum Type_Node_Type
 {
     TYPE_NODE_NONE,
@@ -936,9 +952,7 @@ statement_declaration_kind_from_token(const struct Token* token)
 struct Statement_Value_Declaration
 {
     enum Statement_Declaration_Kind kind;
-    struct String_Array names;
-    struct Expression* initializer;
-    struct Type_Node* type;
+    struct Bare_Declaration_Node inner;
 };
 
 struct Statement_Value_Block
@@ -959,13 +973,20 @@ struct Statement_Value_Conditional
     uint condition_count;
 };
 
+enum Statement_Loop_Style {
+    STATEMENT_LOOP_STYLE_NONE,
+    STATEMENT_LOOP_STYLE_C, // for i int = 0, i < 10, ++i {}
+    STATEMENT_LOOP_STYLE_FOR_EACH, // for x Obj = list {}
+    STATEMENT_LOOP_STYLE_WHILE, // while true {}
+    STATEMENT_LOOP_STYLE_ENDLESS, // while {}
+};
+
+// stands for both `for` and `while` loops.
 struct Statement_Value_Loop
 {
-    // exists for iterator-style + c-style loops.
-    struct Statement* declaration;
-    // exists for all loop types, except for infinite loops.
+    enum Statement_Loop_Style style;
+    struct Bare_Declaration_Node declaration;
     struct Expression* condition;
-    // exists for c-style loops.
     struct Expression* iteration;
 
     struct Block_Node body;
@@ -1033,6 +1054,27 @@ block_node_print(const struct Block_Node* block)
     printf(")");
 }
 
+void
+bare_declaration_node_print(const struct Bare_Declaration_Node* declaration)
+{
+    printf("(declaration ");
+    STRING_ARRAY_FOR_EACH(i, name, declaration->names)
+    {
+        printf("%s ", name.data);
+    }
+    if (!type_node_is_none(declaration->type)) {
+        type_node_print(declaration->type);
+        printf(" ");
+    }
+
+    if (declaration->initializer) {
+        printf("(initializer ");
+        expression_print(declaration->initializer);
+        printf(")");
+    }
+    printf(")");
+}
+
 struct Statement*
 statement_new(
     enum Statement_Kind kind, union Statement_Value value, struct Span span, struct Cursor location)
@@ -1063,24 +1105,12 @@ statement_print(const struct Statement* statement)
         break;
     }
     case STATEMENT_DECLARATION: {
-        printf("(declaration ");
         if (statement->value.declaration.kind == STATEMENT_DECLARATION_VARIABLE)
-            printf("variable ");
+            printf("(variable ");
         else if (statement->value.declaration.kind == STATEMENT_DECLARATION_CONSTANT)
-            printf("constant ");
+            printf("(constant ");
 
-        STRING_ARRAY_FOR_EACH(i, name, statement->value.declaration.names)
-        {
-            printf("%s ", name.data);
-        }
-        if (type_node_is_none(statement->value.declaration.type))
-            type_node_print(statement->value.declaration.type);
-
-        if (statement->value.declaration.initializer) {
-            printf("(initializer ");
-            expression_print(statement->value.declaration.initializer);
-            printf(")");
-        }
+        bare_declaration_node_print(&statement->value.declaration.inner);
         printf(")");
         break;
     }
@@ -1109,25 +1139,37 @@ statement_print(const struct Statement* statement)
     }
     case STATEMENT_LOOP: {
         printf("(loop ");
-        if (statement->value.loop.declaration) {
-            printf("(declaration ");
-            statement_print(statement->value.loop.declaration);
-            printf(") ");
-        }
 
-        if (statement->value.loop.condition) {
-            printf("(condition ");
-            expression_print(statement->value.loop.condition);
-            printf(") ");
-        }
-
-        if (statement->value.loop.iteration) {
-            printf("(iteration ");
-            expression_print(statement->value.loop.iteration);
-            printf(") ");
+        switch (statement->value.loop.style) {
+            case STATEMENT_LOOP_STYLE_C:
+                printf("c-style ");
+                bare_declaration_node_print(&statement->value.loop.declaration);
+                printf(" (condition ");
+                expression_print(statement->value.loop.condition);
+                printf(") (iteration ");
+                expression_print(statement->value.loop.iteration);
+                printf(") ");
+                break;
+            case STATEMENT_LOOP_STYLE_FOR_EACH:
+                printf("for-each ");
+                bare_declaration_node_print(&statement->value.loop.declaration);
+                printf(" ");
+                break;
+            case STATEMENT_LOOP_STYLE_WHILE:
+                printf("while (condition ");
+                expression_print(statement->value.loop.condition);
+                printf(") ");
+                break;
+            case STATEMENT_LOOP_STYLE_ENDLESS:
+                printf("endless ");
+                break;
+            default:
+                failure("unexpected loop style in `statement_print`");
+                break;
         }
 
         block_node_print(&statement->value.loop.body);
+        printf(")");
         break;
     }
     case STATEMENT_RETURN: {