about summary refs log tree commit diff
path: root/boot
diff options
context:
space:
mode:
author Mel <mel@rnrd.eu> 2025-05-31 03:29:51 +0200
committer Mel <mel@rnrd.eu> 2025-05-31 03:29:51 +0200
commit a422f9aead499a526179ba2df2aff1aa44fe48d6 (patch)
tree f597bed2231e3ee05ce1d4f6c9901ed5b6a3882b /boot
parent 41e0e31b8586c1f93b5e65cd62ef910227a6677d (diff)
download catskill-a422f9aead499a526179ba2df2aff1aa44fe48d6.tar.zst
catskill-a422f9aead499a526179ba2df2aff1aa44fe48d6.zip
Keyword-less variable declaration parsing
Signed-off-by: Mel <mel@rnrd.eu>
Diffstat (limited to 'boot')
-rw-r--r-- boot/common.c 41
-rw-r--r-- boot/lex.c 14
-rw-r--r-- boot/parse.c 81
-rw-r--r-- boot/tree.c 50
4 files changed, 179 insertions, 7 deletions
diff --git a/boot/common.c b/boot/common.c
index d814538..65dc780 100644
--- a/boot/common.c
+++ b/boot/common.c
@@ -166,6 +166,47 @@ string_print(struct String s)
     printf("%.*s", (int32)s.length, s.data);
 }
 
+#define STRING_ARRAY_MAX 8
+
+// a fixed-capacity array of strings, holding at most `STRING_ARRAY_MAX` entries.
+// if we ever need more strings, just bump `STRING_ARRAY_MAX` up.
+struct String_Array
+{
+    struct String strings[STRING_ARRAY_MAX]; // slots below `count` hold the added strings.
+    uint count;                              // number of strings added so far.
+};
+
+// creates an empty string array: every slot zeroed and the count at zero.
+struct String_Array
+string_array_new()
+{
+    // zero-initializing the aggregate clears both the
+    // `strings` slots and the `count` field in one go.
+    struct String_Array empty = { 0 };
+    return empty;
+}
+
+// appends `string` to the array; returns false, adding nothing, when it is full.
+bool
+string_array_add(struct String_Array* array, struct String string)
+{
+    bool has_room = array->count < STRING_ARRAY_MAX;
+    // only claim a slot and advance `count` while there is room left.
+    if (has_room) array->strings[array->count++] = string;
+    return has_room;
+}
+
+struct String
+string_array_at(const struct String_Array* array, uint index)
+{
+    check(index < array->count, "index out of bounds"); // only slots below `count` hold added strings.
+    return array->strings[index]; // the string at `index`, returned by value.
+}
+
+#define STRING_ARRAY_FOR_EACH(cursor, str, array) /* visits each added string; loads `str` only after the bounds test. */ \
+    struct String str = { 0 }; /* declared in the enclosing scope, so use at most once per scope. */ \
+    for (uint cursor = 0; cursor < (array).count && ((str = (array).strings[cursor]), 1); ++cursor)
+
 // single iteration of the CRC32 checksum algorithm
 // described in POSIX.
 // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html
diff --git a/boot/lex.c b/boot/lex.c
index c66d160..37eabcd 100644
--- a/boot/lex.c
+++ b/boot/lex.c
@@ -52,6 +52,20 @@ span_expand(struct Span span, integer by)
     };
 }
 
+// two spans are equal when both their starts and their ends match.
+bool
+span_equals(struct Span a, struct Span b)
+{
+    return !(a.start != b.start || a.end != b.end);
+}
+
+// a span counts as empty when it is the zero span, { 0, 0 }.
+bool
+span_is_empty(struct Span span)
+{
+    return span.start == 0 && span.end == 0;
+}
+
 // a cursor position placed within a text file.
 struct Cursor
 {
diff --git a/boot/parse.c b/boot/parse.c
index b009656..4fc6107 100644
--- a/boot/parse.c
+++ b/boot/parse.c
@@ -18,6 +18,7 @@ struct Parser_Error
         PARSER_ERROR_UNEXPECTED_EOF,
         PARSER_ERROR_EXPECTED_STATEMENT_END,
         PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION,
+        PARSER_ERROR_EXPECTED_TYPE,
     } kind;
     // TODO: add span to error
 };
@@ -54,6 +55,8 @@ parser_error_to_string(const struct Parser_Error* error)
         return "expected statement end";
     case PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION:
         return "expected primary expression";
+    case PARSER_ERROR_EXPECTED_TYPE:
+        return "expected type";
     default:
         return "unknown error";
     }
@@ -161,6 +164,25 @@ parser_probe(struct Parser* p, enum Token_Kind kind)
 
 struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error);
 
+struct Type_Node
+parser_node_type(struct Parser* p, struct Parser_Error* error)
+{
+    // parses a type, which for now can only be a single name token.
+    // more complex type syntax may be supported here in the future.
+    struct Type_Node node = { 0 };
+    struct Token name_token = parser_need(p, TOKEN_NAME, error);
+    if (token_is_empty(&name_token)) {
+        // a missing name is reported as a missing type; the node stays zeroed.
+        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        return node;
+    }
+
+    node.name = name_token.value.name;
+    node.span = name_token.span;
+    node.location = name_token.location;
+    return node;
+}
+
 struct Expression*
 parser_expression_primary_name(struct Parser* p, struct Parser_Error* error)
 {
@@ -424,6 +446,42 @@ parser_expression(struct Parser* p, struct Parser_Error* error)
     return parser_expression_binary_operation(p, error);
 }
 
+struct Statement*
+parser_statement_declaration(struct Parser* p, struct Parser_Error* error)
+{
+    // parses `name {, name} type = initializer` into a declaration statement.
+    struct String_Array names = string_array_new();
+    struct Span span = { 0 };
+    struct Cursor location = parser_peek(p).location;
+    for (;;) {
+        struct Token name_token = parser_need(p, TOKEN_NAME, error);
+        if (!parser_error_is_none(error)) return nil; // no name where one is required.
+        span = span_is_empty(span) ? name_token.span : span_merge(span, name_token.span);
+        // a full array must fail loudly instead of silently dropping a declared name.
+        if (!string_array_add(&names, name_token.value.name)) failure("too many names in declaration");
+
+        struct Token next = parser_peek(p);
+        if (next.kind == TOKEN_NAME) break; // a name directly after a name is the type.
+        if (next.kind == TOKEN_COMMA) parser_next(p);
+    }
+
+    // for now, type is always required. NOTE(review): CHECK presumably returns early on error.
+    struct Type_Node type = CHECK(parser_node_type(p, error));
+    CHECK(parser_need(p, TOKEN_ASSIGN, error));
+    struct Expression* initializer = CHECK(parser_expression(p, error));
+
+    span = span_merge(span, initializer->span); // the span runs from the first name to the initializer.
+    union Statement_Value value = {
+        .declaration = {
+            .names = names,
+            .has_type = true,
+            .type = type,
+            .initializer = initializer,
+        },
+    };
+    return statement_new(STATEMENT_DECLARATION, value, span, location);
+}
+
 void
 parser_end_statement(struct Parser* p, struct Parser_Error* error)
 {
@@ -438,21 +496,31 @@ parser_end_statement(struct Parser* p, struct Parser_Error* error)
 struct Statement*
 parser_statement(struct Parser* p, struct Parser_Error* error)
 {
-    // skip empty statements.
     struct Token token = parser_peek(p);
+
+    // skip empty statements.
     if (token_ends_statement(&token)) {
         parser_next(p);
         return nil;
     }
 
-    // TODO: no statements for now, just go straight to expressions.
-    struct Expression* expression = CHECK(parser_expression(p, error));
+    if (token.kind == TOKEN_NAME) {
+        // NOTE: a leading name can start a variable declaration:
+        //     x uint = 123
+        //     me, them Obj = create()
+        // otherwise, without a type, it is parsed as an expression, e.g. an assignment:
+        //     a = "hi!"
+
+        struct Token next_token = parser_peek_further(p);
+        if (next_token.kind == TOKEN_COMMA || next_token.kind == TOKEN_NAME) // comma or second name: declaration.
+            return parser_statement_declaration(p, error);
+    }
 
-    CHECK(parser_end_statement(p, error));
+    struct Expression* expression = CHECK(parser_expression(p, error));
 
     // expand by one byte to include the statement terminator.
     struct Span span = span_expand(expression->span, 1);
-    union Statement_Value value = { .expression = expression };
+    union Statement_Value value = { .expression.inner = expression };
     return statement_new(STATEMENT_EXPRESSION, value, span, expression->location);
 }
 
@@ -468,6 +536,9 @@ parser_do_your_thing(struct Parser* p, struct Parser_Error* error)
         struct Statement* next = parser_statement(p, error);
         if (!parser_error_is_none(error)) return (struct Tree){ nil };
 
+        parser_end_statement(p, error);
+        if (!parser_error_is_none(error)) return (struct Tree){ nil };
+
         if (current) {
             current->next = next;
         } else {
diff --git a/boot/tree.c b/boot/tree.c
index 593103a..1d88743 100644
--- a/boot/tree.c
+++ b/boot/tree.c
@@ -323,6 +323,19 @@ binary_operation_to_string(enum Binary_Operation operation)
     }
 }
 
+// nodes are parts of the syntax tree that are reused often
+// and in different places.
+
+// a type node represents a type written in the source, as part of the syntax tree.
+// currently, we only support types that are simple names.
+struct Type_Node
+{
+    // note: we could also just include the token here i think?
+    struct String name;     // the type's name, taken from the name token.
+    struct Span span;       // the span of that token.
+    struct Cursor location; // the location of that token.
+};
+
 enum Expression_Kind
 {
     EXPRESSION_NONE,
@@ -524,11 +537,27 @@ enum Statement_Kind
 {
     STATEMENT_NONE,
     STATEMENT_EXPRESSION,
+    STATEMENT_DECLARATION,
+};
+
+struct Statement_Value_Expression
+{
+    struct Expression* inner; // the expression that makes up the statement.
+};
+
+struct Statement_Value_Declaration
+{
+    struct String_Array names;      // the names of the variables being declared.
+    struct Expression* initializer; // the expression to initialize the variables with.
+
+    bool has_type;         // whether the declaration has a type; the parser always sets it for now.
+    struct Type_Node type; // the type of the variables; only meaningful when `has_type` is set.
+};
 
 union Statement_Value
 {
-    struct Expression* expression;
+    struct Statement_Value_Expression expression;   // used by STATEMENT_EXPRESSION statements.
+    struct Statement_Value_Declaration declaration; // used by STATEMENT_DECLARATION statements.
 };
 
 struct Statement
@@ -571,10 +600,27 @@ statement_print(const struct Statement* statement)
         printf("none");
         break;
     case STATEMENT_EXPRESSION: {
-        const struct Expression* expression = statement->value.expression;
+        const struct Expression* expression = statement->value.expression.inner;
         expression_print(expression);
         break;
     }
+    case STATEMENT_DECLARATION: {
+        const struct Statement_Value_Declaration* declaration = &statement->value.declaration;
+        printf("(declaration ");
+        STRING_ARRAY_FOR_EACH(i, name, declaration->names)
+        {
+            printf("%.*s ", (int)name.length, name.data); // strings carry a length; don't rely on a terminator.
+        }
+        if (declaration->has_type) // same length-bounded printing for the type name.
+            printf("(type %.*s) ", (int)declaration->type.name.length, declaration->type.name.data);
+        if (declaration->initializer) {
+            printf("(initializer ");
+            expression_print(declaration->initializer);
+            printf(")");
+        }
+        printf(")");
+        break;
+    }
     default:
         failure("unexpected statement kind passed to `statement_print`");
         break;