about summary refs log tree commit diff
path: root/boot
diff options
context:
space:
mode:
author Mel <mel@rnrd.eu> 2025-07-01 03:04:07 +0200
committer Mel <mel@rnrd.eu> 2025-07-01 03:04:07 +0200
commit 8e0beabeb4efa50a3072ef805682c0f42b6c16a8 (patch)
tree cd6b82d1741cdd52aadddae87c59e0617fb74ffd /boot
parent e51799842aa367692152717a8db7c519dbc66c1f (diff)
download catskill-8e0beabeb4efa50a3072ef805682c0f42b6c16a8.tar.zst
catskill-8e0beabeb4efa50a3072ef805682c0f42b6c16a8.zip
Parse pragmas as free-standing statements, without attachment
Signed-off-by: Mel <mel@rnrd.eu>
Diffstat (limited to 'boot')
-rw-r--r--boot/parse.c104
-rw-r--r--boot/tree.c106
-rw-r--r--boot/visit.c45
3 files changed, 255 insertions, 0 deletions
diff --git a/boot/parse.c b/boot/parse.c
index d410476..7b40fb1 100644
--- a/boot/parse.c
+++ b/boot/parse.c
@@ -31,6 +31,7 @@ struct Parser_Error
         PARSER_ERROR_EXPECTED_STATEMENT_END,
         PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION,
         PARSER_ERROR_EXPECTED_TYPE,
+        PARSER_ERROR_EXPECTED_PRAGMA,
     } kind;
     // TODO: add span to error
 };
@@ -69,6 +70,8 @@ parser_error_to_string(const struct Parser_Error* error)
         return "expected primary expression";
     case PARSER_ERROR_EXPECTED_TYPE:
         return "expected type";
+    case PARSER_ERROR_EXPECTED_PRAGMA:
+        return "expected pragma";
     default:
         return "unknown error";
     }
@@ -596,6 +599,85 @@ parser_node_type(struct Parser* p, struct Parser_Error* error)
     return type;
 }
 
+// parses one pragma line: a `|` followed by one or more comma-separated pragmas,
+// each made of a pragma name and up to PRAGMA_ARGUMENT_MAX arguments, e.g.:
+// `| c_header "stdio.h"`
+// `| clone always, printable`
+// returns the head of the pragma list, linked through `next` in source order.
+// on a parse error `*error` is set and nil is returned (CHECK presumably propagates
+// a failed `parser_need` the same way). the statement-ending token is only peeked.
+struct Pragma_Node*
+parser_pragma_node(struct Parser* p, struct Parser_Error* error)
+{
+    CHECK(parser_need(p, TOKEN_PIPE, error));
+
+    struct Pragma_Node *head = nil, *current = nil;
+    struct Token token = parser_peek(p);
+    while (!token_ends_statement(&token)) {
+        struct Token pragma_token = CHECK(parser_need(p, TOKEN_NAME, error));
+        enum Pragma_Type pragma_type = pragma_type_from_string(pragma_token.value.name);
+        // NOTE(review): this only rejects PRAGMA_NONE. unrecognized names come back as
+        // PRAGMA_UNKNOWN, which is non-zero, so they are kept in the tree - confirm intended.
+        if (!pragma_type) {
+            *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA);
+            return nil;
+        }
+
+        struct Span span = pragma_token.span;
+
+        // parse the arguments until either statement end or comma.
+        // arguments can either be numbers, names or strings.
+        uint argument_index = 0;
+        struct Pragma_Argument arguments[PRAGMA_ARGUMENT_MAX] = { 0 };
+
+        // the comma is tested *before* an argument is read, so that a pragma without
+        // arguments can be followed by another one: `| printable, clone always`.
+        token = parser_peek(p);
+        while (!token_ends_statement(&token) && !token_is(&token, TOKEN_COMMA)) {
+            check(argument_index < PRAGMA_ARGUMENT_MAX, "too many pragma arguments");
+            struct Pragma_Argument* argument = &arguments[argument_index];
+
+            union Token_Value* v = &token.value;
+            switch (token.kind) {
+            case TOKEN_LITERAL_INTEGER:
+                argument->type = PRAGMA_ARGUMENT_NUMBER;
+                argument->value.number = v->literal_integer;
+                break;
+            case TOKEN_LITERAL_FLOAT:
+                argument->type = PRAGMA_ARGUMENT_DECIMAL;
+                argument->value.decimal = v->literal_float;
+                break;
+            case TOKEN_LITERAL_STRING:
+                argument->type = PRAGMA_ARGUMENT_NAME_OR_STRING;
+                argument->value.name_or_string = v->literal_string;
+                break;
+            case TOKEN_NAME:
+                argument->type = PRAGMA_ARGUMENT_NAME_OR_STRING;
+                argument->value.name_or_string = v->name;
+                break;
+            default:
+                *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN);
+                return nil;
+            }
+            argument_index++;
+            span = span_merge(span, token.span);
+            parser_next(p);
+
+            token = parser_peek(p);
+        }
+
+        struct Pragma_Node* pragma = pragma_node_new(pragma_type, span, pragma_token.location);
+        pragma->argument_count = argument_index;
+        memcpy(pragma->arguments, arguments, sizeof(arguments));
+
+        if (!head) {
+            head = pragma;
+        } else {
+            current->next = pragma;
+        }
+        current = pragma;
+
+        // comma separates pragmas on a single line.
+        // `token` deliberately keeps holding the consumed comma: the loop condition
+        // stays true, so a pragma name is required after every comma.
+        if (token_is(&token, TOKEN_COMMA)) { parser_next(p); }
+    }
+
+    // a lone `|` contains no pragma at all; report it instead of returning an empty list.
+    if (!head) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA);
+        return nil;
+    }
+
+    return head;
+}
+
 struct Expression*
 parser_expression_primary_name(struct Parser* p, struct Parser_Error* error)
 {
@@ -1152,6 +1234,26 @@ parser_statement_defer(struct Parser* p, struct Parser_Error* error)
 }
 
 struct Statement*
+parser_statement_pragma(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token pipe_token = parser_peek(p);
+
+    struct Pragma_Node* pragma_node = parser_pragma_node(p, error);
+    if (!parser_error_is_none(error)) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA);
+        return nil;
+    }
+
+    struct Span span = {};
+    FOR_EACH (struct Pragma_Node*, node, pragma_node) {
+        span = span_is_empty(span) ? node->span : span_merge(span, node->span);
+    }
+
+    union Statement_Value value = { .pragma.inner = pragma_node };
+    return statement_new(STATEMENT_PRAGMA, value, span, pipe_token.location);
+}
+
+struct Statement*
 parser_statement(struct Parser* p, struct Parser_Error* error)
 {
     struct Token token = parser_peek(p);
@@ -1182,6 +1284,8 @@ parser_statement(struct Parser* p, struct Parser_Error* error)
         return parser_statement_continue(p, error);
     case TOKEN_WORD_DEFER:
         return parser_statement_defer(p, error);
+    case TOKEN_PIPE:
+        return parser_statement_pragma(p, error);
     default:
         break;
     }
diff --git a/boot/tree.c b/boot/tree.c
index ea8ed69..dc22bfd 100644
--- a/boot/tree.c
+++ b/boot/tree.c
@@ -572,6 +572,104 @@ type_node_is_none(const struct Type_Node* type_node)
     return type_node->type == TYPE_NODE_NONE;
 }
 
+enum Pragma_Type
+{
+    PRAGMA_NONE, // zero value; the parser rejects a pragma whose type is falsy.
+    PRAGMA_UNKNOWN, // what `pragma_type_from_string` returns for unrecognized names.
+    PRAGMA_C_HEADER, // `| c_header "stdio.h"`
+    // TODO: further pragma types.
+
+    // NOTE: there would be plenty of use for user-defined pragmas,
+    // acting similar to attributes in other languages or #[derive] macros
+    // in Rust. for now we only support our hard-coded pragmas,
+    // but it's something to definitely consider in the future.
+};
+
+#define PRAGMA_ARGUMENT_MAX 3
+
+struct Pragma_Argument
+{
+    enum Pragma_Argument_Type
+    {
+        PRAGMA_ARGUMENT_NONE, // zero value, i.e. an unused argument slot.
+        PRAGMA_ARGUMENT_NAME_OR_STRING, // from a name or string literal token; uses `name_or_string`.
+        PRAGMA_ARGUMENT_NUMBER, // from an integer literal token; uses `number`.
+        PRAGMA_ARGUMENT_DECIMAL, // from a float literal token; uses `decimal`.
+    } type;
+
+    union Pragma_Argument_Value
+    {
+        struct String name_or_string;
+        int64 number;
+        float64 decimal;
+    } value; // which member is valid is selected by `type`.
+};
+
+// a "pragma" is what we call compiler hints used for giving almost every piece of information
+// the compiler might require to compile your code.
+// you can recognize a pragma by the '|' token, like in '| c_header "stdio.h"'.
+// their use ranges from setting alignment/padding for structures, defining default copy or move
+// behaviour, to including different C compilation units and other catskill modules.
+// pragmas are parsed as lone statements in the source code at first, but are then
+// "attached" to the relevant nodes of the type the pragma is relevant to.
+struct Pragma_Node
+{
+    enum Pragma_Type type;
+    struct Pragma_Argument arguments[PRAGMA_ARGUMENT_MAX]; // only the first `argument_count` are set.
+    uint argument_count;
+
+    struct Span span; // from the pragma name through its last argument.
+    struct Cursor location; // location of the pragma name token.
+
+    struct Pragma_Node* next; // further pragmas on the same line.
+};
+
+REGION(struct Pragma_Node, pragma_node)
+
+// takes the next free slot of the pragma node region and initializes it with the
+// given type, span and location. the arguments start out empty and `next` is nil.
+struct Pragma_Node*
+pragma_node_new(enum Pragma_Type type, struct Span span, struct Cursor location)
+{
+    check(region_pragma_node_cursor < REGION_SIZE, "out of pragma node memory");
+
+    struct Pragma_Node* node = &region_pragma_node[region_pragma_node_cursor];
+    region_pragma_node_cursor++;
+
+    // members not named here (arguments, argument_count, next) are zero-initialized.
+    *node = (struct Pragma_Node){ .type = type, .span = span, .location = location };
+    return node;
+}
+
+// maps a pragma name to its type by comparing the name's `crc32_posix` hash against
+// the known pragma hashes. names without a match yield PRAGMA_UNKNOWN.
+// NOTE(review): only the hash is compared, so a colliding name would match too.
+enum Pragma_Type
+pragma_type_from_string(struct String name)
+{
+    enum Pragma_Type type = PRAGMA_UNKNOWN;
+
+    // look up hash values with:
+    // `echo -ne "string to hash" | cksum`
+    switch (crc32_posix(name)) {
+    case 2852954401u: // "c_header"
+        type = PRAGMA_C_HEADER;
+        break;
+    default:
+        break;
+    }
+
+    return type;
+}
+
+// returns the source-level name of a pragma type, or "unknown" for PRAGMA_UNKNOWN.
+// any other value, PRAGMA_NONE included, is reported through `failure`.
+const ascii*
+pragma_type_to_string(enum Pragma_Type type)
+{
+    if (type == PRAGMA_C_HEADER) { return "c_header"; }
+    if (type == PRAGMA_UNKNOWN) { return "unknown"; }
+
+    failure("unexpected pragma type passed to `pragma_type_to_string`");
+    return nil;
+}
+
 enum Expression_Kind
 {
     EXPRESSION_NONE,
@@ -742,6 +840,8 @@ enum Statement_Kind
     STATEMENT_BREAK,
     STATEMENT_CONTINUE,
     STATEMENT_DEFER,
+
+    STATEMENT_PRAGMA,
 };
 
 struct Statement_Value_Expression
@@ -828,6 +928,11 @@ struct Statement_Value_Defer
     struct Block_Node block;
 };
 
+struct Statement_Value_Pragma
+{
+    struct Pragma_Node* inner; // head of the list of pragmas parsed from one `|` line.
+};
+
 union Statement_Value
 {
     struct Statement_Value_Expression expression;
@@ -837,6 +942,7 @@ union Statement_Value
     struct Statement_Value_Loop loop;
     struct Statement_Value_Return return_value;
     struct Statement_Value_Defer defer;
+    struct Statement_Value_Pragma pragma;
 };
 
 struct Statement
diff --git a/boot/visit.c b/boot/visit.c
index c297fb4..500fb5a 100644
--- a/boot/visit.c
+++ b/boot/visit.c
@@ -62,6 +62,7 @@ struct Visit_Table
     void (*visit_block_node)(struct Visit* visitor, struct Block_Node* node);
     void (*visit_bare_declaration_node)(struct Visit* visitor, struct Bare_Declaration_Node* node);
     void (*visit_function_header_node)(struct Visit* visitor, struct Function_Header_Node* header);
+    void (*visit_pragma_node)(struct Visit* visitor, struct Pragma_Node* node);
 };
 
 #define VISIT(visit_function, node) visit->table->visit_function(visit, node);
@@ -125,6 +126,9 @@ walk_statement(struct Visit* visit, struct Statement* statement)
     case STATEMENT_DEFER:
         VISIT(visit_statement_defer, statement);
         break;
+    case STATEMENT_PRAGMA:
+        VISIT(visit_pragma_node, statement->value.pragma.inner);
+        break;
     default:
         failure("unexpected statement kind in `walk_statement`");
     }
@@ -417,6 +421,13 @@ walk_function_header_node(struct Visit* visit, struct Function_Header_Node* head
     VISIT_MAYBE(visit_type_node, header->return_type);
 }
 
+void
+walk_pragma_node(struct Visit* visit, struct Pragma_Node* node)
+{
+    // continue to the next pragma on the same line; VISIT_MAYBE presumably skips a nil `next`.
+    VISIT_MAYBE(visit_pragma_node, node->next);
+}
+
 struct Visit_Table walk_functions = {
     .visit_tree = walk_tree,
 
@@ -459,6 +470,7 @@ struct Visit_Table walk_functions = {
     .visit_block_node = walk_block_node,
     .visit_bare_declaration_node = walk_bare_declaration_node,
     .visit_function_header_node = walk_function_header_node,
+    .visit_pragma_node = walk_pragma_node,
 };
 
 // fills in the visit table with default walk functions
@@ -964,6 +976,38 @@ printer_visit_function_header(struct Visit* visit, struct Function_Header_Node*
     }
 }
 
+// tree printer: writes a pragma as `(pragma <name> <arguments...>)`, followed by the
+// remaining pragmas of the same line, each separated by a single space.
+void
+printer_visit_pragma_node(struct Visit* visit, struct Pragma_Node* node)
+{
+    DATA_FOR_VISIT(struct Tree_Printer, printer);
+
+    const ascii* pragma_name = pragma_type_to_string(node->type);
+
+    fprintf(printer->output, "(pragma %s", pragma_name);
+    for (uint ai = 0; ai < node->argument_count; ++ai) {
+        struct Pragma_Argument* arg = &node->arguments[ai];
+        switch (arg->type) {
+        case PRAGMA_ARGUMENT_NUMBER:
+            // `number` is an int64, which is not `long` on every target. convert it
+            // explicitly so the argument always matches the conversion specifier.
+            fprintf(printer->output, " (number %lld)", (long long)arg->value.number);
+            break;
+        case PRAGMA_ARGUMENT_DECIMAL:
+            fprintf(printer->output, " (decimal %lf)", arg->value.decimal);
+            break;
+        case PRAGMA_ARGUMENT_NAME_OR_STRING:
+            // NOTE(review): assumes `data` is NUL-terminated - confirm against `struct String`.
+            fprintf(printer->output, " (name/string '%s')", arg->value.name_or_string.data);
+            break;
+        default:
+            failure("unexpected pragma argument type in `printer_visit_pragma_node`");
+        }
+    }
+    fprintf(printer->output, ")");
+
+    // pragmas sharing a line are printed one after another.
+    if (node->next) {
+        fprintf(printer->output, " ");
+        VISIT(visit_pragma_node, node->next);
+    }
+}
+
 struct Visit_Table printer_visit_functions = {
     .visit_tree = printer_visit_tree,
 
@@ -1006,6 +1050,7 @@ struct Visit_Table printer_visit_functions = {
     .visit_block_node = printer_visit_block_node,
     .visit_bare_declaration_node = printer_visit_bare_declaration_node,
     .visit_function_header_node = printer_visit_function_header,
+    .visit_pragma_node = printer_visit_pragma_node,
 };
 
 void