diff options
| author | Mel <mel@rnrd.eu> | 2025-07-01 03:04:07 +0200 |
|---|---|---|
| committer | Mel <mel@rnrd.eu> | 2025-07-01 03:04:07 +0200 |
| commit | 8e0beabeb4efa50a3072ef805682c0f42b6c16a8 (patch) | |
| tree | cd6b82d1741cdd52aadddae87c59e0617fb74ffd | |
| parent | e51799842aa367692152717a8db7c519dbc66c1f (diff) | |
| download | catskill-8e0beabeb4efa50a3072ef805682c0f42b6c16a8.tar.zst catskill-8e0beabeb4efa50a3072ef805682c0f42b6c16a8.zip | |
Parse pragmas as free-standing statements, without attachment
Signed-off-by: Mel <mel@rnrd.eu>
| -rw-r--r-- | boot/parse.c | 104 | ||||
| -rw-r--r-- | boot/tree.c | 106 | ||||
| -rw-r--r-- | boot/visit.c | 45 |
3 files changed, 255 insertions, 0 deletions
diff --git a/boot/parse.c b/boot/parse.c index d410476..7b40fb1 100644 --- a/boot/parse.c +++ b/boot/parse.c @@ -31,6 +31,7 @@ struct Parser_Error PARSER_ERROR_EXPECTED_STATEMENT_END, PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION, PARSER_ERROR_EXPECTED_TYPE, + PARSER_ERROR_EXPECTED_PRAGMA, } kind; // TODO: add span to error }; @@ -69,6 +70,8 @@ parser_error_to_string(const struct Parser_Error* error) return "expected primary expression"; case PARSER_ERROR_EXPECTED_TYPE: return "expected type"; + case PARSER_ERROR_EXPECTED_PRAGMA: + return "expected pragma"; default: return "unknown error"; } @@ -596,6 +599,85 @@ parser_node_type(struct Parser* p, struct Parser_Error* error) return type; } +struct Pragma_Node* +parser_pragma_node(struct Parser* p, struct Parser_Error* error) +{ + // `| c_header "stdio.h"` + // `| clone always, printable` + CHECK(parser_need(p, TOKEN_PIPE, error)); + + struct Pragma_Node *head = nil, *current = nil; + struct Token token = parser_peek(p); + while (!token_ends_statement(&token)) { + struct Token pragma_token = CHECK(parser_need(p, TOKEN_NAME, error)); + enum Pragma_Type pragma_type = pragma_type_from_string(pragma_token.value.name); + if (!pragma_type) { + *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA); + return nil; + } + + struct Span span = pragma_token.span; + + // parse the arguments until either statement end or comma + // arguments can either be numbers, names or strings. 
+ uint argument_index = 0; + struct Pragma_Argument arguments[PRAGMA_ARGUMENT_MAX] = { 0 }; + + token = parser_peek(p); + while (!token_ends_statement(&token)) { + check(argument_index < PRAGMA_ARGUMENT_MAX, "too many pragma arguments"); + struct Pragma_Argument* argument = &arguments[argument_index]; + + union Token_Value* v = &token.value; + switch (token.kind) { + case TOKEN_LITERAL_INTEGER: + argument->type = PRAGMA_ARGUMENT_NUMBER; + argument->value.number = v->literal_integer; + break; + case TOKEN_LITERAL_FLOAT: + argument->type = PRAGMA_ARGUMENT_DECIMAL; + argument->value.decimal = v->literal_float; + break; + case TOKEN_LITERAL_STRING: + argument->type = PRAGMA_ARGUMENT_NAME_OR_STRING; + argument->value.name_or_string = v->literal_string; + break; + case TOKEN_NAME: + argument->type = PRAGMA_ARGUMENT_NAME_OR_STRING; + argument->value.name_or_string = v->name; + break; + default: + *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN); + return nil; + } + argument_index++; + span = span_merge(span, token.span); + parser_next(p); + + // comma separates pragmas on a single line. 
+ token = parser_peek(p); + if (token_is(&token, TOKEN_COMMA)) { + parser_next(p); + break; + } + if (token_ends_statement(&token)) { break; } + } + + struct Pragma_Node* pragma = pragma_node_new(pragma_type, span, pragma_token.location); + pragma->argument_count = argument_index; + memcpy(pragma->arguments, arguments, sizeof(arguments)); + + if (!head) { + head = pragma; + } else { + current->next = pragma; + } + current = pragma; + } + + return head; +} + struct Expression* parser_expression_primary_name(struct Parser* p, struct Parser_Error* error) { @@ -1152,6 +1234,26 @@ parser_statement_defer(struct Parser* p, struct Parser_Error* error) } struct Statement* +parser_statement_pragma(struct Parser* p, struct Parser_Error* error) +{ + struct Token pipe_token = parser_peek(p); + + struct Pragma_Node* pragma_node = parser_pragma_node(p, error); + if (!parser_error_is_none(error)) { + *error = parser_error(PARSER_ERROR_EXPECTED_PRAGMA); + return nil; + } + + struct Span span = {}; + FOR_EACH (struct Pragma_Node*, node, pragma_node) { + span = span_is_empty(span) ? 
node->span : span_merge(span, node->span); + } + + union Statement_Value value = { .pragma.inner = pragma_node }; + return statement_new(STATEMENT_PRAGMA, value, span, pipe_token.location); +} + +struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_peek(p); @@ -1182,6 +1284,8 @@ parser_statement(struct Parser* p, struct Parser_Error* error) return parser_statement_continue(p, error); case TOKEN_WORD_DEFER: return parser_statement_defer(p, error); + case TOKEN_PIPE: + return parser_statement_pragma(p, error); default: break; } diff --git a/boot/tree.c b/boot/tree.c index ea8ed69..dc22bfd 100644 --- a/boot/tree.c +++ b/boot/tree.c @@ -572,6 +572,104 @@ type_node_is_none(const struct Type_Node* type_node) return type_node->type == TYPE_NODE_NONE; } +enum Pragma_Type +{ + PRAGMA_NONE, + PRAGMA_UNKNOWN, + PRAGMA_C_HEADER, + // TODO: further pragma types. + + // NOTE: there would be plenty of use for user-defined pragmas, + // acting similar to attributes in other languages or #[derive] macros + // in Rust. for now we only support our hard-coded pragmas, + // but it's something to definitely consider in the future. +}; + +#define PRAGMA_ARGUMENT_MAX 3 + +struct Pragma_Argument +{ + enum Pragma_Argument_Type + { + PRAGMA_ARGUMENT_NONE, + PRAGMA_ARGUMENT_NAME_OR_STRING, + PRAGMA_ARGUMENT_NUMBER, + PRAGMA_ARGUMENT_DECIMAL, + } type; + + union Pragma_Argument_Value + { + struct String name_or_string; + int64 number; + float64 decimal; + } value; +}; + +// a "pragma" is what we call compiler hints used for giving almost every piece of information +// the compiler might require to compile your code. +// you can recognize a pragma by the '|' token, like in '| c_header "stdio.h"'. +// their use ranges from setting alignment/padding for structures, defining default copy or move +// behaviour, to including different C compilation units and other catskill modules. 
+// pragmas are parsed as lone statements in the source code at first, but are then +// "attached" to the relevant nodes of the type the pragma is relevant to. +struct Pragma_Node +{ + enum Pragma_Type type; + struct Pragma_Argument arguments[PRAGMA_ARGUMENT_MAX]; + uint argument_count; + + struct Span span; + struct Cursor location; + + struct Pragma_Node* next; // further pragmas on the same line. +}; + +REGION(struct Pragma_Node, pragma_node) + +struct Pragma_Node* +pragma_node_new(enum Pragma_Type type, struct Span span, struct Cursor location) +{ + check(region_pragma_node_cursor < REGION_SIZE, "out of pragma node memory"); + struct Pragma_Node* pragma = &region_pragma_node[region_pragma_node_cursor++]; + *pragma = (struct Pragma_Node){ + .type = type, + .arguments = {}, + .argument_count = 0, + .span = span, + .location = location, + .next = nil, + }; + return pragma; +} + +enum Pragma_Type +pragma_type_from_string(struct String name) +{ + // look up hash values with: + // `echo -ne "string to hash" | cksum` + uint32 hash = crc32_posix(name); + switch (hash) { + case 2852954401: // "c_header" + return PRAGMA_C_HEADER; + default: + return PRAGMA_UNKNOWN; + } +} + +const ascii* +pragma_type_to_string(enum Pragma_Type type) +{ + switch (type) { + case PRAGMA_C_HEADER: + return "c_header"; + case PRAGMA_UNKNOWN: + return "unknown"; + default: + failure("unexpected pragma type passed to `pragma_type_to_string`"); + return nil; + } +} + enum Expression_Kind { EXPRESSION_NONE, @@ -742,6 +840,8 @@ enum Statement_Kind STATEMENT_BREAK, STATEMENT_CONTINUE, STATEMENT_DEFER, + + STATEMENT_PRAGMA, }; struct Statement_Value_Expression @@ -828,6 +928,11 @@ struct Statement_Value_Defer struct Block_Node block; }; +struct Statement_Value_Pragma +{ + struct Pragma_Node* inner; +}; + union Statement_Value { struct Statement_Value_Expression expression; @@ -837,6 +942,7 @@ union Statement_Value struct Statement_Value_Loop loop; struct Statement_Value_Return return_value; struct 
Statement_Value_Defer defer; + struct Statement_Value_Pragma pragma; }; struct Statement diff --git a/boot/visit.c b/boot/visit.c index c297fb4..500fb5a 100644 --- a/boot/visit.c +++ b/boot/visit.c @@ -62,6 +62,7 @@ struct Visit_Table void (*visit_block_node)(struct Visit* visitor, struct Block_Node* node); void (*visit_bare_declaration_node)(struct Visit* visitor, struct Bare_Declaration_Node* node); void (*visit_function_header_node)(struct Visit* visitor, struct Function_Header_Node* header); + void (*visit_pragma_node)(struct Visit* visitor, struct Pragma_Node* node); }; #define VISIT(visit_function, node) visit->table->visit_function(visit, node); @@ -125,6 +126,9 @@ walk_statement(struct Visit* visit, struct Statement* statement) case STATEMENT_DEFER: VISIT(visit_statement_defer, statement); break; + case STATEMENT_PRAGMA: + VISIT(visit_pragma_node, statement->value.pragma.inner); + break; default: failure("unexpected statement kind in `walk_statement`"); } @@ -417,6 +421,13 @@ walk_function_header_node(struct Visit* visit, struct Function_Header_Node* head VISIT_MAYBE(visit_type_node, header->return_type); } +void +walk_pragma_node(struct Visit* visit, struct Pragma_Node* node) +{ + // visit each pragma node in the linked list. 
+ VISIT_MAYBE(visit_pragma_node, node->next); +} + struct Visit_Table walk_functions = { .visit_tree = walk_tree, @@ -459,6 +470,7 @@ struct Visit_Table walk_functions = { .visit_block_node = walk_block_node, .visit_bare_declaration_node = walk_bare_declaration_node, .visit_function_header_node = walk_function_header_node, + .visit_pragma_node = walk_pragma_node, }; // fills in the visit table with default walk functions @@ -964,6 +976,38 @@ printer_visit_function_header(struct Visit* visit, struct Function_Header_Node* } } +void +printer_visit_pragma_node(struct Visit* visit, struct Pragma_Node* node) +{ + DATA_FOR_VISIT(struct Tree_Printer, printer); + + const ascii* pragma_name = pragma_type_to_string(node->type); + + fprintf(printer->output, "(pragma %s", pragma_name); + for (uint ai = 0; ai < node->argument_count; ++ai) { + struct Pragma_Argument* arg = &node->arguments[ai]; + switch (arg->type) { + case PRAGMA_ARGUMENT_NUMBER: + fprintf(printer->output, " (number %ld)", arg->value.number); + break; + case PRAGMA_ARGUMENT_DECIMAL: + fprintf(printer->output, " (decimal %lf)", arg->value.decimal); + break; + case PRAGMA_ARGUMENT_NAME_OR_STRING: + fprintf(printer->output, " (name/string '%s')", arg->value.name_or_string.data); + break; + default: + failure("unexpected pragma argument type in `printer_visit_pragma_node`"); + } + } + fprintf(printer->output, ")"); + + if (node->next) { + fprintf(printer->output, " "); + VISIT(visit_pragma_node, node->next); + } +} + struct Visit_Table printer_visit_functions = { .visit_tree = printer_visit_tree, @@ -1006,6 +1050,7 @@ struct Visit_Table printer_visit_functions = { .visit_block_node = printer_visit_block_node, .visit_bare_declaration_node = printer_visit_bare_declaration_node, .visit_function_header_node = printer_visit_function_header, + .visit_pragma_node = printer_visit_pragma_node, }; void |
