From 9676d1ca61b025af9e52506f8aec1f9c0792f51c Mon Sep 17 00:00:00 2001
From: Mel
Date: Thu, 12 Jun 2025 17:14:12 +0200
Subject: Parse most primitive types into tree

Signed-off-by: Mel
---
 boot/parse.c | 309 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 269 insertions(+), 40 deletions(-)

(limited to 'boot/parse.c')

diff --git a/boot/parse.c b/boot/parse.c
index 94b1e91..a0dd12c 100644
--- a/boot/parse.c
+++ b/boot/parse.c
@@ -166,6 +166,19 @@ parser_probe(struct Parser* p, enum Token_Kind kind)
     return token_is(&token, kind);
 }
 
+// skips all consecutive newline tokens at the current parser position.
+// (other non-interrupting tokens are meant to be skipped too, but are not yet.)
+// necessary for parsing statements and expressions that can be
+// split over multiple lines.
+// TODO: this needs to be used in many more places; because it only skips
+// newlines it is mostly redundant for now and mainly marks the spots where
+// `the tokens don't have to follow precisely`.
+void
+parser_unglue(struct Parser* p)
+{
+    while (parser_probe(p, TOKEN_NEWLINE)) parser_next(p);
+}
+
 struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error);
 struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error);
 
@@ -180,6 +193,7 @@ parser_end_statement(struct Parser* p, struct Parser_Error* error)
     parser_next(p);
 }
 
+// checks if the next 2 tokens could begin a variable declaration.
 bool
 parser_could_be_variable_declaration(struct Parser* p)
 {
@@ -189,11 +203,13 @@ parser_could_be_variable_declaration(struct Parser* p)
     // otherwise without a type, it is instead counted as an assignment:
     // a = "hi!"
 
+    // NOTE: maybe move this into `lex.c`?
+    // or change the API a bit, this isn't really a parser method.
struct Token first = parser_peek(p);
     struct Token second = parser_peek_further(p);
 
     bool first_matches = token_is(&first, TOKEN_NAME);
-    bool second_matches = token_is(&second, TOKEN_COMMA) || token_is(&second, TOKEN_NAME);
+    bool second_matches = token_is(&second, TOKEN_COMMA) || token_can_begin_type(&second);
 
     return first_matches && second_matches;
 }
@@ -203,6 +219,8 @@ parser_block_node(struct Parser* p, struct Parser_Error* error)
     struct Token start_token =
         CHECK_RETURN(parser_need(p, TOKEN_CURLY_OPEN, error), struct Block_Node);
 
+    parser_unglue(p);
+
     struct Statement* head = nil;
     struct Statement* current = nil;
 
@@ -229,24 +247,237 @@ parser_block_node(struct Parser* p, struct Parser_Error* error)
     };
 }
 
-struct Type_Node
+struct Type_Node* parser_node_type(struct Parser* p, struct Parser_Error* error);
+
+// parses a function header: `(` parameters `)` followed by an optional return type.
+// each parameter is a name with an optional type; an untyped parameter gets the
+// placeholder node made by `type_node_none`. the parameter nodes are chained
+// through `next` in source order, starting at `header.parameters_type_and_name`.
+// NOTE(review): relies on `CHECK_RETURN` (defined elsewhere) bailing out with a
+// zeroed header once `error` is set -- confirm against the macro.
+struct Function_Header_Node
+parser_function_header_node(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token open_parameters_token =
+        CHECK_RETURN(parser_need(p, TOKEN_ROUND_OPEN, error), struct Function_Header_Node);
+
+    struct Function_Header_Node header = { 0 };
+    // always points at the link the next parameter is stored in, so every
+    // parameter is appended to the end of the list instead of overwriting
+    // the second entry.
+    struct Type_Node** tail = &header.parameters_type_and_name;
+    while (!parser_probe(p, TOKEN_ROUND_CLOSE)) {
+        struct Token name_token =
+            CHECK_RETURN(parser_need(p, TOKEN_NAME, error), struct Function_Header_Node);
+        struct String name = name_token.value.name;
+
+        struct Type_Node* type;
+        if (!parser_probe(p, TOKEN_ROUND_CLOSE) && !parser_probe(p, TOKEN_COMMA)) {
+            type = CHECK_RETURN(parser_node_type(p, error), struct Function_Header_Node);
+            // the variant/class type parsers are still stubs that return nil
+            // without reporting an error, so nil has to be handled here.
+            if (!type) {
+                *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+                return (struct Function_Header_Node){ 0 };
+            }
+        } else {
+            type = type_node_none(name_token.span, name_token.location);
+        }
+
+        type->value_name = name;
+        *tail = type;
+        tail = &type->next;
+
+        if (parser_probe(p, TOKEN_COMMA)) parser_next(p);
+    }
+    struct Token close_parameters_token = parser_next(p);
+
+    header.span = span_merge(open_parameters_token.span, close_parameters_token.span);
+
+    struct Token next = parser_peek(p);
+    if (token_can_begin_type(&next)) {
+        header.return_type = CHECK_RETURN(parser_node_type(p, error), struct Function_Header_Node);
+        // same nil case as for parameter types above.
+        if (!header.return_type) {
+            *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+            return (struct Function_Header_Node){ 0 };
+        }
+        header.span = span_merge(header.span, header.return_type->span);
+    }
+
+    return header;
+}
+
+// parses a type that is just a name, e.g. `int`.
+struct Type_Node*
+parser_node_type_name(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token name_token = CHECK(parser_need(p, TOKEN_NAME, error));
+    return type_node_new(
+        TYPE_NODE_NAME, (union Type_Node_Value){ .name = { name_token.value.name } },
+        name_token.span, name_token.location);
+}
+
+// parses a structure type: `{` followed by `name type` fields and a closing `}`.
+// the field nodes are chained through `next`, each carrying its field name
+// in `value_name`.
+struct Type_Node*
+parser_node_type_structure(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token open_token = CHECK(parser_need(p, TOKEN_CURLY_OPEN, error));
+
+    parser_unglue(p);
+
+    struct Type_Node* head = nil;
+    struct Type_Node* current = nil;
+    while (!parser_probe(p, TOKEN_CURLY_CLOSE)) {
+        struct Token field_name_token = CHECK(parser_need(p, TOKEN_NAME, error));
+
+        struct Type_Node* field_type = CHECK(parser_node_type(p, error));
+        if (!field_type) {
+            *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+            return nil;
+        }
+        field_type->value_name = field_name_token.value.name;
+
+        if (!head)
+            head = field_type;
+        else
+            current->next = field_type;
+        current = field_type;
+
+        if (parser_probe(p, TOKEN_COMMA)) parser_next(p);
+        // fields may be separated by a comma, by any number of newlines, or by both.
+        parser_unglue(p);
+    }
+
+    struct Token close_token = CHECK(parser_need(p, TOKEN_CURLY_CLOSE, error));
+    struct Span span = span_merge(open_token.span, close_token.span);
+
+    return type_node_new(
+        TYPE_NODE_STRUCTURE, (union Type_Node_Value){ .structure = { head } }, span,
+        open_token.location);
+}
+
+struct Type_Node*
+parser_node_type_variant(struct Parser* p, struct Parser_Error* error)
+{
+    // TODO: variant types are not parsed yet; this yields nil without setting `error`.
+    return nil;
+}
+
+struct Type_Node*
+parser_node_type_function(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token fun_token = CHECK(parser_need(p, TOKEN_WORD_FUN, error));
+    struct Function_Header_Node header = CHECK(parser_function_header_node(p,
error));
+
+    struct Span span = span_merge(fun_token.span, header.span);
+    return type_node_new(
+        TYPE_NODE_FUNCTION, (union Type_Node_Value){ .function = { header } }, span,
+        fun_token.location);
+}
+
+struct Type_Node*
+parser_node_type_class(struct Parser* p, struct Parser_Error* error)
+{
+    // TODO: class types are not parsed yet; this yields nil without setting `error`.
+    return nil;
+}
+
+struct Type_Node*
+parser_node_type_tuple(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token open_token = CHECK(parser_need(p, TOKEN_ROUND_OPEN, error));
+
+    struct Type_Node* head = nil;
+    struct Type_Node* current = nil;
+    while (!parser_probe(p, TOKEN_ROUND_CLOSE)) {
+        struct Type_Node* type = CHECK(parser_node_type(p, error));
+        if (!type) {
+            *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+            return nil;
+        }
+
+        if (!head)
+            head = type;
+        else
+            current->next = type;
+        current = type;
+
+        if (parser_probe(p, TOKEN_COMMA))
+            parser_next(p);
+        else
+            break; // no comma: the element list is over, `)` must follow
+    }
+
+    struct Token close_token = CHECK(parser_need(p, TOKEN_ROUND_CLOSE, error));
+    struct Span span = span_merge(open_token.span, close_token.span);
+    return type_node_new(
+        TYPE_NODE_TUPLE, (union Type_Node_Value){ .tuple = { head } }, span, open_token.location);
+}
+
+struct Type_Node*
+parser_node_type_array_or_map(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token open_token = CHECK(parser_need(p, TOKEN_SQUARE_OPEN, error));
+
+    struct Type_Node* element_or_key_type = CHECK(parser_node_type(p, error));
+    if (!element_or_key_type) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        return nil;
+    }
+
+    enum Type_Node_Type type;
+    union Type_Node_Value value;
+    if (parser_probe(p, TOKEN_ASSIGN)) {
+        // an assignment token after the first type makes this a map type, e.g. `[string = int]`
+        parser_next(p); // consume the assignment token
+
+        struct Type_Node* key_type = element_or_key_type;
+        struct Type_Node* value_type = CHECK(parser_node_type(p, error));
+        if (!value_type) {
+            *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+            return nil;
+        }
+
+        type = TYPE_NODE_MAP;
+        value.map = (struct Type_Node_Map){
+            .key_type = key_type,
+            .value_type = value_type,
+        };
+    } else {
+        // without an assignment token this is an array type, e.g. `[int]`
+        type = TYPE_NODE_ARRAY;
+        value.array = (struct Type_Node_Array){ .element_type = element_or_key_type };
+    }
+
+    struct Token close_token = CHECK(parser_need(p, TOKEN_SQUARE_CLOSE, error));
+
+    struct Span span = span_merge(open_token.span, close_token.span);
+    return type_node_new(type, value, span, open_token.location);
+}
+
+struct Type_Node*
+parser_node_type_reference(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token ampersand_token = CHECK(parser_need(p, TOKEN_AMPERSAND, error));
+
+    struct Type_Node* referenced_type = CHECK(parser_node_type(p, error));
+    if (!referenced_type) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        return nil;
+    }
+
+    struct Span span = span_merge(ampersand_token.span, referenced_type->span);
+    return type_node_new(
+        TYPE_NODE_REFERENCE, (union Type_Node_Value){ .reference = { referenced_type } }, span,
+        ampersand_token.location);
+}
+
+struct Type_Node*
 parser_node_type(struct Parser* p, struct Parser_Error* error)
 {
-    struct Token token = parser_need(p, TOKEN_NAME, error);
-    if (token_is_empty(&token)) {
+    // TODO: still unparsed: maybe types, plus variant and class (their parsers only return nil)
+    struct Token token = parser_peek(p);
+    switch (token.kind) {
+    case TOKEN_NAME:
+        return parser_node_type_name(p, error);
+    case TOKEN_WORD_VARIANT:
+        return parser_node_type_variant(p, error);
+    case TOKEN_WORD_CLASS:
+        return parser_node_type_class(p, error);
+    case TOKEN_WORD_FUN:
+        return parser_node_type_function(p, error);
+    case TOKEN_CURLY_OPEN:
+        return parser_node_type_structure(p, error);
+    case TOKEN_ROUND_OPEN: +
return parser_node_type_tuple(p, error);
+    case TOKEN_SQUARE_OPEN:
+        return parser_node_type_array_or_map(p, error);
+    case TOKEN_AMPERSAND:
+        return parser_node_type_reference(p, error);
+    default:
         *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
-        return (struct Type_Node){ 0 };
+        return nil;
     }
-    struct String type_name = token.value.name;
-
-    // for now, we only support a single type name.
-    // in the future, we might want to support more complex types.
-    return (struct Type_Node){
-        .type = TYPE_NAME,
-        .name = type_name,
-        .span = token.span,
-        .location = token.location,
-    };
 }
 
 struct Expression*
@@ -308,31 +539,9 @@ struct Expression*
 parser_expression_function(struct Parser* p, struct Parser_Error* error)
 {
     struct Token fun_token = CHECK(parser_need(p, TOKEN_WORD_FUN, error));
-    CHECK(parser_need(p, TOKEN_ROUND_OPEN, error));
-
     struct Expression_Function fun = { 0 };
-    while (!parser_probe(p, TOKEN_ROUND_CLOSE)) {
-        struct Token name_token = CHECK(parser_need(p, TOKEN_NAME, error));
-        struct String name = name_token.value.name;
-
-        struct Type_Node type = { 0 };
-
-        struct Token next = parser_peek(p);
-        if (!token_is(&next, TOKEN_ROUND_CLOSE) && !token_is(&next, TOKEN_COMMA))
-            type = CHECK(parser_node_type(p, error));
-
-        if (parser_probe(p, TOKEN_COMMA)) parser_next(p);
-
-        check(fun.parameter_count < EXPRESSION_FUNCTION_MAX_PARAMS, "too many function parameters");
-        fun.parameters[fun.parameter_count++] = (struct Expression_Function_Parameter){
-            .name = name,
-            .type = type,
-        };
-    }
-    parser_next(p);
-
-    if (!parser_probe(p, TOKEN_CURLY_OPEN)) fun.return_type = CHECK(parser_node_type(p, error));
+    fun.header = CHECK(parser_function_header_node(p, error));
     fun.body = CHECK(parser_block_node(p, error));
 
     return expression_new(
@@ -603,7 +812,7 @@ parser_statement_declaration(struct Parser* p, struct Parser_Error* error)
     }
 
     // for now, type is always required.
-    struct Type_Node type = CHECK(parser_node_type(p, error));
+    struct Type_Node* type = CHECK(parser_node_type(p, error));
     CHECK(parser_need(p, TOKEN_ASSIGN, error));
     struct Expression* initializer = CHECK(parser_expression(p, error));
 
@@ -770,6 +979,23 @@ parser_statement_defer(struct Parser* p, struct Parser_Error* error)
     return statement_new(STATEMENT_DEFER, value, span, defer_token.location);
 }
 
+// parses a type statement: the `type` keyword, a name, `=` and the type itself.
+// the statement's span runs from the `type` keyword to the end of the parsed type.
+struct Statement*
+parser_statement_type(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token type_token = CHECK(parser_need(p, TOKEN_WORD_TYPE, error));
+
+    // TODO: parse type paths, i.e. `Thing.SubType`
+    struct Token name_token = CHECK(parser_need(p, TOKEN_NAME, error));
+    CHECK(parser_need(p, TOKEN_ASSIGN, error));
+    struct String name = name_token.value.name;
+
+    struct Type_Node* type = CHECK(parser_node_type(p, error));
+    // the variant/class type parsers are still stubs that return nil without
+    // reporting an error; report it here instead of dereferencing nil below.
+    if (!type) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_TYPE);
+        return nil;
+    }
+
+    struct Span span = span_merge(type_token.span, type->span);
+    union Statement_Value value = { .type = { type, name } };
+    return statement_new(STATEMENT_TYPE, value, span, type_token.location);
+}
+
 struct Statement*
 parser_statement(struct Parser* p, struct Parser_Error* error)
 {
@@ -798,7 +1024,10 @@ parser_statement(struct Parser* p, struct Parser_Error* error)
         return parser_statement_continue(p, error);
     case TOKEN_WORD_DEFER:
         return parser_statement_defer(p, error);
-    default: break;
+    case TOKEN_WORD_TYPE:
+        return parser_statement_type(p, error);
+    default:
+        break;
     }
 
     struct Expression* expression = CHECK(parser_expression(p, error));
-- 
cgit 1.4.1