/* * very simple handwritten recursive descent * parser for a catskill source file. */ #define PARSER_LOOKAHEAD 2 #define CHECK(parse) \ parse; \ if (!parser_error_is_none(error)) return nil; #define CHECK_RETURN(parse, ret) \ parse; \ if (!parser_error_is_none(error)) return (ret){ 0 }; struct Parser_Error { enum Parser_Error_Kind { PARSER_ERROR_NONE, PARSER_ERROR_UNEXPECTED_TOKEN, PARSER_ERROR_UNEXPECTED_EOF, PARSER_ERROR_EXPECTED_STATEMENT_END, PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION, PARSER_ERROR_EXPECTED_TYPE, } kind; // TODO: add span to error }; struct Parser_Error parser_error(enum Parser_Error_Kind kind) { return (struct Parser_Error){ kind }; } struct Parser_Error parser_error_none(void) { return parser_error(PARSER_ERROR_NONE); } bool parser_error_is_none(const struct Parser_Error* error) { return error->kind == PARSER_ERROR_NONE; } const ascii* parser_error_to_string(const struct Parser_Error* error) { switch (error->kind) { case PARSER_ERROR_NONE: return "none"; case PARSER_ERROR_UNEXPECTED_TOKEN: return "unexpected token"; case PARSER_ERROR_UNEXPECTED_EOF: return "unexpected end of file"; case PARSER_ERROR_EXPECTED_STATEMENT_END: return "expected statement end"; case PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION: return "expected primary expression"; case PARSER_ERROR_EXPECTED_TYPE: return "expected type"; default: return "unknown error"; } } struct Parser { struct Lexer* lexer; struct Token lookahead[PARSER_LOOKAHEAD]; }; void parser_new(struct Parser* p, struct Lexer* lexer) { p->lexer = lexer; memset(p->lookahead, 0, sizeof(p->lookahead)); } bool parser_reached_end(struct Parser* p) { return p->lexer->eof; } bool parser_lookahead_pop(struct Parser* p, struct Token* token) { struct Token head = p->lookahead[0]; if (token_is_empty(&head)) return false; for (uint i = 0; i < PARSER_LOOKAHEAD - 1; i++) p->lookahead[i] = p->lookahead[i + 1]; p->lookahead[PARSER_LOOKAHEAD - 1] = token_none(); *token = head; return true; } bool parser_lookahead_push(struct Parser* p, struct Token token) { for (uint i = 0; i < PARSER_LOOKAHEAD; i++) { if (token_is_empty(&p->lookahead[i])) { p->lookahead[i] = token; return true; } } return false; } // advance the token stream and return the next token. struct Token parser_next(struct Parser* p) { struct Token token; if (!parser_lookahead_pop(p, &token)) token = lexer_next(p->lexer); return token; } // advance the token stream if the token is of the right type. // if not, return none-token and set the error. struct Token parser_need(struct Parser* p, enum Token_Kind kind, struct Parser_Error* error) { struct Token token = parser_next(p); if (!token_is(&token, kind)) { *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN); return token_none(); } return token; } // peek at the token stream at given index, without advancing it. // index 0 is the token that would be returned by a next `parser_next` call. // peek limit is defined by `PARSER_LOOKAHEAD`. struct Token parser_peek_at(struct Parser* p, uint index) { check(index < 2, "parser peek index out of range"); while (token_is_empty(&p->lookahead[index])) { struct Token next_token = lexer_next(p->lexer); parser_lookahead_push(p, next_token); } return p->lookahead[index]; } // peek at the next token in the stream, without advancing it. // synonym for `parser_peek_at(p, 0)`. struct Token parser_peek(struct Parser* p) { return parser_peek_at(p, 0); } // peek at the token one beyond the current cursor in the stream, without advancing it. // synonym for `parser_peek_at(p, 1)`. struct Token parser_peek_further(struct Parser* p) { return parser_peek_at(p, 1); } // check if the next token is of a given type. bool parser_probe(struct Parser* p, enum Token_Kind kind) { struct Token token = parser_peek(p); return token_is(&token, kind); } // skip all consecutive newlines and other non-interrupting // tokens in the token stream. // necessary for parsing statements and expressions that are // split-able over multiple lines. // TODO: this needs to be used in many more places and // is currently mostly redundant, due to only skipping newlines, // acting mostly as a marking for `the tokens don't have to follow precisely`. void parser_unglue(struct Parser* p) { while (parser_probe(p, TOKEN_NEWLINE)) parser_next(p); } struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error); struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error); void parser_end_statement(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_peek(p); if (!token_ends_statement(&token)) { *error = parser_error(PARSER_ERROR_EXPECTED_STATEMENT_END); return; } parser_next(p); } struct Block_Node parser_block_node(struct Parser* p, struct Parser_Error* error) { struct Token start_token = CHECK_RETURN(parser_need(p, TOKEN_CURLY_OPEN, error), struct Block_Node); parser_unglue(p); struct Statement* head = nil; struct Statement* current = nil; while (!parser_probe(p, TOKEN_CURLY_CLOSE)) { struct Statement* statement = CHECK_RETURN(parser_statement(p, error), struct Block_Node); CHECK_RETURN(parser_end_statement(p, error), struct Block_Node); if (!head) { head = statement; } else { current->next = statement; } current = statement; } struct Token end_token = CHECK_RETURN(parser_need(p, TOKEN_CURLY_CLOSE, error), struct Block_Node); struct Span span = span_merge(start_token.span, end_token.span); return (struct Block_Node){ .statements = head, .span = span, .location = start_token.location, }; } struct Type_Node* parser_node_type(struct Parser* p, struct Parser_Error* error); struct Function_Header_Node parser_function_header_node(struct Parser* p, struct Parser_Error* error) { struct Token open_parameters_token = CHECK_RETURN(parser_need(p, TOKEN_ROUND_OPEN, error), struct Function_Header_Node); struct Function_Header_Node header = { 0 }; while (!parser_probe(p, TOKEN_ROUND_CLOSE)) { struct Token name_token = CHECK_RETURN(parser_need(p, TOKEN_NAME, error), struct Function_Header_Node); struct String name = name_token.value.name; struct Type_Node* type; if (!parser_probe(p, TOKEN_ROUND_CLOSE) && !parser_probe(p, TOKEN_COMMA)) { type = CHECK_RETURN(parser_node_type(p, error), struct Function_Header_Node); } else { type = type_node_none(name_token.span, name_token.location); } type->value_name = name; if (!header.parameters_type_and_name) header.parameters_type_and_name = type; else header.parameters_type_and_name->next = type; if (parser_probe(p, TOKEN_COMMA)) parser_next(p); } struct Token close_parameters_token = parser_next(p); header.span = span_merge(open_parameters_token.span, close_parameters_token.span); struct Token next = parser_peek(p); struct Token further = parser_peek_further(p); if (token_can_begin_type(&next)) { // NOTE: this is very uncomfortable. to avoid ambiguity between a no-return-type function // body (i.e. `fun () {`) and a function with a return type of inline structure (i.e. `fun // () {x int} {`), we require that inline structs are declared in one line, without a // line-break, and function bodies always have a line-break after the opening brace. this // means that we cannot have a single line function, and we can't have a function with an // inline structure as a return type that is too long to fit on a single line. // TODO: to allow single line functions, introduce a `:` token as a replacement for the // opening brace. `fun (x int): x + 1` if (token_is(&next, TOKEN_CURLY_OPEN) && token_is(&further, TOKEN_NEWLINE)) return header; header.return_type = CHECK_RETURN(parser_node_type(p, error), struct Function_Header_Node); header.span = span_merge(header.span, header.return_type->span); } return header; } struct Bare_Declaration_Node parser_bare_declaration_node(struct Parser* p, struct Parser_Error* error) { struct String_Array names = string_array_new(); struct Span span = { 0 }; struct Cursor location = parser_peek(p).location; for (;;) { struct Token name_token = CHECK_RETURN(parser_need(p, TOKEN_NAME, error), struct Bare_Declaration_Node); span = span_is_empty(span) ? name_token.span : span_merge(span, name_token.span); string_array_add(&names, name_token.value.name); struct Token next = parser_peek(p); if (token_can_begin_type(&next)) break; if (next.kind == TOKEN_COMMA) parser_next(p); } // for now, type is always required. struct Type_Node* type = CHECK_RETURN(parser_node_type(p, error), struct Bare_Declaration_Node); CHECK_RETURN(parser_need(p, TOKEN_ASSIGN, error), struct Bare_Declaration_Node); struct Expression* initializer = CHECK_RETURN(parser_expression(p, error), struct Bare_Declaration_Node); return (struct Bare_Declaration_Node){ .names = names, .initializer = initializer, .type = type, .span = span, .location = location, }; } struct Type_Node* parser_node_type_name(struct Parser* p, struct Parser_Error* error) { struct Token name_token = CHECK(parser_need(p, TOKEN_NAME, error)); return type_node_new( TYPE_NODE_NAME, (union Type_Node_Value){ .name = { name_token.value.name } }, name_token.span, name_token.location); } struct Type_Node* parser_node_type_structure(struct Parser* p, struct Parser_Error* error) { struct Token open_token = CHECK(parser_need(p, TOKEN_CURLY_OPEN, error)); parser_unglue(p); struct Type_Node *head = nil, *current = nil; while (!parser_probe(p, TOKEN_CURLY_CLOSE)) { struct Token field_name_token = CHECK(parser_need(p, TOKEN_NAME, error)); struct Type_Node* field_type = CHECK(parser_node_type(p, error)); field_type->value_name = field_name_token.value.name; if (!head) head = field_type; else current->next = field_type; current = field_type; if (parser_probe(p, TOKEN_COMMA) || parser_probe(p, TOKEN_NEWLINE)) parser_next(p); } parser_unglue(p); struct Token close_token = CHECK(parser_need(p, TOKEN_CURLY_CLOSE, error)); struct Span span = span_merge(open_token.span, close_token.span); return type_node_new( TYPE_NODE_STRUCTURE, (union Type_Node_Value){ .structure = { head } }, span, open_token.location); } struct Type_Node* parser_node_type_variant(struct Parser* p, struct Parser_Error* error) { struct Token variant_token = CHECK(parser_need(p, TOKEN_WORD_VARIANT, error)); CHECK(parser_need(p, TOKEN_CURLY_OPEN, error)); parser_unglue(p); struct Type_Node *head = nil, *current = nil; while (!parser_probe(p, TOKEN_CURLY_CLOSE)) { struct Token variant_name_token = CHECK(parser_need(p, TOKEN_NAME, error)); struct String variant_name = variant_name_token.value.name; struct Type_Node* backing_type = nil; struct Token next = parser_peek(p); bool has_backing_type = !token_is(&next, TOKEN_NEWLINE) && !token_is(&next, TOKEN_COMMA) && !token_is(&next, TOKEN_CURLY_CLOSE); if (has_backing_type) { backing_type = CHECK(parser_node_type(p, error)); } else { backing_type = type_node_none(variant_name_token.span, variant_name_token.location); } backing_type->value_name = variant_name; if (!head) head = backing_type; else current->next = backing_type; current = backing_type; next = parser_peek(p); if (token_is(&next, TOKEN_COMMA) || token_is(&next, TOKEN_NEWLINE)) parser_next(p); } parser_unglue(p); struct Token close_token = CHECK(parser_need(p, TOKEN_CURLY_CLOSE, error)); struct Span span = span_merge(variant_token.span, close_token.span); return type_node_new( TYPE_NODE_VARIANT, (union Type_Node_Value){ .variant = { head } }, span, variant_token.location); } struct Type_Node* parser_node_type_function(struct Parser* p, struct Parser_Error* error) { struct Token fun_token = CHECK(parser_need(p, TOKEN_WORD_FUN, error)); struct Function_Header_Node header = CHECK(parser_function_header_node(p, error)); struct Span span = span_merge(fun_token.span, header.span); return type_node_new( TYPE_NODE_FUNCTION, (union Type_Node_Value){ .function = { header } }, span, fun_token.location); } struct Type_Node* parser_node_type_class(struct Parser* p, struct Parser_Error* error) { struct Token class_token = CHECK(parser_need(p, TOKEN_WORD_CLASS, error)); CHECK(parser_need(p, TOKEN_CURLY_OPEN, error)); parser_unglue(p); struct Type_Node *head = nil, *current = nil; while (!parser_probe(p, TOKEN_CURLY_CLOSE)) { struct Token method_name_token = CHECK(parser_need(p, TOKEN_NAME, error)); struct String method_name = method_name_token.value.name; struct Function_Header_Node header = CHECK(parser_function_header_node(p, error)); // allocate a new type node for the method type, // for the sake of consistency and the built-in type node linked list. struct Type_Node* method_type = type_node_new( TYPE_NODE_FUNCTION, (union Type_Node_Value){ .function = { header } }, header.span, header.parameters_type_and_name->location); method_type->value_name = method_name; if (!head) head = method_type; else current->next = method_type; current = method_type; if (parser_probe(p, TOKEN_COMMA) || parser_probe(p, TOKEN_NEWLINE)) parser_next(p); } parser_unglue(p); struct Token close_token = CHECK(parser_need(p, TOKEN_CURLY_CLOSE, error)); struct Span span = span_merge(class_token.span, close_token.span); return type_node_new( TYPE_NODE_CLASS, (union Type_Node_Value){ .class = { head } }, span, class_token.location); } struct Type_Node* parser_node_type_tuple(struct Parser* p, struct Parser_Error* error) { struct Token open_token = CHECK(parser_need(p, TOKEN_ROUND_OPEN, error)); struct Type_Node *head = nil, *current = nil; while (!parser_probe(p, TOKEN_ROUND_CLOSE)) { struct Type_Node* type = CHECK(parser_node_type(p, error)); if (!head) head = type; else current->next = type; current = type; if (parser_probe(p, TOKEN_COMMA)) parser_next(p); else break; } struct Token close_token = CHECK(parser_need(p, TOKEN_ROUND_CLOSE, error)); struct Span span = span_merge(open_token.span, close_token.span); return type_node_new( TYPE_NODE_TUPLE, (union Type_Node_Value){ .tuple = { head } }, span, open_token.location); } struct Type_Node* parser_node_type_array_or_map(struct Parser* p, struct Parser_Error* error) { struct Token open_token = CHECK(parser_need(p, TOKEN_SQUARE_OPEN, error)); struct Type_Node* element_or_key_type = CHECK(parser_node_type(p, error)); if (!element_or_key_type) { *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); return nil; } enum Type_Node_Type type; union Type_Node_Value value; if (parser_probe(p, TOKEN_ASSIGN)) { // this is a map type, e.g. `[string = int]` parser_next(p); // consume the assignment token struct Type_Node* key_type = element_or_key_type; struct Type_Node* value_type = CHECK(parser_node_type(p, error)); if (!value_type) { *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); return nil; } type = TYPE_NODE_MAP; value.map = (struct Type_Node_Map){ .key_type = key_type, .value_type = value_type, }; } else { // this is an array type, e.g. `[int]` type = TYPE_NODE_ARRAY; value.array = (struct Type_Node_Array){ .element_type = element_or_key_type }; } struct Token close_token = CHECK(parser_need(p, TOKEN_SQUARE_CLOSE, error)); struct Span span = span_merge(open_token.span, close_token.span); return type_node_new(type, value, span, open_token.location); } struct Type_Node* parser_node_type_reference(struct Parser* p, struct Parser_Error* error) { struct Token ampersand_token = CHECK(parser_need(p, TOKEN_AMPERSAND, error)); struct Type_Node* referenced_type = CHECK(parser_node_type(p, error)); if (!referenced_type) { *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); return nil; } struct Span span = span_merge(ampersand_token.span, referenced_type->span); return type_node_new( TYPE_NODE_REFERENCE, (union Type_Node_Value){ .reference = { referenced_type } }, span, ampersand_token.location); } struct Type_Node* parser_node_type(struct Parser* p, struct Parser_Error* error) { // TODO: maybe, variant, class struct Token token = parser_peek(p); switch (token.kind) { case TOKEN_NAME: return parser_node_type_name(p, error); case TOKEN_WORD_VARIANT: return parser_node_type_variant(p, error); case TOKEN_WORD_CLASS: return parser_node_type_class(p, error); case TOKEN_WORD_FUN: return parser_node_type_function(p, error); case TOKEN_CURLY_OPEN: return parser_node_type_structure(p, error); case TOKEN_ROUND_OPEN: return parser_node_type_tuple(p, error); case TOKEN_SQUARE_OPEN: return parser_node_type_array_or_map(p, error); case TOKEN_AMPERSAND: return parser_node_type_reference(p, error); default: *error = parser_error(PARSER_ERROR_EXPECTED_TYPE); return nil; } } struct Expression* parser_expression_primary_name(struct Parser* p, struct Parser_Error* error) { struct Token token = CHECK(parser_need(p, TOKEN_NAME, error)); union Expression_Value value = { .name = { token.value.name } }; return expression_new(EXPRESSION_NAME, value, token.span, token.location); } struct Expression* parser_expression_primary_integer(struct Parser* p, struct Parser_Error* error) { struct Token token = CHECK(parser_need(p, TOKEN_LITERAL_INTEGER, error)); union Expression_Value value = { .integer_literal = { token.value.literal_integer } }; return expression_new(EXPRESSION_INTEGER_LITERAL, value, token.span, token.location); } struct Expression* parser_expression_primary_float(struct Parser* p, struct Parser_Error* error) { struct Token token = CHECK(parser_need(p, TOKEN_LITERAL_FLOAT, error)); union Expression_Value value = { .float_literal = { token.value.literal_float } }; return expression_new(EXPRESSION_FLOAT_LITERAL, value, token.span, token.location); } struct Expression* parser_expression_primary_string(struct Parser* p, struct Parser_Error* error) { struct Token token = CHECK(parser_need(p, TOKEN_LITERAL_STRING, error)); union Expression_Value value = { .string_literal = { token.value.literal_string } }; return expression_new(EXPRESSION_STRING_LITERAL, value, token.span, token.location); } struct Expression* parser_expression_primary_boolean(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_next(p); check(token.kind == TOKEN_WORD_TRUE || token.kind == TOKEN_WORD_FALSE, "expected boolean literal"); bool literal = token.kind == TOKEN_WORD_TRUE; union Expression_Value expr_value = { .bool_literal = { literal } }; return expression_new(EXPRESSION_BOOLEAN_LITERAL, expr_value, token.span, token.location); } struct Expression* parser_expression_primary_group(struct Parser* p, struct Parser_Error* error) { struct Token start_token = CHECK(parser_need(p, TOKEN_ROUND_OPEN, error)); struct Expression* expression = CHECK(parser_expression(p, error)); struct Token end_token = CHECK(parser_need(p, TOKEN_ROUND_CLOSE, error)); struct Span span = span_merge(start_token.span, end_token.span); union Expression_Value value = { .group = { expression } }; return expression_new(EXPRESSION_GROUP, value, span, start_token.location); } struct Expression* parser_expression_function(struct Parser* p, struct Parser_Error* error) { struct Token fun_token = CHECK(parser_need(p, TOKEN_WORD_FUN, error)); struct Expression_Function fun = { 0 }; fun.header = CHECK(parser_function_header_node(p, error)); fun.body = CHECK(parser_block_node(p, error)); return expression_new( EXPRESSION_FUNCTION, (union Expression_Value){ .function = fun }, span_merge(fun_token.span, fun.body.span), fun_token.location); } struct Expression* parser_expression_type(struct Parser* p, struct Parser_Error* error) { struct Token start_token = parser_peek(p); struct Type_Node* type = nil; switch (start_token.kind) { case TOKEN_WORD_TYPE: parser_next(p); // skip the `type` keyword. type = CHECK(parser_node_type(p, error)); break; case TOKEN_WORD_VARIANT: type = CHECK(parser_node_type_variant(p, error)); break; case TOKEN_WORD_CLASS: type = CHECK(parser_node_type_class(p, error)); break; default: failure("expected type signifying keyword"); } struct Span span = span_merge(start_token.span, type->span); return expression_new( EXPRESSION_TYPE, (union Expression_Value){ .type = { type } }, span, start_token.location); } struct Expression* parser_expression_primary(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_peek(p); switch (token.kind) { case TOKEN_NAME: return parser_expression_primary_name(p, error); case TOKEN_LITERAL_INTEGER: return parser_expression_primary_integer(p, error); case TOKEN_LITERAL_FLOAT: return parser_expression_primary_float(p, error); case TOKEN_LITERAL_STRING: return parser_expression_primary_string(p, error); case TOKEN_WORD_TRUE: case TOKEN_WORD_FALSE: return parser_expression_primary_boolean(p, error); case TOKEN_ROUND_OPEN: return parser_expression_primary_group(p, error); case TOKEN_WORD_FUN: return parser_expression_function(p, error); // `variant` and `class` here are essentially a shortcut, as they already // imply a type, so we don't have to write `type variant` or `type class`. case TOKEN_WORD_TYPE: case TOKEN_WORD_VARIANT: case TOKEN_WORD_CLASS: return parser_expression_type(p, error); default: *error = parser_error(PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION); return nil; } } struct Expression* parser_expression_member(struct Parser* p, struct Parser_Error* error) { struct Expression* left = CHECK(parser_expression_primary(p, error)); // NOTE: see `parser_expression_postfix_call`. while (parser_probe(p, TOKEN_DOT)) { parser_next(p); struct Token name_token = CHECK(parser_need(p, TOKEN_NAME, error)); struct String name = name_token.value.name; struct Span span = span_merge(left->span, name_token.span); union Expression_Value value = { .member = { left, name } }; left = expression_new(EXPRESSION_MEMBER, value, span, name_token.location); } return left; } struct Expression* parser_expression_postfix_call( struct Parser* p, struct Expression* subject, struct Parser_Error* error) { // NOTE: because of the way the parser works, we have to parse all subsequent // call expressions in the same loop. // this is the case with an expression like `meow_function()(123)`, // where the hypothetical `meow_function` returns a function pointer. while (parser_probe(p, TOKEN_ROUND_OPEN)) { parser_next(p); struct Expression *arguments_head = nil, *arguments_current = nil; while (!parser_probe(p, TOKEN_ROUND_CLOSE)) { struct Expression* argument = CHECK(parser_expression(p, error)); if (!arguments_head) arguments_head = argument; else arguments_current->next = argument; arguments_current = argument; if (parser_probe(p, TOKEN_COMMA)) parser_next(p); } struct Token token = CHECK(parser_need(p, TOKEN_ROUND_CLOSE, error)); struct Span span = span_merge(subject->span, token.span); union Expression_Value value = { .call = { subject, arguments_head } }; subject = expression_new(EXPRESSION_CALL, value, span, token.location); } return subject; } struct Expression* parser_expression_postfix_subscript( struct Parser* p, struct Expression* subject, struct Parser_Error* error) { // NOTE: see `parser_expression_postfix_call`. while (parser_probe(p, TOKEN_SQUARE_OPEN)) { parser_next(p); struct Expression* index = CHECK(parser_expression(p, error)); struct Token token = CHECK(parser_need(p, TOKEN_SQUARE_CLOSE, error)); struct Span span = span_merge(subject->span, span_merge(index->span, token.span)); union Expression_Value value = { .subscript = { subject, index } }; subject = expression_new(EXPRESSION_SUBSCRIPT, value, span, token.location); } return subject; } struct Expression* parser_expression_postfix(struct Parser* p, struct Parser_Error* error) { struct Expression* expression = CHECK(parser_expression_member(p, error)); struct Token token = parser_peek(p); switch (token.kind) { case TOKEN_ROUND_OPEN: return parser_expression_postfix_call(p, expression, error); case TOKEN_SQUARE_OPEN: return parser_expression_postfix_subscript(p, expression, error); default:; } enum Increment_Decrement_Operation inc_dec_op = increment_decrement_operation_from_token(&token); if (inc_dec_op) { parser_next(p); struct Expression_Increment_Decrement inc_dec = { .prefix = false, .subject = expression, .operation = inc_dec_op, }; struct Span span = span_merge(expression->span, token.span); union Expression_Value value = { .increment_decrement = inc_dec }; return expression_new(EXPRESSION_INCREMENT_DECREMENT, value, span, token.location); } return expression; } struct Expression* parser_expression_unary_operation(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_peek(p); enum Unary_Operation operation = unary_operation_from_token(&token); if (operation) { parser_next(p); struct Expression* operand = CHECK(parser_expression_unary_operation(p, error)); struct Span span = span_merge(token.span, operand->span); union Expression_Value value = { .unary_operator = { operation, operand } }; return expression_new(EXPRESSION_UNARY_OPERATION, value, span, token.location); } enum Increment_Decrement_Operation inc_dec_op = increment_decrement_operation_from_token(&token); if (inc_dec_op) { parser_next(p); struct Expression* subject = CHECK(parser_expression_unary_operation(p, error)); struct Expression_Increment_Decrement inc_dec = { .prefix = true, .subject = subject, .operation = inc_dec_op, }; struct Span span = span_merge(token.span, inc_dec.subject->span); union Expression_Value value = { .increment_decrement = inc_dec }; return expression_new(EXPRESSION_INCREMENT_DECREMENT, value, span, token.location); } return parser_expression_postfix(p, error); } // given two expressions and some kind of binary operation between them, // merge them into a single binary expression, with attention to operator precedence // and associativity. struct Expression_Binary_Operator parser_merge_into_single_binary_expression( struct Parser* p, struct Expression* left, enum Binary_Operation operation, struct Expression* right) { // NOTE: due to the parser structure, the left expression is never a binary operation // so we can worry about fixing up the right side only. check(left->kind != EXPRESSION_BINARY_OPERATION, "left expression is a binary operation"); if (right->kind == EXPRESSION_BINARY_OPERATION) { struct Expression_Binary_Operator right_binary = right->value.binary_operator; uint right_precedence = binary_operation_precedence(right_binary.operation); uint precedence = binary_operation_precedence(operation); bool switch_due_to_precedence = right_precedence < precedence; bool switch_due_to_associativity = precedence == right_precedence && binary_operation_associativity(operation) == BINARY_ASSOCIATIVITY_LEFT; // essentially check if an expression like `a ~ (b ~ c)` needs to be // switched into `(a ~ b) ~ c` or not. if (switch_due_to_precedence || switch_due_to_associativity) { // since we only use static memory, we need to switch the operands around // without allocating more expressions, thus we have to reuse the allocation // slots of the previous expressions. struct Expression* operands[3] = { left, right_binary.left_operand, right_binary.right_operand }; struct Expression* slots[2] = { right, right_binary.right_operand }; struct Expression_Binary_Operator new_binary_operator = parser_merge_into_single_binary_expression(p, operands[0], operation, operands[1]); *slots[0] = (struct Expression){ .kind = EXPRESSION_BINARY_OPERATION, .value = { .binary_operator = new_binary_operator }, .span = span_merge(left->span, right->span), .location = left->location }; *slots[1] = *operands[2]; left = slots[0]; right = slots[1]; operation = right_binary.operation; } } return (struct Expression_Binary_Operator){ .operation = operation, .left_operand = left, .right_operand = right, }; } struct Expression* parser_expression_binary_operation(struct Parser* p, struct Parser_Error* error) { struct Expression* left = CHECK(parser_expression_unary_operation(p, error)); struct Token token = parser_peek(p); enum Binary_Operation operation = binary_operation_from_token(&token); if (operation) { parser_next(p); struct Expression* right = CHECK(parser_expression_binary_operation(p, error)); struct Span span = span_merge(left->span, right->span); struct Expression_Binary_Operator binary_value = parser_merge_into_single_binary_expression(p, left, operation, right); union Expression_Value value = { .binary_operator = binary_value }; return expression_new(EXPRESSION_BINARY_OPERATION, value, span, left->location); } return left; } struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error) { return parser_expression_binary_operation(p, error); } struct Statement* parser_statement_declaration(struct Parser* p, struct Parser_Error* error) { struct Token declaration_token = parser_next(p); enum Statement_Declaration_Kind declaration_kind = statement_declaration_kind_from_token(&declaration_token); check(declaration_kind, "expected valid declaration token"); struct Bare_Declaration_Node inner = CHECK(parser_bare_declaration_node(p, error)); struct Span span = span_merge(declaration_token.span, inner.span); union Statement_Value value = { .declaration = { .kind = declaration_kind, .inner = inner, }, }; return statement_new(STATEMENT_DECLARATION, value, span, declaration_token.location); } struct Statement* parser_statement_conditional(struct Parser* p, struct Parser_Error* error) { struct Statement_Value_Conditional conditional = { 0 }; struct Token if_token = parser_need(p, TOKEN_WORD_IF, error); // primary if condition + block. struct Expression* if_condition = CHECK(parser_expression(p, error)); struct Block_Node then_block = CHECK(parser_block_node(p, error)); conditional.conditions[conditional.condition_count++] = (struct Statement_Conditional_Branch){ .when = if_condition, .then = then_block, }; struct Span span = span_merge(if_token.span, then_block.span); while (parser_probe(p, TOKEN_WORD_ELSE)) { check(conditional.condition_count < STATEMENT_VALUE_CONDITIONAL_MAX, "too many conditional branches"); parser_next(p); struct Statement_Conditional_Branch branch = { 0 }; if (parser_probe(p, TOKEN_WORD_IF)) { // else if condition + block. parser_next(p); struct Expression* else_condition = CHECK(parser_expression(p, error)); struct Block_Node else_block = CHECK(parser_block_node(p, error)); branch = (struct Statement_Conditional_Branch){ .when = else_condition, .then = else_block, }; } else { // else block. struct Block_Node else_block = CHECK(parser_block_node(p, error)); branch = (struct Statement_Conditional_Branch){ .when = nil, .then = else_block, }; } conditional.conditions[conditional.condition_count++] = branch; span = span_merge(span, branch.then.span); } return statement_new( STATEMENT_CONDITIONAL, (union Statement_Value){ .conditional = conditional }, span, if_token.location); } struct Statement* parser_statement_for(struct Parser* p, struct Parser_Error* error) { struct Token for_token = CHECK(parser_need(p, TOKEN_WORD_FOR, error)); // these are the possible for loop variants: // * `for name String = collection {}`, as iteration over container // * `for i u8 = 0, i < 10, i++ {}`, as a c-style semi-semi loop // a declaration without a signifier like `var` or `let`. struct Bare_Declaration_Node declaration = CHECK(parser_bare_declaration_node(p, error)); enum Statement_Loop_Style style = STATEMENT_LOOP_STYLE_FOR_EACH; // c-style semi-semi loop. struct Expression *condition = nil, *iteration = nil; if (parser_probe(p, TOKEN_COMMA)) { parser_next(p); condition = CHECK(parser_expression(p, error)); CHECK(parser_need(p, TOKEN_COMMA, error)); iteration = CHECK(parser_expression(p, error)); style = STATEMENT_LOOP_STYLE_C; } struct Block_Node body = CHECK(parser_block_node(p, error)); struct Span span = span_merge(for_token.span, body.span); union Statement_Value value = { .loop = { .style = style, .declaration = declaration, .condition = condition, .iteration = iteration, .body = body, }, }; return statement_new(STATEMENT_LOOP, value, span, for_token.location); } struct Statement* parser_statement_while(struct Parser* p, struct Parser_Error* error) { struct Token while_token = CHECK(parser_need(p, TOKEN_WORD_WHILE, error)); enum Statement_Loop_Style style = STATEMENT_LOOP_STYLE_ENDLESS; struct Expression* condition = nil; if (!parser_probe(p, TOKEN_CURLY_OPEN)) { condition = CHECK(parser_expression(p, error)); style = STATEMENT_LOOP_STYLE_WHILE; } struct Block_Node body = CHECK(parser_block_node(p, error)); struct Span span = span_merge(while_token.span, body.span); union Statement_Value value = { .loop = { .style = style, .condition = condition, .body = body } }; return statement_new(STATEMENT_LOOP, value, span, while_token.location); } struct Statement* parser_statement_block(struct Parser* p, struct Parser_Error* error) { struct Block_Node block = CHECK(parser_block_node(p, error)); return statement_new( STATEMENT_BLOCK, (union Statement_Value){ .block = { block } }, block.span, block.location); } struct Statement* parser_statement_return(struct Parser* p, struct Parser_Error* error) { struct Token return_token = CHECK(parser_need(p, TOKEN_WORD_RETURN, error)); struct Expression* value = nil; if (!token_ends_statement(&return_token)) { value = CHECK(parser_expression(p, error)); } struct Span span = value ? return_token.span : span_merge(return_token.span, value->span); union Statement_Value statement_value = { .return_value = { value } }; return statement_new(STATEMENT_RETURN, statement_value, span, return_token.location); } struct Statement* parser_statement_break(struct Parser* p, struct Parser_Error* error) { struct Token break_token = CHECK(parser_need(p, TOKEN_WORD_BREAK, error)); struct Span span = break_token.span; return statement_new(STATEMENT_BREAK, (union Statement_Value){ 0 }, span, break_token.location); } struct Statement* parser_statement_continue(struct Parser* p, struct Parser_Error* error) { struct Token continue_token = CHECK(parser_need(p, TOKEN_WORD_CONTINUE, error)); struct Span span = continue_token.span; return statement_new( STATEMENT_CONTINUE, (union Statement_Value){ 0 }, span, continue_token.location); } struct Statement* parser_statement_defer(struct Parser* p, struct Parser_Error* error) { struct Token defer_token = CHECK(parser_need(p, TOKEN_WORD_DEFER, error)); struct Span span = defer_token.span; struct Statement_Value_Defer defer = { 0 }; if (parser_probe(p, TOKEN_CURLY_OPEN)) { struct Block_Node block = CHECK(parser_block_node(p, error)); span = span_merge(span, block.span); defer.block = block; } else { struct Expression* expression = CHECK(parser_expression(p, error)); span = span_merge(span, expression->span); defer.expression = expression; } union Statement_Value value = { .defer = defer }; return statement_new(STATEMENT_DEFER, value, span, defer_token.location); } struct Statement* parser_statement(struct Parser* p, struct Parser_Error* error) { struct Token token = parser_peek(p); // skip empty statements. while (token_ends_statement(&token)) { if (parser_reached_end(p)) return nil; parser_next(p); token = parser_peek(p); } if (token_can_begin_declaration(&token)) return parser_statement_declaration(p, error); switch (token.kind) { case TOKEN_WORD_IF: return parser_statement_conditional(p, error); case TOKEN_WORD_FOR: return parser_statement_for(p, error); case TOKEN_WORD_WHILE: return parser_statement_while(p, error); case TOKEN_CURLY_OPEN: return parser_statement_block(p, error); case TOKEN_WORD_RETURN: return parser_statement_return(p, error); case TOKEN_WORD_BREAK: return parser_statement_break(p, error); case TOKEN_WORD_CONTINUE: return parser_statement_continue(p, error); case TOKEN_WORD_DEFER: return parser_statement_defer(p, error); default: break; } struct Expression* expression = CHECK(parser_expression(p, error)); // expand by one byte to include the statement terminator. struct Span span = span_expand(expression->span, 1); union Statement_Value value = { .expression.inner = expression }; return statement_new(STATEMENT_EXPRESSION, value, span, expression->location); } // parse the lexer tokens into a single AST. // note: it was either `parser_parse` or this. :) struct Tree parser_do_your_thing(struct Parser* p, struct Parser_Error* error) { struct Statement* head = nil; struct Statement* current = nil; while (!parser_reached_end(p)) { struct Statement* next = CHECK_RETURN(parser_statement(p, error), struct Tree); if (!next) break; // on eof CHECK_RETURN(parser_end_statement(p, error), struct Tree); if (current) { current->next = next; } else { head = next; } current = next; } *error = parser_error_none(); return (struct Tree){ head }; } #undef CHECK #undef CHECK_RETURN