about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMel <mel@rnrd.eu>2025-05-21 16:19:59 +0200
committerMel <mel@rnrd.eu>2025-05-21 16:19:59 +0200
commit1e07cecafc62d18ba05f21101e13b131a10e8c45 (patch)
treee036129c57b734292ff15b71a1d193aa0a97c678
parent8c8d65f026121b4d75a31bff8a91c3fbf969fca5 (diff)
downloadcatskill-1e07cecafc62d18ba05f21101e13b131a10e8c45.tar.zst
catskill-1e07cecafc62d18ba05f21101e13b131a10e8c45.zip
Basic expression parser with operator precedence and associativity
Signed-off-by: Mel <mel@rnrd.eu>
-rw-r--r--Makefile2
-rw-r--r--boot/catboot.c18
-rw-r--r--boot/lex.c48
-rw-r--r--boot/parse.c470
-rw-r--r--boot/tree.c149
5 files changed, 686 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 91d1853..0d4eb74 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 CFLAGS ?= -std=c99 -Wall -Werror -static -g -O0
 .DEFAULT_GOAL := all
 
-BOOTSTRAP_SOURCES = boot/catboot.c boot/common.c boot/lex.c boot/tree.c
+BOOTSTRAP_SOURCES = boot/catboot.c boot/common.c boot/lex.c boot/tree.c boot/parse.c
 SOURCES = src/catskill.csk
 
 build/catskill: build $(SOURCES)
diff --git a/boot/catboot.c b/boot/catboot.c
index fec47c0..23b649e 100644
--- a/boot/catboot.c
+++ b/boot/catboot.c
@@ -23,6 +23,7 @@
 #include "common.c"
 #include "lex.c"
 #include "tree.c"
+#include "parse.c"
 
 const ascii*
 read_file(const ascii* path)
@@ -57,10 +58,27 @@ main(const int32 argc, const ascii* argv[])
     lexer_new(&lexer, source);
 
     struct Token token;
+    printf("tokens: ");
     do {
         token = lexer_next(&lexer);
         printf("%s ", token_kind_to_string(token.kind));
     } while (token.kind != TOKEN_END_OF_FILE);
+    printf("\n");
+
+    // reset lexer
+    lexer_new(&lexer, source);
+
+    struct Parser parser;
+    parser_new(&parser, &lexer);
+
+    struct Parser_Error parser_error = parser_error_none();
+    struct Tree tree = parser_do_your_thing(&parser, &parser_error);
+    if (!parser_error_is_none(&parser_error)) {
+        printf("parser error: %s\n", parser_error_to_string(&parser_error));
+        return EXIT_FAILURE;
+    }
+
+    tree_print(&tree);
 
     return EXIT_SUCCESS;
 }
diff --git a/boot/lex.c b/boot/lex.c
index 9a94c55..e8432f6 100644
--- a/boot/lex.c
+++ b/boot/lex.c
@@ -28,6 +28,30 @@ span_width(Pos start, uint width)
     return (struct Span){ .start = start, .end = start + width };
 }
 
+// create a span which encompasses two spans.
+struct Span
+span_merge(struct Span a, struct Span b)
+{
+    return (struct Span){
+        .start = a.start < b.start ? a.start : b.start,
+        .end = a.end > b.end ? a.end : b.end,
+    };
+}
+
+// expand a span by a number of bytes.
+// negative number expands the span to the left,
+// positive number expands the span to the right.
+struct Span
+span_expand(struct Span span, integer by)
+{
+    uint start_expand = by < 0 ? -by : 0;
+    uint end_expand = by > 0 ? by : 0;
+    return (struct Span){
+        .start = span.start - start_expand,
+        .end = span.end + end_expand,
+    };
+}
+
 // a cursor position placed within a text file.
 struct Cursor
 {
@@ -235,6 +259,30 @@ token_simple(enum Token_Kind kind, Pos pos, struct Cursor location)
     return token_wide(kind, (struct Span){ pos, pos }, location);
 }
 
+struct Token
+token_none()
+{
+    return token_simple(TOKEN_NONE, 0, (struct Cursor){ 0 });
+}
+
+bool
+token_is(const struct Token* t, enum Token_Kind kind)
+{
+    return t->kind == kind;
+}
+
+bool
+token_is_empty(const struct Token* t)
+{
+    return token_is(t, TOKEN_NONE);
+}
+
+bool
+token_ends_statement(const struct Token* t)
+{
+    return token_is(t, TOKEN_END_OF_FILE) || token_is(t, TOKEN_NEWLINE);
+}
+
 bool
 ascii_in_range(ascii c, ascii from, ascii to)
 {
diff --git a/boot/parse.c b/boot/parse.c
new file mode 100644
index 0000000..38cb712
--- /dev/null
+++ b/boot/parse.c
@@ -0,0 +1,470 @@
+/*
+ * very simple handwritten recursive descent
+ * parser for a catskill source file.
+ */
+
+#define PARSER_LOOKAHEAD 2
+
+#define CHECK(parse) \
+    parse;           \
+    if (!parser_error_is_none(error)) return nil;
+
+struct Parser_Error
+{
+    enum Parser_Error_Kind
+    {
+        PARSER_ERROR_NONE,
+        PARSER_ERROR_UNEXPECTED_TOKEN,
+        PARSER_ERROR_UNEXPECTED_EOF,
+        PARSER_ERROR_EXPECTED_STATEMENT_END,
+        PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION,
+    } kind;
+    // TODO: add span to error
+};
+
+struct Parser_Error
+parser_error(enum Parser_Error_Kind kind)
+{
+    return (struct Parser_Error){ kind };
+}
+
+struct Parser_Error
+parser_error_none()
+{
+    return parser_error(PARSER_ERROR_NONE);
+}
+
+bool
+parser_error_is_none(const struct Parser_Error* error)
+{
+    return error->kind == PARSER_ERROR_NONE;
+}
+
+const ascii*
+parser_error_to_string(const struct Parser_Error* error)
+{
+    switch (error->kind) {
+    case PARSER_ERROR_NONE:
+        return "none";
+    case PARSER_ERROR_UNEXPECTED_TOKEN:
+        return "unexpected token";
+    case PARSER_ERROR_UNEXPECTED_EOF:
+        return "unexpected end of file";
+    case PARSER_ERROR_EXPECTED_STATEMENT_END:
+        return "expected statement end";
+    case PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION:
+        return "expected primary expression";
+    default:
+        return "unknown error";
+    }
+}
+
+struct Parser
+{
+    struct Lexer* lexer;
+    struct Token lookahead[PARSER_LOOKAHEAD];
+};
+
+void
+parser_new(struct Parser* p, struct Lexer* lexer)
+{
+    p->lexer = lexer;
+    memset(p->lookahead, 0, sizeof(p->lookahead));
+}
+
+bool
+parser_lookahead_pop(struct Parser* p, struct Token* token)
+{
+    struct Token head = p->lookahead[0];
+    if (token_is_empty(&head)) return false;
+
+    for (uint i = 0; i < PARSER_LOOKAHEAD - 1; i++) p->lookahead[i] = p->lookahead[i + 1];
+    p->lookahead[PARSER_LOOKAHEAD - 1] = token_none();
+
+    *token = head;
+    return true;
+}
+
+bool
+parser_lookahead_push(struct Parser* p, struct Token token)
+{
+    for (uint i = 0; i < PARSER_LOOKAHEAD; i++) {
+        if (token_is_empty(&p->lookahead[i])) {
+            p->lookahead[i] = token;
+            return true;
+        }
+    }
+    return false;
+}
+
+// advance the token stream and return the next token.
+struct Token
+parser_next(struct Parser* p)
+{
+    struct Token token;
+    if (!parser_lookahead_pop(p, &token)) token = lexer_next(p->lexer);
+    return token;
+}
+
+// advance the token stream if the token is of the right type.
+// if not, return none-token and set the error.
+struct Token
+parser_need(struct Parser* p, enum Token_Kind kind, struct Parser_Error* error)
+{
+    struct Token token = parser_next(p);
+    if (!token_is(&token, kind)) {
+        *error = parser_error(PARSER_ERROR_UNEXPECTED_TOKEN);
+        return token_none();
+    }
+    return token;
+}
+
+// peek at the token stream at given index, without advancing it.
+// index 0 is the token that would be returned by a next `parser_next` call.
+// peek limit is defined by `PARSER_LOOKAHEAD`.
+struct Token
+parser_peek_at(struct Parser* p, uint index)
+{
+    check(index < 2, "parser peek index out of range");
+
+    while (token_is_empty(&p->lookahead[index])) {
+        struct Token next_token = lexer_next(p->lexer);
+        parser_lookahead_push(p, next_token);
+    }
+
+    return p->lookahead[index];
+}
+
+// peek at the next token in the stream, without advancing it.
+// synonym for `parser_peek_at(p, 0)`.
+struct Token
+parser_peek(struct Parser* p)
+{
+    return parser_peek_at(p, 0);
+}
+
+// peek at the token one beyond the current cursor in the stream, without advancing it.
+// synonym for `parser_peek_at(p, 1)`.
+struct Token
+parser_peek_further(struct Parser* p)
+{
+    return parser_peek_at(p, 1);
+}
+
+// check if the next token is of a given type.
+bool
+parser_probe(struct Parser* p, enum Token_Kind kind)
+{
+    struct Token token = parser_peek(p);
+    return token_is(&token, kind);
+}
+
+struct Expression* parser_expression(struct Parser* p, struct Parser_Error* error);
+
+struct Expression*
+parser_expression_primary_name(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = CHECK(parser_need(p, TOKEN_NAME, error));
+    union Expression_Value value = { .name = { token.value.name } };
+    return expression_new(EXPRESSION_NAME, value, token.span, token.location);
+}
+
+struct Expression*
+parser_expression_primary_integer(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = CHECK(parser_need(p, TOKEN_LITERAL_INTEGER, error));
+    union Expression_Value value = { .integer_literal = { token.value.literal_integer } };
+    return expression_new(EXPRESSION_INTEGER_LITERAL, value, token.span, token.location);
+}
+
+struct Expression*
+parser_expression_primary_float(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = CHECK(parser_need(p, TOKEN_LITERAL_FLOAT, error));
+    union Expression_Value value = { .float_literal = { token.value.literal_float } };
+    return expression_new(EXPRESSION_FLOAT_LITERAL, value, token.span, token.location);
+}
+
+struct Expression*
+parser_expression_primary_string(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = CHECK(parser_need(p, TOKEN_LITERAL_STRING, error));
+    union Expression_Value value = { .string_literal = { token.value.literal_string } };
+    return expression_new(EXPRESSION_STRING_LITERAL, value, token.span, token.location);
+}
+
+struct Expression*
+parser_expression_primary_group(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token start_token = CHECK(parser_need(p, TOKEN_ROUND_OPEN, error));
+    struct Expression* expression = CHECK(parser_expression(p, error));
+    struct Token end_token = CHECK(parser_need(p, TOKEN_ROUND_OPEN, error));
+
+    struct Span span = span_merge(start_token.span, end_token.span);
+    union Expression_Value value = { .group = { expression } };
+    return expression_new(EXPRESSION_GROUP, value, span, start_token.location);
+}
+
+struct Expression*
+parser_expression_primary(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = parser_peek(p);
+    switch (token.kind) {
+    case TOKEN_NAME:
+        return parser_expression_primary_name(p, error);
+    case TOKEN_LITERAL_INTEGER:
+        return parser_expression_primary_integer(p, error);
+    case TOKEN_LITERAL_FLOAT:
+        return parser_expression_primary_float(p, error);
+    case TOKEN_LITERAL_STRING:
+        return parser_expression_primary_string(p, error);
+    case TOKEN_ROUND_OPEN:
+        return parser_expression_primary_group(p, error);
+    // TODO: boolean literals.
+    default:
+        *error = parser_error(PARSER_ERROR_EXPECTED_PRIMARY_EXPRESSION);
+        return nil;
+    }
+}
+
+struct Expression*
+parser_expression_member(struct Parser* p, struct Parser_Error* error)
+{
+    struct Expression* left = CHECK(parser_expression_primary(p, error));
+
+    // NOTE: see `parser_expression_postfix_call`.
+    while (parser_probe(p, TOKEN_DOT)) {
+        parser_next(p);
+        struct Token name_token = CHECK(parser_need(p, TOKEN_NAME, error));
+        struct String name = name_token.value.name;
+
+        struct Span span = span_merge(left->span, name_token.span);
+        union Expression_Value value = { .member = { left, name } };
+        left = expression_new(EXPRESSION_MEMBER, value, span, name_token.location);
+    }
+
+    return left;
+}
+
+struct Expression*
+parser_expression_postfix_call(
+    struct Parser* p, struct Expression* subject, struct Parser_Error* error)
+{
+    // NOTE: because of the way the parser works, we have to parse all subsequent
+    // call expressions in the same loop.
+    // this is the case with an expression like `meow_function()(123)`,
+    // where the hypothetical `meow_function` returns a function pointer.
+    while (parser_probe(p, TOKEN_ROUND_OPEN)) {
+        parser_next(p);
+
+        struct Expression *arguments_head = nil, *arguments_current = nil;
+        while (!parser_probe(p, TOKEN_ROUND_CLOSE)) {
+            struct Expression* argument = CHECK(parser_expression(p, error));
+            if (!arguments_head)
+                arguments_head = argument;
+            else
+                arguments_current->next = argument;
+            arguments_current = argument;
+            if (parser_probe(p, TOKEN_COMMA)) parser_next(p);
+        }
+
+        struct Token token = CHECK(parser_need(p, TOKEN_ROUND_CLOSE, error));
+        struct Span span = span_merge(subject->span, token.span);
+        union Expression_Value value = { .call = { subject, arguments_head } };
+        subject = expression_new(EXPRESSION_CALL, value, span, token.location);
+    }
+
+    return subject;
+}
+
+struct Expression*
+parser_expression_postfix_subscript(
+    struct Parser* p, struct Expression* subject, struct Parser_Error* error)
+{
+    // NOTE: see `parser_expression_postfix_call`.
+    while (parser_probe(p, TOKEN_SQUARE_OPEN)) {
+        parser_next(p);
+
+        struct Expression* index = CHECK(parser_expression(p, error));
+        struct Token token = CHECK(parser_need(p, TOKEN_SQUARE_CLOSE, error));
+
+        struct Span span = span_merge(subject->span, span_merge(index->span, token.span));
+        union Expression_Value value = { .subscript = { subject, index } };
+        subject = expression_new(EXPRESSION_SUBSCRIPT, value, span, token.location);
+    }
+    return subject;
+}
+
+struct Expression*
+parser_expression_postfix(struct Parser* p, struct Parser_Error* error)
+{
+    struct Expression* expression = CHECK(parser_expression_member(p, error));
+
+    switch (parser_peek(p).kind) {
+    case TOKEN_ROUND_OPEN:
+        return parser_expression_postfix_call(p, expression, error);
+    case TOKEN_SQUARE_OPEN:
+        return parser_expression_postfix_subscript(p, expression, error);
+    default:
+        return expression;
+    }
+}
+
+struct Expression*
+parser_expression_unary_operation(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = parser_peek(p);
+    enum Unary_Operation operation = unary_operation_from_token(&token);
+    if (operation) {
+        parser_next(p);
+        struct Expression* operand = CHECK(parser_expression_unary_operation(p, error));
+
+        struct Span span = span_merge(token.span, operand->span);
+        union Expression_Value value = { .unary_operator = { operation, operand } };
+        return expression_new(EXPRESSION_UNARY_OPERATION, value, span, token.location);
+    }
+
+    return parser_expression_postfix(p, error);
+}
+
+// given two expressions and some kind of binary operation between them,
+// merge them into a single binary expression, with attention to operator precedence
+// and associativity.
+struct Expression_Binary_Operator
+parser_merge_into_single_binary_expression(
+    struct Parser* p, struct Expression* left, enum Binary_Operation operation,
+    struct Expression* right)
+{
+    // NOTE: due to the parser structure, the left expression is never a binary operation
+    // so we can worry about fixing up the right side only.
+    check(left->kind != EXPRESSION_BINARY_OPERATION, "left expression is a binary operation");
+    if (right->kind == EXPRESSION_BINARY_OPERATION) {
+        struct Expression_Binary_Operator right_binary = right->value.binary_operator;
+
+        uint right_precedence = binary_operation_precedence(right_binary.operation);
+        uint precedence = binary_operation_precedence(operation);
+
+        bool switch_due_to_precedence = right_precedence < precedence;
+        bool switch_due_to_associativity =
+            precedence == right_precedence
+            && binary_operation_associativity(operation) == BINARY_ASSOCIATIVITY_LEFT;
+
+        // essentially check if an expression like `a ~ (b ~ c)` needs to be
+        // switched into `(a ~ b) ~ c` or not.
+        if (switch_due_to_precedence || switch_due_to_associativity) {
+            // since we only use static memory, we need to switch the operands around
+            // without allocating more expressions, thus we have to reuse the allocation
+            // slots of the previous expressions.
+            struct Expression* operands[3] = {
+                left, right_binary.left_operand, right_binary.right_operand
+            };
+            struct Expression* slots[2] = { right, right_binary.right_operand };
+
+            struct Expression_Binary_Operator new_binary_operator =
+                parser_merge_into_single_binary_expression(p, operands[0], operation, operands[1]);
+            *slots[0] = (struct Expression){
+                .kind = EXPRESSION_BINARY_OPERATION,
+                .value = { .binary_operator = new_binary_operator },
+                .span = span_merge(left->span, right->span),
+                .location = left->location
+            };
+
+            *slots[1] = *operands[2];
+
+            left = slots[0];
+            right = slots[1];
+            operation = right_binary.operation;
+        }
+    }
+
+    return (struct Expression_Binary_Operator){
+        .operation = operation,
+        .left_operand = left,
+        .right_operand = right,
+    };
+}
+
+struct Expression*
+parser_expression_binary_operation(struct Parser* p, struct Parser_Error* error)
+{
+    struct Expression* left = CHECK(parser_expression_unary_operation(p, error));
+
+    struct Token token = parser_peek(p);
+    enum Binary_Operation operation = binary_operation_from_token(&token);
+    if (operation) {
+        parser_next(p);
+        struct Expression* right = CHECK(parser_expression_binary_operation(p, error));
+
+        struct Span span = span_merge(left->span, right->span);
+        struct Expression_Binary_Operator binary_value =
+            parser_merge_into_single_binary_expression(p, left, operation, right);
+        union Expression_Value value = { .binary_operator = binary_value };
+        return expression_new(EXPRESSION_BINARY_OPERATION, value, span, left->location);
+    }
+
+    return left;
+}
+
+struct Expression*
+parser_expression(struct Parser* p, struct Parser_Error* error)
+{
+    return parser_expression_binary_operation(p, error);
+}
+
+void
+parser_end_statement(struct Parser* p, struct Parser_Error* error)
+{
+    struct Token token = parser_peek(p);
+    if (!token_ends_statement(&token)) {
+        *error = parser_error(PARSER_ERROR_EXPECTED_STATEMENT_END);
+        return;
+    }
+    parser_next(p);
+}
+
+struct Statement*
+parser_statement(struct Parser* p, struct Parser_Error* error)
+{
+    // skip empty statements.
+    struct Token token = parser_peek(p);
+    if (token_ends_statement(&token)) {
+        parser_next(p);
+        return nil;
+    }
+
+    // TODO: no statements for now, just go straight to expressions.
+    struct Expression* expression = CHECK(parser_expression(p, error));
+
+    CHECK(parser_end_statement(p, error));
+
+    // expand by one byte to include the statement terminator.
+    struct Span span = span_expand(expression->span, 1);
+    union Statement_Value value = { .expression = expression };
+    return statement_new(STATEMENT_EXPRESSION, value, span, expression->location);
+}
+
+// parse the lexer tokens into a single AST.
+// note: it was either `parser_parse` or this. :)
+struct Tree
+parser_do_your_thing(struct Parser* p, struct Parser_Error* error)
+{
+    struct Statement* head = nil;
+
+    struct Statement* current = nil;
+    while (!p->lexer->eof) {
+        struct Statement* next = parser_statement(p, error);
+        if (!parser_error_is_none(error)) return (struct Tree){ nil };
+
+        if (current) {
+            current->next = next;
+        } else {
+            head = next;
+        }
+        current = next;
+    }
+
+    *error = parser_error_none();
+    return (struct Tree){ head };
+}
+
+#undef CHECK
\ No newline at end of file
diff --git a/boot/tree.c b/boot/tree.c
index 339f12c..4fb4b73 100644
--- a/boot/tree.c
+++ b/boot/tree.c
@@ -4,12 +4,31 @@
 
 enum Unary_Operation
 {
+    UNARY_NONE,
+
     UNARY_MINUS,
     UNARY_NOT,
 
     UNARY_BITWISE_NOT,
 };
 
+enum Unary_Operation
+unary_operation_from_token(const struct Token* token)
+{
+    switch (token->kind) {
+    case TOKEN_MINUS:
+        return UNARY_MINUS;
+    case TOKEN_NOT:
+        return UNARY_NOT;
+        // TODO: tilde token
+        // case TOKEN_TILDE:
+        //    return UNARY_BITWISE_NOT;
+
+    default:
+        return UNARY_NONE;
+    }
+}
+
 const ascii*
 unary_operation_to_string(enum Unary_Operation operation)
 {
@@ -28,6 +47,8 @@ unary_operation_to_string(enum Unary_Operation operation)
 
 enum Binary_Operation
 {
+    BINARY_NONE,
+
     BINARY_PLUS,
     BINARY_MINUS,
     BINARY_MULTIPLY,
@@ -50,6 +71,101 @@ enum Binary_Operation
     BINARY_BITWISE_RIGHT_SHIFT,
 };
 
+enum Binary_Operation
+binary_operation_from_token(const struct Token* token)
+{
+    switch (token->kind) {
+    case TOKEN_PLUS:
+        return BINARY_PLUS;
+    case TOKEN_MINUS:
+        return BINARY_MINUS;
+    case TOKEN_STAR:
+        return BINARY_MULTIPLY;
+    case TOKEN_SLASH:
+        return BINARY_DIVIDE;
+        // TODO: percent token
+        // case TOKEN_PERCENT:
+        // return BINARY_MODULO;
+
+    case TOKEN_EQUAL:
+        return BINARY_EQUAL;
+    case TOKEN_NOT_EQUAL:
+        return BINARY_NOT_EQUAL;
+    case TOKEN_GREATER:
+        return BINARY_GREATER_THAN;
+    case TOKEN_GREATER_EQUAL:
+        return BINARY_GREATER_THAN_EQUAL;
+    case TOKEN_LESS:
+        return BINARY_LESS_THAN;
+    case TOKEN_LESS_EQUAL:
+        return BINARY_LESS_THAN_EQUAL;
+    case TOKEN_AND:
+        return BINARY_AND;
+    case TOKEN_OR:
+        return BINARY_OR;
+
+    default:
+        return BINARY_NONE;
+    }
+}
+
+// return the precedence of the given binary operation.
+// lower values are weaker, with 1 being the weakest.
+// the values are taken mostly the same as other C-family languages.
+uint
+binary_operation_precedence(enum Binary_Operation operation)
+{
+    switch (operation) {
+    // weakest
+    case BINARY_OR:
+        return 1;
+    case BINARY_AND:
+        return 2;
+    case BINARY_BITWISE_OR:
+        return 3;
+    case BINARY_BITWISE_XOR:
+        return 4;
+    case BINARY_BITWISE_AND:
+        return 5;
+    case BINARY_EQUAL:
+    case BINARY_NOT_EQUAL:
+        return 6;
+    case BINARY_GREATER_THAN:
+    case BINARY_GREATER_THAN_EQUAL:
+    case BINARY_LESS_THAN:
+    case BINARY_LESS_THAN_EQUAL:
+        return 7;
+    case BINARY_BITWISE_LEFT_SHIFT:
+    case BINARY_BITWISE_RIGHT_SHIFT:
+        return 8;
+    case BINARY_PLUS:
+    case BINARY_MINUS:
+        return 9;
+    case BINARY_MULTIPLY:
+    case BINARY_DIVIDE:
+    case BINARY_MODULO:
+        return 10;
+        // strongest
+
+    default:
+        return 0;
+    }
+}
+
+enum Binary_Operation_Associativity
+{
+    BINARY_ASSOCIATIVITY_NONE,
+    BINARY_ASSOCIATIVITY_LEFT,
+    BINARY_ASSOCIATIVITY_RIGHT,
+};
+
+enum Binary_Operation_Associativity
+binary_operation_associativity(enum Binary_Operation operation)
+{
+    // all operations are left associative by default.
+    return BINARY_ASSOCIATIVITY_LEFT;
+}
+
 const ascii*
 binary_operation_to_string(enum Binary_Operation operation)
 {
@@ -208,6 +324,23 @@ struct Expression
 
 REGION(struct Expression, expression)
 
+struct Expression*
+expression_new(
+    enum Expression_Kind kind, union Expression_Value value, struct Span span,
+    struct Cursor location)
+{
+    check(region_expression_cursor < REGION_SIZE, "out of expression memory");
+    struct Expression* expression = &region_expression[region_expression_cursor++];
+    *expression = (struct Expression){
+        .kind = kind,
+        .value = value,
+        .span = span,
+        .location = location,
+        .next = nil,
+    };
+    return expression;
+}
+
 void
 expression_print(const struct Expression* expression)
 {
@@ -303,6 +436,22 @@ struct Statement
 
 REGION(struct Statement, statement)
 
+struct Statement*
+statement_new(
+    enum Statement_Kind kind, union Statement_Value value, struct Span span, struct Cursor location)
+{
+    check(region_statement_cursor < REGION_SIZE, "out of statement memory");
+    struct Statement* statement = &region_statement[region_statement_cursor++];
+    *statement = (struct Statement){
+        .kind = kind,
+        .value = value,
+        .span = span,
+        .location = location,
+        .next = nil,
+    };
+    return statement;
+}
+
 void
 statement_print(const struct Statement* statement)
 {