/* * abstract syntax tree types for a catskill source. */ enum Unary_Operation { UNARY_NONE, UNARY_MINUS, UNARY_NOT, UNARY_BITWISE_NOT, }; enum Unary_Operation unary_operation_from_token(const struct Token* token) { switch (token->kind) { case TOKEN_MINUS: return UNARY_MINUS; case TOKEN_BANG: return UNARY_NOT; case TOKEN_TILDE: return UNARY_BITWISE_NOT; default: return UNARY_NONE; } } const ascii* unary_operation_to_string(enum Unary_Operation operation) { switch (operation) { case UNARY_MINUS: return "-"; case UNARY_NOT: return "!"; case UNARY_BITWISE_NOT: return "~"; default: failure("unexpected unary operation passed to `unary_operation_to_string`"); return nil; } } enum Binary_Operation { BINARY_NONE, BINARY_PLUS, BINARY_MINUS, BINARY_MULTIPLY, BINARY_DIVIDE, BINARY_MODULO, BINARY_EQUAL, BINARY_NOT_EQUAL, BINARY_GREATER_THAN, BINARY_GREATER_THAN_EQUAL, BINARY_LESS_THAN, BINARY_LESS_THAN_EQUAL, BINARY_AND, BINARY_OR, BINARY_BITWISE_AND, BINARY_BITWISE_OR, BINARY_BITWISE_XOR, BINARY_BITWISE_LEFT_SHIFT, BINARY_BITWISE_RIGHT_SHIFT, BINARY_ASSIGN, BINARY_ASSIGN_PLUS, BINARY_ASSIGN_MINUS, BINARY_ASSIGN_MULTIPLY, BINARY_ASSIGN_DIVIDE, BINARY_ASSIGN_MODULO, BINARY_ASSIGN_AND, BINARY_ASSIGN_OR, BINARY_ASSIGN_BITWISE_AND, BINARY_ASSIGN_BITWISE_OR, BINARY_ASSIGN_BITWISE_XOR, BINARY_ASSIGN_BITWISE_LEFT_SHIFT, BINARY_ASSIGN_BITWISE_RIGHT_SHIFT, }; enum Binary_Operation binary_operation_from_token(const struct Token* token) { switch (token->kind) { case TOKEN_PLUS: return BINARY_PLUS; case TOKEN_MINUS: return BINARY_MINUS; case TOKEN_STAR: return BINARY_MULTIPLY; case TOKEN_SLASH: return BINARY_DIVIDE; case TOKEN_PERCENT: return BINARY_MODULO; case TOKEN_EQUAL: return BINARY_EQUAL; case TOKEN_NOT_EQUAL: return BINARY_NOT_EQUAL; case TOKEN_GREATER: return BINARY_GREATER_THAN; case TOKEN_GREATER_EQUAL: return BINARY_GREATER_THAN_EQUAL; case TOKEN_LESS: return BINARY_LESS_THAN; case TOKEN_LESS_EQUAL: return BINARY_LESS_THAN_EQUAL; case TOKEN_AND: return BINARY_AND; case TOKEN_OR: return BINARY_OR; case TOKEN_AMPERSAND: return BINARY_BITWISE_AND; case TOKEN_PIPE: return BINARY_BITWISE_OR; case TOKEN_CARET: return BINARY_BITWISE_XOR; case TOKEN_LEFT_SHIFT: return BINARY_BITWISE_LEFT_SHIFT; case TOKEN_RIGHT_SHIFT: return BINARY_BITWISE_RIGHT_SHIFT; case TOKEN_ASSIGN: return BINARY_ASSIGN; case TOKEN_ASSIGN_PLUS: return BINARY_ASSIGN_PLUS; case TOKEN_ASSIGN_MINUS: return BINARY_ASSIGN_MINUS; case TOKEN_ASSIGN_STAR: return BINARY_ASSIGN_MULTIPLY; case TOKEN_ASSIGN_SLASH: return BINARY_ASSIGN_DIVIDE; case TOKEN_ASSIGN_PERCENT: return BINARY_ASSIGN_MODULO; case TOKEN_ASSIGN_AND: return BINARY_ASSIGN_AND; case TOKEN_ASSIGN_OR: return BINARY_ASSIGN_OR; case TOKEN_ASSIGN_AMPERSAND: return BINARY_ASSIGN_BITWISE_AND; case TOKEN_ASSIGN_PIPE: return BINARY_ASSIGN_BITWISE_OR; case TOKEN_ASSIGN_CARET: return BINARY_ASSIGN_BITWISE_XOR; case TOKEN_ASSIGN_LEFT_SHIFT: return BINARY_ASSIGN_BITWISE_LEFT_SHIFT; case TOKEN_ASSIGN_RIGHT_SHIFT: return BINARY_ASSIGN_BITWISE_RIGHT_SHIFT; default: return BINARY_NONE; } } // return the precedence of the given binary operation. // lower values are weaker, with 1 being the weakest. // the values are taken mostly the same as other C-family languages. uint binary_operation_precedence(enum Binary_Operation operation) { switch (operation) { // weakest case BINARY_ASSIGN: case BINARY_ASSIGN_PLUS: case BINARY_ASSIGN_MINUS: case BINARY_ASSIGN_MULTIPLY: case BINARY_ASSIGN_DIVIDE: case BINARY_ASSIGN_MODULO: case BINARY_ASSIGN_AND: case BINARY_ASSIGN_OR: case BINARY_ASSIGN_BITWISE_AND: case BINARY_ASSIGN_BITWISE_OR: case BINARY_ASSIGN_BITWISE_XOR: case BINARY_ASSIGN_BITWISE_LEFT_SHIFT: case BINARY_ASSIGN_BITWISE_RIGHT_SHIFT: return 1; case BINARY_OR: return 2; case BINARY_AND: return 3; case BINARY_BITWISE_OR: return 4; case BINARY_BITWISE_XOR: return 5; case BINARY_BITWISE_AND: return 6; case BINARY_EQUAL: case BINARY_NOT_EQUAL: return 7; case BINARY_GREATER_THAN: case BINARY_GREATER_THAN_EQUAL: case BINARY_LESS_THAN: case BINARY_LESS_THAN_EQUAL: return 8; case BINARY_BITWISE_LEFT_SHIFT: case BINARY_BITWISE_RIGHT_SHIFT: return 9; case BINARY_PLUS: case BINARY_MINUS: return 10; case BINARY_MULTIPLY: case BINARY_DIVIDE: case BINARY_MODULO: return 11; // strongest default: failure("unexpected binary operation passed to `binary_operation_precedence`"); return 0; } } enum Binary_Operation_Associativity { BINARY_ASSOCIATIVITY_NONE, BINARY_ASSOCIATIVITY_LEFT, BINARY_ASSOCIATIVITY_RIGHT, }; enum Binary_Operation_Associativity binary_operation_associativity(enum Binary_Operation operation) { switch (operation) { case BINARY_ASSIGN: case BINARY_ASSIGN_PLUS: case BINARY_ASSIGN_MINUS: case BINARY_ASSIGN_MULTIPLY: case BINARY_ASSIGN_DIVIDE: case BINARY_ASSIGN_MODULO: case BINARY_ASSIGN_AND: case BINARY_ASSIGN_OR: case BINARY_ASSIGN_BITWISE_AND: case BINARY_ASSIGN_BITWISE_OR: case BINARY_ASSIGN_BITWISE_XOR: case BINARY_ASSIGN_BITWISE_LEFT_SHIFT: return BINARY_ASSOCIATIVITY_RIGHT; default: return BINARY_ASSOCIATIVITY_LEFT; } } const ascii* binary_operation_to_string(enum Binary_Operation operation) { switch (operation) { case BINARY_PLUS: return "+"; case BINARY_MINUS: return "-"; case BINARY_MULTIPLY: return "*"; case BINARY_DIVIDE: return "/"; case BINARY_MODULO: return "%"; case BINARY_EQUAL: return "=="; case BINARY_NOT_EQUAL: return "!="; case BINARY_GREATER_THAN: return ">"; case BINARY_GREATER_THAN_EQUAL: return ">="; case BINARY_LESS_THAN: return "<"; case BINARY_LESS_THAN_EQUAL: return "<="; case BINARY_AND: return "&&"; case BINARY_OR: return "||"; case BINARY_BITWISE_AND: return "&"; case BINARY_BITWISE_OR: return "|"; case BINARY_BITWISE_XOR: return "^"; case BINARY_BITWISE_LEFT_SHIFT: return "<<"; case BINARY_BITWISE_RIGHT_SHIFT: return ">>"; case BINARY_ASSIGN: return "="; case BINARY_ASSIGN_PLUS: return "+="; case BINARY_ASSIGN_MINUS: return "-="; case BINARY_ASSIGN_MULTIPLY: return "*="; case BINARY_ASSIGN_DIVIDE: return "/="; case BINARY_ASSIGN_MODULO: return "%="; case BINARY_ASSIGN_AND: return "&&="; case BINARY_ASSIGN_OR: return "||="; case BINARY_ASSIGN_BITWISE_AND: return "&="; case BINARY_ASSIGN_BITWISE_OR: return "|="; case BINARY_ASSIGN_BITWISE_XOR: return "^="; case BINARY_ASSIGN_BITWISE_LEFT_SHIFT: return "<<="; case BINARY_ASSIGN_BITWISE_RIGHT_SHIFT: return ">>="; default: failure("unexpected binary operation passed to `binary_operation_to_string`"); return nil; } } // nodes are parts of the syntax tree that are reused often // and in different places. // a type node represents a type in the syntax tree. // currently, we only support types that are simple names. struct Type_Node { // note: we could also just include the token here i think? struct String name; struct Span span; struct Cursor location; }; enum Expression_Kind { EXPRESSION_NONE, EXPRESSION_INTEGER_LITERAL, EXPRESSION_FLOAT_LITERAL, EXPRESSION_STRING_LITERAL, EXPRESSION_BOOLEAN_LITERAL, EXPRESSION_NAME, EXPRESSION_UNARY_OPERATION, EXPRESSION_BINARY_OPERATION, EXPRESSION_GROUP, EXPRESSION_CALL, EXPRESSION_SUBSCRIPT, EXPRESSION_MEMBER, }; struct Expression_Integer_Literal { int64 value; // might not fit entire number given in source. }; struct Expression_Float_Literal { float64 value; }; struct Expression_String_Literal { struct String value; }; struct Expression_Bool_Literal { bool value; }; struct Expression_Name { struct String name; }; struct Expression_Unary_Operator { enum Unary_Operation operation; struct Expression* operand; }; struct Expression_Binary_Operator { enum Binary_Operation operation; struct Expression* left_operand; struct Expression* right_operand; }; struct Expression_Group { struct Expression* inner_expression; }; struct Expression_Call { struct Expression* subject; struct Expression* arguments; // linked list of expressions. }; struct Expression_Subscript { struct Expression* subject; struct Expression* index; }; struct Expression_Member { struct Expression* subject; struct String name; }; union Expression_Value { struct Expression_Integer_Literal integer_literal; struct Expression_Float_Literal float_literal; struct Expression_String_Literal string_literal; struct Expression_Bool_Literal bool_literal; struct Expression_Name name; struct Expression_Unary_Operator unary_operator; struct Expression_Binary_Operator binary_operator; struct Expression_Group group; struct Expression_Call call; struct Expression_Subscript subscript; struct Expression_Member member; }; struct Expression { enum Expression_Kind kind; union Expression_Value value; struct Span span; struct Cursor location; // if expression is within a group of multiple expressions, // points to the next expression within it. struct Expression* next; }; REGION(struct Expression, expression) struct Expression* expression_new( enum Expression_Kind kind, union Expression_Value value, struct Span span, struct Cursor location) { check(region_expression_cursor < REGION_SIZE, "out of expression memory"); struct Expression* expression = ®ion_expression[region_expression_cursor++]; *expression = (struct Expression){ .kind = kind, .value = value, .span = span, .location = location, .next = nil, }; return expression; } void expression_print(const struct Expression* expression) { printf("(expr "); switch (expression->kind) { case EXPRESSION_NONE: printf("none"); break; case EXPRESSION_INTEGER_LITERAL: printf("%ld", expression->value.integer_literal.value); break; case EXPRESSION_FLOAT_LITERAL: printf("%lf", expression->value.float_literal.value); break; case EXPRESSION_STRING_LITERAL: printf("\"%s\"", expression->value.string_literal.value.data); break; case EXPRESSION_BOOLEAN_LITERAL: printf("%s", expression->value.bool_literal.value ? "true" : "false"); break; case EXPRESSION_NAME: printf("(name %s)", expression->value.name.name.data); break; case EXPRESSION_UNARY_OPERATION: printf("(unary %s ", unary_operation_to_string(expression->value.unary_operator.operation)); expression_print(expression->value.unary_operator.operand); printf(")"); break; case EXPRESSION_BINARY_OPERATION: printf( "(binary %s ", binary_operation_to_string(expression->value.binary_operator.operation)); expression_print(expression->value.binary_operator.left_operand); printf(" "); expression_print(expression->value.binary_operator.right_operand); printf(")"); break; case EXPRESSION_GROUP: printf("(group "); expression_print(expression->value.group.inner_expression); printf(")"); break; case EXPRESSION_CALL: printf("(call "); expression_print(expression->value.call.subject); FOR_EACH(struct Expression*, argument, expression->value.call.arguments) { printf(" "); expression_print(argument); } printf(")"); break; case EXPRESSION_SUBSCRIPT: printf("(subscript "); expression_print(expression->value.subscript.subject); printf(" "); expression_print(expression->value.subscript.index); printf(")"); break; case EXPRESSION_MEMBER: printf("(member "); expression_print(expression->value.member.subject); printf(")"); break; default: failure("unexpected expression kind passed to `expression_print`"); break; } printf(")"); } enum Statement_Kind { STATEMENT_NONE, STATEMENT_EXPRESSION, STATEMENT_DECLARATION, }; struct Statement_Value_Expression { struct Expression* inner; }; struct Statement_Value_Declaration { struct String_Array names; // the names of the variables being declared. struct Expression* initializer; // the expression to initialize the variable with. bool has_type; // whether the declaration has a type. struct Type_Node type; // the type of the variable, if any. }; union Statement_Value { struct Statement_Value_Expression expression; struct Statement_Value_Declaration declaration; }; struct Statement { enum Statement_Kind kind; union Statement_Value value; struct Span span; struct Cursor location; // if statement is within a group of multiple statements, // points to the next statement within it. struct Statement* next; }; REGION(struct Statement, statement) struct Statement* statement_new( enum Statement_Kind kind, union Statement_Value value, struct Span span, struct Cursor location) { check(region_statement_cursor < REGION_SIZE, "out of statement memory"); struct Statement* statement = ®ion_statement[region_statement_cursor++]; *statement = (struct Statement){ .kind = kind, .value = value, .span = span, .location = location, .next = nil, }; return statement; } void statement_print(const struct Statement* statement) { printf("(stmt "); switch (statement->kind) { case STATEMENT_NONE: printf("none"); break; case STATEMENT_EXPRESSION: { const struct Expression* expression = statement->value.expression.inner; expression_print(expression); break; } case STATEMENT_DECLARATION: { printf("(declaration "); STRING_ARRAY_FOR_EACH(i, name, statement->value.declaration.names) { printf("%s ", name.data); } if (statement->value.declaration.has_type) { printf("(type %s) ", statement->value.declaration.type.name.data); } if (statement->value.declaration.initializer) { printf("(initializer "); expression_print(statement->value.declaration.initializer); printf(")"); } printf(")"); break; } default: failure("unexpected statement kind passed to `statement_print`"); break; } printf(")"); } // the top-level tree of a single catskill source file. struct Tree { struct Statement* top_level_statements; }; void tree_print(const struct Tree* tree) { FOR_EACH(struct Statement*, statement, tree->top_level_statements) { statement_print(statement); printf("\n"); } }