about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mel <mel@rnrd.eu> 2025-05-18 19:43:19 +0200
committer Mel <mel@rnrd.eu> 2025-05-18 19:43:19 +0200
commit 8c8d65f026121b4d75a31bff8a91c3fbf969fca5 (patch)
tree 64ba811523b45c02c849c1655e83e54aba968ec5
parent 7a7446a6dfdfcf5e02358fdb34c193eb67068b93 (diff)
download catskill-8c8d65f026121b4d75a31bff8a91c3fbf969fca5.tar.zst
catskill-8c8d65f026121b4d75a31bff8a91c3fbf969fca5.zip
Define basic syntax tree (mostly expressions), with simple debug printing
Signed-off-by: Mel <mel@rnrd.eu>
-rw-r--r-- Makefile 2
-rw-r--r-- boot/catboot.c 1
-rw-r--r-- boot/common.c 10
-rw-r--r-- boot/lex.c 1
-rw-r--r-- boot/tree.c 340
5 files changed, 353 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index d24a003..91d1853 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 CFLAGS ?= -std=c99 -Wall -Werror -static -g -O0
 .DEFAULT_GOAL := all
 
-BOOTSTRAP_SOURCES = boot/catboot.c boot/common.c boot/lex.c
+BOOTSTRAP_SOURCES = boot/catboot.c boot/common.c boot/lex.c boot/tree.c
 SOURCES = src/catskill.csk
 
 build/catskill: build $(SOURCES)
diff --git a/boot/catboot.c b/boot/catboot.c
index e47d4b9..fec47c0 100644
--- a/boot/catboot.c
+++ b/boot/catboot.c
@@ -22,6 +22,7 @@
 
 #include "common.c"
 #include "lex.c"
+#include "tree.c"
 
 const ascii*
 read_file(const ascii* path)
diff --git a/boot/common.c b/boot/common.c
index 388aea5..d814538 100644
--- a/boot/common.c
+++ b/boot/common.c
@@ -62,6 +62,10 @@ check(bool condition, const ascii* message)
     if (!condition) failure(message);
 }
 
+// for each entry in a linked list.
+// expands to the header of a `for` statement, the statement or block
+// following the macro is the loop body. `cursor` is declared with `type`,
+// starts at `head` and follows the `next` field until it equals `nil`.
+#define FOR_EACH(type, cursor, head) \
+    for (type cursor = head; cursor != nil; cursor = cursor->next)
+
 // the common size of region memory blocks.
 #define REGION_SIZE 65536
 
@@ -156,6 +160,12 @@ string_length(struct String s)
     return s.length;
 }
 
+// prints the string to standard output, without a trailing newline.
+void
+string_print(struct String s)
+{
+    // `%.*s` takes the maximum amount of bytes to print as its own argument.
+    const int32 length = (int32)s.length;
+    printf("%.*s", length, s.data);
+}
+
 // single iteration of the CRC32 checksum algorithm
 // described in POSIX.
 // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html
diff --git a/boot/lex.c b/boot/lex.c
index db0173e..9a94c55 100644
--- a/boot/lex.c
+++ b/boot/lex.c
@@ -219,6 +219,7 @@ token_new(enum Token_Kind kind, struct Span span, struct Cursor location, union
         .kind = kind,
         .span = span,
         .value = value,
+        .location = location,
     };
 }
 
diff --git a/boot/tree.c b/boot/tree.c
new file mode 100644
index 0000000..339f12c
--- /dev/null
+++ b/boot/tree.c
@@ -0,0 +1,340 @@
+/*
+ * abstract syntax tree types for a catskill source.
+ */
+
+// operations that are applied to a single operand.
+// see `unary_operation_to_string` for the symbol of each operation.
+enum Unary_Operation
+{
+    UNARY_MINUS,
+    UNARY_NOT,
+
+    UNARY_BITWISE_NOT,
+};
+
+// gives the operator symbol for a unary operation,
+// or "unknown" if the value is not one of the known operations.
+const ascii*
+unary_operation_to_string(enum Unary_Operation operation)
+{
+    const ascii* symbol = "unknown";
+
+    switch (operation) {
+    case UNARY_MINUS:
+        symbol = "-";
+        break;
+    case UNARY_NOT:
+        symbol = "!";
+        break;
+    case UNARY_BITWISE_NOT:
+        symbol = "~";
+        break;
+
+    default:
+        break;
+    }
+
+    return symbol;
+}
+
+// operations that are applied to a left and a right operand.
+// see `binary_operation_to_string` for the symbol of each operation.
+enum Binary_Operation
+{
+    // arithmetic: + - * / %
+    BINARY_PLUS,
+    BINARY_MINUS,
+    BINARY_MULTIPLY,
+    BINARY_DIVIDE,
+    BINARY_MODULO,
+
+    // comparison and logic: == != > >= < <= && ||
+    BINARY_EQUAL,
+    BINARY_NOT_EQUAL,
+    BINARY_GREATER_THAN,
+    BINARY_GREATER_THAN_EQUAL,
+    BINARY_LESS_THAN,
+    BINARY_LESS_THAN_EQUAL,
+    BINARY_AND,
+    BINARY_OR,
+
+    // bitwise: & | ^ << >>
+    BINARY_BITWISE_AND,
+    BINARY_BITWISE_OR,
+    BINARY_BITWISE_XOR,
+    BINARY_BITWISE_LEFT_SHIFT,
+    BINARY_BITWISE_RIGHT_SHIFT,
+};
+
+// gives the operator symbol for a binary operation,
+// or "unknown" if the value is not one of the known operations.
+const ascii*
+binary_operation_to_string(enum Binary_Operation operation)
+{
+    const ascii* symbol = "unknown";
+
+    switch (operation) {
+    // arithmetic.
+    case BINARY_PLUS:
+        symbol = "+";
+        break;
+    case BINARY_MINUS:
+        symbol = "-";
+        break;
+    case BINARY_MULTIPLY:
+        symbol = "*";
+        break;
+    case BINARY_DIVIDE:
+        symbol = "/";
+        break;
+    case BINARY_MODULO:
+        symbol = "%";
+        break;
+
+    // comparison and logic.
+    case BINARY_EQUAL:
+        symbol = "==";
+        break;
+    case BINARY_NOT_EQUAL:
+        symbol = "!=";
+        break;
+    case BINARY_GREATER_THAN:
+        symbol = ">";
+        break;
+    case BINARY_GREATER_THAN_EQUAL:
+        symbol = ">=";
+        break;
+    case BINARY_LESS_THAN:
+        symbol = "<";
+        break;
+    case BINARY_LESS_THAN_EQUAL:
+        symbol = "<=";
+        break;
+    case BINARY_AND:
+        symbol = "&&";
+        break;
+    case BINARY_OR:
+        symbol = "||";
+        break;
+
+    // bitwise.
+    case BINARY_BITWISE_AND:
+        symbol = "&";
+        break;
+    case BINARY_BITWISE_OR:
+        symbol = "|";
+        break;
+    case BINARY_BITWISE_XOR:
+        symbol = "^";
+        break;
+    case BINARY_BITWISE_LEFT_SHIFT:
+        symbol = "<<";
+        break;
+    case BINARY_BITWISE_RIGHT_SHIFT:
+        symbol = ">>";
+        break;
+
+    default:
+        break;
+    }
+
+    return symbol;
+}
+
+// the kind of an expression, stored in `struct Expression`.
+// `expression_print` uses it to decide which member of
+// `union Expression_Value` to read.
+enum Expression_Kind
+{
+    // printed as "none", no member of the value union is read for it.
+    EXPRESSION_NONE,
+
+    EXPRESSION_INTEGER_LITERAL,
+    EXPRESSION_FLOAT_LITERAL,
+    EXPRESSION_STRING_LITERAL,
+    EXPRESSION_BOOLEAN_LITERAL,
+    EXPRESSION_NAME,
+
+    EXPRESSION_UNARY_OPERATION,
+    EXPRESSION_BINARY_OPERATION,
+
+    EXPRESSION_GROUP,
+    EXPRESSION_CALL,
+    EXPRESSION_SUBSCRIPT,
+    EXPRESSION_MEMBER,
+};
+
+// value of an `EXPRESSION_INTEGER_LITERAL` expression.
+struct Expression_Integer_Literal
+{
+    int64 value; // might not fit entire number given in source.
+};
+
+// value of an `EXPRESSION_FLOAT_LITERAL` expression.
+struct Expression_Float_Literal
+{
+    float64 value;
+};
+
+// value of an `EXPRESSION_STRING_LITERAL` expression.
+struct Expression_String_Literal
+{
+    struct String value;
+};
+
+// value of an `EXPRESSION_BOOLEAN_LITERAL` expression.
+// note: the struct says `Bool`, the expression kind says `BOOLEAN`.
+struct Expression_Bool_Literal
+{
+    bool value;
+};
+
+// value of an `EXPRESSION_NAME` expression.
+struct Expression_Name
+{
+    struct String name;
+};
+
+// value of an `EXPRESSION_UNARY_OPERATION` expression.
+// note: the struct says `Operator`, the expression kind says `OPERATION`.
+struct Expression_Unary_Operator
+{
+    enum Unary_Operation operation;
+    struct Expression* operand; // the expression the operation is applied to.
+};
+
+// value of an `EXPRESSION_BINARY_OPERATION` expression.
+// note: the struct says `Operator`, the expression kind says `OPERATION`.
+struct Expression_Binary_Operator
+{
+    enum Binary_Operation operation;
+    struct Expression* left_operand;
+    struct Expression* right_operand;
+};
+
+// value of an `EXPRESSION_GROUP` expression,
+// which wraps a single inner expression.
+struct Expression_Group
+{
+    struct Expression* inner_expression;
+};
+
+// value of an `EXPRESSION_CALL` expression.
+struct Expression_Call
+{
+    struct Expression* subject; // presumably the expression being called, confirm against the parser.
+    struct Expression* arguments; // linked list of expressions, chained through `next`.
+};
+
+// value of an `EXPRESSION_SUBSCRIPT` expression.
+struct Expression_Subscript
+{
+    struct Expression* subject; // presumably the expression being indexed, confirm against the parser.
+    struct Expression* index;
+};
+
+// value of an `EXPRESSION_MEMBER` expression.
+struct Expression_Member
+{
+    struct Expression* subject;
+    struct String name; // presumably the name of the member accessed on `subject`, confirm against the parser.
+};
+
+// the kind-specific data of an expression.
+// which member is valid is decided by the `kind` of the
+// `struct Expression` holding the union, `EXPRESSION_NONE` uses no member.
+union Expression_Value
+{
+    struct Expression_Integer_Literal integer_literal;
+    struct Expression_Float_Literal float_literal;
+    struct Expression_String_Literal string_literal;
+    struct Expression_Bool_Literal bool_literal;
+    struct Expression_Name name;
+    struct Expression_Unary_Operator unary_operator;
+    struct Expression_Binary_Operator binary_operator;
+    struct Expression_Group group;
+    struct Expression_Call call;
+    struct Expression_Subscript subscript;
+    struct Expression_Member member;
+};
+
+// a single node of the expression tree.
+// `kind` tells which member of `value` is in use.
+struct Expression
+{
+    enum Expression_Kind kind;
+    union Expression_Value value;
+
+    // presumably where the expression was found in the source,
+    // like the span and location of a token. TODO: confirm against the parser.
+    struct Span span;
+    struct Cursor location;
+
+    // if expression is within a group of multiple expressions,
+    // points to the next expression within it.
+    struct Expression* next;
+};
+
+// NOTE(review): `REGION` is defined outside of this file, presumably it
+// sets up region storage for expressions. confirm in common.c.
+REGION(struct Expression, expression)
+
+// prints a debug representation of an expression to standard output,
+// descending recursively into the expressions it refers to.
+// every expression is printed as `(expr ...)`, without a trailing newline.
+// an expression of a kind that is not handled here is printed as `(expr unknown)`.
+// the expression and every expression it refers to must not be nil,
+// only the argument list of a call may be empty.
+void
+expression_print(const struct Expression* expression)
+{
+    printf("(expr ");
+    switch (expression->kind) {
+    case EXPRESSION_NONE:
+        printf("none");
+        break;
+    case EXPRESSION_INTEGER_LITERAL:
+        // `%ld` expects a `long`, which `int64` does not have to be.
+        // widen explicitly, so argument and conversion always agree.
+        printf("%lld", (long long)expression->value.integer_literal.value);
+        break;
+    case EXPRESSION_FLOAT_LITERAL:
+        printf("%lf", expression->value.float_literal.value);
+        break;
+    case EXPRESSION_STRING_LITERAL:
+        // strings carry an explicit length, so print through `string_print`
+        // instead of relying on a terminating zero byte with `%s`.
+        printf("\"");
+        string_print(expression->value.string_literal.value);
+        printf("\"");
+        break;
+    case EXPRESSION_BOOLEAN_LITERAL:
+        printf("%s", expression->value.bool_literal.value ? "true" : "false");
+        break;
+    case EXPRESSION_NAME:
+        printf("(name ");
+        string_print(expression->value.name.name);
+        printf(")");
+        break;
+    case EXPRESSION_UNARY_OPERATION:
+        printf("(unary %s ", unary_operation_to_string(expression->value.unary_operator.operation));
+        expression_print(expression->value.unary_operator.operand);
+        printf(")");
+        break;
+    case EXPRESSION_BINARY_OPERATION:
+        printf(
+            "(binary %s ", binary_operation_to_string(expression->value.binary_operator.operation));
+        expression_print(expression->value.binary_operator.left_operand);
+        printf(" ");
+        expression_print(expression->value.binary_operator.right_operand);
+        printf(")");
+        break;
+    case EXPRESSION_GROUP:
+        printf("(group ");
+        expression_print(expression->value.group.inner_expression);
+        printf(")");
+        break;
+    case EXPRESSION_CALL:
+        printf("(call ");
+        expression_print(expression->value.call.subject);
+        // arguments follow the subject, each separated by a space.
+        FOR_EACH(struct Expression*, argument, expression->value.call.arguments)
+        {
+            printf(" ");
+            expression_print(argument);
+        }
+        printf(")");
+        break;
+    case EXPRESSION_SUBSCRIPT:
+        printf("(subscript ");
+        expression_print(expression->value.subscript.subject);
+        printf(" ");
+        expression_print(expression->value.subscript.index);
+        printf(")");
+        break;
+    case EXPRESSION_MEMBER:
+        // the member name is printed after the subject,
+        // otherwise `a.b` and `a.c` would look the same.
+        printf("(member ");
+        expression_print(expression->value.member.subject);
+        printf(" ");
+        string_print(expression->value.member.name);
+        printf(")");
+        break;
+    default:
+        // same output as `statement_print` gives for a kind it does not know.
+        printf("unknown");
+        break;
+    }
+    printf(")");
+}
+
+// the kind of a statement, stored in `struct Statement`.
+enum Statement_Kind
+{
+    // printed as "none", the value union is not read for it.
+    STATEMENT_NONE,
+    // the statement consists of an expression, found in `value.expression`.
+    STATEMENT_EXPRESSION,
+};
+
+// the kind-specific data of a statement.
+union Statement_Value
+{
+    struct Expression* expression; // read for `STATEMENT_EXPRESSION`.
+};
+
+// a single statement of the syntax tree.
+// `kind` tells which member of `value` is in use.
+struct Statement
+{
+    enum Statement_Kind kind;
+    union Statement_Value value;
+
+    // presumably where the statement was found in the source,
+    // like the span and location of a token. TODO: confirm against the parser.
+    struct Span span;
+    struct Cursor location;
+
+    // if statement is within a group of multiple statements,
+    // points to the next statement within it.
+    struct Statement* next;
+};
+
+// NOTE(review): `REGION` is defined outside of this file, presumably it
+// sets up region storage for statements. confirm in common.c.
+REGION(struct Statement, statement)
+
+// prints a debug representation of a statement to standard output,
+// in the form `(stmt ...)`, without a trailing newline.
+// a statement of a kind that is not handled here is printed as `(stmt unknown)`.
+void
+statement_print(const struct Statement* statement)
+{
+    const enum Statement_Kind kind = statement->kind;
+
+    printf("(stmt ");
+    if (kind == STATEMENT_NONE) {
+        printf("none");
+    } else if (kind == STATEMENT_EXPRESSION) {
+        expression_print(statement->value.expression);
+    } else {
+        printf("unknown");
+    }
+    printf(")");
+}
+
+// the top-level tree of a single catskill source file.
+struct Tree
+{
+    // linked list of statements, chained through `next`.
+    struct Statement* top_level_statements;
+};
+
+// prints the debug representation of every top-level statement
+// of the tree to standard output, one statement per line.
+void
+tree_print(const struct Tree* tree)
+{
+    struct Statement* current = tree->top_level_statements;
+    while (current != nil) {
+        statement_print(current);
+        printf("\n");
+        current = current->next;
+    }
+}
\ No newline at end of file