From 577c6b4339b7ddc5af011ca06ccae97459f5ec76 Mon Sep 17 00:00:00 2001 From: Mel Date: Sat, 24 May 2025 14:08:39 +0200 Subject: Correctly interpret all assignment operator expression variations Signed-off-by: Mel --- boot/lex.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++-------------- boot/tree.c | 144 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 271 insertions(+), 64 deletions(-) diff --git a/boot/lex.c b/boot/lex.c index c850534..0fc6ed1 100644 --- a/boot/lex.c +++ b/boot/lex.c @@ -95,10 +95,13 @@ enum Token_Kind TOKEN_SQUARE_CLOSE, TOKEN_COMMA, - TOKEN_ASSIGN, TOKEN_AMPERSAND, TOKEN_DOT, + TOKEN_BANG, + TOKEN_PERCENT, TOKEN_PIPE, + TOKEN_TILDE, + TOKEN_CARET, TOKEN_PLUS, TOKEN_MINUS, @@ -107,13 +110,29 @@ enum Token_Kind TOKEN_AND, TOKEN_OR, - TOKEN_NOT, TOKEN_EQUAL, TOKEN_NOT_EQUAL, TOKEN_LESS, TOKEN_LESS_EQUAL, TOKEN_GREATER, TOKEN_GREATER_EQUAL, + + TOKEN_LEFT_SHIFT, + TOKEN_RIGHT_SHIFT, + + TOKEN_ASSIGN, + TOKEN_ASSIGN_PLUS, + TOKEN_ASSIGN_MINUS, + TOKEN_ASSIGN_STAR, + TOKEN_ASSIGN_SLASH, + TOKEN_ASSIGN_PERCENT, + TOKEN_ASSIGN_AND, + TOKEN_ASSIGN_OR, + TOKEN_ASSIGN_AMPERSAND, + TOKEN_ASSIGN_PIPE, + TOKEN_ASSIGN_CARET, + TOKEN_ASSIGN_LEFT_SHIFT, + TOKEN_ASSIGN_RIGHT_SHIFT, }; const ascii* @@ -182,14 +201,20 @@ token_kind_to_string(enum Token_Kind kind) case TOKEN_COMMA: return "COMMA"; - case TOKEN_ASSIGN: - return "ASSIGN"; case TOKEN_AMPERSAND: return "AMPERSAND"; case TOKEN_DOT: return "DOT"; + case TOKEN_BANG: + return "BANG"; + case TOKEN_PERCENT: + return "PERCENT"; case TOKEN_PIPE: return "PIPE"; + case TOKEN_TILDE: + return "TILDE"; + case TOKEN_CARET: + return "CARET"; case TOKEN_PLUS: return "PLUS"; @@ -204,8 +229,6 @@ token_kind_to_string(enum Token_Kind kind) return "AND"; case TOKEN_OR: return "OR"; - case TOKEN_NOT: - return "NOT"; case TOKEN_EQUAL: return "EQUAL"; case TOKEN_NOT_EQUAL: @@ -218,6 +241,39 @@ token_kind_to_string(enum Token_Kind kind) return "GREATER"; case TOKEN_GREATER_EQUAL: return "GREATER_EQUAL"; + 
+ case TOKEN_LEFT_SHIFT: + return "BITWISE_LEFT_SHIFT"; + case TOKEN_RIGHT_SHIFT: + return "BITWISE_RIGHT_SHIFT"; + + case TOKEN_ASSIGN: + return "ASSIGN"; + case TOKEN_ASSIGN_PLUS: + return "ASSIGN_PLUS"; + case TOKEN_ASSIGN_MINUS: + return "ASSIGN_MINUS"; + case TOKEN_ASSIGN_STAR: + return "ASSIGN_STAR"; + case TOKEN_ASSIGN_SLASH: + return "ASSIGN_SLASH"; + case TOKEN_ASSIGN_PERCENT: + return "ASSIGN_PERCENT"; + case TOKEN_ASSIGN_AND: + return "ASSIGN_AND"; + case TOKEN_ASSIGN_OR: + return "ASSIGN_OR"; + case TOKEN_ASSIGN_AMPERSAND: + return "ASSIGN_AMPERSAND"; + case TOKEN_ASSIGN_PIPE: + return "ASSIGN_PIPE"; + case TOKEN_ASSIGN_CARET: + return "ASSIGN_CARET"; + case TOKEN_ASSIGN_LEFT_SHIFT: + return "ASSIGN_LEFT_SHIFT"; + case TOKEN_ASSIGN_RIGHT_SHIFT: + return "ASSIGN_RIGHT_SHIFT"; + default: return ""; } @@ -390,6 +446,16 @@ lexer_match_char(struct Lexer* l, ascii expected) return (struct Lexer_Char_Match){}; } +void +lexer_match_chars( + struct Lexer* l, ascii a, ascii b, struct Lexer_Char_Match* a_out, + struct Lexer_Char_Match* b_out) +{ + *a_out = lexer_match_char(l, a); + if (!a_out->got_match) return; + *b_out = lexer_match_char(l, b); +} + struct Lexer_Non_Code { bool had_newline; @@ -444,7 +510,7 @@ struct Lexer_Symbol_Token lexer_symbol_token(struct Lexer* l, struct Lexer_Char current) { #define RET return (struct Lexer_Symbol_Token) - struct Lexer_Char_Match next; + struct Lexer_Char_Match a, b; switch (current.character) { case '(': RET{ TOKEN_ROUND_OPEN, 1 }; @@ -459,59 +525,94 @@ lexer_symbol_token(struct Lexer* l, struct Lexer_Char current) case ']': RET{ TOKEN_SQUARE_CLOSE, 1 }; + case ',': RET{ TOKEN_COMMA, 1 }; + case '&': { + lexer_match_chars(l, '&', '=', &a, &b); - case '=': - next = lexer_match_char(l, '='); - if (next.got_match) RET{ TOKEN_EQUAL, 2 }; - - RET{ TOKEN_ASSIGN, 1 }; - - case '&': - next = lexer_match_char(l, '&'); - if (next.got_match) RET{ TOKEN_AND, 2 }; + if (a.got_match && b.got_match) RET{ TOKEN_ASSIGN_AND, 3 }; + if 
(a.got_match) RET{ TOKEN_AND, 2 }; + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_AMPERSAND, 2 }; RET{ TOKEN_AMPERSAND, 1 }; - + } case '.': RET{ TOKEN_DOT, 1 }; + case '!': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_NOT_EQUAL, 2 }; + RET{ TOKEN_BANG, 1 }; + } + case '%': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_PERCENT, 2 }; + RET{ TOKEN_PERCENT, 1 }; + } + case '|': { + lexer_match_chars(l, '|', '=', &a, &b); - case '|': - next = lexer_match_char(l, '|'); - if (next.got_match) RET{ TOKEN_OR, 2 }; + if (a.got_match && b.got_match) RET{ TOKEN_ASSIGN_OR, 3 }; + if (a.got_match) RET{ TOKEN_OR, 2 }; + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_PIPE, 2 }; RET{ TOKEN_PIPE, 1 }; - - // todo: increment, decrement, +=, -=, *=, etc. - // all the special assignment operations - case '+': + } + case '~': + RET{ TOKEN_TILDE, 1 }; + case '^': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_CARET, 2 }; + RET{ TOKEN_CARET, 1 }; + } + // todo: increment, decrement, power + case '+': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_PLUS, 2 }; RET{ TOKEN_PLUS, 1 }; - case '-': + } + case '-': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_MINUS, 2 }; RET{ TOKEN_MINUS, 1 }; - case '*': + } + case '*': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_STAR, 2 }; RET{ TOKEN_STAR, 1 }; - case '/': + } + case '/': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_ASSIGN_SLASH, 2 }; RET{ TOKEN_SLASH, 1 }; - - case '!': - next = lexer_match_char(l, '='); - if (next.got_match) RET{ TOKEN_NOT_EQUAL, 2 }; - - RET{ TOKEN_NOT, 1 }; - - case '<': - next = lexer_match_char(l, '='); - if (next.got_match) RET{ TOKEN_LESS_EQUAL, 2 }; - RET{ TOKEN_LESS }; - - case '>': - next = lexer_match_char(l, '='); - if (next.got_match) RET{ TOKEN_GREATER_EQUAL, 2 }; - RET{ TOKEN_GREATER }; - + } + case '<': { + a 
= lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_LESS_EQUAL, 2 }; + + lexer_match_chars(l, '<', '=', &a, &b); + if (a.got_match && b.got_match) RET{ TOKEN_ASSIGN_LEFT_SHIFT, 3 }; + if (a.got_match) RET{ TOKEN_LEFT_SHIFT, 2 }; + RET{ TOKEN_LESS, 1 }; + } + case '>': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_GREATER_EQUAL, 2 }; + + lexer_match_chars(l, '>', '=', &a, &b); + if (a.got_match && b.got_match) RET{ TOKEN_ASSIGN_RIGHT_SHIFT, 3 }; + if (a.got_match) RET{ TOKEN_RIGHT_SHIFT, 2 }; + RET{ TOKEN_GREATER, 1 }; + } + case '=': { + a = lexer_match_char(l, '='); + if (a.got_match) RET{ TOKEN_EQUAL, 2 }; + RET{ TOKEN_ASSIGN, 1 }; + } default: - RET{ TOKEN_NONE, 0 }; + RET{ TOKEN_NONE, 1 }; } #undef RET } diff --git a/boot/tree.c b/boot/tree.c index 4fb4b73..e6c5b78 100644 --- a/boot/tree.c +++ b/boot/tree.c @@ -18,11 +18,10 @@ unary_operation_from_token(const struct Token* token) switch (token->kind) { case TOKEN_MINUS: return UNARY_MINUS; - case TOKEN_NOT: + case TOKEN_BANG: return UNARY_NOT; - // TODO: tilde token - // case TOKEN_TILDE: - // return UNARY_BITWISE_NOT; + case TOKEN_TILDE: + return UNARY_BITWISE_NOT; default: return UNARY_NONE; @@ -69,6 +68,20 @@ enum Binary_Operation BINARY_BITWISE_XOR, BINARY_BITWISE_LEFT_SHIFT, BINARY_BITWISE_RIGHT_SHIFT, + + BINARY_ASSIGN, + BINARY_ASSIGN_PLUS, + BINARY_ASSIGN_MINUS, + BINARY_ASSIGN_MULTIPLY, + BINARY_ASSIGN_DIVIDE, + BINARY_ASSIGN_MODULO, + BINARY_ASSIGN_AND, + BINARY_ASSIGN_OR, + BINARY_ASSIGN_BITWISE_AND, + BINARY_ASSIGN_BITWISE_OR, + BINARY_ASSIGN_BITWISE_XOR, + BINARY_ASSIGN_BITWISE_LEFT_SHIFT, + BINARY_ASSIGN_BITWISE_RIGHT_SHIFT, }; enum Binary_Operation @@ -83,9 +96,8 @@ binary_operation_from_token(const struct Token* token) return BINARY_MULTIPLY; case TOKEN_SLASH: return BINARY_DIVIDE; - // TODO: percent token - // case TOKEN_PERCENT: - // return BINARY_MODULO; + case TOKEN_PERCENT: + return BINARY_MODULO; case TOKEN_EQUAL: return BINARY_EQUAL; @@ -104,6 +116,44 @@ 
binary_operation_from_token(const struct Token* token) case TOKEN_OR: return BINARY_OR; + case TOKEN_AMPERSAND: + return BINARY_BITWISE_AND; + case TOKEN_PIPE: + return BINARY_BITWISE_OR; + case TOKEN_CARET: + return BINARY_BITWISE_XOR; + case TOKEN_LEFT_SHIFT: + return BINARY_BITWISE_LEFT_SHIFT; + case TOKEN_RIGHT_SHIFT: + return BINARY_BITWISE_RIGHT_SHIFT; + + case TOKEN_ASSIGN: + return BINARY_ASSIGN; + case TOKEN_ASSIGN_PLUS: + return BINARY_ASSIGN_PLUS; + case TOKEN_ASSIGN_MINUS: + return BINARY_ASSIGN_MINUS; + case TOKEN_ASSIGN_STAR: + return BINARY_ASSIGN_MULTIPLY; + case TOKEN_ASSIGN_SLASH: + return BINARY_ASSIGN_DIVIDE; + case TOKEN_ASSIGN_PERCENT: + return BINARY_ASSIGN_MODULO; + case TOKEN_ASSIGN_AND: + return BINARY_ASSIGN_AND; + case TOKEN_ASSIGN_OR: + return BINARY_ASSIGN_OR; + case TOKEN_ASSIGN_AMPERSAND: + return BINARY_ASSIGN_BITWISE_AND; + case TOKEN_ASSIGN_PIPE: + return BINARY_ASSIGN_BITWISE_OR; + case TOKEN_ASSIGN_CARET: + return BINARY_ASSIGN_BITWISE_XOR; + case TOKEN_ASSIGN_LEFT_SHIFT: + return BINARY_ASSIGN_BITWISE_LEFT_SHIFT; + case TOKEN_ASSIGN_RIGHT_SHIFT: + return BINARY_ASSIGN_BITWISE_RIGHT_SHIFT; + default: return BINARY_NONE; } @@ -117,34 +167,48 @@ binary_operation_precedence(enum Binary_Operation operation) { switch (operation) { // weakest - case BINARY_OR: + case BINARY_ASSIGN: + case BINARY_ASSIGN_PLUS: + case BINARY_ASSIGN_MINUS: + case BINARY_ASSIGN_MULTIPLY: + case BINARY_ASSIGN_DIVIDE: + case BINARY_ASSIGN_MODULO: + case BINARY_ASSIGN_AND: + case BINARY_ASSIGN_OR: + case BINARY_ASSIGN_BITWISE_AND: + case BINARY_ASSIGN_BITWISE_OR: + case BINARY_ASSIGN_BITWISE_XOR: + case BINARY_ASSIGN_BITWISE_LEFT_SHIFT: + case BINARY_ASSIGN_BITWISE_RIGHT_SHIFT: return 1; - case BINARY_AND: + case BINARY_OR: return 2; - case BINARY_BITWISE_OR: + case BINARY_AND: return 3; - case BINARY_BITWISE_XOR: + case BINARY_BITWISE_OR: return 4; - case BINARY_BITWISE_AND: + case BINARY_BITWISE_XOR: return 5; + case BINARY_BITWISE_AND: + return 6; case 
BINARY_EQUAL: case BINARY_NOT_EQUAL: - return 6; + return 7; case BINARY_GREATER_THAN: case BINARY_GREATER_THAN_EQUAL: case BINARY_LESS_THAN: case BINARY_LESS_THAN_EQUAL: - return 7; + return 8; case BINARY_BITWISE_LEFT_SHIFT: case BINARY_BITWISE_RIGHT_SHIFT: - return 8; + return 9; case BINARY_PLUS: case BINARY_MINUS: - return 9; + return 10; case BINARY_MULTIPLY: case BINARY_DIVIDE: case BINARY_MODULO: - return 10; + return 11; // strongest default: @@ -162,8 +226,24 @@ enum Binary_Operation_Associativity enum Binary_Operation_Associativity binary_operation_associativity(enum Binary_Operation operation) { - // all operations are left associative by default. - return BINARY_ASSOCIATIVITY_LEFT; + switch (operation) { + case BINARY_ASSIGN: + case BINARY_ASSIGN_PLUS: + case BINARY_ASSIGN_MINUS: + case BINARY_ASSIGN_MULTIPLY: + case BINARY_ASSIGN_DIVIDE: + case BINARY_ASSIGN_MODULO: + case BINARY_ASSIGN_AND: + case BINARY_ASSIGN_OR: + case BINARY_ASSIGN_BITWISE_AND: + case BINARY_ASSIGN_BITWISE_OR: + case BINARY_ASSIGN_BITWISE_XOR: + case BINARY_ASSIGN_BITWISE_LEFT_SHIFT: + case BINARY_ASSIGN_BITWISE_RIGHT_SHIFT: + return BINARY_ASSOCIATIVITY_RIGHT; + default: + return BINARY_ASSOCIATIVITY_LEFT; + } } const ascii* @@ -209,6 +289,33 @@ binary_operation_to_string(enum Binary_Operation operation) case BINARY_BITWISE_RIGHT_SHIFT: return ">>"; + case BINARY_ASSIGN: + return "="; + case BINARY_ASSIGN_PLUS: + return "+="; + case BINARY_ASSIGN_MINUS: + return "-="; + case BINARY_ASSIGN_MULTIPLY: + return "*="; + case BINARY_ASSIGN_DIVIDE: + return "/="; + case BINARY_ASSIGN_MODULO: + return "%="; + case BINARY_ASSIGN_AND: + return "&&="; + case BINARY_ASSIGN_OR: + return "||="; + case BINARY_ASSIGN_BITWISE_AND: + return "&="; + case BINARY_ASSIGN_BITWISE_OR: + return "|="; + case BINARY_ASSIGN_BITWISE_XOR: + return "^="; + case BINARY_ASSIGN_BITWISE_LEFT_SHIFT: + return "<<="; + case BINARY_ASSIGN_BITWISE_RIGHT_SHIFT: + return ">>="; + default: return "unknown"; } -- cgit 1.4.1