diff options
| author | Mel <einebeere@gmail.com> | 2021-10-19 21:07:46 +0200 |
|---|---|---|
| committer | Mel <einebeere@gmail.com> | 2021-10-19 21:46:57 +0200 |
| commit | 552573df2606f61b382166db57c27f209605e487 (patch) | |
| tree | ef9c19dc5452e699db929a49dc32111a0d4c00b5 /src | |
| parent | bf0f05e64cc809f74e591c8a88f708265360f07d (diff) | |
| download | rabbithole-552573df2606f61b382166db57c27f209605e487.tar.zst rabbithole-552573df2606f61b382166db57c27f209605e487.zip | |
Add more lexer tokens
Diffstat (limited to 'src')
| -rw-r--r-- | src/lex/lexer.rs | 138 | ||||
| -rw-r--r-- | src/lex/token.rs | 29 |
2 files changed, 142 insertions, 25 deletions
diff --git a/src/lex/lexer.rs b/src/lex/lexer.rs index e2ac0f4..b18a0f6 100644 --- a/src/lex/lexer.rs +++ b/src/lex/lexer.rs @@ -12,6 +12,8 @@ impl Iterator for Lexer<'_> { type Item = Token; fn next(&mut self) -> Option<Self::Item> { + use super::token::TokenVariant::*; + if self.done { return None; } @@ -20,7 +22,7 @@ impl Iterator for Lexer<'_> { self.done = true; return Some(Token { location: self.location, - variant: TokenVariant::Eof, + variant: Eof, }); } @@ -30,22 +32,71 @@ impl Iterator for Lexer<'_> { let token = if c.is_numeric() { self.number() - } else if c == '+' { - self.char_token(TokenVariant::OpPlus) - } else if c == '-' { - self.char_token(TokenVariant::OpMinus) - } else if c == '*' { - self.char_token(TokenVariant::OpStar) - } else if c == '/' { - self.char_token(TokenVariant::OpSlash) - } else if c == '!' { - self.char_token(TokenVariant::OpNot) - } else if c == '(' { - self.char_token(TokenVariant::GroupOpen) - } else if c == ')' { - self.char_token(TokenVariant::GroupClose) + } else if c.is_alphabetic() { + self.identifier() + } else if c == '"' { + self.str() } else { - self.char_token(TokenVariant::Unknown(c)) + let location = self.location; + + // Fixed length tokens + let variant = match self.advance().unwrap() { + '+' => OpPlus, + '-' => { + if self.advance_if('>') { + Arrow + } else { + OpMinus + } + } + '*' => OpStar, + '/' => OpSlash, + '=' => { + if self.advance_if('=') { + OpEq + } else { + Assign + } + } + '!' => { + if self.advance_if('=') { + OpNeq + } else { + OpNot + } + } + '<' => { + if self.advance_if('=') { + OpLte + } else { + OpLt + } + } + '>' => { + if self.advance_if('=') { + OpGte + } else { + OpGt + } + } + '(' => GroupOpen, + ')' => GroupClose, + '{' => BlockOpen, + '}' => BlockClose, + '.' => Dot, + ',' => Comma, + ':' => { + if self.advance_if('=') { + ConstAssign + } else { + Colon + } + } + ';' => SemiColon, + _ => Unknown(c), + }; + + Token { location, variant } }; Some(token) @@ -75,6 +126,10 @@ impl<'s> Lexer<'s> { next } + fn advance_if(&mut self, c: char) -> bool { + self.chars.next_if_eq(&c).is_some() + } + fn skip_whitespace(&mut self) { while self .chars @@ -85,15 +140,6 @@ impl<'s> Lexer<'s> { } } - fn char_token(&mut self, variant: TokenVariant) -> Token { - let token = Token { - location: self.location, - variant, - }; - self.advance(); - token - } - fn number(&mut self) -> Token { let location = self.location; @@ -122,4 +168,46 @@ impl<'s> Lexer<'s> { Token { location, variant } } + + fn identifier(&mut self) -> Token { + let location = self.location; + + let mut buffer = String::new(); + + while self.chars.peek().map_or(false, |&c| c.is_alphabetic()) { + let c = self.advance().unwrap(); + buffer.push(c); + } + + let variant = match buffer.as_str() { + "fn" => TokenVariant::KeywordFn, + "type" => TokenVariant::KeywordType, + "form" => TokenVariant::KeywordForm, + "self" => TokenVariant::KeywordSelf, + _ => TokenVariant::Identifer(buffer), + }; + + Token { location, variant } + } + + fn str(&mut self) -> Token { + let location = self.location; + + // Remove first " + self.advance().unwrap(); + + let mut buffer = String::new(); + loop { + let c = self.advance().expect("Expected Str literal to be closed"); + if c == '"' { + break; + } + buffer.push(c); + } + + Token { + location, + variant: TokenVariant::Str(buffer), + } + } } diff --git a/src/lex/token.rs b/src/lex/token.rs index a43cf0f..114884b 100644 --- a/src/lex/token.rs +++ b/src/lex/token.rs @@ -12,17 +12,46 @@ pub struct Token { #[derive(Clone, Debug)] pub enum TokenVariant { + // Basic math operators OpPlus, OpMinus, OpStar, OpSlash, + // Boolean operators + OpEq, OpNot, + OpNeq, + OpLt, + OpGt, + OpLte, + OpGte, + // Statement symbols + Dot, + Comma, + Colon, + SemiColon, + Assign, + ConstAssign, + Arrow, + + // Groupings GroupOpen, GroupClose, + BlockOpen, + BlockClose, + // Literals Int(u32), Float(f32), + Str(String), + Identifer(String), + + // Keywords + KeywordFn, + KeywordForm, + KeywordType, + KeywordSelf, Unknown(char), Eof, |
