about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Mel <einebeere@gmail.com> 2021-10-19 21:07:46 +0200
committer Mel <einebeere@gmail.com> 2021-10-19 21:46:57 +0200
commit 552573df2606f61b382166db57c27f209605e487 (patch)
tree ef9c19dc5452e699db929a49dc32111a0d4c00b5 /src
parent bf0f05e64cc809f74e591c8a88f708265360f07d (diff)
download rabbithole-552573df2606f61b382166db57c27f209605e487.tar.zst
rabbithole-552573df2606f61b382166db57c27f209605e487.zip
Add more lexer tokens
Diffstat (limited to 'src')
-rw-r--r-- src/lex/lexer.rs 138
-rw-r--r-- src/lex/token.rs 29
2 files changed, 142 insertions, 25 deletions
diff --git a/src/lex/lexer.rs b/src/lex/lexer.rs
index e2ac0f4..b18a0f6 100644
--- a/src/lex/lexer.rs
+++ b/src/lex/lexer.rs
@@ -12,6 +12,8 @@ impl Iterator for Lexer<'_> {
     type Item = Token;
 
     fn next(&mut self) -> Option<Self::Item> {
+        use super::token::TokenVariant::*;
+
         if self.done {
             return None;
         }
@@ -20,7 +22,7 @@ impl Iterator for Lexer<'_> {
             self.done = true;
             return Some(Token {
                 location: self.location,
-                variant: TokenVariant::Eof,
+                variant: Eof,
             });
         }
 
@@ -30,22 +32,71 @@ impl Iterator for Lexer<'_> {
 
         let token = if c.is_numeric() {
             self.number()
-        } else if c == '+' {
-            self.char_token(TokenVariant::OpPlus)
-        } else if c == '-' {
-            self.char_token(TokenVariant::OpMinus)
-        } else if c == '*' {
-            self.char_token(TokenVariant::OpStar)
-        } else if c == '/' {
-            self.char_token(TokenVariant::OpSlash)
-        } else if c == '!' {
-            self.char_token(TokenVariant::OpNot)
-        } else if c == '(' {
-            self.char_token(TokenVariant::GroupOpen)
-        } else if c == ')' {
-            self.char_token(TokenVariant::GroupClose)
+        } else if c.is_alphabetic() {
+            self.identifier()
+        } else if c == '"' {
+            self.str()
         } else {
-            self.char_token(TokenVariant::Unknown(c))
+            let location = self.location;
+
+            // Fixed length tokens
+            let variant = match self.advance().unwrap() {
+                '+' => OpPlus,
+                '-' => {
+                    if self.advance_if('>') {
+                        Arrow
+                    } else {
+                        OpMinus
+                    }
+                }
+                '*' => OpStar,
+                '/' => OpSlash,
+                '=' => {
+                    if self.advance_if('=') {
+                        OpEq
+                    } else {
+                        Assign
+                    }
+                }
+                '!' => {
+                    if self.advance_if('=') {
+                        OpNeq
+                    } else {
+                        OpNot
+                    }
+                }
+                '<' => {
+                    if self.advance_if('=') {
+                        OpLte
+                    } else {
+                        OpLt
+                    }
+                }
+                '>' => {
+                    if self.advance_if('=') {
+                        OpGte
+                    } else {
+                        OpGt
+                    }
+                }
+                '(' => GroupOpen,
+                ')' => GroupClose,
+                '{' => BlockOpen,
+                '}' => BlockClose,
+                '.' => Dot,
+                ',' => Comma,
+                ':' => {
+                    if self.advance_if('=') {
+                        ConstAssign
+                    } else {
+                        Colon
+                    }
+                }
+                ';' => SemiColon,
+                _ => Unknown(c),
+            };
+
+            Token { location, variant }
         };
 
         Some(token)
@@ -75,6 +126,10 @@ impl<'s> Lexer<'s> {
         next
     }
 
+    fn advance_if(&mut self, c: char) -> bool {
+        self.chars.next_if_eq(&c).is_some()
+    }
+
     fn skip_whitespace(&mut self) {
         while self
             .chars
@@ -85,15 +140,6 @@ impl<'s> Lexer<'s> {
         }
     }
 
-    fn char_token(&mut self, variant: TokenVariant) -> Token {
-        let token = Token {
-            location: self.location,
-            variant,
-        };
-        self.advance();
-        token
-    }
-
     fn number(&mut self) -> Token {
         let location = self.location;
 
@@ -122,4 +168,46 @@ impl<'s> Lexer<'s> {
 
         Token { location, variant }
     }
+
+    fn identifier(&mut self) -> Token {
+        let location = self.location;
+
+        let mut buffer = String::new();
+
+        while self.chars.peek().map_or(false, |&c| c.is_alphabetic()) {
+            let c = self.advance().unwrap();
+            buffer.push(c);
+        }
+
+        let variant = match buffer.as_str() {
+            "fn" => TokenVariant::KeywordFn,
+            "type" => TokenVariant::KeywordType,
+            "form" => TokenVariant::KeywordForm,
+            "self" => TokenVariant::KeywordSelf,
+            _ => TokenVariant::Identifer(buffer),
+        };
+
+        Token { location, variant }
+    }
+
+    fn str(&mut self) -> Token {
+        let location = self.location;
+
+        // Remove first "
+        self.advance().unwrap();
+
+        let mut buffer = String::new();
+        loop {
+            let c = self.advance().expect("Expected Str literal to be closed");
+            if c == '"' {
+                break;
+            }
+            buffer.push(c);
+        }
+
+        Token {
+            location,
+            variant: TokenVariant::Str(buffer),
+        }
+    }
 }
diff --git a/src/lex/token.rs b/src/lex/token.rs
index a43cf0f..114884b 100644
--- a/src/lex/token.rs
+++ b/src/lex/token.rs
@@ -12,17 +12,46 @@ pub struct Token {
 
 #[derive(Clone, Debug)]
 pub enum TokenVariant {
+    // Basic math operators
     OpPlus,
     OpMinus,
     OpStar,
     OpSlash,
+    // Boolean operators
+    OpEq,
     OpNot,
+    OpNeq,
+    OpLt,
+    OpGt,
+    OpLte,
+    OpGte,
 
+    // Statement symbols
+    Dot,
+    Comma,
+    Colon,
+    SemiColon,
+    Assign,
+    ConstAssign,
+    Arrow,
+
+    // Groupings
     GroupOpen,
     GroupClose,
+    BlockOpen,
+    BlockClose,
 
+    // Literals
     Int(u32),
     Float(f32),
+    Str(String),
+    Identifer(String),
+
+    // Keywords
+    KeywordFn,
+    KeywordForm,
+    KeywordType,
+    KeywordSelf,
 
     Unknown(char),
     Eof,