about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--Cargo.lock7
-rw-r--r--Cargo.toml8
-rw-r--r--grammar.ebnf16
-rw-r--r--src/lex/lexer.rs125
-rw-r--r--src/lex/mod.rs2
-rw-r--r--src/lex/token.rs29
-rw-r--r--src/main.rs12
8 files changed, 200 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..41466aa
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "rabbithole"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..8679632
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "rabbithole"
+version = "0.1.0"
+edition = "2018"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/grammar.ebnf b/grammar.ebnf
new file mode 100644
index 0000000..1805abb
--- /dev/null
+++ b/grammar.ebnf
@@ -0,0 +1,16 @@
+(* Grammar definition in EBNF format. *)
+
+Expression = TermExpression;
+
+TermExpression = FactorExpression { ("+" | "-") FactorExpression };
+
+FactorExpression = UnaryExpression { ("*" | "/") UnaryExpression };
+
+UnaryExpression = ( "-" | "!" ) UnaryExpression | UnitExpression;
+                
+UnitExpression = NaturalDigit {Digit} | "(" Expression ")";
+
+(* Basics *)
+
+NaturalDigit = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9";
+Digit = "0" | NaturalDigit ;
\ No newline at end of file
diff --git a/src/lex/lexer.rs b/src/lex/lexer.rs
new file mode 100644
index 0000000..e2ac0f4
--- /dev/null
+++ b/src/lex/lexer.rs
@@ -0,0 +1,125 @@
+use std::{iter::Peekable, str::Chars};
+
+use super::token::{Location, Token, TokenVariant};
+
+/// Splits source text into [`Token`]s; tokens are pulled through its `Iterator` impl.
+pub struct Lexer<'source> {
+    /// Current position in the source; copied into each token when it is created.
+    location: Location,
+    /// Peekable character stream over the borrowed source text.
+    chars: Peekable<Chars<'source>>,
+    /// Set when the `Eof` token is handed out, so later `next` calls return `None`.
+    done: bool,
+}
+
+impl Iterator for Lexer<'_> {
+    type Item = Token;
+
+    /// Produces the next token from the source.
+    ///
+    /// Horizontal whitespace is skipped before a token starts. Once the input
+    /// is exhausted a single `Eof` token is returned; every later call returns
+    /// `None`. A character that starts no known token is consumed and reported
+    /// as `Unknown` — this includes `'\n'`, which `skip_whitespace` leaves in
+    /// place.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.done {
+            return None;
+        }
+
+        // Skip blanks *before* testing for end of input; doing it the other way
+        // round makes trailing whitespace end the iteration without an `Eof`.
+        self.skip_whitespace();
+
+        let c = match self.chars.peek() {
+            Some(&c) => c,
+            None => {
+                self.done = true;
+                return Some(Token {
+                    location: self.location,
+                    variant: TokenVariant::Eof,
+                });
+            }
+        };
+
+        let token = match c {
+            // ASCII digits only: `char::is_numeric` also accepts characters such
+            // as '½' that the integer/float parsers reject.
+            _ if c.is_ascii_digit() => self.number(),
+            '+' => self.char_token(TokenVariant::OpPlus),
+            '-' => self.char_token(TokenVariant::OpMinus),
+            '*' => self.char_token(TokenVariant::OpStar),
+            '/' => self.char_token(TokenVariant::OpSlash),
+            '!' => self.char_token(TokenVariant::OpNot),
+            '(' => self.char_token(TokenVariant::GroupOpen),
+            ')' => self.char_token(TokenVariant::GroupClose),
+            _ => self.char_token(TokenVariant::Unknown(c)),
+        };
+
+        Some(token)
+    }
+}
+
+impl<'s> Lexer<'s> {
+    /// Creates a lexer over `source`, starting at row 0, column 0.
+    pub fn new(source: &'s str) -> Self {
+        let start = Location { row: 0, col: 0 };
+        let chars = source.chars().peekable();
+
+        Self {
+            chars,
+            location: start,
+            done: false,
+        }
+    }
+
+    /// Consumes one character and updates `location` to match.
+    ///
+    /// A newline moves to the start of the next row; any other character moves
+    /// one column to the right. Returns the consumed character, or `None` when
+    /// the input is exhausted (in which case `location` is left untouched).
+    fn advance(&mut self) -> Option<char> {
+        let next = self.chars.next();
+
+        match next {
+            Some('\n') => {
+                self.location.row += 1;
+                self.location.col = 0;
+            }
+            // Ordinary characters advance the column, not the row.
+            Some(_) => self.location.col += 1,
+            None => {}
+        }
+
+        next
+    }
+
+    /// Consumes consecutive whitespace characters, stopping in front of a
+    /// newline, the first non-whitespace character, or the end of input.
+    fn skip_whitespace(&mut self) {
+        while let Some(&ch) = self.chars.peek() {
+            // Newlines are deliberately left for the caller to deal with.
+            if ch == '\n' || !ch.is_whitespace() {
+                break;
+            }
+            self.advance();
+        }
+    }
+
+    /// Builds a token of the given `variant` located at the current position,
+    /// then consumes one character.
+    fn char_token(&mut self, variant: TokenVariant) -> Token {
+        // `Location` is `Copy`: capture the position before `advance` moves it.
+        let location = self.location;
+        self.advance();
+
+        Token { location, variant }
+    }
+
+    /// Lexes a numeric literal starting at the current position.
+    ///
+    /// Consumes ASCII digits and at most one `.`. Without a `.` the literal
+    /// becomes `TokenVariant::Int`, otherwise `TokenVariant::Float`. A second
+    /// `.` is not consumed, so it ends the literal instead of producing text
+    /// that cannot be parsed.
+    ///
+    /// If no digit can be consumed at all (the current character is not an
+    /// ASCII digit), that single character is consumed and returned as
+    /// `Unknown`, or `Eof` is returned when the input is exhausted.
+    ///
+    /// # Panics
+    ///
+    /// Panics if an integer literal does not fit in a `u32`.
+    fn number(&mut self) -> Token {
+        let location = self.location;
+
+        let mut is_integer = true;
+        let mut buffer = String::new();
+
+        while let Some(&c) = self.chars.peek() {
+            // Only the first '.' belongs to the literal.
+            let is_first_dot = c == '.' && is_integer;
+            if !(c.is_ascii_digit() || is_first_dot) {
+                break;
+            }
+            if is_first_dot {
+                is_integer = false;
+            }
+            buffer.push(c);
+            self.advance();
+        }
+
+        if buffer.is_empty() {
+            // Reached when the caller dispatched on a non-ASCII numeric
+            // character such as '½'; consume it so the lexer keeps moving.
+            let variant = self
+                .advance()
+                .map_or(TokenVariant::Eof, TokenVariant::Unknown);
+            return Token { location, variant };
+        }
+
+        let variant = if is_integer {
+            let int = buffer.parse().expect("Failed lexing integer token.");
+            TokenVariant::Int(int)
+        } else {
+            // `buffer` is digits with exactly one '.', starting with a digit,
+            // which the float parser always accepts.
+            let float = buffer.parse().expect("Failed lexing float token.");
+            TokenVariant::Float(float)
+        };
+
+        Token { location, variant }
+    }
+}
diff --git a/src/lex/mod.rs b/src/lex/mod.rs
new file mode 100644
index 0000000..f785280
--- /dev/null
+++ b/src/lex/mod.rs
@@ -0,0 +1,2 @@
+pub mod lexer;
+pub mod token;
\ No newline at end of file
diff --git a/src/lex/token.rs b/src/lex/token.rs
new file mode 100644
index 0000000..a43cf0f
--- /dev/null
+++ b/src/lex/token.rs
@@ -0,0 +1,29 @@
+/// A position in the source text, stored in every [`Token`].
+#[derive(Clone, Copy, Debug)]
+pub struct Location {
+    /// Column index; the lexer starts at 0 and resets it to 0 after a newline.
+    pub col: usize,
+    /// Row (line) index; the lexer starts at 0.
+    pub row: usize,
+}
+
+/// A single lexed token: what it is and where it was found.
+#[derive(Clone, Debug)]
+pub struct Token {
+    /// Lexer position recorded when the token was started.
+    pub location: Location,
+    /// The kind of token, including any literal value it carries.
+    pub variant: TokenVariant,
+}
+
+/// The kinds of token the lexer can produce.
+#[derive(Clone, Debug)]
+pub enum TokenVariant {
+    /// `+`
+    OpPlus,
+    /// `-`
+    OpMinus,
+    /// `*`
+    OpStar,
+    /// `/`
+    OpSlash,
+    /// `!`
+    OpNot,
+
+    /// `(`
+    GroupOpen,
+    /// `)`
+    GroupClose,
+
+    /// Numeric literal without a `.`.
+    Int(u32),
+    /// Numeric literal containing a `.`.
+    Float(f32),
+
+    /// Any character the lexer does not recognise, carried verbatim.
+    Unknown(char),
+    /// End of input; emitted once, as the final token.
+    Eof,
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..3b23d8e
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,12 @@
+mod lex;
+
+use lex::lexer::Lexer;
+
+/// Lexes a fixed sample expression and prints each token's debug form,
+/// one per line.
+fn main() {
+    Lexer::new("1 + 2").for_each(|token| println!("{:?}", token));
+}