diff options
| author | Mel <einebeere@gmail.com> | 2022-04-16 22:05:25 +0200 |
|---|---|---|
| committer | Mel <einebeere@gmail.com> | 2022-04-16 22:05:25 +0200 |
| commit | bbb2962bd4bac0ce1271ec7d7cb65d038ead8ed2 (patch) | |
| tree | 01cc597df41e9ed9b246f9d537783f68e126caab /pkg/lang/scanner | |
| parent | 6163d259ed52991e2f95632b5a0516607aa56a5f (diff) | |
| download | jinx-bbb2962bd4bac0ce1271ec7d7cb65d038ead8ed2.tar.zst jinx-bbb2962bd4bac0ce1271ec7d7cb65d038ead8ed2.zip | |
Basic scanner for lang
Diffstat (limited to 'pkg/lang/scanner')
| -rw-r--r-- | pkg/lang/scanner/errors.go | 25 | ||||
| -rw-r--r-- | pkg/lang/scanner/scanner.go | 364 | ||||
| -rw-r--r-- | pkg/lang/scanner/scanner_test.go | 29 | ||||
| -rw-r--r-- | pkg/lang/scanner/token/kind.go | 71 | ||||
| -rw-r--r-- | pkg/lang/scanner/token/loc.go | 6 | ||||
| -rw-r--r-- | pkg/lang/scanner/token/token.go | 22 |
6 files changed, 517 insertions, 0 deletions
diff --git a/pkg/lang/scanner/errors.go b/pkg/lang/scanner/errors.go
new file mode 100644
--- /dev/null
+++ b/pkg/lang/scanner/errors.go
@@ -0,0 +1,33 @@
+package scanner
+
+import "errors"
+
+var (
+	// ErrScannerFinished is returned when a token is requested after the EOF
+	// token has already been produced.
+	ErrScannerFinished = errors.New("scanner already finished")
+
+	// ErrUnclosedString is returned when the input ends inside a string literal.
+	ErrUnclosedString = errors.New("unclosed string")
+)
+
+// ErrUnexpectedChar reports that Expected was required but Actual was read.
+type ErrUnexpectedChar struct {
+	Expected rune
+	Actual   rune
+}
+
+// Error implements the error interface.
+func (e ErrUnexpectedChar) Error() string {
+	return "unexpected character: expected " + string(e.Expected) + ", actual " + string(e.Actual)
+}
+
+// ErrUnknownChar reports a character that does not start any token.
+type ErrUnknownChar struct {
+	Char rune
+}
+
+// Error implements the error interface.
+func (e ErrUnknownChar) Error() string {
+	return "unknown character: " + string(e.Char)
+}
diff --git a/pkg/lang/scanner/scanner.go b/pkg/lang/scanner/scanner.go
new file mode 100644
--- /dev/null
+++ b/pkg/lang/scanner/scanner.go
@@ -0,0 +1,414 @@
+// Package scanner converts source text into a sequence of tokens.
+package scanner
+
+import (
+	"bufio"
+	"errors"
+	"io"
+	"jinx/pkg/lang/scanner/token"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// Scanner reads runes from a buffered source and groups them into tokens. row
+// and col hold the zero-based position of the next unread rune.
+type Scanner struct {
+	source *bufio.Reader
+	row    int
+	col    int
+	indent int
+
+	finished bool
+}
+
+// New returns a Scanner that reads from source, starting at row 0, column 0.
+func New(source io.Reader) *Scanner {
+	return &Scanner{
+		source:   bufio.NewReader(source),
+		row:      0,
+		col:      0,
+		indent:   0,
+		finished: false,
+	}
+}
+
+// Scan tokenizes the rest of the input. On success the returned slice ends
+// with an EOF token; on failure the tokens scanned so far are discarded.
+func (s *Scanner) Scan() ([]token.Token, error) {
+	tokens := make([]token.Token, 0)
+
+	for {
+		t, err := s.scanToken()
+		if err != nil {
+			return nil, err
+		}
+
+		tokens = append(tokens, t)
+
+		if t.Kind == token.EOF {
+			break
+		}
+	}
+
+	return tokens, nil
+}
+
+// scanToken skips blanks and returns the next token. Once EOF has been
+// returned, every further call fails with ErrScannerFinished.
+func (s *Scanner) scanToken() (token.Token, error) {
+	if s.finished {
+		return token.Token{}, ErrScannerFinished
+	}
+
+	if err := s.skipWhitespace(); err != nil {
+		return token.Token{}, err
+	}
+
+	c, eof, err := s.peek()
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	if eof {
+		s.finished = true
+		return token.Simple(token.EOF, s.loc()), nil
+	}
+
+	switch {
+	case c == '"':
+		return s.scanString()
+	case unicode.IsLetter(c):
+		return s.scanIdentifierOrKeyword()
+	case unicode.IsDigit(c):
+		return s.scanNumber()
+	}
+
+	loc := s.loc()
+	c, _, err = s.next()
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	var kind token.TokenKind
+	switch c {
+	case '\n':
+		kind = token.EOL
+	case '=':
+		kind, err = s.withEq(token.Assign, token.Eq)
+	case '+':
+		kind = token.Plus
+	case '-':
+		kind = token.Minus
+	case '*':
+		kind = token.Star
+	case '/':
+		kind = token.Slash
+	case '%':
+		kind = token.Percent
+
+	case '<':
+		kind, err = s.withEq(token.Lt, token.Lte)
+	case '>':
+		kind, err = s.withEq(token.Gt, token.Gte)
+	case '!':
+		kind, err = s.withEq(token.Bang, token.Neq)
+
+	case '[':
+		kind = token.LBracket
+	case ']':
+		kind = token.RBracket
+	case '(':
+		kind = token.LParen
+	case ')':
+		kind = token.RParen
+	case '{':
+		kind = token.LBrace
+	case '}':
+		kind = token.RBrace
+
+	case ',':
+		kind = token.Comma
+	case '.':
+		kind = token.Dot
+	case ';':
+		kind = token.SemiColon
+
+	default:
+		return token.Token{}, ErrUnknownChar{Char: c}
+	}
+
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	return token.Simple(kind, loc), nil
+}
+
+// withEq picks between a one-rune operator and its two-rune form ending in
+// '='. The '=' is consumed only when present, so the rune after a one-rune
+// operator stays available for the next token.
+func (s *Scanner) withEq(single, double token.TokenKind) (token.TokenKind, error) {
+	ok, err := s.match('=')
+	if err != nil {
+		return single, err
+	}
+
+	if ok {
+		return double, nil
+	}
+
+	return single, nil
+}
+
+// scanString reads a double-quoted string literal and returns its contents
+// without the quotes. There are no escape sequences; input that ends before
+// the closing quote yields ErrUnclosedString.
+func (s *Scanner) scanString() (token.Token, error) {
+	loc := s.loc()
+	if _, err := s.consume('"'); err != nil {
+		return token.Token{}, err
+	}
+
+	var buf strings.Builder
+
+	for {
+		c, eof, err := s.next()
+		if err != nil {
+			return token.Token{}, err
+		}
+
+		if eof {
+			return token.Token{}, ErrUnclosedString
+		}
+
+		if c == '"' {
+			break
+		}
+
+		buf.WriteRune(c)
+	}
+
+	return token.New(token.String, loc, buf.String()), nil
+}
+
+// scanIdentifierOrKeyword reads a run of letters and digits and returns either
+// the matching keyword token or an Ident token carrying the text.
+func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
+	loc := s.loc()
+
+	word, err := s.takeWhile(func(c rune) bool {
+		return unicode.IsLetter(c) || unicode.IsDigit(c)
+	})
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	var kind token.TokenKind
+	switch word {
+	case "var":
+		kind = token.KwVar
+	case "fn":
+		kind = token.KwFn
+	case "object":
+		kind = token.KwObject
+
+	case "if":
+		kind = token.KwIf
+	case "elif":
+		kind = token.KwElif
+	case "else":
+		kind = token.KwElse
+	case "for":
+		kind = token.KwFor
+	case "try":
+		kind = token.KwTry
+	case "catch":
+		kind = token.KwCatch
+	case "finally":
+		kind = token.KwFinally
+
+	case "return":
+		kind = token.KwReturn
+	case "continue":
+		kind = token.KwContinue
+	case "break":
+		kind = token.KwBreak
+	case "throw":
+		kind = token.KwThrow
+
+	case "in":
+		kind = token.KwIn
+
+	case "null":
+		kind = token.KwNull
+	case "true":
+		kind = token.KwTrue
+	case "false":
+		kind = token.KwFalse
+	case "this":
+		kind = token.KwThis
+
+	case "use":
+		kind = token.KwUse
+	case "from":
+		kind = token.KwFrom
+	case "by":
+		kind = token.KwBy
+	default:
+		return token.New(token.Ident, loc, word), nil
+	}
+
+	return token.Simple(kind, loc), nil
+}
+
+// scanNumber reads a run of digits and returns it as an Int token holding a
+// uint64. Values that strconv.ParseUint rejects are reported as an error.
+func (s *Scanner) scanNumber() (token.Token, error) {
+	loc := s.loc()
+
+	digits, err := s.takeWhile(unicode.IsDigit)
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	num, err := strconv.ParseUint(digits, 10, 64)
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	return token.New(token.Int, loc, num), nil
+}
+
+// takeWhile consumes runes for as long as keep accepts them and returns the
+// consumed text. The first rejected rune is only peeked, never consumed, so it
+// is still there for the next token.
+func (s *Scanner) takeWhile(keep func(rune) bool) (string, error) {
+	var buf strings.Builder
+
+	for {
+		c, eof, err := s.peek()
+		if err != nil {
+			return "", err
+		}
+
+		if eof || !keep(c) {
+			return buf.String(), nil
+		}
+
+		if _, _, err := s.next(); err != nil {
+			return "", err
+		}
+
+		buf.WriteRune(c)
+	}
+}
+
+// skipWhitespace consumes whitespace up to the next token. Newlines are left
+// in place because scanToken turns them into EOL tokens.
+func (s *Scanner) skipWhitespace() error {
+	for {
+		c, eof, err := s.peek()
+		if err != nil {
+			return err
+		}
+
+		if eof || c == '\n' || !unicode.IsSpace(c) {
+			break
+		}
+
+		if _, _, err = s.next(); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// loc returns the position of the next unread rune.
+func (s *Scanner) loc() token.Loc {
+	return token.Loc{
+		Row: s.row,
+		Col: s.col,
+	}
+}
+
+// next consumes one rune and advances the position. The boolean result is true
+// at end of input, in which case no rune was consumed.
+func (s *Scanner) next() (rune, bool, error) {
+	r, _, err := s.source.ReadRune()
+	if err != nil {
+		if errors.Is(err, io.EOF) {
+			return 0, true, nil
+		}
+
+		return 0, false, err
+	}
+
+	if r == '\n' {
+		s.row++
+		s.col = 0
+	} else {
+		s.col++
+	}
+
+	return r, false, nil
+}
+
+// match consumes the next rune only if it equals want and reports whether it
+// did. At end of input it reports false without an error.
+func (s *Scanner) match(want rune) (bool, error) {
+	c, eof, err := s.peek()
+	if err != nil {
+		return false, err
+	}
+
+	if eof || c != want {
+		return false, nil
+	}
+
+	if _, _, err := s.next(); err != nil {
+		return false, err
+	}
+
+	return true, nil
+}
+
+// consume reads the next rune unconditionally and fails with
+// ErrUnexpectedChar if it is not want. Use match when the rune is optional.
+func (s *Scanner) consume(want rune) (bool, error) {
+	c, _, err := s.next()
+	if err != nil {
+		return false, err
+	}
+
+	if c != want {
+		return false, ErrUnexpectedChar{
+			Expected: want,
+			Actual:   c,
+		}
+	}
+
+	return true, nil
+}
+
+// peek returns the next rune without consuming it. The boolean result is true
+// at end of input.
+func (s *Scanner) peek() (rune, bool, error) {
+	r, _, err := s.source.ReadRune()
+	if err != nil {
+		if errors.Is(err, io.EOF) {
+			return 0, true, nil
+		}
+
+		return 0, false, err
+	}
+
+	// Only a successful read can be undone, so unread here rather than in a
+	// defer, and surface a failure instead of dropping it.
+	if err := s.source.UnreadRune(); err != nil {
+		return 0, false, err
+	}
+
+	return r, false, nil
+}
diff --git a/pkg/lang/scanner/scanner_test.go b/pkg/lang/scanner/scanner_test.go
new file mode 100644
--- /dev/null
+++ b/pkg/lang/scanner/scanner_test.go
@@ -0,0 +1,53 @@
+package scanner_test
+
+import (
+	"jinx/pkg/lang/scanner"
+	"jinx/pkg/lang/scanner/token"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestBasic(t *testing.T) {
+	source := "var x = 1"
+
+	s := scanner.New(strings.NewReader(source))
+
+	tokens, err := s.Scan()
+	require.NoError(t, err)
+
+	expected := []token.Token{
+		token.Simple(token.KwVar, token.Loc{Row: 0, Col: 0}),
+		token.New(token.Ident, token.Loc{Row: 0, Col: 4}, "x"),
+		token.Simple(token.Assign, token.Loc{Row: 0, Col: 6}),
+		token.New(token.Int, token.Loc{Row: 0, Col: 8}, uint64(1)),
+		token.Simple(token.EOF, token.Loc{Row: 0, Col: 9}),
+	}
+
+	require.Equal(t, expected, tokens)
+}
+
+// TestAdjacentTokens checks that tokens written without separating blanks are
+// all kept and that a newline produces an EOL token.
+func TestAdjacentTokens(t *testing.T) {
+	source := "x=1\ny>=2"
+
+	s := scanner.New(strings.NewReader(source))
+
+	tokens, err := s.Scan()
+	require.NoError(t, err)
+
+	expected := []token.Token{
+		token.New(token.Ident, token.Loc{Row: 0, Col: 0}, "x"),
+		token.Simple(token.Assign, token.Loc{Row: 0, Col: 1}),
+		token.New(token.Int, token.Loc{Row: 0, Col: 2}, uint64(1)),
+		token.Simple(token.EOL, token.Loc{Row: 0, Col: 3}),
+		token.New(token.Ident, token.Loc{Row: 1, Col: 0}, "y"),
+		token.Simple(token.Gte, token.Loc{Row: 1, Col: 1}),
+		token.New(token.Int, token.Loc{Row: 1, Col: 3}, uint64(2)),
+		token.Simple(token.EOF, token.Loc{Row: 1, Col: 4}),
+	}
+
+	require.Equal(t, expected, tokens)
+}
diff --git a/pkg/lang/scanner/token/kind.go b/pkg/lang/scanner/token/kind.go
new file mode 100644
--- /dev/null
+++ b/pkg/lang/scanner/token/kind.go
@@ -0,0 +1,73 @@
+package token
+
+// TokenKind identifies the category of a token.
+type TokenKind int
+
+// Token kinds. EOF is the zero value of TokenKind.
+const (
+	EOF TokenKind = iota
+	EOL
+
+	// Keywords
+	KwVar
+	KwFn
+	KwObject
+
+	KwIf
+	KwElif
+	KwElse
+	KwFor
+	KwTry
+	KwCatch
+	KwFinally
+
+	KwReturn
+	KwContinue
+	KwBreak
+	KwThrow
+
+	KwIn
+
+	KwNull
+	KwTrue
+	KwFalse
+
+	KwThis
+
+	KwUse
+	KwFrom
+	KwBy
+
+	// Data Tokens
+	Ident
+	Int
+	Float
+	String
+
+	// Punctuation
+	Assign
+	Plus
+	Minus
+	Star
+	Slash
+	Percent
+	Bang
+
+	Eq
+	Neq
+	Lt
+	Gt
+	Lte
+	Gte
+
+	LParen
+	RParen
+	LBrace
+	RBrace
+	LBracket
+	RBracket
+
+	Comma
+	Dot
+	SemiColon
+)
diff --git a/pkg/lang/scanner/token/loc.go b/pkg/lang/scanner/token/loc.go
new file mode 100644
--- /dev/null
+++ b/pkg/lang/scanner/token/loc.go
@@ -0,0 +1,7 @@
+package token
+
+// Loc is a zero-based row and column position in the source.
+type Loc struct {
+	Row int
+	Col int
+}
diff --git a/pkg/lang/scanner/token/token.go b/pkg/lang/scanner/token/token.go
new file mode 100644
--- /dev/null
+++ b/pkg/lang/scanner/token/token.go
@@ -0,0 +1,27 @@
+// Package token defines the tokens produced by the scanner.
+package token
+
+// Token is a single lexical unit: its kind, where it starts, and an optional
+// payload (the text of an Ident or String, the uint64 value of an Int).
+type Token struct {
+	Kind TokenKind
+	At   Loc
+	Data any
+}
+
+// Simple returns a token that carries no data.
+func Simple(kind TokenKind, at Loc) Token {
+	return Token{
+		Kind: kind,
+		At:   at,
+	}
+}
+
+// New returns a token that carries data.
+func New(kind TokenKind, at Loc, data any) Token {
+	return Token{
+		Kind: kind,
+		At:   at,
+		Data: data,
+	}
+}
