diff options
Diffstat (limited to 'pkg/lang/scanner')
| -rw-r--r-- | pkg/lang/scanner/errors.go | 9 | ||||
| -rw-r--r-- | pkg/lang/scanner/scanner.go | 119 | ||||
| -rw-r--r-- | pkg/lang/scanner/scanner_test.go | 187 | ||||
| -rw-r--r-- | pkg/lang/scanner/token/loc.go | 10 | ||||
| -rw-r--r-- | pkg/lang/scanner/token/token.go | 8 |
5 files changed, 127 insertions, 206 deletions
diff --git a/pkg/lang/scanner/errors.go b/pkg/lang/scanner/errors.go index dd9aae3..6a6f9d9 100644 --- a/pkg/lang/scanner/errors.go +++ b/pkg/lang/scanner/errors.go @@ -7,15 +7,6 @@ var ( ErrUnclosedString = errors.New("unclosed string") ) -type ErrUnexpectedChar struct { - Expected rune - Actual rune -} - -func (e ErrUnexpectedChar) Error() string { - return "unexpected character: expected " + string(e.Expected) + ", actual " + string(e.Actual) -} - type ErrUnknownChar struct { Char rune } diff --git a/pkg/lang/scanner/scanner.go b/pkg/lang/scanner/scanner.go index 19e3462..2991083 100644 --- a/pkg/lang/scanner/scanner.go +++ b/pkg/lang/scanner/scanner.go @@ -1,29 +1,24 @@ package scanner import ( - "bufio" - "errors" "io" "jinx/pkg/lang/scanner/token" + "jinx/pkg/libs/source" "strconv" "strings" "unicode" ) type Scanner struct { - source *bufio.Reader - row int - col int + src source.Walker indent int finished bool } -func New(source io.Reader) *Scanner { +func New(src io.Reader) *Scanner { return &Scanner{ - source: bufio.NewReader(source), - row: 0, - col: 0, + src: *source.NewWalker(src), indent: 0, finished: false, } @@ -62,14 +57,14 @@ func (s *Scanner) scanToken() (token.Token, error) { return token.New(token.EOL, firstNewline, nil), nil } - c, eof, err := s.peek() + c, eof, err := s.src.Peek() if err != nil { return token.Token{}, err } if eof { s.finished = true - return token.Simple(token.EOF, s.loc()), nil + return token.Simple(token.EOF, s.src.Loc()), nil } if c == '"' { @@ -80,8 +75,8 @@ func (s *Scanner) scanToken() (token.Token, error) { return s.scanNumber() } - loc := s.loc() - c, _, err = s.next() + loc := s.src.Loc() + c, _, err = s.src.Next() if err != nil { return token.Token{}, err } @@ -91,7 +86,7 @@ func (s *Scanner) scanToken() (token.Token, error) { case '\n': kind = token.EOL case '=': - if cont, err := s.consume('='); cont && err == nil { + if cont, err := s.src.Consume('='); cont && err == nil { kind = token.Eq } else if cont && err != nil { kind = token.Assign @@ -110,7 +105,7 @@ func (s *Scanner) scanToken() (token.Token, error) { kind = token.Percent case '<': - if cont, err := s.consume('='); cont && err == nil { + if cont, err := s.src.Consume('='); cont && err == nil { kind = token.Lte } else if cont && err != nil { kind = token.Lt @@ -118,7 +113,7 @@ func (s *Scanner) scanToken() (token.Token, error) { return token.Token{}, err } case '>': - if cont, err := s.consume('='); cont && err == nil { + if cont, err := s.src.Consume('='); cont && err == nil { kind = token.Gte } else if cont && err != nil { kind = token.Gt @@ -126,7 +121,7 @@ func (s *Scanner) scanToken() (token.Token, error) { return token.Token{}, err } case '!': - if cont, err := s.consume('='); cont && err == nil { + if cont, err := s.src.Consume('='); cont && err == nil { kind = token.Neq } else if cont && err != nil { kind = token.Bang @@ -162,15 +157,15 @@ func (s *Scanner) scanToken() (token.Token, error) { } func (s *Scanner) scanString() (token.Token, error) { - loc := s.loc() - if _, err := s.consume('"'); err != nil { + loc := s.src.Loc() + if _, err := s.src.Consume('"'); err != nil { return token.Token{}, err } var buf strings.Builder for { - c, eof, err := s.next() + c, eof, err := s.src.Next() if err != nil { return token.Token{}, err } @@ -190,12 +185,12 @@ func (s *Scanner) scanString() (token.Token, error) { } func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) { - loc := s.loc() + loc := s.src.Loc() var buf strings.Builder for { - c, eof, err := s.peek() + c, eof, err := s.src.Peek() if err != nil { return token.Token{}, err } @@ -204,7 +199,7 @@ func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) { break } - if _, _, err = s.next(); err != nil { + if _, _, err = s.src.Next(); err != nil { return token.Token{}, err } @@ -270,12 +265,12 @@ func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) { } func (s *Scanner) scanNumber() (token.Token, error) { - loc := s.loc() + loc := s.src.Loc() var buf strings.Builder for { - c, eof, err := s.peek() + c, eof, err := s.src.Peek() if err != nil { return token.Token{}, err } @@ -284,7 +279,7 @@ func (s *Scanner) scanNumber() (token.Token, error) { break } - if _, _, err = s.next(); err != nil { + if _, _, err = s.src.Next(); err != nil { return token.Token{}, err } @@ -299,18 +294,18 @@ func (s *Scanner) scanNumber() (token.Token, error) { return token.New(token.Int, loc, num), nil } -func (s *Scanner) skipWhitespace() (bool, token.Loc, error) { +func (s *Scanner) skipWhitespace() (bool, source.Loc, error) { hadNewline := false - firstNewline := token.Loc{} + firstNewline := source.Loc{} for { - c, eof, err := s.peek() + c, eof, err := s.src.Peek() if err != nil { - return false, token.Loc{}, err + return false, source.Loc{}, err } if c == '\n' && !hadNewline { - firstNewline = s.loc() + firstNewline = s.src.Loc() hadNewline = true } @@ -318,68 +313,10 @@ func (s *Scanner) skipWhitespace() (bool, token.Loc, error) { break } - if _, _, err = s.next(); err != nil { - return false, token.Loc{}, err + if _, _, err = s.src.Next(); err != nil { + return false, source.Loc{}, err } } return hadNewline, firstNewline, nil } - -func (s *Scanner) loc() token.Loc { - return token.Loc{ - Row: s.row, - Col: s.col, - } -} - -func (s *Scanner) next() (rune, bool, error) { - r, _, err := s.source.ReadRune() - if err != nil { - if errors.Is(err, io.EOF) { - return 0, true, nil - } - - return 0, false, err - } - - if r == '\n' { - s.row++ - s.col = 0 - } else { - s.col++ - } - - return r, false, nil -} - -func (s *Scanner) consume(want rune) (bool, error) { - c, _, err := s.next() - if err != nil { - return false, err - } - - if c != want { - return true, ErrUnexpectedChar{ - Expected: want, - Actual: c, - } - } - - return true, nil -} - -func (s *Scanner) peek() (rune, bool, error) { - r, _, err := s.source.ReadRune() - defer s.source.UnreadRune() - - if err != nil { - if errors.Is(err, io.EOF) { - return 0, true, nil - } - - return 0, false, err - } - - return r, false, nil -} diff --git a/pkg/lang/scanner/scanner_test.go b/pkg/lang/scanner/scanner_test.go index 99e77fd..4df4b23 100644 --- a/pkg/lang/scanner/scanner_test.go +++ b/pkg/lang/scanner/scanner_test.go @@ -3,6 +3,7 @@ package scanner_test import ( "jinx/pkg/lang/scanner" "jinx/pkg/lang/scanner/token" + "jinx/pkg/libs/source" "strings" "testing" @@ -10,7 +11,7 @@ import ( ) func TestBasic(t *testing.T) { - source := ` + src := ` fn basic() { var x = 1 var y = x + 1 @@ -22,158 +23,158 @@ func TestBasic(t *testing.T) { return true }` - s := scanner.New(strings.NewReader(source)) + s := scanner.New(strings.NewReader(src)) tokens, err := s.Scan() require.NoError(t, err) expected := []token.Token{ - token.Simple(token.EOL, token.NewLoc(0, 0)), - - token.Simple(token.KwFn, token.NewLoc(1, 1)), - token.New(token.Ident, token.NewLoc(1, 4), "basic"), - token.Simple(token.LParen, token.NewLoc(1, 9)), - token.Simple(token.RParen, token.NewLoc(1, 10)), - token.Simple(token.LBrace, token.NewLoc(1, 12)), - token.Simple(token.EOL, token.NewLoc(1, 13)), - - token.Simple(token.KwVar, token.NewLoc(2, 2)), - token.New(token.Ident, token.NewLoc(2, 6), "x"), - token.Simple(token.Assign, token.NewLoc(2, 8)), - token.New(token.Int, token.NewLoc(2, 10), uint64(1)), - token.Simple(token.EOL, token.NewLoc(2, 11)), - - token.Simple(token.KwVar, token.NewLoc(3, 2)), - token.New(token.Ident, token.NewLoc(3, 6), "y"), - token.Simple(token.Assign, token.NewLoc(3, 8)), - token.New(token.Ident, token.NewLoc(3, 10), "x"), - token.Simple(token.Plus, token.NewLoc(3, 12)), - token.New(token.Int, token.NewLoc(3, 14), uint64(1)), - token.Simple(token.EOL, token.NewLoc(3, 15)), - - token.Simple(token.KwIf, token.NewLoc(4, 2)), - token.New(token.Ident, token.NewLoc(4, 5), "x"), - token.Simple(token.Lt, token.NewLoc(4, 7)), - token.New(token.Ident, token.NewLoc(4, 9), "y"), - token.Simple(token.LBrace, token.NewLoc(4, 11)), - token.Simple(token.EOL, token.NewLoc(4, 12)), - - token.New(token.Ident, token.NewLoc(5, 3), "say"), - token.Simple(token.LParen, token.NewLoc(5, 6)), - token.New(token.String, token.NewLoc(5, 7), "x is less than y"), - token.Simple(token.RParen, token.NewLoc(5, 25)), - token.Simple(token.EOL, token.NewLoc(5, 26)), - - token.Simple(token.RBrace, token.NewLoc(6, 2)), - token.Simple(token.KwElse, token.NewLoc(6, 4)), - token.Simple(token.LBrace, token.NewLoc(6, 9)), - token.Simple(token.EOL, token.NewLoc(6, 10)), - - token.New(token.Ident, token.NewLoc(7, 3), "say"), - token.Simple(token.LParen, token.NewLoc(7, 6)), - token.New(token.String, token.NewLoc(7, 7), "x is greater than or equal to y"), - token.Simple(token.RParen, token.NewLoc(7, 40)), - token.Simple(token.EOL, token.NewLoc(7, 41)), - - token.Simple(token.RBrace, token.NewLoc(8, 2)), - token.Simple(token.EOL, token.NewLoc(8, 3)), - - token.Simple(token.KwReturn, token.NewLoc(9, 2)), - token.Simple(token.KwTrue, token.NewLoc(9, 9)), - token.Simple(token.EOL, token.NewLoc(9, 13)), - - token.Simple(token.RBrace, token.NewLoc(10, 1)), - - token.Simple(token.EOF, token.NewLoc(10, 2)), + token.Simple(token.EOL, source.NewLoc(0, 0)), + + token.Simple(token.KwFn, source.NewLoc(1, 1)), + token.New(token.Ident, source.NewLoc(1, 4), "basic"), + token.Simple(token.LParen, source.NewLoc(1, 9)), + token.Simple(token.RParen, source.NewLoc(1, 10)), + token.Simple(token.LBrace, source.NewLoc(1, 12)), + token.Simple(token.EOL, source.NewLoc(1, 13)), + + token.Simple(token.KwVar, source.NewLoc(2, 2)), + token.New(token.Ident, source.NewLoc(2, 6), "x"), + token.Simple(token.Assign, source.NewLoc(2, 8)), + token.New(token.Int, source.NewLoc(2, 10), uint64(1)), + token.Simple(token.EOL, source.NewLoc(2, 11)), + + token.Simple(token.KwVar, source.NewLoc(3, 2)), + token.New(token.Ident, source.NewLoc(3, 6), "y"), + token.Simple(token.Assign, source.NewLoc(3, 8)), + token.New(token.Ident, source.NewLoc(3, 10), "x"), + token.Simple(token.Plus, source.NewLoc(3, 12)), + token.New(token.Int, source.NewLoc(3, 14), uint64(1)), + token.Simple(token.EOL, source.NewLoc(3, 15)), + + token.Simple(token.KwIf, source.NewLoc(4, 2)), + token.New(token.Ident, source.NewLoc(4, 5), "x"), + token.Simple(token.Lt, source.NewLoc(4, 7)), + token.New(token.Ident, source.NewLoc(4, 9), "y"), + token.Simple(token.LBrace, source.NewLoc(4, 11)), + token.Simple(token.EOL, source.NewLoc(4, 12)), + + token.New(token.Ident, source.NewLoc(5, 3), "say"), + token.Simple(token.LParen, source.NewLoc(5, 6)), + token.New(token.String, source.NewLoc(5, 7), "x is less than y"), + token.Simple(token.RParen, source.NewLoc(5, 25)), + token.Simple(token.EOL, source.NewLoc(5, 26)), + + token.Simple(token.RBrace, source.NewLoc(6, 2)), + token.Simple(token.KwElse, source.NewLoc(6, 4)), + token.Simple(token.LBrace, source.NewLoc(6, 9)), + token.Simple(token.EOL, source.NewLoc(6, 10)), + + token.New(token.Ident, source.NewLoc(7, 3), "say"), + token.Simple(token.LParen, source.NewLoc(7, 6)), + token.New(token.String, source.NewLoc(7, 7), "x is greater than or equal to y"), + token.Simple(token.RParen, source.NewLoc(7, 40)), + token.Simple(token.EOL, source.NewLoc(7, 41)), + + token.Simple(token.RBrace, source.NewLoc(8, 2)), + token.Simple(token.EOL, source.NewLoc(8, 3)), + + token.Simple(token.KwReturn, source.NewLoc(9, 2)), + token.Simple(token.KwTrue, source.NewLoc(9, 9)), + token.Simple(token.EOL, source.NewLoc(9, 13)), + + token.Simple(token.RBrace, source.NewLoc(10, 1)), + + token.Simple(token.EOF, source.NewLoc(10, 2)), } require.Equal(t, expected, tokens) } func TestTightIdent(t *testing.T) { - source := `say(message)` + src := `say(message)` - s := scanner.New(strings.NewReader(source)) + s := scanner.New(strings.NewReader(src)) tokens, err := s.Scan() require.NoError(t, err) expected := []token.Token{ - token.New(token.Ident, token.NewLoc(0, 0), "say"), - token.Simple(token.LParen, token.NewLoc(0, 3)), - token.New(token.Ident, token.NewLoc(0, 4), "message"), - token.Simple(token.RParen, token.NewLoc(0, 11)), - token.Simple(token.EOF, token.NewLoc(0, 12)), + token.New(token.Ident, source.NewLoc(0, 0), "say"), + token.Simple(token.LParen, source.NewLoc(0, 3)), + token.New(token.Ident, source.NewLoc(0, 4), "message"), + token.Simple(token.RParen, source.NewLoc(0, 11)), + token.Simple(token.EOF, source.NewLoc(0, 12)), } require.Equal(t, expected, tokens) } func TestTightNumber(t *testing.T) { - source := `1+2+3` + src := `1+2+3` - s := scanner.New(strings.NewReader(source)) + s := scanner.New(strings.NewReader(src)) tokens, err := s.Scan() require.NoError(t, err) expected := []token.Token{ - token.New(token.Int, token.NewLoc(0, 0), uint64(1)), - token.Simple(token.Plus, token.NewLoc(0, 1)), - token.New(token.Int, token.NewLoc(0, 2), uint64(2)), - token.Simple(token.Plus, token.NewLoc(0, 3)), - token.New(token.Int, token.NewLoc(0, 4), uint64(3)), - token.Simple(token.EOF, token.NewLoc(0, 5)), + token.New(token.Int, source.NewLoc(0, 0), uint64(1)), + token.Simple(token.Plus, source.NewLoc(0, 1)), + token.New(token.Int, source.NewLoc(0, 2), uint64(2)), + token.Simple(token.Plus, source.NewLoc(0, 3)), + token.New(token.Int, source.NewLoc(0, 4), uint64(3)), + token.Simple(token.EOF, source.NewLoc(0, 5)), } require.Equal(t, expected, tokens) } func TestNewlineStacking(t *testing.T) { - source := ` + src := ` x y ` - s := scanner.New(strings.NewReader(source)) + s := scanner.New(strings.NewReader(src)) tokens, err := s.Scan() require.NoError(t, err) expected := []token.Token{ - token.Simple(token.EOL, token.NewLoc(0, 0)), - token.New(token.Ident, token.NewLoc(1, 1), "x"), - token.Simple(token.EOL, token.NewLoc(1, 2)), - token.New(token.Ident, token.NewLoc(4, 1), "y"), - token.Simple(token.EOL, token.NewLoc(4, 2)), - token.Simple(token.EOF, token.NewLoc(5, 1)), + token.Simple(token.EOL, source.NewLoc(0, 0)), + token.New(token.Ident, source.NewLoc(1, 1), "x"), + token.Simple(token.EOL, source.NewLoc(1, 2)), + token.New(token.Ident, source.NewLoc(4, 1), "y"), + token.Simple(token.EOL, source.NewLoc(4, 2)), + token.Simple(token.EOF, source.NewLoc(5, 1)), } require.Equal(t, expected, tokens) } func TestEmojiInStrings(t *testing.T) { - source := ` + src := ` say("🇺🇦" + "❤️!") ` - s := scanner.New(strings.NewReader(source)) + s := scanner.New(strings.NewReader(src)) tokens, err := s.Scan() require.NoError(t, err) expected := []token.Token{ - token.Simple(token.EOL, token.NewLoc(0, 0)), - token.New(token.Ident, token.NewLoc(1, 1), "say"), - token.Simple(token.LParen, token.NewLoc(1, 4)), - token.New(token.String, token.NewLoc(1, 5), "🇺🇦"), - token.Simple(token.Plus, token.NewLoc(1, 10)), - token.New(token.String, token.NewLoc(1, 12), "❤️!"), - token.Simple(token.RParen, token.NewLoc(1, 17)), - token.Simple(token.EOL, token.NewLoc(1, 18)), - token.Simple(token.EOF, token.NewLoc(2, 1)), + token.Simple(token.EOL, source.NewLoc(0, 0)), + token.New(token.Ident, source.NewLoc(1, 1), "say"), + token.Simple(token.LParen, source.NewLoc(1, 4)), + token.New(token.String, source.NewLoc(1, 5), "🇺🇦"), + token.Simple(token.Plus, source.NewLoc(1, 10)), + token.New(token.String, source.NewLoc(1, 12), "❤️!"), + token.Simple(token.RParen, source.NewLoc(1, 17)), + token.Simple(token.EOL, source.NewLoc(1, 18)), + token.Simple(token.EOF, source.NewLoc(2, 1)), } require.Equal(t, expected, tokens) diff --git a/pkg/lang/scanner/token/loc.go b/pkg/lang/scanner/token/loc.go deleted file mode 100644 index 7936cba..0000000 --- a/pkg/lang/scanner/token/loc.go +++ /dev/null @@ -1,10 +0,0 @@ -package token - -type Loc struct { - Row int - Col int -} - -func NewLoc(row, col int) Loc { - return Loc{row, col} -} diff --git a/pkg/lang/scanner/token/token.go b/pkg/lang/scanner/token/token.go index 1ccd864..a39df8a 100644 --- a/pkg/lang/scanner/token/token.go +++ b/pkg/lang/scanner/token/token.go @@ -1,19 +1,21 @@ package token +import "jinx/pkg/libs/source" + type Token struct { Kind TokenKind - At Loc + At source.Loc Data any } -func Simple(kind TokenKind, at Loc) Token { +func Simple(kind TokenKind, at source.Loc) Token { return Token{ Kind: kind, At: at, } } -func New(kind TokenKind, at Loc, data any) Token { +func New(kind TokenKind, at source.Loc, data any) Token { return Token{ Kind: kind, At: at, |
