about summary refs log tree commit diff
path: root/pkg/lang/scanner
diff options
context:
space:
mode:
author Mel <einebeere@gmail.com> 2022-05-09 00:01:02 +0200
committer Mel <einebeere@gmail.com> 2022-05-09 00:01:02 +0200
commit b09a14147d397904722ee7c25e4defc56135b96f (patch)
tree 694dc725528310f3a65d04785b8eea33908ce69f /pkg/lang/scanner
parent b5a9660b6ac42bce27c746e76013c3ce5992743a (diff)
download jinx-b09a14147d397904722ee7c25e4defc56135b96f.tar.zst
jinx-b09a14147d397904722ee7c25e4defc56135b96f.zip
Extract source walk part of scanner
Diffstat (limited to 'pkg/lang/scanner')
-rw-r--r-- pkg/lang/scanner/errors.go 9
-rw-r--r-- pkg/lang/scanner/scanner.go 119
-rw-r--r-- pkg/lang/scanner/scanner_test.go 187
-rw-r--r-- pkg/lang/scanner/token/loc.go 10
-rw-r--r-- pkg/lang/scanner/token/token.go 8
5 files changed, 127 insertions, 206 deletions
diff --git a/pkg/lang/scanner/errors.go b/pkg/lang/scanner/errors.go
index dd9aae3..6a6f9d9 100644
--- a/pkg/lang/scanner/errors.go
+++ b/pkg/lang/scanner/errors.go
@@ -7,15 +7,6 @@ var (
 	ErrUnclosedString  = errors.New("unclosed string")
 )
 
-type ErrUnexpectedChar struct {
-	Expected rune
-	Actual   rune
-}
-
-func (e ErrUnexpectedChar) Error() string {
-	return "unexpected character: expected " + string(e.Expected) + ", actual " + string(e.Actual)
-}
-
 type ErrUnknownChar struct {
 	Char rune
 }
diff --git a/pkg/lang/scanner/scanner.go b/pkg/lang/scanner/scanner.go
index 19e3462..2991083 100644
--- a/pkg/lang/scanner/scanner.go
+++ b/pkg/lang/scanner/scanner.go
@@ -1,29 +1,24 @@
 package scanner
 
 import (
-	"bufio"
-	"errors"
 	"io"
 	"jinx/pkg/lang/scanner/token"
+	"jinx/pkg/libs/source"
 	"strconv"
 	"strings"
 	"unicode"
 )
 
 type Scanner struct {
-	source *bufio.Reader
-	row    int
-	col    int
+	src    source.Walker
 	indent int
 
 	finished bool
 }
 
-func New(source io.Reader) *Scanner {
+func New(src io.Reader) *Scanner {
 	return &Scanner{
-		source:   bufio.NewReader(source),
-		row:      0,
-		col:      0,
+		src:      *source.NewWalker(src),
 		indent:   0,
 		finished: false,
 	}
@@ -62,14 +57,14 @@ func (s *Scanner) scanToken() (token.Token, error) {
 		return token.New(token.EOL, firstNewline, nil), nil
 	}
 
-	c, eof, err := s.peek()
+	c, eof, err := s.src.Peek()
 	if err != nil {
 		return token.Token{}, err
 	}
 
 	if eof {
 		s.finished = true
-		return token.Simple(token.EOF, s.loc()), nil
+		return token.Simple(token.EOF, s.src.Loc()), nil
 	}
 
 	if c == '"' {
@@ -80,8 +75,8 @@ func (s *Scanner) scanToken() (token.Token, error) {
 		return s.scanNumber()
 	}
 
-	loc := s.loc()
-	c, _, err = s.next()
+	loc := s.src.Loc()
+	c, _, err = s.src.Next()
 	if err != nil {
 		return token.Token{}, err
 	}
@@ -91,7 +86,7 @@ func (s *Scanner) scanToken() (token.Token, error) {
 	case '\n':
 		kind = token.EOL
 	case '=':
-		if cont, err := s.consume('='); cont && err == nil {
+		if cont, err := s.src.Consume('='); cont && err == nil {
 			kind = token.Eq
 		} else if cont && err != nil {
 			kind = token.Assign
@@ -110,7 +105,7 @@ func (s *Scanner) scanToken() (token.Token, error) {
 		kind = token.Percent
 
 	case '<':
-		if cont, err := s.consume('='); cont && err == nil {
+		if cont, err := s.src.Consume('='); cont && err == nil {
 			kind = token.Lte
 		} else if cont && err != nil {
 			kind = token.Lt
@@ -118,7 +113,7 @@ func (s *Scanner) scanToken() (token.Token, error) {
 			return token.Token{}, err
 		}
 	case '>':
-		if cont, err := s.consume('='); cont && err == nil {
+		if cont, err := s.src.Consume('='); cont && err == nil {
 			kind = token.Gte
 		} else if cont && err != nil {
 			kind = token.Gt
@@ -126,7 +121,7 @@ func (s *Scanner) scanToken() (token.Token, error) {
 			return token.Token{}, err
 		}
 	case '!':
-		if cont, err := s.consume('='); cont && err == nil {
+		if cont, err := s.src.Consume('='); cont && err == nil {
 			kind = token.Neq
 		} else if cont && err != nil {
 			kind = token.Bang
@@ -162,15 +157,15 @@ func (s *Scanner) scanToken() (token.Token, error) {
 }
 
 func (s *Scanner) scanString() (token.Token, error) {
-	loc := s.loc()
-	if _, err := s.consume('"'); err != nil {
+	loc := s.src.Loc()
+	if _, err := s.src.Consume('"'); err != nil {
 		return token.Token{}, err
 	}
 
 	var buf strings.Builder
 
 	for {
-		c, eof, err := s.next()
+		c, eof, err := s.src.Next()
 		if err != nil {
 			return token.Token{}, err
 		}
@@ -190,12 +185,12 @@ func (s *Scanner) scanString() (token.Token, error) {
 }
 
 func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
-	loc := s.loc()
+	loc := s.src.Loc()
 
 	var buf strings.Builder
 
 	for {
-		c, eof, err := s.peek()
+		c, eof, err := s.src.Peek()
 		if err != nil {
 			return token.Token{}, err
 		}
@@ -204,7 +199,7 @@ func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
 			break
 		}
 
-		if _, _, err = s.next(); err != nil {
+		if _, _, err = s.src.Next(); err != nil {
 			return token.Token{}, err
 		}
 
@@ -270,12 +265,12 @@ func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
 }
 
 func (s *Scanner) scanNumber() (token.Token, error) {
-	loc := s.loc()
+	loc := s.src.Loc()
 
 	var buf strings.Builder
 
 	for {
-		c, eof, err := s.peek()
+		c, eof, err := s.src.Peek()
 		if err != nil {
 			return token.Token{}, err
 		}
@@ -284,7 +279,7 @@ func (s *Scanner) scanNumber() (token.Token, error) {
 			break
 		}
 
-		if _, _, err = s.next(); err != nil {
+		if _, _, err = s.src.Next(); err != nil {
 			return token.Token{}, err
 		}
 
@@ -299,18 +294,18 @@ func (s *Scanner) scanNumber() (token.Token, error) {
 	return token.New(token.Int, loc, num), nil
 }
 
-func (s *Scanner) skipWhitespace() (bool, token.Loc, error) {
+func (s *Scanner) skipWhitespace() (bool, source.Loc, error) {
 	hadNewline := false
-	firstNewline := token.Loc{}
+	firstNewline := source.Loc{}
 
 	for {
-		c, eof, err := s.peek()
+		c, eof, err := s.src.Peek()
 		if err != nil {
-			return false, token.Loc{}, err
+			return false, source.Loc{}, err
 		}
 
 		if c == '\n' && !hadNewline {
-			firstNewline = s.loc()
+			firstNewline = s.src.Loc()
 			hadNewline = true
 		}
 
@@ -318,68 +313,10 @@ func (s *Scanner) skipWhitespace() (bool, token.Loc, error) {
 			break
 		}
 
-		if _, _, err = s.next(); err != nil {
-			return false, token.Loc{}, err
+		if _, _, err = s.src.Next(); err != nil {
+			return false, source.Loc{}, err
 		}
 	}
 
 	return hadNewline, firstNewline, nil
 }
-
-func (s *Scanner) loc() token.Loc {
-	return token.Loc{
-		Row: s.row,
-		Col: s.col,
-	}
-}
-
-func (s *Scanner) next() (rune, bool, error) {
-	r, _, err := s.source.ReadRune()
-	if err != nil {
-		if errors.Is(err, io.EOF) {
-			return 0, true, nil
-		}
-
-		return 0, false, err
-	}
-
-	if r == '\n' {
-		s.row++
-		s.col = 0
-	} else {
-		s.col++
-	}
-
-	return r, false, nil
-}
-
-func (s *Scanner) consume(want rune) (bool, error) {
-	c, _, err := s.next()
-	if err != nil {
-		return false, err
-	}
-
-	if c != want {
-		return true, ErrUnexpectedChar{
-			Expected: want,
-			Actual:   c,
-		}
-	}
-
-	return true, nil
-}
-
-func (s *Scanner) peek() (rune, bool, error) {
-	r, _, err := s.source.ReadRune()
-	defer s.source.UnreadRune()
-
-	if err != nil {
-		if errors.Is(err, io.EOF) {
-			return 0, true, nil
-		}
-
-		return 0, false, err
-	}
-
-	return r, false, nil
-}
diff --git a/pkg/lang/scanner/scanner_test.go b/pkg/lang/scanner/scanner_test.go
index 99e77fd..4df4b23 100644
--- a/pkg/lang/scanner/scanner_test.go
+++ b/pkg/lang/scanner/scanner_test.go
@@ -3,6 +3,7 @@ package scanner_test
 import (
 	"jinx/pkg/lang/scanner"
 	"jinx/pkg/lang/scanner/token"
+	"jinx/pkg/libs/source"
 	"strings"
 	"testing"
 
@@ -10,7 +11,7 @@ import (
 )
 
 func TestBasic(t *testing.T) {
-	source := `
+	src := `
 	fn basic() {
 		var x = 1
 		var y = x + 1
@@ -22,158 +23,158 @@ func TestBasic(t *testing.T) {
 		return true
 	}`
 
-	s := scanner.New(strings.NewReader(source))
+	s := scanner.New(strings.NewReader(src))
 
 	tokens, err := s.Scan()
 	require.NoError(t, err)
 
 	expected := []token.Token{
-		token.Simple(token.EOL, token.NewLoc(0, 0)),
-
-		token.Simple(token.KwFn, token.NewLoc(1, 1)),
-		token.New(token.Ident, token.NewLoc(1, 4), "basic"),
-		token.Simple(token.LParen, token.NewLoc(1, 9)),
-		token.Simple(token.RParen, token.NewLoc(1, 10)),
-		token.Simple(token.LBrace, token.NewLoc(1, 12)),
-		token.Simple(token.EOL, token.NewLoc(1, 13)),
-
-		token.Simple(token.KwVar, token.NewLoc(2, 2)),
-		token.New(token.Ident, token.NewLoc(2, 6), "x"),
-		token.Simple(token.Assign, token.NewLoc(2, 8)),
-		token.New(token.Int, token.NewLoc(2, 10), uint64(1)),
-		token.Simple(token.EOL, token.NewLoc(2, 11)),
-
-		token.Simple(token.KwVar, token.NewLoc(3, 2)),
-		token.New(token.Ident, token.NewLoc(3, 6), "y"),
-		token.Simple(token.Assign, token.NewLoc(3, 8)),
-		token.New(token.Ident, token.NewLoc(3, 10), "x"),
-		token.Simple(token.Plus, token.NewLoc(3, 12)),
-		token.New(token.Int, token.NewLoc(3, 14), uint64(1)),
-		token.Simple(token.EOL, token.NewLoc(3, 15)),
-
-		token.Simple(token.KwIf, token.NewLoc(4, 2)),
-		token.New(token.Ident, token.NewLoc(4, 5), "x"),
-		token.Simple(token.Lt, token.NewLoc(4, 7)),
-		token.New(token.Ident, token.NewLoc(4, 9), "y"),
-		token.Simple(token.LBrace, token.NewLoc(4, 11)),
-		token.Simple(token.EOL, token.NewLoc(4, 12)),
-
-		token.New(token.Ident, token.NewLoc(5, 3), "say"),
-		token.Simple(token.LParen, token.NewLoc(5, 6)),
-		token.New(token.String, token.NewLoc(5, 7), "x is less than y"),
-		token.Simple(token.RParen, token.NewLoc(5, 25)),
-		token.Simple(token.EOL, token.NewLoc(5, 26)),
-
-		token.Simple(token.RBrace, token.NewLoc(6, 2)),
-		token.Simple(token.KwElse, token.NewLoc(6, 4)),
-		token.Simple(token.LBrace, token.NewLoc(6, 9)),
-		token.Simple(token.EOL, token.NewLoc(6, 10)),
-
-		token.New(token.Ident, token.NewLoc(7, 3), "say"),
-		token.Simple(token.LParen, token.NewLoc(7, 6)),
-		token.New(token.String, token.NewLoc(7, 7), "x is greater than or equal to y"),
-		token.Simple(token.RParen, token.NewLoc(7, 40)),
-		token.Simple(token.EOL, token.NewLoc(7, 41)),
-
-		token.Simple(token.RBrace, token.NewLoc(8, 2)),
-		token.Simple(token.EOL, token.NewLoc(8, 3)),
-
-		token.Simple(token.KwReturn, token.NewLoc(9, 2)),
-		token.Simple(token.KwTrue, token.NewLoc(9, 9)),
-		token.Simple(token.EOL, token.NewLoc(9, 13)),
-
-		token.Simple(token.RBrace, token.NewLoc(10, 1)),
-
-		token.Simple(token.EOF, token.NewLoc(10, 2)),
+		token.Simple(token.EOL, source.NewLoc(0, 0)),
+
+		token.Simple(token.KwFn, source.NewLoc(1, 1)),
+		token.New(token.Ident, source.NewLoc(1, 4), "basic"),
+		token.Simple(token.LParen, source.NewLoc(1, 9)),
+		token.Simple(token.RParen, source.NewLoc(1, 10)),
+		token.Simple(token.LBrace, source.NewLoc(1, 12)),
+		token.Simple(token.EOL, source.NewLoc(1, 13)),
+
+		token.Simple(token.KwVar, source.NewLoc(2, 2)),
+		token.New(token.Ident, source.NewLoc(2, 6), "x"),
+		token.Simple(token.Assign, source.NewLoc(2, 8)),
+		token.New(token.Int, source.NewLoc(2, 10), uint64(1)),
+		token.Simple(token.EOL, source.NewLoc(2, 11)),
+
+		token.Simple(token.KwVar, source.NewLoc(3, 2)),
+		token.New(token.Ident, source.NewLoc(3, 6), "y"),
+		token.Simple(token.Assign, source.NewLoc(3, 8)),
+		token.New(token.Ident, source.NewLoc(3, 10), "x"),
+		token.Simple(token.Plus, source.NewLoc(3, 12)),
+		token.New(token.Int, source.NewLoc(3, 14), uint64(1)),
+		token.Simple(token.EOL, source.NewLoc(3, 15)),
+
+		token.Simple(token.KwIf, source.NewLoc(4, 2)),
+		token.New(token.Ident, source.NewLoc(4, 5), "x"),
+		token.Simple(token.Lt, source.NewLoc(4, 7)),
+		token.New(token.Ident, source.NewLoc(4, 9), "y"),
+		token.Simple(token.LBrace, source.NewLoc(4, 11)),
+		token.Simple(token.EOL, source.NewLoc(4, 12)),
+
+		token.New(token.Ident, source.NewLoc(5, 3), "say"),
+		token.Simple(token.LParen, source.NewLoc(5, 6)),
+		token.New(token.String, source.NewLoc(5, 7), "x is less than y"),
+		token.Simple(token.RParen, source.NewLoc(5, 25)),
+		token.Simple(token.EOL, source.NewLoc(5, 26)),
+
+		token.Simple(token.RBrace, source.NewLoc(6, 2)),
+		token.Simple(token.KwElse, source.NewLoc(6, 4)),
+		token.Simple(token.LBrace, source.NewLoc(6, 9)),
+		token.Simple(token.EOL, source.NewLoc(6, 10)),
+
+		token.New(token.Ident, source.NewLoc(7, 3), "say"),
+		token.Simple(token.LParen, source.NewLoc(7, 6)),
+		token.New(token.String, source.NewLoc(7, 7), "x is greater than or equal to y"),
+		token.Simple(token.RParen, source.NewLoc(7, 40)),
+		token.Simple(token.EOL, source.NewLoc(7, 41)),
+
+		token.Simple(token.RBrace, source.NewLoc(8, 2)),
+		token.Simple(token.EOL, source.NewLoc(8, 3)),
+
+		token.Simple(token.KwReturn, source.NewLoc(9, 2)),
+		token.Simple(token.KwTrue, source.NewLoc(9, 9)),
+		token.Simple(token.EOL, source.NewLoc(9, 13)),
+
+		token.Simple(token.RBrace, source.NewLoc(10, 1)),
+
+		token.Simple(token.EOF, source.NewLoc(10, 2)),
 	}
 
 	require.Equal(t, expected, tokens)
 }
 
 func TestTightIdent(t *testing.T) {
-	source := `say(message)`
+	src := `say(message)`
 
-	s := scanner.New(strings.NewReader(source))
+	s := scanner.New(strings.NewReader(src))
 
 	tokens, err := s.Scan()
 	require.NoError(t, err)
 
 	expected := []token.Token{
-		token.New(token.Ident, token.NewLoc(0, 0), "say"),
-		token.Simple(token.LParen, token.NewLoc(0, 3)),
-		token.New(token.Ident, token.NewLoc(0, 4), "message"),
-		token.Simple(token.RParen, token.NewLoc(0, 11)),
-		token.Simple(token.EOF, token.NewLoc(0, 12)),
+		token.New(token.Ident, source.NewLoc(0, 0), "say"),
+		token.Simple(token.LParen, source.NewLoc(0, 3)),
+		token.New(token.Ident, source.NewLoc(0, 4), "message"),
+		token.Simple(token.RParen, source.NewLoc(0, 11)),
+		token.Simple(token.EOF, source.NewLoc(0, 12)),
 	}
 
 	require.Equal(t, expected, tokens)
 }
 
 func TestTightNumber(t *testing.T) {
-	source := `1+2+3`
+	src := `1+2+3`
 
-	s := scanner.New(strings.NewReader(source))
+	s := scanner.New(strings.NewReader(src))
 
 	tokens, err := s.Scan()
 	require.NoError(t, err)
 
 	expected := []token.Token{
-		token.New(token.Int, token.NewLoc(0, 0), uint64(1)),
-		token.Simple(token.Plus, token.NewLoc(0, 1)),
-		token.New(token.Int, token.NewLoc(0, 2), uint64(2)),
-		token.Simple(token.Plus, token.NewLoc(0, 3)),
-		token.New(token.Int, token.NewLoc(0, 4), uint64(3)),
-		token.Simple(token.EOF, token.NewLoc(0, 5)),
+		token.New(token.Int, source.NewLoc(0, 0), uint64(1)),
+		token.Simple(token.Plus, source.NewLoc(0, 1)),
+		token.New(token.Int, source.NewLoc(0, 2), uint64(2)),
+		token.Simple(token.Plus, source.NewLoc(0, 3)),
+		token.New(token.Int, source.NewLoc(0, 4), uint64(3)),
+		token.Simple(token.EOF, source.NewLoc(0, 5)),
 	}
 
 	require.Equal(t, expected, tokens)
 }
 
 func TestNewlineStacking(t *testing.T) {
-	source := `
+	src := `
 	x
 
 
 	y
 	`
 
-	s := scanner.New(strings.NewReader(source))
+	s := scanner.New(strings.NewReader(src))
 
 	tokens, err := s.Scan()
 	require.NoError(t, err)
 
 	expected := []token.Token{
-		token.Simple(token.EOL, token.NewLoc(0, 0)),
-		token.New(token.Ident, token.NewLoc(1, 1), "x"),
-		token.Simple(token.EOL, token.NewLoc(1, 2)),
-		token.New(token.Ident, token.NewLoc(4, 1), "y"),
-		token.Simple(token.EOL, token.NewLoc(4, 2)),
-		token.Simple(token.EOF, token.NewLoc(5, 1)),
+		token.Simple(token.EOL, source.NewLoc(0, 0)),
+		token.New(token.Ident, source.NewLoc(1, 1), "x"),
+		token.Simple(token.EOL, source.NewLoc(1, 2)),
+		token.New(token.Ident, source.NewLoc(4, 1), "y"),
+		token.Simple(token.EOL, source.NewLoc(4, 2)),
+		token.Simple(token.EOF, source.NewLoc(5, 1)),
 	}
 
 	require.Equal(t, expected, tokens)
 }
 
 func TestEmojiInStrings(t *testing.T) {
-	source := `
+	src := `
 	say("🇺🇦" + "❤️!")
 	`
 
-	s := scanner.New(strings.NewReader(source))
+	s := scanner.New(strings.NewReader(src))
 
 	tokens, err := s.Scan()
 	require.NoError(t, err)
 
 	expected := []token.Token{
-		token.Simple(token.EOL, token.NewLoc(0, 0)),
-		token.New(token.Ident, token.NewLoc(1, 1), "say"),
-		token.Simple(token.LParen, token.NewLoc(1, 4)),
-		token.New(token.String, token.NewLoc(1, 5), "🇺🇦"),
-		token.Simple(token.Plus, token.NewLoc(1, 10)),
-		token.New(token.String, token.NewLoc(1, 12), "❤️!"),
-		token.Simple(token.RParen, token.NewLoc(1, 17)),
-		token.Simple(token.EOL, token.NewLoc(1, 18)),
-		token.Simple(token.EOF, token.NewLoc(2, 1)),
+		token.Simple(token.EOL, source.NewLoc(0, 0)),
+		token.New(token.Ident, source.NewLoc(1, 1), "say"),
+		token.Simple(token.LParen, source.NewLoc(1, 4)),
+		token.New(token.String, source.NewLoc(1, 5), "🇺🇦"),
+		token.Simple(token.Plus, source.NewLoc(1, 10)),
+		token.New(token.String, source.NewLoc(1, 12), "❤️!"),
+		token.Simple(token.RParen, source.NewLoc(1, 17)),
+		token.Simple(token.EOL, source.NewLoc(1, 18)),
+		token.Simple(token.EOF, source.NewLoc(2, 1)),
 	}
 
 	require.Equal(t, expected, tokens)
diff --git a/pkg/lang/scanner/token/loc.go b/pkg/lang/scanner/token/loc.go
deleted file mode 100644
index 7936cba..0000000
--- a/pkg/lang/scanner/token/loc.go
+++ /dev/null
@@ -1,10 +0,0 @@
-package token
-
-type Loc struct {
-	Row int
-	Col int
-}
-
-func NewLoc(row, col int) Loc {
-	return Loc{row, col}
-}
diff --git a/pkg/lang/scanner/token/token.go b/pkg/lang/scanner/token/token.go
index 1ccd864..a39df8a 100644
--- a/pkg/lang/scanner/token/token.go
+++ b/pkg/lang/scanner/token/token.go
@@ -1,19 +1,21 @@
 package token
 
+import "jinx/pkg/libs/source"
+
 type Token struct {
 	Kind TokenKind
-	At   Loc
+	At   source.Loc
 	Data any
 }
 
-func Simple(kind TokenKind, at Loc) Token {
+func Simple(kind TokenKind, at source.Loc) Token {
 	return Token{
 		Kind: kind,
 		At:   at,
 	}
 }
 
-func New(kind TokenKind, at Loc, data any) Token {
+func New(kind TokenKind, at source.Loc, data any) Token {
 	return Token{
 		Kind: kind,
 		At:   at,