about summary refs log tree commit diff
path: root/pkg/lang
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/lang')
-rw-r--r-- pkg/lang/scanner/errors.go 25
-rw-r--r-- pkg/lang/scanner/scanner.go 364
-rw-r--r-- pkg/lang/scanner/scanner_test.go 29
-rw-r--r-- pkg/lang/scanner/token/kind.go 71
-rw-r--r-- pkg/lang/scanner/token/loc.go 6
-rw-r--r-- pkg/lang/scanner/token/token.go 22
6 files changed, 517 insertions, 0 deletions
diff --git a/pkg/lang/scanner/errors.go b/pkg/lang/scanner/errors.go
new file mode 100644
index 0000000..dd9aae3
--- /dev/null
+++ b/pkg/lang/scanner/errors.go
@@ -0,0 +1,25 @@
+package scanner
+
+import "errors"
+
+// ErrScannerFinished is returned when a token is requested after the scanner
+// has already produced its EOF token.
+var ErrScannerFinished = errors.New("scanner already finished")
+
+// ErrUnclosedString is returned when the input ends inside a string literal,
+// before the closing double quote.
+var ErrUnclosedString = errors.New("unclosed string")
+
+// ErrUnexpectedChar reports that a specific rune was required at the current
+// position but a different one was read.
+type ErrUnexpectedChar struct {
+	// Expected is the rune the scanner required.
+	Expected rune
+	// Actual is the rune that was read instead.
+	Actual rune
+}
+
+// Error implements the error interface. Both runes are rendered as their
+// UTF-8 text, without quoting.
+func (e ErrUnexpectedChar) Error() string {
+	expected, actual := string(e.Expected), string(e.Actual)
+	return "unexpected character: expected " + expected + ", actual " + actual
+}
+
+// ErrUnknownChar reports a rune that does not start any token known to the
+// scanner.
+type ErrUnknownChar struct {
+	// Char is the offending rune.
+	Char rune
+}
+
+// Error implements the error interface. The rune is rendered as its UTF-8
+// text, without quoting.
+func (e ErrUnknownChar) Error() string {
+	char := string(e.Char)
+	return "unknown character: " + char
+}
diff --git a/pkg/lang/scanner/scanner.go b/pkg/lang/scanner/scanner.go
new file mode 100644
index 0000000..fdb313b
--- /dev/null
+++ b/pkg/lang/scanner/scanner.go
@@ -0,0 +1,364 @@
+package scanner
+
+import (
+	"bufio"
+	"errors"
+	"io"
+	"jinx/pkg/lang/scanner/token"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// Scanner turns source text read from an io.Reader into tokens while
+// tracking the current position in the input.
+type Scanner struct {
+	// source is the buffered input the runes are read from.
+	source *bufio.Reader
+
+	// row and col are the zero-based position of the next unread rune.
+	// next advances them; a newline increments row and resets col to 0.
+	row, col int
+
+	// indent is not read or written by any method visible in this file.
+	indent int
+
+	// finished is set once the EOF token has been produced.
+	finished bool
+}
+
+// New returns a Scanner that reads from source through a buffered reader.
+// The position starts at row 0, column 0 and the scanner is not finished.
+func New(source io.Reader) *Scanner {
+	// Every field other than source starts at its zero value.
+	s := new(Scanner)
+	s.source = bufio.NewReader(source)
+
+	return s
+}
+
+// Scan reads tokens until the end of input and returns them in order. On
+// success the last element is always the EOF token. If any token fails to
+// scan, the tokens collected so far are discarded and only the error is
+// returned.
+func (s *Scanner) Scan() ([]token.Token, error) {
+	collected := []token.Token{}
+
+	for {
+		tok, err := s.scanToken()
+		if err != nil {
+			return nil, err
+		}
+
+		collected = append(collected, tok)
+
+		// EOF is part of the result and also ends the loop.
+		if tok.Kind == token.EOF {
+			return collected, nil
+		}
+	}
+}
+
+// scanToken reads and returns the next token from the source.
+//
+// Whitespace is skipped first. The next rune is then peeked to pick a
+// sub-scanner: '"' starts a string, a letter starts an identifier or keyword,
+// and a digit starts a number. Any other rune is consumed here and mapped to
+// an operator or punctuation token.
+//
+// At end of input an EOF token is returned and the scanner is marked
+// finished; any later call returns ErrScannerFinished. A rune that starts no
+// known token yields ErrUnknownChar. Read errors from the source are returned
+// unchanged.
+func (s *Scanner) scanToken() (token.Token, error) {
+	if s.finished {
+		return token.Token{}, ErrScannerFinished
+	}
+
+	if err := s.skipWhitespace(); err != nil {
+		return token.Token{}, err
+	}
+
+	c, eof, err := s.peek()
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	if eof {
+		s.finished = true
+		return token.Simple(token.EOF, s.loc()), nil
+	}
+
+	switch {
+	case c == '"':
+		return s.scanString()
+	case unicode.IsLetter(c):
+		return s.scanIdentifierOrKeyword()
+	case unicode.IsDigit(c):
+		return s.scanNumber()
+	}
+
+	// Record the position before consuming so the token points at its first rune.
+	loc := s.loc()
+	if c, _, err = s.next(); err != nil {
+		return token.Token{}, err
+	}
+
+	var kind token.TokenKind
+	switch c {
+	case '\n':
+		kind = token.EOL
+	case '=':
+		kind, err = s.operatorKind('=', token.Assign, token.Eq)
+	case '+':
+		kind = token.Plus
+	case '-':
+		kind = token.Minus
+	case '*':
+		kind = token.Star
+	case '/':
+		kind = token.Slash
+	case '%':
+		kind = token.Percent
+
+	case '<':
+		kind, err = s.operatorKind('=', token.Lt, token.Lte)
+	case '>':
+		kind, err = s.operatorKind('=', token.Gt, token.Gte)
+	case '!':
+		kind, err = s.operatorKind('=', token.Bang, token.Neq)
+
+	case '[':
+		kind = token.LBracket
+	case ']':
+		kind = token.RBracket
+	case '(':
+		kind = token.LParen
+	case ')':
+		kind = token.RParen
+	case '{':
+		kind = token.LBrace
+	case '}':
+		kind = token.RBrace
+
+	case ',':
+		kind = token.Comma
+	case '.':
+		kind = token.Dot
+	case ';':
+		kind = token.SemiColon
+
+	default:
+		return token.Token{}, ErrUnknownChar{Char: c}
+	}
+
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	return token.Simple(kind, loc), nil
+}
+
+// operatorKind decides between a one-rune and a two-rune operator after the
+// first rune has been consumed. If the next rune equals second it is consumed
+// and long is returned. Otherwise, including at end of input, the rune is
+// left unread for the next token and short is returned.
+func (s *Scanner) operatorKind(second rune, short, long token.TokenKind) (token.TokenKind, error) {
+	c, eof, err := s.peek()
+	if err != nil {
+		return short, err
+	}
+
+	if eof || c != second {
+		return short, nil
+	}
+
+	if _, _, err := s.next(); err != nil {
+		return short, err
+	}
+
+	return long, nil
+}
+
+// scanString reads a double-quoted string literal and returns a String token
+// whose data is the text between the quotes. The token is located at the
+// opening quote. No escape sequences are interpreted, and every rune up to
+// the next '"' (newlines included) becomes part of the value.
+//
+// It returns ErrUnclosedString if the input ends before the closing quote.
+func (s *Scanner) scanString() (token.Token, error) {
+	start := s.loc()
+
+	// The opening quote must be present; otherwise consume reports it.
+	if _, err := s.consume('"'); err != nil {
+		return token.Token{}, err
+	}
+
+	var text strings.Builder
+
+	for {
+		r, atEnd, err := s.next()
+		switch {
+		case err != nil:
+			return token.Token{}, err
+		case atEnd:
+			return token.Token{}, ErrUnclosedString
+		case r == '"':
+			return token.New(token.String, start, text.String()), nil
+		}
+
+		text.WriteRune(r)
+	}
+}
+
+// scanIdentifierOrKeyword reads a run of letters and digits starting at the
+// current position. If the text is a reserved word, the matching keyword
+// token is returned with no data; otherwise an Ident token carrying the name
+// is returned.
+//
+// Each rune is peeked before it is consumed, so the rune that ends the word
+// stays in the input for the next token.
+func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
+	loc := s.loc()
+
+	var buf strings.Builder
+
+	for {
+		c, eof, err := s.peek()
+		if err != nil {
+			return token.Token{}, err
+		}
+
+		if eof || (!unicode.IsLetter(c) && !unicode.IsDigit(c)) {
+			break
+		}
+
+		if _, _, err := s.next(); err != nil {
+			return token.Token{}, err
+		}
+
+		buf.WriteRune(c)
+	}
+
+	name := buf.String()
+
+	var kind token.TokenKind
+	switch name {
+	case "var":
+		kind = token.KwVar
+	case "fn":
+		kind = token.KwFn
+	case "object":
+		kind = token.KwObject
+
+	case "if":
+		kind = token.KwIf
+	case "elif":
+		kind = token.KwElif
+	case "else":
+		kind = token.KwElse
+	case "for":
+		kind = token.KwFor
+	case "try":
+		kind = token.KwTry
+	case "catch":
+		kind = token.KwCatch
+	case "finally":
+		kind = token.KwFinally
+
+	case "return":
+		kind = token.KwReturn
+	case "continue":
+		kind = token.KwContinue
+	case "break":
+		kind = token.KwBreak
+	case "throw":
+		kind = token.KwThrow
+
+	case "in":
+		kind = token.KwIn
+
+	case "null":
+		kind = token.KwNull
+	case "true":
+		kind = token.KwTrue
+	case "false":
+		kind = token.KwFalse
+	case "this":
+		kind = token.KwThis
+
+	case "use":
+		kind = token.KwUse
+	case "from":
+		kind = token.KwFrom
+	case "by":
+		kind = token.KwBy
+	default:
+		return token.New(token.Ident, loc, name), nil
+	}
+
+	return token.Simple(kind, loc), nil
+}
+
+// scanNumber reads a run of digits and returns an Int token whose data is
+// the value as a uint64, parsed in base 10.
+//
+// Each rune is peeked before it is consumed, so the rune that ends the
+// number stays in the input for the next token. The error from
+// strconv.ParseUint is returned unchanged when the digits cannot be parsed,
+// for example when the value does not fit in 64 bits.
+func (s *Scanner) scanNumber() (token.Token, error) {
+	loc := s.loc()
+
+	var buf strings.Builder
+
+	for {
+		c, eof, err := s.peek()
+		if err != nil {
+			return token.Token{}, err
+		}
+
+		if eof || !unicode.IsDigit(c) {
+			break
+		}
+
+		if _, _, err := s.next(); err != nil {
+			return token.Token{}, err
+		}
+
+		buf.WriteRune(c)
+	}
+
+	num, err := strconv.ParseUint(buf.String(), 10, 64)
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	return token.New(token.Int, loc, num), nil
+}
+
+// skipWhitespace consumes whitespace up to the next significant rune or the
+// end of input.
+//
+// A newline is deliberately not skipped even though unicode.IsSpace reports
+// it as whitespace: scanToken turns '\n' into an EOL token, and that case
+// could never be reached if newlines were swallowed here.
+func (s *Scanner) skipWhitespace() error {
+	for {
+		c, eof, err := s.peek()
+		if err != nil {
+			return err
+		}
+
+		if eof || c == '\n' || !unicode.IsSpace(c) {
+			return nil
+		}
+
+		if _, _, err := s.next(); err != nil {
+			return err
+		}
+	}
+}
+
+// loc returns the scanner's current row and column as a token.Loc.
+func (s *Scanner) loc() token.Loc {
+	var at token.Loc
+	at.Row, at.Col = s.row, s.col
+
+	return at
+}
+
+// next consumes one rune from the source and advances the tracked position.
+//
+// It returns the rune, whether the end of input was reached, and any read
+// error other than io.EOF. At end of input the rune is 0 and the position is
+// left unchanged.
+func (s *Scanner) next() (rune, bool, error) {
+	r, _, err := s.source.ReadRune()
+	if errors.Is(err, io.EOF) {
+		return 0, true, nil
+	}
+	if err != nil {
+		return 0, false, err
+	}
+
+	// Columns count runes, not bytes; a newline starts a new row.
+	switch r {
+	case '\n':
+		s.row++
+		s.col = 0
+	default:
+		s.col++
+	}
+
+	return r, false, nil
+}
+
+// consume reads the next rune unconditionally and checks it against want.
+//
+// The boolean is false only when the read itself failed. On a mismatch it is
+// true and the error is an ErrUnexpectedChar; the mismatching rune has
+// already been consumed and is not put back. The end-of-input flag from next
+// is ignored, so at end of input the comparison is made against rune 0.
+func (s *Scanner) consume(want rune) (bool, error) {
+	got, _, err := s.next()
+
+	switch {
+	case err != nil:
+		return false, err
+	case got == want:
+		return true, nil
+	default:
+		return true, ErrUnexpectedChar{Expected: want, Actual: got}
+	}
+}
+
+// peek returns the next rune without consuming it and without changing the
+// tracked position.
+//
+// It returns the rune, whether the end of input was reached, and any read
+// error other than io.EOF. The rune is pushed back only after a successful
+// read, because UnreadRune is only valid directly after one; a failure to
+// push it back is reported rather than ignored.
+func (s *Scanner) peek() (rune, bool, error) {
+	r, _, err := s.source.ReadRune()
+	if err != nil {
+		if errors.Is(err, io.EOF) {
+			return 0, true, nil
+		}
+
+		return 0, false, err
+	}
+
+	if err := s.source.UnreadRune(); err != nil {
+		return 0, false, err
+	}
+
+	return r, false, nil
+}
diff --git a/pkg/lang/scanner/scanner_test.go b/pkg/lang/scanner/scanner_test.go
new file mode 100644
index 0000000..2948a58
--- /dev/null
+++ b/pkg/lang/scanner/scanner_test.go
@@ -0,0 +1,29 @@
+package scanner_test
+
+import (
+	"jinx/pkg/lang/scanner"
+	"jinx/pkg/lang/scanner/token"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestBasic scans a one-line variable declaration and checks the kind,
+// position and data of every token, including the trailing EOF.
+func TestBasic(t *testing.T) {
+	// All tokens are on the first row; only the column differs.
+	at := func(col int) token.Loc {
+		return token.Loc{Row: 0, Col: col}
+	}
+
+	want := []token.Token{
+		token.Simple(token.KwVar, at(0)),
+		token.New(token.Ident, at(4), "x"),
+		token.Simple(token.Assign, at(6)),
+		token.New(token.Int, at(8), uint64(1)),
+		token.Simple(token.EOF, at(9)),
+	}
+
+	sc := scanner.New(strings.NewReader("var x = 1"))
+
+	got, err := sc.Scan()
+	require.NoError(t, err)
+	require.Equal(t, want, got)
+}
diff --git a/pkg/lang/scanner/token/kind.go b/pkg/lang/scanner/token/kind.go
new file mode 100644
index 0000000..e24ce2f
--- /dev/null
+++ b/pkg/lang/scanner/token/kind.go
@@ -0,0 +1,71 @@
+package token
+
+// TokenKind identifies the category of a token.
+type TokenKind int
+
+// Token kinds. The values are assigned by iota in declaration order, so EOF
+// is the zero value of TokenKind and reordering entries changes every value
+// after the moved one.
+const (
+	// EOF marks the end of the input.
+	EOF TokenKind = iota
+	// EOL marks a line break.
+	EOL
+
+	// Keywords: declarations.
+	KwVar
+	KwFn
+	KwObject
+
+	// Keywords: control flow.
+	KwIf
+	KwElif
+	KwElse
+	KwFor
+	KwTry
+	KwCatch
+	KwFinally
+
+	// Keywords: jumps.
+	KwReturn
+	KwContinue
+	KwBreak
+	KwThrow
+
+	KwIn
+
+	// Keywords: literal values.
+	KwNull
+	KwTrue
+	KwFalse
+
+	KwThis
+
+	// Keywords: "use", "from" and "by".
+	KwUse
+	KwFrom
+	KwBy
+
+	// Data tokens: kinds whose tokens carry a value in Token.Data.
+	// NOTE(review): the scanner added in this change produces Ident, Int and
+	// String but never Float.
+	Ident
+	Int
+	Float
+	String
+
+	// Punctuation: assignment, arithmetic operators and '!'.
+	Assign
+	Plus
+	Minus
+	Star
+	Slash
+	Percent
+	Bang
+
+	// Comparison operators: ==, !=, <, >, <=, >=.
+	Eq
+	Neq
+	Lt
+	Gt
+	Lte
+	Gte
+
+	// Brackets: ( ) { } [ ].
+	LParen
+	RParen
+	LBrace
+	RBrace
+	LBracket
+	RBracket
+
+	// Separators: , . ;
+	Comma
+	Dot
+	SemiColon
+)
diff --git a/pkg/lang/scanner/token/loc.go b/pkg/lang/scanner/token/loc.go
new file mode 100644
index 0000000..c4b073a
--- /dev/null
+++ b/pkg/lang/scanner/token/loc.go
@@ -0,0 +1,6 @@
+package token
+
+// Loc is a position in the source text, given as a row and a column.
+// NOTE(review): the scanner added in this change starts both at 0 and counts
+// columns in runes; confirm before relying on that elsewhere.
+type Loc struct {
+	// Row is the line index.
+	Row int
+	// Col is the column index within the row.
+	Col int
+}
diff --git a/pkg/lang/scanner/token/token.go b/pkg/lang/scanner/token/token.go
new file mode 100644
index 0000000..840a420
--- /dev/null
+++ b/pkg/lang/scanner/token/token.go
@@ -0,0 +1,22 @@
+package token
+
+// Token is a single lexical element: its kind, where it starts, and an
+// optional payload.
+type Token struct {
+	// Kind is the category of the token.
+	Kind TokenKind
+	// At is the location associated with the token.
+	At Loc
+	// Data is the payload; it is nil for tokens built with Simple.
+	Data any
+}
+
+// Simple builds a token that carries no data, such as a keyword or a piece
+// of punctuation. Its Data field is left nil.
+func Simple(kind TokenKind, at Loc) Token {
+	return Token{Kind: kind, At: at}
+}
+
+// New builds a token of the given kind at the given location with data as
+// its payload.
+func New(kind TokenKind, at Loc, data any) Token {
+	var t Token
+	t.Kind, t.At, t.Data = kind, at, data
+
+	return t
+}