about summary refs log tree commit diff
path: root/pkg/lang/scanner/scanner.go
diff options
context:
space:
mode:
author Mel <einebeere@gmail.com> 2022-04-16 22:05:25 +0200
committer Mel <einebeere@gmail.com> 2022-04-16 22:05:25 +0200
commit bbb2962bd4bac0ce1271ec7d7cb65d038ead8ed2 (patch)
tree 01cc597df41e9ed9b246f9d537783f68e126caab /pkg/lang/scanner/scanner.go
parent 6163d259ed52991e2f95632b5a0516607aa56a5f (diff)
download jinx-bbb2962bd4bac0ce1271ec7d7cb65d038ead8ed2.tar.zst
jinx-bbb2962bd4bac0ce1271ec7d7cb65d038ead8ed2.zip
Basic scanner for lang
Diffstat (limited to 'pkg/lang/scanner/scanner.go')
-rw-r--r-- pkg/lang/scanner/scanner.go 364
1 files changed, 364 insertions, 0 deletions
diff --git a/pkg/lang/scanner/scanner.go b/pkg/lang/scanner/scanner.go
new file mode 100644
index 0000000..fdb313b
--- /dev/null
+++ b/pkg/lang/scanner/scanner.go
@@ -0,0 +1,364 @@
+package scanner
+
+import (
+	"bufio"
+	"errors"
+	"io"
+	"jinx/pkg/lang/scanner/token"
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// Scanner reads runes from a buffered source and keeps track of the position
+// it has reached.
+type Scanner struct {
+	// source is the buffered reader that runes are pulled from.
+	source *bufio.Reader
+
+	// row and col form the position reported by loc; next advances them.
+	row, col int
+	// indent is set to 0 by New; no method visible here reads it.
+	indent int
+
+	// finished is set once scanToken has produced the EOF token.
+	finished bool
+}
+
+// New wraps source in a buffered reader and returns a Scanner positioned at
+// row 0, column 0.
+func New(source io.Reader) *Scanner {
+	// All remaining fields start at their zero values: row, col and indent
+	// are 0 and finished is false.
+	return &Scanner{source: bufio.NewReader(source)}
+}
+
+func (s *Scanner) Scan() ([]token.Token, error) {
+	tokens := make([]token.Token, 0)
+
+	for {
+		t, err := s.scanToken()
+		if err != nil {
+			return nil, err
+		}
+
+		tokens = append(tokens, t)
+
+		if t.Kind == token.EOF {
+			break
+		}
+	}
+
+	return tokens, nil
+}
+
+// scanToken skips leading whitespace and returns the next token from the
+// source.
+//
+// At end of input it returns the EOF token and marks the scanner as finished;
+// any later call returns ErrScannerFinished. Strings, identifiers/keywords and
+// numbers are delegated to their dedicated scan methods based on the first
+// rune. Everything else is a one- or two-rune punctuation token; a rune that
+// matches none of them yields ErrUnknownChar.
+func (s *Scanner) scanToken() (token.Token, error) {
+	if s.finished {
+		return token.Token{}, ErrScannerFinished
+	}
+
+	if err := s.skipWhitespace(); err != nil {
+		return token.Token{}, err
+	}
+
+	c, eof, err := s.peek()
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	if eof {
+		s.finished = true
+		return token.Simple(token.EOF, s.loc()), nil
+	}
+
+	// Multi-rune tokens are scanned by their own methods, which consume the
+	// first rune themselves; that is why c was only peeked above.
+	switch {
+	case c == '"':
+		return s.scanString()
+	case unicode.IsLetter(c):
+		return s.scanIdentifierOrKeyword()
+	case unicode.IsDigit(c):
+		return s.scanNumber()
+	}
+
+	// Record the location before consuming so the token points at its first
+	// rune.
+	loc := s.loc()
+	c, _, err = s.next()
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	// withEq chooses between a two-rune operator ending in '=' (long) and its
+	// one-rune form (short). The following rune is peeked first and consumed
+	// only when it really is '=', so a rune that merely follows the operator
+	// (as in "a =b" or "!x") stays in the source for the next token.
+	withEq := func(long, short token.TokenKind) (token.TokenKind, error) {
+		r, atEOF, perr := s.peek()
+		if perr != nil {
+			return short, perr
+		}
+		if atEOF || r != '=' {
+			return short, nil
+		}
+		if _, _, nerr := s.next(); nerr != nil {
+			return short, nerr
+		}
+		return long, nil
+	}
+
+	var kind token.TokenKind
+	switch c {
+	case '\n':
+		kind = token.EOL
+	case '=':
+		kind, err = withEq(token.Eq, token.Assign)
+	case '+':
+		kind = token.Plus
+	case '-':
+		kind = token.Minus
+	case '*':
+		kind = token.Star
+	case '/':
+		kind = token.Slash
+	case '%':
+		kind = token.Percent
+
+	case '<':
+		kind, err = withEq(token.Lte, token.Lt)
+	case '>':
+		kind, err = withEq(token.Gte, token.Gt)
+	case '!':
+		kind, err = withEq(token.Neq, token.Bang)
+
+	case '[':
+		kind = token.LBracket
+	case ']':
+		kind = token.RBracket
+	case '(':
+		kind = token.LParen
+	case ')':
+		kind = token.RParen
+	case '{':
+		kind = token.LBrace
+	case '}':
+		kind = token.RBrace
+
+	case ',':
+		kind = token.Comma
+	case '.':
+		kind = token.Dot
+	case ';':
+		kind = token.SemiColon
+
+	default:
+		return token.Token{}, ErrUnknownChar{Char: c}
+	}
+
+	// Only the withEq branches can set err; it is a read error from the
+	// underlying source.
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	return token.Simple(kind, loc), nil
+}
+
+// scanString reads a double-quoted string literal and returns it as a String
+// token located at the opening quote.
+//
+// No escape sequences are interpreted: every rune up to the next '"' is copied
+// verbatim. Reaching end of input before the closing quote yields
+// ErrUnclosedString.
+func (s *Scanner) scanString() (token.Token, error) {
+	start := s.loc()
+
+	// The opening quote must be present; consume reports anything else.
+	_, err := s.consume('"')
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	var text strings.Builder
+	for {
+		r, atEnd, err := s.next()
+		switch {
+		case err != nil:
+			return token.Token{}, err
+		case atEnd:
+			return token.Token{}, ErrUnclosedString
+		case r == '"':
+			// Closing quote: it is consumed but not part of the value.
+			return token.New(token.String, start, text.String()), nil
+		}
+
+		text.WriteRune(r)
+	}
+}
+
+// scanIdentifierOrKeyword reads a run of letters and digits starting at the
+// current position. If the text is one of the reserved words it returns the
+// matching keyword token; otherwise it returns an Ident token carrying the
+// text. The token is located at the first rune of the run.
+//
+// The rune that ends the run is left in the source so the next call to
+// scanToken sees it.
+func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
+	loc := s.loc()
+
+	var buf strings.Builder
+
+	for {
+		// Peek first: consuming the rune before testing it would swallow
+		// whatever follows the identifier (e.g. the '=' in "a=1").
+		c, eof, err := s.peek()
+		if err != nil {
+			return token.Token{}, err
+		}
+
+		if eof || (!unicode.IsLetter(c) && !unicode.IsDigit(c)) {
+			break
+		}
+
+		if _, _, err := s.next(); err != nil {
+			return token.Token{}, err
+		}
+
+		buf.WriteRune(c)
+	}
+
+	var kind token.TokenKind
+	switch buf.String() {
+	case "var":
+		kind = token.KwVar
+	case "fn":
+		kind = token.KwFn
+	case "object":
+		kind = token.KwObject
+
+	case "if":
+		kind = token.KwIf
+	case "elif":
+		kind = token.KwElif
+	case "else":
+		kind = token.KwElse
+	case "for":
+		kind = token.KwFor
+	case "try":
+		kind = token.KwTry
+	case "catch":
+		kind = token.KwCatch
+	case "finally":
+		kind = token.KwFinally
+
+	case "return":
+		kind = token.KwReturn
+	case "continue":
+		kind = token.KwContinue
+	case "break":
+		kind = token.KwBreak
+	case "throw":
+		kind = token.KwThrow
+
+	case "in":
+		kind = token.KwIn
+
+	case "null":
+		kind = token.KwNull
+	case "true":
+		kind = token.KwTrue
+	case "false":
+		kind = token.KwFalse
+	case "this":
+		kind = token.KwThis
+
+	case "use":
+		kind = token.KwUse
+	case "from":
+		kind = token.KwFrom
+	case "by":
+		kind = token.KwBy
+	default:
+		// Not a reserved word: keep the text as the identifier's name.
+		return token.New(token.Ident, loc, buf.String()), nil
+	}
+
+	return token.Simple(kind, loc), nil
+}
+
+// scanNumber reads a run of digits starting at the current position and
+// returns it as an Int token whose value is the parsed uint64. The token is
+// located at the first digit.
+//
+// The rune that ends the run is left in the source so the next call to
+// scanToken sees it. An error from strconv.ParseUint (for example a value that
+// does not fit in 64 bits) is returned unchanged.
+func (s *Scanner) scanNumber() (token.Token, error) {
+	loc := s.loc()
+
+	var buf strings.Builder
+
+	for {
+		// Peek first: consuming the rune before testing it would swallow
+		// whatever follows the number (e.g. the '+' in "1+2").
+		c, eof, err := s.peek()
+		if err != nil {
+			return token.Token{}, err
+		}
+
+		if eof || !unicode.IsDigit(c) {
+			break
+		}
+
+		if _, _, err := s.next(); err != nil {
+			return token.Token{}, err
+		}
+
+		buf.WriteRune(c)
+	}
+
+	// NOTE(review): unicode.IsDigit also accepts non-ASCII decimal digits,
+	// which ParseUint rejects, so such input ends up as a parse error here.
+	num, err := strconv.ParseUint(buf.String(), 10, 64)
+	if err != nil {
+		return token.Token{}, err
+	}
+
+	return token.New(token.Int, loc, num), nil
+}
+
+// skipWhitespace consumes whitespace runes up to, but not including, the next
+// newline, non-space rune or end of input.
+//
+// Newlines are deliberately left in place: scanToken maps '\n' to token.EOL,
+// and because unicode.IsSpace('\n') is true, skipping newlines here would make
+// that case unreachable and no EOL token would ever be produced.
+func (s *Scanner) skipWhitespace() error {
+	for {
+		c, eof, err := s.peek()
+		if err != nil {
+			return err
+		}
+
+		if eof || c == '\n' || !unicode.IsSpace(c) {
+			break
+		}
+
+		if _, _, err = s.next(); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// loc returns the scanner's current row and column as a token.Loc.
+func (s *Scanner) loc() token.Loc {
+	return token.Loc{Row: s.row, Col: s.col}
+}
+
+// next consumes one rune from the source and advances the tracked position.
+//
+// It returns the rune, a flag that is true at end of input, and any read error
+// other than io.EOF. At end of input the rune is 0 and the position is left
+// unchanged.
+func (s *Scanner) next() (rune, bool, error) {
+	r, _, err := s.source.ReadRune()
+	if errors.Is(err, io.EOF) {
+		// End of input is reported through the flag, not as an error.
+		return 0, true, nil
+	}
+	if err != nil {
+		return 0, false, err
+	}
+
+	if r != '\n' {
+		s.col++
+		return r, false, nil
+	}
+
+	// A newline moves to the start of the following row.
+	s.row++
+	s.col = 0
+	return r, false, nil
+}
+
+// consume reads the next rune only if it equals want.
+//
+// Return values:
+//   - (true, nil): the next rune was want and has been consumed.
+//   - (true, ErrUnexpectedChar): the source was readable but the next rune was
+//     something else (Actual is 0 at end of input). Nothing is consumed, so a
+//     caller that treats the mismatch as "no match" loses no input.
+//   - (false, err): reading from the source failed.
+func (s *Scanner) consume(want rune) (bool, error) {
+	// Peek before consuming so that a mismatching rune stays in the source.
+	c, eof, err := s.peek()
+	if err != nil {
+		return false, err
+	}
+
+	if eof || c != want {
+		return true, ErrUnexpectedChar{
+			Expected: want,
+			Actual:   c,
+		}
+	}
+
+	if _, _, err := s.next(); err != nil {
+		return false, err
+	}
+
+	return true, nil
+}
+
+// peek returns the next rune without consuming it and without touching the
+// tracked position.
+//
+// It returns the rune, a flag that is true at end of input, and any error
+// other than io.EOF. At end of input the rune is 0.
+func (s *Scanner) peek() (rune, bool, error) {
+	r, _, err := s.source.ReadRune()
+	if err != nil {
+		// Nothing was read, so there is nothing to push back.
+		if errors.Is(err, io.EOF) {
+			return 0, true, nil
+		}
+
+		return 0, false, err
+	}
+
+	// Push the rune back only after a successful read, and surface a failure
+	// instead of silently leaving the rune consumed.
+	if err := s.source.UnreadRune(); err != nil {
+		return 0, false, err
+	}
+
+	return r, false, nil
+}