diff options
Diffstat (limited to 'pkg/lang/scanner/scanner.go')
| -rw-r--r-- | pkg/lang/scanner/scanner.go | 364 |
1 files changed, 364 insertions, 0 deletions
diff --git a/pkg/lang/scanner/scanner.go b/pkg/lang/scanner/scanner.go new file mode 100644 index 0000000..fdb313b --- /dev/null +++ b/pkg/lang/scanner/scanner.go @@ -0,0 +1,364 @@ +package scanner + +import ( + "bufio" + "errors" + "io" + "jinx/pkg/lang/scanner/token" + "strconv" + "strings" + "unicode" +) + +type Scanner struct { + source *bufio.Reader + row int + col int + indent int + + finished bool +} + +func New(source io.Reader) *Scanner { + return &Scanner{ + source: bufio.NewReader(source), + row: 0, + col: 0, + indent: 0, + finished: false, + } +} + +func (s *Scanner) Scan() ([]token.Token, error) { + tokens := make([]token.Token, 0) + + for { + t, err := s.scanToken() + if err != nil { + return nil, err + } + + tokens = append(tokens, t) + + if t.Kind == token.EOF { + break + } + } + + return tokens, nil +} + +func (s *Scanner) scanToken() (token.Token, error) { + if s.finished { + return token.Token{}, ErrScannerFinished + } + + if err := s.skipWhitespace(); err != nil { + return token.Token{}, err + } + + c, eof, err := s.peek() + if err != nil { + return token.Token{}, err + } + + if eof { + s.finished = true + return token.Simple(token.EOF, s.loc()), nil + } + + if c == '"' { + return s.scanString() + } else if unicode.IsLetter(c) { + return s.scanIdentifierOrKeyword() + } else if unicode.IsDigit(c) { + return s.scanNumber() + } + + loc := s.loc() + c, _, err = s.next() + if err != nil { + return token.Token{}, err + } + + var kind token.TokenKind + switch c { + case '\n': + kind = token.EOL + case '=': + if cont, err := s.consume('='); cont && err == nil { + kind = token.Eq + } else if cont && err != nil { + kind = token.Assign + } else { + return token.Token{}, err + } + case '+': + kind = token.Plus + case '-': + kind = token.Minus + case '*': + kind = token.Star + case '/': + kind = token.Slash + case '%': + kind = token.Percent + + case '<': + if cont, err := s.consume('='); cont && err == nil { + kind = token.Lte + } else if cont && err != nil { + kind = token.Lt + } else { + return token.Token{}, err + } + case '>': + if cont, err := s.consume('='); cont && err == nil { + kind = token.Gte + } else if cont && err != nil { + kind = token.Gt + } else { + return token.Token{}, err + } + case '!': + if cont, err := s.consume('='); cont && err == nil { + kind = token.Neq + } else if cont && err != nil { + kind = token.Bang + } else { + return token.Token{}, err + } + + case '[': + kind = token.LBracket + case ']': + kind = token.RBracket + case '(': + kind = token.LParen + case ')': + kind = token.RParen + case '{': + kind = token.LBrace + case '}': + kind = token.RBrace + + case ',': + kind = token.Comma + case '.': + kind = token.Dot + case ';': + kind = token.SemiColon + + default: + return token.Token{}, ErrUnknownChar{Char: c} + } + + return token.Simple(kind, loc), nil +} + +func (s *Scanner) scanString() (token.Token, error) { + loc := s.loc() + if _, err := s.consume('"'); err != nil { + return token.Token{}, err + } + + var buf strings.Builder + + for { + c, eof, err := s.next() + if err != nil { + return token.Token{}, err + } + + if eof { + return token.Token{}, ErrUnclosedString + } + + if c == '"' { + break + } + + buf.WriteRune(c) + } + + return token.New(token.String, loc, buf.String()), nil +} + +func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) { + loc := s.loc() + + var buf strings.Builder + + for { + c, eof, err := s.next() + if err != nil { + return token.Token{}, err + } + + if eof || (!unicode.IsLetter(c) && !unicode.IsDigit(c)) { + break + } + + buf.WriteRune(c) + } + + var kind token.TokenKind + switch buf.String() { + case "var": + kind = token.KwVar + case "fn": + kind = token.KwFn + case "object": + kind = token.KwObject + + case "if": + kind = token.KwIf + case "elif": + kind = token.KwElif + case "else": + kind = token.KwElse + case "for": + kind = token.KwFor + case "try": + kind = token.KwTry + case "catch": + kind = token.KwCatch + case "finally": + kind = token.KwFinally + + case "return": + kind = token.KwReturn + case "continue": + kind = token.KwContinue + case "break": + kind = token.KwBreak + case "throw": + kind = token.KwThrow + + case "in": + kind = token.KwIn + + case "null": + kind = token.KwNull + case "true": + kind = token.KwTrue + case "false": + kind = token.KwFalse + case "this": + kind = token.KwThis + + case "use": + kind = token.KwUse + case "from": + kind = token.KwFrom + case "by": + kind = token.KwBy + default: + return token.New(token.Ident, loc, buf.String()), nil + } + + return token.Simple(kind, loc), nil +} + +func (s *Scanner) scanNumber() (token.Token, error) { + loc := s.loc() + + var buf strings.Builder + + for { + c, eof, err := s.next() + if err != nil { + return token.Token{}, err + } + + if eof || !unicode.IsDigit(c) { + break + } + + buf.WriteRune(c) + } + + num, err := strconv.ParseUint(buf.String(), 10, 64) + if err != nil { + return token.Token{}, err + } + + return token.New(token.Int, loc, num), nil +} + +func (s *Scanner) skipWhitespace() error { + for { + c, eof, err := s.peek() + if err != nil { + return err + } + + if eof || !unicode.IsSpace(c) { + break + } + + if _, _, err = s.next(); err != nil { + return err + } + } + + return nil +} + +func (s *Scanner) loc() token.Loc { + return token.Loc{ + Row: s.row, + Col: s.col, + } +} + +func (s *Scanner) next() (rune, bool, error) { + r, _, err := s.source.ReadRune() + if err != nil { + if errors.Is(err, io.EOF) { + return 0, true, nil + } + + return 0, false, err + } + + if r == '\n' { + s.row++ + s.col = 0 + } else { + s.col++ + } + + return r, false, nil +} + +func (s *Scanner) consume(want rune) (bool, error) { + c, _, err := s.next() + if err != nil { + return false, err + } + + if c != want { + return true, ErrUnexpectedChar{ + Expected: want, + Actual: c, + } + } + + return true, nil +} + +func (s *Scanner) peek() (rune, bool, error) { + r, _, err := s.source.ReadRune() + defer s.source.UnreadRune() + + if err != nil { + if errors.Is(err, io.EOF) { + return 0, true, nil + } + + return 0, false, err + } + + return r, false, nil +} |
