// Package scanner turns jinx source text into a flat sequence of tokens.
package scanner

import (
	"bufio"
	"errors"
	"io"
	"strconv"
	"strings"
	"unicode"

	"jinx/pkg/lang/scanner/token"
)

// Scanner reads runes from a buffered source and groups them into tokens.
//
// It tracks the zero-based row and column of the next unread rune so every
// token can be tagged with the location at which it starts.
type Scanner struct {
	source   *bufio.Reader
	row      int
	col      int
	indent   int // not read or written by the scanning code in this file
	finished bool
}

// New returns a Scanner that reads from source, positioned at row 0, col 0.
func New(source io.Reader) *Scanner {
	return &Scanner{source: bufio.NewReader(source)}
}

// Scan tokenizes the remaining input and returns every token up to and
// including the terminating EOF token.
//
// On failure it returns a nil slice and the error. Once EOF has been
// produced, any further call fails with ErrScannerFinished.
func (s *Scanner) Scan() ([]token.Token, error) {
	var tokens []token.Token
	for {
		t, err := s.scanToken()
		if err != nil {
			return nil, err
		}
		tokens = append(tokens, t)
		if t.Kind == token.EOF {
			return tokens, nil
		}
	}
}

// scanToken produces the next token from the input.
//
// Leading whitespace other than '\n' is discarded; a newline is reported as
// an EOL token. Strings, identifiers/keywords and numbers are delegated to
// their dedicated scanners; everything else is a one- or two-rune operator
// or punctuation mark. An unrecognized rune yields ErrUnknownChar.
func (s *Scanner) scanToken() (token.Token, error) {
	if s.finished {
		return token.Token{}, ErrScannerFinished
	}
	if err := s.skipWhitespace(); err != nil {
		return token.Token{}, err
	}

	c, eof, err := s.peek()
	if err != nil {
		return token.Token{}, err
	}

	switch {
	case eof:
		s.finished = true
		return token.Simple(token.EOF, s.loc()), nil
	case c == '"':
		return s.scanString()
	case unicode.IsLetter(c):
		return s.scanIdentifierOrKeyword()
	case unicode.IsDigit(c):
		return s.scanNumber()
	}

	// Capture the location before consuming so it points at the token start.
	loc := s.loc()
	c, _, err = s.next()
	if err != nil {
		return token.Token{}, err
	}

	var kind token.TokenKind
	switch c {
	case '\n':
		kind = token.EOL
	case '=':
		kind, err = s.kindByNextEq(token.Eq, token.Assign)
	case '<':
		kind, err = s.kindByNextEq(token.Lte, token.Lt)
	case '>':
		kind, err = s.kindByNextEq(token.Gte, token.Gt)
	case '!':
		kind, err = s.kindByNextEq(token.Neq, token.Bang)
	case '+':
		kind = token.Plus
	case '-':
		kind = token.Minus
	case '*':
		kind = token.Star
	case '/':
		kind = token.Slash
	case '%':
		kind = token.Percent
	case '[':
		kind = token.LBracket
	case ']':
		kind = token.RBracket
	case '(':
		kind = token.LParen
	case ')':
		kind = token.RParen
	case '{':
		kind = token.LBrace
	case '}':
		kind = token.RBrace
	case ',':
		kind = token.Comma
	case '.':
		kind = token.Dot
	case ';':
		kind = token.SemiColon
	default:
		return token.Token{}, ErrUnknownChar{Char: c}
	}
	if err != nil {
		return token.Token{}, err
	}
	return token.Simple(kind, loc), nil
}

// kindByNextEq resolves an operator that may be followed by '='. It returns
// withEq (and consumes the '=') when the next rune is '=', and alone
// otherwise, leaving the following rune in the input.
func (s *Scanner) kindByNextEq(withEq, alone token.TokenKind) (token.TokenKind, error) {
	matched, err := s.matchRune('=')
	if err != nil {
		var none token.TokenKind
		return none, err
	}
	if matched {
		return withEq, nil
	}
	return alone, nil
}

// scanString scans a double-quoted string literal and returns a String token
// whose value is the text between the quotes. No escape sequences are
// interpreted. Reaching end of input before the closing quote yields
// ErrUnclosedString.
func (s *Scanner) scanString() (token.Token, error) {
	loc := s.loc()
	if _, err := s.consume('"'); err != nil {
		return token.Token{}, err
	}

	var buf strings.Builder
	for {
		c, eof, err := s.next()
		if err != nil {
			return token.Token{}, err
		}
		if eof {
			return token.Token{}, ErrUnclosedString
		}
		if c == '"' {
			// The closing quote belongs to the literal, so consuming it is correct.
			break
		}
		buf.WriteRune(c)
	}
	return token.New(token.String, loc, buf.String()), nil
}

// scanIdentifierOrKeyword scans a run of letters and digits. If the text is a
// reserved word the matching keyword token is returned; otherwise an Ident
// token carrying the text is returned. The rune that ends the run is left in
// the input.
func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
	loc := s.loc()
	text, err := s.readWhile(func(c rune) bool {
		return unicode.IsLetter(c) || unicode.IsDigit(c)
	})
	if err != nil {
		return token.Token{}, err
	}

	var kind token.TokenKind
	switch text {
	case "var":
		kind = token.KwVar
	case "fn":
		kind = token.KwFn
	case "object":
		kind = token.KwObject
	case "if":
		kind = token.KwIf
	case "elif":
		kind = token.KwElif
	case "else":
		kind = token.KwElse
	case "for":
		kind = token.KwFor
	case "try":
		kind = token.KwTry
	case "catch":
		kind = token.KwCatch
	case "finally":
		kind = token.KwFinally
	case "return":
		kind = token.KwReturn
	case "continue":
		kind = token.KwContinue
	case "break":
		kind = token.KwBreak
	case "throw":
		kind = token.KwThrow
	case "in":
		kind = token.KwIn
	case "null":
		kind = token.KwNull
	case "true":
		kind = token.KwTrue
	case "false":
		kind = token.KwFalse
	case "this":
		kind = token.KwThis
	case "use":
		kind = token.KwUse
	case "from":
		kind = token.KwFrom
	case "by":
		kind = token.KwBy
	default:
		return token.New(token.Ident, loc, text), nil
	}
	return token.Simple(kind, loc), nil
}

// scanNumber scans a run of digits and returns an Int token whose value is
// the run parsed as a base-10 uint64. The rune that ends the run is left in
// the input. A run that strconv.ParseUint rejects (for example one that
// overflows 64 bits) is returned as that parse error.
func (s *Scanner) scanNumber() (token.Token, error) {
	loc := s.loc()
	digits, err := s.readWhile(unicode.IsDigit)
	if err != nil {
		return token.Token{}, err
	}

	num, err := strconv.ParseUint(digits, 10, 64)
	if err != nil {
		return token.Token{}, err
	}
	return token.New(token.Int, loc, num), nil
}

// readWhile consumes runes for as long as accept reports true and returns
// them as a string. The first rejected rune (or end of input) stops the run
// and is NOT consumed, so the caller's delimiter is still available to the
// next token.
func (s *Scanner) readWhile(accept func(rune) bool) (string, error) {
	var buf strings.Builder
	for {
		c, eof, err := s.peek()
		if err != nil {
			return "", err
		}
		if eof || !accept(c) {
			return buf.String(), nil
		}
		if _, _, err := s.next(); err != nil {
			return "", err
		}
		buf.WriteRune(c)
	}
}

// skipWhitespace discards whitespace up to the next significant rune.
// Newlines are deliberately left in place: scanToken reports them as EOL.
func (s *Scanner) skipWhitespace() error {
	for {
		c, eof, err := s.peek()
		if err != nil {
			return err
		}
		if eof || c == '\n' || !unicode.IsSpace(c) {
			return nil
		}
		if _, _, err := s.next(); err != nil {
			return err
		}
	}
}

// loc returns the position of the next unread rune.
func (s *Scanner) loc() token.Loc {
	return token.Loc{
		Row: s.row,
		Col: s.col,
	}
}

// next consumes and returns one rune, advancing the row/column counters.
// The boolean result is true at end of input, in which case the rune is 0
// and the error is nil. Any other read failure is returned as the error.
func (s *Scanner) next() (rune, bool, error) {
	r, _, err := s.source.ReadRune()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return 0, true, nil
		}
		return 0, false, err
	}

	if r == '\n' {
		s.row++
		s.col = 0
	} else {
		s.col++
	}
	return r, false, nil
}

// matchRune consumes the next rune only if it equals want and reports
// whether it did. A mismatch or end of input leaves the input untouched.
func (s *Scanner) matchRune(want rune) (bool, error) {
	c, eof, err := s.peek()
	if err != nil {
		return false, err
	}
	if eof || c != want {
		return false, nil
	}
	if _, _, err := s.next(); err != nil {
		return false, err
	}
	return true, nil
}

// consume requires the next rune to be want and consumes it.
//
// The boolean result is false only when reading the source failed. When the
// source was readable but the next rune differs from want (or the input has
// ended, reported as rune 0), it returns true together with
// ErrUnexpectedChar and leaves the input untouched.
func (s *Scanner) consume(want rune) (bool, error) {
	c, eof, err := s.peek()
	if err != nil {
		return false, err
	}
	if eof || c != want {
		return true, ErrUnexpectedChar{
			Expected: want,
			Actual:   c,
		}
	}
	if _, _, err := s.next(); err != nil {
		return false, err
	}
	return true, nil
}

// peek returns the next rune without consuming it or moving the position
// counters. The boolean result is true at end of input.
func (s *Scanner) peek() (rune, bool, error) {
	r, _, err := s.source.ReadRune()
	if err != nil {
		if errors.Is(err, io.EOF) {
			return 0, true, nil
		}
		return 0, false, err
	}
	// Only push back after a successful read; UnreadRune is invalid otherwise.
	if err := s.source.UnreadRune(); err != nil {
		return 0, false, err
	}
	return r, false, nil
}