// Package scanner turns source text into a flat sequence of tokens.
package scanner

import (
	"io"
	"strconv"
	"strings"
	"unicode"

	"jinx/pkg/lang/scanner/token"
	"jinx/pkg/libs/source"
)

// Scanner reads runes from a source.Walker and groups them into tokens.
type Scanner struct {
	src      source.Walker
	indent   int
	finished bool
}

// New returns a Scanner that reads its input from src.
func New(src io.Reader) *Scanner {
	return &Scanner{
		src:      *source.NewWalker(src),
		indent:   0,
		finished: false,
	}
}

// Scan tokenizes the remaining input.
//
// On success the returned slice always ends with the EOF token. The first
// error encountered aborts the scan and is returned with a nil slice.
func (s *Scanner) Scan() ([]token.Token, error) {
	var tokens []token.Token
	for {
		t, err := s.scanToken()
		if err != nil {
			return nil, err
		}
		tokens = append(tokens, t)
		if t.Kind == token.EOF {
			return tokens, nil
		}
	}
}

// scanToken produces the next token from the input.
//
// Leading whitespace is skipped first; if that whitespace contained at least
// one newline, a single EOL token located at the first newline is returned.
// Once the EOF token has been returned, every later call fails with
// ErrScannerFinished.
func (s *Scanner) scanToken() (token.Token, error) {
	if s.finished {
		return token.Token{}, ErrScannerFinished
	}

	hadNewline, firstNewline, err := s.skipWhitespace()
	if err != nil {
		return token.Token{}, err
	}
	if hadNewline {
		return token.New(token.EOL, firstNewline, nil), nil
	}

	c, eof, err := s.src.Peek()
	if err != nil {
		return token.Token{}, err
	}
	if eof {
		s.finished = true
		return token.Simple(token.EOF, s.src.Loc()), nil
	}

	// Multi-rune tokens are dispatched on their first rune without consuming it.
	switch {
	case c == '"':
		return s.scanString()
	case unicode.IsLetter(c) || c == '_':
		return s.scanIdentifierOrKeyword()
	case unicode.IsDigit(c):
		return s.scanNumber()
	}

	loc := s.src.Loc()
	c, _, err = s.src.Next()
	if err != nil {
		return token.Token{}, err
	}

	var kind token.TokenKind
	switch c {
	case '\n':
		// skipWhitespace already consumes newlines, so this is not expected
		// to be reached; kept as a safeguard.
		kind = token.EOL
	case '=':
		kind, err = s.scanOptionalEq(token.Assign, token.Eq)
	case '+':
		kind = token.Plus
	case '-':
		kind = token.Minus
	case '*':
		kind = token.Star
	case '/':
		kind = token.Slash
	case '%':
		kind = token.Percent
	case '<':
		kind, err = s.scanOptionalEq(token.Lt, token.Lte)
	case '>':
		kind, err = s.scanOptionalEq(token.Gt, token.Gte)
	case '!':
		kind, err = s.scanOptionalEq(token.Bang, token.Neq)
	case '[':
		kind = token.LBracket
	case ']':
		kind = token.RBracket
	case '(':
		kind = token.LParen
	case ')':
		kind = token.RParen
	case '{':
		kind = token.LBrace
	case '}':
		kind = token.RBrace
	case ',':
		kind = token.Comma
	case '.':
		kind = token.Dot
	case ';':
		kind = token.SemiColon
	default:
		return token.Token{}, ErrUnknownChar{Char: c}
	}
	if err != nil {
		return token.Token{}, err
	}

	return token.Simple(kind, loc), nil
}

// scanOptionalEq chooses between a one-rune operator and its two-rune form
// that ends in '='. It returns double when the next rune is '=' (consuming
// it) and single otherwise.
//
// NOTE(review): assumes Consume reports whether the rune matched and was
// consumed, with a non-nil error only on read failure - confirm against
// source.Walker.
func (s *Scanner) scanOptionalEq(single, double token.TokenKind) (token.TokenKind, error) {
	matched, err := s.src.Consume('=')
	if err != nil {
		var none token.TokenKind
		return none, err
	}
	if matched {
		return double, nil
	}
	return single, nil
}

// scanString scans a double-quoted string literal starting at the current
// position and returns a String token holding the unescaped contents.
//
// It returns ErrUnclosedString if the input ends before the closing quote,
// and whatever error escapedChar reports for an unsupported escape.
func (s *Scanner) scanString() (token.Token, error) {
	loc := s.src.Loc()

	// Step over the opening quote.
	if _, err := s.src.Consume('"'); err != nil {
		return token.Token{}, err
	}

	var buf strings.Builder
	for {
		c, eof, err := s.src.Next()
		if err != nil {
			return token.Token{}, err
		}
		if eof {
			return token.Token{}, ErrUnclosedString
		}
		if c == '"' {
			break
		}

		if c == '\\' {
			// A backslash replaces itself and the following rune with the
			// rune that the escape sequence stands for.
			sequence, eof, err := s.src.Next()
			if err != nil {
				return token.Token{}, err
			}
			if eof {
				return token.Token{}, ErrUnclosedString
			}

			escaped, err := escapedChar(sequence)
			if err != nil {
				return token.Token{}, err
			}
			c = escaped
		}

		buf.WriteRune(c)
	}

	return token.New(token.String, loc, buf.String()), nil
}

// scanIdentifierOrKeyword scans a run of letters, digits and underscores.
//
// If the text is a reserved word, the matching keyword token is returned;
// otherwise an Ident token carrying the text is returned.
func (s *Scanner) scanIdentifierOrKeyword() (token.Token, error) {
	loc := s.src.Loc()

	var buf strings.Builder
	for {
		c, eof, err := s.src.Peek()
		if err != nil {
			return token.Token{}, err
		}
		if eof || !(unicode.IsLetter(c) || unicode.IsDigit(c) || c == '_') {
			break
		}
		if _, _, err = s.src.Next(); err != nil {
			return token.Token{}, err
		}
		buf.WriteRune(c)
	}

	var kind token.TokenKind
	switch buf.String() {
	case "var":
		kind = token.KwVar
	case "fn":
		kind = token.KwFn
	case "type":
		kind = token.KwType
	case "if":
		kind = token.KwIf
	case "elif":
		kind = token.KwElif
	case "else":
		kind = token.KwElse
	case "for":
		kind = token.KwFor
	case "try":
		kind = token.KwTry
	case "catch":
		kind = token.KwCatch
	case "finally":
		kind = token.KwFinally
	case "return":
		kind = token.KwReturn
	case "continue":
		kind = token.KwContinue
	case "break":
		kind = token.KwBreak
	case "throw":
		kind = token.KwThrow
	case "in":
		kind = token.KwIn
	case "null":
		kind = token.KwNull
	case "true":
		kind = token.KwTrue
	case "false":
		kind = token.KwFalse
	case "this":
		kind = token.KwThis
	case "use":
		kind = token.KwUse
	case "from":
		kind = token.KwFrom
	case "by":
		kind = token.KwBy
	case "global":
		kind = token.KwGlobal
	default:
		return token.New(token.Ident, loc, buf.String()), nil
	}

	return token.Simple(kind, loc), nil
}

// scanNumber scans a run of digits and returns an Int token whose value is
// the parsed uint64.
//
// The error from strconv.ParseUint is returned unchanged when the digits
// cannot be parsed as a base-10 uint64 (for example on overflow).
func (s *Scanner) scanNumber() (token.Token, error) {
	loc := s.src.Loc()

	var buf strings.Builder
	for {
		c, eof, err := s.src.Peek()
		if err != nil {
			return token.Token{}, err
		}
		if eof || !unicode.IsDigit(c) {
			break
		}
		if _, _, err = s.src.Next(); err != nil {
			return token.Token{}, err
		}
		buf.WriteRune(c)
	}

	num, err := strconv.ParseUint(buf.String(), 10, 64)
	if err != nil {
		return token.Token{}, err
	}

	return token.New(token.Int, loc, num), nil
}

// skipWhitespace advances past every rune for which unicode.IsSpace is true.
//
// It reports whether a newline was seen and, if so, the location of the
// first one. On error the other results are zero values.
func (s *Scanner) skipWhitespace() (bool, source.Loc, error) {
	hadNewline := false
	firstNewline := source.Loc{}

	for {
		c, eof, err := s.src.Peek()
		if err != nil {
			return false, source.Loc{}, err
		}

		// Record only the first newline; later ones collapse into it.
		if c == '\n' && !hadNewline {
			firstNewline = s.src.Loc()
			hadNewline = true
		}

		if eof || !unicode.IsSpace(c) {
			break
		}
		if _, _, err = s.src.Next(); err != nil {
			return false, source.Loc{}, err
		}
	}

	return hadNewline, firstNewline, nil
}

// escapedChar maps the rune following a backslash in a string literal to the
// rune it represents. Supported escapes are \n, \r, \t, \\ and \".
//
// Any other rune is returned unchanged together with
// ErrUnknownEscapeSequence.
func escapedChar(c rune) (rune, error) {
	switch c {
	case 'n':
		return '\n', nil
	case 'r':
		return '\r', nil
	case 't':
		return '\t', nil
	case '\\':
		return '\\', nil
	case '"':
		return '"', nil
	}
	return c, ErrUnknownEscapeSequence{Char: c}
}