package lexer

import (
	"fmt"
	"strconv"

	"git.red-panda.pet/pandaware/lox-go/reporter"
)

// keywords maps reserved words to their token types.
var keywords = map[string]TokenType{
	"and":    TokenTypeAnd,
	"class":  TokenTypeClass,
	"else":   TokenTypeElse,
	"false":  TokenTypeFalse,
	"fun":    TokenTypeFun,
	"for":    TokenTypeFor,
	"if":     TokenTypeIf,
	"nil":    TokenTypeNil,
	"or":     TokenTypeOr,
	"print":  TokenTypePrint,
	"return": TokenTypeReturn,
	"super":  TokenTypeSuper,
	"this":   TokenTypeThis,
	"true":   TokenTypeTrue,
	"var":    TokenTypeVar,
	"while":  TokenTypeWhile,
}

func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}

func isAlpha(r rune) bool {
	return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_'
}

func isAlphaNumeric(r rune) bool {
	return isDigit(r) || isAlpha(r)
}

// Scanner walks the source rune by rune and produces a flat list of tokens.
type Scanner struct {
	source  []rune
	tokens  []*Token
	start   int // index of the first rune of the lexeme being scanned
	current int // index of the rune about to be consumed
	line    int // current line, for error reporting
}

func New(source string) *Scanner {
	return &Scanner{
		source:  []rune(source),
		tokens:  []*Token{},
		start:   0,
		current: 0,
		line:    1,
	}
}

func (s *Scanner) isAtEnd() bool {
	return s.current >= len(s.source)
}

// advance consumes and returns the current rune.
func (s *Scanner) advance() rune {
	r := s.source[s.current]
	s.current++
	return r
}

func (s *Scanner) addToken(t TokenType, literal any) {
	s.tokens = append(s.tokens, &Token{
		Type:    t,
		Lexeme:  string(s.source[s.start:s.current]),
		Literal: literal,
		Line:    s.line,
	})
}

// match consumes the current rune only if it equals expected.
func (s *Scanner) match(expected rune) bool {
	if s.isAtEnd() {
		return false
	}
	if s.source[s.current] != expected {
		return false
	}
	s.current++
	return true
}

// peek returns the current rune without consuming it.
func (s *Scanner) peek() rune {
	if s.isAtEnd() {
		return rune(0)
	}
	return s.source[s.current]
}

// peekNext returns the rune after the current one without consuming anything.
func (s *Scanner) peekNext() rune {
	// >= (not >): current+1 == len(source) would index past the end
	if s.current+1 >= len(s.source) {
		return rune(0)
	}
	return s.source[s.current+1]
}

// scanToken scans a single token and reports whether an error occurred.
func (s *Scanner) scanToken() bool {
	r := s.advance()
	switch r {
	// simple 1-character tokens
	case '(':
		s.addToken(TokenTypeLeftParen, nil)
	case ')':
		s.addToken(TokenTypeRightParen, nil)
	case '{':
		s.addToken(TokenTypeLeftBrace, nil)
	case '}':
		s.addToken(TokenTypeRightBrace, nil)
	case ',':
		s.addToken(TokenTypeComma, nil)
	case '.':
		s.addToken(TokenTypeDot, nil)
	case '-':
		s.addToken(TokenTypeMinus, nil)
	case '+':
		s.addToken(TokenTypePlus, nil)
	case ';':
		s.addToken(TokenTypeSemicolon, nil)
	case '*':
		s.addToken(TokenTypeStar, nil)
	// 1- or 2-character tokens
	case '!':
		if s.match('=') {
			s.addToken(TokenTypeBangEq, nil)
		} else {
			s.addToken(TokenTypeBang, nil)
		}
	case '=':
		if s.match('=') {
			s.addToken(TokenTypeEqualEqual, nil)
		} else {
			s.addToken(TokenTypeEqual, nil)
		}
	case '<':
		if s.match('=') {
			s.addToken(TokenTypeLessEq, nil)
		} else {
			s.addToken(TokenTypeLess, nil)
		}
	case '>':
		if s.match('=') {
			s.addToken(TokenTypeGreaterEq, nil)
		} else {
			s.addToken(TokenTypeGreater, nil)
		}
	case '/':
		// match comments
		if s.match('/') {
			// we scan until the end of line/file (whichever comes first :p)
			for s.peek() != '\n' && !s.isAtEnd() {
				s.advance()
			}
		} else {
			s.addToken(TokenTypeSlash, nil)
		}
	// ignore whitespace
	case ' ', '\r', '\t':
	// advance the line counter :D
	case '\n':
		s.line++
	// string literals
	case '"':
		return s.string()
	default:
		if isDigit(r) {
			return s.number()
		}
		if isAlpha(r) {
			s.identifier()
			return false
		}
		reporter.Err(s.line, fmt.Sprintf("Unexpected character %c", r))
		return true
	}
	return false
}

// string scans a string literal and reports whether it was unterminated.
func (s *Scanner) string() bool {
	// peek until we hit the end of the string or file, whichever is first
	for s.peek() != '"' && !s.isAtEnd() {
		// support strings with new lines :D
		if s.peek() == '\n' {
			s.line++
		}
		s.advance()
	}

	// if the string didn't end before the file did, we report an err
	// and return that we got one
	if s.isAtEnd() {
		reporter.Err(s.line, "Unterminated string")
		return true
	}

	// consume the closing quote
	s.advance()

	// todo: escape sequences
	value := s.source[s.start+1 : s.current-1]
	s.addToken(TokenTypeString, string(value))
	return false
}

func (s *Scanner) number() bool {
	for isDigit(s.peek()) {
		s.advance()
	}

	// consume a fractional part only if a digit follows the dot
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance()
		for isDigit(s.peek()) {
			s.advance()
		}
	}

	// the lexeme is just digits and at most one dot, so ParseFloat can't fail
	literal, _ := strconv.ParseFloat(string(s.source[s.start:s.current]), 64)
	s.addToken(TokenTypeNumber, literal)
	return false
}

func (s *Scanner) identifier() {
	for isAlphaNumeric(s.peek()) {
		s.advance()
	}

	text := string(s.source[s.start:s.current])
	tt, ok := keywords[text]
	if !ok {
		tt = TokenTypeIdentifier
	}
	s.addToken(tt, nil)
}

// ScanTokens scans the whole source and returns the tokens plus an ok flag
// that is false if any lexical error was reported.
func (s *Scanner) ScanTokens() ([]*Token, bool) {
	isErr := false
	for !s.isAtEnd() {
		s.start = s.current
		// note: don't write isErr = isErr || s.scanToken() here — once isErr
		// is true, || short-circuits, scanToken never runs again, current
		// stops advancing, and the loop never terminates
		if s.scanToken() {
			isErr = true
		}
	}

	s.tokens = append(s.tokens, &Token{
		Type:    TokenTypeEOF,
		Lexeme:  "",
		Literal: nil,
		Line:    s.line,
	})

	return s.tokens, !isErr
}
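// Usage sketch (illustrative, not part of the package): a hypothetical caller
// that lexes a snippet and walks the resulting tokens. Only New and ScanTokens
// from this file are assumed; everything else is the standard library. The
// Type/Lexeme/Line fields are the exported ones set in addToken above.
//
//	src := `var answer = 42; // trailing comment`
//	tokens, ok := lexer.New(src).ScanTokens()
//	if !ok {
//		// lexical errors were already reported via the reporter package
//	}
//	for _, tok := range tokens {
//		fmt.Printf("%d: %v %q\n", tok.Line, tok.Type, tok.Lexeme)
//	}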