lox-go/lexer/scanner.go

package lexer

import (
	"fmt"
	"strconv"

	"git.red-panda.pet/pandaware/lox-go/reporter"
)
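
// keywords maps Lox's reserved words to their token types, so identifier
// scanning can tell a keyword from a user name with a single map lookup.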
var keywords = map[string]TokenType{
	"and":    TokenTypeAnd,
	"class":  TokenTypeClass,
	"else":   TokenTypeElse,
	"false":  TokenTypeFalse,
	"fun":    TokenTypeFun,
	"for":    TokenTypeFor,
	"if":     TokenTypeIf,
	"nil":    TokenTypeNil,
	"or":     TokenTypeOr,
	"print":  TokenTypePrint,
	"return": TokenTypeReturn,
	"super":  TokenTypeSuper,
	"this":   TokenTypeThis,
	"true":   TokenTypeTrue,
	"var":    TokenTypeVar,
	"while":  TokenTypeWhile,
}
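
// isDigit reports whether r is an ASCII decimal digit.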
func isDigit(r rune) bool {
	return r >= '0' && r <= '9'
}
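
// isAlpha reports whether r is an ASCII letter or underscore, the runes
// allowed to start an identifier.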
func isAlpha(r rune) bool {
	return (r >= 'a' && r <= 'z') ||
		(r >= 'A' && r <= 'Z') ||
		r == '_'
}
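
// isAlphaNumeric reports whether r may appear inside an identifier.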
func isAlphaNumeric(r rune) bool {
	return isDigit(r) || isAlpha(r)
}
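
// Scanner walks the source rune by rune, building a flat list of tokens.
// start marks the first rune of the lexeme being scanned, current the next
// rune to consume, and line the current line number for error reporting.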
type Scanner struct {
	source  []rune
	tokens  []*Token
	start   int
	current int
	line    int
}
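
// New returns a Scanner ready to tokenize source, starting at line 1.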
func New(source string) *Scanner {
	s := new(Scanner)
	s.source = []rune(source)
	s.tokens = []*Token{}
	s.start = 0
	s.current = 0
	s.line = 1
	return s
}
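
// isAtEnd reports whether every rune of the source has been consumed.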
func (s *Scanner) isAtEnd() bool {
	return s.current >= len(s.source)
}
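
// advance consumes the current rune and returns it.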
func (s *Scanner) advance() rune {
	r := s.source[s.current]
	s.current++
	return r
}
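
// addToken appends a token whose lexeme spans start..current, carrying
// literal as its parsed value (nil for tokens without one).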
func (s *Scanner) addToken(t TokenType, literal any) {
	s.tokens = append(s.tokens, &Token{
		Type:    t,
		Lexeme:  string(s.source[s.start:s.current]),
		Literal: literal,
		Line:    s.line,
	})
}
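
// match consumes the current rune only if it equals expected and reports
// whether it did; this is how two-character operators like != are detected.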
func (s *Scanner) match(expected rune) bool {
	if s.isAtEnd() {
		return false
	}
	if s.source[s.current] != expected {
		return false
	}
	s.current++
	return true
}
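
// peek returns the current rune without consuming it, or NUL at end of input.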
func (s *Scanner) peek() rune {
	if s.isAtEnd() {
		return rune(0)
	}
	return s.source[s.current]
}
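
// peekNext returns the rune after the current one without consuming
// anything, or NUL if that would read past the end of the source.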
func (s *Scanner) peekNext() rune {
	// >= here: with > this would index one rune past the end of the source
	if s.current+1 >= len(s.source) {
		return rune(0)
	}
	return s.source[s.current+1]
}
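
// scanToken scans the single token beginning at s.start and reports
// whether a lexical error occurred.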
func (s *Scanner) scanToken() bool {
	r := s.advance()
	switch r {
	// simple 1 character tokens
	case '(':
		s.addToken(TokenTypeLeftParen, nil)
	case ')':
		s.addToken(TokenTypeRightParen, nil)
	case '{':
		s.addToken(TokenTypeLeftBrace, nil)
	case '}':
		s.addToken(TokenTypeRightBrace, nil)
	case ',':
		s.addToken(TokenTypeComma, nil)
	case '.':
		s.addToken(TokenTypeDot, nil)
	case '-':
		s.addToken(TokenTypeMinus, nil)
	case '+':
		s.addToken(TokenTypePlus, nil)
	case ';':
		s.addToken(TokenTypeSemicolon, nil)
	case '*':
		s.addToken(TokenTypeStar, nil)
	// one or two character tokens
	case '!':
		if s.match('=') {
			s.addToken(TokenTypeBangEq, nil)
		} else {
			s.addToken(TokenTypeBang, nil)
		}
	case '=':
		if s.match('=') {
			s.addToken(TokenTypeEqualEqual, nil)
		} else {
			s.addToken(TokenTypeEqual, nil)
		}
	case '<':
		if s.match('=') {
			s.addToken(TokenTypeLessEq, nil)
		} else {
			s.addToken(TokenTypeLess, nil)
		}
	case '>':
		if s.match('=') {
			s.addToken(TokenTypeGreaterEq, nil)
		} else {
			s.addToken(TokenTypeGreater, nil)
		}
	case '/':
		// match comments
		if s.match('/') {
			// we scan until the end of line/file (whichever comes first :p)
			for s.peek() != '\n' && !s.isAtEnd() {
				s.advance()
			}
		} else {
			s.addToken(TokenTypeSlash, nil)
		}
	// ignore whitespace
	case ' ', '\r', '\t':
	// advance the line counter :D
	case '\n':
		s.line++
	// string literals
	case '"':
		return s.string()
	default:
		if isDigit(r) {
			return s.number()
		} else if isAlpha(r) {
			s.identifier()
			return false
		}
		reporter.Err(s.line, fmt.Sprintf("Unexpected character %c", r))
		return true
	}
	return false
}
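
// string scans a double-quoted string literal (the opening quote has
// already been consumed) and reports whether it was left unterminated.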
func (s *Scanner) string() bool {
	// peek until we hit the end of the string or file, whichever is first
	for s.peek() != '"' && !s.isAtEnd() {
		// support strings with new lines :D
		if s.peek() == '\n' {
			s.line++
		}
		s.advance()
	}
	// if the string didn't end before the file did, we report an error
	// and return that we got one
	if s.isAtEnd() {
		reporter.Err(s.line, "Unterminated string")
		return true
	}
	// consume the closing quote
	s.advance()
	// todo: escape sequences
	// trim the surrounding quotes off the literal value
	value := s.source[s.start+1 : s.current-1]
	s.addToken(TokenTypeString, string(value))
	return false
}
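
// number scans an integer or decimal literal and stores its value as a
// float64, Lox's only numeric type; like string it reports whether an
// error occurred, though no numeric scan can currently fail.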
func (s *Scanner) number() bool {
	for isDigit(s.peek()) {
		s.advance()
	}
	// consume a fractional part only when a digit follows the '.', leaving
	// a bare trailing dot to be scanned as its own token
	if s.peek() == '.' && isDigit(s.peekNext()) {
		s.advance()
		for isDigit(s.peek()) {
			s.advance()
		}
	}
	// the lexeme is digits with an optional fraction, so ParseFloat can't fail
	literal, _ := strconv.ParseFloat(string(s.source[s.start:s.current]), 64)
	s.addToken(TokenTypeNumber, literal)
	return false
}
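
// identifier scans an identifier and promotes it to a keyword token when
// the lexeme appears in the keywords table.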
func (s *Scanner) identifier() {
	for isAlphaNumeric(s.peek()) {
		s.advance()
	}
	text := s.source[s.start:s.current]
	tt, ok := keywords[string(text)]
	if !ok {
		tt = TokenTypeIdentifier
	}
	s.addToken(tt, nil)
}
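
// ScanTokens scans the whole source, appends a trailing EOF token, and
// returns the tokens along with an ok flag that is false if any lexical
// error was reported.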
func (s *Scanner) ScanTokens() ([]*Token, bool) {
	isErr := false
	for !s.isAtEnd() {
		s.start = s.current
		// don't short-circuit here: scanToken must run every iteration, or
		// the loop would stop advancing after the first error and spin forever
		if s.scanToken() {
			isErr = true
		}
	}
	s.tokens = append(s.tokens, &Token{
		Type:    TokenTypeEOF,
		Lexeme:  "",
		Literal: nil,
		Line:    s.line,
	})
	return s.tokens, !isErr
}