// lox-go/ast/gen/lex.go

package main

import "fmt"
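
// tokenType identifies the kind of a scanned token.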
type tokenType int

const (
	tokenTypeIdentifier tokenType = iota
	tokenTypeRightBracket
	tokenTypeLeftBracket
	tokenTypeEqual
	tokenTypeName
	tokenTypeSemicolon
	tokenTypeEOF
)
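
// token is a single lexeme tagged with its type and the source line it
// appeared on.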
type token struct {
	Type   tokenType
	Lexeme string
	Line   int
}

func isUpperAlpha(r rune) bool {
	return r >= 'A' && r <= 'Z'
}

func isAlpha(r rune) bool {
	return isUpperAlpha(r) || (r >= 'a' && r <= 'z')
}

func isNumeric(r rune) bool {
	return r >= '0' && r <= '9'
}

func isAlphaNumeric(r rune) bool {
	return isAlpha(r) || isNumeric(r)
}

// isGoIdentifier reports whether r may appear in a Go identifier.
func isGoIdentifier(r rune) bool {
	return isAlphaNumeric(r) || r == '_'
}

// isIdentifier reports whether r may appear in a type identifier in the
// grammar, which also admits qualified ("pkg.Name") and pointer ("*Name")
// forms.
func isIdentifier(r rune) bool {
	return isGoIdentifier(r) || r == '.' || r == '*'
}
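
// lexer holds scanning state: the source as runes, the bounds of the token
// currently being scanned, the current line number, and the tokens emitted
// so far.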
type lexer struct {
	source  []rune
	current int
	start   int
	line    int
	tokens  []*token
}
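
// addToken appends a token of type t whose lexeme spans the half-open range
// [l.start, l.current).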
func (l *lexer) addToken(t tokenType) {
	l.tokens = append(l.tokens, &token{
		Type:   t,
		Lexeme: string(l.source[l.start:l.current]),
		Line:   l.line,
	})
}
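
// peek returns the current rune without consuming it, or NUL at end of input.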
func (l *lexer) peek() rune {
	if l.isAtEnd() {
		return rune(0)
	}
	return l.source[l.current]
}
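
// peekNext returns the rune after the current one without consuming anything,
// or NUL if fewer than two runes remain.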
func (l *lexer) peekNext() rune {
	if l.current+1 >= len(l.source) {
		return rune(0)
	}
	return l.source[l.current+1]
}
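
// advance consumes the current rune and returns it.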
func (l *lexer) advance() rune {
	r := l.source[l.current]
	l.current++
	return r
}
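
// isAtEnd reports whether the entire source has been consumed.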
func (l *lexer) isAtEnd() bool {
	return l.current >= len(l.source)
}
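
// scanToken consumes a single token starting at the current position and
// appends it to l.tokens, or returns an error describing unexpected input.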
func (l *lexer) scanToken() error {
	r := l.advance()
	switch r {
	case '[':
		l.addToken(tokenTypeLeftBracket)
	case ']':
		l.addToken(tokenTypeRightBracket)
	case '=':
		l.addToken(tokenTypeEqual)
	case ';':
		l.addToken(tokenTypeSemicolon)
	case ' ', '\r', '\t':
		// Ignore whitespace.
	case '#':
		next := l.peek()
		if isUpperAlpha(next) {
			l.name()
			return nil
		}
		return fmt.Errorf("names must have an uppercase alphabetical first character, found '%s' at line %d", string(next), l.line)
	case '\n':
		l.line++
	default:
		if isIdentifier(r) {
			l.identifier()
			return nil
		}
		return fmt.Errorf("unexpected character '%s' at line %d", string(r), l.line)
	}
	return nil
}
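
// name scans a node name introduced by '#': an identifier that must begin
// with an uppercase letter. The leading '#' is excluded from the lexeme; the
// rune following the name is left for scanToken, so a trailing newline still
// increments l.line.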
func (l *lexer) name() {
	for isGoIdentifier(l.peek()) {
		l.advance()
	}
	// Slice from start+1 to drop the leading '#'.
	text := l.source[l.start+1 : l.current]
	l.tokens = append(l.tokens, &token{
		Type:   tokenTypeName,
		Lexeme: string(text),
		Line:   l.line,
	})
}
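
// identifier scans a run of identifier runes into a single token.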
func (l *lexer) identifier() {
	for isIdentifier(l.peek()) {
		l.advance()
	}
	text := l.source[l.start:l.current]
	l.tokens = append(l.tokens, &token{
		Type:   tokenTypeIdentifier,
		Lexeme: string(text),
		Line:   l.line,
	})
}
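
// scanTokens scans the whole source, collecting every error rather than
// stopping at the first, and terminates the stream with an EOF token.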
func (l *lexer) scanTokens() ([]*token, []error) {
	errs := []error{}
	for !l.isAtEnd() {
		l.start = l.current
		err := l.scanToken()
		if err != nil {
			errs = append(errs, err)
		}
	}
	// Reset start so the EOF token carries an empty lexeme rather than the
	// text of the last token scanned.
	l.start = l.current
	l.addToken(tokenTypeEOF)
	return l.tokens, errs
}
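
// lex scans source and returns the token stream together with any errors
// encountered along the way.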
func lex(source string) ([]*token, []error) {
	l := &lexer{
		source: []rune(source),
		line:   1,
		tokens: []*token{},
	}
	return l.scanTokens()
}
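
// A minimal usage sketch. The input below is an assumed example of the
// grammar (a '#'-prefixed name, then identifier pairs terminated by ';'),
// not taken from the project's definition files:
//
//	tokens, errs := lex("#Binary\nLeft Expr;\nRight Expr;\n")
//	for _, err := range errs {
//		fmt.Println(err)
//	}
//	for _, tok := range tokens {
//		fmt.Printf("%v %q line %d\n", tok.Type, tok.Lexeme, tok.Line)
//	}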