package main

import "fmt"

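// tokenType identifies the kind of a scanned token.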
type tokenType int

const (
	tokenTypeIdentifier tokenType = iota
	tokenTypeRightBracket
	tokenTypeLeftBracket
	tokenTypeEqual
	tokenTypeName
	tokenTypeSemicolon
	tokenTypeEOF
)

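// token is a single scanned lexeme tagged with its type and the source line
// it appeared on.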
type token struct {
	Type   tokenType
	Lexeme string
	Line   int
}

func isUpperAlpha(r rune) bool {
	return r >= 'A' && r <= 'Z'
}

func isAlpha(r rune) bool {
	return isUpperAlpha(r) || (r >= 'a' && r <= 'z')
}

func isNumeric(r rune) bool {
	return r >= '0' && r <= '9'
}

func isAlphaNumeric(r rune) bool {
	return isAlpha(r) || isNumeric(r)
}

func isGoIdentifier(r rune) bool {
	return isAlphaNumeric(r) || r == '_'
}

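// isIdentifier also accepts '.' and '*', so qualified names such as pkg.Type
// and pointer types such as *T scan as a single identifier token.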
func isIdentifier(r rune) bool {
	return isGoIdentifier(r) || r == '.' || r == '*'
}

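// lexer walks source one rune at a time: start marks the first rune of the
// token in progress, current the next rune to consume, and line the 1-based
// line number used to tag tokens and errors.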
type lexer struct {
	source  []rune
	current int
	start   int
	line    int
	tokens  []*token
}

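// addToken appends a token for the lexeme spanning source[start:current].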
func (l *lexer) addToken(t tokenType) {
	l.tokens = append(l.tokens, &token{
		Type:   t,
		Lexeme: string(l.source[l.start:l.current]),
		Line:   l.line,
	})
}

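// peek returns the current rune without consuming it, or 0 at end of input.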
func (l *lexer) peek() rune {
	if l.isAtEnd() {
		return rune(0)
	}
	return l.source[l.current]
}

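// peekNext looks one rune past the current one, returning 0 past the end of
// the input.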
func (l *lexer) peekNext() rune {
	if l.current+1 >= len(l.source) {
		return rune(0)
	}
	return l.source[l.current+1]
}

func (l *lexer) advance() rune {
	r := l.source[l.current]
	l.current++
	return r
}

func (l *lexer) isAtEnd() bool {
	return l.current >= len(l.source)
}

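// scanToken consumes a single token's worth of input, appending to l.tokens,
// and returns an error for any character the grammar does not allow.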
func (l *lexer) scanToken() error {
	r := l.advance()

	switch r {
	case '[':
		l.addToken(tokenTypeLeftBracket)
	case ']':
		l.addToken(tokenTypeRightBracket)
	case '=':
		l.addToken(tokenTypeEqual)
	case ';':
		l.addToken(tokenTypeSemicolon)
	case ' ', '\r', '\t':
		// Skip insignificant whitespace.
	case '#':
		next := l.peek()
		if isUpperAlpha(next) {
			l.name()
			return nil
		}
		return fmt.Errorf("names must have an uppercase alphabetical first character, found '%s'", string(next))
	case '\n':
		l.line++
	default:
		if isIdentifier(r) {
			l.identifier()
			return nil
		}

		return fmt.Errorf("unexpected character '%s' at line %d", string(r), l.line)
	}

	return nil
}

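// name scans the remainder of a #Name token; the leading '#' has already been
// consumed by scanToken and is stripped from the stored lexeme. The rune that
// terminates the name (for example a closing ']') is left for the next scan.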
func (l *lexer) name() {
	for isGoIdentifier(l.peek()) {
		l.advance()
	}

	text := l.source[l.start+1 : l.current]

	l.tokens = append(l.tokens, &token{
		Type:   tokenTypeName,
		Lexeme: string(text),
		Line:   l.line,
	})
}

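// identifier scans a run of identifier runes into a single token.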
func (l *lexer) identifier() {
	for isIdentifier(l.peek()) {
		l.advance()
	}

	text := l.source[l.start:l.current]

	l.tokens = append(l.tokens, &token{
		Type:   tokenTypeIdentifier,
		Lexeme: string(text),
		Line:   l.line,
	})
}

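// scanTokens drives the scanner to the end of the input, collecting every
// token and every error rather than stopping at the first failure, and
// terminates the stream with an EOF token.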
func (l *lexer) scanTokens() ([]*token, []error) {
	errs := []error{}
	for !l.isAtEnd() {
		l.start = l.current
		if err := l.scanToken(); err != nil {
			errs = append(errs, err)
		}
	}

	// Reset start so the EOF token carries an empty lexeme rather than the
	// text of whatever token was scanned last.
	l.start = l.current
	l.addToken(tokenTypeEOF)
	return l.tokens, errs
}

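// lex tokenizes source and returns the token stream along with any scan
// errors encountered.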
func lex(source string) ([]*token, []error) {
	l := &lexer{
		source: []rune(source),
		line:   1,
		tokens: []*token{},
	}

	return l.scanTokens()
}
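
// Example usage. This main function is an assumed addition, not part of the
// original file: it lexes a small bracketed-section snippet and prints the
// resulting tokens, so the input here is only a guess at what the grammar is
// meant to accept.
func main() {
	src := "[#Config]\nkey = some.value;\n"

	tokens, errs := lex(src)
	for _, err := range errs {
		fmt.Println("lex error:", err)
	}
	for _, t := range tokens {
		fmt.Printf("line %d: type=%d lexeme=%q\n", t.Line, t.Type, t.Lexeme)
	}
}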