init + lexer

basil 2025-06-07 17:24:28 -04:00
commit 9bebc6e307
Signed by: basil
SSH key fingerprint: SHA256:y04xIFL/yqNaG9ae9Vl95vELtHfApGAIoOGLeVLP/fE
5 changed files with 493 additions and 0 deletions

go.mod Normal file

@@ -0,0 +1,3 @@
module git.red-panda.pet/pandaware/lox-go

go 1.23.2

main.go Normal file

@@ -0,0 +1,78 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
)
func main() {
flag.Parse()
args := flag.Args()
if len(args) > 1 {
fmt.Printf("Usage: %s [script]", args[0])
os.Exit(64)
} else if len(args) == 1 {
runFile(args[0])
} else {
repl()
}
}
func runFile(filename string) {
bs, err := os.ReadFile(filename)
if err != nil {
fmt.Printf("unable to read file '%s':\n\t%s", filename, err.Error())
os.Exit(1)
}
if run(string(bs)) {
os.Exit(65)
}
}
func repl() {
s := bufio.NewScanner(os.Stdin)
for {
fmt.Printf("repl> ")
if !s.Scan() {
// Scan returns false on EOF (ctrl-d) or a read error;
// bail out here instead of looping on the prompt forever
if err := s.Err(); err != nil {
panic(err)
}
fmt.Println()
return
}
text := s.Text()
if text == ":q" {
return
}
run(text)
}
}
// run tokenizes source and, for now, just prints the tokens;
// it reports whether a scan error occurred
func run(source string) bool {
s := newScanner(source)
tokens, ok := s.ScanTokens()
if !ok {
return true
}
for _, token := range tokens {
fmt.Println(token)
}
return false
}
func reportErr(line int, message string) {
report(line, "", message)
}
func report(line int, where, message string) {
fmt.Printf("[line %d] Error%s: %s\n", line, where, message)
}
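
For reference, a quick session might look like this (a sketch: the token lines come from token.String further down, and the exit codes follow the BSD sysexits.h convention, 64 = usage error, 65 = data error):

$ go run .
repl> var x = 1.5
Var var <nil>
Identifier x <nil>
Equal = <nil>
Number 1.5 1.5
Semicolon ; <nil>
EOF  <nil>
repl> :q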

scanner.go Normal file

@@ -0,0 +1,280 @@
package main
import (
"fmt"
"strconv"
)
var keywords = map[string]tokenType{
"and": tokenTypeAnd,
"class": tokenTypeClass,
"else": tokenTypeElse,
"false": tokenTypeFalse,
"fun": tokenTypeFun,
"for": tokenTypeFor,
"if": tokenTypeIf,
"nil": tokenTypeNil,
"or": tokenTypeOr,
"print": tokenTypePrint,
"return": tokenTypeReturn,
"super": tokenTypeSuper,
"this": tokenTypeThis,
"true": tokenTypeTrue,
"var": tokenTypeVar,
"while": tokenTypeWhile,
}
func isDigit(r rune) bool {
return r >= '0' && r <= '9'
}
func isAlpha(r rune) bool {
return (r >= 'a' && r <= 'z') ||
(r >= 'A' && r <= 'Z') ||
r == '_'
}
func isAlphaNumeric(r rune) bool {
return isDigit(r) || isAlpha(r)
}
type scanner struct {
source []rune
tokens []*token
start int
current int
line int
}
func newScanner(source string) *scanner {
s := new(scanner)
s.source = []rune(source)
s.tokens = []*token{}
s.start = 0
s.current = 0
s.line = 1
return s
}
func (s *scanner) isAtEnd() bool {
return s.current >= len(s.source)
}
func (s *scanner) advance() rune {
r := s.source[s.current]
s.current += 1
return r
}
func (s *scanner) addToken(t tokenType, literal any) {
s.tokens = append(s.tokens, &token{
Type: t,
Lexeme: string(s.source[s.start:s.current]),
Literal: literal,
Line: s.line,
})
}
func (s *scanner) match(expected rune) bool {
if s.isAtEnd() {
return false
}
c := s.source[s.current]
if c != expected {
return false
}
s.current += 1
return true
}
func (s *scanner) peek() rune {
if s.isAtEnd() {
return rune(0)
}
return s.source[s.current]
}
func (s *scanner) peekNext() rune {
if s.current+1 >= len(s.source) {
return rune(0)
}
return s.source[s.current+1]
}
// scanToken consumes a single lexeme; it reports true if a scan error occurred
func (s *scanner) scanToken() bool {
r := s.advance()
switch r {
// simple one-character tokens
case '(':
s.addToken(tokenTypeLeftParen, nil)
case ')':
s.addToken(tokenTypeRightParen, nil)
case '{':
s.addToken(tokenTypeLeftBrace, nil)
case '}':
s.addToken(tokenTypeRightBrace, nil)
case ',':
s.addToken(tokenTypeComma, nil)
case '.':
s.addToken(tokenTypeDot, nil)
case '-':
s.addToken(tokenTypeMinus, nil)
case '+':
s.addToken(tokenTypePlus, nil)
case ';':
s.addToken(tokenTypeSemicolon, nil)
case '*':
s.addToken(tokenTypeStar, nil)
// one- or two-character tokens
case '!':
if s.match('=') {
s.addToken(tokenTypeBangEq, nil)
} else {
s.addToken(tokenTypeBang, nil)
}
case '=':
if s.match('=') {
s.addToken(tokenTypeEqualEqual, nil)
} else {
s.addToken(tokenTypeEqual, nil)
}
case '<':
if s.match('=') {
s.addToken(tokenTypeLessEq, nil)
} else {
s.addToken(tokenTypeLess, nil)
}
case '>':
if s.match('=') {
s.addToken(tokenTypeGreaterEq, nil)
} else {
s.addToken(tokenTypeGreater, nil)
}
case '/':
// match comments
if s.match('/') {
// we scan until the end of line/file (whichever comes first :p)
for s.peek() != '\n' && !s.isAtEnd() {
s.advance()
}
} else {
s.addToken(tokenTypeSlash, nil)
}
// ignore whitespace
case ' ', '\r', '\t':
// advance the line counter :D
case '\n':
s.line += 1
// string literals
case '"':
return s.string()
default:
if isDigit(r) {
return s.number()
} else if isAlpha(r) {
s.identifier()
return false
}
reportErr(s.line, fmt.Sprintf("Unexpected character %c", r))
return true
}
return false
}
func (s *scanner) string() bool {
// peek until we hit the end of the string or file, whichever is first
for s.peek() != '"' && !s.isAtEnd() {
// support strings with new lines :D
if s.peek() == '\n' {
s.line += 1
}
s.advance()
}
// if the token didn't end before the file we report and err
// and return that we got one
if s.isAtEnd() {
reportErr(s.line, "Unterminated string")
return true
}
s.advance()
// todo: escape sequences
value := string(s.source[s.start+1 : s.current-1])
s.addToken(tokenTypeString, value)
return false
}
func (s *scanner) number() bool {
for isDigit(s.peek()) {
s.advance()
}
if s.peek() == '.' && isDigit(s.peekNext()) {
s.advance()
for isDigit(s.peek()) {
s.advance()
}
}
// the lexeme is all digits with at most one '.', so ParseFloat cannot fail
literal, _ := strconv.ParseFloat(string(s.source[s.start:s.current]), 64)
s.addToken(tokenTypeNumber, literal)
return false
}
func (s *scanner) identifier() {
for isAlphaNumeric(s.peek()) {
s.advance()
}
text := s.source[s.start:s.current]
tt, ok := keywords[string(text)]
if !ok {
tt = tokenTypeIdentifier
}
s.addToken(tt, nil)
}
// ScanTokens scans the entire source and appends a trailing EOF token;
// the bool result is false if any scan errors were reported
func (s *scanner) ScanTokens() ([]*token, bool) {
isErr := false
for !s.isAtEnd() {
s.start = s.current
// don't short-circuit with `isErr || s.scanToken()`: once isErr is true,
// scanToken would never run again and the loop would spin forever
if s.scanToken() {
isErr = true
}
}
s.tokens = append(s.tokens, &token{
Type: tokenTypeEOF,
Lexeme: "",
Literal: nil,
Line: s.line,
})
return s.tokens, !isErr
}
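
A minimal smoke test for the lexer might look like this (a sketch only; no scanner_test.go ships in this commit, and the test name is hypothetical):

package main

import "testing"

// TestScanTokens scans a small program and checks the resulting token
// types: the comment should be skipped and an EOF token appended.
func TestScanTokens(t *testing.T) {
	tokens, ok := newScanner("var answer = 42; // the answer").ScanTokens()
	if !ok {
		t.Fatal("expected a clean scan")
	}
	want := []tokenType{
		tokenTypeVar, tokenTypeIdentifier, tokenTypeEqual,
		tokenTypeNumber, tokenTypeSemicolon, tokenTypeEOF,
	}
	if len(tokens) != len(want) {
		t.Fatalf("got %d tokens, want %d", len(tokens), len(want))
	}
	for i, tt := range want {
		if tokens[i].Type != tt {
			t.Errorf("token %d: got %s, want %s", i, tokens[i].Type, tt)
		}
	}
}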

tokentype.go Normal file

@@ -0,0 +1,71 @@
package main
import "fmt"
//go:generate stringer -type tokenType -linecomment -trimprefix tokenType
type tokenType int
const (
// single char tokens
tokenTypeLeftParen tokenType = iota
tokenTypeRightParen
tokenTypeLeftBrace
tokenTypeRightBrace
tokenTypeComma
tokenTypeDot
tokenTypeMinus
tokenTypePlus
tokenTypeSemicolon
tokenTypeSlash
tokenTypeStar
// one- or two-character tokens
tokenTypeBang
tokenTypeBangEq
tokenTypeEqual
tokenTypeEqualEqual
tokenTypeGreater
tokenTypeGreaterEq
tokenTypeLess
tokenTypeLessEq
// literals
tokenTypeIdentifier
tokenTypeString
tokenTypeNumber
// keywords
tokenTypeAnd
tokenTypeClass
tokenTypeElse
tokenTypeFalse
tokenTypeFun
tokenTypeFor
tokenTypeIf
tokenTypeNil
tokenTypeOr
tokenTypePrint
tokenTypeReturn
tokenTypeSuper
tokenTypeThis
tokenTypeTrue
tokenTypeVar
tokenTypeWhile
tokenTypeEOF
)
type token struct {
Type tokenType
Lexeme string
Literal any
Line int
}
func (t token) String() string {
return fmt.Sprintf("%s %s %+v", t.Type, t.Lexeme, t.Literal)
}
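
tokentype_string.go below is the output of the go:generate directive above. After adding a token type, regenerating it is just the following (assuming stringer is installed from golang.org/x/tools/cmd/stringer):

$ go install golang.org/x/tools/cmd/stringer@latest
$ go generate ./...

-trimprefix is what turns tokenTypeLeftParen into the bare "LeftParen" that token.String prints.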

tokentype_string.go Normal file

@@ -0,0 +1,61 @@
// Code generated by "stringer -type tokenType -linecomment -trimprefix tokenType"; DO NOT EDIT.
package main
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[tokenTypeLeftParen-0]
_ = x[tokenTypeRightParen-1]
_ = x[tokenTypeLeftBrace-2]
_ = x[tokenTypeRightBrace-3]
_ = x[tokenTypeComma-4]
_ = x[tokenTypeDot-5]
_ = x[tokenTypeMinus-6]
_ = x[tokenTypePlus-7]
_ = x[tokenTypeSemicolon-8]
_ = x[tokenTypeSlash-9]
_ = x[tokenTypeStar-10]
_ = x[tokenTypeBang-11]
_ = x[tokenTypeBangEq-12]
_ = x[tokenTypeEqual-13]
_ = x[tokenTypeEqualEqual-14]
_ = x[tokenTypeGreater-15]
_ = x[tokenTypeGreaterEq-16]
_ = x[tokenTypeLess-17]
_ = x[tokenTypeLessEq-18]
_ = x[tokenTypeIdentifier-19]
_ = x[tokenTypeString-20]
_ = x[tokenTypeNumber-21]
_ = x[tokenTypeAnd-22]
_ = x[tokenTypeClass-23]
_ = x[tokenTypeElse-24]
_ = x[tokenTypeFalse-25]
_ = x[tokenTypeFun-26]
_ = x[tokenTypeFor-27]
_ = x[tokenTypeIf-28]
_ = x[tokenTypeNil-29]
_ = x[tokenTypeOr-30]
_ = x[tokenTypePrint-31]
_ = x[tokenTypeReturn-32]
_ = x[tokenTypeSuper-33]
_ = x[tokenTypeThis-34]
_ = x[tokenTypeTrue-35]
_ = x[tokenTypeVar-36]
_ = x[tokenTypeWhile-37]
_ = x[tokenTypeEOF-38]
}
const _tokenType_name = "LeftParenRightParenLeftBraceRightBraceCommaDotMinusPlusSemicolonSlashStarBangBangEqEqualEqualEqualGreaterGreaterEqLessLessEqIdentifierStringNumberAndClassElseFalseFunForIfNilOrPrintReturnSuperThisTrueVarWhileEOF"
var _tokenType_index = [...]uint8{0, 9, 19, 28, 38, 43, 46, 51, 55, 64, 69, 73, 77, 83, 88, 98, 105, 114, 118, 124, 134, 140, 146, 149, 154, 158, 163, 166, 169, 171, 174, 176, 181, 187, 192, 196, 200, 203, 208, 211}
func (i tokenType) String() string {
if i < 0 || i >= tokenType(len(_tokenType_index)-1) {
return "tokenType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]]
}
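
For what it's worth, the generated String is a slice into one packed name string rather than a map lookup: for tokenTypeGreater (value 15) it returns _tokenType_name[_tokenType_index[15]:_tokenType_index[16]], i.e. _tokenType_name[98:105] = "Greater".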