init + lexer

basil 2025-06-07 17:24:28 -04:00
commit 9bebc6e307
Signed by: basil
SSH key fingerprint: SHA256:y04xIFL/yqNaG9ae9Vl95vELtHfApGAIoOGLeVLP/fE
5 changed files with 493 additions and 0 deletions

go.mod Normal file

@@ -0,0 +1,3 @@
module git.red-panda.pet/pandaware/lox-go

go 1.23.2

main.go Normal file

@@ -0,0 +1,78 @@
package main
import (
"bufio"
"flag"
"fmt"
"os"
)
func main() {
flag.Parse()
args := flag.Args()
if len(args) > 1 {
fmt.Printf("Usage: %s [script]", args[0])
os.Exit(64)
} else if len(args) == 1 {
runFile(args[0])
} else {
repl()
}
}
func runFile(filename string) {
bs, err := os.ReadFile(filename)
if err != nil {
fmt.Printf("unable to read file '%s':\n\t%s", filename, err.Error())
os.Exit(1)
}
if run(string(bs)) {
os.Exit(65)
}
}
func repl() {
s := bufio.NewScanner(os.Stdin)
for {
fmt.Printf("repl> ")
if !s.Scan() {
// Scan returns false on EOF (ctrl-d) or a read error;
// bail out here instead of looping on the prompt forever
if err := s.Err(); err != nil {
panic(err)
}
fmt.Println()
return
}
text := s.Text()
if text == ":q" {
return
}
run(text)
}
}
// run tokenizes source and, for now, just prints the tokens;
// it reports whether a scan error occurred
func run(source string) bool {
s := newScanner(source)
tokens, ok := s.ScanTokens()
if !ok {
return true
}
for _, token := range tokens {
fmt.Println(token)
}
return false
}
func reportErr(line int, message string) {
report(line, "", message)
}
func report(line int, where, message string) {
fmt.Printf("[line %d] Error%s: %s\n", line, where, message)
}
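
For reference, a quick session might look like this (a sketch: the token lines come from token.String further down, and the exit codes follow the BSD sysexits.h convention, 64 = usage error, 65 = data error):

$ go run .
repl> var x = 1.5
Var var <nil>
Identifier x <nil>
Equal = <nil>
Number 1.5 1.5
Semicolon ; <nil>
EOF  <nil>
repl> :q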

scanner.go Normal file

@@ -0,0 +1,280 @@
package main
import (
"fmt"
"strconv"
)
var keywords = map[string]tokenType{
"and": tokenTypeAnd,
"class": tokenTypeClass,
"else": tokenTypeElse,
"false": tokenTypeFalse,
"fun": tokenTypeFun,
"for": tokenTypeFor,
"if": tokenTypeIf,
"nil": tokenTypeNil,
"or": tokenTypeOr,
"print": tokenTypePrint,
"return": tokenTypeReturn,
"super": tokenTypeSuper,
"this": tokenTypeThis,
"true": tokenTypeTrue,
"var": tokenTypeVar,
"while": tokenTypeWhile,
}
func isDigit(r rune) bool {
return r >= '0' && r <= '9'
}
func isAlpha(r rune) bool {
return (r >= 'a' && r <= 'z') ||
(r >= 'A' && r <= 'Z') ||
r == '_'
}
func isAlphaNumeric(r rune) bool {
return isDigit(r) || isAlpha(r)
}
type scanner struct {
source []rune
tokens []*token
start int
current int
line int
}
func newScanner(source string) *scanner {
s := new(scanner)
s.source = []rune(source)
s.tokens = []*token{}
s.start = 0
s.current = 0
s.line = 1
return s
}
func (s *scanner) isAtEnd() bool {
return s.current >= len(s.source)
}
func (s *scanner) advance() rune {
r := s.source[s.current]
s.current += 1
return r
}
func (s *scanner) addToken(t tokenType, literal any) {
s.tokens = append(s.tokens, &token{
Type: t,
Lexeme: string(s.source[s.start:s.current]),
Literal: literal,
Line: s.line,
})
}
func (s *scanner) match(expected rune) bool {
if s.isAtEnd() {
return false
}
c := s.source[s.current]
if c != expected {
return false
}
s.current += 1
return true
}
func (s *scanner) peek() rune {
if s.isAtEnd() {
return rune(0)
}
return s.source[s.current]
}
func (s *scanner) peekNext() rune {
if s.current+1 >= len(s.source) {
return rune(0)
}
return s.source[s.current+1]
}
// scanToken consumes a single lexeme; it reports true if a scan error occurred
func (s *scanner) scanToken() bool {
r := s.advance()
switch r {
// simple one-character tokens
case '(':
s.addToken(tokenTypeLeftParen, nil)
case ')':
s.addToken(tokenTypeRightParen, nil)
case '{':
s.addToken(tokenTypeLeftBrace, nil)
case '}':
s.addToken(tokenTypeRightBrace, nil)
case ',':
s.addToken(tokenTypeComma, nil)
case '.':
s.addToken(tokenTypeDot, nil)
case '-':
s.addToken(tokenTypeMinus, nil)
case '+':
s.addToken(tokenTypePlus, nil)
case ';':
s.addToken(tokenTypeSemicolon, nil)
case '*':
s.addToken(tokenTypeStar, nil)
// one- or two-character tokens
case '!':
if s.match('=') {
s.addToken(tokenTypeBangEq, nil)
} else {
s.addToken(tokenTypeBang, nil)
}
case '=':
if s.match('=') {
s.addToken(tokenTypeEqualEqual, nil)
} else {
s.addToken(tokenTypeEqual, nil)
}
case '<':
if s.match('=') {
s.addToken(tokenTypeLessEq, nil)
} else {
s.addToken(tokenTypeLess, nil)
}
case '>':
if s.match('=') {
s.addToken(tokenTypeGreaterEq, nil)
} else {
s.addToken(tokenTypeGreater, nil)
}
case '/':
// match comments
if s.match('/') {
// we scan until the end of line/file (whichever comes first :p)
for s.peek() != '\n' && !s.isAtEnd() {
s.advance()
}
} else {
s.addToken(tokenTypeSlash, nil)
}
// ignore whitespace
case ' ', '\r', '\t':
// advance the line counter :D
case '\n':
s.line += 1
// string literals
case '"':
return s.string()
default:
if isDigit(r) {
return s.number()
} else if isAlpha(r) {
s.identifier()
return false
}
reportErr(s.line, fmt.Sprintf("Unexpected character %c", r))
return true
}
return false
}
func (s *scanner) string() bool {
// peek until we hit the end of the string or file, whichever is first
for s.peek() != '"' && !s.isAtEnd() {
// support strings with new lines :D
if s.peek() == '\n' {
s.line += 1
}
s.advance()
}
// if the token didn't end before the file we report and err
// and return that we got one
if s.isAtEnd() {
reportErr(s.line, "Unterminated string")
return true
}
s.advance()
// todo: escape sequences
value := string(s.source[s.start+1 : s.current-1])
s.addToken(tokenTypeString, value)
return false
}
func (s *scanner) number() bool {
for isDigit(s.peek()) {
s.advance()
}
if s.peek() == '.' && isDigit(s.peekNext()) {
s.advance()
for isDigit(s.peek()) {
s.advance()
}
}
// the lexeme is all digits with at most one '.', so ParseFloat cannot fail
literal, _ := strconv.ParseFloat(string(s.source[s.start:s.current]), 64)
s.addToken(tokenTypeNumber, literal)
return false
}
func (s *scanner) identifier() {
for isAlphaNumeric(s.peek()) {
s.advance()
}
text := s.source[s.start:s.current]
tt, ok := keywords[string(text)]
if !ok {
tt = tokenTypeIdentifier
}
s.addToken(tt, nil)
}
// ScanTokens scans the entire source and appends a trailing EOF token;
// the bool result is false if any scan errors were reported
func (s *scanner) ScanTokens() ([]*token, bool) {
isErr := false
for !s.isAtEnd() {
s.start = s.current
// don't short-circuit with `isErr || s.scanToken()`: once isErr is true,
// scanToken would never run again and the loop would spin forever
if s.scanToken() {
isErr = true
}
}
s.tokens = append(s.tokens, &token{
Type: tokenTypeEOF,
Lexeme: "",
Literal: nil,
Line: s.line,
})
return s.tokens, !isErr
}
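
A minimal smoke test for the lexer might look like this (a sketch only; no scanner_test.go ships in this commit, and the test name is hypothetical):

package main

import "testing"

// TestScanTokens scans a small program and checks the resulting token
// types: the comment should be skipped and an EOF token appended.
func TestScanTokens(t *testing.T) {
	tokens, ok := newScanner("var answer = 42; // the answer").ScanTokens()
	if !ok {
		t.Fatal("expected a clean scan")
	}
	want := []tokenType{
		tokenTypeVar, tokenTypeIdentifier, tokenTypeEqual,
		tokenTypeNumber, tokenTypeSemicolon, tokenTypeEOF,
	}
	if len(tokens) != len(want) {
		t.Fatalf("got %d tokens, want %d", len(tokens), len(want))
	}
	for i, tt := range want {
		if tokens[i].Type != tt {
			t.Errorf("token %d: got %s, want %s", i, tokens[i].Type, tt)
		}
	}
}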

tokentype.go Normal file

@@ -0,0 +1,71 @@
package main
import "fmt"
//go:generate stringer -type tokenType -linecomment -trimprefix tokenType
type tokenType int
const (
// single char tokens
tokenTypeLeftParen tokenType = iota
tokenTypeRightParen
tokenTypeLeftBrace
tokenTypeRightBrace
tokenTypeComma
tokenTypeDot
tokenTypeMinus
tokenTypePlus
tokenTypeSemicolon
tokenTypeSlash
tokenTypeStar
// one- or two-character tokens
tokenTypeBang
tokenTypeBangEq
tokenTypeEqual
tokenTypeEqualEqual
tokenTypeGreater
tokenTypeGreaterEq
tokenTypeLess
tokenTypeLessEq
// literals
tokenTypeIdentifier
tokenTypeString
tokenTypeNumber
// keywords
tokenTypeAnd
tokenTypeClass
tokenTypeElse
tokenTypeFalse
tokenTypeFun
tokenTypeFor
tokenTypeIf
tokenTypeNil
tokenTypeOr
tokenTypePrint
tokenTypeReturn
tokenTypeSuper
tokenTypeThis
tokenTypeTrue
tokenTypeVar
tokenTypeWhile
tokenTypeEOF
)
type token struct {
Type tokenType
Lexeme string
Literal any
Line int
}
func (t token) String() string {
return fmt.Sprintf("%s %s %+v", t.Type, t.Lexeme, t.Literal)
}
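
tokentype_string.go below is the output of the go:generate directive above. After adding a token type, regenerating it is just the following (assuming stringer is installed from golang.org/x/tools/cmd/stringer):

$ go install golang.org/x/tools/cmd/stringer@latest
$ go generate ./...

-trimprefix is what turns tokenTypeLeftParen into the bare "LeftParen" that token.String prints.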

tokentype_string.go Normal file

@@ -0,0 +1,61 @@
// Code generated by "stringer -type tokenType -linecomment -trimprefix tokenType"; DO NOT EDIT.
package main
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[tokenTypeLeftParen-0]
_ = x[tokenTypeRightParen-1]
_ = x[tokenTypeLeftBrace-2]
_ = x[tokenTypeRightBrace-3]
_ = x[tokenTypeComma-4]
_ = x[tokenTypeDot-5]
_ = x[tokenTypeMinus-6]
_ = x[tokenTypePlus-7]
_ = x[tokenTypeSemicolon-8]
_ = x[tokenTypeSlash-9]
_ = x[tokenTypeStar-10]
_ = x[tokenTypeBang-11]
_ = x[tokenTypeBangEq-12]
_ = x[tokenTypeEqual-13]
_ = x[tokenTypeEqualEqual-14]
_ = x[tokenTypeGreater-15]
_ = x[tokenTypeGreaterEq-16]
_ = x[tokenTypeLess-17]
_ = x[tokenTypeLessEq-18]
_ = x[tokenTypeIdentifier-19]
_ = x[tokenTypeString-20]
_ = x[tokenTypeNumber-21]
_ = x[tokenTypeAnd-22]
_ = x[tokenTypeClass-23]
_ = x[tokenTypeElse-24]
_ = x[tokenTypeFalse-25]
_ = x[tokenTypeFun-26]
_ = x[tokenTypeFor-27]
_ = x[tokenTypeIf-28]
_ = x[tokenTypeNil-29]
_ = x[tokenTypeOr-30]
_ = x[tokenTypePrint-31]
_ = x[tokenTypeReturn-32]
_ = x[tokenTypeSuper-33]
_ = x[tokenTypeThis-34]
_ = x[tokenTypeTrue-35]
_ = x[tokenTypeVar-36]
_ = x[tokenTypeWhile-37]
_ = x[tokenTypeEOF-38]
}
const _tokenType_name = "LeftParenRightParenLeftBraceRightBraceCommaDotMinusPlusSemicolonSlashStarBangBangEqEqualEqualEqualGreaterGreaterEqLessLessEqIdentifierStringNumberAndClassElseFalseFunForIfNilOrPrintReturnSuperThisTrueVarWhileEOF"
var _tokenType_index = [...]uint8{0, 9, 19, 28, 38, 43, 46, 51, 55, 64, 69, 73, 77, 83, 88, 98, 105, 114, 118, 124, 134, 140, 146, 149, 154, 158, 163, 166, 169, 171, 174, 176, 181, 187, 192, 196, 200, 203, 208, 211}
func (i tokenType) String() string {
if i < 0 || i >= tokenType(len(_tokenType_index)-1) {
return "tokenType(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]]
}
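
For what it's worth, the generated String is a slice into one packed name string rather than a map lookup: for tokenTypeGreater (value 15) it returns _tokenType_name[_tokenType_index[15]:_tokenType_index[16]], i.e. _tokenType_name[98:105] = "Greater".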