commit 9bebc6e30744b3e6fb89b9c87eebc85e58280e69
Author: red
Date:   Sat Jun 7 17:24:28 2025 -0400

    init + lexer

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..af23978
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.red-panda.pet/pandaware/lox-go
+
+go 1.23.2
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..99531fd
--- /dev/null
+++ b/main.go
@@ -0,0 +1,78 @@
+package main
+
+import (
+	"bufio"
+	"flag"
+	"fmt"
+	"os"
+)
+
+func main() {
+	flag.Parse()
+	args := flag.Args()
+
+	if len(args) > 1 {
+		// flag.Args() holds only the positional arguments, so args[0]
+		// here would be the first extra argument, not the program name;
+		// os.Args[0] is the program name.
+		fmt.Printf("Usage: %s [script]\n", os.Args[0])
+		os.Exit(64)
+	} else if len(args) == 1 {
+		runFile(args[0])
+	} else {
+		repl()
+	}
+}
+
+// runFile lexes the named file and exits with status 65 when a lex
+// error occurred, or 1 when the file cannot be read.
+func runFile(filename string) {
+	bs, err := os.ReadFile(filename)
+	if err != nil {
+		fmt.Printf("unable to read file '%s':\n\t%s", filename, err.Error())
+		os.Exit(1)
+	}
+
+	if run(string(bs)) {
+		os.Exit(65)
+	}
+}
+
+// repl reads one line at a time from stdin and lexes it, until the
+// user enters ":q" or stdin is closed.
+func repl() {
+	s := bufio.NewScanner(os.Stdin)
+	for {
+		fmt.Printf("repl> ")
+		// Scan returns false on EOF or a read error; without this check
+		// the loop would spin forever once stdin is closed.
+		if !s.Scan() {
+			if err := s.Err(); err != nil {
+				panic(err)
+			}
+			return
+		}
+		text := s.Text()
+
+		if text == ":q" {
+			return
+		}
+
+		run(text)
+	}
+}
+
+// run lexes source and prints the resulting tokens.
+// It reports true when a lex error occurred.
+func run(source string) bool {
+	s := newScanner(source)
+	tokens, ok := s.ScanTokens()
+	if !ok {
+		return true
+	}
+
+	for _, token := range tokens {
+		fmt.Println(token)
+	}
+
+	return false
+}
+
+// reportErr reports an error with no location detail beyond the line.
+func reportErr(line int, message string) {
+	report(line, "", message)
+}
+
+// report prints a formatted error message to stdout.
+func report(line int, where, message string) {
+	fmt.Printf("[line %d] Error%s: %s\n", line, where, message)
+}
diff --git a/scanner.go b/scanner.go
new file mode 100644
index 0000000..54d35bc
--- /dev/null
+++ b/scanner.go
@@ -0,0 +1,280 @@
+package main
+
+import (
+	"fmt"
+	"strconv"
+)
+
+// keywords maps Lox reserved words to their token types.
+var keywords = map[string]tokenType{
+	"and":    tokenTypeAnd,
+	"class":  tokenTypeClass,
+	"else":   tokenTypeElse,
+	"false":  tokenTypeFalse,
+	"fun":    tokenTypeFun,
+	"for":    tokenTypeFor,
+	"if":     tokenTypeIf,
+	"nil":    tokenTypeNil,
+	"or":     tokenTypeOr,
+	"print":  tokenTypePrint,
"return": tokenTypeReturn, + "super": tokenTypeSuper, + "this": tokenTypeThis, + "true": tokenTypeTrue, + "var": tokenTypeVar, + "while": tokenTypeWhile, +} + +func isDigit(r rune) bool { + return r >= '0' && r <= '9' +} + +func isAlpha(r rune) bool { + return (r >= 'a' && r <= 'z') || + (r >= 'A' && r <= 'Z') || + r == '_' +} + +func isAlphaNumeric(r rune) bool { + return isDigit(r) || isAlpha(r) +} + +type scanner struct { + source []rune + tokens []*token + + start int + current int + line int +} + +func newScanner(source string) *scanner { + s := new(scanner) + + s.source = []rune(source) + s.tokens = []*token{} + + s.start = 0 + s.current = 0 + s.line = 1 + + return s +} + +func (s *scanner) isAtEnd() bool { + return s.current >= len(s.source) +} + +func (s *scanner) advance() rune { + r := s.source[s.current] + s.current += 1 + return r +} + +func (s *scanner) addToken(t tokenType, literal any) { + s.tokens = append(s.tokens, &token{ + Type: t, + Lexeme: string(s.source[s.start:s.current]), + Literal: literal, + Line: s.line, + }) +} + +func (s *scanner) match(expected rune) bool { + if s.isAtEnd() { + return false + } + c := s.source[s.current] + if c != expected { + return false + } + + s.current += 1 + return true +} + +func (s *scanner) peek() rune { + if s.isAtEnd() { + return rune(0) + } + return s.source[s.current] +} + +func (s *scanner) peekNext() rune { + if s.current+1 > len(s.source) { + return rune(0) + } + return s.source[s.current+1] +} + +func (s *scanner) scanToken() bool { + r := s.advance() + + switch r { + // simple 1 character tokens + case '(': + s.addToken(tokenTypeLeftParen, nil) + case ')': + s.addToken(tokenTypeRightParen, nil) + case '{': + s.addToken(tokenTypeLeftBrace, nil) + case '}': + s.addToken(tokenTypeRightBrace, nil) + case ',': + s.addToken(tokenTypeComma, nil) + case '.': + s.addToken(tokenTypeDot, nil) + case '-': + s.addToken(tokenTypeMinus, nil) + case '+': + s.addToken(tokenTypePlus, nil) + case ';': + 
+		s.addToken(tokenTypeSemicolon, nil)
+	case '*':
+		s.addToken(tokenTypeStar, nil)
+
+	// simple 2 character tokens
+	case '!':
+		if s.match('=') {
+			s.addToken(tokenTypeBangEq, nil)
+		} else {
+			s.addToken(tokenTypeBang, nil)
+		}
+	case '=':
+		if s.match('=') {
+			s.addToken(tokenTypeEqualEqual, nil)
+		} else {
+			s.addToken(tokenTypeEqual, nil)
+		}
+	case '<':
+		if s.match('=') {
+			s.addToken(tokenTypeLessEq, nil)
+		} else {
+			s.addToken(tokenTypeLess, nil)
+		}
+	case '>':
+		if s.match('=') {
+			s.addToken(tokenTypeGreaterEq, nil)
+		} else {
+			s.addToken(tokenTypeGreater, nil)
+		}
+
+	case '/':
+		// match comments
+		if s.match('/') {
+			// we scan until the end of line/file (whichever comes first :p)
+			// comments produce no token at all
+			for s.peek() != '\n' && !s.isAtEnd() {
+				s.advance()
+			}
+		} else {
+			s.addToken(tokenTypeSlash, nil)
+		}
+
+	// ignore whitespace
+	case ' ':
+		break
+	case '\r':
+		break
+	case '\t':
+		break
+
+	// advance the line counter :D
+	case '\n':
+		s.line += 1
+
+	// string literals
+	case '"':
+		return s.string()
+
+	default:
+		// numbers and identifiers have too many possible first runes to
+		// enumerate as cases, so they are dispatched here
+		if isDigit(r) {
+			return s.number()
+		} else if isAlpha(r) {
+			s.identifier()
+			return false
+		}
+
+		reportErr(s.line, fmt.Sprintf("Unexpected character %c", r))
+
+		return true
+	}
+
+	return false
+}
+
+// string scans a string literal (the opening '"' is already consumed)
+// and reports whether an error occurred.
+func (s *scanner) string() bool {
+	// peek until we hit the end of the string or file, whichever is first
+	for s.peek() != '"' && !s.isAtEnd() {
+		// support strings with new lines :D
+		if s.peek() == '\n' {
+			s.line += 1
+		}
+		s.advance()
+	}
+
+	// if the token didn't end before the file we report and err
+	// and return that we got one
+	if s.isAtEnd() {
+		reportErr(s.line, "Unterminated string")
+		return true
+	}
+
+	// consume the closing '"'
+	s.advance()
+
+	// todo: escape sequences
+	// NOTE(review): the literal is stored as []rune, not string, so it
+	// prints as a slice of code points via %+v — confirm this is intended
+	value := s.source[s.start+1 : s.current-1]
+	s.addToken(tokenTypeString, value)
+
+	return false
+}
+
+// number scans an integer or decimal literal; it never fails, so it
+// always reports false.
+func (s *scanner) number() bool {
+	for isDigit(s.peek()) {
+		s.advance()
+	}
+
+	// a '.' only begins a fractional part when a digit follows it
+	if s.peek() == '.'
&& isDigit(s.peekNext()) {
+		s.advance()
+
+		for isDigit(s.peek()) {
+			s.advance()
+		}
+	}
+
+	// the lexeme is all digits plus at most one interior '.', so
+	// ParseFloat cannot fail here; the error is deliberately discarded
+	literal, _ := strconv.ParseFloat(string(s.source[s.start:s.current]), 64)
+	s.addToken(tokenTypeNumber, literal)
+
+	return false
+}
+
+// identifier scans an identifier, mapping reserved words to their
+// keyword token types.
+func (s *scanner) identifier() {
+	for isAlphaNumeric(s.peek()) {
+		s.advance()
+	}
+
+	text := s.source[s.start:s.current]
+	tt, ok := keywords[string(text)]
+
+	if !ok {
+		tt = tokenTypeIdentifier
+	}
+
+	s.addToken(tt, nil)
+}
+
+// ScanTokens scans the whole source, appends a trailing EOF token, and
+// returns (tokens, ok) where ok is false when any lex error occurred.
+func (s *scanner) ScanTokens() ([]*token, bool) {
+	isErr := false
+
+	for !s.isAtEnd() {
+		s.start = s.current
+		// Call scanToken unconditionally: the original
+		// `isErr = isErr || s.scanToken()` short-circuited after the
+		// first error, so s.current stopped advancing and this loop
+		// never terminated.
+		if s.scanToken() {
+			isErr = true
+		}
+	}
+
+	s.tokens = append(s.tokens, &token{
+		Type:    tokenTypeEOF,
+		Lexeme:  "",
+		Literal: nil,
+		Line:    s.line,
+	})
+
+	return s.tokens, !isErr
+}
diff --git a/tokentype.go b/tokentype.go
new file mode 100644
index 0000000..fc11673
--- /dev/null
+++ b/tokentype.go
@@ -0,0 +1,71 @@
+package main
+
+import "fmt"
+
+//go:generate stringer -type tokenType -linecomment -trimprefix tokenType
+type tokenType int
+
+const (
+	// single char tokens
+
+	tokenTypeLeftParen tokenType = iota
+	tokenTypeRightParen
+	tokenTypeLeftBrace
+	tokenTypeRightBrace
+	tokenTypeComma
+	tokenTypeDot
+	tokenTypeMinus
+	tokenTypePlus
+	tokenTypeSemicolon
+	tokenTypeSlash
+	tokenTypeStar
+
+	// 1-2 char token
+
+	tokenTypeBang
+	tokenTypeBangEq
+	tokenTypeEqual
+	tokenTypeEqualEqual
+	tokenTypeGreater
+	tokenTypeGreaterEq
+	tokenTypeLess
+	tokenTypeLessEq
+
+	// literals
+
+	tokenTypeIdentifier
+	tokenTypeString
+	tokenTypeNumber
+
+	// keywords
+
+	tokenTypeAnd
+	tokenTypeClass
+	tokenTypeElse
+	tokenTypeFalse
+	tokenTypeFun
+	tokenTypeFor
+	tokenTypeIf
+	tokenTypeNil
+	tokenTypeOr
+	tokenTypePrint
+	tokenTypeReturn
+	tokenTypeSuper
+	tokenTypeThis
+	tokenTypeTrue
+	tokenTypeVar
+	tokenTypeWhile
+
+	tokenTypeEOF
+)
+
+// token is a single lexed token: its type, the raw source text, the
+// parsed literal value (nil for non-literals), and the source line.
+type token struct {
+	Type    tokenType
+	Lexeme  string
+	Literal any
+	Line    int
+}
+
+// String renders the token for debug output, e.g. "Number 1.5 1.5".
+func (t token) String() string {
+	return fmt.Sprintf("%s %s %+v", t.Type, t.Lexeme, t.Literal)
+}
diff --git a/tokentype_string.go b/tokentype_string.go
new file mode 100644
index 0000000..c12cd65
--- /dev/null
+++ b/tokentype_string.go
@@ -0,0 +1,61 @@
+// Code generated by "stringer -type tokenType -linecomment -trimprefix tokenType"; DO NOT EDIT.
+
+package main
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[tokenTypeLeftParen-0]
+	_ = x[tokenTypeRightParen-1]
+	_ = x[tokenTypeLeftBrace-2]
+	_ = x[tokenTypeRightBrace-3]
+	_ = x[tokenTypeComma-4]
+	_ = x[tokenTypeDot-5]
+	_ = x[tokenTypeMinus-6]
+	_ = x[tokenTypePlus-7]
+	_ = x[tokenTypeSemicolon-8]
+	_ = x[tokenTypeSlash-9]
+	_ = x[tokenTypeStar-10]
+	_ = x[tokenTypeBang-11]
+	_ = x[tokenTypeBangEq-12]
+	_ = x[tokenTypeEqual-13]
+	_ = x[tokenTypeEqualEqual-14]
+	_ = x[tokenTypeGreater-15]
+	_ = x[tokenTypeGreaterEq-16]
+	_ = x[tokenTypeLess-17]
+	_ = x[tokenTypeLessEq-18]
+	_ = x[tokenTypeIdentifier-19]
+	_ = x[tokenTypeString-20]
+	_ = x[tokenTypeNumber-21]
+	_ = x[tokenTypeAnd-22]
+	_ = x[tokenTypeClass-23]
+	_ = x[tokenTypeElse-24]
+	_ = x[tokenTypeFalse-25]
+	_ = x[tokenTypeFun-26]
+	_ = x[tokenTypeFor-27]
+	_ = x[tokenTypeIf-28]
+	_ = x[tokenTypeNil-29]
+	_ = x[tokenTypeOr-30]
+	_ = x[tokenTypePrint-31]
+	_ = x[tokenTypeReturn-32]
+	_ = x[tokenTypeSuper-33]
+	_ = x[tokenTypeThis-34]
+	_ = x[tokenTypeTrue-35]
+	_ = x[tokenTypeVar-36]
+	_ = x[tokenTypeWhile-37]
+	_ = x[tokenTypeEOF-38]
+}
+
+const _tokenType_name = "LeftParenRightParenLeftBraceRightBraceCommaDotMinusPlusSemicolonSlashStarBangBangEqEqualEqualEqualGreaterGreaterEqLessLessEqIdentifierStringNumberAndClassElseFalseFunForIfNilOrPrintReturnSuperThisTrueVarWhileEOF"
+
+var _tokenType_index = [...]uint8{0, 9, 19, 28, 38, 43, 46, 51, 55, 64, 69, 73, 77, 83, 88, 98, 105, 114, 118, 124, 134, 140, 146, 149, 154, 158, 163, 166, 169, 171, 174, 176, 181, 187, 192, 196, 200, 203, 208, 211}
+
+func (i tokenType) String() string {
+	if i < 0 || i >= tokenType(len(_tokenType_index)-1) {
+		return "tokenType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]]
+}