aboutsummaryrefslogtreecommitdiff
path: root/spec/lexer.go
diff options
context:
space:
mode:
Diffstat (limited to 'spec/lexer.go')
-rw-r--r--spec/lexer.go137
1 files changed, 137 insertions, 0 deletions
diff --git a/spec/lexer.go b/spec/lexer.go
new file mode 100644
index 0000000..54eb13a
--- /dev/null
+++ b/spec/lexer.go
@@ -0,0 +1,137 @@
+//go:generate maleeni compile -l lexspec.json -o clexspec.json
+
+package spec
+
+import (
+ _ "embed"
+ "encoding/json"
+ "fmt"
+ "io"
+ "strings"
+
+ mldriver "github.com/nihei9/maleeni/driver"
+ mlspec "github.com/nihei9/maleeni/spec"
+)
+
+// tokenKind identifies the category of a token produced by the lexer.
+type tokenKind string
+
+// Token kinds carried by the tokens that lexer.next returns.
+const (
+	tokenKindID              tokenKind = "id"
+	tokenKindTerminalPattern tokenKind = "terminal pattern"
+	tokenKindColon           tokenKind = ":"
+	tokenKindSemicolon       tokenKind = ";"
+	tokenKindEOF             tokenKind = "eof"
+	tokenKindInvalid         tokenKind = "invalid"
+)
+
+// token is a single lexical unit produced by the lexer.
+type token struct {
+	// kind is the category of the token.
+	kind tokenKind
+	// text is the token's text. The constructors for symbol and EOF
+	// tokens leave it empty.
+	text string
+}
+
+// newSymbolToken returns a token of the given kind that carries no text.
+func newSymbolToken(kind tokenKind) *token {
+	sym := new(token)
+	sym.kind = kind
+	return sym
+}
+
+// newIDToken returns an identifier token whose text is text.
+func newIDToken(text string) *token {
+	id := new(token)
+	id.kind = tokenKindID
+	id.text = text
+	return id
+}
+
+// newTerminalPatternToken returns a terminal pattern token whose text is
+// text.
+func newTerminalPatternToken(text string) *token {
+	pat := new(token)
+	pat.kind = tokenKindTerminalPattern
+	pat.text = text
+	return pat
+}
+
+// newEOFToken returns a token of kind tokenKindEOF with no text.
+func newEOFToken() *token {
+	return &token{kind: tokenKindEOF}
+}
+
+// newInvalidToken returns a token of kind tokenKindInvalid whose text is
+// text.
+func newInvalidToken(text string) *token {
+	bad := new(token)
+	bad.kind = tokenKindInvalid
+	bad.text = text
+	return bad
+}
+
+// lexer produces tokens by driving a maleeni lexer built from a compiled
+// lexical specification.
+type lexer struct {
+	// s is the compiled lexical specification the driver was created from.
+	s *mlspec.CompiledLexSpec
+	// d is the maleeni lexer that yields the raw tokens.
+	d *mldriver.Lexer
+	// dufTok is neither set nor read in the code visible here.
+	// NOTE(review): the name looks like a typo for "bufTok" — confirm
+	// against the rest of the package before renaming.
+	dufTok *token
+}
+
+// lexspec holds the contents of clexspec.json, embedded at build time.
+// newLexer decodes it as JSON into a compiled lexical specification.
+//
+//go:embed clexspec.json
+var lexspec []byte
+
+// newLexer builds a lexer that reads from src.
+//
+// It decodes the embedded compiled lexical specification and passes it,
+// together with src, to the maleeni driver. An error from either step is
+// returned unchanged.
+func newLexer(src io.Reader) (*lexer, error) {
+	clspec := new(mlspec.CompiledLexSpec)
+	if err := json.Unmarshal(lexspec, clspec); err != nil {
+		return nil, err
+	}
+	drv, err := mldriver.NewLexer(clspec, src)
+	if err != nil {
+		return nil, err
+	}
+	lex := &lexer{s: clspec, d: drv}
+	return lex, nil
+}
+
+// next returns the next significant token from the input.
+//
+// White space and newline tokens are skipped. A terminal, i.e. everything
+// between a terminal_open and a terminal_close token, is returned as a
+// single terminal pattern token in which every `\"` has been replaced by
+// `"`. Input that the driver flags as invalid, or whose kind is not handled
+// here, is returned as an invalid token together with a nil error.
+//
+// It returns synErrUnclosedTerminal when the input ends inside a terminal,
+// synErrIncompletedEscSeq when an escape_symbol token appears inside a
+// terminal, and any error reported by the driver unchanged.
+func (l *lexer) next() (*token, error) {
+	for {
+		tok, err := l.d.Next()
+		if err != nil {
+			return nil, err
+		}
+		if tok.Invalid {
+			// Hand the invalid input to the caller right away; the token
+			// must not be built and then dropped.
+			return newInvalidToken(tok.Text()), nil
+		}
+		if tok.EOF {
+			return newEOFToken(), nil
+		}
+		switch tok.KindName {
+		case "white_space":
+			continue
+		case "newline":
+			continue
+		case "identifier":
+			return newIDToken(tok.Text()), nil
+		case "terminal_open":
+			// Accumulate the pattern text until the terminal is closed.
+			var b strings.Builder
+			for {
+				tok, err := l.d.Next()
+				if err != nil {
+					return nil, err
+				}
+				if tok.EOF {
+					return nil, synErrUnclosedTerminal
+				}
+				// Tokens of any other kind are skipped.
+				// NOTE(review): this includes tokens the driver marks
+				// invalid — confirm that is intended.
+				switch tok.KindName {
+				case "pattern":
+					// Remove '\' character.
+					// Write the text verbatim: it must not be used as a
+					// format string, or a '%' in the pattern would be
+					// interpreted as a formatting verb.
+					fmt.Fprint(&b, strings.ReplaceAll(tok.Text(), `\"`, `"`))
+				case "escape_symbol":
+					return nil, synErrIncompletedEscSeq
+				case "terminal_close":
+					return newTerminalPatternToken(b.String()), nil
+				}
+			}
+		case "colon":
+			return newSymbolToken(tokenKindColon), nil
+		case "semicolon":
+			return newSymbolToken(tokenKindSemicolon), nil
+		default:
+			return newInvalidToken(tok.Text()), nil
+		}
+	}
+}