author     Ryo Nihei <nihei.dev@gmail.com>  2021-09-11 00:40:05 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-09-11 22:57:17 +0900
commit     96a555a00f000704c618c226485fa6d87ce66d9d
tree       9d7398033a2c015390f0de7ab69b6fd37bb1ba30 /driver
parent     Remove --debug option from the lex command
Define a lexical specification interface
Diffstat (limited to 'driver')
-rw-r--r--  driver/lexer.go       192
-rw-r--r--  driver/lexer_test.go  429
-rw-r--r--  driver/spec.go         73
3 files changed, 356 insertions(+), 338 deletions(-)
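
The commit decouples the driver from the spec package: the Lexer now runs against the new LexSpec interface, the lookup logic specific to spec.CompiledLexSpec moves into driver/spec.go behind NewLexSpec, and Token exposes its matched bytes through the exported Lexeme field instead of the old unexported byteSequence. As a minimal usage sketch of the new wiring (assuming the import path github.com/nihei9/maleeni/driver; the compile step that yields a *spec.CompiledLexSpec is unchanged):

    package example

    import (
        "fmt"
        "strings"

        "github.com/nihei9/maleeni/driver"
        "github.com/nihei9/maleeni/spec"
    )

    func printTokens(clspec *spec.CompiledLexSpec, src string) error {
        // NewLexer now takes the LexSpec interface; NewLexSpec adapts
        // a compiled spec to it.
        lexer, err := driver.NewLexer(driver.NewLexSpec(clspec), strings.NewReader(src))
        if err != nil {
            return err
        }
        for {
            tok, err := lexer.Next()
            if err != nil {
                return err
            }
            if tok.EOF {
                return nil
            }
            // Matched bytes are read straight from the exported Lexeme
            // field; the Match()/Text() accessors are removed.
            fmt.Printf("%v: %q\n", tok.KindName, tok.Lexeme)
        }
    }
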
diff --git a/driver/lexer.go b/driver/lexer.go
index bce8d8c..d8230d2 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -5,71 +5,60 @@ import (
"fmt"
"io"
"io/ioutil"
- "strings"
-
- "github.com/nihei9/maleeni/spec"
)
-type byteSequence []byte
+type ModeID int
-func newByteSequence(b []byte) byteSequence {
- return byteSequence(b)
+func (id ModeID) Int() int {
+ return int(id)
}
-func (s byteSequence) ByteSlice() []byte {
- return []byte(s)
-}
+type StateID int
-func (s byteSequence) String() string {
- if len(s) <= 0 {
- return ""
- }
- var b strings.Builder
- fmt.Fprintf(&b, "%X", s[0])
- for _, d := range s[1:] {
- fmt.Fprintf(&b, " %X", d)
- }
- return b.String()
+func (id StateID) Int() int {
+ return int(id)
}
-func (s byteSequence) GoString() string {
- return fmt.Sprintf("\"%v\"", s.String())
+type KindID int
+
+func (id KindID) Int() int {
+ return int(id)
}
-func (s byteSequence) MarshalJSON() ([]byte, error) {
- if len(s) <= 0 {
- return []byte("[]"), nil
- }
- var b strings.Builder
- fmt.Fprintf(&b, "[%v", uint8(s[0]))
- for _, e := range s[1:] {
- fmt.Fprintf(&b, ", %v", uint8(e))
- }
- fmt.Fprintf(&b, "]")
- return []byte(b.String()), nil
+type ModeKindID int
+
+func (id ModeKindID) Int() int {
+ return int(id)
}
-func (s byteSequence) merge(a byteSequence) byteSequence {
- return append([]byte(s), []byte(a)...)
+type LexSpec interface {
+ InitialMode() ModeID
+ Pop(mode ModeID, modeKind ModeKindID) bool
+ Push(mode ModeID, modeKind ModeKindID) (ModeID, bool)
+ ModeName(mode ModeID) string
+ InitialState(mode ModeID) StateID
+ NextState(mode ModeID, state StateID, v int) (StateID, bool)
+ Accept(mode ModeID, state StateID) (ModeKindID, bool)
+ KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string)
}
// Token represents a token.
type Token struct {
// ModeID is an ID of a lex mode.
- ModeID spec.LexModeID
+ ModeID ModeID
// ModeName is a name of a lex mode.
- ModeName spec.LexModeName
+ ModeName string
// KindID is an ID of a kind. This is unique among all modes.
- KindID spec.LexKindID
+ KindID KindID
// ModeKindID is an ID of a lexical kind. This is unique only within a mode.
// Note that you need to use the KindID field if you want to identify a kind across all modes.
- ModeKindID spec.LexModeKindID
+ ModeKindID ModeKindID
// KindName is a name of a lexical kind.
- KindName spec.LexKindName
+ KindName string
// Row is a row number where a lexeme appears.
Row int
@@ -78,59 +67,37 @@ type Token struct {
// Note that Col is counted in code points, not bytes.
Col int
+ // Lexeme is a byte sequence that matches a pattern in a lexical specification.
+ Lexeme []byte
+
// When this field is true, it means the token is the EOF token.
EOF bool
// When this field is true, it means the token is an error token.
Invalid bool
-
- // match is a byte sequence matched a pattern of a lexical specification.
- match byteSequence
-}
-
-func (t *Token) String() string {
- if t.Invalid {
- return fmt.Sprintf("!{mode id: %v, mode name: %v, row: %v, col: %v, text: %v, byte: %v}", t.ModeID, t.ModeName, t.Row, t.Col, t.Text(), t.Match())
- }
- if t.EOF {
- return fmt.Sprintf("{kind name: eof, row: %v, col: %v}", t.Row, t.Col)
- }
- return fmt.Sprintf("{mode id: %v, mode name: %v, kind id: %v, mode kind id: %v, kind name: %v, row: %v, col: %v, text: %v, byte: %v}", t.ModeID, t.ModeName, t.KindID, t.ModeKindID, t.KindName, t.Row, t.Col, t.Text(), t.Match())
-}
-
-// Match returns a byte slice matched a pattern of a lexical specification.
-func (t *Token) Match() []byte {
- return t.match.ByteSlice()
-}
-
-// Text returns a string representation of a matched byte sequence.
-func (t *Token) Text() string {
- return string(t.Match())
}
func (t *Token) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
- ModeID int `json:"mode_id"`
- ModeName string `json:"mode_name"`
- KindID int `json:"kind_id"`
- ModeKindID int `json:"mode_kind_id"`
- KindName string `json:"kind_name"`
- Row int `json:"row"`
- Col int `json:"col"`
- Match byteSequence `json:"match"`
- Text string `json:"text"`
- EOF bool `json:"eof"`
- Invalid bool `json:"invalid"`
+ ModeID int `json:"mode_id"`
+ ModeName string `json:"mode_name"`
+ KindID int `json:"kind_id"`
+ ModeKindID int `json:"mode_kind_id"`
+ KindName string `json:"kind_name"`
+ Row int `json:"row"`
+ Col int `json:"col"`
+ Lexeme string `json:"lexeme"`
+ EOF bool `json:"eof"`
+ Invalid bool `json:"invalid"`
}{
ModeID: t.ModeID.Int(),
- ModeName: t.ModeName.String(),
+ ModeName: t.ModeName,
KindID: t.KindID.Int(),
ModeKindID: t.ModeKindID.Int(),
- KindName: t.KindName.String(),
+ KindName: t.KindName,
Row: t.Row,
Col: t.Col,
- Match: t.match,
- Text: t.Text(),
+ Lexeme: string(t.Lexeme),
EOF: t.EOF,
Invalid: t.Invalid,
})
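
With the unexported match field gone, MarshalJSON above emits the matched bytes exactly once, as the UTF-8 string "lexeme", replacing the old pair of a "match" byte array and a derived "text" string. For a hypothetical token of kind t1 matching "abb" in the default mode (all field values illustrative), the output looks like:

    {"mode_id":1,"mode_name":"default","kind_id":1,"mode_kind_id":1,"kind_name":"t1","row":0,"col":0,"lexeme":"abb","eof":false,"invalid":false}
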
@@ -146,7 +113,7 @@ func DisableModeTransition() LexerOption {
}
type Lexer struct {
- clspec *spec.CompiledLexSpec
+ spec LexSpec
src []byte
srcPtr int
row int
@@ -154,23 +121,23 @@ type Lexer struct {
prevRow int
prevCol int
tokBuf []*Token
- modeStack []spec.LexModeID
+ modeStack []ModeID
passiveModeTran bool
}
-func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) {
+func NewLexer(spec LexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) {
b, err := ioutil.ReadAll(src)
if err != nil {
return nil, err
}
l := &Lexer{
- clspec: clspec,
+ spec: spec,
src: b,
srcPtr: 0,
row: 0,
col: 0,
- modeStack: []spec.LexModeID{
- clspec.InitialModeID,
+ modeStack: []ModeID{
+ spec.InitialMode(),
},
passiveModeTran: false,
}
@@ -207,7 +174,7 @@ func (l *Lexer) Next() (*Token, error) {
if !tok.Invalid {
break
}
- errTok.match = errTok.match.merge(tok.match)
+ errTok.Lexeme = append(errTok.Lexeme, tok.Lexeme...)
}
l.tokBuf = append(l.tokBuf, tok)
@@ -225,15 +192,14 @@ func (l *Lexer) nextAndTransition() (*Token, error) {
if l.passiveModeTran {
return tok, nil
}
- spec := l.clspec.Specs[l.Mode()]
- if spec.Pop[tok.ModeKindID] == 1 {
+ mode := l.Mode()
+ if l.spec.Pop(mode, tok.ModeKindID) {
err := l.PopMode()
if err != nil {
return nil, err
}
}
- mode := spec.Push[tok.ModeKindID]
- if !mode.IsNil() {
+ if mode, ok := l.spec.Push(mode, tok.ModeKindID); ok {
l.PushMode(mode)
}
// The length of the mode stack must be checked after the pop and push operations
@@ -249,9 +215,8 @@ func (l *Lexer) nextAndTransition() (*Token, error) {
func (l *Lexer) next() (*Token, error) {
mode := l.Mode()
- modeName := l.clspec.ModeNames[mode]
- spec := l.clspec.Specs[mode]
- state := spec.DFA.InitialStateID
+ modeName := l.spec.ModeName(mode)
+ state := l.spec.InitialState(mode)
buf := []byte{}
unfixedBufLen := 0
row := l.row
@@ -271,9 +236,9 @@ func (l *Lexer) next() (*Token, error) {
ModeID: mode,
ModeName: modeName,
ModeKindID: 0,
+ Lexeme: buf,
Row: row,
Col: col,
- match: newByteSequence(buf),
Invalid: true,
}, nil
}
@@ -288,7 +253,7 @@ func (l *Lexer) next() (*Token, error) {
}
buf = append(buf, v)
unfixedBufLen++
- nextState, ok := l.lookupNextState(mode, state, int(v))
+ nextState, ok := l.spec.NextState(mode, state, int(v))
if !ok {
if tok != nil {
l.unread(unfixedBufLen)
@@ -298,62 +263,35 @@ func (l *Lexer) next() (*Token, error) {
ModeID: mode,
ModeName: modeName,
ModeKindID: 0,
+ Lexeme: buf,
Row: row,
Col: col,
- match: newByteSequence(buf),
Invalid: true,
}, nil
}
state = nextState
- modeKindID := spec.DFA.AcceptingStates[state]
- if modeKindID != 0 {
- kindID := l.clspec.KindIDs[mode][modeKindID]
+ if modeKindID, ok := l.spec.Accept(mode, state); ok {
+ kindID, kindName := l.spec.KindIDAndName(mode, modeKindID)
tok = &Token{
ModeID: mode,
ModeName: modeName,
KindID: kindID,
ModeKindID: modeKindID,
- KindName: spec.KindNames[modeKindID],
+ KindName: kindName,
+ Lexeme: buf,
Row: row,
Col: col,
- match: newByteSequence(buf),
}
unfixedBufLen = 0
}
}
}
-func (l *Lexer) lookupNextState(mode spec.LexModeID, state spec.StateID, v int) (spec.StateID, bool) {
- switch l.clspec.CompressionLevel {
- case 2:
- tab := l.clspec.Specs[mode].DFA.Transition
- rowNum := tab.RowNums[state]
- d := tab.UniqueEntries.RowDisplacement[rowNum]
- if tab.UniqueEntries.Bounds[d+v] != rowNum {
- return tab.UniqueEntries.EmptyValue, false
- }
- return tab.UniqueEntries.Entries[d+v], true
- case 1:
- tab := l.clspec.Specs[mode].DFA.Transition
- next := tab.UncompressedUniqueEntries[tab.RowNums[state]*tab.OriginalColCount+v]
- if next == spec.StateIDNil {
- return spec.StateIDNil, false
- }
- return next, true
- }
- modeSpec := l.clspec.Specs[mode]
- next := modeSpec.DFA.UncompressedTransition[state.Int()*modeSpec.DFA.ColCount+v]
- if next == spec.StateIDNil {
- return spec.StateIDNil, false
- }
- return next, true
-}
-
-func (l *Lexer) Mode() spec.LexModeID {
+func (l *Lexer) Mode() ModeID {
return l.modeStack[len(l.modeStack)-1]
}
-func (l *Lexer) PushMode(mode spec.LexModeID) {
+func (l *Lexer) PushMode(mode ModeID) {
l.modeStack = append(l.modeStack, mode)
}
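
The LexSpec interface is small enough to implement by hand, which is the point of the refactoring: the Lexer no longer cares where its tables come from. A toy implementation (hypothetical, purely for illustration; real specs are produced by the maleeni compiler and adapted via NewLexSpec) that recognizes runs of the byte 'a' in a single default mode:

    package example

    import "github.com/nihei9/maleeni/driver"

    // aPlusSpec models a two-state DFA: state 1 is the initial state,
    // and state 2 (the only accepting state) is reached after one or
    // more 'a' bytes.
    type aPlusSpec struct{}

    func (aPlusSpec) InitialMode() driver.ModeID { return 1 }

    // A single mode that is never popped or pushed.
    func (aPlusSpec) Pop(driver.ModeID, driver.ModeKindID) bool { return false }
    func (aPlusSpec) Push(driver.ModeID, driver.ModeKindID) (driver.ModeID, bool) {
        return 0, false
    }

    func (aPlusSpec) ModeName(driver.ModeID) string             { return "default" }
    func (aPlusSpec) InitialState(driver.ModeID) driver.StateID { return 1 }

    // NextState receives each input byte as v; any byte other than
    // 'a' is a dead end.
    func (aPlusSpec) NextState(_ driver.ModeID, _ driver.StateID, v int) (driver.StateID, bool) {
        if v == 'a' {
            return 2, true
        }
        return 0, false
    }

    func (aPlusSpec) Accept(_ driver.ModeID, state driver.StateID) (driver.ModeKindID, bool) {
        return 1, state == 2
    }

    func (aPlusSpec) KindIDAndName(driver.ModeID, driver.ModeKindID) (driver.KindID, string) {
        return 1, "a_plus"
    }

Feeding driver.NewLexer(aPlusSpec{}, strings.NewReader("aaab")) through Next() should yield one a_plus token for "aaa" followed by an invalid token for "b", since the driver applies maximal munch and reports unmatched input as invalid tokens.
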
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 33b206f..ebb4aad 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,22 +42,29 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
}
}
-func newToken(modeID spec.LexModeID, modeName spec.LexModeName, kindID spec.LexKindID, modeKindID spec.LexModeKindID, kindName spec.LexKindName, match byteSequence) *Token {
+func newToken(modeID ModeID, modeName string, kindID KindID, modeKindID ModeKindID, kindName string, lexeme []byte) *Token {
return &Token{
ModeID: modeID,
ModeName: modeName,
KindID: kindID,
ModeKindID: modeKindID,
KindName: kindName,
- match: match,
+ Lexeme: lexeme,
}
}
-func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
- return newToken(spec.LexModeIDDefault, spec.LexModeNameDefault, spec.LexKindID(kindID), spec.LexModeKindID(modeKindID), spec.LexKindName(kindName), match)
+func newTokenDefault(kindID int, modeKindID int, kindName string, lexeme []byte) *Token {
+ return newToken(
+ ModeID(spec.LexModeIDDefault.Int()),
+ spec.LexModeNameDefault.String(),
+ KindID(spec.LexKindID(kindID).Int()),
+ ModeKindID(spec.LexModeKindID(modeKindID).Int()),
+ spec.LexKindName(kindName).String(),
+ lexeme,
+ )
}
-func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
+func newEOFToken(modeID ModeID, modeName string) *Token {
return &Token{
ModeID: modeID,
ModeName: modeName,
@@ -67,15 +74,15 @@ func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
}
func newEOFTokenDefault() *Token {
- return newEOFToken(spec.LexModeIDDefault, spec.LexModeNameDefault)
+ return newEOFToken(ModeID(spec.LexModeIDDefault.Int()), spec.LexModeNameDefault.String())
}
-func newInvalidToken(modeID spec.LexModeID, modeName spec.LexModeName, match byteSequence) *Token {
+func newInvalidToken(modeID ModeID, modeName string, lexeme []byte) *Token {
return &Token{
ModeID: modeID,
ModeName: modeName,
ModeKindID: 0,
- match: match,
+ Lexeme: lexeme,
Invalid: true,
}
}
@@ -103,17 +110,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("aabb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaabb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("babb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("bbabb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abbbabb"))),
+ newTokenDefault(1, 1, "t1", []byte("abb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("aabb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("aaabb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("babb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("bbabb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("abbbabb")),
newEOFTokenDefault(),
},
},
@@ -127,21 +134,21 @@ func TestLexer_Next(t *testing.T) {
},
src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("ba"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("baaa"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaa"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcd"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcdcdcd"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("cd"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("cdcdcd"))),
+ newTokenDefault(1, 1, "t1", []byte("ba")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("baaa")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("a")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("aaa")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("abcd")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("abcdcdcd")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("cd")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("cdcdcd")),
newEOFTokenDefault(),
},
},
@@ -170,22 +177,22 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x00})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x7f})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xc2, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", []byte{0x00}),
+ newTokenDefault(1, 1, "t1", []byte{0x7f}),
+ newTokenDefault(1, 1, "t1", []byte{0xc2, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xdf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xe1, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xec, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xed, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xed, 0x9f, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xee, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xef, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -197,17 +204,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "ab.*+?|()[]",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("b"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("."))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("*"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("+"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("?"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("|"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("("))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte(")"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("["))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("]"))),
+ newTokenDefault(1, 1, "t1", []byte("a")),
+ newTokenDefault(1, 1, "t1", []byte("b")),
+ newTokenDefault(1, 1, "t1", []byte(".")),
+ newTokenDefault(1, 1, "t1", []byte("*")),
+ newTokenDefault(1, 1, "t1", []byte("+")),
+ newTokenDefault(1, 1, "t1", []byte("?")),
+ newTokenDefault(1, 1, "t1", []byte("|")),
+ newTokenDefault(1, 1, "t1", []byte("(")),
+ newTokenDefault(1, 1, "t1", []byte(")")),
+ newTokenDefault(1, 1, "t1", []byte("[")),
+ newTokenDefault(1, 1, "t1", []byte("]")),
newEOFTokenDefault(),
},
},
@@ -230,10 +237,10 @@ func TestLexer_Next(t *testing.T) {
0x7f,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x01})),
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x02})),
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7e})),
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7f})),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x01}),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x02}),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x7e}),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x7f}),
newEOFTokenDefault(),
},
},
@@ -251,10 +258,10 @@ func TestLexer_Next(t *testing.T) {
0xdf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x80}),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x81}),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbe}),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -269,7 +276,7 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
newEOFTokenDefault(),
},
},
@@ -287,10 +294,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -308,10 +315,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -341,22 +348,22 @@ func TestLexer_Next(t *testing.T) {
0xef, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -371,7 +378,7 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
newEOFTokenDefault(),
},
},
@@ -389,10 +396,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -410,10 +417,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -431,10 +438,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0xbf, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -460,18 +467,18 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -483,7 +490,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "foo9",
tokens: []*Token{
- newTokenDefault(1, 1, "NonNumber", newByteSequence([]byte("foo9"))),
+ newTokenDefault(1, 1, "NonNumber", []byte("foo9")),
newEOFTokenDefault(),
},
},
@@ -498,10 +505,10 @@ func TestLexer_Next(t *testing.T) {
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x6E})),
- newTokenDefault(2, 2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
- newTokenDefault(3, 3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newTokenDefault(4, 4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x6E}),
+ newTokenDefault(2, 2, "2ByteChar", []byte{0xCE, 0xBD}),
+ newTokenDefault(3, 3, "3ByteChar", []byte{0xE3, 0x81, 0xAB}),
+ newTokenDefault(4, 4, "4ByteChar", []byte{0xF0, 0x9F, 0x98, 0xB8}),
newEOFTokenDefault(),
},
},
@@ -513,10 +520,10 @@ func TestLexer_Next(t *testing.T) {
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0x6E})),
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0x6E}),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0xCE, 0xBD}),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0xE3, 0x81, 0xAB}),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0xF0, 0x9F, 0x98, 0xB8}),
newEOFTokenDefault(),
},
},
@@ -530,8 +537,8 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabcdef",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdef"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdef"))),
+ newTokenDefault(1, 1, "t1", []byte("abcdefdef")),
+ newTokenDefault(1, 1, "t1", []byte("abcdef")),
newEOFTokenDefault(),
},
},
@@ -545,7 +552,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabc",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+ newTokenDefault(1, 1, "t1", []byte("abcdefdefabc")),
newEOFTokenDefault(),
},
},
@@ -560,7 +567,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabc",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+ newTokenDefault(1, 1, "t1", []byte("abcdefdefabc")),
newEOFTokenDefault(),
},
},
@@ -576,16 +583,16 @@ func TestLexer_Next(t *testing.T) {
},
src: `"" "Hello world.\n\"Hello world.\""`,
tokens: []*Token{
- newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
- newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
- newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
- newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\n`))),
- newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
- newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
- newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
- newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
+ newToken(1, "default", 2, 2, "string_open", []byte(`"`)),
+ newToken(2, "string", 5, 3, "string_close", []byte(`"`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "string_open", []byte(`"`)),
+ newToken(2, "string", 4, 2, "char_sequence", []byte(`Hello world.`)),
+ newToken(2, "string", 3, 1, "escape_sequence", []byte(`\n`)),
+ newToken(2, "string", 3, 1, "escape_sequence", []byte(`\"`)),
+ newToken(2, "string", 4, 2, "char_sequence", []byte(`Hello world.`)),
+ newToken(2, "string", 3, 1, "escape_sequence", []byte(`\"`)),
+ newToken(2, "string", 5, 3, "string_close", []byte(`"`)),
newEOFTokenDefault(),
},
},
@@ -602,15 +609,15 @@ func TestLexer_Next(t *testing.T) {
},
src: ` a b < < `,
tokens: []*Token{
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "char_a", newByteSequence([]byte(`a`))),
- newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "state_a", 3, 2, "char_b", newByteSequence([]byte(`b`))),
- newToken(3, "state_b", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "state_b", 5, 2, "back_from_b", newByteSequence([]byte(`<`))),
- newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "state_a", 4, 3, "back_from_a", newByteSequence([]byte(`<`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "char_a", []byte(`a`)),
+ newToken(2, "state_a", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "state_a", 3, 2, "char_b", []byte(`b`)),
+ newToken(3, "state_b", 1, 1, "white_space", []byte(` `)),
+ newToken(3, "state_b", 5, 2, "back_from_b", []byte(`<`)),
+ newToken(2, "state_a", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "state_a", 4, 3, "back_from_a", []byte(`<`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
newEOFTokenDefault(),
},
},
@@ -627,20 +634,20 @@ func TestLexer_Next(t *testing.T) {
},
src: `-> 1 -> 2 <- <- a`,
tokens: []*Token{
- newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
- newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
+ newToken(1, "default", 3, 3, "push_1", []byte(`-> 1`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 4, 2, "push_2", []byte(`-> 2`)),
+ newToken(3, "mode_2", 1, 1, "white_space", []byte(` `)),
+ newToken(3, "mode_2", 6, 2, "pop_2", []byte(`<-`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 5, 3, "pop_1", []byte(`<-`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "char", []byte(`a`)),
newEOFTokenDefault(),
},
passiveModeTran: true,
tran: func(l *Lexer, tok *Token) error {
- switch l.clspec.ModeNames[l.Mode()] {
+ switch l.spec.ModeName(l.Mode()) {
case "default":
switch tok.KindName {
case "push_1":
@@ -675,21 +682,21 @@ func TestLexer_Next(t *testing.T) {
},
src: `-> 1 -> 2 <- <- a`,
tokens: []*Token{
- newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
- newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
+ newToken(1, "default", 3, 3, "push_1", []byte(`-> 1`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 4, 2, "push_2", []byte(`-> 2`)),
+ newToken(3, "mode_2", 1, 1, "white_space", []byte(` `)),
+ newToken(3, "mode_2", 6, 2, "pop_2", []byte(`<-`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 5, 3, "pop_1", []byte(`<-`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "char", []byte(`a`)),
newEOFTokenDefault(),
},
// Active mode transition and an external transition function can be used together.
passiveModeTran: false,
tran: func(l *Lexer, tok *Token) error {
- switch l.clspec.ModeNames[l.Mode()] {
+ switch l.spec.ModeName(l.Mode()) {
case "mode_1":
switch tok.KindName {
case "push_2":
@@ -717,15 +724,15 @@ func TestLexer_Next(t *testing.T) {
},
src: `.*+?|()[\`,
tokens: []*Token{
- newTokenDefault(1, 1, "dot", newByteSequence([]byte(`.`))),
- newTokenDefault(2, 2, "star", newByteSequence([]byte(`*`))),
- newTokenDefault(3, 3, "plus", newByteSequence([]byte(`+`))),
- newTokenDefault(4, 4, "question", newByteSequence([]byte(`?`))),
- newTokenDefault(5, 5, "vbar", newByteSequence([]byte(`|`))),
- newTokenDefault(6, 6, "lparen", newByteSequence([]byte(`(`))),
- newTokenDefault(7, 7, "rparen", newByteSequence([]byte(`)`))),
- newTokenDefault(8, 8, "lbrace", newByteSequence([]byte(`[`))),
- newTokenDefault(9, 9, "backslash", newByteSequence([]byte(`\`))),
+ newTokenDefault(1, 1, "dot", []byte(`.`)),
+ newTokenDefault(2, 2, "star", []byte(`*`)),
+ newTokenDefault(3, 3, "plus", []byte(`+`)),
+ newTokenDefault(4, 4, "question", []byte(`?`)),
+ newTokenDefault(5, 5, "vbar", []byte(`|`)),
+ newTokenDefault(6, 6, "lparen", []byte(`(`)),
+ newTokenDefault(7, 7, "rparen", []byte(`)`)),
+ newTokenDefault(8, 8, "lbrace", []byte(`[`)),
+ newTokenDefault(9, 9, "backslash", []byte(`\`)),
newEOFTokenDefault(),
},
},
@@ -741,7 +748,7 @@ func TestLexer_Next(t *testing.T) {
if tt.passiveModeTran {
opts = append(opts, DisableModeTransition())
}
- lexer, err := NewLexer(clspec, strings.NewReader(tt.src), opts...)
+ lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(tt.src), opts...)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -752,7 +759,7 @@ func TestLexer_Next(t *testing.T) {
break
}
testToken(t, eTok, tok, false)
- // t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match(), tok.Text(), tok.EOF, tok.Invalid)
+
if tok.EOF {
break
}
@@ -813,39 +820,39 @@ func TestLexer_Next_WithPosition(t *testing.T) {
})
expected := []*Token{
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x00})), 0, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x7F})), 0, 1),
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 0, 2),
-
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xC2, 0x80})), 1, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xDF, 0xBF})), 1, 1),
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 1, 2),
-
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xA0, 0x80})), 2, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xBF, 0xBF})), 2, 1),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE1, 0x80, 0x80})), 2, 2),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEC, 0xBF, 0xBF})), 2, 3),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x80, 0x80})), 2, 4),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x9F, 0xBF})), 2, 5),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEE, 0x80, 0x80})), 2, 6),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEF, 0xBF, 0xBF})), 2, 7),
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 2, 8),
-
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0x90, 0x80, 0x80})), 3, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0xBF, 0xBF, 0xBF})), 3, 1),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF1, 0x80, 0x80, 0x80})), 3, 2),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF3, 0xBF, 0xBF, 0xBF})), 3, 3),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x80, 0x80, 0x80})), 3, 4),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x8F, 0xBF, 0xBF})), 3, 5),
+ withPos(newTokenDefault(2, 2, "any", []byte{0x00}), 0, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0x7F}), 0, 1),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 0, 2),
+
+ withPos(newTokenDefault(2, 2, "any", []byte{0xC2, 0x80}), 1, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xDF, 0xBF}), 1, 1),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 1, 2),
+
+ withPos(newTokenDefault(2, 2, "any", []byte{0xE0, 0xA0, 0x80}), 2, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xE0, 0xBF, 0xBF}), 2, 1),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xE1, 0x80, 0x80}), 2, 2),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xEC, 0xBF, 0xBF}), 2, 3),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xED, 0x80, 0x80}), 2, 4),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xED, 0x9F, 0xBF}), 2, 5),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xEE, 0x80, 0x80}), 2, 6),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xEF, 0xBF, 0xBF}), 2, 7),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 2, 8),
+
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF0, 0x90, 0x80, 0x80}), 3, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF0, 0xBF, 0xBF, 0xBF}), 3, 1),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF1, 0x80, 0x80, 0x80}), 3, 2),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF3, 0xBF, 0xBF, 0xBF}), 3, 3),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF4, 0x80, 0x80, 0x80}), 3, 4),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF4, 0x8F, 0xBF, 0xBF}), 3, 5),
// When a token contains multiple line breaks, the driver sets the token position to
// the line number where a lexeme first appears.
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A, 0x0A, 0x0A})), 3, 6),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A, 0x0A, 0x0A}), 3, 6),
withPos(newEOFTokenDefault(), 0, 0),
}
- lexer, err := NewLexer(clspec, strings.NewReader(src))
+ lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -872,15 +879,15 @@ func testToken(t *testing.T, expected, actual *Token, checkPosition bool) {
actual.KindID != expected.KindID ||
actual.ModeKindID != expected.ModeKindID ||
actual.KindName != expected.KindName ||
- !bytes.Equal(actual.Match(), expected.Match()) ||
+ !bytes.Equal(actual.Lexeme, expected.Lexeme) ||
actual.EOF != expected.EOF ||
actual.Invalid != expected.Invalid {
- t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+ t.Fatalf(`unexpected token; want: %v ("%#v"), got: %v ("%#v")`, expected, string(expected.Lexeme), actual, string(actual.Lexeme))
}
if checkPosition {
if actual.Row != expected.Row || actual.Col != expected.Col {
- t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+ t.Fatalf(`unexpected token; want: %v ("%#v"), got: %v ("%#v")`, expected, string(expected.Lexeme), actual, string(actual.Lexeme))
}
}
}
diff --git a/driver/spec.go b/driver/spec.go
new file mode 100644
index 0000000..149d922
--- /dev/null
+++ b/driver/spec.go
@@ -0,0 +1,73 @@
+package driver
+
+import (
+ "github.com/nihei9/maleeni/spec"
+)
+
+type lexSpec struct {
+ spec *spec.CompiledLexSpec
+}
+
+func NewLexSpec(spec *spec.CompiledLexSpec) *lexSpec {
+ return &lexSpec{
+ spec: spec,
+ }
+}
+
+func (s *lexSpec) InitialMode() ModeID {
+ return ModeID(s.spec.InitialModeID.Int())
+}
+
+func (s *lexSpec) Pop(mode ModeID, modeKind ModeKindID) bool {
+ return s.spec.Specs[mode].Pop[modeKind] == 1
+}
+
+func (s *lexSpec) Push(mode ModeID, modeKind ModeKindID) (ModeID, bool) {
+ modeID := s.spec.Specs[mode].Push[modeKind]
+ return ModeID(modeID.Int()), !modeID.IsNil()
+}
+
+func (s *lexSpec) ModeName(mode ModeID) string {
+ return s.spec.ModeNames[mode].String()
+}
+
+func (s *lexSpec) InitialState(mode ModeID) StateID {
+ return StateID(s.spec.Specs[mode].DFA.InitialStateID.Int())
+}
+
+func (s *lexSpec) NextState(mode ModeID, state StateID, v int) (StateID, bool) {
+ switch s.spec.CompressionLevel {
+ case 2:
+ tran := s.spec.Specs[mode].DFA.Transition
+ rowNum := tran.RowNums[state]
+ d := tran.UniqueEntries.RowDisplacement[rowNum]
+ if tran.UniqueEntries.Bounds[d+v] != rowNum {
+ return StateID(tran.UniqueEntries.EmptyValue.Int()), false
+ }
+ return StateID(tran.UniqueEntries.Entries[d+v].Int()), true
+ case 1:
+ tran := s.spec.Specs[mode].DFA.Transition
+ next := tran.UncompressedUniqueEntries[tran.RowNums[state]*tran.OriginalColCount+v]
+ if next == spec.StateIDNil {
+ return StateID(spec.StateIDNil.Int()), false
+ }
+ return StateID(next.Int()), true
+ }
+
+ modeSpec := s.spec.Specs[mode]
+ next := modeSpec.DFA.UncompressedTransition[state.Int()*modeSpec.DFA.ColCount+v]
+ if next == spec.StateIDNil {
+ return StateID(spec.StateIDNil), false
+ }
+ return StateID(next.Int()), true
+}
+
+func (s *lexSpec) Accept(mode ModeID, state StateID) (ModeKindID, bool) {
+ modeKindID := s.spec.Specs[mode].DFA.AcceptingStates[state]
+ return ModeKindID(modeKindID.Int()), modeKindID != spec.LexModeKindIDNil
+}
+
+func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string) {
+ kindID := s.spec.KindIDs[mode][modeKind]
+ return KindID(kindID.Int()), s.spec.KindNames[kindID].String()
+}