author     Ryo Nihei <nihei.dev@gmail.com>  2021-08-01 15:29:18 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-08-01 15:38:54 +0900
commit     03e3688e3928c88c12107ea734c35281c814e0c0
tree       7f57554aec423098c8325238aef72cffdae7651e /driver
parent     Fix CHANGELOG
Add unique kind IDs to tokens
Diffstat (limited to 'driver')
-rw-r--r--  driver/lexer.go       17
-rw-r--r--  driver/lexer_test.go  347
2 files changed, 186 insertions, 178 deletions
diff --git a/driver/lexer.go b/driver/lexer.go
index 1e54fa6..7ad2dd0 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -62,6 +62,9 @@ type Token struct {
     // `ModeName` is a mode name that represents in which mode the lexer detected the token.
     ModeName spec.LexModeName
 
+    // `KindID` is a unique ID among modes.
+    KindID int
+
     // `Kind` represents a number that corresponds to a `KindName`.
     Kind int
 
@@ -78,11 +81,12 @@ type Token struct {
     match byteSequence
 }
 
-func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kind int, kindName string, match byteSequence) *Token {
+func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kindID int, modeKindID int, kindName string, match byteSequence) *Token {
     return &Token{
         Mode:     mode,
         ModeName: modeName,
-        Kind:     kind,
+        KindID:   kindID,
+        Kind:     modeKindID,
         KindName: kindName,
         match:    match,
     }
@@ -131,6 +135,7 @@ func (t *Token) MarshalJSON() ([]byte, error) {
     return json.Marshal(struct {
         Mode     int          `json:"mode"`
         ModeName string       `json:"mode_name"`
+        KindID   int          `json:"kind_id"`
         Kind     int          `json:"kind"`
         KindName string       `json:"kind_name"`
         Match    byteSequence `json:"match"`
@@ -140,6 +145,7 @@ func (t *Token) MarshalJSON() ([]byte, error) {
     }{
         Mode:     t.Mode.Int(),
         ModeName: t.ModeName.String(),
+        KindID:   t.KindID,
         Kind:     t.Kind,
         KindName: t.KindName,
         Match:    t.match,
@@ -321,9 +327,10 @@ func (l *Lexer) next() (*Token, error) {
             return newInvalidToken(mode, modeName, newByteSequence(buf)), nil
         }
         state = nextState
-        id := spec.DFA.AcceptingStates[state]
-        if id != 0 {
-            tok = newToken(mode, modeName, id, spec.Kinds[id].String(), newByteSequence(buf))
+        modeKindID := spec.DFA.AcceptingStates[state]
+        if modeKindID != 0 {
+            kindID := l.clspec.KindIDs[mode][modeKindID]
+            tok = newToken(mode, modeName, kindID.Int(), modeKindID, spec.Kinds[modeKindID].String(), newByteSequence(buf))
             unfixedBufLen = 0
         }
     }
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 4dfed99..79ee12e 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,8 +42,8 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
     }
 }
 
-func newTokenDefault(id int, kind string, match byteSequence) *Token {
-    return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, id, kind, match)
+func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
+    return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, kindID, modeKindID, kindName, match)
 }
 
 func newEOFTokenDefault() *Token {
@@ -67,17 +67,17 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abb aabb aaabb babb bbabb abbbabb",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("aabb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("aaabb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("babb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("bbabb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("abbbabb"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("aabb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaabb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("babb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("bbabb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abbbabb"))),
                 newEOFTokenDefault(),
             },
         },
@@ -91,21 +91,21 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("ba"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("baaa"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("aaa"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("abcd"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("abcdcdcd"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("cd"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("cdcdcd"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("ba"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("baaa"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaa"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcd"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcdcdcd"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("cd"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("cdcdcd"))),
                 newEOFTokenDefault(),
             },
         },
@@ -134,22 +134,22 @@ func TestLexer_Next(t *testing.T) {
                 0xf4, 0x8f, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte{0x00})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0x7f})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x00})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x7f})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -161,17 +161,17 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "ab.*+?|()[]",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("b"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("."))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("*"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("+"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("?"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("|"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("("))),
-                newTokenDefault(1, "t1", newByteSequence([]byte(")"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("["))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("]"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("b"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("."))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("*"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("+"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("?"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("|"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("("))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte(")"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("["))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("]"))),
                 newEOFTokenDefault(),
             },
         },
@@ -194,10 +194,10 @@ func TestLexer_Next(t *testing.T) {
                 0x7f,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x01})),
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x02})),
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7e})),
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7f})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x01})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x02})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7e})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7f})),
                 newEOFTokenDefault(),
             },
         },
@@ -215,10 +215,10 @@ func TestLexer_Next(t *testing.T) {
                 0xdf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -233,7 +233,7 @@ func TestLexer_Next(t *testing.T) {
                 0xe0, 0xa0, 0x80,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
                 newEOFTokenDefault(),
             },
         },
@@ -251,10 +251,10 @@ func TestLexer_Next(t *testing.T) {
                 0xe0, 0xa0, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -272,10 +272,10 @@ func TestLexer_Next(t *testing.T) {
                 0xe0, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -305,22 +305,22 @@ func TestLexer_Next(t *testing.T) {
                 0xef, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -335,7 +335,7 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0x90, 0x80, 0x80,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
                 newEOFTokenDefault(),
             },
         },
@@ -353,10 +353,10 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0x90, 0x80, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -374,10 +374,10 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0x90, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -395,10 +395,10 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0xbf, 0xbf, 0xbf,
             }),
            tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -424,18 +424,18 @@ func TestLexer_Next(t *testing.T) {
                 0xf4, 0x8f, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -447,7 +447,7 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "foo9",
             tokens: []*Token{
-                newTokenDefault(1, "NonNumber", newByteSequence([]byte("foo9"))),
+                newTokenDefault(1, 1, "NonNumber", newByteSequence([]byte("foo9"))),
                 newEOFTokenDefault(),
             },
         },
@@ -462,10 +462,10 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "nνに😸",
             tokens: []*Token{
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x6E})),
-                newTokenDefault(2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
-                newTokenDefault(3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
-                newTokenDefault(4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x6E})),
+                newTokenDefault(2, 2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
+                newTokenDefault(3, 3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+                newTokenDefault(4, 4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
                 newEOFTokenDefault(),
             },
         },
@@ -477,10 +477,10 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "nνに😸",
             tokens: []*Token{
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0x6E})),
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0x6E})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
                 newEOFTokenDefault(),
             },
         },
@@ -494,8 +494,8 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abcdefdefabcdef",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdef"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdef"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdef"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdef"))),
                 newEOFTokenDefault(),
             },
         },
@@ -509,7 +509,7 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abcdefdefabc",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
                 newEOFTokenDefault(),
             },
         },
@@ -524,7 +524,7 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abcdefdefabc",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
                 newEOFTokenDefault(),
             },
         },
@@ -540,16 +540,16 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `"" "Hello world.\n\"Hello world.\""`,
             tokens: []*Token{
-                newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
-                newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
-                newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
-                newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\n`))),
-                newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
-                newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
-                newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
-                newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
+                newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
+                newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
+                newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+                newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\n`))),
+                newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+                newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+                newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+                newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
                 newEOFTokenDefault(),
             },
         },
@@ -566,15 +566,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: ` a b < < `,
             tokens: []*Token{
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "char_a", newByteSequence([]byte(`a`))),
-                newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "state_a", 2, "char_b", newByteSequence([]byte(`b`))),
-                newToken(3, "state_b", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(3, "state_b", 2, "back_from_b", newByteSequence([]byte(`<`))),
-                newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "state_a", 3, "back_from_a", newByteSequence([]byte(`<`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "char_a", newByteSequence([]byte(`a`))),
+                newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "state_a", 3, 2, "char_b", newByteSequence([]byte(`b`))),
+                newToken(3, "state_b", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(3, "state_b", 5, 2, "back_from_b", newByteSequence([]byte(`<`))),
+                newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "state_a", 4, 3, "back_from_a", newByteSequence([]byte(`<`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
                 newEOFTokenDefault(),
             },
         },
@@ -591,15 +591,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `-> 1 -> 2 <- <- a`,
             tokens: []*Token{
-                newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
-                newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+                newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
+                newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
                 newEOFTokenDefault(),
             },
             passiveModeTran: true,
@@ -639,15 +639,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `-> 1 -> 2 <- <- a`,
             tokens: []*Token{
-                newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
-                newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+                newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
+                newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
                 newEOFTokenDefault(),
             },
             // Active mode transition and an external transition function can be used together.
@@ -681,15 +681,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `.*+?|()[\`,
             tokens: []*Token{
-                newTokenDefault(1, "dot", newByteSequence([]byte(`.`))),
-                newTokenDefault(2, "star", newByteSequence([]byte(`*`))),
-                newTokenDefault(3, "plus", newByteSequence([]byte(`+`))),
-                newTokenDefault(4, "question", newByteSequence([]byte(`?`))),
-                newTokenDefault(5, "vbar", newByteSequence([]byte(`|`))),
-                newTokenDefault(6, "lparen", newByteSequence([]byte(`(`))),
-                newTokenDefault(7, "rparen", newByteSequence([]byte(`)`))),
-                newTokenDefault(8, "lbrace", newByteSequence([]byte(`[`))),
-                newTokenDefault(9, "backslash", newByteSequence([]byte(`\`))),
+                newTokenDefault(1, 1, "dot", newByteSequence([]byte(`.`))),
+                newTokenDefault(2, 2, "star", newByteSequence([]byte(`*`))),
+                newTokenDefault(3, 3, "plus", newByteSequence([]byte(`+`))),
+                newTokenDefault(4, 4, "question", newByteSequence([]byte(`?`))),
+                newTokenDefault(5, 5, "vbar", newByteSequence([]byte(`|`))),
+                newTokenDefault(6, 6, "lparen", newByteSequence([]byte(`(`))),
+                newTokenDefault(7, 7, "rparen", newByteSequence([]byte(`)`))),
+                newTokenDefault(8, 8, "lbrace", newByteSequence([]byte(`[`))),
+                newTokenDefault(9, 9, "backslash", newByteSequence([]byte(`\`))),
                 newEOFTokenDefault(),
             },
         },
@@ -737,7 +737,8 @@ func testToken(t *testing.T, expected, actual *Token) {
     t.Helper()
 
     if actual.Mode != expected.Mode ||
-        actual.ModeName != actual.ModeName ||
+        actual.ModeName != expected.ModeName ||
+        actual.KindID != expected.KindID ||
         actual.Kind != expected.Kind ||
         actual.KindName != expected.KindName ||
         !bytes.Equal(actual.Match(), expected.Match()) ||