author     Ryo Nihei <nihei.dev@gmail.com>  2021-08-01 15:29:18 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-08-01 15:38:54 +0900
commit     03e3688e3928c88c12107ea734c35281c814e0c0 (patch)
tree       7f57554aec423098c8325238aef72cffdae7651e /driver
parent     Fix CHANGELOG (diff)
Add unique kind IDs to tokens
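
Token kinds are numbered per lex mode, so two kinds defined in different
modes can share the same `Kind` value. This change adds a `KindID` field
that is unique across all modes; `Kind` keeps its mode-local numbering.
When the lexer accepts a token, it resolves the global ID through the
compiled specification's per-mode kind-ID table.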
Diffstat (limited to 'driver')
-rw-r--r--  driver/lexer.go        17
-rw-r--r--  driver/lexer_test.go  347
2 files changed, 186 insertions(+), 178 deletions(-)
diff --git a/driver/lexer.go b/driver/lexer.go
index 1e54fa6..7ad2dd0 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -62,6 +62,9 @@ type Token struct {
// `ModeName` is the name of the mode in which the lexer detected the token.
ModeName spec.LexModeName
+ // `KindID` is an ID that is unique across all modes.
+ KindID int
+
// `Kind` is an ID that corresponds to a `KindName` and is unique only within its mode.
Kind int
@@ -78,11 +81,12 @@ type Token struct {
match byteSequence
}
-func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kind int, kindName string, match byteSequence) *Token {
+func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kindID int, modeKindID int, kindName string, match byteSequence) *Token {
return &Token{
Mode: mode,
ModeName: modeName,
- Kind: kind,
+ KindID: kindID,
+ Kind: modeKindID,
KindName: kindName,
match: match,
}
@@ -131,6 +135,7 @@ func (t *Token) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
Mode int `json:"mode"`
ModeName string `json:"mode_name"`
+ KindID int `json:"kind_id"`
Kind int `json:"kind"`
KindName string `json:"kind_name"`
Match byteSequence `json:"match"`
@@ -140,6 +145,7 @@ func (t *Token) MarshalJSON() ([]byte, error) {
}{
Mode: t.Mode.Int(),
ModeName: t.ModeName.String(),
+ KindID: t.KindID,
Kind: t.Kind,
KindName: t.KindName,
Match: t.match,
@@ -321,9 +327,10 @@ func (l *Lexer) next() (*Token, error) {
return newInvalidToken(mode, modeName, newByteSequence(buf)), nil
}
state = nextState
- id := spec.DFA.AcceptingStates[state]
- if id != 0 {
- tok = newToken(mode, modeName, id, spec.Kinds[id].String(), newByteSequence(buf))
+ modeKindID := spec.DFA.AcceptingStates[state]
+ if modeKindID != 0 {
+ kindID := l.clspec.KindIDs[mode][modeKindID]
+ tok = newToken(mode, modeName, kindID.Int(), modeKindID, spec.Kinds[modeKindID].String(), newByteSequence(buf))
unfixedBufLen = 0
}
}
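
For illustration, the new lookup can be reproduced in isolation. This is
a minimal sketch, not the driver's actual code: the table shape mirrors
`l.clspec.KindIDs[mode][modeKindID]` from the hunk above, the concrete
values come from the string-mode test case in lexer_test.go below, and
the plain `int` element type is an assumption (in the driver the
elements carry an `Int()` method).

    package main

    import "fmt"

    func main() {
        // kindIDs[mode][modeKindID] = kind ID unique across all modes.
        // Mode-local kind ID 0 means "no kind accepted" (see the
        // `modeKindID != 0` check above), so index 0 of each inner
        // slice is a placeholder.
        kindIDs := [][]int{
            nil,          // mode 0: unused (modes appear to be numbered from 1)
            {0, 1, 2},    // mode 1 "default": white_space=1, string_open=2
            {0, 3, 4, 5}, // mode 2 "string": escape_sequence=3, char_sequence=4, string_close=5
        }

        mode, modeKindID := 2, 3               // string_close, seen from within the string mode
        fmt.Println(kindIDs[mode][modeKindID]) // 5: the ID unique across both modes
    }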
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 4dfed99..79ee12e 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,8 +42,8 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
}
}
-func newTokenDefault(id int, kind string, match byteSequence) *Token {
- return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, id, kind, match)
+func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
+ return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, kindID, modeKindID, kindName, match)
}
func newEOFTokenDefault() *Token {
@@ -67,17 +67,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte("abb"))),
- newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("aabb"))),
- newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("aaabb"))),
- newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("babb"))),
- newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("bbabb"))),
- newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("abbbabb"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("abb"))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("aabb"))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaabb"))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("babb"))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("bbabb"))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("abbbabb"))),
newEOFTokenDefault(),
},
},
@@ -91,21 +91,21 @@ func TestLexer_Next(t *testing.T) {
},
src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte("ba"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("baaa"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, "t1", newByteSequence([]byte("aaa"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, "t2", newByteSequence([]byte("abcd"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, "t2", newByteSequence([]byte("abcdcdcd"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, "t2", newByteSequence([]byte("cd"))),
- newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, "t2", newByteSequence([]byte("cdcdcd"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("ba"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("baaa"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaa"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcd"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcdcdcd"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte("cd"))),
+ newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, 2, "t2", newByteSequence([]byte("cdcdcd"))),
newEOFTokenDefault(),
},
},
@@ -134,22 +134,22 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte{0x00})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0x7f})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x00})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x7f})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -161,17 +161,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "ab.*+?|()[]",
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("b"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("."))),
- newTokenDefault(1, "t1", newByteSequence([]byte("*"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("+"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("?"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("|"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("("))),
- newTokenDefault(1, "t1", newByteSequence([]byte(")"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("["))),
- newTokenDefault(1, "t1", newByteSequence([]byte("]"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("b"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("."))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("*"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("+"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("?"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("|"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("("))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte(")"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("["))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("]"))),
newEOFTokenDefault(),
},
},
@@ -194,10 +194,10 @@ func TestLexer_Next(t *testing.T) {
0x7f,
}),
tokens: []*Token{
- newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x01})),
- newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x02})),
- newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7e})),
- newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7f})),
+ newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x01})),
+ newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x02})),
+ newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7e})),
+ newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7f})),
newEOFTokenDefault(),
},
},
@@ -215,10 +215,10 @@ func TestLexer_Next(t *testing.T) {
0xdf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
- newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
- newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
- newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+ newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+ newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+ newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+ newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -233,7 +233,7 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
newEOFTokenDefault(),
},
},
@@ -251,10 +251,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -272,10 +272,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -305,22 +305,22 @@ func TestLexer_Next(t *testing.T) {
0xef, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
- newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -335,7 +335,7 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
newEOFTokenDefault(),
},
},
@@ -353,10 +353,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -374,10 +374,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -395,10 +395,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0xbf, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -424,18 +424,18 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
- newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+ newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
newEOFTokenDefault(),
},
},
@@ -447,7 +447,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "foo9",
tokens: []*Token{
- newTokenDefault(1, "NonNumber", newByteSequence([]byte("foo9"))),
+ newTokenDefault(1, 1, "NonNumber", newByteSequence([]byte("foo9"))),
newEOFTokenDefault(),
},
},
@@ -462,10 +462,10 @@ func TestLexer_Next(t *testing.T) {
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x6E})),
- newTokenDefault(2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
- newTokenDefault(3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newTokenDefault(4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x6E})),
+ newTokenDefault(2, 2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
+ newTokenDefault(3, 3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+ newTokenDefault(4, 4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
newEOFTokenDefault(),
},
},
@@ -477,10 +477,10 @@ func TestLexer_Next(t *testing.T) {
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0x6E})),
- newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
- newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0x6E})),
+ newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
+ newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+ newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
newEOFTokenDefault(),
},
},
@@ -494,8 +494,8 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabcdef",
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdef"))),
- newTokenDefault(1, "t1", newByteSequence([]byte("abcdef"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdef"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdef"))),
newEOFTokenDefault(),
},
},
@@ -509,7 +509,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabc",
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
newEOFTokenDefault(),
},
},
@@ -524,7 +524,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabc",
tokens: []*Token{
- newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+ newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
newEOFTokenDefault(),
},
},
@@ -540,16 +540,16 @@ func TestLexer_Next(t *testing.T) {
},
src: `"" "Hello world.\n\"Hello world.\""`,
tokens: []*Token{
- newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
- newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
- newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
- newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
- newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\n`))),
- newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
- newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
- newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
- newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
+ newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
+ newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
+ newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
+ newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+ newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\n`))),
+ newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+ newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+ newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+ newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
newEOFTokenDefault(),
},
},
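(The expectations above show both numbering schemes side by side: the
mode-local IDs restart at 1 in each mode, e.g. white_space and
escape_sequence are both Kind 1 in their own modes, while the global
`KindID`s run 1 through 5 across the two modes, apparently in
definition order.)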
@@ -566,15 +566,15 @@ func TestLexer_Next(t *testing.T) {
},
src: ` a b < < `,
tokens: []*Token{
- newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, "char_a", newByteSequence([]byte(`a`))),
- newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "state_a", 2, "char_b", newByteSequence([]byte(`b`))),
- newToken(3, "state_b", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "state_b", 2, "back_from_b", newByteSequence([]byte(`<`))),
- newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "state_a", 3, "back_from_a", newByteSequence([]byte(`<`))),
- newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, 2, "char_a", newByteSequence([]byte(`a`))),
+ newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "state_a", 3, 2, "char_b", newByteSequence([]byte(`b`))),
+ newToken(3, "state_b", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(3, "state_b", 5, 2, "back_from_b", newByteSequence([]byte(`<`))),
+ newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "state_a", 4, 3, "back_from_a", newByteSequence([]byte(`<`))),
+ newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
newEOFTokenDefault(),
},
},
@@ -591,15 +591,15 @@ func TestLexer_Next(t *testing.T) {
},
src: `-> 1 -> 2 <- <- a`,
tokens: []*Token{
- newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
- newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
- newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
- newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
- newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+ newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
+ newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
+ newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
+ newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
+ newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
newEOFTokenDefault(),
},
passiveModeTran: true,
@@ -639,15 +639,15 @@ func TestLexer_Next(t *testing.T) {
},
src: `-> 1 -> 2 <- <- a`,
tokens: []*Token{
- newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
- newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
- newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
- newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
- newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+ newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
+ newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
+ newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
+ newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
+ newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
newEOFTokenDefault(),
},
// Active mode transition and an external transition function can be used together.
@@ -681,15 +681,15 @@ func TestLexer_Next(t *testing.T) {
},
src: `.*+?|()[\`,
tokens: []*Token{
- newTokenDefault(1, "dot", newByteSequence([]byte(`.`))),
- newTokenDefault(2, "star", newByteSequence([]byte(`*`))),
- newTokenDefault(3, "plus", newByteSequence([]byte(`+`))),
- newTokenDefault(4, "question", newByteSequence([]byte(`?`))),
- newTokenDefault(5, "vbar", newByteSequence([]byte(`|`))),
- newTokenDefault(6, "lparen", newByteSequence([]byte(`(`))),
- newTokenDefault(7, "rparen", newByteSequence([]byte(`)`))),
- newTokenDefault(8, "lbrace", newByteSequence([]byte(`[`))),
- newTokenDefault(9, "backslash", newByteSequence([]byte(`\`))),
+ newTokenDefault(1, 1, "dot", newByteSequence([]byte(`.`))),
+ newTokenDefault(2, 2, "star", newByteSequence([]byte(`*`))),
+ newTokenDefault(3, 3, "plus", newByteSequence([]byte(`+`))),
+ newTokenDefault(4, 4, "question", newByteSequence([]byte(`?`))),
+ newTokenDefault(5, 5, "vbar", newByteSequence([]byte(`|`))),
+ newTokenDefault(6, 6, "lparen", newByteSequence([]byte(`(`))),
+ newTokenDefault(7, 7, "rparen", newByteSequence([]byte(`)`))),
+ newTokenDefault(8, 8, "lbrace", newByteSequence([]byte(`[`))),
+ newTokenDefault(9, 9, "backslash", newByteSequence([]byte(`\`))),
newEOFTokenDefault(),
},
},
@@ -737,7 +737,8 @@ func testToken(t *testing.T, expected, actual *Token) {
t.Helper()
if actual.Mode != expected.Mode ||
- actual.ModeName != actual.ModeName ||
+ actual.ModeName != expected.ModeName ||
+ actual.KindID != expected.KindID ||
actual.Kind != expected.Kind ||
actual.KindName != expected.KindName ||
!bytes.Equal(actual.Match(), expected.Match()) ||