author     Ryo Nihei <nihei.dev@gmail.com>  2021-08-01 15:29:18 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-08-01 15:38:54 +0900
commit     03e3688e3928c88c12107ea734c35281c814e0c0
tree       7f57554aec423098c8325238aef72cffdae7651e /driver
parent     Fix CHANGELOG
Add unique kind IDs to tokens
Diffstat (limited to 'driver')
-rw-r--r--  driver/lexer.go       17
-rw-r--r--  driver/lexer_test.go  347
2 files changed, 186 insertions, 178 deletions
diff --git a/driver/lexer.go b/driver/lexer.go
index 1e54fa6..7ad2dd0 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -62,6 +62,9 @@ type Token struct {
     // `ModeName` is a mode name that represents in which mode the lexer detected the token.
     ModeName spec.LexModeName
 
+    // `KindID` is a unique ID among modes.
+    KindID int
+
     // `Kind` represents a number that corresponds to a `KindName`.
     Kind int
 
@@ -78,11 +81,12 @@ type Token struct {
     match byteSequence
 }
 
-func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kind int, kindName string, match byteSequence) *Token {
+func newToken(mode spec.LexModeNum, modeName spec.LexModeName, kindID int, modeKindID int, kindName string, match byteSequence) *Token {
     return &Token{
         Mode:     mode,
         ModeName: modeName,
-        Kind:     kind,
+        KindID:   kindID,
+        Kind:     modeKindID,
         KindName: kindName,
         match:    match,
     }
@@ -131,6 +135,7 @@ func (t *Token) MarshalJSON() ([]byte, error) {
     return json.Marshal(struct {
         Mode     int          `json:"mode"`
         ModeName string       `json:"mode_name"`
+        KindID   int          `json:"kind_id"`
         Kind     int          `json:"kind"`
         KindName string       `json:"kind_name"`
         Match    byteSequence `json:"match"`
@@ -140,6 +145,7 @@ func (t *Token) MarshalJSON() ([]byte, error) {
     }{
         Mode:     t.Mode.Int(),
         ModeName: t.ModeName.String(),
+        KindID:   t.KindID,
         Kind:     t.Kind,
         KindName: t.KindName,
         Match:    t.match,
@@ -321,9 +327,10 @@ func (l *Lexer) next() (*Token, error) {
             return newInvalidToken(mode, modeName, newByteSequence(buf)), nil
         }
         state = nextState
-        id := spec.DFA.AcceptingStates[state]
-        if id != 0 {
-            tok = newToken(mode, modeName, id, spec.Kinds[id].String(), newByteSequence(buf))
+        modeKindID := spec.DFA.AcceptingStates[state]
+        if modeKindID != 0 {
+            kindID := l.clspec.KindIDs[mode][modeKindID]
+            tok = newToken(mode, modeName, kindID.Int(), modeKindID, spec.Kinds[modeKindID].String(), newByteSequence(buf))
             unfixedBufLen = 0
         }
     }
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 4dfed99..79ee12e 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,8 +42,8 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
     }
 }
 
-func newTokenDefault(id int, kind string, match byteSequence) *Token {
-    return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, id, kind, match)
+func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
+    return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, kindID, modeKindID, kindName, match)
 }
 
 func newEOFTokenDefault() *Token {
@@ -67,17 +67,17 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abb aabb aaabb babb bbabb abbbabb",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("aabb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("aaabb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("babb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("bbabb"))),
-                newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("abbbabb"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("aabb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaabb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("babb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("bbabb"))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abbbabb"))),
                 newEOFTokenDefault(),
             },
         },
@@ -91,21 +91,21 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("ba"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("baaa"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("aaa"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("abcd"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("abcdcdcd"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("cd"))),
-                newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
-                newTokenDefault(2, "t2", newByteSequence([]byte("cdcdcd"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("ba"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("baaa"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaa"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcd"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcdcdcd"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("cd"))),
+                newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
+                newTokenDefault(2, 2, "t2", newByteSequence([]byte("cdcdcd"))),
                 newEOFTokenDefault(),
             },
         },
@@ -134,22 +134,22 @@ func TestLexer_Next(t *testing.T) {
                 0xf4, 0x8f, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte{0x00})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0x7f})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x00})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x7f})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -161,17 +161,17 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "ab.*+?|()[]",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("b"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("."))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("*"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("+"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("?"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("|"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("("))),
-                newTokenDefault(1, "t1", newByteSequence([]byte(")"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("["))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("]"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("b"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("."))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("*"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("+"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("?"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("|"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("("))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte(")"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("["))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("]"))),
                 newEOFTokenDefault(),
             },
         },
@@ -194,10 +194,10 @@ func TestLexer_Next(t *testing.T) {
                 0x7f,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x01})),
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x02})),
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7e})),
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7f})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x01})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x02})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7e})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7f})),
                 newEOFTokenDefault(),
             },
         },
@@ -215,10 +215,10 @@ func TestLexer_Next(t *testing.T) {
                 0xdf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
-                newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+                newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -233,7 +233,7 @@ func TestLexer_Next(t *testing.T) {
                 0xe0, 0xa0, 0x80,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
                 newEOFTokenDefault(),
             },
         },
@@ -251,10 +251,10 @@ func TestLexer_Next(t *testing.T) {
                 0xe0, 0xa0, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -272,10 +272,10 @@ func TestLexer_Next(t *testing.T) {
                 0xe0, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -305,22 +305,22 @@ func TestLexer_Next(t *testing.T) {
                 0xef, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
-                newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -335,7 +335,7 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0x90, 0x80, 0x80,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
                 newEOFTokenDefault(),
             },
         },
@@ -353,10 +353,10 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0x90, 0x80, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -374,10 +374,10 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0x90, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -395,10 +395,10 @@ func TestLexer_Next(t *testing.T) {
                 0xf0, 0xbf, 0xbf, 0xbf,
             }),
            tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -424,18 +424,18 @@ func TestLexer_Next(t *testing.T) {
                 0xf4, 0x8f, 0xbf, 0xbf,
             }),
             tokens: []*Token{
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
-                newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+                newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                 newEOFTokenDefault(),
             },
         },
@@ -447,7 +447,7 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "foo9",
             tokens: []*Token{
-                newTokenDefault(1, "NonNumber", newByteSequence([]byte("foo9"))),
+                newTokenDefault(1, 1, "NonNumber", newByteSequence([]byte("foo9"))),
                 newEOFTokenDefault(),
             },
         },
@@ -462,10 +462,10 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "nνに😸",
             tokens: []*Token{
-                newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x6E})),
-                newTokenDefault(2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
-                newTokenDefault(3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
-                newTokenDefault(4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+                newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x6E})),
+                newTokenDefault(2, 2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
+                newTokenDefault(3, 3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+                newTokenDefault(4, 4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
                 newEOFTokenDefault(),
             },
         },
@@ -477,10 +477,10 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "nνに😸",
             tokens: []*Token{
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0x6E})),
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
-                newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0x6E})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+                newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
                 newEOFTokenDefault(),
             },
         },
@@ -494,8 +494,8 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abcdefdefabcdef",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdef"))),
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdef"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdef"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdef"))),
                 newEOFTokenDefault(),
             },
         },
@@ -509,7 +509,7 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abcdefdefabc",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
                 newEOFTokenDefault(),
             },
         },
@@ -524,7 +524,7 @@ func TestLexer_Next(t *testing.T) {
             },
             src: "abcdefdefabc",
             tokens: []*Token{
-                newTokenDefault(1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+                newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
                 newEOFTokenDefault(),
             },
         },
@@ -540,16 +540,16 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `"" "Hello world.\n\"Hello world.\""`,
             tokens: []*Token{
-                newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
-                newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
-                newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
-                newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\n`))),
-                newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
-                newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
-                newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
-                newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
+                newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
+                newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
+                newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+                newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\n`))),
+                newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+                newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+                newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+                newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
                 newEOFTokenDefault(),
             },
         },
@@ -566,15 +566,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: ` a b < < `,
             tokens: []*Token{
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "char_a", newByteSequence([]byte(`a`))),
-                newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "state_a", 2, "char_b", newByteSequence([]byte(`b`))),
-                newToken(3, "state_b", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(3, "state_b", 2, "back_from_b", newByteSequence([]byte(`<`))),
-                newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "state_a", 3, "back_from_a", newByteSequence([]byte(`<`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "char_a", newByteSequence([]byte(`a`))),
+                newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "state_a", 3, 2, "char_b", newByteSequence([]byte(`b`))),
+                newToken(3, "state_b", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(3, "state_b", 5, 2, "back_from_b", newByteSequence([]byte(`<`))),
+                newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "state_a", 4, 3, "back_from_a", newByteSequence([]byte(`<`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
                 newEOFTokenDefault(),
             },
         },
@@ -591,15 +591,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `-> 1 -> 2 <- <- a`,
             tokens: []*Token{
-                newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
-                newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+                newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
+                newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
                 newEOFTokenDefault(),
             },
             passiveModeTran: true,
@@ -639,15 +639,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `-> 1 -> 2 <- <- a`,
             tokens: []*Token{
-                newToken(1, "default", 3, "push_1", newByteSequence([]byte(`-> 1`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 2, "push_2", newByteSequence([]byte(`-> 2`))),
-                newToken(3, "mode_2", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(3, "mode_2", 2, "pop_2", newByteSequence([]byte(`<-`))),
-                newToken(2, "mode_1", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(2, "mode_1", 3, "pop_1", newByteSequence([]byte(`<-`))),
-                newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
-                newToken(1, "default", 2, "char", newByteSequence([]byte(`a`))),
+                newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
+                newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
+                newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
+                newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+                newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
                 newEOFTokenDefault(),
             },
             // Active mode transition and an external transition function can be used together.
@@ -681,15 +681,15 @@ func TestLexer_Next(t *testing.T) {
             },
             src: `.*+?|()[\`,
             tokens: []*Token{
-                newTokenDefault(1, "dot", newByteSequence([]byte(`.`))),
-                newTokenDefault(2, "star", newByteSequence([]byte(`*`))),
-                newTokenDefault(3, "plus", newByteSequence([]byte(`+`))),
-                newTokenDefault(4, "question", newByteSequence([]byte(`?`))),
-                newTokenDefault(5, "vbar", newByteSequence([]byte(`|`))),
-                newTokenDefault(6, "lparen", newByteSequence([]byte(`(`))),
-                newTokenDefault(7, "rparen", newByteSequence([]byte(`)`))),
-                newTokenDefault(8, "lbrace", newByteSequence([]byte(`[`))),
-                newTokenDefault(9, "backslash", newByteSequence([]byte(`\`))),
+                newTokenDefault(1, 1, "dot", newByteSequence([]byte(`.`))),
+                newTokenDefault(2, 2, "star", newByteSequence([]byte(`*`))),
+                newTokenDefault(3, 3, "plus", newByteSequence([]byte(`+`))),
+                newTokenDefault(4, 4, "question", newByteSequence([]byte(`?`))),
+                newTokenDefault(5, 5, "vbar", newByteSequence([]byte(`|`))),
+                newTokenDefault(6, 6, "lparen", newByteSequence([]byte(`(`))),
+                newTokenDefault(7, 7, "rparen", newByteSequence([]byte(`)`))),
+                newTokenDefault(8, 8, "lbrace", newByteSequence([]byte(`[`))),
+                newTokenDefault(9, 9, "backslash", newByteSequence([]byte(`\`))),
                 newEOFTokenDefault(),
             },
         },
@@ -737,7 +737,8 @@ func testToken(t *testing.T, expected, actual *Token) {
     t.Helper()
 
     if actual.Mode != expected.Mode ||
-        actual.ModeName != actual.ModeName ||
+        actual.ModeName != expected.ModeName ||
+        actual.KindID != expected.KindID ||
         actual.Kind != expected.Kind ||
         actual.KindName != expected.KindName ||
         !bytes.Equal(actual.Match(), expected.Match()) ||