| author | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-11 00:40:05 +0900 |
|---|---|---|
| committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-11 22:57:17 +0900 |
| commit | 96a555a00f000704c618c226485fa6d87ce66d9d (patch) | |
| tree | 9d7398033a2c015390f0de7ab69b6fd37bb1ba30 /driver/lexer_test.go | |
| parent | Remove --debug option from the lex command (diff) | |
| download | tre-96a555a00f000704c618c226485fa6d87ce66d9d.tar.gz tre-96a555a00f000704c618c226485fa6d87ce66d9d.tar.xz | |
Define a lexical specification interface
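This commit decouples the lexer driver from the spec package's concrete types: the tests now use driver-local ModeID/KindID/ModeKindID types, read the matched bytes from an exported Lexeme field instead of the old unexported match accessor, and hand the compiled specification to the lexer through a NewLexSpec wrapper. Below is a minimal sketch of the shape those changes imply. Only the ID types and the ModeName method (used as l.spec.ModeName(l.Mode()) in the diff) are actually visible here, so the interface's full method set is an assumption.

```go
package driver

// Driver-local ID types replace spec.LexModeID, spec.LexKindID, and
// spec.LexModeKindID at the API boundary (see the new newToken signature
// in the diff below).
type ModeID int
type KindID int
type ModeKindID int

// LexSpec is a sketch of the lexical specification interface this commit
// introduces. ModeName is the only method visible in this diff; anything
// else the real interface declares is not shown here.
type LexSpec interface {
	// ModeName resolves a mode ID to its name, replacing direct access
	// to the compiled spec's ModeNames slice.
	ModeName(mode ModeID) string
}
```

In the tests, NewLexSpec(clspec) adapts the compiled specification to this interface before it is passed to NewLexer.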
Diffstat (limited to 'driver/lexer_test.go')
-rw-r--r-- | driver/lexer_test.go | 429
1 file changed, 218 insertions(+), 211 deletions(-)
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 33b206f..ebb4aad 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,22 +42,29 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
 	}
 }
 
-func newToken(modeID spec.LexModeID, modeName spec.LexModeName, kindID spec.LexKindID, modeKindID spec.LexModeKindID, kindName spec.LexKindName, match byteSequence) *Token {
+func newToken(modeID ModeID, modeName string, kindID KindID, modeKindID ModeKindID, kindName string, lexeme []byte) *Token {
 	return &Token{
 		ModeID:     modeID,
 		ModeName:   modeName,
 		KindID:     kindID,
 		ModeKindID: modeKindID,
 		KindName:   kindName,
-		match:      match,
+		Lexeme:     lexeme,
 	}
 }
 
-func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
-	return newToken(spec.LexModeIDDefault, spec.LexModeNameDefault, spec.LexKindID(kindID), spec.LexModeKindID(modeKindID), spec.LexKindName(kindName), match)
+func newTokenDefault(kindID int, modeKindID int, kindName string, lexeme []byte) *Token {
+	return newToken(
+		ModeID(spec.LexModeIDDefault.Int()),
+		spec.LexModeNameDefault.String(),
+		KindID(spec.LexKindID(kindID).Int()),
+		ModeKindID(spec.LexModeKindID(modeKindID).Int()),
+		spec.LexKindName(kindName).String(),
+		lexeme,
+	)
 }
 
-func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
+func newEOFToken(modeID ModeID, modeName string) *Token {
 	return &Token{
 		ModeID:   modeID,
 		ModeName: modeName,
@@ -67,15 +74,15 @@ func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
 }
 
 func newEOFTokenDefault() *Token {
-	return newEOFToken(spec.LexModeIDDefault, spec.LexModeNameDefault)
+	return newEOFToken(ModeID(spec.LexModeIDDefault.Int()), spec.LexModeNameDefault.String())
 }
 
-func newInvalidToken(modeID spec.LexModeID, modeName spec.LexModeName, match byteSequence) *Token {
+func newInvalidToken(modeID ModeID, modeName string, lexeme []byte) *Token {
 	return &Token{
 		ModeID:     modeID,
 		ModeName:   modeName,
 		ModeKindID: 0,
-		match:      match,
+		Lexeme:     lexeme,
 		Invalid:    true,
 	}
 }
@@ -103,17 +110,17 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "abb aabb aaabb babb bbabb abbbabb",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("abb"))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("aabb"))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaabb"))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("babb"))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("bbabb"))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("abbbabb"))),
+			newTokenDefault(1, 1, "t1", []byte("abb")),
+			newTokenDefault(2, 2, "t2", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("aabb")),
+			newTokenDefault(2, 2, "t2", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("aaabb")),
+			newTokenDefault(2, 2, "t2", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("babb")),
+			newTokenDefault(2, 2, "t2", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("bbabb")),
+			newTokenDefault(2, 2, "t2", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("abbbabb")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -127,21 +134,21 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("ba"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("baaa"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaa"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcd"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcdcdcd"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte("cd"))),
-			newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
-			newTokenDefault(2, 2, "t2", newByteSequence([]byte("cdcdcd"))),
+			newTokenDefault(1, 1, "t1", []byte("ba")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("baaa")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("a")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(1, 1, "t1", []byte("aaa")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(2, 2, "t2", []byte("abcd")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(2, 2, "t2", []byte("abcdcdcd")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(2, 2, "t2", []byte("cd")),
+			newTokenDefault(3, 3, "t3", []byte(" ")),
+			newTokenDefault(2, 2, "t2", []byte("cdcdcd")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -170,22 +177,22 @@ func TestLexer_Next(t *testing.T) {
 			0xf4, 0x8f, 0xbf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x00})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x7f})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xc2, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+			newTokenDefault(1, 1, "t1", []byte{0x00}),
+			newTokenDefault(1, 1, "t1", []byte{0x7f}),
+			newTokenDefault(1, 1, "t1", []byte{0xc2, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xdf, 0xbf}),
+			newTokenDefault(1, 1, "t1", []byte{0xe1, 0x80, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xec, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "t1", []byte{0xed, 0x80, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xed, 0x9f, 0xbf}),
+			newTokenDefault(1, 1, "t1", []byte{0xee, 0x80, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xef, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
+			newTokenDefault(1, 1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -197,17 +204,17 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "ab.*+?|()[]",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("b"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("."))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("*"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("+"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("?"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("|"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("("))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte(")"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("["))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("]"))),
+			newTokenDefault(1, 1, "t1", []byte("a")),
+			newTokenDefault(1, 1, "t1", []byte("b")),
+			newTokenDefault(1, 1, "t1", []byte(".")),
+			newTokenDefault(1, 1, "t1", []byte("*")),
+			newTokenDefault(1, 1, "t1", []byte("+")),
+			newTokenDefault(1, 1, "t1", []byte("?")),
+			newTokenDefault(1, 1, "t1", []byte("|")),
+			newTokenDefault(1, 1, "t1", []byte("(")),
+			newTokenDefault(1, 1, "t1", []byte(")")),
+			newTokenDefault(1, 1, "t1", []byte("[")),
+			newTokenDefault(1, 1, "t1", []byte("]")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -230,10 +237,10 @@ func TestLexer_Next(t *testing.T) {
 			0x7f,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x01})),
-			newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x02})),
-			newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7e})),
-			newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7f})),
+			newTokenDefault(1, 1, "1ByteChar", []byte{0x01}),
+			newTokenDefault(1, 1, "1ByteChar", []byte{0x02}),
+			newTokenDefault(1, 1, "1ByteChar", []byte{0x7e}),
+			newTokenDefault(1, 1, "1ByteChar", []byte{0x7f}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -251,10 +258,10 @@ func TestLexer_Next(t *testing.T) {
 			0xdf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
-			newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
-			newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
-			newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+			newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x80}),
+			newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x81}),
+			newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbe}),
+			newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -269,7 +276,7 @@ func TestLexer_Next(t *testing.T) {
 			0xe0, 0xa0, 0x80,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -287,10 +294,10 @@ func TestLexer_Next(t *testing.T) {
 			0xe0, 0xa0, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -308,10 +315,10 @@ func TestLexer_Next(t *testing.T) {
 			0xe0, 0xbf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -341,22 +348,22 @@ func TestLexer_Next(t *testing.T) {
 			0xef, 0xbf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -371,7 +378,7 @@ func TestLexer_Next(t *testing.T) {
 			0xf0, 0x90, 0x80, 0x80,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -389,10 +396,10 @@ func TestLexer_Next(t *testing.T) {
 			0xf0, 0x90, 0x80, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -410,10 +417,10 @@ func TestLexer_Next(t *testing.T) {
 			0xf0, 0x90, 0xbf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -431,10 +438,10 @@ func TestLexer_Next(t *testing.T) {
 			0xf0, 0xbf, 0xbf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -460,18 +467,18 @@ func TestLexer_Next(t *testing.T) {
 			0xf4, 0x8f, 0xbf, 0xbf,
 		}),
 		tokens: []*Token{
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
-			newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
+			newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -483,7 +490,7 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "foo9",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "NonNumber", newByteSequence([]byte("foo9"))),
+			newTokenDefault(1, 1, "NonNumber", []byte("foo9")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -498,10 +505,10 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "nνに😸",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x6E})),
-			newTokenDefault(2, 2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
-			newTokenDefault(3, 3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
-			newTokenDefault(4, 4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+			newTokenDefault(1, 1, "1ByteChar", []byte{0x6E}),
+			newTokenDefault(2, 2, "2ByteChar", []byte{0xCE, 0xBD}),
+			newTokenDefault(3, 3, "3ByteChar", []byte{0xE3, 0x81, 0xAB}),
+			newTokenDefault(4, 4, "4ByteChar", []byte{0xF0, 0x9F, 0x98, 0xB8}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -513,10 +520,10 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "nνに😸",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0x6E})),
-			newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
-			newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
-			newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+			newTokenDefault(1, 1, "codePointsAlt", []byte{0x6E}),
+			newTokenDefault(1, 1, "codePointsAlt", []byte{0xCE, 0xBD}),
+			newTokenDefault(1, 1, "codePointsAlt", []byte{0xE3, 0x81, 0xAB}),
+			newTokenDefault(1, 1, "codePointsAlt", []byte{0xF0, 0x9F, 0x98, 0xB8}),
 			newEOFTokenDefault(),
 		},
 	},
@@ -530,8 +537,8 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "abcdefdefabcdef",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdef"))),
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdef"))),
+			newTokenDefault(1, 1, "t1", []byte("abcdefdef")),
+			newTokenDefault(1, 1, "t1", []byte("abcdef")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -545,7 +552,7 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "abcdefdefabc",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+			newTokenDefault(1, 1, "t1", []byte("abcdefdefabc")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -560,7 +567,7 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: "abcdefdefabc",
 		tokens: []*Token{
-			newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+			newTokenDefault(1, 1, "t1", []byte("abcdefdefabc")),
 			newEOFTokenDefault(),
 		},
 	},
@@ -576,16 +583,16 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: `"" "Hello world.\n\"Hello world.\""`,
 		tokens: []*Token{
-			newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
-			newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
-			newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
-			newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
-			newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\n`))),
-			newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
-			newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
-			newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
-			newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
+			newToken(1, "default", 2, 2, "string_open", []byte(`"`)),
+			newToken(2, "string", 5, 3, "string_close", []byte(`"`)),
+			newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+			newToken(1, "default", 2, 2, "string_open", []byte(`"`)),
+			newToken(2, "string", 4, 2, "char_sequence", []byte(`Hello world.`)),
+			newToken(2, "string", 3, 1, "escape_sequence", []byte(`\n`)),
+			newToken(2, "string", 3, 1, "escape_sequence", []byte(`\"`)),
+			newToken(2, "string", 4, 2, "char_sequence", []byte(`Hello world.`)),
+			newToken(2, "string", 3, 1, "escape_sequence", []byte(`\"`)),
+			newToken(2, "string", 5, 3, "string_close", []byte(`"`)),
 			newEOFTokenDefault(),
 		},
 	},
@@ -602,15 +609,15 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: ` a b < < `,
 		tokens: []*Token{
-			newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(1, "default", 2, 2, "char_a", newByteSequence([]byte(`a`))),
-			newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(2, "state_a", 3, 2, "char_b", newByteSequence([]byte(`b`))),
-			newToken(3, "state_b", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(3, "state_b", 5, 2, "back_from_b", newByteSequence([]byte(`<`))),
-			newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(2, "state_a", 4, 3, "back_from_a", newByteSequence([]byte(`<`))),
-			newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+			newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+			newToken(1, "default", 2, 2, "char_a", []byte(`a`)),
+			newToken(2, "state_a", 1, 1, "white_space", []byte(` `)),
+			newToken(2, "state_a", 3, 2, "char_b", []byte(`b`)),
+			newToken(3, "state_b", 1, 1, "white_space", []byte(` `)),
+			newToken(3, "state_b", 5, 2, "back_from_b", []byte(`<`)),
+			newToken(2, "state_a", 1, 1, "white_space", []byte(` `)),
+			newToken(2, "state_a", 4, 3, "back_from_a", []byte(`<`)),
+			newToken(1, "default", 1, 1, "white_space", []byte(` `)),
 			newEOFTokenDefault(),
 		},
 	},
@@ -627,20 +634,20 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: `-> 1 -> 2 <- <- a`,
 		tokens: []*Token{
-			newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
-			newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
-			newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
-			newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
-			newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
+			newToken(1, "default", 3, 3, "push_1", []byte(`-> 1`)),
+			newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+			newToken(2, "mode_1", 4, 2, "push_2", []byte(`-> 2`)),
+			newToken(3, "mode_2", 1, 1, "white_space", []byte(` `)),
+			newToken(3, "mode_2", 6, 2, "pop_2", []byte(`<-`)),
+			newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+			newToken(2, "mode_1", 5, 3, "pop_1", []byte(`<-`)),
+			newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+			newToken(1, "default", 2, 2, "char", []byte(`a`)),
 			newEOFTokenDefault(),
 		},
 		passiveModeTran: true,
 		tran: func(l *Lexer, tok *Token) error {
-			switch l.clspec.ModeNames[l.Mode()] {
+			switch l.spec.ModeName(l.Mode()) {
 			case "default":
 				switch tok.KindName {
 				case "push_1":
@@ -675,21 +682,21 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: `-> 1 -> 2 <- <- a`,
 		tokens: []*Token{
-			newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
-			newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
-			newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
-			newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
-			newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
-			newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
+			newToken(1, "default", 3, 3, "push_1", []byte(`-> 1`)),
+			newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+			newToken(2, "mode_1", 4, 2, "push_2", []byte(`-> 2`)),
+			newToken(3, "mode_2", 1, 1, "white_space", []byte(` `)),
+			newToken(3, "mode_2", 6, 2, "pop_2", []byte(`<-`)),
+			newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+			newToken(2, "mode_1", 5, 3, "pop_1", []byte(`<-`)),
+			newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+			newToken(1, "default", 2, 2, "char", []byte(`a`)),
 			newEOFTokenDefault(),
 		},
 		// Active mode transition and an external transition function can be used together.
 		passiveModeTran: false,
 		tran: func(l *Lexer, tok *Token) error {
-			switch l.clspec.ModeNames[l.Mode()] {
+			switch l.spec.ModeName(l.Mode()) {
 			case "mode_1":
 				switch tok.KindName {
 				case "push_2":
@@ -717,15 +724,15 @@ func TestLexer_Next(t *testing.T) {
 		},
 		src: `.*+?|()[\`,
 		tokens: []*Token{
-			newTokenDefault(1, 1, "dot", newByteSequence([]byte(`.`))),
-			newTokenDefault(2, 2, "star", newByteSequence([]byte(`*`))),
-			newTokenDefault(3, 3, "plus", newByteSequence([]byte(`+`))),
-			newTokenDefault(4, 4, "question", newByteSequence([]byte(`?`))),
-			newTokenDefault(5, 5, "vbar", newByteSequence([]byte(`|`))),
-			newTokenDefault(6, 6, "lparen", newByteSequence([]byte(`(`))),
-			newTokenDefault(7, 7, "rparen", newByteSequence([]byte(`)`))),
-			newTokenDefault(8, 8, "lbrace", newByteSequence([]byte(`[`))),
-			newTokenDefault(9, 9, "backslash", newByteSequence([]byte(`\`))),
+			newTokenDefault(1, 1, "dot", []byte(`.`)),
+			newTokenDefault(2, 2, "star", []byte(`*`)),
+			newTokenDefault(3, 3, "plus", []byte(`+`)),
+			newTokenDefault(4, 4, "question", []byte(`?`)),
+			newTokenDefault(5, 5, "vbar", []byte(`|`)),
+			newTokenDefault(6, 6, "lparen", []byte(`(`)),
+			newTokenDefault(7, 7, "rparen", []byte(`)`)),
+			newTokenDefault(8, 8, "lbrace", []byte(`[`)),
+			newTokenDefault(9, 9, "backslash", []byte(`\`)),
 			newEOFTokenDefault(),
 		},
 	},
@@ -741,7 +748,7 @@ func TestLexer_Next(t *testing.T) {
 			if tt.passiveModeTran {
 				opts = append(opts, DisableModeTransition())
 			}
-			lexer, err := NewLexer(clspec, strings.NewReader(tt.src), opts...)
+			lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(tt.src), opts...)
 			if err != nil {
 				t.Fatalf("unexpected error: %v", err)
 			}
@@ -752,7 +759,7 @@ func TestLexer_Next(t *testing.T) {
 				break
 			}
			testToken(t, eTok, tok, false)
-			// t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match(), tok.Text(), tok.EOF, tok.Invalid)
+
 			if tok.EOF {
 				break
 			}
@@ -813,39 +820,39 @@ func TestLexer_Next_WithPosition(t *testing.T) {
 	})
 
 	expected := []*Token{
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x00})), 0, 0),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x7F})), 0, 1),
-		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 0, 2),
-
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xC2, 0x80})), 1, 0),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xDF, 0xBF})), 1, 1),
-		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 1, 2),
-
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xA0, 0x80})), 2, 0),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xBF, 0xBF})), 2, 1),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE1, 0x80, 0x80})), 2, 2),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEC, 0xBF, 0xBF})), 2, 3),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x80, 0x80})), 2, 4),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x9F, 0xBF})), 2, 5),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEE, 0x80, 0x80})), 2, 6),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEF, 0xBF, 0xBF})), 2, 7),
-		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 2, 8),
-
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0x90, 0x80, 0x80})), 3, 0),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0xBF, 0xBF, 0xBF})), 3, 1),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF1, 0x80, 0x80, 0x80})), 3, 2),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF3, 0xBF, 0xBF, 0xBF})), 3, 3),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x80, 0x80, 0x80})), 3, 4),
-		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x8F, 0xBF, 0xBF})), 3, 5),
+		withPos(newTokenDefault(2, 2, "any", []byte{0x00}), 0, 0),
+		withPos(newTokenDefault(2, 2, "any", []byte{0x7F}), 0, 1),
+		withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 0, 2),
+
+		withPos(newTokenDefault(2, 2, "any", []byte{0xC2, 0x80}), 1, 0),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xDF, 0xBF}), 1, 1),
+		withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 1, 2),
+
+		withPos(newTokenDefault(2, 2, "any", []byte{0xE0, 0xA0, 0x80}), 2, 0),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xE0, 0xBF, 0xBF}), 2, 1),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xE1, 0x80, 0x80}), 2, 2),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xEC, 0xBF, 0xBF}), 2, 3),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xED, 0x80, 0x80}), 2, 4),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xED, 0x9F, 0xBF}), 2, 5),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xEE, 0x80, 0x80}), 2, 6),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xEF, 0xBF, 0xBF}), 2, 7),
+		withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 2, 8),
+
+		withPos(newTokenDefault(2, 2, "any", []byte{0xF0, 0x90, 0x80, 0x80}), 3, 0),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xF0, 0xBF, 0xBF, 0xBF}), 3, 1),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xF1, 0x80, 0x80, 0x80}), 3, 2),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xF3, 0xBF, 0xBF, 0xBF}), 3, 3),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xF4, 0x80, 0x80, 0x80}), 3, 4),
+		withPos(newTokenDefault(2, 2, "any", []byte{0xF4, 0x8F, 0xBF, 0xBF}), 3, 5),
 		// When a token contains multiple line breaks, the driver sets the token position to
 		// the line number where a lexeme first appears.
-		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A, 0x0A, 0x0A})), 3, 6),
+		withPos(newTokenDefault(1, 1, "newline", []byte{0x0A, 0x0A, 0x0A}), 3, 6),
 		withPos(newEOFTokenDefault(), 0, 0),
 	}
 
-	lexer, err := NewLexer(clspec, strings.NewReader(src))
+	lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src))
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
@@ -872,15 +879,15 @@ func testToken(t *testing.T, expected, actual *Token, checkPosition bool) {
 		actual.KindID != expected.KindID ||
 		actual.ModeKindID != expected.ModeKindID ||
 		actual.KindName != expected.KindName ||
-		!bytes.Equal(actual.Match(), expected.Match()) ||
+		!bytes.Equal(actual.Lexeme, expected.Lexeme) ||
 		actual.EOF != expected.EOF ||
 		actual.Invalid != expected.Invalid {
-		t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+		t.Fatalf(`unexpected token; want: %v ("%#v"), got: %v ("%#v")`, expected, string(expected.Lexeme), actual, string(actual.Lexeme))
 	}
 	if checkPosition {
 		if actual.Row != expected.Row || actual.Col != expected.Col {
-			t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+			t.Fatalf(`unexpected token; want: %v ("%#v"), got: %v ("%#v")`, expected, string(expected.Lexeme), actual, string(actual.Lexeme))
		}
	}
 }
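Taken together, the test changes show how call sites move from the concrete compiled spec to the new interface, and from the old match/Text accessors to the exported Lexeme field. The following is a hypothetical helper distilled from those call sites, not code from this commit; that Next returns (*Token, error) is inferred from the test loop, and the LexSpec argument is the value the tests build as NewLexSpec(clspec).

```go
package driver

import (
	"fmt"
	"strings"
)

// dumpTokens sketches the post-commit call shape. The s argument is the
// wrapped specification (NewLexSpec(clspec) in the tests).
func dumpTokens(s LexSpec, src string) error {
	// Before this commit the compiled spec was passed directly:
	//   NewLexer(clspec, strings.NewReader(src))
	lexer, err := NewLexer(s, strings.NewReader(src))
	if err != nil {
		return err
	}
	for {
		tok, err := lexer.Next()
		if err != nil {
			return err
		}
		if tok.EOF {
			return nil
		}
		// The matched bytes are now the exported Lexeme field; the old
		// tok.Match()/tok.Text() accessors no longer appear in the tests.
		fmt.Printf("%v %q (invalid: %v)\n", tok.KindName, tok.Lexeme, tok.Invalid)
	}
}
```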