Diffstat (limited to 'driver/lexer_test.go')
-rw-r--r--  driver/lexer_test.go | 421
1 file changed, 251 insertions(+), 170 deletions(-)
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 26b5d49..d3edb3c 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -10,13 +10,38 @@ import (
"github.com/nihei9/maleeni/spec"
)
-func newLexEntry(kind string, pattern string) *spec.LexEntry {
+func newLexEntry(modes []string, kind string, pattern string, push string, pop bool) *spec.LexEntry {
+ ms := []spec.LexModeName{}
+ for _, m := range modes {
+ ms = append(ms, spec.LexModeName(m))
+ }
+ return &spec.LexEntry{
+ Kind: spec.LexKind(kind),
+ Pattern: spec.LexPattern(pattern),
+ Modes: ms,
+ Push: spec.LexModeName(push),
+ Pop: pop,
+ }
+}
+
+func newLexEntryDefaultNOP(kind string, pattern string) *spec.LexEntry {
return &spec.LexEntry{
Kind: spec.LexKind(kind),
Pattern: spec.LexPattern(pattern),
+ Modes: []spec.LexModeName{
+ spec.LexModeNameDefault,
+ },
}
}
+func newTokenDefault(id int, kind string, match byteSequence) *Token {
+ return newToken(spec.LexModeNumDefault, spec.LexModeNameDefault, id, kind, match)
+}
+
+func newEOFTokenDefault() *Token {
+ return newEOFToken(spec.LexModeNumDefault, spec.LexModeNameDefault)
+}
+
func TestLexer_Next(t *testing.T) {
test := []struct {
lspec *spec.LexSpec
@@ -26,58 +51,58 @@ func TestLexer_Next(t *testing.T) {
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("t1", "(a|b)*abb"),
- newLexEntry("t2", " +"),
+ newLexEntryDefaultNOP("t1", "(a|b)*abb"),
+ newLexEntryDefaultNOP("t2", " +"),
},
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newToken(1, "t1", newByteSequence([]byte("abb"))),
- newToken(2, "t2", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("aabb"))),
- newToken(2, "t2", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("aaabb"))),
- newToken(2, "t2", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("babb"))),
- newToken(2, "t2", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("bbabb"))),
- newToken(2, "t2", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("abbbabb"))),
- newEOFToken(),
+ newTokenDefault(1, "t1", newByteSequence([]byte("abb"))),
+ newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("aabb"))),
+ newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("aaabb"))),
+ newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("babb"))),
+ newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("bbabb"))),
+ newTokenDefault(2, "t2", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("abbbabb"))),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("t1", "b?a+"),
- newLexEntry("t2", "(ab)?(cd)+"),
- newLexEntry("t3", " +"),
+ newLexEntryDefaultNOP("t1", "b?a+"),
+ newLexEntryDefaultNOP("t2", "(ab)?(cd)+"),
+ newLexEntryDefaultNOP("t3", " +"),
},
},
src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
tokens: []*Token{
- newToken(1, "t1", newByteSequence([]byte("ba"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("baaa"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("a"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(1, "t1", newByteSequence([]byte("aaa"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(2, "t2", newByteSequence([]byte("abcd"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(2, "t2", newByteSequence([]byte("abcdcdcd"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(2, "t2", newByteSequence([]byte("cd"))),
- newToken(3, "t3", newByteSequence([]byte(" "))),
- newToken(2, "t2", newByteSequence([]byte("cdcdcd"))),
- newEOFToken(),
+ newTokenDefault(1, "t1", newByteSequence([]byte("ba"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("baaa"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("aaa"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, "t2", newByteSequence([]byte("abcd"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, "t2", newByteSequence([]byte("abcdcdcd"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, "t2", newByteSequence([]byte("cd"))),
+ newTokenDefault(3, "t3", newByteSequence([]byte(" "))),
+ newTokenDefault(2, "t2", newByteSequence([]byte("cdcdcd"))),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("t1", "."),
+ newLexEntryDefaultNOP("t1", "."),
},
},
src: string([]byte{
@@ -99,45 +124,45 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "t1", newByteSequence([]byte{0x00})),
- newToken(1, "t1", newByteSequence([]byte{0x7f})),
- newToken(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
- newToken(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newToken(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newToken(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
- newToken(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newToken(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newToken(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newToken(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0x00})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0x7f})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("t1", "[ab.*+?|()[\\]]"),
+ newLexEntryDefaultNOP("t1", "[ab.*+?|()[\\]]"),
},
},
src: "ab.*+?|()[]",
tokens: []*Token{
- newToken(1, "t1", newByteSequence([]byte("a"))),
- newToken(1, "t1", newByteSequence([]byte("b"))),
- newToken(1, "t1", newByteSequence([]byte("."))),
- newToken(1, "t1", newByteSequence([]byte("*"))),
- newToken(1, "t1", newByteSequence([]byte("+"))),
- newToken(1, "t1", newByteSequence([]byte("?"))),
- newToken(1, "t1", newByteSequence([]byte("|"))),
- newToken(1, "t1", newByteSequence([]byte("("))),
- newToken(1, "t1", newByteSequence([]byte(")"))),
- newToken(1, "t1", newByteSequence([]byte("["))),
- newToken(1, "t1", newByteSequence([]byte("]"))),
- newEOFToken(),
+ newTokenDefault(1, "t1", newByteSequence([]byte("a"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("b"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("."))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("*"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("+"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("?"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("|"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("("))),
+ newTokenDefault(1, "t1", newByteSequence([]byte(")"))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("["))),
+ newTokenDefault(1, "t1", newByteSequence([]byte("]"))),
+ newEOFTokenDefault(),
},
},
{
@@ -149,7 +174,7 @@ func TestLexer_Next(t *testing.T) {
// maleeni cannot handle the null character in patterns because compiler.lexer,
// specifically read() and restore(), treats the null character as the absence of a symbol.
// If a pattern needs a null character, use the code point expression \u{0000}.
- newLexEntry("1ByteChar", "[\x01-\x7f]"),
+ newLexEntryDefaultNOP("1ByteChar", "[\x01-\x7f]"),
},
},
src: string([]byte{
@@ -159,18 +184,18 @@ func TestLexer_Next(t *testing.T) {
0x7f,
}),
tokens: []*Token{
- newToken(1, "1ByteChar", newByteSequence([]byte{0x01})),
- newToken(1, "1ByteChar", newByteSequence([]byte{0x02})),
- newToken(1, "1ByteChar", newByteSequence([]byte{0x7e})),
- newToken(1, "1ByteChar", newByteSequence([]byte{0x7f})),
- newEOFToken(),
+ newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x01})),
+ newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x02})),
+ newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7e})),
+ newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x7f})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// all 2 byte characters
- newLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+ newLexEntryDefaultNOP("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
},
},
src: string([]byte{
@@ -180,33 +205,33 @@ func TestLexer_Next(t *testing.T) {
0xdf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
- newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
- newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
- newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+ newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+ newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+ newTokenDefault(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// All bytes are the same.
- newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+ newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
},
},
src: string([]byte{
0xe0, 0xa0, 0x80,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newEOFToken(),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first two bytes are the same.
- newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+ newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
},
},
src: string([]byte{
@@ -216,18 +241,18 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0xbf,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first byte is the same.
- newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+ newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
},
},
src: string([]byte{
@@ -237,18 +262,18 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// all 3 byte characters
- newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+ newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
},
},
src: string([]byte{
@@ -270,45 +295,45 @@ func TestLexer_Next(t *testing.T) {
0xef, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
- newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+ newTokenDefault(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// All bytes are the same.
- newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+ newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
},
},
src: string([]byte{
0xf0, 0x90, 0x80, 0x80,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newEOFToken(),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first 3 bytes are the same.
- newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+ newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
},
},
src: string([]byte{
@@ -318,18 +343,18 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first 2 bytes are the same.
- newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+ newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
},
},
src: string([]byte{
@@ -339,18 +364,18 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// The first byte is the same.
- newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+ newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
},
},
src: string([]byte{
@@ -360,18 +385,18 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0xbf, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
// all 4 byte characters
- newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+ newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
},
},
src: string([]byte{
@@ -389,64 +414,114 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
- newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
- newEOFToken(),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+ newTokenDefault(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("NonNumber", "[^0-9]+[0-9]"),
+ newLexEntryDefaultNOP("NonNumber", "[^0-9]+[0-9]"),
},
},
src: "foo9",
tokens: []*Token{
- newToken(1, "NonNumber", newByteSequence([]byte("foo9"))),
- newEOFToken(),
+ newTokenDefault(1, "NonNumber", newByteSequence([]byte("foo9"))),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("1ByteChar", "\\u{006E}"),
- newLexEntry("2ByteChar", "\\u{03BD}"),
- newLexEntry("3ByteChar", "\\u{306B}"),
- newLexEntry("4ByteChar", "\\u{01F638}"),
+ newLexEntryDefaultNOP("1ByteChar", "\\u{006E}"),
+ newLexEntryDefaultNOP("2ByteChar", "\\u{03BD}"),
+ newLexEntryDefaultNOP("3ByteChar", "\\u{306B}"),
+ newLexEntryDefaultNOP("4ByteChar", "\\u{01F638}"),
},
},
src: "nνに😸",
tokens: []*Token{
- newToken(1, "1ByteChar", newByteSequence([]byte{0x6E})),
- newToken(2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
- newToken(3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newToken(4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
- newEOFToken(),
+ newTokenDefault(1, "1ByteChar", newByteSequence([]byte{0x6E})),
+ newTokenDefault(2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
+ newTokenDefault(3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+ newTokenDefault(4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newEOFTokenDefault(),
},
},
{
lspec: &spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("codePointsAlt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"),
+ newLexEntryDefaultNOP("codePointsAlt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"),
},
},
src: "nνに😸",
tokens: []*Token{
- newToken(1, "codePointsAlt", newByteSequence([]byte{0x6E})),
- newToken(1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
- newToken(1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newToken(1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
- newEOFToken(),
+ newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0x6E})),
+ newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
+ newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
+ newTokenDefault(1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newEOFTokenDefault(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ newLexEntryDefaultNOP("white_space", ` *`),
+ newLexEntry([]string{"default"}, "string_open", `"`, "string", false),
+ newLexEntry([]string{"string"}, "escape_sequence", `\\[n"\\]`, "", false),
+ newLexEntry([]string{"string"}, "char_sequence", `[^"\\]*`, "", false),
+ newLexEntry([]string{"string"}, "string_close", `"`, "", true),
+ },
+ },
+ src: `"" "Hello world.\n\"Hello world.\""`,
+ tokens: []*Token{
+ newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
+ newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
+ newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, "string_open", newByteSequence([]byte(`"`))),
+ newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+ newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\n`))),
+ newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+ newToken(2, "string", 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
+ newToken(2, "string", 1, "escape_sequence", newByteSequence([]byte(`\"`))),
+ newToken(2, "string", 3, "string_close", newByteSequence([]byte(`"`))),
+ newEOFTokenDefault(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // `white_space` is enabled in multiple modes.
+ newLexEntry([]string{"default", "state_a", "state_b"}, "white_space", ` *`, "", false),
+ newLexEntry([]string{"default"}, "char_a", `a`, "state_a", false),
+ newLexEntry([]string{"state_a"}, "char_b", `b`, "state_b", false),
+ newLexEntry([]string{"state_a"}, "back_from_a", `<`, "", true),
+ newLexEntry([]string{"state_b"}, "back_from_b", `<`, "", true),
+ },
+ },
+ src: ` a b < < `,
+ tokens: []*Token{
+ newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 2, "char_a", newByteSequence([]byte(`a`))),
+ newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "state_a", 2, "char_b", newByteSequence([]byte(`b`))),
+ newToken(3, "state_b", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(3, "state_b", 2, "back_from_b", newByteSequence([]byte(`<`))),
+ newToken(2, "state_a", 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(2, "state_a", 3, "back_from_a", newByteSequence([]byte(`<`))),
+ newToken(1, "default", 1, "white_space", newByteSequence([]byte(` `))),
+ newEOFTokenDefault(),
},
},
}
@@ -479,8 +554,8 @@ func TestLexer_Next(t *testing.T) {
func TestLexer_PeekN(t *testing.T) {
clspec, err := compiler.Compile(&spec.LexSpec{
Entries: []*spec.LexEntry{
- newLexEntry("t1", "foo"),
- newLexEntry("t2", "bar"),
+ newLexEntryDefaultNOP("t1", "foo"),
+ newLexEntryDefaultNOP("t2", "bar"),
},
})
if err != nil {
@@ -492,9 +567,9 @@ func TestLexer_PeekN(t *testing.T) {
}
expectedTokens := []*Token{
- newToken(1, "t1", []byte("foo")),
- newToken(2, "t2", []byte("bar")),
- newEOFToken(),
+ newTokenDefault(1, "t1", []byte("foo")),
+ newTokenDefault(2, "t2", []byte("bar")),
+ newEOFTokenDefault(),
}
tok, err := lex.Peek1()
@@ -539,7 +614,13 @@ func TestLexer_PeekN(t *testing.T) {
func testToken(t *testing.T, expected, actual *Token) {
t.Helper()
- if actual.ID != expected.ID || actual.Kind != expected.Kind || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
- t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match))
+ if actual.Mode != expected.Mode ||
+ actual.ModeName != expected.ModeName ||
+ actual.ID != expected.ID ||
+ actual.Kind != expected.Kind ||
+ !bytes.Equal(actual.Match, expected.Match) ||
+ actual.EOF != expected.EOF ||
+ actual.Invalid != expected.Invalid {
+ t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, string(expected.Match), actual, string(actual.Match))
}
}
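
The two test cases added at the end of TestLexer_Next exercise the new lex-mode mechanism: an entry's Push field switches the lexer into another mode when its pattern matches, and Pop: true returns it to the previous mode. The sketch below shows how a caller might drive this to lex a double-quoted string. The spec mirrors the string-lexing test case above, but driver.NewLexer and the exact signature of Next are assumptions, since this diff only shows the tests.

package main

import (
	"fmt"
	"strings"

	"github.com/nihei9/maleeni/compiler"
	"github.com/nihei9/maleeni/driver"
	"github.com/nihei9/maleeni/spec"
)

func main() {
	// Same spec as the string-lexing test case: string_open pushes the
	// "string" mode and string_close pops back to the previous mode.
	clspec, err := compiler.Compile(&spec.LexSpec{
		Entries: []*spec.LexEntry{
			{Modes: []spec.LexModeName{"default"}, Kind: "white_space", Pattern: ` *`},
			{Modes: []spec.LexModeName{"default"}, Kind: "string_open", Pattern: `"`, Push: "string"},
			{Modes: []spec.LexModeName{"string"}, Kind: "escape_sequence", Pattern: `\\[n"\\]`},
			{Modes: []spec.LexModeName{"string"}, Kind: "char_sequence", Pattern: `[^"\\]*`},
			{Modes: []spec.LexModeName{"string"}, Kind: "string_close", Pattern: `"`, Pop: true},
		},
	})
	if err != nil {
		panic(err)
	}

	// Assumption: the driver exposes a constructor taking the compiled spec
	// and an io.Reader, and Next returns (*Token, error); neither appears
	// in this diff.
	lex, err := driver.NewLexer(clspec, strings.NewReader(`"Hello world.\n"`))
	if err != nil {
		panic(err)
	}
	for {
		tok, err := lex.Next()
		if err != nil {
			panic(err)
		}
		if tok.EOF {
			break
		}
		// Mode and ModeName on each token report the mode that matched it.
		fmt.Printf("%v #%v %v %q\n", tok.ModeName, tok.ID, tok.Kind, tok.Match)
	}
}

Under those assumptions, this would print string_open in the default mode, then the char_sequence and escape_sequence tokens in the string mode, then string_close, which is exactly the push/pop sequence the new test asserts.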