author     Ryo Nihei <nihei.dev@gmail.com>  2021-02-24 01:30:08 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-02-24 01:30:08 +0900
commit     e5fb2fe4f4dfc7dff550b934933b88e9392a6e11 (patch)
tree       7ffa58f2106d8b3bbbe931b84f73a9fb5c2b51a1 /driver
parent     Add + and ? operators (diff)
Add range expression
[a-z] matches any one character from a to z. The ordering of characters follows their Unicode code points.
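For illustration, a range over ASCII letters would be exercised by a test case like the following hypothetical one. It uses only the constructs that appear in the patch below; the "Lower" entry name and the "az" input are made up for this sketch.

	{
		lspec: &spec.LexSpec{
			Entries: []*spec.LexEntry{
				// any one lowercase ASCII letter
				spec.NewLexEntry("Lower", "[a-z]"),
			},
		},
		src: "az",
		tokens: []*Token{
			newToken(1, "Lower", []byte("a")),
			newToken(1, "Lower", []byte("z")),
			newEOFToken(),
		},
	},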
Diffstat (limited to 'driver')
-rw-r--r--   driver/lexer_test.go   261
1 file changed, 260 insertions(+), 1 deletion(-)
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 283d5fe..bdf5f03 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -132,6 +132,265 @@ func TestLexer_Next(t *testing.T) {
newEOFToken(),
},
},
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // all 1 byte characters
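+ // [\x00-\x7f] covers U+0000..U+007F, i.e. all of ASCII.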
+ spec.NewLexEntry("1ByteChar", "[\x00-\x7f]"),
+ },
+ },
+ src: string([]byte{
+ 0x00,
+ 0x01,
+ 0x7e,
+ 0x7f,
+ }),
+ tokens: []*Token{
+ newToken(1, "1ByteChar", []byte{0x00}),
+ newToken(1, "1ByteChar", []byte{0x01}),
+ newToken(1, "1ByteChar", []byte{0x7e}),
+ newToken(1, "1ByteChar", []byte{0x7f}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // all 2 byte characters
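+ // [\xc2\x80-\xdf\xbf] covers U+0080..U+07FF.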
+ spec.NewLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xc2, 0x80,
+ 0xc2, 0x81,
+ 0xdf, 0xbe,
+ 0xdf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "2ByteChar", []byte{0xc2, 0x80}),
+ newToken(1, "2ByteChar", []byte{0xc2, 0x81}),
+ newToken(1, "2ByteChar", []byte{0xdf, 0xbe}),
+ newToken(1, "2ByteChar", []byte{0xdf, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // All bytes are the same.
+ spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ }),
+ tokens: []*Token{
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // The first two bytes are the same.
+ spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ 0xe0, 0xa0, 0x81,
+ 0xe0, 0xa0, 0xbe,
+ 0xe0, 0xa0, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // The first byte is the same.
+ spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ 0xe0, 0xa0, 0x81,
+ 0xe0, 0xbf, 0xbe,
+ 0xe0, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // all 3 byte characters
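+ // [\xe0\xa0\x80-\xef\xbf\xbf] covers U+0800..U+FFFF. The inputs below
+ // exercise each lead-byte segment (0xe0 / 0xe1-0xec / 0xed / 0xee-0xef),
+ // with 0xed 0x9f 0xbf (U+D7FF) and 0xee 0x80 0x80 (U+E000) sitting on
+ // either side of the surrogate gap.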
+ spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xe0, 0xa0, 0x80,
+ 0xe0, 0xa0, 0x81,
+ 0xe0, 0xbf, 0xbe,
+ 0xe0, 0xbf, 0xbf,
+ 0xe1, 0x80, 0x80,
+ 0xe1, 0x80, 0x81,
+ 0xec, 0xbf, 0xbe,
+ 0xec, 0xbf, 0xbf,
+ 0xed, 0x80, 0x80,
+ 0xed, 0x80, 0x81,
+ 0xed, 0x9f, 0xbe,
+ 0xed, 0x9f, 0xbf,
+ 0xee, 0x80, 0x80,
+ 0xee, 0x80, 0x81,
+ 0xef, 0xbf, 0xbe,
+ 0xef, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+ newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+ newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
+ newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
+ newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
+ newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
+ newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
+ newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
+ newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
+ newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
+ newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
+ newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
+ newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
+ newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // All bytes are the same.
+ spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ }),
+ tokens: []*Token{
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // The first 3 bytes are the same.
+ spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0x90, 0x80, 0xbe,
+ 0xf0, 0x90, 0x80, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // The first 2 bytes are the same.
+ spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0x90, 0xbf, 0xbe,
+ 0xf0, 0x90, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // The first byte is the same.
+ spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0xbf, 0xbf, 0xbe,
+ 0xf0, 0xbf, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newEOFToken(),
+ },
+ },
+ {
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ // all 4 byte characters
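+ // [\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf] covers U+10000..U+10FFFF. The inputs
+ // below exercise each lead-byte segment (0xf0 / 0xf1-0xf3 / 0xf4).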
+ spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+ },
+ },
+ src: string([]byte{
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0x90, 0x80, 0x81,
+ 0xf0, 0xbf, 0xbf, 0xbe,
+ 0xf0, 0xbf, 0xbf, 0xbf,
+ 0xf1, 0x80, 0x80, 0x80,
+ 0xf1, 0x80, 0x80, 0x81,
+ 0xf3, 0xbf, 0xbf, 0xbe,
+ 0xf3, 0xbf, 0xbf, 0xbf,
+ 0xf4, 0x80, 0x80, 0x80,
+ 0xf4, 0x80, 0x80, 0x81,
+ 0xf4, 0x8f, 0xbf, 0xbe,
+ 0xf4, 0x8f, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
+ newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
+ newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
+ newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
+ newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newEOFToken(),
+ },
+ },
}
for _, tt := range test {
clspec, err := compiler.Compile(tt.lspec)
@@ -149,7 +408,7 @@ func TestLexer_Next(t *testing.T) {
break
}
testToken(t, eTok, tok)
- t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match, string(tok.Match), tok.EOF, tok.Invalid)
+ // t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match, string(tok.Match), tok.EOF, tok.Invalid)
if tok.EOF {
break
}