aboutsummaryrefslogtreecommitdiff
path: root/driver/lexer_test.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:38:46 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:54:18 +0900
commita1d1cfe08ae809d454ac6f1ce80a19395e7940e5 (patch)
tree9fb55c6b8bbf25e493588442936e65c1cb7755db /driver/lexer_test.go
parentAdd driver (diff)
downloadtre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.gz
tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.xz
Add dot symbol matching any single character
The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences
Diffstat (limited to 'driver/lexer_test.go')
-rw-r--r--driver/lexer_test.go44
1 files changed, 43 insertions, 1 deletions
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 4fd4bf8..1a46470 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -35,6 +35,48 @@ func TestLexer_Next(t *testing.T) {
newEOFToken(),
},
},
+ {
+ regexps: [][]byte{
+ []byte("."),
+ },
+ src: string([]byte{
+ 0x00,
+ 0x7f,
+ 0xc2, 0x80,
+ 0xdf, 0xbf,
+ 0xe1, 0x80, 0x80,
+ 0xec, 0xbf, 0xbf,
+ 0xed, 0x80, 0x80,
+ 0xed, 0x9f, 0xbf,
+ 0xee, 0x80, 0x80,
+ 0xef, 0xbf, 0xbf,
+ 0xf0, 0x90, 0x80, 0x80,
+ 0xf0, 0xbf, 0xbf, 0xbf,
+ 0xf1, 0x80, 0x80, 0x80,
+ 0xf3, 0xbf, 0xbf, 0xbf,
+ 0xf4, 0x80, 0x80, 0x80,
+ 0xf4, 0x8f, 0xbf, 0xbf,
+ }),
+ tokens: []*Token{
+ newToken(1, []byte{0x00}),
+ newToken(1, []byte{0x7f}),
+ newToken(1, []byte{0xc2, 0x80}),
+ newToken(1, []byte{0xdf, 0xbf}),
+ newToken(1, []byte{0xe1, 0x80, 0x80}),
+ newToken(1, []byte{0xec, 0xbf, 0xbf}),
+ newToken(1, []byte{0xed, 0x80, 0x80}),
+ newToken(1, []byte{0xed, 0x9f, 0xbf}),
+ newToken(1, []byte{0xee, 0x80, 0x80}),
+ newToken(1, []byte{0xef, 0xbf, 0xbf}),
+ newToken(1, []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newToken(1, []byte{0xf1, 0x80, 0x80, 0x80}),
+ newToken(1, []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newToken(1, []byte{0xf4, 0x80, 0x80, 0x80}),
+ newToken(1, []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newEOFToken(),
+ },
+ },
}
for _, tt := range test {
res := map[int][]byte{}
@@ -142,6 +184,6 @@ func testToken(t *testing.T, expected, actual *Token) {
t.Helper()
if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
- t.Errorf("unexpected token; want: %v, got: %v", expected, actual)
+ t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match))
}
}