author | Ryo Nihei <nihei.dev@gmail.com> | 2021-02-14 17:38:46 +0900
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-02-14 17:54:18 +0900
commit | a1d1cfe08ae809d454ac6f1ce80a19395e7940e5 (patch)
tree | 9fb55c6b8bbf25e493588442936e65c1cb7755db /driver/lexer_test.go
parent | Add driver (diff)
download | tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.gz, tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.xz
Add dot symbol matching any single character
The dot symbol matches any single character. When a dot symbol appears, the parser generates an AST that matches every well-formed UTF-8 byte sequence (a sketch of those byte-sequence ranges follows the references below).
References:
* https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404
* Table 3-6. UTF-8 Bit Distribution
* Table 3-7. Well-Formed UTF-8 Byte Sequences
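Table 3-7 lists the byte-sequence ranges the dot pattern has to cover. The following is a minimal, self-contained Go sketch of those ranges; it is not code from this repository, and the names `byteRange` and `wellFormedUTF8` are hypothetical. Under that assumption, the AST generated for a dot can be thought of as an alternation over exactly these per-byte ranges.

```go
// Sketch only: the rows of Table 3-7 "Well-Formed UTF-8 Byte Sequences"
// (Unicode 13.0), expressed as sequences of per-byte ranges. This program is
// not part of the repository; byteRange and wellFormedUTF8 are hypothetical names.
package main

import "fmt"

// byteRange is a closed interval of byte values.
type byteRange struct{ lo, hi byte }

// wellFormedUTF8 holds one entry per row of Table 3-7; each entry is one
// alternative, i.e. a sequence of per-byte ranges forming a valid code point.
var wellFormedUTF8 = [][]byteRange{
	{{0x00, 0x7f}},                                           // U+0000..U+007F
	{{0xc2, 0xdf}, {0x80, 0xbf}},                             // U+0080..U+07FF
	{{0xe0, 0xe0}, {0xa0, 0xbf}, {0x80, 0xbf}},               // U+0800..U+0FFF
	{{0xe1, 0xec}, {0x80, 0xbf}, {0x80, 0xbf}},               // U+1000..U+CFFF
	{{0xed, 0xed}, {0x80, 0x9f}, {0x80, 0xbf}},               // U+D000..U+D7FF (no surrogates)
	{{0xee, 0xef}, {0x80, 0xbf}, {0x80, 0xbf}},               // U+E000..U+FFFF
	{{0xf0, 0xf0}, {0x90, 0xbf}, {0x80, 0xbf}, {0x80, 0xbf}}, // U+10000..U+3FFFF
	{{0xf1, 0xf3}, {0x80, 0xbf}, {0x80, 0xbf}, {0x80, 0xbf}}, // U+40000..U+FFFFF
	{{0xf4, 0xf4}, {0x80, 0x8f}, {0x80, 0xbf}, {0x80, 0xbf}}, // U+100000..U+10FFFF
}

func main() {
	// Print each alternative as a sequence of [lo-hi] byte classes.
	for _, alt := range wellFormedUTF8 {
		for i, r := range alt {
			if i > 0 {
				fmt.Print(" ")
			}
			fmt.Printf("[%02X-%02X]", r.lo, r.hi)
		}
		fmt.Println()
	}
}
```

The asymmetric rows are the interesting ones: the E0 and ED leads restrict the second byte to rule out overlong encodings and surrogates, and the F0 and F4 leads restrict it to keep code points within U+10000..U+10FFFF. The test case in the diff below probes boundary bytes of these ranges.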
Diffstat (limited to 'driver/lexer_test.go')
-rw-r--r-- | driver/lexer_test.go | 44 |
1 file changed, 43 insertions, 1 deletion
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 4fd4bf8..1a46470 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -35,6 +35,48 @@ func TestLexer_Next(t *testing.T) {
 				newEOFToken(),
 			},
 		},
+		{
+			regexps: [][]byte{
+				[]byte("."),
+			},
+			src: string([]byte{
+				0x00,
+				0x7f,
+				0xc2, 0x80,
+				0xdf, 0xbf,
+				0xe1, 0x80, 0x80,
+				0xec, 0xbf, 0xbf,
+				0xed, 0x80, 0x80,
+				0xed, 0x9f, 0xbf,
+				0xee, 0x80, 0x80,
+				0xef, 0xbf, 0xbf,
+				0xf0, 0x90, 0x80, 0x80,
+				0xf0, 0xbf, 0xbf, 0xbf,
+				0xf1, 0x80, 0x80, 0x80,
+				0xf3, 0xbf, 0xbf, 0xbf,
+				0xf4, 0x80, 0x80, 0x80,
+				0xf4, 0x8f, 0xbf, 0xbf,
+			}),
+			tokens: []*Token{
+				newToken(1, []byte{0x00}),
+				newToken(1, []byte{0x7f}),
+				newToken(1, []byte{0xc2, 0x80}),
+				newToken(1, []byte{0xdf, 0xbf}),
+				newToken(1, []byte{0xe1, 0x80, 0x80}),
+				newToken(1, []byte{0xec, 0xbf, 0xbf}),
+				newToken(1, []byte{0xed, 0x80, 0x80}),
+				newToken(1, []byte{0xed, 0x9f, 0xbf}),
+				newToken(1, []byte{0xee, 0x80, 0x80}),
+				newToken(1, []byte{0xef, 0xbf, 0xbf}),
+				newToken(1, []byte{0xf0, 0x90, 0x80, 0x80}),
+				newToken(1, []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+				newToken(1, []byte{0xf1, 0x80, 0x80, 0x80}),
+				newToken(1, []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+				newToken(1, []byte{0xf4, 0x80, 0x80, 0x80}),
+				newToken(1, []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+				newEOFToken(),
+			},
+		},
 	}
 	for _, tt := range test {
 		res := map[int][]byte{}
@@ -142,6 +184,6 @@ func testToken(t *testing.T, expected, actual *Token) {
 	t.Helper()
 	if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
-		t.Errorf("unexpected token; want: %v, got: %v", expected, actual)
+		t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match))
 	}
 }
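The src bytes in the new test case are boundary values of the Table 3-7 sequence ranges. As a quick cross-check, the standard unicode/utf8 package agrees that each of them is a single well-formed encoding, which is why the lexer is expected to emit exactly one token per sequence. This is a standalone sketch, not part of the repository's test suite:

```go
// Sketch only: verify that every byte sequence used in the new test case is a
// single well-formed UTF-8 encoding according to the standard library.
package main

import (
	"fmt"
	"unicode/utf8"
)

func main() {
	seqs := [][]byte{
		{0x00}, {0x7f},
		{0xc2, 0x80}, {0xdf, 0xbf},
		{0xe1, 0x80, 0x80}, {0xec, 0xbf, 0xbf},
		{0xed, 0x80, 0x80}, {0xed, 0x9f, 0xbf},
		{0xee, 0x80, 0x80}, {0xef, 0xbf, 0xbf},
		{0xf0, 0x90, 0x80, 0x80}, {0xf0, 0xbf, 0xbf, 0xbf},
		{0xf1, 0x80, 0x80, 0x80}, {0xf3, 0xbf, 0xbf, 0xbf},
		{0xf4, 0x80, 0x80, 0x80}, {0xf4, 0x8f, 0xbf, 0xbf},
	}
	for _, s := range seqs {
		// A well-formed sequence must decode to exactly one rune with no
		// leftover or invalid bytes.
		r, _ := utf8.DecodeRune(s)
		ok := utf8.Valid(s) && utf8.RuneCount(s) == 1
		fmt.Printf("% x -> U+%04X single well-formed sequence: %v\n", s, r, ok)
	}
}
```

The improved t.Errorf message in the second hunk prints the matched bytes as strings alongside the token structs, which makes mismatches on these multi-byte inputs much easier to read in test output.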