aboutsummaryrefslogtreecommitdiff
path: root/compiler/lexer_test.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:38:46 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:54:18 +0900
commita1d1cfe08ae809d454ac6f1ce80a19395e7940e5 (patch)
tree9fb55c6b8bbf25e493588442936e65c1cb7755db /compiler/lexer_test.go
parentAdd driver (diff)
downloadtre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.gz
tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.xz
Add dot symbol matching any single character
The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences
Diffstat (limited to 'compiler/lexer_test.go')
-rw-r--r--compiler/lexer_test.go6
1 files changed, 4 insertions, 2 deletions
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index b172ae9..75770b0 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -31,8 +31,9 @@ func TestLexer(t *testing.T) {
},
{
caption: "lexer can recognize the special characters",
- src: "*|()",
+ src: ".*|()",
tokens: []*token{
+ newToken(tokenKindAnyChar, nullChar),
newToken(tokenKindRepeat, nullChar),
newToken(tokenKindAlt, nullChar),
newToken(tokenKindGroupOpen, nullChar),
@@ -42,9 +43,10 @@ func TestLexer(t *testing.T) {
},
{
caption: "lexer can recognize the escape sequences",
- src: "\\\\\\*\\|\\(\\)",
+ src: "\\\\\\.\\*\\|\\(\\)",
tokens: []*token{
newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
newToken(tokenKindChar, '*'),
newToken(tokenKindChar, '|'),
newToken(tokenKindChar, '('),