From a1d1cfe08ae809d454ac6f1ce80a19395e7940e5 Mon Sep 17 00:00:00 2001 From: Ryo Nihei Date: Sun, 14 Feb 2021 17:38:46 +0900 Subject: Add dot symbol matching any single character The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences --- compiler/lexer_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'compiler/lexer_test.go') diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go index b172ae9..75770b0 100644 --- a/compiler/lexer_test.go +++ b/compiler/lexer_test.go @@ -31,8 +31,9 @@ func TestLexer(t *testing.T) { }, { caption: "lexer can recognize the special characters", - src: "*|()", + src: ".*|()", tokens: []*token{ + newToken(tokenKindAnyChar, nullChar), newToken(tokenKindRepeat, nullChar), newToken(tokenKindAlt, nullChar), newToken(tokenKindGroupOpen, nullChar), @@ -42,9 +43,10 @@ func TestLexer(t *testing.T) { }, { caption: "lexer can recognize the escape sequences", - src: "\\\\\\*\\|\\(\\)", + src: "\\\\\\.\\*\\|\\(\\)", tokens: []*token{ newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), newToken(tokenKindChar, '*'), newToken(tokenKindChar, '|'), newToken(tokenKindChar, '('), -- cgit v1.2.3