From a1d1cfe08ae809d454ac6f1ce80a19395e7940e5 Mon Sep 17 00:00:00 2001
From: Ryo Nihei <nihei.dev@gmail.com>
Date: Sun, 14 Feb 2021 17:38:46 +0900
Subject: Add dot symbol matching any single character

The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences.

References:
* https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404
  * Table 3-6.  UTF-8 Bit Distribution
  * Table 3-7.  Well-Formed UTF-8 Byte Sequences
---
 compiler/lexer.go | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'compiler/lexer.go')

diff --git a/compiler/lexer.go b/compiler/lexer.go
index f78b920..1c09260 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -10,6 +10,7 @@ type tokenKind string
 
 const (
 	tokenKindChar       = tokenKind("char")
+	tokenKindAnyChar    = tokenKind(".")
 	tokenKindRepeat     = tokenKind("*")
 	tokenKindAlt        = tokenKind("|")
 	tokenKindGroupOpen  = tokenKind("(")
@@ -59,6 +60,8 @@ func (l *lexer) next() (*token, error) {
 	switch c {
 	case '*':
 		return newToken(tokenKindRepeat, nullChar), nil
+	case '.':
+		return newToken(tokenKindAnyChar, nullChar), nil
 	case '|':
 		return newToken(tokenKindAlt, nullChar), nil
 	case '(':
@@ -76,7 +79,7 @@ func (l *lexer) next() (*token, error) {
 			}
 		}
 		switch {
-		case c == '\\' || c == '*' || c == '|' || c == '(' || c == ')':
+		case c == '\\' || c == '.' || c == '*' || c == '|' || c == '(' || c == ')':
 			return newToken(tokenKindChar, c), nil
 		default:
 			return nil, &SyntaxError{
-- 
cgit v1.2.3