aboutsummaryrefslogtreecommitdiff
path: root/compiler/lexer.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:38:46 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:54:18 +0900
commita1d1cfe08ae809d454ac6f1ce80a19395e7940e5 (patch)
tree9fb55c6b8bbf25e493588442936e65c1cb7755db /compiler/lexer.go
parentAdd driver (diff)
downloadtre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.gz
tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.xz
Add dot symbol matching any single character
The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences
Diffstat (limited to 'compiler/lexer.go')
-rw-r--r--compiler/lexer.go5
1 files changed, 4 insertions, 1 deletions
diff --git a/compiler/lexer.go b/compiler/lexer.go
index f78b920..1c09260 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -10,6 +10,7 @@ type tokenKind string
const (
tokenKindChar = tokenKind("char")
+ tokenKindAnyChar = tokenKind(".")
tokenKindRepeat = tokenKind("*")
tokenKindAlt = tokenKind("|")
tokenKindGroupOpen = tokenKind("(")
@@ -59,6 +60,8 @@ func (l *lexer) next() (*token, error) {
switch c {
case '*':
return newToken(tokenKindRepeat, nullChar), nil
+ case '.':
+ return newToken(tokenKindAnyChar, nullChar), nil
case '|':
return newToken(tokenKindAlt, nullChar), nil
case '(':
@@ -76,7 +79,7 @@ func (l *lexer) next() (*token, error) {
}
}
switch {
- case c == '\\' || c == '*' || c == '|' || c == '(' || c == ')':
+ case c == '\\' || c == '.' || c == '*' || c == '|' || c == '(' || c == ')':
return newToken(tokenKindChar, c), nil
default:
return nil, &SyntaxError{