aboutsummaryrefslogtreecommitdiff
path: root/compiler/dfa.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:38:46 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-02-14 17:54:18 +0900
commita1d1cfe08ae809d454ac6f1ce80a19395e7940e5 (patch)
tree9fb55c6b8bbf25e493588442936e65c1cb7755db /compiler/dfa.go
parentAdd driver (diff)
downloadtre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.gz
tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.xz
Add dot symbol matching any single character
The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences
Diffstat (limited to 'compiler/dfa.go')
-rw-r--r--compiler/dfa.go10
1 files changed, 6 insertions, 4 deletions
diff --git a/compiler/dfa.go b/compiler/dfa.go
index 84692a4..fec93ce 100644
--- a/compiler/dfa.go
+++ b/compiler/dfa.go
@@ -29,11 +29,13 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
if pos.isEndMark() {
continue
}
- symVal := int(symTab.symPos2Byte[pos])
- if tranTabOfState[symVal] == nil {
- tranTabOfState[symVal] = newSymbolPositionSet()
+ valRange := symTab.symPos2Byte[pos]
+ for symVal := valRange.from; symVal <= valRange.to; symVal++ {
+ if tranTabOfState[symVal] == nil {
+ tranTabOfState[symVal] = newSymbolPositionSet()
+ }
+ tranTabOfState[symVal].merge(follow[pos])
}
- tranTabOfState[symVal].merge(follow[pos])
}
for _, t := range tranTabOfState {
if t == nil {