diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-02-14 17:38:46 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-02-14 17:54:18 +0900 |
commit | a1d1cfe08ae809d454ac6f1ce80a19395e7940e5 (patch) | |
tree | 9fb55c6b8bbf25e493588442936e65c1cb7755db /compiler/dfa.go | |
parent | Add driver (diff) | |
download | tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.gz tre-a1d1cfe08ae809d454ac6f1ce80a19395e7940e5.tar.xz |
Add dot symbol matching any single character
The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences.
References:
* https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404
* Table 3-6. UTF-8 Bit Distribution
* Table 3-7. Well-Formed UTF-8 Byte Sequences
Diffstat (limited to 'compiler/dfa.go')
-rw-r--r-- | compiler/dfa.go | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/compiler/dfa.go b/compiler/dfa.go
index 84692a4..fec93ce 100644
--- a/compiler/dfa.go
+++ b/compiler/dfa.go
@@ -29,11 +29,13 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
 				if pos.isEndMark() {
 					continue
 				}
-				symVal := int(symTab.symPos2Byte[pos])
-				if tranTabOfState[symVal] == nil {
-					tranTabOfState[symVal] = newSymbolPositionSet()
+				valRange := symTab.symPos2Byte[pos]
+				for symVal := valRange.from; symVal <= valRange.to; symVal++ {
+					if tranTabOfState[symVal] == nil {
+						tranTabOfState[symVal] = newSymbolPositionSet()
+					}
+					tranTabOfState[symVal].merge(follow[pos])
 				}
-				tranTabOfState[symVal].merge(follow[pos])
 			}
 			for _, t := range tranTabOfState {
 				if t == nil {