From a1d1cfe08ae809d454ac6f1ce80a19395e7940e5 Mon Sep 17 00:00:00 2001 From: Ryo Nihei Date: Sun, 14 Feb 2021 17:38:46 +0900 Subject: Add dot symbol matching any single character The dot symbol matches any single character. When the dot symbol appears, the parser generates an AST matching all of the well-formed UTF-8 byte sequences. References: * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 * Table 3-6. UTF-8 Bit Distribution * Table 3-7. Well-Formed UTF-8 Byte Sequences --- compiler/dfa.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'compiler/dfa.go') diff --git a/compiler/dfa.go b/compiler/dfa.go index 84692a4..fec93ce 100644 --- a/compiler/dfa.go +++ b/compiler/dfa.go @@ -29,11 +29,13 @@ func genDFA(root astNode, symTab *symbolTable) *DFA { if pos.isEndMark() { continue } - symVal := int(symTab.symPos2Byte[pos]) - if tranTabOfState[symVal] == nil { - tranTabOfState[symVal] = newSymbolPositionSet() + valRange := symTab.symPos2Byte[pos] + for symVal := valRange.from; symVal <= valRange.to; symVal++ { + if tranTabOfState[symVal] == nil { + tranTabOfState[symVal] = newSymbolPositionSet() + } + tranTabOfState[symVal].merge(follow[pos]) } - tranTabOfState[symVal].merge(follow[pos]) } for _, t := range tranTabOfState { if t == nil { -- cgit v1.2.3