aboutsummaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/parser.go608
-rw-r--r--compiler/parser_test.go222
2 files changed, 128 insertions, 702 deletions
diff --git a/compiler/parser.go b/compiler/parser.go
index 89c8301..ce481e3 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -10,6 +10,7 @@ import (
"github.com/nihei9/maleeni/spec"
"github.com/nihei9/maleeni/ucd"
+ "github.com/nihei9/maleeni/utf8"
)
type ParseErrors struct {
@@ -725,124 +726,30 @@ func convertByteRangeSeqsToAST(seqs [][]byteRange) astNode {
return genAltNode(concats...)
}
-// Refelences:
-// * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404
-// * Table 3-6. UTF-8 Bit Distribution
-// * Table 3-7. Well-Formed UTF-8 Byte Sequences
func genAnyCharAST() astNode {
- return genAltNode(
- // 1 byte character <00..7F>
- newRangeSymbolNode(0x00, 0x7f),
- // 2 bytes character <C2..DF 80..BF>
- genConcatNode(
- newRangeSymbolNode(0xc2, 0xdf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 3 bytes character <E0 A0..BF 80..BF>
- genConcatNode(
- newSymbolNode(0xe0),
- newRangeSymbolNode(0xa0, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 3 bytes character <E1..EC 80..BF 80..BF>
- genConcatNode(
- newRangeSymbolNode(0xe1, 0xec),
- newRangeSymbolNode(0x80, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 3 bytes character <ED 80..9F 80..BF>
- genConcatNode(
- newSymbolNode(0xed),
- newRangeSymbolNode(0x80, 0x9f),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 3 bytes character <EE..EF 80..BF 80..BF>
- genConcatNode(
- newRangeSymbolNode(0xee, 0xef),
- newRangeSymbolNode(0x80, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 4 bytes character <F0 90..BF 80..BF 80..BF>
- genConcatNode(
- newSymbolNode(0xf0),
- newRangeSymbolNode(0x90, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 4 bytes character <F1..F3 80..BF 80..BF 80..BF>
- genConcatNode(
- newRangeSymbolNode(0xf1, 0xf3),
- newRangeSymbolNode(0x80, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- // 4 bytes character <F4 80..8F 80..BF 80..BF>
- genConcatNode(
- newSymbolNode(0xf4),
- newRangeSymbolNode(0x80, 0x8f),
- newRangeSymbolNode(0x80, 0xbf),
- newRangeSymbolNode(0x80, 0xbf),
- ),
- )
+ return convertCharBlocksToAST(utf8.AllCharBlocks())
}
func genRangeAST(fromNode, toNode astNode) astNode {
from := genByteSeq(fromNode)
to := genByteSeq(toNode)
- switch len(from) {
- case 1:
- switch len(to) {
- case 1:
- return gen1ByteCharRangeAST(from, to)
- case 2:
- return genAltNode(
- gen1ByteCharRangeAST(from, []byte{0x7f}),
- gen2ByteCharRangeAST([]byte{0xc2, 0x80}, to),
- )
- case 3:
- return genAltNode(
- gen1ByteCharRangeAST(from, []byte{0x7f}),
- gen2ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xdf, 0xbf}),
- gen3ByteCharRangeAST([]byte{0xe0, 0xa0, 0x80}, to),
- )
- case 4:
- return genAltNode(
- gen1ByteCharRangeAST(from, []byte{0x7f}),
- gen2ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xdf, 0xbf}),
- gen3ByteCharRangeAST([]byte{0xe0, 0xa0, 0x80}, []byte{0xef, 0xbf, 0xbf}),
- gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to),
- )
- }
- case 2:
- switch len(to) {
- case 2:
- return gen2ByteCharRangeAST(from, to)
- case 3:
- return genAltNode(
- gen2ByteCharRangeAST(from, []byte{0xdf, 0xbf}),
- gen3ByteCharRangeAST([]byte{0xc2, 0x80}, to),
- )
- case 4:
- return genAltNode(
- gen2ByteCharRangeAST(from, []byte{0xdf, 0xbf}),
- gen3ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xef, 0xbf, 0xbf}),
- gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to),
- )
- }
- case 3:
- switch len(to) {
- case 3:
- return gen3ByteCharRangeAST(from, to)
- case 4:
- return genAltNode(
- gen3ByteCharRangeAST(from, []byte{0xef, 0xbf, 0xbf}),
- gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to),
- )
+ blks, err := utf8.GenCharBlocks(from, to)
+ if err != nil {
+ panic(err)
+ }
+ return convertCharBlocksToAST(blks)
+}
+
+func convertCharBlocksToAST(blks []*utf8.CharBlock) astNode {
+ var alt astNode
+ for _, blk := range blks {
+ r := make([]astNode, len(blk.From))
+ for i := 0; i < len(blk.From); i++ {
+ r[i] = newRangeSymbolNode(blk.From[i], blk.To[i])
}
- case 4:
- return gen4ByteCharRangeAST(from, to)
+ alt = genAltNode(alt, genConcatNode(r...))
}
- panic(fmt.Errorf("invalid range; from: %v, to: %v", from, to))
+ return alt
}
func genByteSeq(node astNode) []byte {
@@ -888,487 +795,6 @@ func isValidOrder(from, to []byte) bool {
return true
}
-type byteBoundsEntry struct {
- min byte
- max byte
-}
-
-var (
- bounds1 = [][]byteBoundsEntry{
- nil,
- {{min: 0x00, max: 0x7f}},
- }
-
- bounds2 = [][]byteBoundsEntry{
- nil,
- {{min: 0xc2, max: 0xdf}, {min: 0x80, max: 0xbf}},
- }
-
- bounds3 = [][]byteBoundsEntry{
- nil,
- {{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}},
- {{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- }
-
- bounds4 = [][]byteBoundsEntry{
- nil,
- {{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- }
-)
-
-func gen1ByteCharRangeAST(from, to []byte) astNode {
- return newRangeSymbolNode(from[0], to[0])
-}
-
-func gen2ByteCharRangeAST(from, to []byte) astNode {
- from0 := from[0]
- from1 := from[1]
- to0 := to[0]
- to1 := to[1]
- switch {
- case from0 == to0 && from1 == to1:
- return genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- )
- case from0 == to0:
- return genConcatNode(
- newSymbolNode(from0),
- newRangeSymbolNode(from1, to1),
- )
- default:
- alt1 := genConcatNode(
- newSymbolNode(from0),
- newRangeSymbolNode(from1, 0xbf),
- )
- alt2 := genConcatNode(
- newRangeSymbolNode(from0+1, to0),
- newRangeSymbolNode(0x80, to1),
- )
- return newAltNode(alt1, alt2)
- }
-}
-
-func get3ByteCharRangeNum(seq []byte) int {
- head := seq[0]
- switch {
- case head == 0xe0:
- return 1
- case head >= 0xe1 && head <= 0xec:
- return 2
- case head == 0xed:
- return 3
- case head >= 0xee && head <= 0xef:
- return 4
- }
- return 0
-}
-
-func get4ByteCharRangeNum(seq []byte) int {
- head := seq[0]
- switch {
- case head == 0xf0:
- return 1
- case head >= 0xf1 && head <= 0xf3:
- return 2
- case head == 0xf4:
- return 3
- }
- return 0
-}
-
-func gen3ByteCharRangeAST(from, to []byte) astNode {
- from0 := from[0]
- from1 := from[1]
- from2 := from[2]
- to0 := to[0]
- to1 := to[1]
- to2 := to[2]
- switch {
- case from0 == to0 && from1 == to1 && from2 == to2:
- return genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(from2),
- )
- case from0 == to0 && from1 == to1:
- return genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newRangeSymbolNode(from2, to2),
- )
- case from0 == to0:
- rangeNum := get3ByteCharRangeNum(from)
- bounds := bounds3[rangeNum]
- var alt astNode
- alt = genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newRangeSymbolNode(from2, bounds[2].max),
- )
- if from1+1 < to1 {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newRangeSymbolNode(from1+1, to1-1),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(to1),
- newRangeSymbolNode(bounds[2].min, to2),
- ),
- )
- return alt
- default:
- fromRangeNum := get3ByteCharRangeNum(from)
- toRangeNum := get3ByteCharRangeNum(to)
- bounds := bounds3[fromRangeNum]
- var alt astNode
- alt = genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newRangeSymbolNode(from2, bounds[2].max),
- )
- if from1 < bounds[1].max {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newRangeSymbolNode(from1+1, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- if fromRangeNum == toRangeNum {
- if from0+1 < to0 {
- alt = genAltNode(
- alt,
- genConcatNode(
- newRangeSymbolNode(from0+1, to0-1),
- newRangeSymbolNode(bounds[1].min, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- if to1 > bounds[1].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newRangeSymbolNode(bounds[1].min, to1-1),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newSymbolNode(to1),
- newRangeSymbolNode(bounds[2].min, to2),
- ),
- )
- return alt
- }
- for rangeNum := fromRangeNum + 1; rangeNum < toRangeNum; rangeNum++ {
- bounds := bounds3[rangeNum]
- alt = genAltNode(
- alt,
- genConcatNode(
- newRangeSymbolNode(bounds[0].min, bounds[0].max),
- newRangeSymbolNode(bounds[1].min, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- bounds = bounds3[toRangeNum]
- if to0 > bounds[0].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newRangeSymbolNode(bounds[0].min, to0-1),
- newRangeSymbolNode(bounds[1].min, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- if to1 > bounds[1].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newRangeSymbolNode(bounds[1].min, to1-1),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newSymbolNode(to1),
- newRangeSymbolNode(bounds[2].min, to2),
- ),
- )
- return alt
- }
-}
-
-func gen4ByteCharRangeAST(from, to []byte) astNode {
- from0 := from[0]
- from1 := from[1]
- from2 := from[2]
- from3 := from[3]
- to0 := to[0]
- to1 := to[1]
- to2 := to[2]
- to3 := to[3]
- switch {
- case from0 == to0 && from1 == to1 && from2 == to2 && from3 == to3:
- return genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(from2),
- newSymbolNode(from3),
- )
- case from0 == to0 && from1 == to1 && from2 == to2:
- return genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(from2),
- newRangeSymbolNode(from3, to3),
- )
- case from0 == to0 && from1 == to1:
- rangeNum := get4ByteCharRangeNum(from)
- bounds := bounds4[rangeNum]
- var alt astNode
- alt = genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(from2),
- newRangeSymbolNode(from3, bounds[3].max),
- )
- if from2+1 < to2 {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newRangeSymbolNode(from2+1, to2-1),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(to2),
- newRangeSymbolNode(bounds[3].min, to3),
- ),
- )
- return alt
- case from0 == to0:
- rangeNum := get4ByteCharRangeNum(from)
- bounds := bounds4[rangeNum]
- var alt astNode
- alt = genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(from2),
- newRangeSymbolNode(from3, bounds[3].max),
- )
- if from2 < bounds[2].max {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newRangeSymbolNode(from2+1, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if from1+1 < to1 {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newRangeSymbolNode(from1+1, to1-1),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if to2 > bounds[2].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(to1),
- newRangeSymbolNode(bounds[2].min, to2-1),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(to1),
- newSymbolNode(to2),
- newRangeSymbolNode(bounds[3].min, to3),
- ),
- )
- return alt
- default:
- fromRangeNum := get4ByteCharRangeNum(from)
- toRangeNum := get4ByteCharRangeNum(to)
- bounds := bounds4[fromRangeNum]
- var alt astNode
- alt = genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newSymbolNode(from2),
- newRangeSymbolNode(from3, bounds[3].max),
- )
- if from2 < bounds[2].max {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newSymbolNode(from1),
- newRangeSymbolNode(from2+1, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if from1 < bounds[1].max {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(from0),
- newRangeSymbolNode(from1+1, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if fromRangeNum == toRangeNum {
- if from0+1 < to0 {
- alt = genAltNode(
- alt,
- genConcatNode(
- newRangeSymbolNode(from0+1, to0-1),
- newRangeSymbolNode(bounds[1].min, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if to1 > bounds[1].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newRangeSymbolNode(bounds[1].min, to1-1),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if to2 > bounds[2].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newSymbolNode(to1),
- newRangeSymbolNode(bounds[2].min, to2-1),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newSymbolNode(to1),
- newSymbolNode(to2),
- newRangeSymbolNode(bounds[3].min, to3),
- ),
- )
- return alt
- }
- for rangeNum := fromRangeNum + 1; rangeNum < toRangeNum; rangeNum++ {
- bounds := bounds4[rangeNum]
- alt = genAltNode(
- alt,
- genConcatNode(
- newRangeSymbolNode(bounds[0].min, bounds[0].max),
- newRangeSymbolNode(bounds[1].min, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- bounds = bounds4[toRangeNum]
- if to0 > bounds[0].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newRangeSymbolNode(bounds[0].min, to0-1),
- newRangeSymbolNode(bounds[1].min, bounds[1].max),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if to1 > bounds[1].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newRangeSymbolNode(bounds[1].min, to1-1),
- newRangeSymbolNode(bounds[2].min, bounds[2].max),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- if to2 > bounds[2].min {
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newSymbolNode(to1),
- newRangeSymbolNode(bounds[2].min, to2-1),
- newRangeSymbolNode(bounds[3].min, bounds[3].max),
- ),
- )
- }
- alt = genAltNode(
- alt,
- genConcatNode(
- newSymbolNode(to0),
- newSymbolNode(to1),
- newSymbolNode(to2),
- newRangeSymbolNode(bounds[3].min, to3),
- ),
- )
- return alt
- }
-}
-
func genConcatNode(cs ...astNode) astNode {
if len(cs) <= 0 {
return nil
diff --git a/compiler/parser_test.go b/compiler/parser_test.go
index e4a6fe2..b0bc67a 100644
--- a/compiler/parser_test.go
+++ b/compiler/parser_test.go
@@ -469,48 +469,48 @@ func TestParse(t *testing.T) {
pattern: ".",
ast: newConcatNode(
genAltNode(
- newRangeSymbolNodeWithPos(bounds1[1][0].min, bounds1[1][0].max, symPos(1)),
+ newRangeSymbolNodeWithPos(0x00, 0x7f, symPos(1)),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(2)),
- newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(3)),
+ newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(2)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(3)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(4)),
- newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(5)),
- newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(6)),
+ newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(4)),
+ newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(5)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(6)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(7)),
- newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(8)),
- newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(9)),
+ newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(7)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(8)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(9)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(10)),
- newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(11)),
- newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(12)),
+ newRangeSymbolNodeWithPos(0xed, 0xed, symPos(10)),
+ newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(11)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(12)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(13)),
- newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(14)),
- newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(15)),
+ newRangeSymbolNodeWithPos(0xee, 0xef, symPos(13)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(14)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(15)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(16)),
- newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(17)),
- newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(18)),
- newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(19)),
+ newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(16)),
+ newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(17)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(18)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(19)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(20)),
- newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(21)),
- newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(22)),
- newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(23)),
+ newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(20)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(21)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(22)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(24)),
- newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(25)),
- newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(26)),
- newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(27)),
+ newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(24)),
+ newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(25)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(26)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(27)),
),
),
newEndMarkerNodeWithPos(1, endPos(28)),
@@ -676,50 +676,50 @@ func TestParse(t *testing.T) {
pattern: "[^a-]",
ast: newConcatNode(
genAltNode(
- newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)),
+ newRangeSymbolNodeWithPos(0x00, byte(44), symPos(1)),
newRangeSymbolNodeWithPos(byte(46), byte(96), symPos(2)),
- newRangeSymbolNodeWithPos(byte(98), bounds1[1][0].max, symPos(3)),
+ newRangeSymbolNodeWithPos(byte(98), 0x7f, symPos(3)),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)),
- newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)),
+ newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(4)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(5)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)),
- newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)),
- newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)),
+ newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(6)),
+ newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(7)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(8)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)),
- newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)),
- newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)),
+ newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(9)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(10)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(11)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)),
- newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)),
- newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)),
+ newRangeSymbolNodeWithPos(0xed, 0xed, symPos(12)),
+ newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(13)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(14)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)),
- newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)),
- newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)),
+ newRangeSymbolNodeWithPos(0xee, 0xef, symPos(15)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(16)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(17)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)),
- newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)),
- newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)),
- newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)),
+ newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(18)),
+ newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(19)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(20)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(21)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)),
- newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)),
- newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)),
- newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)),
+ newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(22)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(24)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(25)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)),
- newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)),
- newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)),
- newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)),
+ newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(26)),
+ newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(27)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(28)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(29)),
),
),
newEndMarkerNodeWithPos(1, endPos(30)),
@@ -739,52 +739,52 @@ func TestParse(t *testing.T) {
pattern: "[^-z]",
ast: newConcatNode(
genAltNode(
- newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)),
+ newRangeSymbolNodeWithPos(0x00, byte(44), symPos(1)),
genAltNode(
newRangeSymbolNodeWithPos(byte(46), byte(121), symPos(2)),
- newRangeSymbolNodeWithPos(byte(123), bounds1[1][0].max, symPos(3)),
+ newRangeSymbolNodeWithPos(byte(123), 0x7f, symPos(3)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)),
- newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)),
+ newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(4)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(5)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)),
- newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)),
- newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)),
+ newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(6)),
+ newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(7)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(8)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)),
- newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)),
- newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)),
+ newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(9)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(10)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(11)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)),
- newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)),
- newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)),
+ newRangeSymbolNodeWithPos(0xed, 0xed, symPos(12)),
+ newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(13)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(14)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)),
- newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)),
- newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)),
+ newRangeSymbolNodeWithPos(0xee, 0xef, symPos(15)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(16)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(17)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)),
- newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)),
- newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)),
- newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)),
+ newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(18)),
+ newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(19)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(20)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(21)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)),
- newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)),
- newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)),
- newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)),
+ newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(22)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(24)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(25)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)),
- newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)),
- newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)),
- newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)),
+ newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(26)),
+ newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(27)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(28)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(29)),
),
),
newEndMarkerNodeWithPos(1, endPos(30)),
@@ -801,49 +801,49 @@ func TestParse(t *testing.T) {
pattern: "[^-]",
ast: newConcatNode(
genAltNode(
- newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)),
- newRangeSymbolNodeWithPos(byte(46), bounds1[1][0].max, symPos(2)),
+ newRangeSymbolNodeWithPos(0x00, byte(44), symPos(1)),
+ newRangeSymbolNodeWithPos(byte(46), 0x7f, symPos(2)),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(3)),
- newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(4)),
+ newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(3)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(4)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(5)),
- newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(6)),
- newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(7)),
+ newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(5)),
+ newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(6)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(7)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(8)),
- newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(9)),
- newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(10)),
+ newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(8)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(9)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(10)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(11)),
- newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(12)),
- newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(13)),
+ newRangeSymbolNodeWithPos(0xed, 0xed, symPos(11)),
+ newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(12)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(13)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(14)),
- newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(15)),
- newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(16)),
+ newRangeSymbolNodeWithPos(0xee, 0xef, symPos(14)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(15)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(16)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(17)),
- newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(18)),
- newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(19)),
- newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(20)),
+ newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(17)),
+ newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(18)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(19)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(20)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(21)),
- newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(22)),
- newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(23)),
- newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(24)),
+ newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(21)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(22)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(24)),
),
genConcatNode(
- newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(25)),
- newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(26)),
- newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(27)),
- newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(28)),
+ newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(25)),
+ newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(26)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(27)),
+ newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(28)),
),
),
newEndMarkerNodeWithPos(1, endPos(29)),