diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/parser.go | 608 | ||||
-rw-r--r-- | compiler/parser_test.go | 222 |
2 files changed, 128 insertions, 702 deletions
diff --git a/compiler/parser.go b/compiler/parser.go index 89c8301..ce481e3 100644 --- a/compiler/parser.go +++ b/compiler/parser.go @@ -10,6 +10,7 @@ import ( "github.com/nihei9/maleeni/spec" "github.com/nihei9/maleeni/ucd" + "github.com/nihei9/maleeni/utf8" ) type ParseErrors struct { @@ -725,124 +726,30 @@ func convertByteRangeSeqsToAST(seqs [][]byteRange) astNode { return genAltNode(concats...) } -// Refelences: -// * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404 -// * Table 3-6. UTF-8 Bit Distribution -// * Table 3-7. Well-Formed UTF-8 Byte Sequences func genAnyCharAST() astNode { - return genAltNode( - // 1 byte character <00..7F> - newRangeSymbolNode(0x00, 0x7f), - // 2 bytes character <C2..DF 80..BF> - genConcatNode( - newRangeSymbolNode(0xc2, 0xdf), - newRangeSymbolNode(0x80, 0xbf), - ), - // 3 bytes character <E0 A0..BF 80..BF> - genConcatNode( - newSymbolNode(0xe0), - newRangeSymbolNode(0xa0, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - ), - // 3 bytes character <E1..EC 80..BF 80..BF> - genConcatNode( - newRangeSymbolNode(0xe1, 0xec), - newRangeSymbolNode(0x80, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - ), - // 3 bytes character <ED 80..9F 80..BF> - genConcatNode( - newSymbolNode(0xed), - newRangeSymbolNode(0x80, 0x9f), - newRangeSymbolNode(0x80, 0xbf), - ), - // 3 bytes character <EE..EF 80..BF 80..BF> - genConcatNode( - newRangeSymbolNode(0xee, 0xef), - newRangeSymbolNode(0x80, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - ), - // 4 bytes character <F0 90..BF 80..BF 80..BF> - genConcatNode( - newSymbolNode(0xf0), - newRangeSymbolNode(0x90, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - ), - // 4 bytes character <F1..F3 80..BF 80..BF 80..BF> - genConcatNode( - newRangeSymbolNode(0xf1, 0xf3), - newRangeSymbolNode(0x80, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - ), - // 4 bytes character <F4 80..8F 80..BF 80..BF> - genConcatNode( - newSymbolNode(0xf4), - newRangeSymbolNode(0x80, 0x8f), - newRangeSymbolNode(0x80, 0xbf), - newRangeSymbolNode(0x80, 0xbf), - ), - ) + return convertCharBlocksToAST(utf8.AllCharBlocks()) } func genRangeAST(fromNode, toNode astNode) astNode { from := genByteSeq(fromNode) to := genByteSeq(toNode) - switch len(from) { - case 1: - switch len(to) { - case 1: - return gen1ByteCharRangeAST(from, to) - case 2: - return genAltNode( - gen1ByteCharRangeAST(from, []byte{0x7f}), - gen2ByteCharRangeAST([]byte{0xc2, 0x80}, to), - ) - case 3: - return genAltNode( - gen1ByteCharRangeAST(from, []byte{0x7f}), - gen2ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xdf, 0xbf}), - gen3ByteCharRangeAST([]byte{0xe0, 0xa0, 0x80}, to), - ) - case 4: - return genAltNode( - gen1ByteCharRangeAST(from, []byte{0x7f}), - gen2ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xdf, 0xbf}), - gen3ByteCharRangeAST([]byte{0xe0, 0xa0, 0x80}, []byte{0xef, 0xbf, 0xbf}), - gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to), - ) - } - case 2: - switch len(to) { - case 2: - return gen2ByteCharRangeAST(from, to) - case 3: - return genAltNode( - gen2ByteCharRangeAST(from, []byte{0xdf, 0xbf}), - gen3ByteCharRangeAST([]byte{0xc2, 0x80}, to), - ) - case 4: - return genAltNode( - gen2ByteCharRangeAST(from, []byte{0xdf, 0xbf}), - gen3ByteCharRangeAST([]byte{0xc2, 0x80}, []byte{0xef, 0xbf, 0xbf}), - gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to), - ) - } - case 3: - switch len(to) { - case 3: - return gen3ByteCharRangeAST(from, to) - case 4: - return genAltNode( - gen3ByteCharRangeAST(from, []byte{0xef, 0xbf, 0xbf}), - gen4ByteCharRangeAST([]byte{0xf0, 0x90, 0x80}, to), - ) + blks, err := utf8.GenCharBlocks(from, to) + if err != nil { + panic(err) + } + return convertCharBlocksToAST(blks) +} + +func convertCharBlocksToAST(blks []*utf8.CharBlock) astNode { + var alt astNode + for _, blk := range blks { + r := make([]astNode, len(blk.From)) + for i := 0; i < len(blk.From); i++ { + r[i] = newRangeSymbolNode(blk.From[i], blk.To[i]) } - case 4: - return gen4ByteCharRangeAST(from, to) + alt = genAltNode(alt, genConcatNode(r...)) } - panic(fmt.Errorf("invalid range; from: %v, to: %v", from, to)) + return alt } func genByteSeq(node astNode) []byte { @@ -888,487 +795,6 @@ func isValidOrder(from, to []byte) bool { return true } -type byteBoundsEntry struct { - min byte - max byte -} - -var ( - bounds1 = [][]byteBoundsEntry{ - nil, - {{min: 0x00, max: 0x7f}}, - } - - bounds2 = [][]byteBoundsEntry{ - nil, - {{min: 0xc2, max: 0xdf}, {min: 0x80, max: 0xbf}}, - } - - bounds3 = [][]byteBoundsEntry{ - nil, - {{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}}, - {{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - } - - bounds4 = [][]byteBoundsEntry{ - nil, - {{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - } -) - -func gen1ByteCharRangeAST(from, to []byte) astNode { - return newRangeSymbolNode(from[0], to[0]) -} - -func gen2ByteCharRangeAST(from, to []byte) astNode { - from0 := from[0] - from1 := from[1] - to0 := to[0] - to1 := to[1] - switch { - case from0 == to0 && from1 == to1: - return genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - ) - case from0 == to0: - return genConcatNode( - newSymbolNode(from0), - newRangeSymbolNode(from1, to1), - ) - default: - alt1 := genConcatNode( - newSymbolNode(from0), - newRangeSymbolNode(from1, 0xbf), - ) - alt2 := genConcatNode( - newRangeSymbolNode(from0+1, to0), - newRangeSymbolNode(0x80, to1), - ) - return newAltNode(alt1, alt2) - } -} - -func get3ByteCharRangeNum(seq []byte) int { - head := seq[0] - switch { - case head == 0xe0: - return 1 - case head >= 0xe1 && head <= 0xec: - return 2 - case head == 0xed: - return 3 - case head >= 0xee && head <= 0xef: - return 4 - } - return 0 -} - -func get4ByteCharRangeNum(seq []byte) int { - head := seq[0] - switch { - case head == 0xf0: - return 1 - case head >= 0xf1 && head <= 0xf3: - return 2 - case head == 0xf4: - return 3 - } - return 0 -} - -func gen3ByteCharRangeAST(from, to []byte) astNode { - from0 := from[0] - from1 := from[1] - from2 := from[2] - to0 := to[0] - to1 := to[1] - to2 := to[2] - switch { - case from0 == to0 && from1 == to1 && from2 == to2: - return genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(from2), - ) - case from0 == to0 && from1 == to1: - return genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newRangeSymbolNode(from2, to2), - ) - case from0 == to0: - rangeNum := get3ByteCharRangeNum(from) - bounds := bounds3[rangeNum] - var alt astNode - alt = genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newRangeSymbolNode(from2, bounds[2].max), - ) - if from1+1 < to1 { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newRangeSymbolNode(from1+1, to1-1), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(to1), - newRangeSymbolNode(bounds[2].min, to2), - ), - ) - return alt - default: - fromRangeNum := get3ByteCharRangeNum(from) - toRangeNum := get3ByteCharRangeNum(to) - bounds := bounds3[fromRangeNum] - var alt astNode - alt = genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newRangeSymbolNode(from2, bounds[2].max), - ) - if from1 < bounds[1].max { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newRangeSymbolNode(from1+1, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - if fromRangeNum == toRangeNum { - if from0+1 < to0 { - alt = genAltNode( - alt, - genConcatNode( - newRangeSymbolNode(from0+1, to0-1), - newRangeSymbolNode(bounds[1].min, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - if to1 > bounds[1].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newRangeSymbolNode(bounds[1].min, to1-1), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newSymbolNode(to1), - newRangeSymbolNode(bounds[2].min, to2), - ), - ) - return alt - } - for rangeNum := fromRangeNum + 1; rangeNum < toRangeNum; rangeNum++ { - bounds := bounds3[rangeNum] - alt = genAltNode( - alt, - genConcatNode( - newRangeSymbolNode(bounds[0].min, bounds[0].max), - newRangeSymbolNode(bounds[1].min, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - bounds = bounds3[toRangeNum] - if to0 > bounds[0].min { - alt = genAltNode( - alt, - genConcatNode( - newRangeSymbolNode(bounds[0].min, to0-1), - newRangeSymbolNode(bounds[1].min, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - if to1 > bounds[1].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newRangeSymbolNode(bounds[1].min, to1-1), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newSymbolNode(to1), - newRangeSymbolNode(bounds[2].min, to2), - ), - ) - return alt - } -} - -func gen4ByteCharRangeAST(from, to []byte) astNode { - from0 := from[0] - from1 := from[1] - from2 := from[2] - from3 := from[3] - to0 := to[0] - to1 := to[1] - to2 := to[2] - to3 := to[3] - switch { - case from0 == to0 && from1 == to1 && from2 == to2 && from3 == to3: - return genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(from2), - newSymbolNode(from3), - ) - case from0 == to0 && from1 == to1 && from2 == to2: - return genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(from2), - newRangeSymbolNode(from3, to3), - ) - case from0 == to0 && from1 == to1: - rangeNum := get4ByteCharRangeNum(from) - bounds := bounds4[rangeNum] - var alt astNode - alt = genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(from2), - newRangeSymbolNode(from3, bounds[3].max), - ) - if from2+1 < to2 { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newRangeSymbolNode(from2+1, to2-1), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(to2), - newRangeSymbolNode(bounds[3].min, to3), - ), - ) - return alt - case from0 == to0: - rangeNum := get4ByteCharRangeNum(from) - bounds := bounds4[rangeNum] - var alt astNode - alt = genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(from2), - newRangeSymbolNode(from3, bounds[3].max), - ) - if from2 < bounds[2].max { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newRangeSymbolNode(from2+1, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if from1+1 < to1 { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newRangeSymbolNode(from1+1, to1-1), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if to2 > bounds[2].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(to1), - newRangeSymbolNode(bounds[2].min, to2-1), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(to1), - newSymbolNode(to2), - newRangeSymbolNode(bounds[3].min, to3), - ), - ) - return alt - default: - fromRangeNum := get4ByteCharRangeNum(from) - toRangeNum := get4ByteCharRangeNum(to) - bounds := bounds4[fromRangeNum] - var alt astNode - alt = genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newSymbolNode(from2), - newRangeSymbolNode(from3, bounds[3].max), - ) - if from2 < bounds[2].max { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newSymbolNode(from1), - newRangeSymbolNode(from2+1, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if from1 < bounds[1].max { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(from0), - newRangeSymbolNode(from1+1, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if fromRangeNum == toRangeNum { - if from0+1 < to0 { - alt = genAltNode( - alt, - genConcatNode( - newRangeSymbolNode(from0+1, to0-1), - newRangeSymbolNode(bounds[1].min, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if to1 > bounds[1].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newRangeSymbolNode(bounds[1].min, to1-1), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if to2 > bounds[2].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newSymbolNode(to1), - newRangeSymbolNode(bounds[2].min, to2-1), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newSymbolNode(to1), - newSymbolNode(to2), - newRangeSymbolNode(bounds[3].min, to3), - ), - ) - return alt - } - for rangeNum := fromRangeNum + 1; rangeNum < toRangeNum; rangeNum++ { - bounds := bounds4[rangeNum] - alt = genAltNode( - alt, - genConcatNode( - newRangeSymbolNode(bounds[0].min, bounds[0].max), - newRangeSymbolNode(bounds[1].min, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - bounds = bounds4[toRangeNum] - if to0 > bounds[0].min { - alt = genAltNode( - alt, - genConcatNode( - newRangeSymbolNode(bounds[0].min, to0-1), - newRangeSymbolNode(bounds[1].min, bounds[1].max), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if to1 > bounds[1].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newRangeSymbolNode(bounds[1].min, to1-1), - newRangeSymbolNode(bounds[2].min, bounds[2].max), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - if to2 > bounds[2].min { - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newSymbolNode(to1), - newRangeSymbolNode(bounds[2].min, to2-1), - newRangeSymbolNode(bounds[3].min, bounds[3].max), - ), - ) - } - alt = genAltNode( - alt, - genConcatNode( - newSymbolNode(to0), - newSymbolNode(to1), - newSymbolNode(to2), - newRangeSymbolNode(bounds[3].min, to3), - ), - ) - return alt - } -} - func genConcatNode(cs ...astNode) astNode { if len(cs) <= 0 { return nil diff --git a/compiler/parser_test.go b/compiler/parser_test.go index e4a6fe2..b0bc67a 100644 --- a/compiler/parser_test.go +++ b/compiler/parser_test.go @@ -469,48 +469,48 @@ func TestParse(t *testing.T) { pattern: ".", ast: newConcatNode( genAltNode( - newRangeSymbolNodeWithPos(bounds1[1][0].min, bounds1[1][0].max, symPos(1)), + newRangeSymbolNodeWithPos(0x00, 0x7f, symPos(1)), genConcatNode( - newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(2)), - newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(3)), + newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(2)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(3)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(4)), - newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(5)), - newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(6)), + newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(4)), + newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(5)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(6)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(7)), - newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(8)), - newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(9)), + newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(7)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(8)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(9)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(10)), - newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(11)), - newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(12)), + newRangeSymbolNodeWithPos(0xed, 0xed, symPos(10)), + newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(11)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(12)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(13)), - newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(14)), - newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(15)), + newRangeSymbolNodeWithPos(0xee, 0xef, symPos(13)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(14)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(15)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(16)), - newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(17)), - newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(18)), - newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(19)), + newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(16)), + newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(17)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(18)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(19)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(20)), - newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(21)), - newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(22)), - newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(23)), + newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(20)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(21)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(22)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(24)), - newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(25)), - newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(26)), - newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(27)), + newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(24)), + newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(25)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(26)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(27)), ), ), newEndMarkerNodeWithPos(1, endPos(28)), @@ -676,50 +676,50 @@ func TestParse(t *testing.T) { pattern: "[^a-]", ast: newConcatNode( genAltNode( - newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(0x00, byte(44), symPos(1)), newRangeSymbolNodeWithPos(byte(46), byte(96), symPos(2)), - newRangeSymbolNodeWithPos(byte(98), bounds1[1][0].max, symPos(3)), + newRangeSymbolNodeWithPos(byte(98), 0x7f, symPos(3)), genConcatNode( - newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)), - newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)), + newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(4)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(5)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)), - newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)), - newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)), + newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(6)), + newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(7)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(8)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)), - newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)), - newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)), + newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(9)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(10)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(11)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)), - newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)), - newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)), + newRangeSymbolNodeWithPos(0xed, 0xed, symPos(12)), + newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(13)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(14)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)), - newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)), - newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)), + newRangeSymbolNodeWithPos(0xee, 0xef, symPos(15)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(16)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(17)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)), - newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)), - newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)), - newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)), + newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(18)), + newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(19)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(20)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(21)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)), - newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)), - newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)), - newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)), + newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(22)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(24)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(25)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)), - newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)), - newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)), - newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)), + newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(26)), + newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(27)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(28)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(29)), ), ), newEndMarkerNodeWithPos(1, endPos(30)), @@ -739,52 +739,52 @@ func TestParse(t *testing.T) { pattern: "[^-z]", ast: newConcatNode( genAltNode( - newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(0x00, byte(44), symPos(1)), genAltNode( newRangeSymbolNodeWithPos(byte(46), byte(121), symPos(2)), - newRangeSymbolNodeWithPos(byte(123), bounds1[1][0].max, symPos(3)), + newRangeSymbolNodeWithPos(byte(123), 0x7f, symPos(3)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)), - newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)), + newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(4)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(5)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)), - newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)), - newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)), + newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(6)), + newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(7)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(8)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)), - newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)), - newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)), + newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(9)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(10)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(11)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)), - newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)), - newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)), + newRangeSymbolNodeWithPos(0xed, 0xed, symPos(12)), + newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(13)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(14)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)), - newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)), - newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)), + newRangeSymbolNodeWithPos(0xee, 0xef, symPos(15)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(16)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(17)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)), - newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)), - newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)), - newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)), + newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(18)), + newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(19)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(20)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(21)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)), - newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)), - newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)), - newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)), + newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(22)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(24)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(25)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)), - newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)), - newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)), - newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)), + newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(26)), + newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(27)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(28)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(29)), ), ), newEndMarkerNodeWithPos(1, endPos(30)), @@ -801,49 +801,49 @@ func TestParse(t *testing.T) { pattern: "[^-]", ast: newConcatNode( genAltNode( - newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), - newRangeSymbolNodeWithPos(byte(46), bounds1[1][0].max, symPos(2)), + newRangeSymbolNodeWithPos(0x00, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(byte(46), 0x7f, symPos(2)), genConcatNode( - newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(3)), - newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(4)), + newRangeSymbolNodeWithPos(0xc2, 0xdf, symPos(3)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(4)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(5)), - newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(6)), - newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(7)), + newRangeSymbolNodeWithPos(0xe0, 0xe0, symPos(5)), + newRangeSymbolNodeWithPos(0xa0, 0xbf, symPos(6)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(7)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(8)), - newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(9)), - newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(10)), + newRangeSymbolNodeWithPos(0xe1, 0xec, symPos(8)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(9)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(10)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(11)), - newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(12)), - newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(13)), + newRangeSymbolNodeWithPos(0xed, 0xed, symPos(11)), + newRangeSymbolNodeWithPos(0x80, 0x9f, symPos(12)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(13)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(14)), - newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(15)), - newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(16)), + newRangeSymbolNodeWithPos(0xee, 0xef, symPos(14)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(15)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(16)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(17)), - newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(18)), - newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(19)), - newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(20)), + newRangeSymbolNodeWithPos(0xf0, 0xf0, symPos(17)), + newRangeSymbolNodeWithPos(0x90, 0xbf, symPos(18)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(19)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(20)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(21)), - newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(22)), - newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(23)), - newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(24)), + newRangeSymbolNodeWithPos(0xf1, 0xf3, symPos(21)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(22)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(23)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(24)), ), genConcatNode( - newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(25)), - newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(26)), - newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(27)), - newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(28)), + newRangeSymbolNodeWithPos(0xf4, 0xf4, symPos(25)), + newRangeSymbolNodeWithPos(0x80, 0x8f, symPos(26)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(27)), + newRangeSymbolNodeWithPos(0x80, 0xbf, symPos(28)), ), ), newEndMarkerNodeWithPos(1, endPos(29)), |