aboutsummaryrefslogtreecommitdiff
path: root/compiler/parser.go
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/parser.go')
-rw-r--r-- compiler/parser.go 97
1 files changed, 91 insertions, 6 deletions
diff --git a/compiler/parser.go b/compiler/parser.go
index cdc4817..d5c7b3d 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -183,6 +183,28 @@ func (p *parser) parseSingleChar() astNode {
}
return left
}
+ if p.consume(tokenKindInverseBExpOpen) {
+ defer p.expect(tokenKindBExpClose)
+ elem := p.parseBExpElem()
+ if elem == nil {
+ raiseSyntaxError("bracket expression must include at least one character")
+ }
+ inverse := exclude(elem, genAnyCharAST())
+ if inverse == nil {
+ panic(fmt.Errorf("a pattern that isn't matching any symbols"))
+ }
+ for {
+ elem := p.parseBExpElem()
+ if elem == nil {
+ break
+ }
+ inverse = exclude(elem, inverse)
+ if inverse == nil {
+ panic(fmt.Errorf("a pattern that isn't matching any symbols"))
+ }
+ }
+ return inverse
+ }
return p.parseNormalChar()
}
@@ -228,6 +250,38 @@ func (p *parser) parseNormalChar() astNode {
}
}
+// exclude builds the AST that remains after removing symbol from base.
+//
+// An *altNode base is handled by recursing into both of its children and
+// recombining the results with genAltNode. A *symbolNode base is converted to
+// a byte-range sequence, as is symbol, and both are handed to
+// excludeByteRangeSequence; the resulting sequences are turned back into an
+// AST. nil is returned when that call yields no sequences, and also for any
+// other kind of base node.
+//
+// NOTE(review): a *concatNode base matches neither branch and therefore
+// yields nil, although genByteRangeSeq accepts concat nodes. Confirm that
+// multi-node bases are really meant to be dropped here.
+func exclude(symbol, base astNode) astNode {
+	if _, isAlt := base.(*altNode); isAlt {
+		l, r := base.children()
+		lhs := exclude(symbol, l)
+		rhs := exclude(symbol, r)
+		return genAltNode(lhs, rhs)
+	}
+	if _, isSym := base.(*symbolNode); !isSym {
+		return nil
+	}
+	baseRanges := genByteRangeSeq(base)
+	symRanges := genByteRangeSeq(symbol)
+	remaining := excludeByteRangeSequence(symRanges, baseRanges)
+	if len(remaining) == 0 {
+		return nil
+	}
+	return convertByteRangeSeqsToAST(remaining)
+}
+
+// convertByteRangeSeqsToAST turns a list of byte-range sequences into an AST.
+//
+// Each byte range becomes a range symbol node, the nodes of one sequence are
+// joined with genConcatNode, and the per-sequence results are combined with
+// genAltNode.
+func convertByteRangeSeqsToAST(seqs [][]byteRange) astNode {
+	alternatives := make([]astNode, 0, len(seqs))
+	for _, ranges := range seqs {
+		symbols := make([]astNode, 0, len(ranges))
+		for i := range ranges {
+			symbols = append(symbols, newRangeSymbolNode(ranges[i].from, ranges[i].to))
+		}
+		alternatives = append(alternatives, genConcatNode(symbols...))
+	}
+	return genAltNode(alternatives...)
+}
+
// References:
// * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G7404
// * Table 3-6. UTF-8 Bit Distribution
@@ -363,6 +417,18 @@ func genByteSeq(node astNode) []byte {
panic(fmt.Errorf("genByteSeq() cannot handle %T: %v", node, node))
}
+// genByteRangeSeq flattens node into the sequence of byte ranges it spells.
+//
+// A *symbolNode contributes its single from/to range. A *concatNode
+// contributes the ranges of its left child followed by those of its right
+// child. Any other node type causes a panic.
+func genByteRangeSeq(node astNode) []byteRange {
+	if sym, ok := node.(*symbolNode); ok {
+		return []byteRange{{from: sym.from, to: sym.to}}
+	}
+	if cat, ok := node.(*concatNode); ok {
+		// The left side is evaluated first so that the ranges stay in order.
+		head := genByteRangeSeq(cat.left)
+		tail := genByteRangeSeq(cat.right)
+		return append(head, tail...)
+	}
+	panic(fmt.Errorf("genByteRangeSeq() cannot handle %T: %v", node, node))
+}
+
func isValidOrder(from, to []byte) bool {
if len(from) > len(to) {
return false
@@ -853,17 +919,36 @@ func gen4ByteCharRangeAST(from, to []byte) astNode {
}
}
-func genConcatNode(c1, c2 astNode, cn ...astNode) *concatNode {
- concat := newConcatNode(c1, c2)
- for _, c := range cn {
+// genConcatNode folds cs, left to right, into a left-nested chain of concat
+// nodes. It returns nil when cs is empty and cs[0] itself when cs holds a
+// single node, so the result is not necessarily a concat node.
+func genConcatNode(cs ...astNode) astNode {
+ if len(cs) <= 0 {
+ return nil
+ }
+ if len(cs) == 1 {
+ return cs[0]
+ }
+ concat := newConcatNode(cs[0], cs[1])
+ for _, c := range cs[2:] {
concat = newConcatNode(concat, c)
}
return concat
}
-func genAltNode(c1, c2 astNode, cn ...astNode) *altNode {
- alt := newAltNode(c1, c2)
- for _, c := range cn {
+func genAltNode(cs ...astNode) astNode {
+ nonNilNodes := []astNode{}
+ for _, c := range cs {
+ if c == nil {
+ continue
+ }
+ nonNilNodes = append(nonNilNodes, c)
+ }
+ if len(nonNilNodes) <= 0 {
+ return nil
+ }
+ if len(nonNilNodes) == 1 {
+ return nonNilNodes[0]
+ }
+ alt := newAltNode(nonNilNodes[0], nonNilNodes[1])
+ for _, c := range nonNilNodes[2:] {
alt = newAltNode(alt, c)
}
return alt