aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/ast.go20
-rw-r--r--compiler/lexer.go78
-rw-r--r--compiler/lexer_test.go73
-rw-r--r--compiler/parser.go103
-rw-r--r--compiler/parser_test.go1012
-rw-r--r--compiler/test_util_test.go6
6 files changed, 1193 insertions, 99 deletions
diff --git a/compiler/ast.go b/compiler/ast.go
index e9c1b50..803d5a9 100644
--- a/compiler/ast.go
+++ b/compiler/ast.go
@@ -331,7 +331,7 @@ func newRepeatOneOrMoreNode(left astNode) *concatNode {
return newConcatNode(
left,
&repeatNode{
- left: left,
+ left: copyAST(left),
})
}
@@ -369,6 +369,24 @@ func (n *optionNode) last() symbolPositionSet {
return s
}
+func copyAST(src astNode) astNode {
+ switch n := src.(type) {
+ case *symbolNode:
+ return newRangeSymbolNode(n.from, n.to)
+ case *endMarkerNode:
+ return newEndMarkerNode(n.id)
+ case *concatNode:
+ return newConcatNode(copyAST(n.left), copyAST(n.right))
+ case *altNode:
+ return newAltNode(copyAST(n.left), copyAST(n.right))
+ case *repeatNode:
+ return newRepeatNode(copyAST(n.left))
+ case *optionNode:
+ return newOptionNode(copyAST(n.left))
+ }
+ panic(fmt.Errorf("copyAST cannot handle %T type; AST: %v", src, src))
+}
+
type followTable map[symbolPosition]symbolPositionSet
func genFollowTable(root astNode) followTable {
diff --git a/compiler/lexer.go b/compiler/lexer.go
index dba667f..19c7105 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -45,6 +45,20 @@ const (
lexerModeBExp = lexerMode("bracket expression")
)
+type rangeState string
+
+// [a-z]
+// ^^^^
+// |||`-- ready
+// ||`-- expect range terminator
+// |`-- read range initiator
+// `-- ready
+const (
+ rangeStateReady = rangeState("ready")
+ rangeStateReadRangeInitiator = rangeState("read range initiator")
+ rangeStateExpectRangeTerminator = rangeState("expect range terminator")
+)
+
type lexer struct {
src *bufio.Reader
peekChar2 rune
@@ -58,6 +72,7 @@ type lexer struct {
prevChar2 rune
pervEOF2 bool
mode lexerMode
+ rangeState rangeState
}
func newLexer(src io.Reader) *lexer {
@@ -74,6 +89,7 @@ func newLexer(src io.Reader) *lexer {
prevChar2: nullChar,
pervEOF2: false,
mode: lexerModeDefault,
+ rangeState: rangeStateReady,
}
}
@@ -88,9 +104,38 @@ func (l *lexer) next() (*token, error) {
switch l.mode {
case lexerModeBExp:
- return l.nextInBExp(c)
+ tok, err := l.nextInBExp(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindBExpClose:
+ l.mode = lexerModeDefault
+ case tokenKindCharRange:
+ l.rangeState = rangeStateExpectRangeTerminator
+ case tokenKindChar:
+ switch l.rangeState {
+ case rangeStateReady:
+ l.rangeState = rangeStateReadRangeInitiator
+ case rangeStateExpectRangeTerminator:
+ l.rangeState = rangeStateReady
+ }
+ }
+ return tok, nil
default:
- return l.nextInDefault(c)
+ tok, err := l.nextInDefault(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindBExpOpen:
+ l.mode = lexerModeBExp
+ l.rangeState = rangeStateReady
+ case tokenKindInverseBExpOpen:
+ l.mode = lexerModeBExp
+ l.rangeState = rangeStateReady
+ }
+ return tok, nil
}
}
@@ -111,7 +156,6 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
case ')':
return newToken(tokenKindGroupClose, nullChar), nil
case '[':
- l.mode = lexerModeBExp
c1, eof, err := l.read()
if err != nil {
return nil, err
@@ -185,9 +229,33 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
func (l *lexer) nextInBExp(c rune) (*token, error) {
switch c {
case '-':
- return newToken(tokenKindCharRange, nullChar), nil
+ if l.rangeState != rangeStateReadRangeInitiator {
+ return newToken(tokenKindChar, c), nil
+ }
+ c1, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindCharRange, nullChar), nil
+ }
+ if c1 != ']' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindCharRange, nullChar), nil
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindChar, c), nil
case ']':
- l.mode = lexerModeDefault
return newToken(tokenKindBExpClose, nullChar), nil
case '\\':
c, eof, err := l.read()
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index 0dfe2f7..451c3c7 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -31,7 +31,7 @@ func TestLexer(t *testing.T) {
},
{
caption: "lexer can recognize the special characters",
- src: ".*+?|()[-][^^]",
+ src: ".*+?|()[a-z][^^]",
tokens: []*token{
newToken(tokenKindAnyChar, nullChar),
newToken(tokenKindRepeat, nullChar),
@@ -41,7 +41,9 @@ func TestLexer(t *testing.T) {
newToken(tokenKindGroupOpen, nullChar),
newToken(tokenKindGroupClose, nullChar),
newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
newToken(tokenKindBExpClose, nullChar),
newToken(tokenKindInverseBExpOpen, nullChar),
newToken(tokenKindChar, '^'),
@@ -98,6 +100,75 @@ func TestLexer(t *testing.T) {
},
},
{
+ caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
+ // [...-...][...-][-...][-]
+ // ~~~~~~~ ~ ~ ~
+ // ^ ^ ^ ^
+ // | | | `-- Ordinary Character (b)
+ // | | `-- Ordinary Character (b)
+ // | `-- Ordinary Character (b)
+ // `-- Character Range (a)
+ //
+ // a. *-* is handled as a character range expression.
+ // b. *-, -*, or - are handled as ordinary characters.
+ src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
// [^...^...][^]
// ~~ ~ ~~
diff --git a/compiler/parser.go b/compiler/parser.go
index d5c7b3d..623a174 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -2,7 +2,6 @@ package compiler
import (
"bytes"
- "errors"
"fmt"
"io"
)
@@ -96,23 +95,23 @@ func newParser(id int, src io.Reader) *parser {
}
}
-func (p *parser) parse() (astNode, error) {
- return p.parseRegexp()
-}
-
-func (p *parser) parseRegexp() (ast astNode, retErr error) {
+func (p *parser) parse() (ast astNode, retErr error) {
defer func() {
err := recover()
if err != nil {
- retErr = err.(error)
- var synErr SyntaxError
- if !errors.Is(retErr, &synErr) {
- panic(err)
+ var ok bool
+ retErr, ok = err.(error)
+ if !ok {
+ retErr = fmt.Errorf("Error: %v", err)
}
return
}
}()
+ return p.parseRegexp()
+}
+
+func (p *parser) parseRegexp() (astNode, error) {
alt := p.parseAlt()
p.expect(tokenKindEOF)
return newConcatNode(alt, newEndMarkerNode(p.id)), nil
@@ -120,11 +119,17 @@ func (p *parser) parseRegexp() (ast astNode, retErr error) {
func (p *parser) parseAlt() astNode {
left := p.parseConcat()
+ if left == nil {
+ raiseSyntaxError("alternation expression must have operands")
+ }
for {
if !p.consume(tokenKindAlt) {
break
}
right := p.parseConcat()
+ if right == nil {
+ raiseSyntaxError("alternation expression must have operands")
+ }
left = newAltNode(left, right)
}
return left
@@ -144,6 +149,9 @@ func (p *parser) parseConcat() astNode {
func (p *parser) parseRepeat() astNode {
group := p.parseGroup()
+ if group == nil {
+ return nil
+ }
if p.consume(tokenKindRepeat) {
return newRepeatNode(group)
}
@@ -159,7 +167,11 @@ func (p *parser) parseRepeat() astNode {
func (p *parser) parseGroup() astNode {
if p.consume(tokenKindGroupOpen) {
defer p.expect(tokenKindGroupClose)
- return p.parseAlt()
+ alt := p.parseAlt()
+ if alt == nil {
+ raiseSyntaxError("grouping expression must include at least one character")
+ }
+ return alt
}
return p.parseSingleChar()
}
@@ -169,7 +181,6 @@ func (p *parser) parseSingleChar() astNode {
return genAnyCharAST()
}
if p.consume(tokenKindBExpOpen) {
- defer p.expect(tokenKindBExpClose)
left := p.parseBExpElem()
if left == nil {
raiseSyntaxError("bracket expression must include at least one character")
@@ -181,10 +192,10 @@ func (p *parser) parseSingleChar() astNode {
}
left = newAltNode(left, right)
}
+ p.expect(tokenKindBExpClose)
return left
}
if p.consume(tokenKindInverseBExpOpen) {
- defer p.expect(tokenKindBExpClose)
elem := p.parseBExpElem()
if elem == nil {
raiseSyntaxError("bracket expression must include at least one character")
@@ -203,6 +214,7 @@ func (p *parser) parseSingleChar() astNode {
panic(fmt.Errorf("a pattern that isn't matching any symbols"))
}
}
+ p.expect(tokenKindBExpClose)
return inverse
}
return p.parseNormalChar()
@@ -210,6 +222,9 @@ func (p *parser) parseSingleChar() astNode {
func (p *parser) parseBExpElem() astNode {
left := p.parseNormalChar()
+ if left == nil {
+ return nil
+ }
if !p.consume(tokenKindCharRange) {
return left
}
@@ -258,6 +273,14 @@ func exclude(symbol, base astNode) astNode {
exclude(symbol, left),
exclude(symbol, right),
)
+ case *concatNode:
+ baseSeq := genByteRangeSeq(base)
+ symSeq := genByteRangeSeq(symbol)
+ excluded := excludeByteRangeSequence(symSeq, baseSeq)
+ if len(excluded) <= 0 {
+ return nil
+ }
+ return convertByteRangeSeqsToAST(excluded)
case *symbolNode:
baseSeq := genByteRangeSeq(base)
symSeq := genByteRangeSeq(symbol)
@@ -448,6 +471,38 @@ func isValidOrder(from, to []byte) bool {
return true
}
+type byteBoundsEntry struct {
+ min byte
+ max byte
+}
+
+var (
+ bounds1 = [][]byteBoundsEntry{
+ nil,
+ {{min: 0x00, max: 0x7f}},
+ }
+
+ bounds2 = [][]byteBoundsEntry{
+ nil,
+ {{min: 0xc2, max: 0xdf}, {min: 0x80, max: 0xbf}},
+ }
+
+ bounds3 = [][]byteBoundsEntry{
+ nil,
+ {{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}},
+ {{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+ {{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}},
+ {{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+ }
+
+ bounds4 = [][]byteBoundsEntry{
+ nil,
+ {{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+ {{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+ {{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
+ }
+)
+
func gen1ByteCharRangeAST(from, to []byte) astNode {
return newRangeSymbolNode(from[0], to[0])
}
@@ -481,28 +536,6 @@ func gen2ByteCharRangeAST(from, to []byte) astNode {
}
}
-type byteBoundsEntry struct {
- min byte
- max byte
-}
-
-var (
- bounds3 = [][]byteBoundsEntry{
- nil,
- {{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}},
- {{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- }
-
- bounds4 = [][]byteBoundsEntry{
- nil,
- {{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- {{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}},
- }
-)
-
func get3ByteCharRangeNum(seq []byte) int {
head := seq[0]
switch {
diff --git a/compiler/parser_test.go b/compiler/parser_test.go
index 3bad222..fd44b5d 100644
--- a/compiler/parser_test.go
+++ b/compiler/parser_test.go
@@ -1,16 +1,13 @@
package compiler
import (
+ "fmt"
"os"
"reflect"
"testing"
)
-func TestParser(t *testing.T) {
- rune2Byte := func(char rune, index int) byte {
- return []byte(string(char))[index]
- }
-
+func TestParse(t *testing.T) {
symPos := func(n uint8) symbolPosition {
return newSymbolPosition(n, false)
}
@@ -19,69 +16,970 @@ func TestParser(t *testing.T) {
return newSymbolPosition(n, true)
}
- root, symTab, err := parse(map[int][]byte{
- 1: []byte("(a|b)*abb"),
- })
- if err != nil {
- t.Fatal(err)
- }
- if root == nil {
- t.Fatal("root of AST is nil")
+ tests := []struct {
+ patterns []string
+ ast astNode
+ syntaxError bool
+ }{
+ {
+ patterns: []string{
+ "a",
+ },
+ ast: genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "abc",
+ },
+ ast: genConcatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ patterns: []string{
+ "a?",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[abc]?",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(a)?",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "((a?)?)?",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ newOptionNode(
+ newOptionNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ ),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(abc)?",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(a|b)?",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(3)),
+ ),
+ },
+ {
+ patterns: []string{
+ "?",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "a??",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "a*",
+ },
+ ast: genConcatNode(
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), 1),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[abc]*",
+ },
+ ast: genConcatNode(
+ newRepeatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), 1),
+ newSymbolNodeWithPos(byte('b'), 2),
+ newSymbolNodeWithPos(byte('c'), 3),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ patterns: []string{
+ "((a*)*)*",
+ },
+ ast: genConcatNode(
+ newRepeatNode(
+ newRepeatNode(
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), 1),
+ ),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(abc)*",
+ },
+ ast: genConcatNode(
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), 1),
+ newSymbolNodeWithPos(byte('b'), 2),
+ newSymbolNodeWithPos(byte('c'), 3),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(a|b)*",
+ },
+ ast: genConcatNode(
+ newRepeatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), 1),
+ newSymbolNodeWithPos(byte('b'), 2),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(3)),
+ ),
+ },
+ {
+ patterns: []string{
+ "*",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "a**",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "a+",
+ },
+ ast: genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(2)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(3)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[abc]+",
+ },
+ ast: genConcatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ newRepeatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(4)),
+ newSymbolNodeWithPos(byte('b'), symPos(5)),
+ newSymbolNodeWithPos(byte('c'), symPos(6)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(7)),
+ ),
+ },
+ {
+ patterns: []string{
+ "((a+)+)+",
+ },
+ ast: genConcatNode(
+ genConcatNode(
+ genConcatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(2)),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(3)),
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(4)),
+ ),
+ ),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(5)),
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(6)),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(7)),
+ newRepeatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(8)),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(9)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(abc)+",
+ },
+ ast: genConcatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(4)),
+ newSymbolNodeWithPos(byte('b'), symPos(5)),
+ newSymbolNodeWithPos(byte('c'), symPos(6)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(7)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(a|b)+",
+ },
+ ast: genConcatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ ),
+ newRepeatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(3)),
+ newSymbolNodeWithPos(byte('b'), symPos(4)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(5)),
+ ),
+ },
+ {
+ patterns: []string{
+ "+",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "a++",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ ".",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newRangeSymbolNodeWithPos(bounds1[1][0].min, bounds1[1][0].max, symPos(1)),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(2)),
+ newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(3)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(4)),
+ newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(5)),
+ newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(6)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(7)),
+ newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(8)),
+ newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(9)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(10)),
+ newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(11)),
+ newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(12)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(13)),
+ newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(14)),
+ newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(15)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(16)),
+ newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(17)),
+ newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(18)),
+ newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(19)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(20)),
+ newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(21)),
+ newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(22)),
+ newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(23)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(24)),
+ newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(25)),
+ newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(26)),
+ newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(27)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(28)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[a]",
+ },
+ ast: newConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[abc]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[a-z]",
+ },
+ ast: newConcatNode(
+ newRangeSymbolNodeWithPos(byte('a'), byte('z'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[A-Za-z]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newRangeSymbolNodeWithPos(byte('A'), byte('Z'), symPos(1)),
+ newRangeSymbolNodeWithPos(byte('a'), byte('z'), symPos(2)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(3)),
+ ),
+ },
+ {
+ patterns: []string{
+ "a[]",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[]a",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[]",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[^]",
+ },
+ ast: newConcatNode(
+ newSymbolNodeWithPos(byte('^'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[a",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[a-",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[^",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[^a",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[^a-",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "]",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "[a-]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('-'), symPos(2)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(3)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[^a-]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)),
+ newRangeSymbolNodeWithPos(byte(46), byte(96), symPos(2)),
+ newRangeSymbolNodeWithPos(byte(98), bounds1[1][0].max, symPos(3)),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)),
+ newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)),
+ newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)),
+ newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)),
+ newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)),
+ newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)),
+ newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)),
+ newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)),
+ newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)),
+ newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)),
+ newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)),
+ newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)),
+ newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)),
+ newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)),
+ newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)),
+ newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)),
+ newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)),
+ newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)),
+ newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(30)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[-z]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newSymbolNodeWithPos(byte('-'), symPos(1)),
+ newSymbolNodeWithPos(byte('z'), symPos(2)),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(3)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[^-z]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)),
+ genAltNode(
+ newRangeSymbolNodeWithPos(byte(46), byte(121), symPos(2)),
+ newRangeSymbolNodeWithPos(byte(123), bounds1[1][0].max, symPos(3)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)),
+ newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)),
+ newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)),
+ newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)),
+ newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)),
+ newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)),
+ newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)),
+ newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)),
+ newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)),
+ newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)),
+ newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)),
+ newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)),
+ newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)),
+ newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)),
+ newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)),
+ newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)),
+ newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)),
+ newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)),
+ newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(30)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[-]",
+ },
+ ast: newConcatNode(
+ newSymbolNodeWithPos(byte('-'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "[^-]",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)),
+ newRangeSymbolNodeWithPos(byte(46), bounds1[1][0].max, symPos(2)),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(3)),
+ newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(4)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(5)),
+ newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(6)),
+ newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(7)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(8)),
+ newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(9)),
+ newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(10)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(11)),
+ newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(12)),
+ newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(13)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(14)),
+ newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(15)),
+ newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(16)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(17)),
+ newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(18)),
+ newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(19)),
+ newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(20)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(21)),
+ newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(22)),
+ newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(23)),
+ newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(24)),
+ ),
+ genConcatNode(
+ newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(25)),
+ newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(26)),
+ newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(27)),
+ newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(28)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(29)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(a)",
+ },
+ ast: newConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "(((a)))",
+ },
+ ast: newConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newEndMarkerNodeWithPos(1, endPos(2)),
+ ),
+ },
+ {
+ patterns: []string{
+ "a()",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "()a",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "()",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "(",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ ")",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "Mulder|Scully",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('M'), symPos(1)),
+ newSymbolNodeWithPos(byte('u'), symPos(2)),
+ newSymbolNodeWithPos(byte('l'), symPos(3)),
+ newSymbolNodeWithPos(byte('d'), symPos(4)),
+ newSymbolNodeWithPos(byte('e'), symPos(5)),
+ newSymbolNodeWithPos(byte('r'), symPos(6)),
+ ),
+ genConcatNode(
+ newSymbolNodeWithPos(byte('S'), symPos(7)),
+ newSymbolNodeWithPos(byte('c'), symPos(8)),
+ newSymbolNodeWithPos(byte('u'), symPos(9)),
+ newSymbolNodeWithPos(byte('l'), symPos(10)),
+ newSymbolNodeWithPos(byte('l'), symPos(11)),
+ newSymbolNodeWithPos(byte('y'), symPos(12)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(13)),
+ ),
+ },
+ {
+ patterns: []string{
+ "Langly|Frohike|Byers",
+ },
+ ast: newConcatNode(
+ genAltNode(
+ genConcatNode(
+ newSymbolNodeWithPos(byte('L'), symPos(1)),
+ newSymbolNodeWithPos(byte('a'), symPos(2)),
+ newSymbolNodeWithPos(byte('n'), symPos(3)),
+ newSymbolNodeWithPos(byte('g'), symPos(4)),
+ newSymbolNodeWithPos(byte('l'), symPos(5)),
+ newSymbolNodeWithPos(byte('y'), symPos(6)),
+ ),
+ genConcatNode(
+ newSymbolNodeWithPos(byte('F'), symPos(7)),
+ newSymbolNodeWithPos(byte('r'), symPos(8)),
+ newSymbolNodeWithPos(byte('o'), symPos(9)),
+ newSymbolNodeWithPos(byte('h'), symPos(10)),
+ newSymbolNodeWithPos(byte('i'), symPos(11)),
+ newSymbolNodeWithPos(byte('k'), symPos(12)),
+ newSymbolNodeWithPos(byte('e'), symPos(13)),
+ ),
+ genConcatNode(
+ newSymbolNodeWithPos(byte('B'), symPos(14)),
+ newSymbolNodeWithPos(byte('y'), symPos(15)),
+ newSymbolNodeWithPos(byte('e'), symPos(16)),
+ newSymbolNodeWithPos(byte('r'), symPos(17)),
+ newSymbolNodeWithPos(byte('s'), symPos(18)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(19)),
+ ),
+ },
+ {
+ patterns: []string{
+ "|",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "||",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "Mulder|",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "|Scully",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "Langly|Frohike|",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "Langly||Byers",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "|Frohike|Byers",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "|Frohike|",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "Fox(|)Mulder",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "(Fox|)Mulder",
+ },
+ syntaxError: true,
+ },
+ {
+ patterns: []string{
+ "Fox(|Mulder)",
+ },
+ syntaxError: true,
+ },
}
- printAST(os.Stdout, root, "", "", false)
-
- {
- expectedAST := genConcatNode(
- newRepeatNode(
- newAltNode(
- newSymbolNodeWithPos(rune2Byte('a', 0), symPos(1)),
- newSymbolNodeWithPos(rune2Byte('b', 0), symPos(2)),
- ),
- ),
- newSymbolNodeWithPos(rune2Byte('a', 0), symPos(3)),
- newSymbolNodeWithPos(rune2Byte('b', 0), symPos(4)),
- newSymbolNodeWithPos(rune2Byte('b', 0), symPos(5)),
- newEndMarkerNodeWithPos(1, endPos(6)),
- )
- testAST(t, expectedAST, root)
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ps := map[int][]byte{}
+ for i, p := range tt.patterns {
+ ps[i+1] = []byte(p)
+ }
+ root, _, err := parse(ps)
+ if tt.syntaxError {
+ // printAST(os.Stdout, root, "", "", false)
+ if err == nil {
+ t.Fatalf("expected syntax error; got: nil")
+ }
+ if _, ok := err.(*SyntaxError); !ok {
+ t.Fatalf("expected syntax error; got: %v (type: %T)", err, err)
+ }
+ if root != nil {
+ t.Fatalf("root is not nil")
+ }
+ } else {
+ if err != nil {
+ t.Fatal(err)
+ }
+ if root == nil {
+ t.Fatal("AST is nil")
+ }
+ // printAST(os.Stdout, root, "", "", false)
+ testAST(t, tt.ast, root)
+ }
+ })
}
+ // Test a FOLLOE table and a symbol table
{
- followTab := genFollowTable(root)
- if followTab == nil {
- t.Fatal("follow table is nil")
+ root, symTab, err := parse(map[int][]byte{
+ 1: []byte("(a|b)*abb"),
+ })
+ if err != nil {
+ t.Fatal(err)
}
- expectedFollowTab := followTable{
- 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
- 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
- 3: newSymbolPositionSet().add(symPos(4)),
- 4: newSymbolPositionSet().add(symPos(5)),
- 5: newSymbolPositionSet().add(endPos(6)),
+ if root == nil {
+ t.Fatal("root of AST is nil")
}
- testFollowTable(t, expectedFollowTab, followTab)
- }
+ printAST(os.Stdout, root, "", "", false)
- {
- entry := func(v byte) byteRange {
- return byteRange{
- from: v,
- to: v,
+ {
+ expectedAST := genConcatNode(
+ newRepeatNode(
+ newAltNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ ),
+ ),
+ newSymbolNodeWithPos(byte('a'), symPos(3)),
+ newSymbolNodeWithPos(byte('b'), symPos(4)),
+ newSymbolNodeWithPos(byte('b'), symPos(5)),
+ newEndMarkerNodeWithPos(1, endPos(6)),
+ )
+ testAST(t, expectedAST, root)
+ }
+
+ {
+ followTab := genFollowTable(root)
+ if followTab == nil {
+ t.Fatal("follow table is nil")
}
+ expectedFollowTab := followTable{
+ 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 3: newSymbolPositionSet().add(symPos(4)),
+ 4: newSymbolPositionSet().add(symPos(5)),
+ 5: newSymbolPositionSet().add(endPos(6)),
+ }
+ testFollowTable(t, expectedFollowTab, followTab)
}
- expectedSymTab := &symbolTable{
- symPos2Byte: map[symbolPosition]byteRange{
- symPos(1): entry(byte('a')),
- symPos(2): entry(byte('b')),
- symPos(3): entry(byte('a')),
- symPos(4): entry(byte('b')),
- symPos(5): entry(byte('b')),
- },
- endPos2ID: map[symbolPosition]int{
- endPos(6): 1,
- },
+ {
+ entry := func(v byte) byteRange {
+ return byteRange{
+ from: v,
+ to: v,
+ }
+ }
+
+ expectedSymTab := &symbolTable{
+ symPos2Byte: map[symbolPosition]byteRange{
+ symPos(1): entry(byte('a')),
+ symPos(2): entry(byte('b')),
+ symPos(3): entry(byte('a')),
+ symPos(4): entry(byte('b')),
+ symPos(5): entry(byte('b')),
+ },
+ endPos2ID: map[symbolPosition]int{
+ endPos(6): 1,
+ },
+ }
+ testSymbolTable(t, expectedSymTab, symTab)
}
- testSymbolTable(t, expectedSymTab, symTab)
}
}
diff --git a/compiler/test_util_test.go b/compiler/test_util_test.go
index 7e6896d..2ead2c9 100644
--- a/compiler/test_util_test.go
+++ b/compiler/test_util_test.go
@@ -1,5 +1,11 @@
package compiler
+func newRangeSymbolNodeWithPos(from, to byte, pos symbolPosition) *symbolNode {
+ n := newRangeSymbolNode(from, to)
+ n.pos = pos
+ return n
+}
+
func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode {
n := newSymbolNode(v)
n.pos = pos