diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/ast.go | 20 | ||||
-rw-r--r-- | compiler/lexer.go | 78 | ||||
-rw-r--r-- | compiler/lexer_test.go | 73 | ||||
-rw-r--r-- | compiler/parser.go | 103 | ||||
-rw-r--r-- | compiler/parser_test.go | 1012 | ||||
-rw-r--r-- | compiler/test_util_test.go | 6 |
6 files changed, 1193 insertions, 99 deletions
diff --git a/compiler/ast.go b/compiler/ast.go index e9c1b50..803d5a9 100644 --- a/compiler/ast.go +++ b/compiler/ast.go @@ -331,7 +331,7 @@ func newRepeatOneOrMoreNode(left astNode) *concatNode { return newConcatNode( left, &repeatNode{ - left: left, + left: copyAST(left), }) } @@ -369,6 +369,24 @@ func (n *optionNode) last() symbolPositionSet { return s } +func copyAST(src astNode) astNode { + switch n := src.(type) { + case *symbolNode: + return newRangeSymbolNode(n.from, n.to) + case *endMarkerNode: + return newEndMarkerNode(n.id) + case *concatNode: + return newConcatNode(copyAST(n.left), copyAST(n.right)) + case *altNode: + return newAltNode(copyAST(n.left), copyAST(n.right)) + case *repeatNode: + return newRepeatNode(copyAST(n.left)) + case *optionNode: + return newOptionNode(copyAST(n.left)) + } + panic(fmt.Errorf("copyAST cannot handle %T type; AST: %v", src, src)) +} + type followTable map[symbolPosition]symbolPositionSet func genFollowTable(root astNode) followTable { diff --git a/compiler/lexer.go b/compiler/lexer.go index dba667f..19c7105 100644 --- a/compiler/lexer.go +++ b/compiler/lexer.go @@ -45,6 +45,20 @@ const ( lexerModeBExp = lexerMode("bracket expression") ) +type rangeState string + +// [a-z] +// ^^^^ +// |||`-- ready +// ||`-- expect range terminator +// |`-- read range initiator +// `-- ready +const ( + rangeStateReady = rangeState("ready") + rangeStateReadRangeInitiator = rangeState("read range initiator") + rangeStateExpectRangeTerminator = rangeState("expect range terminator") +) + type lexer struct { src *bufio.Reader peekChar2 rune @@ -58,6 +72,7 @@ type lexer struct { prevChar2 rune pervEOF2 bool mode lexerMode + rangeState rangeState } func newLexer(src io.Reader) *lexer { @@ -74,6 +89,7 @@ func newLexer(src io.Reader) *lexer { prevChar2: nullChar, pervEOF2: false, mode: lexerModeDefault, + rangeState: rangeStateReady, } } @@ -88,9 +104,38 @@ func (l *lexer) next() (*token, error) { switch l.mode { case lexerModeBExp: - return l.nextInBExp(c) + tok, err := l.nextInBExp(c) + if err != nil { + return nil, err + } + switch tok.kind { + case tokenKindBExpClose: + l.mode = lexerModeDefault + case tokenKindCharRange: + l.rangeState = rangeStateExpectRangeTerminator + case tokenKindChar: + switch l.rangeState { + case rangeStateReady: + l.rangeState = rangeStateReadRangeInitiator + case rangeStateExpectRangeTerminator: + l.rangeState = rangeStateReady + } + } + return tok, nil default: - return l.nextInDefault(c) + tok, err := l.nextInDefault(c) + if err != nil { + return nil, err + } + switch tok.kind { + case tokenKindBExpOpen: + l.mode = lexerModeBExp + l.rangeState = rangeStateReady + case tokenKindInverseBExpOpen: + l.mode = lexerModeBExp + l.rangeState = rangeStateReady + } + return tok, nil } } @@ -111,7 +156,6 @@ func (l *lexer) nextInDefault(c rune) (*token, error) { case ')': return newToken(tokenKindGroupClose, nullChar), nil case '[': - l.mode = lexerModeBExp c1, eof, err := l.read() if err != nil { return nil, err @@ -185,9 +229,33 @@ func (l *lexer) nextInDefault(c rune) (*token, error) { func (l *lexer) nextInBExp(c rune) (*token, error) { switch c { case '-': - return newToken(tokenKindCharRange, nullChar), nil + if l.rangeState != rangeStateReadRangeInitiator { + return newToken(tokenKindChar, c), nil + } + c1, eof, err := l.read() + if err != nil { + return nil, err + } + if eof { + err := l.restore() + if err != nil { + return nil, err + } + return newToken(tokenKindCharRange, nullChar), nil + } + if c1 != ']' { + err := l.restore() + if err != nil { + return nil, err + } + return newToken(tokenKindCharRange, nullChar), nil + } + err = l.restore() + if err != nil { + return nil, err + } + return newToken(tokenKindChar, c), nil case ']': - l.mode = lexerModeDefault return newToken(tokenKindBExpClose, nullChar), nil case '\\': c, eof, err := l.read() diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go index 0dfe2f7..451c3c7 100644 --- a/compiler/lexer_test.go +++ b/compiler/lexer_test.go @@ -31,7 +31,7 @@ func TestLexer(t *testing.T) { }, { caption: "lexer can recognize the special characters", - src: ".*+?|()[-][^^]", + src: ".*+?|()[a-z][^^]", tokens: []*token{ newToken(tokenKindAnyChar, nullChar), newToken(tokenKindRepeat, nullChar), @@ -41,7 +41,9 @@ func TestLexer(t *testing.T) { newToken(tokenKindGroupOpen, nullChar), newToken(tokenKindGroupClose, nullChar), newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '^'), @@ -98,6 +100,75 @@ func TestLexer(t *testing.T) { }, }, { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // [...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character range expression. + // b. *-, -*, or - are handled as ordinary characters. + src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", // [^...^...][^] // ~~ ~ ~~ diff --git a/compiler/parser.go b/compiler/parser.go index d5c7b3d..623a174 100644 --- a/compiler/parser.go +++ b/compiler/parser.go @@ -2,7 +2,6 @@ package compiler import ( "bytes" - "errors" "fmt" "io" ) @@ -96,23 +95,23 @@ func newParser(id int, src io.Reader) *parser { } } -func (p *parser) parse() (astNode, error) { - return p.parseRegexp() -} - -func (p *parser) parseRegexp() (ast astNode, retErr error) { +func (p *parser) parse() (ast astNode, retErr error) { defer func() { err := recover() if err != nil { - retErr = err.(error) - var synErr SyntaxError - if !errors.Is(retErr, &synErr) { - panic(err) + var ok bool + retErr, ok = err.(error) + if !ok { + retErr = fmt.Errorf("Error: %v", err) } return } }() + return p.parseRegexp() +} + +func (p *parser) parseRegexp() (astNode, error) { alt := p.parseAlt() p.expect(tokenKindEOF) return newConcatNode(alt, newEndMarkerNode(p.id)), nil @@ -120,11 +119,17 @@ func (p *parser) parseRegexp() (ast astNode, retErr error) { func (p *parser) parseAlt() astNode { left := p.parseConcat() + if left == nil { + raiseSyntaxError("alternation expression must have operands") + } for { if !p.consume(tokenKindAlt) { break } right := p.parseConcat() + if right == nil { + raiseSyntaxError("alternation expression must have operands") + } left = newAltNode(left, right) } return left @@ -144,6 +149,9 @@ func (p *parser) parseConcat() astNode { func (p *parser) parseRepeat() astNode { group := p.parseGroup() + if group == nil { + return nil + } if p.consume(tokenKindRepeat) { return newRepeatNode(group) } @@ -159,7 +167,11 @@ func (p *parser) parseRepeat() astNode { func (p *parser) parseGroup() astNode { if p.consume(tokenKindGroupOpen) { defer p.expect(tokenKindGroupClose) - return p.parseAlt() + alt := p.parseAlt() + if alt == nil { + raiseSyntaxError("grouping expression must include at least one character") + } + return alt } return p.parseSingleChar() } @@ -169,7 +181,6 @@ func (p *parser) parseSingleChar() astNode { return genAnyCharAST() } if p.consume(tokenKindBExpOpen) { - defer p.expect(tokenKindBExpClose) left := p.parseBExpElem() if left == nil { raiseSyntaxError("bracket expression must include at least one character") @@ -181,10 +192,10 @@ func (p *parser) parseSingleChar() astNode { } left = newAltNode(left, right) } + p.expect(tokenKindBExpClose) return left } if p.consume(tokenKindInverseBExpOpen) { - defer p.expect(tokenKindBExpClose) elem := p.parseBExpElem() if elem == nil { raiseSyntaxError("bracket expression must include at least one character") @@ -203,6 +214,7 @@ func (p *parser) parseSingleChar() astNode { panic(fmt.Errorf("a pattern that isn't matching any symbols")) } } + p.expect(tokenKindBExpClose) return inverse } return p.parseNormalChar() @@ -210,6 +222,9 @@ func (p *parser) parseSingleChar() astNode { func (p *parser) parseBExpElem() astNode { left := p.parseNormalChar() + if left == nil { + return nil + } if !p.consume(tokenKindCharRange) { return left } @@ -258,6 +273,14 @@ func exclude(symbol, base astNode) astNode { exclude(symbol, left), exclude(symbol, right), ) + case *concatNode: + baseSeq := genByteRangeSeq(base) + symSeq := genByteRangeSeq(symbol) + excluded := excludeByteRangeSequence(symSeq, baseSeq) + if len(excluded) <= 0 { + return nil + } + return convertByteRangeSeqsToAST(excluded) case *symbolNode: baseSeq := genByteRangeSeq(base) symSeq := genByteRangeSeq(symbol) @@ -448,6 +471,38 @@ func isValidOrder(from, to []byte) bool { return true } +type byteBoundsEntry struct { + min byte + max byte +} + +var ( + bounds1 = [][]byteBoundsEntry{ + nil, + {{min: 0x00, max: 0x7f}}, + } + + bounds2 = [][]byteBoundsEntry{ + nil, + {{min: 0xc2, max: 0xdf}, {min: 0x80, max: 0xbf}}, + } + + bounds3 = [][]byteBoundsEntry{ + nil, + {{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}}, + {{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, + {{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}}, + {{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, + } + + bounds4 = [][]byteBoundsEntry{ + nil, + {{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, + {{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, + {{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, + } +) + func gen1ByteCharRangeAST(from, to []byte) astNode { return newRangeSymbolNode(from[0], to[0]) } @@ -481,28 +536,6 @@ func gen2ByteCharRangeAST(from, to []byte) astNode { } } -type byteBoundsEntry struct { - min byte - max byte -} - -var ( - bounds3 = [][]byteBoundsEntry{ - nil, - {{min: 0xe0, max: 0xe0}, {min: 0xa0, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xe1, max: 0xec}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xed, max: 0xed}, {min: 0x80, max: 0x9f}, {min: 0x80, max: 0xbf}}, - {{min: 0xee, max: 0xef}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - } - - bounds4 = [][]byteBoundsEntry{ - nil, - {{min: 0xf0, max: 0xf0}, {min: 0x90, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xf1, max: 0xf3}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - {{min: 0xf4, max: 0xf4}, {min: 0x80, max: 0x8f}, {min: 0x80, max: 0xbf}, {min: 0x80, max: 0xbf}}, - } -) - func get3ByteCharRangeNum(seq []byte) int { head := seq[0] switch { diff --git a/compiler/parser_test.go b/compiler/parser_test.go index 3bad222..fd44b5d 100644 --- a/compiler/parser_test.go +++ b/compiler/parser_test.go @@ -1,16 +1,13 @@ package compiler import ( + "fmt" "os" "reflect" "testing" ) -func TestParser(t *testing.T) { - rune2Byte := func(char rune, index int) byte { - return []byte(string(char))[index] - } - +func TestParse(t *testing.T) { symPos := func(n uint8) symbolPosition { return newSymbolPosition(n, false) } @@ -19,69 +16,970 @@ func TestParser(t *testing.T) { return newSymbolPosition(n, true) } - root, symTab, err := parse(map[int][]byte{ - 1: []byte("(a|b)*abb"), - }) - if err != nil { - t.Fatal(err) - } - if root == nil { - t.Fatal("root of AST is nil") + tests := []struct { + patterns []string + ast astNode + syntaxError bool + }{ + { + patterns: []string{ + "a", + }, + ast: genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "abc", + }, + ast: genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "a?", + }, + ast: genConcatNode( + newOptionNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[abc]?", + }, + ast: genConcatNode( + newOptionNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "(a)?", + }, + ast: genConcatNode( + newOptionNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "((a?)?)?", + }, + ast: genConcatNode( + newOptionNode( + newOptionNode( + newOptionNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + ), + ), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "(abc)?", + }, + ast: genConcatNode( + newOptionNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "(a|b)?", + }, + ast: genConcatNode( + newOptionNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "?", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a??", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a*", + }, + ast: genConcatNode( + newRepeatNode( + newSymbolNodeWithPos(byte('a'), 1), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[abc]*", + }, + ast: genConcatNode( + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), 1), + newSymbolNodeWithPos(byte('b'), 2), + newSymbolNodeWithPos(byte('c'), 3), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "((a*)*)*", + }, + ast: genConcatNode( + newRepeatNode( + newRepeatNode( + newRepeatNode( + newSymbolNodeWithPos(byte('a'), 1), + ), + ), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "(abc)*", + }, + ast: genConcatNode( + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), 1), + newSymbolNodeWithPos(byte('b'), 2), + newSymbolNodeWithPos(byte('c'), 3), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "(a|b)*", + }, + ast: genConcatNode( + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), 1), + newSymbolNodeWithPos(byte('b'), 2), + ), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "*", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a**", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a+", + }, + ast: genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "[abc]+", + }, + ast: genConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(4)), + newSymbolNodeWithPos(byte('b'), symPos(5)), + newSymbolNodeWithPos(byte('c'), symPos(6)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(7)), + ), + }, + { + patterns: []string{ + "((a+)+)+", + }, + ast: genConcatNode( + genConcatNode( + genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(2)), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(3)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(4)), + ), + ), + ), + ), + newRepeatNode( + genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(5)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(6)), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(7)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(8)), + ), + ), + ), + ), + ), + ), + newEndMarkerNodeWithPos(1, endPos(9)), + ), + }, + { + patterns: []string{ + "(abc)+", + }, + ast: genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(4)), + newSymbolNodeWithPos(byte('b'), symPos(5)), + newSymbolNodeWithPos(byte('c'), symPos(6)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(7)), + ), + }, + { + patterns: []string{ + "(a|b)+", + }, + ast: genConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + ), + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(3)), + newSymbolNodeWithPos(byte('b'), symPos(4)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(5)), + ), + }, + { + patterns: []string{ + "+", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a++", + }, + syntaxError: true, + }, + { + patterns: []string{ + ".", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, bounds1[1][0].max, symPos(1)), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(2)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(3)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(4)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(5)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(6)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(7)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(8)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(9)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(10)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(11)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(12)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(13)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(14)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(15)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(16)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(17)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(19)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(20)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(21)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(23)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(24)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(25)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(27)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(28)), + ), + }, + { + patterns: []string{ + "[a]", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[abc]", + }, + ast: newConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "[a-z]", + }, + ast: newConcatNode( + newRangeSymbolNodeWithPos(byte('a'), byte('z'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[A-Za-z]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(byte('A'), byte('Z'), symPos(1)), + newRangeSymbolNodeWithPos(byte('a'), byte('z'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "a[]", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[]a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[]", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^]", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('^'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[a-", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^a-", + }, + syntaxError: true, + }, + { + patterns: []string{ + "]", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[a-]", + }, + ast: newConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('-'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "[^a-]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(byte(46), byte(96), symPos(2)), + newRangeSymbolNodeWithPos(byte(98), bounds1[1][0].max, symPos(3)), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(30)), + ), + }, + { + patterns: []string{ + "[-z]", + }, + ast: newConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('-'), symPos(1)), + newSymbolNodeWithPos(byte('z'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "[^-z]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + genAltNode( + newRangeSymbolNodeWithPos(byte(46), byte(121), symPos(2)), + newRangeSymbolNodeWithPos(byte(123), bounds1[1][0].max, symPos(3)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(30)), + ), + }, + { + patterns: []string{ + "[-]", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('-'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[^-]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(byte(46), bounds1[1][0].max, symPos(2)), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(3)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(4)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(5)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(6)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(7)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(8)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(9)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(10)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(11)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(12)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(13)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(14)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(15)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(16)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(17)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(19)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(20)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(21)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(23)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(24)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(25)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(27)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(28)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(29)), + ), + }, + { + patterns: []string{ + "(a)", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "(((a)))", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "a()", + }, + syntaxError: true, + }, + { + patterns: []string{ + "()a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "()", + }, + syntaxError: true, + }, + { + patterns: []string{ + "(", + }, + syntaxError: true, + }, + { + patterns: []string{ + ")", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Mulder|Scully", + }, + ast: newConcatNode( + genAltNode( + genConcatNode( + newSymbolNodeWithPos(byte('M'), symPos(1)), + newSymbolNodeWithPos(byte('u'), symPos(2)), + newSymbolNodeWithPos(byte('l'), symPos(3)), + newSymbolNodeWithPos(byte('d'), symPos(4)), + newSymbolNodeWithPos(byte('e'), symPos(5)), + newSymbolNodeWithPos(byte('r'), symPos(6)), + ), + genConcatNode( + newSymbolNodeWithPos(byte('S'), symPos(7)), + newSymbolNodeWithPos(byte('c'), symPos(8)), + newSymbolNodeWithPos(byte('u'), symPos(9)), + newSymbolNodeWithPos(byte('l'), symPos(10)), + newSymbolNodeWithPos(byte('l'), symPos(11)), + newSymbolNodeWithPos(byte('y'), symPos(12)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(13)), + ), + }, + { + patterns: []string{ + "Langly|Frohike|Byers", + }, + ast: newConcatNode( + genAltNode( + genConcatNode( + newSymbolNodeWithPos(byte('L'), symPos(1)), + newSymbolNodeWithPos(byte('a'), symPos(2)), + newSymbolNodeWithPos(byte('n'), symPos(3)), + newSymbolNodeWithPos(byte('g'), symPos(4)), + newSymbolNodeWithPos(byte('l'), symPos(5)), + newSymbolNodeWithPos(byte('y'), symPos(6)), + ), + genConcatNode( + newSymbolNodeWithPos(byte('F'), symPos(7)), + newSymbolNodeWithPos(byte('r'), symPos(8)), + newSymbolNodeWithPos(byte('o'), symPos(9)), + newSymbolNodeWithPos(byte('h'), symPos(10)), + newSymbolNodeWithPos(byte('i'), symPos(11)), + newSymbolNodeWithPos(byte('k'), symPos(12)), + newSymbolNodeWithPos(byte('e'), symPos(13)), + ), + genConcatNode( + newSymbolNodeWithPos(byte('B'), symPos(14)), + newSymbolNodeWithPos(byte('y'), symPos(15)), + newSymbolNodeWithPos(byte('e'), symPos(16)), + newSymbolNodeWithPos(byte('r'), symPos(17)), + newSymbolNodeWithPos(byte('s'), symPos(18)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(19)), + ), + }, + { + patterns: []string{ + "|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "||", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Mulder|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "|Scully", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Langly|Frohike|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Langly||Byers", + }, + syntaxError: true, + }, + { + patterns: []string{ + "|Frohike|Byers", + }, + syntaxError: true, + }, + { + patterns: []string{ + "|Frohike|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Fox(|)Mulder", + }, + syntaxError: true, + }, + { + patterns: []string{ + "(Fox|)Mulder", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Fox(|Mulder)", + }, + syntaxError: true, + }, } - printAST(os.Stdout, root, "", "", false) - - { - expectedAST := genConcatNode( - newRepeatNode( - newAltNode( - newSymbolNodeWithPos(rune2Byte('a', 0), symPos(1)), - newSymbolNodeWithPos(rune2Byte('b', 0), symPos(2)), - ), - ), - newSymbolNodeWithPos(rune2Byte('a', 0), symPos(3)), - newSymbolNodeWithPos(rune2Byte('b', 0), symPos(4)), - newSymbolNodeWithPos(rune2Byte('b', 0), symPos(5)), - newEndMarkerNodeWithPos(1, endPos(6)), - ) - testAST(t, expectedAST, root) + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ps := map[int][]byte{} + for i, p := range tt.patterns { + ps[i+1] = []byte(p) + } + root, _, err := parse(ps) + if tt.syntaxError { + // printAST(os.Stdout, root, "", "", false) + if err == nil { + t.Fatalf("expected syntax error; got: nil") + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf("expected syntax error; got: %v (type: %T)", err, err) + } + if root != nil { + t.Fatalf("root is not nil") + } + } else { + if err != nil { + t.Fatal(err) + } + if root == nil { + t.Fatal("AST is nil") + } + // printAST(os.Stdout, root, "", "", false) + testAST(t, tt.ast, root) + } + }) } + // Test a FOLLOE table and a symbol table { - followTab := genFollowTable(root) - if followTab == nil { - t.Fatal("follow table is nil") + root, symTab, err := parse(map[int][]byte{ + 1: []byte("(a|b)*abb"), + }) + if err != nil { + t.Fatal(err) } - expectedFollowTab := followTable{ - 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 3: newSymbolPositionSet().add(symPos(4)), - 4: newSymbolPositionSet().add(symPos(5)), - 5: newSymbolPositionSet().add(endPos(6)), + if root == nil { + t.Fatal("root of AST is nil") } - testFollowTable(t, expectedFollowTab, followTab) - } + printAST(os.Stdout, root, "", "", false) - { - entry := func(v byte) byteRange { - return byteRange{ - from: v, - to: v, + { + expectedAST := genConcatNode( + newRepeatNode( + newAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + ), + ), + newSymbolNodeWithPos(byte('a'), symPos(3)), + newSymbolNodeWithPos(byte('b'), symPos(4)), + newSymbolNodeWithPos(byte('b'), symPos(5)), + newEndMarkerNodeWithPos(1, endPos(6)), + ) + testAST(t, expectedAST, root) + } + + { + followTab := genFollowTable(root) + if followTab == nil { + t.Fatal("follow table is nil") } + expectedFollowTab := followTable{ + 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 3: newSymbolPositionSet().add(symPos(4)), + 4: newSymbolPositionSet().add(symPos(5)), + 5: newSymbolPositionSet().add(endPos(6)), + } + testFollowTable(t, expectedFollowTab, followTab) } - expectedSymTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{ - symPos(1): entry(byte('a')), - symPos(2): entry(byte('b')), - symPos(3): entry(byte('a')), - symPos(4): entry(byte('b')), - symPos(5): entry(byte('b')), - }, - endPos2ID: map[symbolPosition]int{ - endPos(6): 1, - }, + { + entry := func(v byte) byteRange { + return byteRange{ + from: v, + to: v, + } + } + + expectedSymTab := &symbolTable{ + symPos2Byte: map[symbolPosition]byteRange{ + symPos(1): entry(byte('a')), + symPos(2): entry(byte('b')), + symPos(3): entry(byte('a')), + symPos(4): entry(byte('b')), + symPos(5): entry(byte('b')), + }, + endPos2ID: map[symbolPosition]int{ + endPos(6): 1, + }, + } + testSymbolTable(t, expectedSymTab, symTab) } - testSymbolTable(t, expectedSymTab, symTab) } } diff --git a/compiler/test_util_test.go b/compiler/test_util_test.go index 7e6896d..2ead2c9 100644 --- a/compiler/test_util_test.go +++ b/compiler/test_util_test.go @@ -1,5 +1,11 @@ package compiler +func newRangeSymbolNodeWithPos(from, to byte, pos symbolPosition) *symbolNode { + n := newRangeSymbolNode(from, to) + n.pos = pos + return n +} + func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { n := newSymbolNode(v) n.pos = pos |