diff options
Diffstat (limited to 'compiler/parser_test.go')
-rw-r--r-- | compiler/parser_test.go | 1012 |
1 files changed, 955 insertions, 57 deletions
diff --git a/compiler/parser_test.go b/compiler/parser_test.go index 3bad222..fd44b5d 100644 --- a/compiler/parser_test.go +++ b/compiler/parser_test.go @@ -1,16 +1,13 @@ package compiler import ( + "fmt" "os" "reflect" "testing" ) -func TestParser(t *testing.T) { - rune2Byte := func(char rune, index int) byte { - return []byte(string(char))[index] - } - +func TestParse(t *testing.T) { symPos := func(n uint8) symbolPosition { return newSymbolPosition(n, false) } @@ -19,69 +16,970 @@ func TestParser(t *testing.T) { return newSymbolPosition(n, true) } - root, symTab, err := parse(map[int][]byte{ - 1: []byte("(a|b)*abb"), - }) - if err != nil { - t.Fatal(err) - } - if root == nil { - t.Fatal("root of AST is nil") + tests := []struct { + patterns []string + ast astNode + syntaxError bool + }{ + { + patterns: []string{ + "a", + }, + ast: genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "abc", + }, + ast: genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "a?", + }, + ast: genConcatNode( + newOptionNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[abc]?", + }, + ast: genConcatNode( + newOptionNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "(a)?", + }, + ast: genConcatNode( + newOptionNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "((a?)?)?", + }, + ast: genConcatNode( + newOptionNode( + newOptionNode( + newOptionNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + ), + ), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "(abc)?", + }, + ast: genConcatNode( + newOptionNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "(a|b)?", + }, + ast: genConcatNode( + newOptionNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "?", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a??", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a*", + }, + ast: genConcatNode( + newRepeatNode( + newSymbolNodeWithPos(byte('a'), 1), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[abc]*", + }, + ast: genConcatNode( + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), 1), + newSymbolNodeWithPos(byte('b'), 2), + newSymbolNodeWithPos(byte('c'), 3), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "((a*)*)*", + }, + ast: genConcatNode( + newRepeatNode( + newRepeatNode( + newRepeatNode( + newSymbolNodeWithPos(byte('a'), 1), + ), + ), + ), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "(abc)*", + }, + ast: genConcatNode( + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), 1), + newSymbolNodeWithPos(byte('b'), 2), + newSymbolNodeWithPos(byte('c'), 3), + ), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "(a|b)*", + }, + ast: genConcatNode( + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), 1), + newSymbolNodeWithPos(byte('b'), 2), + ), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "*", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a**", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a+", + }, + ast: genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "[abc]+", + }, + ast: genConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(4)), + newSymbolNodeWithPos(byte('b'), symPos(5)), + newSymbolNodeWithPos(byte('c'), symPos(6)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(7)), + ), + }, + { + patterns: []string{ + "((a+)+)+", + }, + ast: genConcatNode( + genConcatNode( + genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(2)), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(3)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(4)), + ), + ), + ), + ), + newRepeatNode( + genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(5)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(6)), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(7)), + newRepeatNode( + newSymbolNodeWithPos(byte('a'), symPos(8)), + ), + ), + ), + ), + ), + ), + newEndMarkerNodeWithPos(1, endPos(9)), + ), + }, + { + patterns: []string{ + "(abc)+", + }, + ast: genConcatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newRepeatNode( + genConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(4)), + newSymbolNodeWithPos(byte('b'), symPos(5)), + newSymbolNodeWithPos(byte('c'), symPos(6)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(7)), + ), + }, + { + patterns: []string{ + "(a|b)+", + }, + ast: genConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + ), + newRepeatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(3)), + newSymbolNodeWithPos(byte('b'), symPos(4)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(5)), + ), + }, + { + patterns: []string{ + "+", + }, + syntaxError: true, + }, + { + patterns: []string{ + "a++", + }, + syntaxError: true, + }, + { + patterns: []string{ + ".", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, bounds1[1][0].max, symPos(1)), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(2)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(3)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(4)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(5)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(6)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(7)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(8)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(9)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(10)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(11)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(12)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(13)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(14)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(15)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(16)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(17)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(19)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(20)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(21)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(23)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(24)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(25)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(27)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(28)), + ), + }, + { + patterns: []string{ + "[a]", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[abc]", + }, + ast: newConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + newSymbolNodeWithPos(byte('c'), symPos(3)), + ), + newEndMarkerNodeWithPos(1, endPos(4)), + ), + }, + { + patterns: []string{ + "[a-z]", + }, + ast: newConcatNode( + newRangeSymbolNodeWithPos(byte('a'), byte('z'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[A-Za-z]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(byte('A'), byte('Z'), symPos(1)), + newRangeSymbolNodeWithPos(byte('a'), byte('z'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "a[]", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[]a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[]", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^]", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('^'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[a-", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[^a-", + }, + syntaxError: true, + }, + { + patterns: []string{ + "]", + }, + syntaxError: true, + }, + { + patterns: []string{ + "[a-]", + }, + ast: newConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('-'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "[^a-]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(byte(46), byte(96), symPos(2)), + newRangeSymbolNodeWithPos(byte(98), bounds1[1][0].max, symPos(3)), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(30)), + ), + }, + { + patterns: []string{ + "[-z]", + }, + ast: newConcatNode( + genAltNode( + newSymbolNodeWithPos(byte('-'), symPos(1)), + newSymbolNodeWithPos(byte('z'), symPos(2)), + ), + newEndMarkerNodeWithPos(1, endPos(3)), + ), + }, + { + patterns: []string{ + "[^-z]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + genAltNode( + newRangeSymbolNodeWithPos(byte(46), byte(121), symPos(2)), + newRangeSymbolNodeWithPos(byte(123), bounds1[1][0].max, symPos(3)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(4)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(5)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(6)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(7)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(8)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(9)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(10)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(11)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(12)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(13)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(14)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(15)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(16)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(17)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(19)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(20)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(21)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(23)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(24)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(25)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(27)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(28)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(29)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(30)), + ), + }, + { + patterns: []string{ + "[-]", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('-'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "[^-]", + }, + ast: newConcatNode( + genAltNode( + newRangeSymbolNodeWithPos(bounds1[1][0].min, byte(44), symPos(1)), + newRangeSymbolNodeWithPos(byte(46), bounds1[1][0].max, symPos(2)), + genConcatNode( + newRangeSymbolNodeWithPos(bounds2[1][0].min, bounds2[1][0].max, symPos(3)), + newRangeSymbolNodeWithPos(bounds2[1][1].min, bounds2[1][1].max, symPos(4)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[1][0].min, bounds3[1][0].max, symPos(5)), + newRangeSymbolNodeWithPos(bounds3[1][1].min, bounds3[1][1].max, symPos(6)), + newRangeSymbolNodeWithPos(bounds3[1][2].min, bounds3[1][2].max, symPos(7)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[2][0].min, bounds3[2][0].max, symPos(8)), + newRangeSymbolNodeWithPos(bounds3[2][1].min, bounds3[2][1].max, symPos(9)), + newRangeSymbolNodeWithPos(bounds3[2][2].min, bounds3[2][2].max, symPos(10)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[3][0].min, bounds3[3][0].max, symPos(11)), + newRangeSymbolNodeWithPos(bounds3[3][1].min, bounds3[3][1].max, symPos(12)), + newRangeSymbolNodeWithPos(bounds3[3][2].min, bounds3[3][2].max, symPos(13)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds3[4][0].min, bounds3[4][0].max, symPos(14)), + newRangeSymbolNodeWithPos(bounds3[4][1].min, bounds3[4][1].max, symPos(15)), + newRangeSymbolNodeWithPos(bounds3[4][2].min, bounds3[4][2].max, symPos(16)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[1][0].min, bounds4[1][0].max, symPos(17)), + newRangeSymbolNodeWithPos(bounds4[1][1].min, bounds4[1][1].max, symPos(18)), + newRangeSymbolNodeWithPos(bounds4[1][2].min, bounds4[1][2].max, symPos(19)), + newRangeSymbolNodeWithPos(bounds4[1][3].min, bounds4[1][3].max, symPos(20)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[2][0].min, bounds4[2][0].max, symPos(21)), + newRangeSymbolNodeWithPos(bounds4[2][1].min, bounds4[2][1].max, symPos(22)), + newRangeSymbolNodeWithPos(bounds4[2][2].min, bounds4[2][2].max, symPos(23)), + newRangeSymbolNodeWithPos(bounds4[2][3].min, bounds4[2][3].max, symPos(24)), + ), + genConcatNode( + newRangeSymbolNodeWithPos(bounds4[3][0].min, bounds4[3][0].max, symPos(25)), + newRangeSymbolNodeWithPos(bounds4[3][1].min, bounds4[3][1].max, symPos(26)), + newRangeSymbolNodeWithPos(bounds4[3][2].min, bounds4[3][2].max, symPos(27)), + newRangeSymbolNodeWithPos(bounds4[3][3].min, bounds4[3][3].max, symPos(28)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(29)), + ), + }, + { + patterns: []string{ + "(a)", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "(((a)))", + }, + ast: newConcatNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newEndMarkerNodeWithPos(1, endPos(2)), + ), + }, + { + patterns: []string{ + "a()", + }, + syntaxError: true, + }, + { + patterns: []string{ + "()a", + }, + syntaxError: true, + }, + { + patterns: []string{ + "()", + }, + syntaxError: true, + }, + { + patterns: []string{ + "(", + }, + syntaxError: true, + }, + { + patterns: []string{ + ")", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Mulder|Scully", + }, + ast: newConcatNode( + genAltNode( + genConcatNode( + newSymbolNodeWithPos(byte('M'), symPos(1)), + newSymbolNodeWithPos(byte('u'), symPos(2)), + newSymbolNodeWithPos(byte('l'), symPos(3)), + newSymbolNodeWithPos(byte('d'), symPos(4)), + newSymbolNodeWithPos(byte('e'), symPos(5)), + newSymbolNodeWithPos(byte('r'), symPos(6)), + ), + genConcatNode( + newSymbolNodeWithPos(byte('S'), symPos(7)), + newSymbolNodeWithPos(byte('c'), symPos(8)), + newSymbolNodeWithPos(byte('u'), symPos(9)), + newSymbolNodeWithPos(byte('l'), symPos(10)), + newSymbolNodeWithPos(byte('l'), symPos(11)), + newSymbolNodeWithPos(byte('y'), symPos(12)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(13)), + ), + }, + { + patterns: []string{ + "Langly|Frohike|Byers", + }, + ast: newConcatNode( + genAltNode( + genConcatNode( + newSymbolNodeWithPos(byte('L'), symPos(1)), + newSymbolNodeWithPos(byte('a'), symPos(2)), + newSymbolNodeWithPos(byte('n'), symPos(3)), + newSymbolNodeWithPos(byte('g'), symPos(4)), + newSymbolNodeWithPos(byte('l'), symPos(5)), + newSymbolNodeWithPos(byte('y'), symPos(6)), + ), + genConcatNode( + newSymbolNodeWithPos(byte('F'), symPos(7)), + newSymbolNodeWithPos(byte('r'), symPos(8)), + newSymbolNodeWithPos(byte('o'), symPos(9)), + newSymbolNodeWithPos(byte('h'), symPos(10)), + newSymbolNodeWithPos(byte('i'), symPos(11)), + newSymbolNodeWithPos(byte('k'), symPos(12)), + newSymbolNodeWithPos(byte('e'), symPos(13)), + ), + genConcatNode( + newSymbolNodeWithPos(byte('B'), symPos(14)), + newSymbolNodeWithPos(byte('y'), symPos(15)), + newSymbolNodeWithPos(byte('e'), symPos(16)), + newSymbolNodeWithPos(byte('r'), symPos(17)), + newSymbolNodeWithPos(byte('s'), symPos(18)), + ), + ), + newEndMarkerNodeWithPos(1, endPos(19)), + ), + }, + { + patterns: []string{ + "|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "||", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Mulder|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "|Scully", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Langly|Frohike|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Langly||Byers", + }, + syntaxError: true, + }, + { + patterns: []string{ + "|Frohike|Byers", + }, + syntaxError: true, + }, + { + patterns: []string{ + "|Frohike|", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Fox(|)Mulder", + }, + syntaxError: true, + }, + { + patterns: []string{ + "(Fox|)Mulder", + }, + syntaxError: true, + }, + { + patterns: []string{ + "Fox(|Mulder)", + }, + syntaxError: true, + }, } - printAST(os.Stdout, root, "", "", false) - - { - expectedAST := genConcatNode( - newRepeatNode( - newAltNode( - newSymbolNodeWithPos(rune2Byte('a', 0), symPos(1)), - newSymbolNodeWithPos(rune2Byte('b', 0), symPos(2)), - ), - ), - newSymbolNodeWithPos(rune2Byte('a', 0), symPos(3)), - newSymbolNodeWithPos(rune2Byte('b', 0), symPos(4)), - newSymbolNodeWithPos(rune2Byte('b', 0), symPos(5)), - newEndMarkerNodeWithPos(1, endPos(6)), - ) - testAST(t, expectedAST, root) + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ps := map[int][]byte{} + for i, p := range tt.patterns { + ps[i+1] = []byte(p) + } + root, _, err := parse(ps) + if tt.syntaxError { + // printAST(os.Stdout, root, "", "", false) + if err == nil { + t.Fatalf("expected syntax error; got: nil") + } + if _, ok := err.(*SyntaxError); !ok { + t.Fatalf("expected syntax error; got: %v (type: %T)", err, err) + } + if root != nil { + t.Fatalf("root is not nil") + } + } else { + if err != nil { + t.Fatal(err) + } + if root == nil { + t.Fatal("AST is nil") + } + // printAST(os.Stdout, root, "", "", false) + testAST(t, tt.ast, root) + } + }) } + // Test a FOLLOE table and a symbol table { - followTab := genFollowTable(root) - if followTab == nil { - t.Fatal("follow table is nil") + root, symTab, err := parse(map[int][]byte{ + 1: []byte("(a|b)*abb"), + }) + if err != nil { + t.Fatal(err) } - expectedFollowTab := followTable{ - 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 3: newSymbolPositionSet().add(symPos(4)), - 4: newSymbolPositionSet().add(symPos(5)), - 5: newSymbolPositionSet().add(endPos(6)), + if root == nil { + t.Fatal("root of AST is nil") } - testFollowTable(t, expectedFollowTab, followTab) - } + printAST(os.Stdout, root, "", "", false) - { - entry := func(v byte) byteRange { - return byteRange{ - from: v, - to: v, + { + expectedAST := genConcatNode( + newRepeatNode( + newAltNode( + newSymbolNodeWithPos(byte('a'), symPos(1)), + newSymbolNodeWithPos(byte('b'), symPos(2)), + ), + ), + newSymbolNodeWithPos(byte('a'), symPos(3)), + newSymbolNodeWithPos(byte('b'), symPos(4)), + newSymbolNodeWithPos(byte('b'), symPos(5)), + newEndMarkerNodeWithPos(1, endPos(6)), + ) + testAST(t, expectedAST, root) + } + + { + followTab := genFollowTable(root) + if followTab == nil { + t.Fatal("follow table is nil") } + expectedFollowTab := followTable{ + 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 3: newSymbolPositionSet().add(symPos(4)), + 4: newSymbolPositionSet().add(symPos(5)), + 5: newSymbolPositionSet().add(endPos(6)), + } + testFollowTable(t, expectedFollowTab, followTab) } - expectedSymTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{ - symPos(1): entry(byte('a')), - symPos(2): entry(byte('b')), - symPos(3): entry(byte('a')), - symPos(4): entry(byte('b')), - symPos(5): entry(byte('b')), - }, - endPos2ID: map[symbolPosition]int{ - endPos(6): 1, - }, + { + entry := func(v byte) byteRange { + return byteRange{ + from: v, + to: v, + } + } + + expectedSymTab := &symbolTable{ + symPos2Byte: map[symbolPosition]byteRange{ + symPos(1): entry(byte('a')), + symPos(2): entry(byte('b')), + symPos(3): entry(byte('a')), + symPos(4): entry(byte('b')), + symPos(5): entry(byte('b')), + }, + endPos2ID: map[symbolPosition]int{ + endPos(6): 1, + }, + } + testSymbolTable(t, expectedSymTab, symTab) } - testSymbolTable(t, expectedSymTab, symTab) } } |