diff options
Diffstat (limited to 'tests/unit/grammar')
-rw-r--r-- | tests/unit/grammar/lexical.go | 338 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/dfa.go | 442 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/parser.go | 1907 | ||||
-rw-r--r-- | tests/unit/grammar/symbol.go | 159 |
4 files changed, 0 insertions, 2846 deletions
diff --git a/tests/unit/grammar/lexical.go b/tests/unit/grammar/lexical.go deleted file mode 100644 index b621cd2..0000000 --- a/tests/unit/grammar/lexical.go +++ /dev/null @@ -1,338 +0,0 @@ -package lexical - -import ( - "encoding/json" - "fmt" - "testing" - - spec "urubu/spec/grammar" -) - -func TestLexSpec_Validate(t *testing.T) { - // We expect that the spelling inconsistency error will occur. - spec := &LexSpec{ - Entries: []*LexEntry{ - { - Modes: []spec.LexModeName{ - // 'Default' is the spelling inconsistency because 'default' is predefined. - "Default", - }, - Kind: "foo", - Pattern: "foo", - }, - }, - } - err := spec.Validate() - if err == nil { - t.Fatalf("expected error didn't occur") - } -} - -func TestSnakeCaseToUpperCamelCase(t *testing.T) { - tests := []struct { - snake string - camel string - }{ - { - snake: "foo", - camel: "Foo", - }, - { - snake: "foo_bar", - camel: "FooBar", - }, - { - snake: "foo_bar_baz", - camel: "FooBarBaz", - }, - { - snake: "Foo", - camel: "Foo", - }, - { - snake: "fooBar", - camel: "FooBar", - }, - { - snake: "FOO", - camel: "FOO", - }, - { - snake: "FOO_BAR", - camel: "FOOBAR", - }, - { - snake: "_foo_bar_", - camel: "FooBar", - }, - { - snake: "___foo___bar___", - camel: "FooBar", - }, - } - for _, tt := range tests { - c := SnakeCaseToUpperCamelCase(tt.snake) - if c != tt.camel { - t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) - } - } -} - -func TestFindSpellingInconsistencies(t *testing.T) { - tests := []struct { - ids []string - duplicated [][]string - }{ - { - ids: []string{"foo", "foo"}, - duplicated: nil, - }, - { - ids: []string{"foo", "Foo"}, - duplicated: [][]string{{"Foo", "foo"}}, - }, - { - ids: []string{"foo", "foo", "Foo"}, - duplicated: [][]string{{"Foo", "foo"}}, - }, - { - ids: []string{"foo_bar_baz", "FooBarBaz"}, - duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, - }, - { - ids: []string{"foo", "Foo", "bar", "Bar"}, - duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, - }, - { - ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, - duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - duplicated := FindSpellingInconsistencies(tt.ids) - if len(duplicated) != len(tt.duplicated) { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) - } - for i, dupIDs := range duplicated { - if len(dupIDs) != len(tt.duplicated[i]) { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) - } - for j, id := range dupIDs { - if id != tt.duplicated[i][j] { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) - } - } - } - }) - } -} - -func TestCompile(t *testing.T) { - tests := []struct { - Caption string - Spec string - Err bool - }{ - { - Caption: "allow duplicates names between fragments and non-fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "\\f{a2z}" - }, - { - "fragment": true, - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - }, - { - Caption: "don't allow duplicates names in non-fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" - }, - { - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow duplicates names in fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "\\f{a2z}" - }, - { - "fragments": true, - "kind": "a2z", - "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" - }, - { - "fragments": true, - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow kind names in the same mode to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow kind names across modes to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["default"], - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "modes": ["other_mode"], - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow mode names to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["foo_1"], - "kind": "a", - "pattern": "a" - }, - { - "modes": ["foo1"], - "kind": "b", - "pattern": "b" - } - ] -} -`, - Err: true, - }, - { - Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a", - "pattern": "a" - }, - { - "fragment": true, - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "fragment": true, - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - }, - { - Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["default"], - "kind": "a", - "pattern": "a" - }, - { - "modes": ["default"], - "fragment": true, - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "modes": ["other_mode"], - "fragment": true, - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) { - lspec := &LexSpec{} - err := json.Unmarshal([]byte(tt.Spec), lspec) - if err != nil { - t.Fatalf("%v", err) - } - clspec, err, _ := Compile(lspec, CompressionLevelMin) - if tt.Err { - if err == nil { - t.Fatalf("expected an error") - } - if clspec != nil { - t.Fatalf("Compile function mustn't return a compiled specification") - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if clspec == nil { - t.Fatalf("Compile function must return a compiled specification") - } - } - }) - } -} diff --git a/tests/unit/grammar/lexical/dfa.go b/tests/unit/grammar/lexical/dfa.go deleted file mode 100644 index 1a3e16a..0000000 --- a/tests/unit/grammar/lexical/dfa.go +++ /dev/null @@ -1,442 +0,0 @@ -package dfa - -import ( - "fmt" - "strings" - "testing" - - "urubu/grammar/lexical/parser" - spec "urubu/spec/grammar" -) - -func TestGenDFA(t *testing.T) { - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - dfa := GenDFA(bt, symTab) - if dfa == nil { - t.Fatalf("DFA is nil") - } - - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) - s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) - s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) - s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) - - rune2Int := func(char rune, index int) uint8 { - return uint8([]byte(string(char))[index]) - } - - tranS0 := [256]string{} - tranS0[rune2Int('a', 0)] = s1.hash() - tranS0[rune2Int('b', 0)] = s0.hash() - - tranS1 := [256]string{} - tranS1[rune2Int('a', 0)] = s1.hash() - tranS1[rune2Int('b', 0)] = s2.hash() - - tranS2 := [256]string{} - tranS2[rune2Int('a', 0)] = s1.hash() - tranS2[rune2Int('b', 0)] = s3.hash() - - tranS3 := [256]string{} - tranS3[rune2Int('a', 0)] = s1.hash() - tranS3[rune2Int('b', 0)] = s0.hash() - - expectedTranTab := map[string][256]string{ - s0.hash(): tranS0, - s1.hash(): tranS1, - s2.hash(): tranS2, - s3.hash(): tranS3, - } - if len(dfa.TransitionTable) != len(expectedTranTab) { - t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) - } - for h, eTranTab := range expectedTranTab { - tranTab, ok := dfa.TransitionTable[h] - if !ok { - t.Errorf("no entry; hash: %v", h) - continue - } - if len(tranTab) != len(eTranTab) { - t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) - } - for c, eNext := range eTranTab { - if eNext == "" { - continue - } - - next := tranTab[c] - if next == "" { - t.Errorf("no enatry: hash: %v, char: %v", h, c) - } - if next != eNext { - t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) - } - } - } - - if dfa.InitialState != s0.hash() { - t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) - } - - accTab := map[string]spec.LexModeKindID{ - s3.hash(): 1, - } - if len(dfa.AcceptingStatesTable) != len(accTab) { - t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) - } - for eState, eID := range accTab { - id, ok := dfa.AcceptingStatesTable[eState] - if !ok { - t.Errorf("accepting state is not found: state: %v", eState) - } - if id != eID { - t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) - } - } -} - -func TestNewSymbolPosition(t *testing.T) { - tests := []struct { - n uint16 - endMark bool - err bool - }{ - { - n: 0, - endMark: false, - err: true, - }, - { - n: 0, - endMark: true, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: true, - err: true, - }, - { - n: symbolPositionMin, - endMark: false, - }, - { - n: symbolPositionMin, - endMark: true, - }, - { - n: symbolPositionMax, - endMark: false, - }, - { - n: symbolPositionMax, - endMark: true, - }, - { - n: symbolPositionMax + 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMax + 1, - endMark: true, - err: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { - pos, err := newSymbolPosition(tt.n, tt.endMark) - if tt.err { - if err == nil { - t.Fatal("err is nil") - } - return - } - if err != nil { - t.Fatal(err) - } - n, endMark := pos.describe() - if n != tt.n || endMark != tt.endMark { - t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) - } - }) - } -} - -func TestByteTree(t *testing.T) { - tests := []struct { - root byteTree - nullable bool - first *symbolPositionSet - last *symbolPositionSet - }{ - { - root: newSymbolNodeWithPos(0, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newEndMarkerNodeWithPos(1, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newRepeatNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newOptionNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - if tt.root.nullable() != tt.nullable { - t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) - } - if tt.first.hash() != tt.root.first().hash() { - t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first()) - } - if tt.last.hash() != tt.root.last().hash() { - t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) - } - }) - } -} - -func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { - n := newSymbolNode(v) - n.pos = pos - return n -} - -func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { - n := newEndMarkerNode(spec.LexModeKindID(id)) - n.pos = pos - return n -} - -func TestFollowAndSymbolTable(t *testing.T) { - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - - { - followTab := genFollowTable(bt) - if followTab == nil { - t.Fatal("follow table is nil") - } - expectedFollowTab := followTable{ - 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 3: newSymbolPositionSet().add(symPos(4)), - 4: newSymbolPositionSet().add(symPos(5)), - 5: newSymbolPositionSet().add(endPos(6)), - } - testFollowTable(t, expectedFollowTab, followTab) - } - - { - entry := func(v byte) byteRange { - return byteRange{ - from: v, - to: v, - } - } - - expectedSymTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{ - symPos(1): entry(byte('a')), - symPos(2): entry(byte('b')), - symPos(3): entry(byte('a')), - symPos(4): entry(byte('b')), - symPos(5): entry(byte('b')), - }, - endPos2ID: map[symbolPosition]spec.LexModeKindID{ - endPos(6): 1, - }, - } - testSymbolTable(t, expectedSymTab, symTab) - } -} - -func testFollowTable(t *testing.T, expected, actual followTable) { - if len(actual) != len(expected) { - t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) - } - for ePos, eSet := range expected { - aSet, ok := actual[ePos] - if !ok { - t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) - } - if aSet.hash() != eSet.hash() { - t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet) - } - } -} - -func testSymbolTable(t *testing.T, expected, actual *symbolTable) { - t.Helper() - - if len(actual.symPos2Byte) != len(expected.symPos2Byte) { - t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) - } - for ePos, eByte := range expected.symPos2Byte { - byte, ok := actual.symPos2Byte[ePos] - if !ok { - t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) - continue - } - if byte.from != eByte.from || byte.to != eByte.to { - t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte) - } - } - - if len(actual.endPos2ID) != len(expected.endPos2ID) { - t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) - } - for ePos, eID := range expected.endPos2ID { - id, ok := actual.endPos2ID[ePos] - if !ok { - t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) - continue - } - if id != eID { - t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) - } - } -} diff --git a/tests/unit/grammar/lexical/parser.go b/tests/unit/grammar/lexical/parser.go deleted file mode 100644 index d5d7039..0000000 --- a/tests/unit/grammar/lexical/parser.go +++ /dev/null @@ -1,1907 +0,0 @@ -package parser - -import ( - "fmt" - "reflect" - "strings" - "testing" - - spec "urubu/spec/grammar" - "urubu/ucd" -) - -func TestLexer(t *testing.T) { - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "lexer can recognize ordinaly characters", - src: "123abcいろは", - tokens: []*token{ - newToken(tokenKindChar, '1'), - newToken(tokenKindChar, '2'), - newToken(tokenKindChar, '3'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, 'b'), - newToken(tokenKindChar, 'c'), - newToken(tokenKindChar, 'い'), - newToken(tokenKindChar, 'ろ'), - newToken(tokenKindChar, 'は'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in default mode", - src: ".*+?|()[\\u", - tokens: []*token{ - newToken(tokenKindAnyChar, nullChar), - newToken(tokenKindRepeat, nullChar), - newToken(tokenKindRepeatOneOrMore, nullChar), - newToken(tokenKindOption, nullChar), - newToken(tokenKindAlt, nullChar), - newToken(tokenKindGroupOpen, nullChar), - newToken(tokenKindGroupClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in default mode", - src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", - tokens: []*token{ - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "], {, and } are treated as an ordinary character in default mode", - src: "]{}", - tokens: []*token{ - newToken(tokenKindChar, ']'), - newToken(tokenKindChar, '{'), - newToken(tokenKindChar, '}'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in bracket expression mode", - src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09AF"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09abcf"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in bracket expression mode", - src: "[\\^a\\-z]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "in a bracket expression, the special characters are also handled as normal characters", - src: "[\\\\.*+?|()[", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", - // [...-...][...-][-...][-] - // ~~~~~~~ ~ ~ ~ - // ^ ^ ^ ^ - // | | | `-- Ordinary Character (b) - // | | `-- Ordinary Character (b) - // | `-- Ordinary Character (b) - // `-- Character Range (a) - // - // a. *-* is handled as a character-range expression. - // b. *-, -*, or - are handled as ordinary characters. - src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", - // [^...^...][^] - // ~~ ~ ~~ - // ^ ^ ^^ - // | | |`-- Ordinary Character (c) - // | | `-- Bracket Expression - // | `-- Ordinary Character (b) - // `-- Inverse Bracket Expression (a) - // - // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. - // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. - // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. - src: "[^^][^]", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "\\@", - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "\\", - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "[\\@", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "[\\", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer can recognize the special characters and code points in code point expression mode", - src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a one digit hex string isn't a valid code point", - src: "\\u{0", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a two digits hex string isn't a valid code point", - src: "\\u{01", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a three digits hex string isn't a valid code point", - src: "\\u{012", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a four digits hex string is a valid code point", - src: "\\u{0123}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a five digits hex string isn't a valid code point", - src: "\\u{01234", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a six digits hex string is a valid code point", - src: "\\u{012345}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("012345"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a seven digits hex string isn't a valid code point", - src: "\\u{0123456", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{g", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{G", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "lexer can recognize the special characters and symbols in character property expression mode", - src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", - tokens: []*token{ - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters and symbols in fragment expression mode", - src: "\\f{integer}", - tokens: []*token{ - newToken(tokenKindFragmentLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newFragmentSymbolToken("integer"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a fragment expression is not supported in a bracket expression", - src: "[\\f", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "a fragment expression is not supported in an inverse bracket expression", - src: "[^\\f", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - lex := newLexer(strings.NewReader(tt.src)) - var err error - var tok *token - i := 0 - for { - tok, err = lex.next() - if err != nil { - break - } - if i >= len(tt.tokens) { - break - } - eTok := tt.tokens[i] - i++ - testToken(t, tok, eTok) - - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - detail, cause := lex.error() - if cause != tt.err { - t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - if i < len(tt.tokens) { - t.Fatalf("expecte more tokens") - } - }) - } -} - -func testToken(t *testing.T, a, e *token) { - t.Helper() - if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { - t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) - } -} - -func TestParse(t *testing.T) { - tests := []struct { - pattern string - fragments map[spec.LexKindName]string - ast CPTree - syntaxError error - - // When an AST is large, as patterns containing a character property expression, this test only checks - // that the pattern is parsable. The check of the validity of such AST is performed by checking that it - // can be matched correctly using the driver. - skipTestAST bool - }{ - { - pattern: "a", - ast: newSymbolNode('a'), - }, - { - pattern: "abc", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "a?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}?", - ast: newOptionNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}?", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}?", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newOptionNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a)?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "((a?)?)?", - ast: newOptionNode( - newOptionNode( - newOptionNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)?", - ast: newOptionNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(?)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "?|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a??", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a*", - ast: newRepeatNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}*", - ast: newRepeatNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}*", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}*", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "((a*)*)*", - ast: newRepeatNode( - newRepeatNode( - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)*", - ast: newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(*)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "*|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a**", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a+", - ast: genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - }, - { - pattern: "[abc]+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "\\u{3042}+", - ast: genConcatNode( - newSymbolNode('\u3042'), - newRepeatNode( - newSymbolNode('\u3042'), - ), - ), - }, - { - pattern: "\\p{Letter}+", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}+", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: genConcatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - ), - }, - { - pattern: "((a+)+)+", - ast: genConcatNode( - genConcatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - newRepeatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - ), - ), - ), - }, - { - pattern: "(abc)+", - ast: genConcatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a|b)+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - ), - }, - { - pattern: "+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(+)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "+|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a++", - syntaxError: synErrRepNoTarget, - }, - { - pattern: ".", - ast: newRangeSymbolNode(0x00, 0x10FFFF), - }, - { - pattern: "[a]", - ast: newSymbolNode('a'), - }, - { - pattern: "[abc]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "[a-z]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[A-Za-z]", - ast: genAltNode( - newRangeSymbolNode('A', 'Z'), - newRangeSymbolNode('a', 'z'), - ), - }, - { - pattern: "[\\u{004E}]", - ast: newSymbolNode('N'), - }, - { - pattern: "[\\u{0061}-\\u{007A}]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[z-a]", - syntaxError: synErrRangeInvalidOrder, - }, - { - pattern: "a[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]a", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[^\\u{004E}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u004E'-1), - newRangeSymbolNode('\u004E'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\u{0061}-\\u{007A}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u0061'-1), - newRangeSymbolNode('\u007A'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[^a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\u{0000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^]", - ast: newSymbolNode('^'), - }, - { - pattern: "[", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "]", - ast: newSymbolNode(']'), - }, - { - pattern: "(]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a]", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode(']'), - ), - }, - { - pattern: "(a]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "([)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('-'), - ), - }, - { - pattern: "[^a-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x60), - newRangeSymbolNode(0x62, 0x10FFFF), - ), - }, - { - pattern: "[-z]", - ast: genAltNode( - newSymbolNode('-'), - newSymbolNode('z'), - ), - }, - { - pattern: "[^-z]", - ast: newAltNode( - newRangeSymbolNode(0x00, 0x2C), - newAltNode( - newRangeSymbolNode(0x2E, 0x79), - newRangeSymbolNode(0x7B, 0x10FFFF), - ), - ), - }, - { - pattern: "[-]", - ast: newSymbolNode('-'), - }, - { - pattern: "[^-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x10FFFF), - ), - }, - { - pattern: "[^01]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^10]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^a-z]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - }, - { - pattern: "[^az]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - genAltNode( - newRangeSymbolNode('a'+1, 'z'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - ), - }, - { - pattern: "\\u{006E}", - ast: newSymbolNode('\u006E'), - }, - { - pattern: "\\u{03BD}", - ast: newSymbolNode('\u03BD'), - }, - { - pattern: "\\u{306B}", - ast: newSymbolNode('\u306B'), - }, - { - pattern: "\\u{01F638}", - ast: newSymbolNode('\U0001F638'), - }, - { - pattern: "\\u{0000}", - ast: newSymbolNode('\u0000'), - }, - { - pattern: "\\u{10FFFF}", - ast: newSymbolNode('\U0010FFFF'), - }, - { - pattern: "\\u{110000}", - syntaxError: synErrCPExpOutOfRange, - }, - { - pattern: "\\u", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{03BD", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{}", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\p{Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{General_Category=Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{ Letter }", - skipTestAST: true, - }, - { - pattern: "\\p{ General_Category = Letter }", - skipTestAST: true, - }, - { - pattern: "\\p", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{Letter", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category= }", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=Letter}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{ =Letter}", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\f{a2c}", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f{ a2c }", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{a2c", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "(a)", - ast: newSymbolNode('a'), - }, - { - pattern: "(((a)))", - ast: newSymbolNode('a'), - }, - { - pattern: "a()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()a", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "(a", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((a)", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: ")", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "a)", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: ")a", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "(a))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "Mulder|Scully", - ast: genAltNode( - genConcatNode( - newSymbolNode('M'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('d'), - newSymbolNode('e'), - newSymbolNode('r'), - ), - genConcatNode( - newSymbolNode('S'), - newSymbolNode('c'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - ), - }, - { - pattern: "Langly|Frohike|Byers", - ast: genAltNode( - genConcatNode( - newSymbolNode('L'), - newSymbolNode('a'), - newSymbolNode('n'), - newSymbolNode('g'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - genConcatNode( - newSymbolNode('F'), - newSymbolNode('r'), - newSymbolNode('o'), - newSymbolNode('h'), - newSymbolNode('i'), - newSymbolNode('k'), - newSymbolNode('e'), - ), - genConcatNode( - newSymbolNode('B'), - newSymbolNode('y'), - newSymbolNode('e'), - newSymbolNode('r'), - newSymbolNode('s'), - ), - ), - }, - { - pattern: "|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "||", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Mulder|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Scully", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly||Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "(Fox|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|Mulder)", - syntaxError: synErrAltLackOfOperand, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { - fragmentTrees := map[spec.LexKindName]CPTree{} - for kind, pattern := range tt.fragments { - p := NewParser(kind, strings.NewReader(pattern)) - root, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - fragmentTrees[kind] = root - } - err := CompleteFragments(fragmentTrees) - if err != nil { - t.Fatal(err) - } - - p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) - root, err := p.Parse() - if tt.syntaxError != nil { - // printCPTree(os.Stdout, root, "", "") - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - _, synErr := p.Error() - if synErr != tt.syntaxError { - t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) - } - if root != nil { - t.Fatalf("tree must be nil") - } - } else { - if err != nil { - detail, cause := p.Error() - t.Fatalf("%v: %v: %v", err, cause, detail) - } - if root == nil { - t.Fatal("tree must be non-nil") - } - - complete, err := ApplyFragments(root, fragmentTrees) - if err != nil { - t.Fatal(err) - } - if !complete { - t.Fatalf("incomplete fragments") - } - - // printCPTree(os.Stdout, root, "", "") - if !tt.skipTestAST { - r := root.(*rootNode) - testAST(t, tt.ast, r.tree) - } - } - }) - } -} - -func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { - for _, cProp := range ucd.ContributoryProperties() { - t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { - p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) - root, err := p.Parse() - if err == nil { - t.Fatalf("expected syntax error: got: nil") - } - _, synErr := p.Error() - if synErr != synErrCharPropUnsupported { - t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) - } - if root != nil { - t.Fatalf("tree is not nil") - } - }) - } -} - -func TestExclude(t *testing.T) { - for _, test := range []struct { - caption string - target CPTree - base CPTree - result CPTree - }{ - // t.From > b.From && t.To < b.To - - // |t.From - b.From| = 1 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '2'), - result: newAltNode( - newSymbolNode('0'), - newSymbolNode('2'), - ), - }, - // |t.From - b.From| > 1 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+--+--+ - // Result (b - t): +--+--+ +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '4'), - result: newAltNode( - newRangeSymbolNode('0', '1'), - newRangeSymbolNode('3', '4'), - ), - }, - - // t.From <= b.From && t.To >= b.From && t.To < b.To - - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('1', '2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '3'), - result: newSymbolNode('3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '4'), - result: newRangeSymbolNode('3', '4'), - }, - - // t.From > b.From && t.From <= b.To && t.To >= b.To - - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '3'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '4'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - - // t.From <= b.From && t.To >= b.To - - // |b.From - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('0'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('1'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '2'), - base: newSymbolNode('1'), - result: nil, - }, - - // Others - - // |b.From - t.From| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 1", - target: newSymbolNode('0'), - base: newSymbolNode('1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 1", - target: newSymbolNode('0'), - base: newSymbolNode('2'), - result: newSymbolNode('2'), - }, - // |t.To - b.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| = 1", - target: newSymbolNode('1'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - // |t.To - b.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| > 1", - target: newSymbolNode('2'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - } { - t.Run(test.caption, func(t *testing.T) { - r := exclude(test.target, test.base) - testAST(t, test.result, r) - }) - } -} - -func testAST(t *testing.T, expected, actual CPTree) { - t.Helper() - - aTy := reflect.TypeOf(actual) - eTy := reflect.TypeOf(expected) - if eTy != aTy { - t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) - } - - if actual == nil { - return - } - - switch e := expected.(type) { - case *symbolNode: - a := actual.(*symbolNode) - if a.From != e.From || a.To != e.To { - t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) - } - } - eLeft, eRight := expected.children() - aLeft, aRight := actual.children() - testAST(t, eLeft, aLeft) - testAST(t, eRight, aRight) -} diff --git a/tests/unit/grammar/symbol.go b/tests/unit/grammar/symbol.go deleted file mode 100644 index 31c3edd..0000000 --- a/tests/unit/grammar/symbol.go +++ /dev/null @@ -1,159 +0,0 @@ -package symbol - -import "testing" - -func TestSymbol(t *testing.T) { - tab := NewSymbolTable() - w := tab.Writer() - _, _ = w.RegisterStartSymbol("expr'") - _, _ = w.RegisterNonTerminalSymbol("expr") - _, _ = w.RegisterNonTerminalSymbol("term") - _, _ = w.RegisterNonTerminalSymbol("factor") - _, _ = w.RegisterTerminalSymbol("id") - _, _ = w.RegisterTerminalSymbol("add") - _, _ = w.RegisterTerminalSymbol("mul") - _, _ = w.RegisterTerminalSymbol("l_paren") - _, _ = w.RegisterTerminalSymbol("r_paren") - - nonTermTexts := []string{ - "", // Nil - "expr'", - "expr", - "term", - "factor", - } - - termTexts := []string{ - "", // Nil - symbolNameEOF, // EOF - "id", - "add", - "mul", - "l_paren", - "r_paren", - } - - tests := []struct { - text string - isNil bool - isStart bool - isEOF bool - isNonTerminal bool - isTerminal bool - }{ - { - text: "expr'", - isStart: true, - isNonTerminal: true, - }, - { - text: "expr", - isNonTerminal: true, - }, - { - text: "term", - isNonTerminal: true, - }, - { - text: "factor", - isNonTerminal: true, - }, - { - text: "id", - isTerminal: true, - }, - { - text: "add", - isTerminal: true, - }, - { - text: "mul", - isTerminal: true, - }, - { - text: "l_paren", - isTerminal: true, - }, - { - text: "r_paren", - isTerminal: true, - }, - } - for _, tt := range tests { - t.Run(tt.text, func(t *testing.T) { - r := tab.Reader() - sym, ok := r.ToSymbol(tt.text) - if !ok { - t.Fatalf("symbol was not found") - } - testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal) - text, ok := r.ToText(sym) - if !ok { - t.Fatalf("text was not found") - } - if text != tt.text { - t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text) - } - }) - } - - t.Run("EOF", func(t *testing.T) { - testSymbolProperty(t, SymbolEOF, false, false, true, false, true) - }) - - t.Run("Nil", func(t *testing.T) { - testSymbolProperty(t, SymbolNil, true, false, false, false, false) - }) - - t.Run("texts of non-terminals", func(t *testing.T) { - r := tab.Reader() - ts, err := r.NonTerminalTexts() - if err != nil { - t.Fatal(err) - } - if len(ts) != len(nonTermTexts) { - t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts) - } - for i, text := range ts { - if text != nonTermTexts[i] { - t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text) - } - } - }) - - t.Run("texts of terminals", func(t *testing.T) { - r := tab.Reader() - ts, err := r.TerminalTexts() - if err != nil { - t.Fatal(err) - } - if len(ts) != len(termTexts) { - t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts) - } - for i, text := range ts { - if text != termTexts[i] { - t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text) - } - } - }) -} - -func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) { - t.Helper() - - if v := sym.IsNil(); v != isNil { - t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v) - } - if v := sym.IsStart(); v != isStart { - t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v) - } - if v := sym.isEOF(); v != isEOF { - t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v) - } - if v := sym.isNonTerminal(); v != isNonTerminal { - t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v) - } - if v := sym.IsTerminal(); v != isTerminal { - t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v) - } -} |