diff options
Diffstat (limited to 'tests/unit/grammar/lexical')
-rw-r--r-- | tests/unit/grammar/lexical/dfa/dfa.go | 445 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/dfa/main.go | 7 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/lexical.go | 341 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/main.go | 7 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/parser/main.go | 7 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/parser/parser.go | 1910 |
6 files changed, 2717 insertions, 0 deletions
diff --git a/tests/unit/grammar/lexical/dfa/dfa.go b/tests/unit/grammar/lexical/dfa/dfa.go new file mode 100644 index 0000000..3233969 --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/dfa.go @@ -0,0 +1,445 @@ +package dfa + +import ( + "fmt" + "strings" + "testing" + + "urubu/grammar/lexical/parser" + spec "urubu/spec/grammar" +) + +func TestGenDFA(t *testing.T) { + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + dfa := GenDFA(bt, symTab) + if dfa == nil { + t.Fatalf("DFA is nil") + } + + symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) + s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) + s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) + s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) + + rune2Int := func(char rune, index int) uint8 { + return uint8([]byte(string(char))[index]) + } + + tranS0 := [256]string{} + tranS0[rune2Int('a', 0)] = s1.hash() + tranS0[rune2Int('b', 0)] = s0.hash() + + tranS1 := [256]string{} + tranS1[rune2Int('a', 0)] = s1.hash() + tranS1[rune2Int('b', 0)] = s2.hash() + + tranS2 := [256]string{} + tranS2[rune2Int('a', 0)] = s1.hash() + tranS2[rune2Int('b', 0)] = s3.hash() + + tranS3 := [256]string{} + tranS3[rune2Int('a', 0)] = s1.hash() + tranS3[rune2Int('b', 0)] = s0.hash() + + expectedTranTab := map[string][256]string{ + s0.hash(): tranS0, + s1.hash(): tranS1, + s2.hash(): 
tranS2, + s3.hash(): tranS3, + } + if len(dfa.TransitionTable) != len(expectedTranTab) { + t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) + } + for h, eTranTab := range expectedTranTab { + tranTab, ok := dfa.TransitionTable[h] + if !ok { + t.Errorf("no entry; hash: %v", h) + continue + } + if len(tranTab) != len(eTranTab) { + t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) + } + for c, eNext := range eTranTab { + if eNext == "" { + continue + } + + next := tranTab[c] + if next == "" { + t.Errorf("no entry: hash: %v, char: %v", h, c) + } + if next != eNext { + t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) + } + } + } + + if dfa.InitialState != s0.hash() { + t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) + } + + accTab := map[string]spec.LexModeKindID{ + s3.hash(): 1, + } + if len(dfa.AcceptingStatesTable) != len(accTab) { + t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) + } + for eState, eID := range accTab { + id, ok := dfa.AcceptingStatesTable[eState] + if !ok { + t.Errorf("accepting state is not found: state: %v", eState) + } + if id != eID { + t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) + } + } +} + +func TestNewSymbolPosition(t *testing.T) { + tests := []struct { + n uint16 + endMark bool + err bool + }{ + { + n: 0, + endMark: false, + err: true, + }, + { + n: 0, + endMark: true, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: true, + err: true, + }, + { + n: symbolPositionMin, + endMark: false, + }, + { + n: symbolPositionMin, + endMark: true, + }, + { + n: symbolPositionMax, + endMark: false, + }, + { + n: symbolPositionMax, + endMark: true, + }, + { + n: 
symbolPositionMax + 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMax + 1, + endMark: true, + err: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { + pos, err := newSymbolPosition(tt.n, tt.endMark) + if tt.err { + if err == nil { + t.Fatal("err is nil") + } + return + } + if err != nil { + t.Fatal(err) + } + n, endMark := pos.describe() + if n != tt.n || endMark != tt.endMark { + t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) + } + }) + } +} + +func TestByteTree(t *testing.T) { + tests := []struct { + root byteTree + nullable bool + first *symbolPositionSet + last *symbolPositionSet + }{ + { + root: newSymbolNodeWithPos(0, 1), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newEndMarkerNodeWithPos(1, 1), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newConcatNode( + newSymbolNodeWithPos(0, 1), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(2), + }, + { + root: newConcatNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(2), + }, + { + root: newConcatNode( + newSymbolNodeWithPos(0, 1), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newConcatNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newSymbolNodeWithPos(0, 1), + 
newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newSymbolNodeWithPos(0, 2), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newSymbolNodeWithPos(0, 1), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newRepeatNode(newSymbolNodeWithPos(0, 1)), + nullable: true, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newOptionNode(newSymbolNodeWithPos(0, 1)), + nullable: true, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + if tt.root.nullable() != tt.nullable { + t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) + } + if tt.first.hash() != tt.root.first().hash() { + t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first()) + } + if tt.last.hash() != tt.root.last().hash() { + t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) + } + }) + } +} + +func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { + n := newSymbolNode(v) + n.pos = pos + return n +} + +func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { + n := newEndMarkerNode(spec.LexModeKindID(id)) + n.pos = pos + return n +} + +func TestFollowAndSymbolTable(t *testing.T) { + 
symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + + { + followTab := genFollowTable(bt) + if followTab == nil { + t.Fatal("follow table is nil") + } + expectedFollowTab := followTable{ + 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 3: newSymbolPositionSet().add(symPos(4)), + 4: newSymbolPositionSet().add(symPos(5)), + 5: newSymbolPositionSet().add(endPos(6)), + } + testFollowTable(t, expectedFollowTab, followTab) + } + + { + entry := func(v byte) byteRange { + return byteRange{ + from: v, + to: v, + } + } + + expectedSymTab := &symbolTable{ + symPos2Byte: map[symbolPosition]byteRange{ + symPos(1): entry(byte('a')), + symPos(2): entry(byte('b')), + symPos(3): entry(byte('a')), + symPos(4): entry(byte('b')), + symPos(5): entry(byte('b')), + }, + endPos2ID: map[symbolPosition]spec.LexModeKindID{ + endPos(6): 1, + }, + } + testSymbolTable(t, expectedSymTab, symTab) + } +} + +func testFollowTable(t *testing.T, expected, actual followTable) { + if len(actual) != len(expected) { + t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) + } + for ePos, eSet := range expected { + aSet, ok := actual[ePos] + if !ok { + t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) + } + if aSet.hash() != eSet.hash() { + t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", 
ePos, aSet, eSet) + } + } +} + +func testSymbolTable(t *testing.T, expected, actual *symbolTable) { + t.Helper() + + if len(actual.symPos2Byte) != len(expected.symPos2Byte) { + t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) + } + for ePos, eByte := range expected.symPos2Byte { + byte, ok := actual.symPos2Byte[ePos] + if !ok { + t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) + continue + } + if byte.from != eByte.from || byte.to != eByte.to { + t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte) + } + } + + if len(actual.endPos2ID) != len(expected.endPos2ID) { + t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) + } + for ePos, eID := range expected.endPos2ID { + id, ok := actual.endPos2ID[ePos] + if !ok { + t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) + continue + } + if id != eID { + t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) + } + } +} + + +func MainTest() {} diff --git a/tests/unit/grammar/lexical/dfa/main.go b/tests/unit/grammar/lexical/dfa/main.go new file mode 100644 index 0000000..e0b2fd0 --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/main.go @@ -0,0 +1,7 @@ +package main + +import "dfa" + +func main() { + dfa.MainTest() +} diff --git a/tests/unit/grammar/lexical/lexical.go b/tests/unit/grammar/lexical/lexical.go new file mode 100644 index 0000000..54cace4 --- /dev/null +++ b/tests/unit/grammar/lexical/lexical.go @@ -0,0 +1,341 @@ +package lexical + +import ( + "encoding/json" + "fmt" + "testing" + + spec "urubu/spec/grammar" +) + +func TestLexSpec_Validate(t *testing.T) { + // We expect that the spelling inconsistency error will occur. 
+ spec := &LexSpec{ + Entries: []*LexEntry{ + { + Modes: []spec.LexModeName{ + // 'Default' is the spelling inconsistency because 'default' is predefined. + "Default", + }, + Kind: "foo", + Pattern: "foo", + }, + }, + } + err := spec.Validate() + if err == nil { + t.Fatalf("expected error didn't occur") + } +} + +func TestSnakeCaseToUpperCamelCase(t *testing.T) { + tests := []struct { + snake string + camel string + }{ + { + snake: "foo", + camel: "Foo", + }, + { + snake: "foo_bar", + camel: "FooBar", + }, + { + snake: "foo_bar_baz", + camel: "FooBarBaz", + }, + { + snake: "Foo", + camel: "Foo", + }, + { + snake: "fooBar", + camel: "FooBar", + }, + { + snake: "FOO", + camel: "FOO", + }, + { + snake: "FOO_BAR", + camel: "FOOBAR", + }, + { + snake: "_foo_bar_", + camel: "FooBar", + }, + { + snake: "___foo___bar___", + camel: "FooBar", + }, + } + for _, tt := range tests { + c := SnakeCaseToUpperCamelCase(tt.snake) + if c != tt.camel { + t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) + } + } +} + +func TestFindSpellingInconsistencies(t *testing.T) { + tests := []struct { + ids []string + duplicated [][]string + }{ + { + ids: []string{"foo", "foo"}, + duplicated: nil, + }, + { + ids: []string{"foo", "Foo"}, + duplicated: [][]string{{"Foo", "foo"}}, + }, + { + ids: []string{"foo", "foo", "Foo"}, + duplicated: [][]string{{"Foo", "foo"}}, + }, + { + ids: []string{"foo_bar_baz", "FooBarBaz"}, + duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + duplicated := FindSpellingInconsistencies(tt.ids) + if len(duplicated) != len(tt.duplicated) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) + } + for i, 
dupIDs := range duplicated { + if len(dupIDs) != len(tt.duplicated[i]) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + for j, id := range dupIDs { + if id != tt.duplicated[i][j] { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + } + } + }) + } +} + +func TestCompile(t *testing.T) { + tests := []struct { + Caption string + Spec string + Err bool + }{ + { + Caption: "allow duplicates names between fragments and non-fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "\\f{a2z}" + }, + { + "fragment": true, + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + }, + { + Caption: "don't allow duplicates names in non-fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" + }, + { + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow duplicates names in fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "\\f{a2z}" + }, + { + "fragments": true, + "kind": "a2z", + "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" + }, + { + "fragments": true, + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow kind names in the same mode to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow kind names across modes to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + "entries": [ + { + "modes": ["default"], + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "modes": ["other_mode"], + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow mode names to contain spelling inconsistencies", + Spec: ` +{ + "name": 
"test", + "entries": [ + { + "modes": ["foo_1"], + "kind": "a", + "pattern": "a" + }, + { + "modes": ["foo1"], + "kind": "b", + "pattern": "b" + } + ] +} +`, + Err: true, + }, + { + Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a", + "pattern": "a" + }, + { + "fragment": true, + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "fragment": true, + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + }, + { + Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files", + Spec: ` +{ + "name": "test", + "entries": [ + { + "modes": ["default"], + "kind": "a", + "pattern": "a" + }, + { + "modes": ["default"], + "fragment": true, + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "modes": ["other_mode"], + "fragment": true, + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) { + lspec := &LexSpec{} + err := json.Unmarshal([]byte(tt.Spec), lspec) + if err != nil { + t.Fatalf("%v", err) + } + clspec, err, _ := Compile(lspec, CompressionLevelMin) + if tt.Err { + if err == nil { + t.Fatalf("expected an error") + } + if clspec != nil { + t.Fatalf("Compile function mustn't return a compiled specification") + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if clspec == nil { + t.Fatalf("Compile function must return a compiled specification") + } + } + }) + } +} + + +func MainTest() {} diff --git a/tests/unit/grammar/lexical/main.go b/tests/unit/grammar/lexical/main.go new file mode 100644 index 0000000..19fcce8 --- /dev/null +++ b/tests/unit/grammar/lexical/main.go @@ -0,0 +1,7 @@ +package main + +import "lexical" + +func main() { + lexical.MainTest() +} diff --git a/tests/unit/grammar/lexical/parser/main.go 
b/tests/unit/grammar/lexical/parser/main.go new file mode 100644 index 0000000..a99bfc4 --- /dev/null +++ b/tests/unit/grammar/lexical/parser/main.go @@ -0,0 +1,7 @@ +package main + +import "parser" + +func main() { + parser.MainTest() +} diff --git a/tests/unit/grammar/lexical/parser/parser.go b/tests/unit/grammar/lexical/parser/parser.go new file mode 100644 index 0000000..288de95 --- /dev/null +++ b/tests/unit/grammar/lexical/parser/parser.go @@ -0,0 +1,1910 @@ +package parser + +import ( + "fmt" + "reflect" + "strings" + "testing" + + spec "urubu/spec/grammar" + "urubu/ucd" +) + +func TestLexer(t *testing.T) { + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "lexer can recognize ordinaly characters", + src: "123abcいろは", + tokens: []*token{ + newToken(tokenKindChar, '1'), + newToken(tokenKindChar, '2'), + newToken(tokenKindChar, '3'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, 'b'), + newToken(tokenKindChar, 'c'), + newToken(tokenKindChar, 'い'), + newToken(tokenKindChar, 'ろ'), + newToken(tokenKindChar, 'は'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in default mode", + src: ".*+?|()[\\u", + tokens: []*token{ + newToken(tokenKindAnyChar, nullChar), + newToken(tokenKindRepeat, nullChar), + newToken(tokenKindRepeatOneOrMore, nullChar), + newToken(tokenKindOption, nullChar), + newToken(tokenKindAlt, nullChar), + newToken(tokenKindGroupOpen, nullChar), + newToken(tokenKindGroupClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in default mode", + src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", + tokens: []*token{ + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + 
newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "], {, and } are treated as an ordinary character in default mode", + src: "]{}", + tokens: []*token{ + newToken(tokenKindChar, ']'), + newToken(tokenKindChar, '{'), + newToken(tokenKindChar, '}'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in bracket expression mode", + src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09AF"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09abcf"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in bracket expression mode", + src: "[\\^a\\-z]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "in a bracket expression, the special characters are also handled as normal characters", + src: "[\\\\.*+?|()[", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + 
newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // [...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character-range expression. + // b. *-, -*, or - are handled as ordinary characters. + src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + 
newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", + // [^...^...][^] + // ~~ ~ ~~ + // ^ ^ ^^ + // | | |`-- Ordinary Character (c) + // | | `-- Bracket Expression + // | `-- Ordinary Character (b) + // `-- Inverse Bracket Expression (a) + // + // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. + // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. + // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. 
+ src: "[^^][^]", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "\\@", + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "\\", + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "[\\@", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "[\\", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer can recognize the special characters and code points in code point expression mode", + src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + 
newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a one digit hex string isn't a valid code point", + src: "\\u{0", + tokens: []*token{ + 
newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a two digits hex string isn't a valid code point", + src: "\\u{01", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a three digits hex string isn't a valid code point", + src: "\\u{012", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a four digits hex string is a valid code point", + src: "\\u{0123}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a five digits hex string isn't a valid code point", + src: "\\u{01234", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a six digits hex string is a valid code point", + src: "\\u{012345}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("012345"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a seven digits hex string isn't a valid code point", + src: "\\u{0123456", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{g", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{G", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + 
}, + err: synErrInvalidCodePoint, + }, + { + caption: "lexer can recognize the special characters and symbols in character property expression mode", + src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", + tokens: []*token{ + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters and symbols in fragment expression mode", + src: "\\f{integer}", + tokens: []*token{ + newToken(tokenKindFragmentLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + 
newFragmentSymbolToken("integer"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a fragment expression is not supported in a bracket expression", + src: "[\\f", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "a fragment expression is not supported in an inverse bracket expression", + src: "[^\\f", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + lex := newLexer(strings.NewReader(tt.src)) + var err error + var tok *token + i := 0 + for { + tok, err = lex.next() + if err != nil { + break + } + if i >= len(tt.tokens) { + break + } + eTok := tt.tokens[i] + i++ + testToken(t, tok, eTok) + + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + detail, cause := lex.error() + if cause != tt.err { + t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + if i < len(tt.tokens) { + t.Fatalf("expecte more tokens") + } + }) + } +} + +func testToken(t *testing.T, a, e *token) { + t.Helper() + if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { + t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) + } +} + +func TestParse(t *testing.T) { + tests := []struct { + pattern string + fragments map[spec.LexKindName]string + ast CPTree + syntaxError error + + // When an AST is large, as patterns containing a character property expression, this test only checks + // that the pattern is parsable. The check of the validity of such AST is performed by checking that it + // can be matched correctly using the driver. 
+ skipTestAST bool + }{ + { + pattern: "a", + ast: newSymbolNode('a'), + }, + { + pattern: "abc", + ast: genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + }, + { + pattern: "a?", + ast: newOptionNode( + newSymbolNode('a'), + ), + }, + { + pattern: "[abc]?", + ast: newOptionNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\u{3042}?", + ast: newOptionNode( + newSymbolNode('\u3042'), + ), + }, + { + pattern: "\\p{Letter}?", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}?", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newOptionNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "(a)?", + ast: newOptionNode( + newSymbolNode('a'), + ), + }, + { + pattern: "((a?)?)?", + ast: newOptionNode( + newOptionNode( + newOptionNode( + newSymbolNode('a'), + ), + ), + ), + }, + { + pattern: "(abc)?", + ast: newOptionNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "(a|b)?", + ast: newOptionNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + }, + { + pattern: "?", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(?)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|?", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "?|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a??", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a*", + ast: newRepeatNode( + newSymbolNode('a'), + ), + }, + { + pattern: "[abc]*", + ast: newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\u{3042}*", + ast: newRepeatNode( + newSymbolNode('\u3042'), + ), + }, + { + pattern: "\\p{Letter}*", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}*", + fragments: 
map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newRepeatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "((a*)*)*", + ast: newRepeatNode( + newRepeatNode( + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + }, + { + pattern: "(abc)*", + ast: newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "(a|b)*", + ast: newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + }, + { + pattern: "*", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(*)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|*", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "*|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a**", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a+", + ast: genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + }, + { + pattern: "[abc]+", + ast: genConcatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "\\u{3042}+", + ast: genConcatNode( + newSymbolNode('\u3042'), + newRepeatNode( + newSymbolNode('\u3042'), + ), + ), + }, + { + pattern: "\\p{Letter}+", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}+", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: genConcatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + newRepeatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + ), + }, + { + pattern: "((a+)+)+", + ast: genConcatNode( + genConcatNode( + genConcatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + 
), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + ), + newRepeatNode( + genConcatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + ), + ), + ), + ), + }, + { + pattern: "(abc)+", + ast: genConcatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "(a|b)+", + ast: genConcatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + ), + }, + { + pattern: "+", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(+)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|+", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "+|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a++", + syntaxError: synErrRepNoTarget, + }, + { + pattern: ".", + ast: newRangeSymbolNode(0x00, 0x10FFFF), + }, + { + pattern: "[a]", + ast: newSymbolNode('a'), + }, + { + pattern: "[abc]", + ast: genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + }, + { + pattern: "[a-z]", + ast: newRangeSymbolNode('a', 'z'), + }, + { + pattern: "[A-Za-z]", + ast: genAltNode( + newRangeSymbolNode('A', 'Z'), + newRangeSymbolNode('a', 'z'), + ), + }, + { + pattern: "[\\u{004E}]", + ast: newSymbolNode('N'), + }, + { + pattern: "[\\u{0061}-\\u{007A}]", + ast: newRangeSymbolNode('a', 'z'), + }, + { + pattern: "[\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-\\p{Ll}]", + syntaxError: 
synErrRangePropIsUnavailable, + }, + { + pattern: "[z-a]", + syntaxError: synErrRangeInvalidOrder, + }, + { + pattern: "a[]", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[]a", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[]", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[^\\u{004E}]", + ast: genAltNode( + newRangeSymbolNode(0x00, '\u004E'-1), + newRangeSymbolNode('\u004E'+1, 0x10FFFF), + ), + }, + { + pattern: "[^\\u{0061}-\\u{007A}]", + ast: genAltNode( + newRangeSymbolNode(0x00, '\u0061'-1), + newRangeSymbolNode('\u007A'+1, 0x10FFFF), + ), + }, + { + pattern: "[^\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[^a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\u{0000}-\\u{10FFFF}]", + syntaxError: synErrUnmatchablePattern, + }, + { + pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", + syntaxError: synErrUnmatchablePattern, + }, + { + pattern: "[^]", + ast: newSymbolNode('^'), + }, + { + pattern: "[", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "]", + ast: newSymbolNode(']'), + }, + { + pattern: "(]", + syntaxError: synErrGroupUnclosed, + }, + { 
+ pattern: "a]", + ast: genConcatNode( + newSymbolNode('a'), + newSymbolNode(']'), + ), + }, + { + pattern: "(a]", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "([)", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a)", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a-]", + ast: genAltNode( + newSymbolNode('a'), + newSymbolNode('-'), + ), + }, + { + pattern: "[^a-]", + ast: genAltNode( + newRangeSymbolNode(0x00, 0x2C), + newRangeSymbolNode(0x2E, 0x60), + newRangeSymbolNode(0x62, 0x10FFFF), + ), + }, + { + pattern: "[-z]", + ast: genAltNode( + newSymbolNode('-'), + newSymbolNode('z'), + ), + }, + { + pattern: "[^-z]", + ast: newAltNode( + newRangeSymbolNode(0x00, 0x2C), + newAltNode( + newRangeSymbolNode(0x2E, 0x79), + newRangeSymbolNode(0x7B, 0x10FFFF), + ), + ), + }, + { + pattern: "[-]", + ast: newSymbolNode('-'), + }, + { + pattern: "[^-]", + ast: genAltNode( + newRangeSymbolNode(0x00, 0x2C), + newRangeSymbolNode(0x2E, 0x10FFFF), + ), + }, + { + pattern: "[^01]", + ast: genAltNode( + newRangeSymbolNode(0x00, '0'-1), + newRangeSymbolNode('1'+1, 0x10FFFF), + ), + }, + { + pattern: "[^10]", + ast: genAltNode( + newRangeSymbolNode(0x00, '0'-1), + newRangeSymbolNode('1'+1, 0x10FFFF), + ), + }, + { + pattern: "[^a-z]", + ast: genAltNode( + newRangeSymbolNode(0x00, 'a'-1), + newRangeSymbolNode('z'+1, 0x10FFFF), + ), + }, + { + pattern: "[^az]", + ast: genAltNode( + newRangeSymbolNode(0x00, 'a'-1), + genAltNode( + newRangeSymbolNode('a'+1, 'z'-1), + newRangeSymbolNode('z'+1, 0x10FFFF), + ), + ), + }, + { + pattern: "\\u{006E}", + ast: newSymbolNode('\u006E'), + }, + { + pattern: "\\u{03BD}", + ast: newSymbolNode('\u03BD'), + }, + { + pattern: "\\u{306B}", + ast: newSymbolNode('\u306B'), + }, + { + pattern: "\\u{01F638}", + ast: newSymbolNode('\U0001F638'), + }, + { + pattern: "\\u{0000}", + ast: newSymbolNode('\u0000'), + }, + { + pattern: "\\u{10FFFF}", + ast: newSymbolNode('\U0010FFFF'), + }, + { + pattern: "\\u{110000}", + syntaxError: 
synErrCPExpOutOfRange, + }, + { + pattern: "\\u", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{03BD", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{}", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\p{Letter}", + skipTestAST: true, + }, + { + pattern: "\\p{General_Category=Letter}", + skipTestAST: true, + }, + { + pattern: "\\p{ Letter }", + skipTestAST: true, + }, + { + pattern: "\\p{ General_Category = Letter }", + skipTestAST: true, + }, + { + pattern: "\\p", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{Letter", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{General_Category=}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{General_Category= }", + syntaxError: synErrCharPropInvalidSymbol, + }, + { + pattern: "\\p{=Letter}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{ =Letter}", + syntaxError: synErrCharPropInvalidSymbol, + }, + { + pattern: "\\p{=}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\f{a2c}", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\f{ a2c }", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\f", + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "\\f{", + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "\\f{a2c", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + syntaxError: synErrFragmentExpInvalidForm, + 
}, + { + pattern: "(a)", + ast: newSymbolNode('a'), + }, + { + pattern: "(((a)))", + ast: newSymbolNode('a'), + }, + { + pattern: "a()", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "()a", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "()", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "(", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "a(", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "(a", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "((", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "((a)", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: ")", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "a)", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: ")a", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "))", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "(a))", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "Mulder|Scully", + ast: genAltNode( + genConcatNode( + newSymbolNode('M'), + newSymbolNode('u'), + newSymbolNode('l'), + newSymbolNode('d'), + newSymbolNode('e'), + newSymbolNode('r'), + ), + genConcatNode( + newSymbolNode('S'), + newSymbolNode('c'), + newSymbolNode('u'), + newSymbolNode('l'), + newSymbolNode('l'), + newSymbolNode('y'), + ), + ), + }, + { + pattern: "Langly|Frohike|Byers", + ast: genAltNode( + genConcatNode( + newSymbolNode('L'), + newSymbolNode('a'), + newSymbolNode('n'), + newSymbolNode('g'), + newSymbolNode('l'), + newSymbolNode('y'), + ), + genConcatNode( + newSymbolNode('F'), + newSymbolNode('r'), + newSymbolNode('o'), + newSymbolNode('h'), + newSymbolNode('i'), + newSymbolNode('k'), + newSymbolNode('e'), + ), + genConcatNode( + newSymbolNode('B'), + newSymbolNode('y'), + newSymbolNode('e'), + newSymbolNode('r'), + newSymbolNode('s'), + ), + ), + }, + { + pattern: "|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "||", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Mulder|", + 
syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Scully", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Langly|Frohike|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Langly||Byers", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Frohike|Byers", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Frohike|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Fox(|)Mulder", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "(Fox|)Mulder", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Fox(|Mulder)", + syntaxError: synErrAltLackOfOperand, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { + fragmentTrees := map[spec.LexKindName]CPTree{} + for kind, pattern := range tt.fragments { + p := NewParser(kind, strings.NewReader(pattern)) + root, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + fragmentTrees[kind] = root + } + err := CompleteFragments(fragmentTrees) + if err != nil { + t.Fatal(err) + } + + p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) + root, err := p.Parse() + if tt.syntaxError != nil { + // printCPTree(os.Stdout, root, "", "") + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + _, synErr := p.Error() + if synErr != tt.syntaxError { + t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) + } + if root != nil { + t.Fatalf("tree must be nil") + } + } else { + if err != nil { + detail, cause := p.Error() + t.Fatalf("%v: %v: %v", err, cause, detail) + } + if root == nil { + t.Fatal("tree must be non-nil") + } + + complete, err := ApplyFragments(root, fragmentTrees) + if err != nil { + t.Fatal(err) + } + if !complete { + t.Fatalf("incomplete fragments") + } + + // printCPTree(os.Stdout, root, "", "") + if !tt.skipTestAST { + r := root.(*rootNode) + testAST(t, tt.ast, r.tree) + } + } + }) + } +} + +func 
TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { + for _, cProp := range ucd.ContributoryProperties() { + t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { + p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) + root, err := p.Parse() + if err == nil { + t.Fatalf("expected syntax error: got: nil") + } + _, synErr := p.Error() + if synErr != synErrCharPropUnsupported { + t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) + } + if root != nil { + t.Fatalf("tree is not nil") + } + }) + } +} + +func TestExclude(t *testing.T) { + for _, test := range []struct { + caption string + target CPTree + base CPTree + result CPTree + }{ + // t.From > b.From && t.To < b.To + + // |t.From - b.From| = 1 + // |b.To - t.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", + target: newSymbolNode('1'), + base: newRangeSymbolNode('0', '2'), + result: newAltNode( + newSymbolNode('0'), + newSymbolNode('2'), + ), + }, + // |t.From - b.From| > 1 + // |b.To - t.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+--+--+ + // Result (b - t): +--+--+ +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", + target: newSymbolNode('2'), + base: newRangeSymbolNode('0', '4'), + result: newAltNode( + newRangeSymbolNode('0', '1'), + newRangeSymbolNode('3', '4'), + ), + }, + + // t.From <= b.From && t.To >= b.From && t.To < b.To + + // |b.From - t.From| = 0 + // |t.To - b.From| = 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", + target: newSymbolNode('0'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('1'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| = 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + 
// Result (b - t): +--+--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", + target: newSymbolNode('0'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('1', '2'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| > 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('2'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| > 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('2', '3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| = 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('1', '2'), + result: newSymbolNode('2'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| = 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('1', '3'), + result: newRangeSymbolNode('2', '3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| > 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '2'), + base: newRangeSymbolNode('1', '3'), + result: 
newSymbolNode('3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| > 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '2'), + base: newRangeSymbolNode('1', '4'), + result: newRangeSymbolNode('3', '4'), + }, + + // t.From > b.From && t.From <= b.To && t.To >= b.To + + // |t.From - b.From| = 1 + // |b.To - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('1'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('1', '2'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('1', '2'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('1', '3'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b 
- t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('2'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('2', '3'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('2', '3'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('2', '4'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('0', '1'), + }, + + // t.From <= b.From && t.To >= b.To + + // |b.From - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('0'), + base: newSymbolNode('0'), + result: nil, + }, + // |b.From - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('0', '1'), + base: newSymbolNode('0'), + result: nil, + }, + // |b.From - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+ + 
// Result (b - t): N/A + { + caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('0', '1'), + base: newSymbolNode('1'), + result: nil, + }, + // |b.From - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('0', '2'), + base: newSymbolNode('1'), + result: nil, + }, + + // Others + + // |b.From - t.From| = 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 1", + target: newSymbolNode('0'), + base: newSymbolNode('1'), + result: newSymbolNode('1'), + }, + // |b.From - t.From| > 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 1", + target: newSymbolNode('0'), + base: newSymbolNode('2'), + result: newSymbolNode('2'), + }, + // |t.To - b.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|t.To - b.To| = 1", + target: newSymbolNode('1'), + base: newSymbolNode('0'), + result: newSymbolNode('0'), + }, + // |t.To - b.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|t.To - b.To| > 1", + target: newSymbolNode('2'), + base: newSymbolNode('0'), + result: newSymbolNode('0'), + }, + } { + t.Run(test.caption, func(t *testing.T) { + r := exclude(test.target, test.base) + testAST(t, test.result, r) + }) + } +} + +func testAST(t *testing.T, expected, actual CPTree) { + t.Helper() + + aTy := reflect.TypeOf(actual) + eTy := reflect.TypeOf(expected) + if eTy != aTy { + t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) + } + + if actual == nil { + return + } + + switch e := expected.(type) { + case *symbolNode: + a := actual.(*symbolNode) + if a.From != e.From || a.To != e.To { + t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) + } + } + eLeft, eRight := expected.children() + 
aLeft, aRight := actual.children() + testAST(t, eLeft, aLeft) + testAST(t, eRight, aRight) +} + + +func MainTest() {} |