diff options
Diffstat (limited to 'tests/unit/grammar/lexical/parser')
-rw-r--r-- | tests/unit/grammar/lexical/parser/lexer_test.go | 524 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/parser/parser_test.go | 1389 |
2 files changed, 0 insertions, 1913 deletions
diff --git a/tests/unit/grammar/lexical/parser/lexer_test.go b/tests/unit/grammar/lexical/parser/lexer_test.go deleted file mode 100644 index 055466e..0000000 --- a/tests/unit/grammar/lexical/parser/lexer_test.go +++ /dev/null @@ -1,524 +0,0 @@ -package parser - -import ( - "strings" - "testing" -) - -func TestLexer(t *testing.T) { - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "lexer can recognize ordinaly characters", - src: "123abcいろは", - tokens: []*token{ - newToken(tokenKindChar, '1'), - newToken(tokenKindChar, '2'), - newToken(tokenKindChar, '3'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, 'b'), - newToken(tokenKindChar, 'c'), - newToken(tokenKindChar, 'い'), - newToken(tokenKindChar, 'ろ'), - newToken(tokenKindChar, 'は'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in default mode", - src: ".*+?|()[\\u", - tokens: []*token{ - newToken(tokenKindAnyChar, nullChar), - newToken(tokenKindRepeat, nullChar), - newToken(tokenKindRepeatOneOrMore, nullChar), - newToken(tokenKindOption, nullChar), - newToken(tokenKindAlt, nullChar), - newToken(tokenKindGroupOpen, nullChar), - newToken(tokenKindGroupClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in default mode", - src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", - tokens: []*token{ - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "], {, and } are treated as an ordinary character in default mode", - src: "]{}", - tokens: []*token{ - newToken(tokenKindChar, ']'), - newToken(tokenKindChar, '{'), - newToken(tokenKindChar, '}'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in bracket expression mode", - src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09AF"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09abcf"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in bracket expression mode", - src: "[\\^a\\-z]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "in a bracket expression, the special characters are also handled as normal characters", - src: "[\\\\.*+?|()[", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", - // [...-...][...-][-...][-] - // ~~~~~~~ ~ ~ ~ - // ^ ^ ^ ^ - // | | | `-- Ordinary Character (b) - // | | `-- Ordinary Character (b) - // | `-- Ordinary Character (b) - // `-- Character Range (a) - // - // a. *-* is handled as a character-range expression. - // b. *-, -*, or - are handled as ordinary characters. - src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", - // [^...^...][^] - // ~~ ~ ~~ - // ^ ^ ^^ - // | | |`-- Ordinary Character (c) - // | | `-- Bracket Expression - // | `-- Ordinary Character (b) - // `-- Inverse Bracket Expression (a) - // - // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. - // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. - // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. - src: "[^^][^]", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "\\@", - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "\\", - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "[\\@", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "[\\", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer can recognize the special characters and code points in code point expression mode", - src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a one digit hex string isn't a valid code point", - src: "\\u{0", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a two digits hex string isn't a valid code point", - src: "\\u{01", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a three digits hex string isn't a valid code point", - src: "\\u{012", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a four digits hex string is a valid code point", - src: "\\u{0123}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a five digits hex string isn't a valid code point", - src: "\\u{01234", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a six digits hex string is a valid code point", - src: "\\u{012345}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("012345"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a seven digits hex string isn't a valid code point", - src: "\\u{0123456", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{g", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{G", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "lexer can recognize the special characters and symbols in character property expression mode", - src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", - tokens: []*token{ - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters and symbols in fragment expression mode", - src: "\\f{integer}", - tokens: []*token{ - newToken(tokenKindFragmentLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newFragmentSymbolToken("integer"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a fragment expression is not supported in a bracket expression", - src: "[\\f", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "a fragment expression is not supported in an inverse bracket expression", - src: "[^\\f", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - lex := newLexer(strings.NewReader(tt.src)) - var err error - var tok *token - i := 0 - for { - tok, err = lex.next() - if err != nil { - break - } - if i >= len(tt.tokens) { - break - } - eTok := tt.tokens[i] - i++ - testToken(t, tok, eTok) - - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - detail, cause := lex.error() - if cause != tt.err { - t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - if i < len(tt.tokens) { - t.Fatalf("expecte more tokens") - } - }) - } -} - -func testToken(t *testing.T, a, e *token) { - t.Helper() - if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { - t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) - } -} diff --git a/tests/unit/grammar/lexical/parser/parser_test.go b/tests/unit/grammar/lexical/parser/parser_test.go deleted file mode 100644 index 4c9557d..0000000 --- a/tests/unit/grammar/lexical/parser/parser_test.go +++ /dev/null @@ -1,1389 +0,0 @@ -package parser - -import ( - "fmt" - "reflect" - "strings" - "testing" - - spec "urubu/spec/grammar" - "urubu/ucd" -) - -func TestParse(t *testing.T) { - tests := []struct { - pattern string - fragments map[spec.LexKindName]string - ast CPTree - syntaxError error - - // When an AST is large, as patterns containing a character property expression, this test only checks - // that the pattern is parsable. The check of the validity of such AST is performed by checking that it - // can be matched correctly using the driver. - skipTestAST bool - }{ - { - pattern: "a", - ast: newSymbolNode('a'), - }, - { - pattern: "abc", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "a?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}?", - ast: newOptionNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}?", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}?", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newOptionNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a)?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "((a?)?)?", - ast: newOptionNode( - newOptionNode( - newOptionNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)?", - ast: newOptionNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(?)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "?|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a??", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a*", - ast: newRepeatNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}*", - ast: newRepeatNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}*", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}*", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "((a*)*)*", - ast: newRepeatNode( - newRepeatNode( - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)*", - ast: newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(*)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "*|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a**", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a+", - ast: genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - }, - { - pattern: "[abc]+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "\\u{3042}+", - ast: genConcatNode( - newSymbolNode('\u3042'), - newRepeatNode( - newSymbolNode('\u3042'), - ), - ), - }, - { - pattern: "\\p{Letter}+", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}+", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: genConcatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - ), - }, - { - pattern: "((a+)+)+", - ast: genConcatNode( - genConcatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - newRepeatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - ), - ), - ), - }, - { - pattern: "(abc)+", - ast: genConcatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a|b)+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - ), - }, - { - pattern: "+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(+)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "+|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a++", - syntaxError: synErrRepNoTarget, - }, - { - pattern: ".", - ast: newRangeSymbolNode(0x00, 0x10FFFF), - }, - { - pattern: "[a]", - ast: newSymbolNode('a'), - }, - { - pattern: "[abc]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "[a-z]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[A-Za-z]", - ast: genAltNode( - newRangeSymbolNode('A', 'Z'), - newRangeSymbolNode('a', 'z'), - ), - }, - { - pattern: "[\\u{004E}]", - ast: newSymbolNode('N'), - }, - { - pattern: "[\\u{0061}-\\u{007A}]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[z-a]", - syntaxError: synErrRangeInvalidOrder, - }, - { - pattern: "a[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]a", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[^\\u{004E}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u004E'-1), - newRangeSymbolNode('\u004E'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\u{0061}-\\u{007A}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u0061'-1), - newRangeSymbolNode('\u007A'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[^a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\u{0000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^]", - ast: newSymbolNode('^'), - }, - { - pattern: "[", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "]", - ast: newSymbolNode(']'), - }, - { - pattern: "(]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a]", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode(']'), - ), - }, - { - pattern: "(a]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "([)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('-'), - ), - }, - { - pattern: "[^a-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x60), - newRangeSymbolNode(0x62, 0x10FFFF), - ), - }, - { - pattern: "[-z]", - ast: genAltNode( - newSymbolNode('-'), - newSymbolNode('z'), - ), - }, - { - pattern: "[^-z]", - ast: newAltNode( - newRangeSymbolNode(0x00, 0x2C), - newAltNode( - newRangeSymbolNode(0x2E, 0x79), - newRangeSymbolNode(0x7B, 0x10FFFF), - ), - ), - }, - { - pattern: "[-]", - ast: newSymbolNode('-'), - }, - { - pattern: "[^-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x10FFFF), - ), - }, - { - pattern: "[^01]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^10]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^a-z]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - }, - { - pattern: "[^az]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - genAltNode( - newRangeSymbolNode('a'+1, 'z'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - ), - }, - { - pattern: "\\u{006E}", - ast: newSymbolNode('\u006E'), - }, - { - pattern: "\\u{03BD}", - ast: newSymbolNode('\u03BD'), - }, - { - pattern: "\\u{306B}", - ast: newSymbolNode('\u306B'), - }, - { - pattern: "\\u{01F638}", - ast: newSymbolNode('\U0001F638'), - }, - { - pattern: "\\u{0000}", - ast: newSymbolNode('\u0000'), - }, - { - pattern: "\\u{10FFFF}", - ast: newSymbolNode('\U0010FFFF'), - }, - { - pattern: "\\u{110000}", - syntaxError: synErrCPExpOutOfRange, - }, - { - pattern: "\\u", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{03BD", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{}", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\p{Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{General_Category=Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{ Letter }", - skipTestAST: true, - }, - { - pattern: "\\p{ General_Category = Letter }", - skipTestAST: true, - }, - { - pattern: "\\p", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{Letter", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category= }", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=Letter}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{ =Letter}", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\f{a2c}", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f{ a2c }", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{a2c", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "(a)", - ast: newSymbolNode('a'), - }, - { - pattern: "(((a)))", - ast: newSymbolNode('a'), - }, - { - pattern: "a()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()a", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "(a", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((a)", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: ")", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "a)", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: ")a", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "(a))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "Mulder|Scully", - ast: genAltNode( - genConcatNode( - newSymbolNode('M'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('d'), - newSymbolNode('e'), - newSymbolNode('r'), - ), - genConcatNode( - newSymbolNode('S'), - newSymbolNode('c'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - ), - }, - { - pattern: "Langly|Frohike|Byers", - ast: genAltNode( - genConcatNode( - newSymbolNode('L'), - newSymbolNode('a'), - newSymbolNode('n'), - newSymbolNode('g'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - genConcatNode( - newSymbolNode('F'), - newSymbolNode('r'), - newSymbolNode('o'), - newSymbolNode('h'), - newSymbolNode('i'), - newSymbolNode('k'), - newSymbolNode('e'), - ), - genConcatNode( - newSymbolNode('B'), - newSymbolNode('y'), - newSymbolNode('e'), - newSymbolNode('r'), - newSymbolNode('s'), - ), - ), - }, - { - pattern: "|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "||", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Mulder|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Scully", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly||Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "(Fox|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|Mulder)", - syntaxError: synErrAltLackOfOperand, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { - fragmentTrees := map[spec.LexKindName]CPTree{} - for kind, pattern := range tt.fragments { - p := NewParser(kind, strings.NewReader(pattern)) - root, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - fragmentTrees[kind] = root - } - err := CompleteFragments(fragmentTrees) - if err != nil { - t.Fatal(err) - } - - p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) - root, err := p.Parse() - if tt.syntaxError != nil { - // printCPTree(os.Stdout, root, "", "") - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - _, synErr := p.Error() - if synErr != tt.syntaxError { - t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) - } - if root != nil { - t.Fatalf("tree must be nil") - } - } else { - if err != nil { - detail, cause := p.Error() - t.Fatalf("%v: %v: %v", err, cause, detail) - } - if root == nil { - t.Fatal("tree must be non-nil") - } - - complete, err := ApplyFragments(root, fragmentTrees) - if err != nil { - t.Fatal(err) - } - if !complete { - t.Fatalf("incomplete fragments") - } - - // printCPTree(os.Stdout, root, "", "") - if !tt.skipTestAST { - r := root.(*rootNode) - testAST(t, tt.ast, r.tree) - } - } - }) - } -} - -func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { - for _, cProp := range ucd.ContributoryProperties() { - t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { - p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) - root, err := p.Parse() - if err == nil { - t.Fatalf("expected syntax error: got: nil") - } - _, synErr := p.Error() - if synErr != synErrCharPropUnsupported { - t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) - } - if root != nil { - t.Fatalf("tree is not nil") - } - }) - } -} - -func TestExclude(t *testing.T) { - for _, test := range []struct { - caption string - target CPTree - base CPTree - result CPTree - }{ - // t.From > b.From && t.To < b.To - - // |t.From - b.From| = 1 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '2'), - result: newAltNode( - newSymbolNode('0'), - newSymbolNode('2'), - ), - }, - // |t.From - b.From| > 1 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+--+--+ - // Result (b - t): +--+--+ +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '4'), - result: newAltNode( - newRangeSymbolNode('0', '1'), - newRangeSymbolNode('3', '4'), - ), - }, - - // t.From <= b.From && t.To >= b.From && t.To < b.To - - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('1', '2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '3'), - result: newSymbolNode('3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '4'), - result: newRangeSymbolNode('3', '4'), - }, - - // t.From > b.From && t.From <= b.To && t.To >= b.To - - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '3'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '4'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - - // t.From <= b.From && t.To >= b.To - - // |b.From - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('0'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('1'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '2'), - base: newSymbolNode('1'), - result: nil, - }, - - // Others - - // |b.From - t.From| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 1", - target: newSymbolNode('0'), - base: newSymbolNode('1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 1", - target: newSymbolNode('0'), - base: newSymbolNode('2'), - result: newSymbolNode('2'), - }, - // |t.To - b.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| = 1", - target: newSymbolNode('1'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - // |t.To - b.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| > 1", - target: newSymbolNode('2'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - } { - t.Run(test.caption, func(t *testing.T) { - r := exclude(test.target, test.base) - testAST(t, test.result, r) - }) - } -} - -func testAST(t *testing.T, expected, actual CPTree) { - t.Helper() - - aTy := reflect.TypeOf(actual) - eTy := reflect.TypeOf(expected) - if eTy != aTy { - t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) - } - - if actual == nil { - return - } - - switch e := expected.(type) { - case *symbolNode: - a := actual.(*symbolNode) - if a.From != e.From || a.To != e.To { - t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) - } - } - eLeft, eRight := expected.children() - aLeft, aRight := actual.children() - testAST(t, eLeft, aLeft) - testAST(t, eRight, aRight) -} |