package parser

import (
	"fmt"
	"os"
	"reflect"
	"strings"
	"testing"
	"testing/internal/testdeps"

	spec "urubu/spec/grammar"
	"urubu/ucd"
)


func TestLexer(t *testing.T) {
	tests := []struct {
		caption string
		src     string
		tokens  []*token
		err     error
	}{
		{
			caption: "lexer can recognize ordinaly characters",
			src:     "123abcいろは",
			tokens: []*token{
				newToken(tokenKindChar, '1'),
				newToken(tokenKindChar, '2'),
				newToken(tokenKindChar, '3'),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, 'b'),
				newToken(tokenKindChar, 'c'),
				newToken(tokenKindChar, 'い'),
				newToken(tokenKindChar, 'ろ'),
				newToken(tokenKindChar, 'は'),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the special characters in default mode",
			src:     ".*+?|()[\\u",
			tokens: []*token{
				newToken(tokenKindAnyChar, nullChar),
				newToken(tokenKindRepeat, nullChar),
				newToken(tokenKindRepeatOneOrMore, nullChar),
				newToken(tokenKindOption, nullChar),
				newToken(tokenKindAlt, nullChar),
				newToken(tokenKindGroupOpen, nullChar),
				newToken(tokenKindGroupClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the escape sequences in default mode",
			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
			tokens: []*token{
				newToken(tokenKindChar, '\\'),
				newToken(tokenKindChar, '.'),
				newToken(tokenKindChar, '*'),
				newToken(tokenKindChar, '+'),
				newToken(tokenKindChar, '?'),
				newToken(tokenKindChar, '|'),
				newToken(tokenKindChar, '('),
				newToken(tokenKindChar, ')'),
				newToken(tokenKindChar, '['),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "], {, and } are treated as an ordinary character in default mode",
			src:     "]{}",
			tokens: []*token{
				newToken(tokenKindChar, ']'),
				newToken(tokenKindChar, '{'),
				newToken(tokenKindChar, '}'),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the special characters in bracket expression mode",
			src:     "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("09AF"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("09abcf"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the escape sequences in bracket expression mode",
			src:     "[\\^a\\-z]",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '^'),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "in a bracket expression, the special characters are also handled as normal characters",
			src:     "[\\\\.*+?|()[",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '\\'),
				newToken(tokenKindChar, '.'),
				newToken(tokenKindChar, '*'),
				newToken(tokenKindChar, '+'),
				newToken(tokenKindChar, '?'),
				newToken(tokenKindChar, '|'),
				newToken(tokenKindChar, '('),
				newToken(tokenKindChar, ')'),
				newToken(tokenKindChar, '['),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
			// [...-...][...-][-...][-]
			//  ~~~~~~~     ~  ~     ~
			//     ^        ^  ^     ^
			//     |        |  |     `-- Ordinary Character (b)
			//     |        |  `-- Ordinary Character (b)
			//     |        `-- Ordinary Character (b)
			//     `-- Character Range (a)
			//
			// a. *-* is handled as a character-range expression.
			// b. *-, -*, or - are handled as ordinary characters.
			src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
			// [^...^...][^]
			// ~~   ~    ~~
			// ^    ^    ^^
			// |    |    |`-- Ordinary Character (c)
			// |    |    `-- Bracket Expression
			// |    `-- Ordinary Character (b)
			// `-- Inverse Bracket Expression (a)
			//
			// a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
			// b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols.
			// c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
			src: "[^^][^]",
			tokens: []*token{
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '^'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '^'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer raises an error when an invalid escape sequence appears",
			src:     "\\@",
			err:     synErrInvalidEscSeq,
		},
		{
			caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
			src:     "\\",
			err:     synErrIncompletedEscSeq,
		},
		{
			caption: "lexer raises an error when an invalid escape sequence appears",
			src:     "[\\@",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
			},
			err: synErrInvalidEscSeq,
		},
		{
			caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
			src:     "[\\",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
			},
			err: synErrIncompletedEscSeq,
		},
		{
			caption: "lexer can recognize the special characters and code points in code point expression mode",
			src:     "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("4567"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("89abcd"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("efAB"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("CDEF01"),
				newToken(tokenKindRBrace, nullChar),

				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("4567"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("89abcd"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("efAB"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("CDEF01"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("4567"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("89abcd"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("efAB"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("CDEF01"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "a one digit hex string isn't a valid code point",
			src:     "\\u{0",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a two digits hex string isn't a valid code point",
			src:     "\\u{01",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a three digits hex string isn't a valid code point",
			src:     "\\u{012",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a four digits hex string is a valid code point",
			src:     "\\u{0123}",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
			},
		},
		{
			caption: "a five digits hex string isn't a valid code point",
			src:     "\\u{01234",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a six digits hex string is a valid code point",
			src:     "\\u{012345}",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("012345"),
				newToken(tokenKindRBrace, nullChar),
			},
		},
		{
			caption: "a seven digits hex string isn't a valid code point",
			src:     "\\u{0123456",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a code point must be hex digits",
			src:     "\\u{g",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a code point must be hex digits",
			src:     "\\u{G",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "lexer can recognize the special characters and symbols in character property expression mode",
			src:     "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
			tokens: []*token{
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("General_Category"),
				newToken(tokenKindEqual, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),

				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("General_Category"),
				newToken(tokenKindEqual, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("General_Category"),
				newToken(tokenKindEqual, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the special characters and symbols in fragment expression mode",
			src:     "\\f{integer}",
			tokens: []*token{
				newToken(tokenKindFragmentLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newFragmentSymbolToken("integer"),
				newToken(tokenKindRBrace, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "a fragment expression is not supported in a bracket expression",
			src:     "[\\f",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
			},
			err: synErrInvalidEscSeq,
		},
		{
			caption: "a fragment expression is not supported in an inverse bracket expression",
			src:     "[^\\f",
			tokens: []*token{
				newToken(tokenKindInverseBExpOpen, nullChar),
			},
			err: synErrInvalidEscSeq,
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			lex := newLexer(strings.NewReader(tt.src))
			var err error
			var tok *token
			i := 0
			for {
				tok, err = lex.next()
				if err != nil {
					break
				}
				if i >= len(tt.tokens) {
					break
				}
				eTok := tt.tokens[i]
				i++
				testToken(t, tok, eTok)

				if tok.kind == tokenKindEOF {
					break
				}
			}
			if tt.err != nil {
				if err != ParseErr {
					t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
				}
				detail, cause := lex.error()
				if cause != tt.err {
					t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
				}
			} else {
				if err != nil {
					t.Fatalf("unexpected error: %v", err)
				}
			}
			if i < len(tt.tokens) {
				t.Fatalf("expecte more tokens")
			}
		})
	}
}

func testToken(t *testing.T, a, e *token) {
	t.Helper()
	if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
		t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
	}
}

func TestParse(t *testing.T) {
	tests := []struct {
		pattern     string
		fragments   map[spec.LexKindName]string
		ast         CPTree
		syntaxError error

		// When an AST is large, as patterns containing a character property expression, this test only checks
		// that the pattern is parsable. The check of the validity of such AST is performed by checking that it
		// can be matched correctly using the driver.
		skipTestAST bool
	}{
		{
			pattern: "a",
			ast:     newSymbolNode('a'),
		},
		{
			pattern: "abc",
			ast: genConcatNode(
				newSymbolNode('a'),
				newSymbolNode('b'),
				newSymbolNode('c'),
			),
		},
		{
			pattern: "a?",
			ast: newOptionNode(
				newSymbolNode('a'),
			),
		},
		{
			pattern: "[abc]?",
			ast: newOptionNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "\\u{3042}?",
			ast: newOptionNode(
				newSymbolNode('\u3042'),
			),
		},
		{
			pattern:     "\\p{Letter}?",
			skipTestAST: true,
		},
		{
			pattern: "\\f{a2c}?",
			fragments: map[spec.LexKindName]string{
				"a2c": "abc",
			},
			ast: newOptionNode(
				newFragmentNode("a2c",
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "(a)?",
			ast: newOptionNode(
				newSymbolNode('a'),
			),
		},
		{
			pattern: "((a?)?)?",
			ast: newOptionNode(
				newOptionNode(
					newOptionNode(
						newSymbolNode('a'),
					),
				),
			),
		},
		{
			pattern: "(abc)?",
			ast: newOptionNode(
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "(a|b)?",
			ast: newOptionNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
				),
			),
		},
		{
			pattern:     "?",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "(?)",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a|?",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "?|b",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a??",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern: "a*",
			ast: newRepeatNode(
				newSymbolNode('a'),
			),
		},
		{
			pattern: "[abc]*",
			ast: newRepeatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "\\u{3042}*",
			ast: newRepeatNode(
				newSymbolNode('\u3042'),
			),
		},
		{
			pattern:     "\\p{Letter}*",
			skipTestAST: true,
		},
		{
			pattern: "\\f{a2c}*",
			fragments: map[spec.LexKindName]string{
				"a2c": "abc",
			},
			ast: newRepeatNode(
				newFragmentNode("a2c",
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "((a*)*)*",
			ast: newRepeatNode(
				newRepeatNode(
					newRepeatNode(
						newSymbolNode('a'),
					),
				),
			),
		},
		{
			pattern: "(abc)*",
			ast: newRepeatNode(
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "(a|b)*",
			ast: newRepeatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
				),
			),
		},
		{
			pattern:     "*",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "(*)",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a|*",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "*|b",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a**",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern: "a+",
			ast: genConcatNode(
				newSymbolNode('a'),
				newRepeatNode(
					newSymbolNode('a'),
				),
			),
		},
		{
			pattern: "[abc]+",
			ast: genConcatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
				newRepeatNode(
					genAltNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "\\u{3042}+",
			ast: genConcatNode(
				newSymbolNode('\u3042'),
				newRepeatNode(
					newSymbolNode('\u3042'),
				),
			),
		},
		{
			pattern:     "\\p{Letter}+",
			skipTestAST: true,
		},
		{
			pattern: "\\f{a2c}+",
			fragments: map[spec.LexKindName]string{
				"a2c": "abc",
			},
			ast: genConcatNode(
				newFragmentNode("a2c",
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
				newRepeatNode(
					newFragmentNode("a2c",
						genConcatNode(
							newSymbolNode('a'),
							newSymbolNode('b'),
							newSymbolNode('c'),
						),
					),
				),
			),
		},
		{
			pattern: "((a+)+)+",
			ast: genConcatNode(
				genConcatNode(
					genConcatNode(
						genConcatNode(
							newSymbolNode('a'),
							newRepeatNode(
								newSymbolNode('a'),
							),
						),
						newRepeatNode(
							genConcatNode(
								newSymbolNode('a'),
								newRepeatNode(
									newSymbolNode('a'),
								),
							),
						),
					),
					newRepeatNode(
						genConcatNode(
							genConcatNode(
								newSymbolNode('a'),
								newRepeatNode(
									newSymbolNode('a'),
								),
							),
							newRepeatNode(
								genConcatNode(
									newSymbolNode('a'),
									newRepeatNode(
										newSymbolNode('a'),
									),
								),
							),
						),
					),
				),
			),
		},
		{
			pattern: "(abc)+",
			ast: genConcatNode(
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
				newRepeatNode(
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "(a|b)+",
			ast: genConcatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
				),
				newRepeatNode(
					genAltNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
					),
				),
			),
		},
		{
			pattern:     "+",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "(+)",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a|+",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "+|b",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a++",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern: ".",
			ast:     newRangeSymbolNode(0x00, 0x10FFFF),
		},
		{
			pattern: "[a]",
			ast:     newSymbolNode('a'),
		},
		{
			pattern: "[abc]",
			ast: genAltNode(
				newSymbolNode('a'),
				newSymbolNode('b'),
				newSymbolNode('c'),
			),
		},
		{
			pattern: "[a-z]",
			ast:     newRangeSymbolNode('a', 'z'),
		},
		{
			pattern: "[A-Za-z]",
			ast: genAltNode(
				newRangeSymbolNode('A', 'Z'),
				newRangeSymbolNode('a', 'z'),
			),
		},
		{
			pattern: "[\\u{004E}]",
			ast:     newSymbolNode('N'),
		},
		{
			pattern: "[\\u{0061}-\\u{007A}]",
			ast:     newRangeSymbolNode('a', 'z'),
		},
		{
			pattern:     "[\\p{Lu}]",
			skipTestAST: true,
		},
		{
			pattern:     "[a-\\p{Lu}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[\\p{Lu}-z]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[\\p{Lu}-\\p{Ll}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[z-a]",
			syntaxError: synErrRangeInvalidOrder,
		},
		{
			pattern:     "a[]",
			syntaxError: synErrBExpNoElem,
		},
		{
			pattern:     "[]a",
			syntaxError: synErrBExpNoElem,
		},
		{
			pattern:     "[]",
			syntaxError: synErrBExpNoElem,
		},
		{
			pattern: "[^\\u{004E}]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '\u004E'-1),
				newRangeSymbolNode('\u004E'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^\\u{0061}-\\u{007A}]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '\u0061'-1),
				newRangeSymbolNode('\u007A'+1, 0x10FFFF),
			),
		},
		{
			pattern:     "[^\\p{Lu}]",
			skipTestAST: true,
		},
		{
			pattern:     "[^a-\\p{Lu}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[^\\p{Lu}-z]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[^\\p{Lu}-\\p{Ll}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[^\\u{0000}-\\u{10FFFF}]",
			syntaxError: synErrUnmatchablePattern,
		},
		{
			pattern:     "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]",
			syntaxError: synErrUnmatchablePattern,
		},
		{
			pattern: "[^]",
			ast:     newSymbolNode('^'),
		},
		{
			pattern:     "[",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[^",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([^",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[^a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([^a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[^a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([^a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern: "]",
			ast:     newSymbolNode(']'),
		},
		{
			pattern:     "(]",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern: "a]",
			ast: genConcatNode(
				newSymbolNode('a'),
				newSymbolNode(']'),
			),
		},
		{
			pattern:     "(a]",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "([)",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([a)",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern: "[a-]",
			ast: genAltNode(
				newSymbolNode('a'),
				newSymbolNode('-'),
			),
		},
		{
			pattern: "[^a-]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 0x2C),
				newRangeSymbolNode(0x2E, 0x60),
				newRangeSymbolNode(0x62, 0x10FFFF),
			),
		},
		{
			pattern: "[-z]",
			ast: genAltNode(
				newSymbolNode('-'),
				newSymbolNode('z'),
			),
		},
		{
			pattern: "[^-z]",
			ast: newAltNode(
				newRangeSymbolNode(0x00, 0x2C),
				newAltNode(
					newRangeSymbolNode(0x2E, 0x79),
					newRangeSymbolNode(0x7B, 0x10FFFF),
				),
			),
		},
		{
			pattern: "[-]",
			ast:     newSymbolNode('-'),
		},
		{
			pattern: "[^-]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 0x2C),
				newRangeSymbolNode(0x2E, 0x10FFFF),
			),
		},
		{
			pattern: "[^01]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '0'-1),
				newRangeSymbolNode('1'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^10]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '0'-1),
				newRangeSymbolNode('1'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^a-z]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 'a'-1),
				newRangeSymbolNode('z'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^az]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 'a'-1),
				genAltNode(
					newRangeSymbolNode('a'+1, 'z'-1),
					newRangeSymbolNode('z'+1, 0x10FFFF),
				),
			),
		},
		{
			pattern: "\\u{006E}",
			ast:     newSymbolNode('\u006E'),
		},
		{
			pattern: "\\u{03BD}",
			ast:     newSymbolNode('\u03BD'),
		},
		{
			pattern: "\\u{306B}",
			ast:     newSymbolNode('\u306B'),
		},
		{
			pattern: "\\u{01F638}",
			ast:     newSymbolNode('\U0001F638'),
		},
		{
			pattern: "\\u{0000}",
			ast:     newSymbolNode('\u0000'),
		},
		{
			pattern: "\\u{10FFFF}",
			ast:     newSymbolNode('\U0010FFFF'),
		},
		{
			pattern:     "\\u{110000}",
			syntaxError: synErrCPExpOutOfRange,
		},
		{
			pattern:     "\\u",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\u{",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\u{03BD",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\u{}",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\p{Letter}",
			skipTestAST: true,
		},
		{
			pattern:     "\\p{General_Category=Letter}",
			skipTestAST: true,
		},
		{
			pattern:     "\\p{ Letter }",
			skipTestAST: true,
		},
		{
			pattern:     "\\p{ General_Category = Letter }",
			skipTestAST: true,
		},
		{
			pattern:     "\\p",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{Letter",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{General_Category=}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{General_Category=  }",
			syntaxError: synErrCharPropInvalidSymbol,
		},
		{
			pattern:     "\\p{=Letter}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{  =Letter}",
			syntaxError: synErrCharPropInvalidSymbol,
		},
		{
			pattern:     "\\p{=}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern: "\\f{a2c}",
			fragments: map[spec.LexKindName]string{
				"a2c": "abc",
			},
			ast: newFragmentNode("a2c",
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "\\f{ a2c }",
			fragments: map[spec.LexKindName]string{
				"a2c": "abc",
			},
			ast: newFragmentNode("a2c",
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern:     "\\f",
			syntaxError: synErrFragmentExpInvalidForm,
		},
		{
			pattern:     "\\f{",
			syntaxError: synErrFragmentExpInvalidForm,
		},
		{
			pattern: "\\f{a2c",
			fragments: map[spec.LexKindName]string{
				"a2c": "abc",
			},
			syntaxError: synErrFragmentExpInvalidForm,
		},
		{
			pattern: "(a)",
			ast:     newSymbolNode('a'),
		},
		{
			pattern: "(((a)))",
			ast:     newSymbolNode('a'),
		},
		{
			pattern:     "a()",
			syntaxError: synErrGroupNoElem,
		},
		{
			pattern:     "()a",
			syntaxError: synErrGroupNoElem,
		},
		{
			pattern:     "()",
			syntaxError: synErrGroupNoElem,
		},
		{
			pattern:     "(",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "a(",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "(a",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "((",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "((a)",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     ")",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     "a)",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     ")a",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     "))",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     "(a))",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern: "Mulder|Scully",
			ast: genAltNode(
				genConcatNode(
					newSymbolNode('M'),
					newSymbolNode('u'),
					newSymbolNode('l'),
					newSymbolNode('d'),
					newSymbolNode('e'),
					newSymbolNode('r'),
				),
				genConcatNode(
					newSymbolNode('S'),
					newSymbolNode('c'),
					newSymbolNode('u'),
					newSymbolNode('l'),
					newSymbolNode('l'),
					newSymbolNode('y'),
				),
			),
		},
		{
			pattern: "Langly|Frohike|Byers",
			ast: genAltNode(
				genConcatNode(
					newSymbolNode('L'),
					newSymbolNode('a'),
					newSymbolNode('n'),
					newSymbolNode('g'),
					newSymbolNode('l'),
					newSymbolNode('y'),
				),
				genConcatNode(
					newSymbolNode('F'),
					newSymbolNode('r'),
					newSymbolNode('o'),
					newSymbolNode('h'),
					newSymbolNode('i'),
					newSymbolNode('k'),
					newSymbolNode('e'),
				),
				genConcatNode(
					newSymbolNode('B'),
					newSymbolNode('y'),
					newSymbolNode('e'),
					newSymbolNode('r'),
					newSymbolNode('s'),
				),
			),
		},
		{
			pattern:     "|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "||",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Mulder|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "|Scully",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Langly|Frohike|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Langly||Byers",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "|Frohike|Byers",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "|Frohike|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Fox(|)Mulder",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "(Fox|)Mulder",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Fox(|Mulder)",
			syntaxError: synErrAltLackOfOperand,
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) {
			fragmentTrees := map[spec.LexKindName]CPTree{}
			for kind, pattern := range tt.fragments {
				p := NewParser(kind, strings.NewReader(pattern))
				root, err := p.Parse()
				if err != nil {
					t.Fatal(err)
				}

				fragmentTrees[kind] = root
			}
			err := CompleteFragments(fragmentTrees)
			if err != nil {
				t.Fatal(err)
			}

			p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern))
			root, err := p.Parse()
			if tt.syntaxError != nil {
				// printCPTree(os.Stdout, root, "", "")
				if err != ParseErr {
					t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
				}
				_, synErr := p.Error()
				if synErr != tt.syntaxError {
					t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr)
				}
				if root != nil {
					t.Fatalf("tree must be nil")
				}
			} else {
				if err != nil {
					detail, cause := p.Error()
					t.Fatalf("%v: %v: %v", err, cause, detail)
				}
				if root == nil {
					t.Fatal("tree must be non-nil")
				}

				complete, err := ApplyFragments(root, fragmentTrees)
				if err != nil {
					t.Fatal(err)
				}
				if !complete {
					t.Fatalf("incomplete fragments")
				}

				// printCPTree(os.Stdout, root, "", "")
				if !tt.skipTestAST {
					r := root.(*rootNode)
					testAST(t, tt.ast, r.tree)
				}
			}
		})
	}
}

func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) {
	for _, cProp := range ucd.ContributoryProperties() {
		t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) {
			p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp)))
			root, err := p.Parse()
			if err == nil {
				t.Fatalf("expected syntax error: got: nil")
			}
			_, synErr := p.Error()
			if synErr != synErrCharPropUnsupported {
				t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr)
			}
			if root != nil {
				t.Fatalf("tree is not nil")
			}
		})
	}
}

func TestExclude(t *testing.T) {
	for _, test := range []struct {
		caption string
		target  CPTree
		base    CPTree
		result  CPTree
	}{
		// t.From > b.From && t.To < b.To

		// |t.From - b.From| = 1
		// |b.To - t.To| = 1
		//
		//     Target (t):    +--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+  +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1",
			target:  newSymbolNode('1'),
			base:    newRangeSymbolNode('0', '2'),
			result: newAltNode(
				newSymbolNode('0'),
				newSymbolNode('2'),
			),
		},
		// |t.From - b.From| > 1
		// |b.To - t.To| > 1
		//
		//     Target (t):       +--+
		//       Base (b): +--+--+--+--+--+
		// Result (b - t): +--+--+  +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1",
			target:  newSymbolNode('2'),
			base:    newRangeSymbolNode('0', '4'),
			result: newAltNode(
				newRangeSymbolNode('0', '1'),
				newRangeSymbolNode('3', '4'),
			),
		},

		// t.From <= b.From && t.To >= b.From && t.To < b.To

		// |b.From - t.From| = 0
		// |t.To - b.From| = 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+
		//       Base (b): +--+--+
		// Result (b - t):    +--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
			target:  newSymbolNode('0'),
			base:    newRangeSymbolNode('0', '1'),
			result:  newSymbolNode('1'),
		},
		// |b.From - t.From| = 0
		// |t.To - b.From| = 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+
		//       Base (b): +--+--+--+
		// Result (b - t):    +--+--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
			target:  newSymbolNode('0'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newRangeSymbolNode('1', '2'),
		},
		// |b.From - t.From| = 0
		// |t.To - b.From| > 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+--+
		//       Base (b): +--+--+--+
		// Result (b - t):       +--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newSymbolNode('2'),
		},
		// |b.From - t.From| = 0
		// |t.To - b.From| > 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+--+
		//       Base (b): +--+--+--+--+
		// Result (b - t):       +--+--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('0', '3'),
			result:  newRangeSymbolNode('2', '3'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| = 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+--+
		//       Base (b):    +--+--+
		// Result (b - t):       +--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('1', '2'),
			result:  newSymbolNode('2'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| = 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+--+
		//       Base (b):    +--+--+--+
		// Result (b - t):       +--+--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('1', '3'),
			result:  newRangeSymbolNode('2', '3'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| > 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+--+--+
		//       Base (b):    +--+--+--+
		// Result (b - t):          +--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
			target:  newRangeSymbolNode('0', '2'),
			base:    newRangeSymbolNode('1', '3'),
			result:  newSymbolNode('3'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| > 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+--+--+
		//       Base (b):    +--+--+--+--+
		// Result (b - t):          +--+--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
			target:  newRangeSymbolNode('0', '2'),
			base:    newRangeSymbolNode('1', '4'),
			result:  newRangeSymbolNode('3', '4'),
		},

		// t.From > b.From && t.From <= b.To && t.To >= b.To

		// |t.From - b.From| = 1
		// |b.To - t.From| = 0
		// |t.To - b.To| = 0
		//
		//     Target (t):    +--+
		//       Base (b): +--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
			target:  newSymbolNode('1'),
			base:    newRangeSymbolNode('0', '1'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| = 1
		// |b.To - t.From| = 0
		// |t.To - b.To| > 0
		//
		//     Target (t):    +--+--+
		//       Base (b): +--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('1', '2'),
			base:    newRangeSymbolNode('0', '1'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| = 1
		// |b.To - t.From| > 0
		// |t.To - b.To| = 0
		//
		//     Target (t):    +--+--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
			target:  newRangeSymbolNode('1', '2'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| = 1
		// |b.To - t.From| > 0
		// |t.To - b.To| > 0
		//
		//     Target (t):    +--+--+--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('1', '3'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| = 0
		// |t.To - b.To| = 0
		//
		//     Target (t):       +--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
			target:  newSymbolNode('2'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newRangeSymbolNode('0', '1'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| = 0
		// |t.To - b.To| > 0
		//
		//     Target (t):       +--+--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('2', '3'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newRangeSymbolNode('0', '1'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| > 0
		// |t.To - b.To| = 0
		//
		//     Target (t):       +--+--+
		//       Base (b): +--+--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
			target:  newRangeSymbolNode('2', '3'),
			base:    newRangeSymbolNode('0', '3'),
			result:  newRangeSymbolNode('0', '1'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| > 0
		// |t.To - b.To| > 0
		//
		//     Target (t):       +--+--+--+
		//       Base (b): +--+--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('2', '4'),
			base:    newRangeSymbolNode('0', '3'),
			result:  newRangeSymbolNode('0', '1'),
		},

		// t.From <= b.From && t.To >= b.To

		// |b.From - t.From| = 0
		// |t.To - b.To| = 0
		//
		//     Target (t): +--+
		//       Base (b): +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0",
			target:  newSymbolNode('0'),
			base:    newSymbolNode('0'),
			result:  nil,
		},
		// |b.From - t.From| = 0
		// |t.To - b.To| > 0
		//
		//     Target (t): +--+--+
		//       Base (b): +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('0', '1'),
			base:    newSymbolNode('0'),
			result:  nil,
		},
		// |b.From - t.From| > 0
		// |t.To - b.To| = 0
		//
		//     Target (t): +--+--+
		//       Base (b):    +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0",
			target:  newRangeSymbolNode('0', '1'),
			base:    newSymbolNode('1'),
			result:  nil,
		},
		// |b.From - t.From| > 0
		// |t.To - b.To| > 0
		//
		//     Target (t): +--+--+--+
		//       Base (b):    +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('0', '2'),
			base:    newSymbolNode('1'),
			result:  nil,
		},

		// Others

		// |b.From - t.From| = 1
		//
		//     Target (t): +--+
		//       Base (b):    +--+
		// Result (b - t):    +--+
		{
			caption: "|b.From - t.From| = 1",
			target:  newSymbolNode('0'),
			base:    newSymbolNode('1'),
			result:  newSymbolNode('1'),
		},
		// |b.From - t.From| > 1
		//
		//     Target (t): +--+
		//       Base (b):       +--+
		// Result (b - t):       +--+
		{
			caption: "|b.From - t.From| > 1",
			target:  newSymbolNode('0'),
			base:    newSymbolNode('2'),
			result:  newSymbolNode('2'),
		},
		// |t.To - b.To| = 1
		//
		//     Target (t):    +--+
		//       Base (b): +--+
		// Result (b - t): +--+
		{
			caption: "|t.To - b.To| = 1",
			target:  newSymbolNode('1'),
			base:    newSymbolNode('0'),
			result:  newSymbolNode('0'),
		},
		// |t.To - b.To| > 1
		//
		//     Target (t):       +--+
		//       Base (b): +--+
		// Result (b - t): +--+
		{
			caption: "|t.To - b.To| > 1",
			target:  newSymbolNode('2'),
			base:    newSymbolNode('0'),
			result:  newSymbolNode('0'),
		},
	} {
		t.Run(test.caption, func(t *testing.T) {
			r := exclude(test.target, test.base)
			testAST(t, test.result, r)
		})
	}
}

func testAST(t *testing.T, expected, actual CPTree) {
	t.Helper()

	aTy := reflect.TypeOf(actual)
	eTy := reflect.TypeOf(expected)
	if eTy != aTy {
		t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy)
	}

	if actual == nil {
		return
	}

	switch e := expected.(type) {
	case *symbolNode:
		a := actual.(*symbolNode)
		if a.From != e.From || a.To != e.To {
			t.Fatalf("unexpected node: want: %+v, got: %+v", e, a)
		}
	}
	eLeft, eRight := expected.children()
	aLeft, aRight := actual.children()
	testAST(t, eLeft, aLeft)
	testAST(t, eRight, aRight)
}


func MainTest() {
	tests := []testing.InternalTest{
		{ "TestLexer", TestLexer },
		{ "TestParse", TestParse },
		{ "TestParse_ContributoryPropertyIsNotExposed", TestParse_ContributoryPropertyIsNotExposed },
		{ "TestExclude", TestExclude },
	}

	deps := testdeps.TestDeps{}
	benchmarks  := []testing.InternalBenchmark {}
	fuzzTargets := []testing.InternalFuzzTarget{}
	examples    := []testing.InternalExample   {}
	m := testing.MainStart(deps, tests, benchmarks, fuzzTargets, examples)
	os.Exit(m.Run())
}