package tre

import (
	"fmt"
	"os"
	"reflect"
	"strings"
	"testing"
	"testing/internal/testdeps"

	"ucd"
)


func TestGenCharBlocksWellFormed(t *testing.T) {
	cBlk := func(from []byte, to []byte) *CharBlock {
		return &CharBlock{
			From: from,
			To:   to,
		}
	}

	seq := func(b ...byte) []byte {
		return b
	}

	tests := []struct {
		from   rune
		to     rune
		blocks []*CharBlock
	}{
		{
			from: '\u0000',
			to:   '\u007f',
			blocks: []*CharBlock{
				cBlk(seq(0x00), seq(0x7f)),
			},
		},
		{
			from: '\u0080',
			to:   '\u07ff',
			blocks: []*CharBlock{
				cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)),
			},
		},
		{
			from: '\u0800',
			to:   '\u0fff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xe0, 0xa0, 0x80),
					seq(0xe0, 0xbf, 0xbf),
				),
			},
		},
		{
			from: '\u1000',
			to:   '\ucfff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xe1, 0x80, 0x80),
					seq(0xec, 0xbf, 0xbf),
				),
			},
		},
		{
			from: '\ud000',
			to:   '\ud7ff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xed, 0x80, 0x80),
					seq(0xed, 0x9f, 0xbf),
				),
			},
		},
		{
			from: '\ue000',
			to:   '\uffff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xee, 0x80, 0x80),
					seq(0xef, 0xbf, 0xbf),
				),
			},
		},
		{
			from: '\U00010000',
			to:   '\U0003ffff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xf0, 0x90, 0x80, 0x80),
					seq(0xf0, 0xbf, 0xbf, 0xbf),
				),
			},
		},
		{
			from: '\U00040000',
			to:   '\U000fffff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xf1, 0x80, 0x80, 0x80),
					seq(0xf3, 0xbf, 0xbf, 0xbf),
				),
			},
		},
		{
			from: '\U00100000',
			to:   '\U0010ffff',
			blocks: []*CharBlock{
				cBlk(
					seq(0xf4, 0x80, 0x80, 0x80),
					seq(0xf4, 0x8f, 0xbf, 0xbf),
				),
			},
		},
		{
			from: '\u0000',
			to:   '\U0010ffff',
			blocks: []*CharBlock{
				cBlk(seq(0x00), seq(0x7f)),
				cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)),
				cBlk(
					seq(0xe0, 0xa0, 0x80),
					seq(0xe0, 0xbf, 0xbf),
				),
				cBlk(
					seq(0xe1, 0x80, 0x80),
					seq(0xec, 0xbf, 0xbf),
				),
				cBlk(
					seq(0xed, 0x80, 0x80),
					seq(0xed, 0x9f, 0xbf),
				),
				cBlk(
					seq(0xee, 0x80, 0x80),
					seq(0xef, 0xbf, 0xbf),
				),
				cBlk(
					seq(0xf0, 0x90, 0x80, 0x80),
					seq(0xf0, 0xbf, 0xbf, 0xbf),
				),
				cBlk(
					seq(0xf1, 0x80, 0x80, 0x80),
					seq(0xf3, 0xbf, 0xbf, 0xbf),
				),
				cBlk(
					seq(0xf4, 0x80, 0x80, 0x80),
					seq(0xf4, 0x8f, 0xbf, 0xbf),
				),
			},
		},
	}
	for _, tt := range tests {
		const errmsg = "unexpected character block: want: %+v, got: %+v"
		tts := fmt.Sprintf("%v..%v", tt.from, tt.to)
		t.Run(tts, func(t *testing.T) {
			blks, err := GenCharBlocks(tt.from, tt.to)
			if err != nil {
				t.Fatal(err)
			}
			if len(blks) != len(tt.blocks) {
				t.Fatalf(errmsg, tt.blocks, blks)
			}
			for i, blk := range blks {
				expected := tt.blocks[i]
				neqFrom := len(blk.From) != len(expected.From)
				neqTo   := len(blk.To)   != len(expected.To)
				if neqFrom || neqTo {
					t.Fatalf(errmsg, tt.blocks, blks)
				}
				for j := 0; j < len(blk.From); j++ {
					neqFrom := blk.From[j] !=
						expected.From[j] 
					neqTo   := blk.To[j]   !=
						expected.To[j]
					if neqFrom || neqTo {
						t.Fatalf(
							errmsg,
							tt.blocks,
							blks,
						)
					}
				}
			}
		})
	}
}

func TestGenCharBlocksIllFormed(t *testing.T) {
	tests := []struct {
		from rune
		to   rune
	}{
		{
			// from > to
			from: '\u0001',
			to:   '\u0000',
		},
		{
			from: -1, // <U+0000
			to:   '\u0000',
		},
		{
			from: '\u0000',
			to:   -1, // <U+0000
		},
		{
			from: 0x110000, // >U+10FFFF
			to:   '\u0000',
		},
		{
			from: '\u0000',
			to:   0x110000, // >U+10FFFF
		},
		{
			from: 0xd800, // U+D800 (surrogate code point)
			to:   '\ue000',
		},
		{
			from: 0xdfff, // U+DFFF (surrogate code point)
			to:   '\ue000',
		},
		{
			from: '\ucfff',
			to:   0xd800, // U+D800 (surrogate code point)
		},
		{
			from: '\ucfff',
			to:   0xdfff, // U+DFFF (surrogate code point)
		},
	}
	for _, tt := range tests {
		tts := fmt.Sprintf("%v..%v", tt.from, tt.to)
		t.Run(tts, func(t *testing.T) {
			blks, err := GenCharBlocks(tt.from, tt.to)
			if err == nil {
				t.Fatal("expected error didn't occur")
			}
			if blks != nil {
				t.Fatal("character blocks must be nil")
			}
		})
	}
}

func TestCompressor_Compress(t *testing.T) {
	x := 0 // an empty value

	allCompressors := func() []Compressor {
		return []Compressor{
			NewCompressorUniqueEntriesTable(),
			NewCompressorRowDisplacementTable(x),
		}
	}

	tests := []struct {
		original    []int
		rowCount    int
		colCount    int
		compressors []Compressor
	}{
		{
			original: []int{
				1, 1, 1, 1, 1,
				1, 1, 1, 1, 1,
				1, 1, 1, 1, 1,
			},
			rowCount:    3,
			colCount:    5,
			compressors: allCompressors(),
		},
		{
			original: []int{
				x, x, x, x, x,
				x, x, x, x, x,
				x, x, x, x, x,
			},
			rowCount:    3,
			colCount:    5,
			compressors: allCompressors(),
		},
		{
			original: []int{
				1, 1, 1, 1, 1,
				x, x, x, x, x,
				1, 1, 1, 1, 1,
			},
			rowCount:    3,
			colCount:    5,
			compressors: allCompressors(),
		},
		{
			original: []int{
				1, x, 1, 1, 1,
				1, 1, x, 1, 1,
				1, 1, 1, x, 1,
			},
			rowCount:    3,
			colCount:    5,
			compressors: allCompressors(),
		},
	}
	for i, tt := range tests {
		for _, comp := range tt.compressors {
			t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
				dup := make([]int, len(tt.original))
				copy(dup, tt.original)

				orig, err := NewOriginalTable(tt.original, tt.colCount)
				if err != nil {
					t.Fatal(err)
				}
				err = comp.Compress(orig)
				if err != nil {
					t.Fatal(err)
				}
				rowCount, colCount := comp.OriginalTableSize()
				if rowCount != tt.rowCount || colCount != tt.colCount {
					t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
				}
				for i := 0; i < tt.rowCount; i++ {
					for j := 0; j < tt.colCount; j++ {
						v, err := comp.Lookup(i, j)
						if err != nil {
							t.Fatal(err)
						}
						expected := tt.original[i*tt.colCount+j]
						if v != expected {
							t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
						}
					}
				}

				// Calling with out-of-range indexes should be an error.
				if _, err := comp.Lookup(0, -1); err == nil {
					t.Fatalf("expected error didn't occur (0, -1)")
				}
				if _, err := comp.Lookup(-1, 0); err == nil {
					t.Fatalf("expected error didn't occur (-1, 0)")
				}
				if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
					t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
				}
				if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
					t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
				}

				// The compressor must not break the original table.
				for i := 0; i < tt.rowCount; i++ {
					for j := 0; j < tt.colCount; j++ {
						idx := i*tt.colCount + j
						if tt.original[idx] != dup[idx] {
							t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
						}
					}
				}
			})
		}
	}
}

var idTests = []struct {
	id      string
	invalid bool
}{
	{
		id: "foo",
	},
	{
		id: "foo2",
	},
	{
		id: "foo_bar_baz",
	},
	{
		id: "f_o_o",
	},
	{
		id:      "Foo",
		invalid: true,
	},
	{
		id:      "foo_Bar",
		invalid: true,
	},
	{
		id:      "2foo",
		invalid: true,
	},
	{
		id:      "_foo",
		invalid: true,
	},
	{
		id:      "foo_",
		invalid: true,
	},
	{
		id:      "foo__bar",
		invalid: true,
	},
}

func TestValidateIdentifier(t *testing.T) {
	for _, tt := range idTests {
		t.Run(tt.id, func(t *testing.T) {
			err := validateIdentifier(tt.id)
			if tt.invalid {
				if err == nil {
					t.Errorf("expected error didn't occur")
				}
			} else {
				if err != nil {
					t.Errorf("unexpected error occurred: %v", err)
				}
			}
		})
	}
}

func TestLexKindName_validate(t *testing.T) {
	for _, tt := range idTests {
		t.Run(tt.id, func(t *testing.T) {
			err := LexKindName(tt.id).validate()
			if tt.invalid {
				if err == nil {
					t.Errorf("expected error didn't occur")
				}
			} else {
				if err != nil {
					t.Errorf("unexpected error occurred: %v", err)
				}
			}
		})
	}
}

func TestLexModeName_validate(t *testing.T) {
	for _, tt := range idTests {
		t.Run(tt.id, func(t *testing.T) {
			err := LexModeName(tt.id).validate()
			if tt.invalid {
				if err == nil {
					t.Errorf("expected error didn't occur")
				}
			} else {
				if err != nil {
					t.Errorf("unexpected error occurred: %v", err)
				}
			}
		})
	}
}

func TestSnakeCaseToUpperCamelCase(t *testing.T) {
	tests := []struct {
		snake string
		camel string
	}{
		{
			snake: "foo",
			camel: "Foo",
		},
		{
			snake: "foo_bar",
			camel: "FooBar",
		},
		{
			snake: "foo_bar_baz",
			camel: "FooBarBaz",
		},
		{
			snake: "Foo",
			camel: "Foo",
		},
		{
			snake: "fooBar",
			camel: "FooBar",
		},
		{
			snake: "FOO",
			camel: "FOO",
		},
		{
			snake: "FOO_BAR",
			camel: "FOOBAR",
		},
		{
			snake: "_foo_bar_",
			camel: "FooBar",
		},
		{
			snake: "___foo___bar___",
			camel: "FooBar",
		},
	}
	for _, tt := range tests {
		c := SnakeCaseToUpperCamelCase(tt.snake)
		if c != tt.camel {
			t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
		}
	}
}

func TestFindSpellingInconsistencies(t *testing.T) {
	tests := []struct {
		ids        []string
		duplicated [][]string
	}{
		{
			ids:        []string{"foo", "foo"},
			duplicated: nil,
		},
		{
			ids:        []string{"foo", "Foo"},
			duplicated: [][]string{{"Foo", "foo"}},
		},
		{
			ids:        []string{"foo", "foo", "Foo"},
			duplicated: [][]string{{"Foo", "foo"}},
		},
		{
			ids:        []string{"foo_bar_baz", "FooBarBaz"},
			duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}},
		},
		{
			ids:        []string{"foo", "Foo", "bar", "Bar"},
			duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
		},
		{
			ids:        []string{"foo", "Foo", "bar", "Bar", "baz", "bra"},
			duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
			duplicated := FindSpellingInconsistencies(tt.ids)
			if len(duplicated) != len(tt.duplicated) {
				t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated)
			}
			for i, dupIDs := range duplicated {
				if len(dupIDs) != len(tt.duplicated[i]) {
					t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
				}
				for j, id := range dupIDs {
					if id != tt.duplicated[i][j] {
						t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
					}
				}
			}
		})
	}
}

func TestLexSpec_Validate(t *testing.T) {
	// We expect that the spelling inconsistency error will occur.
	spec := &LexSpec{
		Entries: []*LexEntry{
			{
				Modes: []LexModeName{
					// 'Default' is the spelling inconsistency because 'default' is predefined.
					"Default",
				},
				Kind:    "foo",
				Pattern: "foo",
			},
		},
	}
	err := spec.Validate()
	if err == nil {
		t.Fatalf("expected error didn't occur")
	}
}

func TestLexer(t *testing.T) {
	tests := []struct {
		caption string
		src     string
		tokens  []*token
		err     error
	}{
		{
			caption: "lexer can recognize ordinaly characters",
			src:     "123abcいろは",
			tokens: []*token{
				newToken(tokenKindChar, '1'),
				newToken(tokenKindChar, '2'),
				newToken(tokenKindChar, '3'),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, 'b'),
				newToken(tokenKindChar, 'c'),
				newToken(tokenKindChar, 'い'),
				newToken(tokenKindChar, 'ろ'),
				newToken(tokenKindChar, 'は'),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the special characters in default mode",
			src:     ".*+?|()[\\u",
			tokens: []*token{
				newToken(tokenKindAnyChar, nullChar),
				newToken(tokenKindRepeat, nullChar),
				newToken(tokenKindRepeatOneOrMore, nullChar),
				newToken(tokenKindOption, nullChar),
				newToken(tokenKindAlt, nullChar),
				newToken(tokenKindGroupOpen, nullChar),
				newToken(tokenKindGroupClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the escape sequences in default mode",
			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
			tokens: []*token{
				newToken(tokenKindChar, '\\'),
				newToken(tokenKindChar, '.'),
				newToken(tokenKindChar, '*'),
				newToken(tokenKindChar, '+'),
				newToken(tokenKindChar, '?'),
				newToken(tokenKindChar, '|'),
				newToken(tokenKindChar, '('),
				newToken(tokenKindChar, ')'),
				newToken(tokenKindChar, '['),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "], {, and } are treated as an ordinary character in default mode",
			src:     "]{}",
			tokens: []*token{
				newToken(tokenKindChar, ']'),
				newToken(tokenKindChar, '{'),
				newToken(tokenKindChar, '}'),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the special characters in bracket expression mode",
			src:     "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("09AF"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("09abcf"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the escape sequences in bracket expression mode",
			src:     "[\\^a\\-z]",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '^'),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "in a bracket expression, the special characters are also handled as normal characters",
			src:     "[\\\\.*+?|()[",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '\\'),
				newToken(tokenKindChar, '.'),
				newToken(tokenKindChar, '*'),
				newToken(tokenKindChar, '+'),
				newToken(tokenKindChar, '?'),
				newToken(tokenKindChar, '|'),
				newToken(tokenKindChar, '('),
				newToken(tokenKindChar, ')'),
				newToken(tokenKindChar, '['),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
			// [...-...][...-][-...][-]
			//  ~~~~~~~     ~  ~     ~
			//     ^        ^  ^     ^
			//     |        |  |     `-- Ordinary Character (b)
			//     |        |  `-- Ordinary Character (b)
			//     |        `-- Ordinary Character (b)
			//     `-- Character Range (a)
			//
			// a. *-* is handled as a character-range expression.
			// b. *-, -*, or - are handled as ordinary characters.
			src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, 'a'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, 'z'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindCharRange, nullChar),
				newToken(tokenKindChar, '-'),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
			// [^...^...][^]
			// ~~   ~    ~~
			// ^    ^    ^^
			// |    |    |`-- Ordinary Character (c)
			// |    |    `-- Bracket Expression
			// |    `-- Ordinary Character (b)
			// `-- Inverse Bracket Expression (a)
			//
			// a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
			// b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols.
			// c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
			src: "[^^][^]",
			tokens: []*token{
				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindChar, '^'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '^'),
				newToken(tokenKindBExpClose, nullChar),
				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer raises an error when an invalid escape sequence appears",
			src:     "\\@",
			err:     synErrInvalidEscSeq,
		},
		{
			caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
			src:     "\\",
			err:     synErrIncompletedEscSeq,
		},
		{
			caption: "lexer raises an error when an invalid escape sequence appears",
			src:     "[\\@",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
			},
			err: synErrInvalidEscSeq,
		},
		{
			caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
			src:     "[\\",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
			},
			err: synErrIncompletedEscSeq,
		},
		{
			caption: "lexer can recognize the special characters and code points in code point expression mode",
			src:     "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("4567"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("89abcd"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("efAB"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("CDEF01"),
				newToken(tokenKindRBrace, nullChar),

				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("4567"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("89abcd"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("efAB"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("CDEF01"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("4567"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("89abcd"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("efAB"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("CDEF01"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "a one digit hex string isn't a valid code point",
			src:     "\\u{0",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a two digits hex string isn't a valid code point",
			src:     "\\u{01",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a three digits hex string isn't a valid code point",
			src:     "\\u{012",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a four digits hex string is a valid code point",
			src:     "\\u{0123}",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("0123"),
				newToken(tokenKindRBrace, nullChar),
			},
		},
		{
			caption: "a five digits hex string isn't a valid code point",
			src:     "\\u{01234",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a six digits hex string is a valid code point",
			src:     "\\u{012345}",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCodePointToken("012345"),
				newToken(tokenKindRBrace, nullChar),
			},
		},
		{
			caption: "a seven digits hex string isn't a valid code point",
			src:     "\\u{0123456",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a code point must be hex digits",
			src:     "\\u{g",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "a code point must be hex digits",
			src:     "\\u{G",
			tokens: []*token{
				newToken(tokenKindCodePointLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
			},
			err: synErrInvalidCodePoint,
		},
		{
			caption: "lexer can recognize the special characters and symbols in character property expression mode",
			src:     "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
			tokens: []*token{
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("General_Category"),
				newToken(tokenKindEqual, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),

				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("General_Category"),
				newToken(tokenKindEqual, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindInverseBExpOpen, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindCharPropLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newCharPropSymbolToken("General_Category"),
				newToken(tokenKindEqual, nullChar),
				newCharPropSymbolToken("Letter"),
				newToken(tokenKindRBrace, nullChar),
				newToken(tokenKindBExpClose, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "lexer can recognize the special characters and symbols in fragment expression mode",
			src:     "\\f{integer}",
			tokens: []*token{
				newToken(tokenKindFragmentLeader, nullChar),
				newToken(tokenKindLBrace, nullChar),
				newFragmentSymbolToken("integer"),
				newToken(tokenKindRBrace, nullChar),

				newToken(tokenKindEOF, nullChar),
			},
		},
		{
			caption: "a fragment expression is not supported in a bracket expression",
			src:     "[\\f",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
			},
			err: synErrInvalidEscSeq,
		},
		{
			caption: "a fragment expression is not supported in an inverse bracket expression",
			src:     "[^\\f",
			tokens: []*token{
				newToken(tokenKindInverseBExpOpen, nullChar),
			},
			err: synErrInvalidEscSeq,
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			lex := newLexer(strings.NewReader(tt.src))
			var err error
			var tok *token
			i := 0
			for {
				tok, err = lex.next()
				if err != nil {
					break
				}
				if i >= len(tt.tokens) {
					break
				}
				eTok := tt.tokens[i]
				i++
				testToken(t, tok, eTok)

				if tok.kind == tokenKindEOF {
					break
				}
			}
			if tt.err != nil {
				if err != ParseErr {
					t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
				}
				detail, cause := lex.error()
				if cause != tt.err {
					t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
				}
			} else {
				if err != nil {
					t.Fatalf("unexpected error: %v", err)
				}
			}
			if i < len(tt.tokens) {
				t.Fatalf("expecte more tokens")
			}
		})
	}
}

func testToken(t *testing.T, a, e *token) {
	t.Helper()
	if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
		t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
	}
}

func TestParse(t *testing.T) {
	tests := []struct {
		pattern     string
		fragments   map[LexKindName]string
		ast         CPTree
		syntaxError error

		// When an AST is large, as patterns containing a character property expression, this test only checks
		// that the pattern is parsable. The check of the validity of such AST is performed by checking that it
		// can be matched correctly using the driver.
		skipTestAST bool
	}{
		{
			pattern: "a",
			ast:     newSymbolNode('a'),
		},
		{
			pattern: "abc",
			ast: genConcatNode(
				newSymbolNode('a'),
				newSymbolNode('b'),
				newSymbolNode('c'),
			),
		},
		{
			pattern: "a?",
			ast: newOptionNode(
				newSymbolNode('a'),
			),
		},
		{
			pattern: "[abc]?",
			ast: newOptionNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "\\u{3042}?",
			ast: newOptionNode(
				newSymbolNode('\u3042'),
			),
		},
		{
			pattern:     "\\p{Letter}?",
			skipTestAST: true,
		},
		{
			pattern: "\\f{a2c}?",
			fragments: map[LexKindName]string{
				"a2c": "abc",
			},
			ast: newOptionNode(
				newFragmentNode("a2c",
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "(a)?",
			ast: newOptionNode(
				newSymbolNode('a'),
			),
		},
		{
			pattern: "((a?)?)?",
			ast: newOptionNode(
				newOptionNode(
					newOptionNode(
						newSymbolNode('a'),
					),
				),
			),
		},
		{
			pattern: "(abc)?",
			ast: newOptionNode(
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "(a|b)?",
			ast: newOptionNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
				),
			),
		},
		{
			pattern:     "?",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "(?)",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a|?",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "?|b",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a??",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern: "a*",
			ast: newRepeatNode(
				newSymbolNode('a'),
			),
		},
		{
			pattern: "[abc]*",
			ast: newRepeatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "\\u{3042}*",
			ast: newRepeatNode(
				newSymbolNode('\u3042'),
			),
		},
		{
			pattern:     "\\p{Letter}*",
			skipTestAST: true,
		},
		{
			pattern: "\\f{a2c}*",
			fragments: map[LexKindName]string{
				"a2c": "abc",
			},
			ast: newRepeatNode(
				newFragmentNode("a2c",
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "((a*)*)*",
			ast: newRepeatNode(
				newRepeatNode(
					newRepeatNode(
						newSymbolNode('a'),
					),
				),
			),
		},
		{
			pattern: "(abc)*",
			ast: newRepeatNode(
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "(a|b)*",
			ast: newRepeatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
				),
			),
		},
		{
			pattern:     "*",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "(*)",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a|*",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "*|b",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a**",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern: "a+",
			ast: genConcatNode(
				newSymbolNode('a'),
				newRepeatNode(
					newSymbolNode('a'),
				),
			),
		},
		{
			pattern: "[abc]+",
			ast: genConcatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
				newRepeatNode(
					genAltNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "\\u{3042}+",
			ast: genConcatNode(
				newSymbolNode('\u3042'),
				newRepeatNode(
					newSymbolNode('\u3042'),
				),
			),
		},
		{
			pattern:     "\\p{Letter}+",
			skipTestAST: true,
		},
		{
			pattern: "\\f{a2c}+",
			fragments: map[LexKindName]string{
				"a2c": "abc",
			},
			ast: genConcatNode(
				newFragmentNode("a2c",
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
				newRepeatNode(
					newFragmentNode("a2c",
						genConcatNode(
							newSymbolNode('a'),
							newSymbolNode('b'),
							newSymbolNode('c'),
						),
					),
				),
			),
		},
		{
			pattern: "((a+)+)+",
			ast: genConcatNode(
				genConcatNode(
					genConcatNode(
						genConcatNode(
							newSymbolNode('a'),
							newRepeatNode(
								newSymbolNode('a'),
							),
						),
						newRepeatNode(
							genConcatNode(
								newSymbolNode('a'),
								newRepeatNode(
									newSymbolNode('a'),
								),
							),
						),
					),
					newRepeatNode(
						genConcatNode(
							genConcatNode(
								newSymbolNode('a'),
								newRepeatNode(
									newSymbolNode('a'),
								),
							),
							newRepeatNode(
								genConcatNode(
									newSymbolNode('a'),
									newRepeatNode(
										newSymbolNode('a'),
									),
								),
							),
						),
					),
				),
			),
		},
		{
			pattern: "(abc)+",
			ast: genConcatNode(
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
				newRepeatNode(
					genConcatNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
						newSymbolNode('c'),
					),
				),
			),
		},
		{
			pattern: "(a|b)+",
			ast: genConcatNode(
				genAltNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
				),
				newRepeatNode(
					genAltNode(
						newSymbolNode('a'),
						newSymbolNode('b'),
					),
				),
			),
		},
		{
			pattern:     "+",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "(+)",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a|+",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "+|b",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern:     "a++",
			syntaxError: synErrRepNoTarget,
		},
		{
			pattern: ".",
			ast:     newRangeSymbolNode(0x00, 0x10FFFF),
		},
		{
			pattern: "[a]",
			ast:     newSymbolNode('a'),
		},
		{
			pattern: "[abc]",
			ast: genAltNode(
				newSymbolNode('a'),
				newSymbolNode('b'),
				newSymbolNode('c'),
			),
		},
		{
			pattern: "[a-z]",
			ast:     newRangeSymbolNode('a', 'z'),
		},
		{
			pattern: "[A-Za-z]",
			ast: genAltNode(
				newRangeSymbolNode('A', 'Z'),
				newRangeSymbolNode('a', 'z'),
			),
		},
		{
			pattern: "[\\u{004E}]",
			ast:     newSymbolNode('N'),
		},
		{
			pattern: "[\\u{0061}-\\u{007A}]",
			ast:     newRangeSymbolNode('a', 'z'),
		},
		{
			pattern:     "[\\p{Lu}]",
			skipTestAST: true,
		},
		{
			pattern:     "[a-\\p{Lu}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[\\p{Lu}-z]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[\\p{Lu}-\\p{Ll}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[z-a]",
			syntaxError: synErrRangeInvalidOrder,
		},
		{
			pattern:     "a[]",
			syntaxError: synErrBExpNoElem,
		},
		{
			pattern:     "[]a",
			syntaxError: synErrBExpNoElem,
		},
		{
			pattern:     "[]",
			syntaxError: synErrBExpNoElem,
		},
		{
			pattern: "[^\\u{004E}]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '\u004E'-1),
				newRangeSymbolNode('\u004E'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^\\u{0061}-\\u{007A}]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '\u0061'-1),
				newRangeSymbolNode('\u007A'+1, 0x10FFFF),
			),
		},
		{
			pattern:     "[^\\p{Lu}]",
			skipTestAST: true,
		},
		{
			pattern:     "[^a-\\p{Lu}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[^\\p{Lu}-z]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[^\\p{Lu}-\\p{Ll}]",
			syntaxError: synErrRangePropIsUnavailable,
		},
		{
			pattern:     "[^\\u{0000}-\\u{10FFFF}]",
			syntaxError: synErrUnmatchablePattern,
		},
		{
			pattern:     "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]",
			syntaxError: synErrUnmatchablePattern,
		},
		{
			pattern: "[^]",
			ast:     newSymbolNode('^'),
		},
		{
			pattern:     "[",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[^",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([^",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[^a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([^a",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "[^a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([^a-",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern: "]",
			ast:     newSymbolNode(']'),
		},
		{
			pattern:     "(]",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern: "a]",
			ast: genConcatNode(
				newSymbolNode('a'),
				newSymbolNode(']'),
			),
		},
		{
			pattern:     "(a]",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "([)",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern:     "([a)",
			syntaxError: synErrBExpUnclosed,
		},
		{
			pattern: "[a-]",
			ast: genAltNode(
				newSymbolNode('a'),
				newSymbolNode('-'),
			),
		},
		{
			pattern: "[^a-]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 0x2C),
				newRangeSymbolNode(0x2E, 0x60),
				newRangeSymbolNode(0x62, 0x10FFFF),
			),
		},
		{
			pattern: "[-z]",
			ast: genAltNode(
				newSymbolNode('-'),
				newSymbolNode('z'),
			),
		},
		{
			pattern: "[^-z]",
			ast: newAltNode(
				newRangeSymbolNode(0x00, 0x2C),
				newAltNode(
					newRangeSymbolNode(0x2E, 0x79),
					newRangeSymbolNode(0x7B, 0x10FFFF),
				),
			),
		},
		{
			pattern: "[-]",
			ast:     newSymbolNode('-'),
		},
		{
			pattern: "[^-]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 0x2C),
				newRangeSymbolNode(0x2E, 0x10FFFF),
			),
		},
		{
			pattern: "[^01]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '0'-1),
				newRangeSymbolNode('1'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^10]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, '0'-1),
				newRangeSymbolNode('1'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^a-z]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 'a'-1),
				newRangeSymbolNode('z'+1, 0x10FFFF),
			),
		},
		{
			pattern: "[^az]",
			ast: genAltNode(
				newRangeSymbolNode(0x00, 'a'-1),
				genAltNode(
					newRangeSymbolNode('a'+1, 'z'-1),
					newRangeSymbolNode('z'+1, 0x10FFFF),
				),
			),
		},
		{
			pattern: "\\u{006E}",
			ast:     newSymbolNode('\u006E'),
		},
		{
			pattern: "\\u{03BD}",
			ast:     newSymbolNode('\u03BD'),
		},
		{
			pattern: "\\u{306B}",
			ast:     newSymbolNode('\u306B'),
		},
		{
			pattern: "\\u{01F638}",
			ast:     newSymbolNode('\U0001F638'),
		},
		{
			pattern: "\\u{0000}",
			ast:     newSymbolNode('\u0000'),
		},
		{
			pattern: "\\u{10FFFF}",
			ast:     newSymbolNode('\U0010FFFF'),
		},
		{
			pattern:     "\\u{110000}",
			syntaxError: synErrCPExpOutOfRange,
		},
		{
			pattern:     "\\u",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\u{",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\u{03BD",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\u{}",
			syntaxError: synErrCPExpInvalidForm,
		},
		{
			pattern:     "\\p{Letter}",
			skipTestAST: true,
		},
		{
			pattern:     "\\p{General_Category=Letter}",
			skipTestAST: true,
		},
		{
			pattern:     "\\p{ Letter }",
			skipTestAST: true,
		},
		{
			pattern:     "\\p{ General_Category = Letter }",
			skipTestAST: true,
		},
		{
			pattern:     "\\p",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{Letter",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{General_Category=}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{General_Category=  }",
			syntaxError: synErrCharPropInvalidSymbol,
		},
		{
			pattern:     "\\p{=Letter}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{  =Letter}",
			syntaxError: synErrCharPropInvalidSymbol,
		},
		{
			pattern:     "\\p{=}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern:     "\\p{}",
			syntaxError: synErrCharPropExpInvalidForm,
		},
		{
			pattern: "\\f{a2c}",
			fragments: map[LexKindName]string{
				"a2c": "abc",
			},
			ast: newFragmentNode("a2c",
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern: "\\f{ a2c }",
			fragments: map[LexKindName]string{
				"a2c": "abc",
			},
			ast: newFragmentNode("a2c",
				genConcatNode(
					newSymbolNode('a'),
					newSymbolNode('b'),
					newSymbolNode('c'),
				),
			),
		},
		{
			pattern:     "\\f",
			syntaxError: synErrFragmentExpInvalidForm,
		},
		{
			pattern:     "\\f{",
			syntaxError: synErrFragmentExpInvalidForm,
		},
		{
			pattern: "\\f{a2c",
			fragments: map[LexKindName]string{
				"a2c": "abc",
			},
			syntaxError: synErrFragmentExpInvalidForm,
		},
		{
			pattern: "(a)",
			ast:     newSymbolNode('a'),
		},
		{
			pattern: "(((a)))",
			ast:     newSymbolNode('a'),
		},
		{
			pattern:     "a()",
			syntaxError: synErrGroupNoElem,
		},
		{
			pattern:     "()a",
			syntaxError: synErrGroupNoElem,
		},
		{
			pattern:     "()",
			syntaxError: synErrGroupNoElem,
		},
		{
			pattern:     "(",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "a(",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "(a",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "((",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     "((a)",
			syntaxError: synErrGroupUnclosed,
		},
		{
			pattern:     ")",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     "a)",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     ")a",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     "))",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern:     "(a))",
			syntaxError: synErrGroupNoInitiator,
		},
		{
			pattern: "Mulder|Scully",
			ast: genAltNode(
				genConcatNode(
					newSymbolNode('M'),
					newSymbolNode('u'),
					newSymbolNode('l'),
					newSymbolNode('d'),
					newSymbolNode('e'),
					newSymbolNode('r'),
				),
				genConcatNode(
					newSymbolNode('S'),
					newSymbolNode('c'),
					newSymbolNode('u'),
					newSymbolNode('l'),
					newSymbolNode('l'),
					newSymbolNode('y'),
				),
			),
		},
		{
			pattern: "Langly|Frohike|Byers",
			ast: genAltNode(
				genConcatNode(
					newSymbolNode('L'),
					newSymbolNode('a'),
					newSymbolNode('n'),
					newSymbolNode('g'),
					newSymbolNode('l'),
					newSymbolNode('y'),
				),
				genConcatNode(
					newSymbolNode('F'),
					newSymbolNode('r'),
					newSymbolNode('o'),
					newSymbolNode('h'),
					newSymbolNode('i'),
					newSymbolNode('k'),
					newSymbolNode('e'),
				),
				genConcatNode(
					newSymbolNode('B'),
					newSymbolNode('y'),
					newSymbolNode('e'),
					newSymbolNode('r'),
					newSymbolNode('s'),
				),
			),
		},
		{
			pattern:     "|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "||",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Mulder|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "|Scully",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Langly|Frohike|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Langly||Byers",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "|Frohike|Byers",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "|Frohike|",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Fox(|)Mulder",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "(Fox|)Mulder",
			syntaxError: synErrAltLackOfOperand,
		},
		{
			pattern:     "Fox(|Mulder)",
			syntaxError: synErrAltLackOfOperand,
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) {
			fragmentTrees := map[LexKindName]CPTree{}
			for kind, pattern := range tt.fragments {
				p := NewParser(kind, strings.NewReader(pattern))
				root, err := p.Parse()
				if err != nil {
					t.Fatal(err)
				}

				fragmentTrees[kind] = root
			}
			err := CompleteFragments(fragmentTrees)
			if err != nil {
				t.Fatal(err)
			}

			p := NewParser(LexKindName("test"), strings.NewReader(tt.pattern))
			root, err := p.Parse()
			if tt.syntaxError != nil {
				// printCPTree(os.Stdout, root, "", "")
				if err != ParseErr {
					t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
				}
				_, synErr := p.Error()
				if synErr != tt.syntaxError {
					t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr)
				}
				if root != nil {
					t.Fatalf("tree must be nil")
				}
			} else {
				if err != nil {
					detail, cause := p.Error()
					t.Fatalf("%v: %v: %v", err, cause, detail)
				}
				if root == nil {
					t.Fatal("tree must be non-nil")
				}

				complete, err := ApplyFragments(root, fragmentTrees)
				if err != nil {
					t.Fatal(err)
				}
				if !complete {
					t.Fatalf("incomplete fragments")
				}

				// printCPTree(os.Stdout, root, "", "")
				if !tt.skipTestAST {
					r := root.(*rootNode)
					testAST(t, tt.ast, r.tree)
				}
			}
		})
	}
}

func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) {
	for _, cProp := range ucd.ContributoryProperties() {
		t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) {
			p := NewParser(LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp)))
			root, err := p.Parse()
			if err == nil {
				t.Fatalf("expected syntax error: got: nil")
			}
			_, synErr := p.Error()
			if synErr != synErrCharPropUnsupported {
				t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr)
			}
			if root != nil {
				t.Fatalf("tree is not nil")
			}
		})
	}
}

func testAST(t *testing.T, expected, actual CPTree) {
	t.Helper()

	aTy := reflect.TypeOf(actual)
	eTy := reflect.TypeOf(expected)
	if eTy != aTy {
		t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy)
	}

	if actual == nil {
		return
	}

	switch e := expected.(type) {
	case *symbolNode:
		a := actual.(*symbolNode)
		if a.From != e.From || a.To != e.To {
			t.Fatalf("unexpected node: want: %+v, got: %+v", e, a)
		}
	}
	eLeft, eRight := expected.children()
	aLeft, aRight := actual.children()
	testAST(t, eLeft, aLeft)
	testAST(t, eRight, aRight)
}

func TestExclude(t *testing.T) {
	for _, test := range []struct {
		caption string
		target  CPTree
		base    CPTree
		result  CPTree
	}{
		// t.From > b.From && t.To < b.To

		// |t.From - b.From| = 1
		// |b.To - t.To| = 1
		//
		//     Target (t):    +--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+  +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1",
			target:  newSymbolNode('1'),
			base:    newRangeSymbolNode('0', '2'),
			result: newAltNode(
				newSymbolNode('0'),
				newSymbolNode('2'),
			),
		},
		// |t.From - b.From| > 1
		// |b.To - t.To| > 1
		//
		//     Target (t):       +--+
		//       Base (b): +--+--+--+--+--+
		// Result (b - t): +--+--+  +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1",
			target:  newSymbolNode('2'),
			base:    newRangeSymbolNode('0', '4'),
			result: newAltNode(
				newRangeSymbolNode('0', '1'),
				newRangeSymbolNode('3', '4'),
			),
		},

		// t.From <= b.From && t.To >= b.From && t.To < b.To

		// |b.From - t.From| = 0
		// |t.To - b.From| = 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+
		//       Base (b): +--+--+
		// Result (b - t):    +--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
			target:  newSymbolNode('0'),
			base:    newRangeSymbolNode('0', '1'),
			result:  newSymbolNode('1'),
		},
		// |b.From - t.From| = 0
		// |t.To - b.From| = 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+
		//       Base (b): +--+--+--+
		// Result (b - t):    +--+--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
			target:  newSymbolNode('0'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newRangeSymbolNode('1', '2'),
		},
		// |b.From - t.From| = 0
		// |t.To - b.From| > 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+--+
		//       Base (b): +--+--+--+
		// Result (b - t):       +--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newSymbolNode('2'),
		},
		// |b.From - t.From| = 0
		// |t.To - b.From| > 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+--+
		//       Base (b): +--+--+--+--+
		// Result (b - t):       +--+--+
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('0', '3'),
			result:  newRangeSymbolNode('2', '3'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| = 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+--+
		//       Base (b):    +--+--+
		// Result (b - t):       +--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('1', '2'),
			result:  newSymbolNode('2'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| = 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+--+
		//       Base (b):    +--+--+--+
		// Result (b - t):       +--+--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
			target:  newRangeSymbolNode('0', '1'),
			base:    newRangeSymbolNode('1', '3'),
			result:  newRangeSymbolNode('2', '3'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| > 0
		// |b.To - t.To| = 1
		//
		//     Target (t): +--+--+--+
		//       Base (b):    +--+--+--+
		// Result (b - t):          +--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
			target:  newRangeSymbolNode('0', '2'),
			base:    newRangeSymbolNode('1', '3'),
			result:  newSymbolNode('3'),
		},
		// |b.From - t.From| > 0
		// |t.To - b.From| > 0
		// |b.To - t.To| > 1
		//
		//     Target (t): +--+--+--+
		//       Base (b):    +--+--+--+--+
		// Result (b - t):          +--+--+
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
			target:  newRangeSymbolNode('0', '2'),
			base:    newRangeSymbolNode('1', '4'),
			result:  newRangeSymbolNode('3', '4'),
		},

		// t.From > b.From && t.From <= b.To && t.To >= b.To

		// |t.From - b.From| = 1
		// |b.To - t.From| = 0
		// |t.To - b.To| = 0
		//
		//     Target (t):    +--+
		//       Base (b): +--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
			target:  newSymbolNode('1'),
			base:    newRangeSymbolNode('0', '1'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| = 1
		// |b.To - t.From| = 0
		// |t.To - b.To| > 0
		//
		//     Target (t):    +--+--+
		//       Base (b): +--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('1', '2'),
			base:    newRangeSymbolNode('0', '1'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| = 1
		// |b.To - t.From| > 0
		// |t.To - b.To| = 0
		//
		//     Target (t):    +--+--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
			target:  newRangeSymbolNode('1', '2'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| = 1
		// |b.To - t.From| > 0
		// |t.To - b.To| > 0
		//
		//     Target (t):    +--+--+--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+
		{
			caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('1', '3'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newSymbolNode('0'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| = 0
		// |t.To - b.To| = 0
		//
		//     Target (t):       +--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
			target:  newSymbolNode('2'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newRangeSymbolNode('0', '1'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| = 0
		// |t.To - b.To| > 0
		//
		//     Target (t):       +--+--+
		//       Base (b): +--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('2', '3'),
			base:    newRangeSymbolNode('0', '2'),
			result:  newRangeSymbolNode('0', '1'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| > 0
		// |t.To - b.To| = 0
		//
		//     Target (t):       +--+--+
		//       Base (b): +--+--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
			target:  newRangeSymbolNode('2', '3'),
			base:    newRangeSymbolNode('0', '3'),
			result:  newRangeSymbolNode('0', '1'),
		},
		// |t.From - b.From| > 1
		// |b.To - t.From| > 0
		// |t.To - b.To| > 0
		//
		//     Target (t):       +--+--+--+
		//       Base (b): +--+--+--+--+
		// Result (b - t): +--+--+
		{
			caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('2', '4'),
			base:    newRangeSymbolNode('0', '3'),
			result:  newRangeSymbolNode('0', '1'),
		},

		// t.From <= b.From && t.To >= b.To

		// |b.From - t.From| = 0
		// |t.To - b.To| = 0
		//
		//     Target (t): +--+
		//       Base (b): +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0",
			target:  newSymbolNode('0'),
			base:    newSymbolNode('0'),
			result:  nil,
		},
		// |b.From - t.From| = 0
		// |t.To - b.To| > 0
		//
		//     Target (t): +--+--+
		//       Base (b): +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('0', '1'),
			base:    newSymbolNode('0'),
			result:  nil,
		},
		// |b.From - t.From| > 0
		// |t.To - b.To| = 0
		//
		//     Target (t): +--+--+
		//       Base (b):    +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0",
			target:  newRangeSymbolNode('0', '1'),
			base:    newSymbolNode('1'),
			result:  nil,
		},
		// |b.From - t.From| > 0
		// |t.To - b.To| > 0
		//
		//     Target (t): +--+--+--+
		//       Base (b):    +--+
		// Result (b - t): N/A
		{
			caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0",
			target:  newRangeSymbolNode('0', '2'),
			base:    newSymbolNode('1'),
			result:  nil,
		},

		// Others

		// |b.From - t.From| = 1
		//
		//     Target (t): +--+
		//       Base (b):    +--+
		// Result (b - t):    +--+
		{
			caption: "|b.From - t.From| = 1",
			target:  newSymbolNode('0'),
			base:    newSymbolNode('1'),
			result:  newSymbolNode('1'),
		},
		// |b.From - t.From| > 1
		//
		//     Target (t): +--+
		//       Base (b):       +--+
		// Result (b - t):       +--+
		{
			caption: "|b.From - t.From| > 1",
			target:  newSymbolNode('0'),
			base:    newSymbolNode('2'),
			result:  newSymbolNode('2'),
		},
		// |t.To - b.To| = 1
		//
		//     Target (t):    +--+
		//       Base (b): +--+
		// Result (b - t): +--+
		{
			caption: "|t.To - b.To| = 1",
			target:  newSymbolNode('1'),
			base:    newSymbolNode('0'),
			result:  newSymbolNode('0'),
		},
		// |t.To - b.To| > 1
		//
		//     Target (t):       +--+
		//       Base (b): +--+
		// Result (b - t): +--+
		{
			caption: "|t.To - b.To| > 1",
			target:  newSymbolNode('2'),
			base:    newSymbolNode('0'),
			result:  newSymbolNode('0'),
		},
	} {
		t.Run(test.caption, func(t *testing.T) {
			r := exclude(test.target, test.base)
			testAST(t, test.result, r)
		})
	}
}


func MainTest() {
	tests := []testing.InternalTest{
		{ "TestGenCharBlocksWellFormed", TestGenCharBlocksWellFormed },
		{ "TestGenCharBlocksIllFormed", TestGenCharBlocksIllFormed },
		{ "TestCompressor_Compress", TestCompressor_Compress },
		{ "TestValidateIdentifier", TestValidateIdentifier },
		{ "TestLexKindName_validate", TestLexKindName_validate },
		{ "TestLexModeName_validate", TestLexModeName_validate },
		{ "TestSnakeCaseToUpperCamelCase", TestSnakeCaseToUpperCamelCase },
		{ "TestFindSpellingInconsistencies", TestFindSpellingInconsistencies },
		{ "TestLexSpec_Validate", TestLexSpec_Validate },
		{ "TestLexer", TestLexer },
		{ "TestParse", TestParse },
		{ "TestParse_ContributoryPropertyIsNotExposed", TestParse_ContributoryPropertyIsNotExposed },
		{ "TestExclude", TestExclude },
	}

	deps := testdeps.TestDeps{}
	benchmarks  := []testing.InternalBenchmark {}
	fuzzTargets := []testing.InternalFuzzTarget{}
	examples    := []testing.InternalExample   {}
	m := testing.MainStart(deps, tests, benchmarks, fuzzTargets, examples)
	os.Exit(m.Run())
}