package tre import ( "fmt" "os" "reflect" "strings" "testing" "testing/internal/testdeps" "ucd" ) func TestGenCharBlocksWellFormed(t *testing.T) { cBlk := func(from []byte, to []byte) *CharBlock { return &CharBlock{ From: from, To: to, } } seq := func(b ...byte) []byte { return b } tests := []struct { from rune to rune blocks []*CharBlock }{ { from: '\u0000', to: '\u007f', blocks: []*CharBlock{ cBlk(seq(0x00), seq(0x7f)), }, }, { from: '\u0080', to: '\u07ff', blocks: []*CharBlock{ cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)), }, }, { from: '\u0800', to: '\u0fff', blocks: []*CharBlock{ cBlk( seq(0xe0, 0xa0, 0x80), seq(0xe0, 0xbf, 0xbf), ), }, }, { from: '\u1000', to: '\ucfff', blocks: []*CharBlock{ cBlk( seq(0xe1, 0x80, 0x80), seq(0xec, 0xbf, 0xbf), ), }, }, { from: '\ud000', to: '\ud7ff', blocks: []*CharBlock{ cBlk( seq(0xed, 0x80, 0x80), seq(0xed, 0x9f, 0xbf), ), }, }, { from: '\ue000', to: '\uffff', blocks: []*CharBlock{ cBlk( seq(0xee, 0x80, 0x80), seq(0xef, 0xbf, 0xbf), ), }, }, { from: '\U00010000', to: '\U0003ffff', blocks: []*CharBlock{ cBlk( seq(0xf0, 0x90, 0x80, 0x80), seq(0xf0, 0xbf, 0xbf, 0xbf), ), }, }, { from: '\U00040000', to: '\U000fffff', blocks: []*CharBlock{ cBlk( seq(0xf1, 0x80, 0x80, 0x80), seq(0xf3, 0xbf, 0xbf, 0xbf), ), }, }, { from: '\U00100000', to: '\U0010ffff', blocks: []*CharBlock{ cBlk( seq(0xf4, 0x80, 0x80, 0x80), seq(0xf4, 0x8f, 0xbf, 0xbf), ), }, }, { from: '\u0000', to: '\U0010ffff', blocks: []*CharBlock{ cBlk(seq(0x00), seq(0x7f)), cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)), cBlk( seq(0xe0, 0xa0, 0x80), seq(0xe0, 0xbf, 0xbf), ), cBlk( seq(0xe1, 0x80, 0x80), seq(0xec, 0xbf, 0xbf), ), cBlk( seq(0xed, 0x80, 0x80), seq(0xed, 0x9f, 0xbf), ), cBlk( seq(0xee, 0x80, 0x80), seq(0xef, 0xbf, 0xbf), ), cBlk( seq(0xf0, 0x90, 0x80, 0x80), seq(0xf0, 0xbf, 0xbf, 0xbf), ), cBlk( seq(0xf1, 0x80, 0x80, 0x80), seq(0xf3, 0xbf, 0xbf, 0xbf), ), cBlk( seq(0xf4, 0x80, 0x80, 0x80), seq(0xf4, 0x8f, 0xbf, 0xbf), ), }, }, } for _, tt := range tests { const errmsg = "unexpected character block: want: %+v, got: %+v" tts := fmt.Sprintf("%v..%v", tt.from, tt.to) t.Run(tts, func(t *testing.T) { blks, err := GenCharBlocks(tt.from, tt.to) if err != nil { t.Fatal(err) } if len(blks) != len(tt.blocks) { t.Fatalf(errmsg, tt.blocks, blks) } for i, blk := range blks { expected := tt.blocks[i] neqFrom := len(blk.From) != len(expected.From) neqTo := len(blk.To) != len(expected.To) if neqFrom || neqTo { t.Fatalf(errmsg, tt.blocks, blks) } for j := 0; j < len(blk.From); j++ { neqFrom := blk.From[j] != expected.From[j] neqTo := blk.To[j] != expected.To[j] if neqFrom || neqTo { t.Fatalf( errmsg, tt.blocks, blks, ) } } } }) } } func TestGenCharBlocksIllFormed(t *testing.T) { tests := []struct { from rune to rune }{ { // from > to from: '\u0001', to: '\u0000', }, { from: -1, // U+10FFFF to: '\u0000', }, { from: '\u0000', to: 0x110000, // >U+10FFFF }, { from: 0xd800, // U+D800 (surrogate code point) to: '\ue000', }, { from: 0xdfff, // U+DFFF (surrogate code point) to: '\ue000', }, { from: '\ucfff', to: 0xd800, // U+D800 (surrogate code point) }, { from: '\ucfff', to: 0xdfff, // U+DFFF (surrogate code point) }, } for _, tt := range tests { tts := fmt.Sprintf("%v..%v", tt.from, tt.to) t.Run(tts, func(t *testing.T) { blks, err := GenCharBlocks(tt.from, tt.to) if err == nil { t.Fatal("expected error didn't occur") } if blks != nil { t.Fatal("character blocks must be nil") } }) } } func TestCompressor_Compress(t *testing.T) { x := 0 // an empty value allCompressors := func() []Compressor { return []Compressor{ NewCompressorUniqueEntriesTable(), NewCompressorRowDisplacementTable(x), } } tests := []struct { original []int rowCount int colCount int compressors []Compressor }{ { original: []int{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, rowCount: 3, colCount: 5, compressors: allCompressors(), }, { original: []int{ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, }, rowCount: 3, colCount: 5, compressors: allCompressors(), }, { original: []int{ 1, 1, 1, 1, 1, x, x, x, x, x, 1, 1, 1, 1, 1, }, rowCount: 3, colCount: 5, compressors: allCompressors(), }, { original: []int{ 1, x, 1, 1, 1, 1, 1, x, 1, 1, 1, 1, 1, x, 1, }, rowCount: 3, colCount: 5, compressors: allCompressors(), }, } for i, tt := range tests { for _, comp := range tt.compressors { t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { dup := make([]int, len(tt.original)) copy(dup, tt.original) orig, err := NewOriginalTable(tt.original, tt.colCount) if err != nil { t.Fatal(err) } err = comp.Compress(orig) if err != nil { t.Fatal(err) } rowCount, colCount := comp.OriginalTableSize() if rowCount != tt.rowCount || colCount != tt.colCount { t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) } for i := 0; i < tt.rowCount; i++ { for j := 0; j < tt.colCount; j++ { v, err := comp.Lookup(i, j) if err != nil { t.Fatal(err) } expected := tt.original[i*tt.colCount+j] if v != expected { t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) } } } // Calling with out-of-range indexes should be an error. if _, err := comp.Lookup(0, -1); err == nil { t.Fatalf("expected error didn't occur (0, -1)") } if _, err := comp.Lookup(-1, 0); err == nil { t.Fatalf("expected error didn't occur (-1, 0)") } if _, err := comp.Lookup(rowCount-1, colCount); err == nil { t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) } if _, err := comp.Lookup(rowCount, colCount-1); err == nil { t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) } // The compressor must not break the original table. for i := 0; i < tt.rowCount; i++ { for j := 0; j < tt.colCount; j++ { idx := i*tt.colCount + j if tt.original[idx] != dup[idx] { t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) } } } }) } } } var idTests = []struct { id string invalid bool }{ { id: "foo", }, { id: "foo2", }, { id: "foo_bar_baz", }, { id: "f_o_o", }, { id: "Foo", invalid: true, }, { id: "foo_Bar", invalid: true, }, { id: "2foo", invalid: true, }, { id: "_foo", invalid: true, }, { id: "foo_", invalid: true, }, { id: "foo__bar", invalid: true, }, } func TestValidateIdentifier(t *testing.T) { for _, tt := range idTests { t.Run(tt.id, func(t *testing.T) { err := validateIdentifier(tt.id) if tt.invalid { if err == nil { t.Errorf("expected error didn't occur") } } else { if err != nil { t.Errorf("unexpected error occurred: %v", err) } } }) } } func TestLexKindName_validate(t *testing.T) { for _, tt := range idTests { t.Run(tt.id, func(t *testing.T) { err := LexKindName(tt.id).validate() if tt.invalid { if err == nil { t.Errorf("expected error didn't occur") } } else { if err != nil { t.Errorf("unexpected error occurred: %v", err) } } }) } } func TestLexModeName_validate(t *testing.T) { for _, tt := range idTests { t.Run(tt.id, func(t *testing.T) { err := LexModeName(tt.id).validate() if tt.invalid { if err == nil { t.Errorf("expected error didn't occur") } } else { if err != nil { t.Errorf("unexpected error occurred: %v", err) } } }) } } func TestSnakeCaseToUpperCamelCase(t *testing.T) { tests := []struct { snake string camel string }{ { snake: "foo", camel: "Foo", }, { snake: "foo_bar", camel: "FooBar", }, { snake: "foo_bar_baz", camel: "FooBarBaz", }, { snake: "Foo", camel: "Foo", }, { snake: "fooBar", camel: "FooBar", }, { snake: "FOO", camel: "FOO", }, { snake: "FOO_BAR", camel: "FOOBAR", }, { snake: "_foo_bar_", camel: "FooBar", }, { snake: "___foo___bar___", camel: "FooBar", }, } for _, tt := range tests { c := SnakeCaseToUpperCamelCase(tt.snake) if c != tt.camel { t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) } } } func TestFindSpellingInconsistencies(t *testing.T) { tests := []struct { ids []string duplicated [][]string }{ { ids: []string{"foo", "foo"}, duplicated: nil, }, { ids: []string{"foo", "Foo"}, duplicated: [][]string{{"Foo", "foo"}}, }, { ids: []string{"foo", "foo", "Foo"}, duplicated: [][]string{{"Foo", "foo"}}, }, { ids: []string{"foo_bar_baz", "FooBarBaz"}, duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, }, { ids: []string{"foo", "Foo", "bar", "Bar"}, duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, }, { ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, }, } for i, tt := range tests { t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { duplicated := FindSpellingInconsistencies(tt.ids) if len(duplicated) != len(tt.duplicated) { t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) } for i, dupIDs := range duplicated { if len(dupIDs) != len(tt.duplicated[i]) { t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) } for j, id := range dupIDs { if id != tt.duplicated[i][j] { t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) } } } }) } } func TestLexSpec_Validate(t *testing.T) { // We expect that the spelling inconsistency error will occur. spec := &LexSpec{ Entries: []*LexEntry{ { Modes: []LexModeName{ // 'Default' is the spelling inconsistency because 'default' is predefined. "Default", }, Kind: "foo", Pattern: "foo", }, }, } err := spec.Validate() if err == nil { t.Fatalf("expected error didn't occur") } } func TestLexer(t *testing.T) { tests := []struct { caption string src string tokens []*token err error }{ { caption: "lexer can recognize ordinaly characters", src: "123abcいろは", tokens: []*token{ newToken(tokenKindChar, '1'), newToken(tokenKindChar, '2'), newToken(tokenKindChar, '3'), newToken(tokenKindChar, 'a'), newToken(tokenKindChar, 'b'), newToken(tokenKindChar, 'c'), newToken(tokenKindChar, 'い'), newToken(tokenKindChar, 'ろ'), newToken(tokenKindChar, 'は'), newToken(tokenKindEOF, nullChar), }, }, { caption: "lexer can recognize the special characters in default mode", src: ".*+?|()[\\u", tokens: []*token{ newToken(tokenKindAnyChar, nullChar), newToken(tokenKindRepeat, nullChar), newToken(tokenKindRepeatOneOrMore, nullChar), newToken(tokenKindOption, nullChar), newToken(tokenKindAlt, nullChar), newToken(tokenKindGroupOpen, nullChar), newToken(tokenKindGroupClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "lexer can recognize the escape sequences in default mode", src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", tokens: []*token{ newToken(tokenKindChar, '\\'), newToken(tokenKindChar, '.'), newToken(tokenKindChar, '*'), newToken(tokenKindChar, '+'), newToken(tokenKindChar, '?'), newToken(tokenKindChar, '|'), newToken(tokenKindChar, '('), newToken(tokenKindChar, ')'), newToken(tokenKindChar, '['), newToken(tokenKindEOF, nullChar), }, }, { caption: "], {, and } are treated as an ordinary character in default mode", src: "]{}", tokens: []*token{ newToken(tokenKindChar, ']'), newToken(tokenKindChar, '{'), newToken(tokenKindChar, '}'), newToken(tokenKindEOF, nullChar), }, }, { caption: "lexer can recognize the special characters in bracket expression mode", src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, 'a'), newToken(tokenKindCharRange, nullChar), newToken(tokenKindChar, 'z'), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("09AF"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, 'a'), newToken(tokenKindCharRange, nullChar), newToken(tokenKindChar, 'z'), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("09abcf"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "lexer can recognize the escape sequences in bracket expression mode", src: "[\\^a\\-z]", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '^'), newToken(tokenKindChar, 'a'), newToken(tokenKindChar, '-'), newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "in a bracket expression, the special characters are also handled as normal characters", src: "[\\\\.*+?|()[", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '\\'), newToken(tokenKindChar, '.'), newToken(tokenKindChar, '*'), newToken(tokenKindChar, '+'), newToken(tokenKindChar, '?'), newToken(tokenKindChar, '|'), newToken(tokenKindChar, '('), newToken(tokenKindChar, ')'), newToken(tokenKindChar, '['), newToken(tokenKindEOF, nullChar), }, }, { caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", // [...-...][...-][-...][-] // ~~~~~~~ ~ ~ ~ // ^ ^ ^ ^ // | | | `-- Ordinary Character (b) // | | `-- Ordinary Character (b) // | `-- Ordinary Character (b) // `-- Character Range (a) // // a. *-* is handled as a character-range expression. // b. *-, -*, or - are handled as ordinary characters. src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, 'a'), newToken(tokenKindCharRange, nullChar), newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, 'a'), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindCharRange, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, 'a'), newToken(tokenKindCharRange, nullChar), newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, 'a'), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindCharRange, nullChar), newToken(tokenKindChar, '-'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", // [^...^...][^] // ~~ ~ ~~ // ^ ^ ^^ // | | |`-- Ordinary Character (c) // | | `-- Bracket Expression // | `-- Ordinary Character (b) // `-- Inverse Bracket Expression (a) // // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. src: "[^^][^]", tokens: []*token{ newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '^'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindChar, '^'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "lexer raises an error when an invalid escape sequence appears", src: "\\@", err: synErrInvalidEscSeq, }, { caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", src: "\\", err: synErrIncompletedEscSeq, }, { caption: "lexer raises an error when an invalid escape sequence appears", src: "[\\@", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), }, err: synErrInvalidEscSeq, }, { caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", src: "[\\", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), }, err: synErrIncompletedEscSeq, }, { caption: "lexer can recognize the special characters and code points in code point expression mode", src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("0123"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("4567"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("89abcd"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("efAB"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("CDEF01"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("0123"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("4567"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("89abcd"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("efAB"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("CDEF01"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("0123"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("4567"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("89abcd"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("efAB"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("CDEF01"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "a one digit hex string isn't a valid code point", src: "\\u{0", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "a two digits hex string isn't a valid code point", src: "\\u{01", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "a three digits hex string isn't a valid code point", src: "\\u{012", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "a four digits hex string is a valid code point", src: "\\u{0123}", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("0123"), newToken(tokenKindRBrace, nullChar), }, }, { caption: "a five digits hex string isn't a valid code point", src: "\\u{01234", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "a six digits hex string is a valid code point", src: "\\u{012345}", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCodePointToken("012345"), newToken(tokenKindRBrace, nullChar), }, }, { caption: "a seven digits hex string isn't a valid code point", src: "\\u{0123456", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "a code point must be hex digits", src: "\\u{g", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "a code point must be hex digits", src: "\\u{G", tokens: []*token{ newToken(tokenKindCodePointLeader, nullChar), newToken(tokenKindLBrace, nullChar), }, err: synErrInvalidCodePoint, }, { caption: "lexer can recognize the special characters and symbols in character property expression mode", src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", tokens: []*token{ newToken(tokenKindCharPropLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCharPropSymbolToken("Letter"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCharPropLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCharPropSymbolToken("General_Category"), newToken(tokenKindEqual, nullChar), newCharPropSymbolToken("Letter"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpOpen, nullChar), newToken(tokenKindCharPropLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCharPropSymbolToken("Letter"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCharPropLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCharPropSymbolToken("General_Category"), newToken(tokenKindEqual, nullChar), newCharPropSymbolToken("Letter"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindCharPropLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCharPropSymbolToken("Letter"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindCharPropLeader, nullChar), newToken(tokenKindLBrace, nullChar), newCharPropSymbolToken("General_Category"), newToken(tokenKindEqual, nullChar), newCharPropSymbolToken("Letter"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "lexer can recognize the special characters and symbols in fragment expression mode", src: "\\f{integer}", tokens: []*token{ newToken(tokenKindFragmentLeader, nullChar), newToken(tokenKindLBrace, nullChar), newFragmentSymbolToken("integer"), newToken(tokenKindRBrace, nullChar), newToken(tokenKindEOF, nullChar), }, }, { caption: "a fragment expression is not supported in a bracket expression", src: "[\\f", tokens: []*token{ newToken(tokenKindBExpOpen, nullChar), }, err: synErrInvalidEscSeq, }, { caption: "a fragment expression is not supported in an inverse bracket expression", src: "[^\\f", tokens: []*token{ newToken(tokenKindInverseBExpOpen, nullChar), }, err: synErrInvalidEscSeq, }, } for _, tt := range tests { t.Run(tt.caption, func(t *testing.T) { lex := newLexer(strings.NewReader(tt.src)) var err error var tok *token i := 0 for { tok, err = lex.next() if err != nil { break } if i >= len(tt.tokens) { break } eTok := tt.tokens[i] i++ testToken(t, tok, eTok) if tok.kind == tokenKindEOF { break } } if tt.err != nil { if err != ParseErr { t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) } detail, cause := lex.error() if cause != tt.err { t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) } } else { if err != nil { t.Fatalf("unexpected error: %v", err) } } if i < len(tt.tokens) { t.Fatalf("expecte more tokens") } }) } } func testToken(t *testing.T, a, e *token) { t.Helper() if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) } } func TestParse(t *testing.T) { tests := []struct { pattern string fragments map[LexKindName]string ast CPTree syntaxError error // When an AST is large, as patterns containing a character property expression, this test only checks // that the pattern is parsable. The check of the validity of such AST is performed by checking that it // can be matched correctly using the driver. skipTestAST bool }{ { pattern: "a", ast: newSymbolNode('a'), }, { pattern: "abc", ast: genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), }, { pattern: "a?", ast: newOptionNode( newSymbolNode('a'), ), }, { pattern: "[abc]?", ast: newOptionNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), }, { pattern: "\\u{3042}?", ast: newOptionNode( newSymbolNode('\u3042'), ), }, { pattern: "\\p{Letter}?", skipTestAST: true, }, { pattern: "\\f{a2c}?", fragments: map[LexKindName]string{ "a2c": "abc", }, ast: newOptionNode( newFragmentNode("a2c", genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), ), }, { pattern: "(a)?", ast: newOptionNode( newSymbolNode('a'), ), }, { pattern: "((a?)?)?", ast: newOptionNode( newOptionNode( newOptionNode( newSymbolNode('a'), ), ), ), }, { pattern: "(abc)?", ast: newOptionNode( genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), }, { pattern: "(a|b)?", ast: newOptionNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), ), ), }, { pattern: "?", syntaxError: synErrRepNoTarget, }, { pattern: "(?)", syntaxError: synErrRepNoTarget, }, { pattern: "a|?", syntaxError: synErrRepNoTarget, }, { pattern: "?|b", syntaxError: synErrRepNoTarget, }, { pattern: "a??", syntaxError: synErrRepNoTarget, }, { pattern: "a*", ast: newRepeatNode( newSymbolNode('a'), ), }, { pattern: "[abc]*", ast: newRepeatNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), }, { pattern: "\\u{3042}*", ast: newRepeatNode( newSymbolNode('\u3042'), ), }, { pattern: "\\p{Letter}*", skipTestAST: true, }, { pattern: "\\f{a2c}*", fragments: map[LexKindName]string{ "a2c": "abc", }, ast: newRepeatNode( newFragmentNode("a2c", genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), ), }, { pattern: "((a*)*)*", ast: newRepeatNode( newRepeatNode( newRepeatNode( newSymbolNode('a'), ), ), ), }, { pattern: "(abc)*", ast: newRepeatNode( genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), }, { pattern: "(a|b)*", ast: newRepeatNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), ), ), }, { pattern: "*", syntaxError: synErrRepNoTarget, }, { pattern: "(*)", syntaxError: synErrRepNoTarget, }, { pattern: "a|*", syntaxError: synErrRepNoTarget, }, { pattern: "*|b", syntaxError: synErrRepNoTarget, }, { pattern: "a**", syntaxError: synErrRepNoTarget, }, { pattern: "a+", ast: genConcatNode( newSymbolNode('a'), newRepeatNode( newSymbolNode('a'), ), ), }, { pattern: "[abc]+", ast: genConcatNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), newRepeatNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), ), }, { pattern: "\\u{3042}+", ast: genConcatNode( newSymbolNode('\u3042'), newRepeatNode( newSymbolNode('\u3042'), ), ), }, { pattern: "\\p{Letter}+", skipTestAST: true, }, { pattern: "\\f{a2c}+", fragments: map[LexKindName]string{ "a2c": "abc", }, ast: genConcatNode( newFragmentNode("a2c", genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), newRepeatNode( newFragmentNode("a2c", genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), ), ), }, { pattern: "((a+)+)+", ast: genConcatNode( genConcatNode( genConcatNode( genConcatNode( newSymbolNode('a'), newRepeatNode( newSymbolNode('a'), ), ), newRepeatNode( genConcatNode( newSymbolNode('a'), newRepeatNode( newSymbolNode('a'), ), ), ), ), newRepeatNode( genConcatNode( genConcatNode( newSymbolNode('a'), newRepeatNode( newSymbolNode('a'), ), ), newRepeatNode( genConcatNode( newSymbolNode('a'), newRepeatNode( newSymbolNode('a'), ), ), ), ), ), ), ), }, { pattern: "(abc)+", ast: genConcatNode( genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), newRepeatNode( genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), ), }, { pattern: "(a|b)+", ast: genConcatNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), ), newRepeatNode( genAltNode( newSymbolNode('a'), newSymbolNode('b'), ), ), ), }, { pattern: "+", syntaxError: synErrRepNoTarget, }, { pattern: "(+)", syntaxError: synErrRepNoTarget, }, { pattern: "a|+", syntaxError: synErrRepNoTarget, }, { pattern: "+|b", syntaxError: synErrRepNoTarget, }, { pattern: "a++", syntaxError: synErrRepNoTarget, }, { pattern: ".", ast: newRangeSymbolNode(0x00, 0x10FFFF), }, { pattern: "[a]", ast: newSymbolNode('a'), }, { pattern: "[abc]", ast: genAltNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), }, { pattern: "[a-z]", ast: newRangeSymbolNode('a', 'z'), }, { pattern: "[A-Za-z]", ast: genAltNode( newRangeSymbolNode('A', 'Z'), newRangeSymbolNode('a', 'z'), ), }, { pattern: "[\\u{004E}]", ast: newSymbolNode('N'), }, { pattern: "[\\u{0061}-\\u{007A}]", ast: newRangeSymbolNode('a', 'z'), }, { pattern: "[\\p{Lu}]", skipTestAST: true, }, { pattern: "[a-\\p{Lu}]", syntaxError: synErrRangePropIsUnavailable, }, { pattern: "[\\p{Lu}-z]", syntaxError: synErrRangePropIsUnavailable, }, { pattern: "[\\p{Lu}-\\p{Ll}]", syntaxError: synErrRangePropIsUnavailable, }, { pattern: "[z-a]", syntaxError: synErrRangeInvalidOrder, }, { pattern: "a[]", syntaxError: synErrBExpNoElem, }, { pattern: "[]a", syntaxError: synErrBExpNoElem, }, { pattern: "[]", syntaxError: synErrBExpNoElem, }, { pattern: "[^\\u{004E}]", ast: genAltNode( newRangeSymbolNode(0x00, '\u004E'-1), newRangeSymbolNode('\u004E'+1, 0x10FFFF), ), }, { pattern: "[^\\u{0061}-\\u{007A}]", ast: genAltNode( newRangeSymbolNode(0x00, '\u0061'-1), newRangeSymbolNode('\u007A'+1, 0x10FFFF), ), }, { pattern: "[^\\p{Lu}]", skipTestAST: true, }, { pattern: "[^a-\\p{Lu}]", syntaxError: synErrRangePropIsUnavailable, }, { pattern: "[^\\p{Lu}-z]", syntaxError: synErrRangePropIsUnavailable, }, { pattern: "[^\\p{Lu}-\\p{Ll}]", syntaxError: synErrRangePropIsUnavailable, }, { pattern: "[^\\u{0000}-\\u{10FFFF}]", syntaxError: synErrUnmatchablePattern, }, { pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", syntaxError: synErrUnmatchablePattern, }, { pattern: "[^]", ast: newSymbolNode('^'), }, { pattern: "[", syntaxError: synErrBExpUnclosed, }, { pattern: "([", syntaxError: synErrBExpUnclosed, }, { pattern: "[a", syntaxError: synErrBExpUnclosed, }, { pattern: "([a", syntaxError: synErrBExpUnclosed, }, { pattern: "[a-", syntaxError: synErrBExpUnclosed, }, { pattern: "([a-", syntaxError: synErrBExpUnclosed, }, { pattern: "[^", syntaxError: synErrBExpUnclosed, }, { pattern: "([^", syntaxError: synErrBExpUnclosed, }, { pattern: "[^a", syntaxError: synErrBExpUnclosed, }, { pattern: "([^a", syntaxError: synErrBExpUnclosed, }, { pattern: "[^a-", syntaxError: synErrBExpUnclosed, }, { pattern: "([^a-", syntaxError: synErrBExpUnclosed, }, { pattern: "]", ast: newSymbolNode(']'), }, { pattern: "(]", syntaxError: synErrGroupUnclosed, }, { pattern: "a]", ast: genConcatNode( newSymbolNode('a'), newSymbolNode(']'), ), }, { pattern: "(a]", syntaxError: synErrGroupUnclosed, }, { pattern: "([)", syntaxError: synErrBExpUnclosed, }, { pattern: "([a)", syntaxError: synErrBExpUnclosed, }, { pattern: "[a-]", ast: genAltNode( newSymbolNode('a'), newSymbolNode('-'), ), }, { pattern: "[^a-]", ast: genAltNode( newRangeSymbolNode(0x00, 0x2C), newRangeSymbolNode(0x2E, 0x60), newRangeSymbolNode(0x62, 0x10FFFF), ), }, { pattern: "[-z]", ast: genAltNode( newSymbolNode('-'), newSymbolNode('z'), ), }, { pattern: "[^-z]", ast: newAltNode( newRangeSymbolNode(0x00, 0x2C), newAltNode( newRangeSymbolNode(0x2E, 0x79), newRangeSymbolNode(0x7B, 0x10FFFF), ), ), }, { pattern: "[-]", ast: newSymbolNode('-'), }, { pattern: "[^-]", ast: genAltNode( newRangeSymbolNode(0x00, 0x2C), newRangeSymbolNode(0x2E, 0x10FFFF), ), }, { pattern: "[^01]", ast: genAltNode( newRangeSymbolNode(0x00, '0'-1), newRangeSymbolNode('1'+1, 0x10FFFF), ), }, { pattern: "[^10]", ast: genAltNode( newRangeSymbolNode(0x00, '0'-1), newRangeSymbolNode('1'+1, 0x10FFFF), ), }, { pattern: "[^a-z]", ast: genAltNode( newRangeSymbolNode(0x00, 'a'-1), newRangeSymbolNode('z'+1, 0x10FFFF), ), }, { pattern: "[^az]", ast: genAltNode( newRangeSymbolNode(0x00, 'a'-1), genAltNode( newRangeSymbolNode('a'+1, 'z'-1), newRangeSymbolNode('z'+1, 0x10FFFF), ), ), }, { pattern: "\\u{006E}", ast: newSymbolNode('\u006E'), }, { pattern: "\\u{03BD}", ast: newSymbolNode('\u03BD'), }, { pattern: "\\u{306B}", ast: newSymbolNode('\u306B'), }, { pattern: "\\u{01F638}", ast: newSymbolNode('\U0001F638'), }, { pattern: "\\u{0000}", ast: newSymbolNode('\u0000'), }, { pattern: "\\u{10FFFF}", ast: newSymbolNode('\U0010FFFF'), }, { pattern: "\\u{110000}", syntaxError: synErrCPExpOutOfRange, }, { pattern: "\\u", syntaxError: synErrCPExpInvalidForm, }, { pattern: "\\u{", syntaxError: synErrCPExpInvalidForm, }, { pattern: "\\u{03BD", syntaxError: synErrCPExpInvalidForm, }, { pattern: "\\u{}", syntaxError: synErrCPExpInvalidForm, }, { pattern: "\\p{Letter}", skipTestAST: true, }, { pattern: "\\p{General_Category=Letter}", skipTestAST: true, }, { pattern: "\\p{ Letter }", skipTestAST: true, }, { pattern: "\\p{ General_Category = Letter }", skipTestAST: true, }, { pattern: "\\p", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\p{", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\p{Letter", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\p{General_Category=}", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\p{General_Category= }", syntaxError: synErrCharPropInvalidSymbol, }, { pattern: "\\p{=Letter}", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\p{ =Letter}", syntaxError: synErrCharPropInvalidSymbol, }, { pattern: "\\p{=}", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\p{}", syntaxError: synErrCharPropExpInvalidForm, }, { pattern: "\\f{a2c}", fragments: map[LexKindName]string{ "a2c": "abc", }, ast: newFragmentNode("a2c", genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), }, { pattern: "\\f{ a2c }", fragments: map[LexKindName]string{ "a2c": "abc", }, ast: newFragmentNode("a2c", genConcatNode( newSymbolNode('a'), newSymbolNode('b'), newSymbolNode('c'), ), ), }, { pattern: "\\f", syntaxError: synErrFragmentExpInvalidForm, }, { pattern: "\\f{", syntaxError: synErrFragmentExpInvalidForm, }, { pattern: "\\f{a2c", fragments: map[LexKindName]string{ "a2c": "abc", }, syntaxError: synErrFragmentExpInvalidForm, }, { pattern: "(a)", ast: newSymbolNode('a'), }, { pattern: "(((a)))", ast: newSymbolNode('a'), }, { pattern: "a()", syntaxError: synErrGroupNoElem, }, { pattern: "()a", syntaxError: synErrGroupNoElem, }, { pattern: "()", syntaxError: synErrGroupNoElem, }, { pattern: "(", syntaxError: synErrGroupUnclosed, }, { pattern: "a(", syntaxError: synErrGroupUnclosed, }, { pattern: "(a", syntaxError: synErrGroupUnclosed, }, { pattern: "((", syntaxError: synErrGroupUnclosed, }, { pattern: "((a)", syntaxError: synErrGroupUnclosed, }, { pattern: ")", syntaxError: synErrGroupNoInitiator, }, { pattern: "a)", syntaxError: synErrGroupNoInitiator, }, { pattern: ")a", syntaxError: synErrGroupNoInitiator, }, { pattern: "))", syntaxError: synErrGroupNoInitiator, }, { pattern: "(a))", syntaxError: synErrGroupNoInitiator, }, { pattern: "Mulder|Scully", ast: genAltNode( genConcatNode( newSymbolNode('M'), newSymbolNode('u'), newSymbolNode('l'), newSymbolNode('d'), newSymbolNode('e'), newSymbolNode('r'), ), genConcatNode( newSymbolNode('S'), newSymbolNode('c'), newSymbolNode('u'), newSymbolNode('l'), newSymbolNode('l'), newSymbolNode('y'), ), ), }, { pattern: "Langly|Frohike|Byers", ast: genAltNode( genConcatNode( newSymbolNode('L'), newSymbolNode('a'), newSymbolNode('n'), newSymbolNode('g'), newSymbolNode('l'), newSymbolNode('y'), ), genConcatNode( newSymbolNode('F'), newSymbolNode('r'), newSymbolNode('o'), newSymbolNode('h'), newSymbolNode('i'), newSymbolNode('k'), newSymbolNode('e'), ), genConcatNode( newSymbolNode('B'), newSymbolNode('y'), newSymbolNode('e'), newSymbolNode('r'), newSymbolNode('s'), ), ), }, { pattern: "|", syntaxError: synErrAltLackOfOperand, }, { pattern: "||", syntaxError: synErrAltLackOfOperand, }, { pattern: "Mulder|", syntaxError: synErrAltLackOfOperand, }, { pattern: "|Scully", syntaxError: synErrAltLackOfOperand, }, { pattern: "Langly|Frohike|", syntaxError: synErrAltLackOfOperand, }, { pattern: "Langly||Byers", syntaxError: synErrAltLackOfOperand, }, { pattern: "|Frohike|Byers", syntaxError: synErrAltLackOfOperand, }, { pattern: "|Frohike|", syntaxError: synErrAltLackOfOperand, }, { pattern: "Fox(|)Mulder", syntaxError: synErrAltLackOfOperand, }, { pattern: "(Fox|)Mulder", syntaxError: synErrAltLackOfOperand, }, { pattern: "Fox(|Mulder)", syntaxError: synErrAltLackOfOperand, }, } for i, tt := range tests { t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { fragmentTrees := map[LexKindName]CPTree{} for kind, pattern := range tt.fragments { p := NewParser(kind, strings.NewReader(pattern)) root, err := p.Parse() if err != nil { t.Fatal(err) } fragmentTrees[kind] = root } err := CompleteFragments(fragmentTrees) if err != nil { t.Fatal(err) } p := NewParser(LexKindName("test"), strings.NewReader(tt.pattern)) root, err := p.Parse() if tt.syntaxError != nil { // printCPTree(os.Stdout, root, "", "") if err != ParseErr { t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) } _, synErr := p.Error() if synErr != tt.syntaxError { t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) } if root != nil { t.Fatalf("tree must be nil") } } else { if err != nil { detail, cause := p.Error() t.Fatalf("%v: %v: %v", err, cause, detail) } if root == nil { t.Fatal("tree must be non-nil") } complete, err := ApplyFragments(root, fragmentTrees) if err != nil { t.Fatal(err) } if !complete { t.Fatalf("incomplete fragments") } // printCPTree(os.Stdout, root, "", "") if !tt.skipTestAST { r := root.(*rootNode) testAST(t, tt.ast, r.tree) } } }) } } func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { for _, cProp := range ucd.ContributoryProperties() { t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { p := NewParser(LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) root, err := p.Parse() if err == nil { t.Fatalf("expected syntax error: got: nil") } _, synErr := p.Error() if synErr != synErrCharPropUnsupported { t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) } if root != nil { t.Fatalf("tree is not nil") } }) } } func testAST(t *testing.T, expected, actual CPTree) { t.Helper() aTy := reflect.TypeOf(actual) eTy := reflect.TypeOf(expected) if eTy != aTy { t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) } if actual == nil { return } switch e := expected.(type) { case *symbolNode: a := actual.(*symbolNode) if a.From != e.From || a.To != e.To { t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) } } eLeft, eRight := expected.children() aLeft, aRight := actual.children() testAST(t, eLeft, aLeft) testAST(t, eRight, aRight) } func TestExclude(t *testing.T) { for _, test := range []struct { caption string target CPTree base CPTree result CPTree }{ // t.From > b.From && t.To < b.To // |t.From - b.From| = 1 // |b.To - t.To| = 1 // // Target (t): +--+ // Base (b): +--+--+--+ // Result (b - t): +--+ +--+ { caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", target: newSymbolNode('1'), base: newRangeSymbolNode('0', '2'), result: newAltNode( newSymbolNode('0'), newSymbolNode('2'), ), }, // |t.From - b.From| > 1 // |b.To - t.To| > 1 // // Target (t): +--+ // Base (b): +--+--+--+--+--+ // Result (b - t): +--+--+ +--+--+ { caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", target: newSymbolNode('2'), base: newRangeSymbolNode('0', '4'), result: newAltNode( newRangeSymbolNode('0', '1'), newRangeSymbolNode('3', '4'), ), }, // t.From <= b.From && t.To >= b.From && t.To < b.To // |b.From - t.From| = 0 // |t.To - b.From| = 0 // |b.To - t.To| = 1 // // Target (t): +--+ // Base (b): +--+--+ // Result (b - t): +--+ { caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", target: newSymbolNode('0'), base: newRangeSymbolNode('0', '1'), result: newSymbolNode('1'), }, // |b.From - t.From| = 0 // |t.To - b.From| = 0 // |b.To - t.To| > 1 // // Target (t): +--+ // Base (b): +--+--+--+ // Result (b - t): +--+--+ { caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", target: newSymbolNode('0'), base: newRangeSymbolNode('0', '2'), result: newRangeSymbolNode('1', '2'), }, // |b.From - t.From| = 0 // |t.To - b.From| > 0 // |b.To - t.To| = 1 // // Target (t): +--+--+ // Base (b): +--+--+--+ // Result (b - t): +--+ { caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", target: newRangeSymbolNode('0', '1'), base: newRangeSymbolNode('0', '2'), result: newSymbolNode('2'), }, // |b.From - t.From| = 0 // |t.To - b.From| > 0 // |b.To - t.To| > 1 // // Target (t): +--+--+ // Base (b): +--+--+--+--+ // Result (b - t): +--+--+ { caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", target: newRangeSymbolNode('0', '1'), base: newRangeSymbolNode('0', '3'), result: newRangeSymbolNode('2', '3'), }, // |b.From - t.From| > 0 // |t.To - b.From| = 0 // |b.To - t.To| = 1 // // Target (t): +--+--+ // Base (b): +--+--+ // Result (b - t): +--+ { caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", target: newRangeSymbolNode('0', '1'), base: newRangeSymbolNode('1', '2'), result: newSymbolNode('2'), }, // |b.From - t.From| > 0 // |t.To - b.From| = 0 // |b.To - t.To| > 1 // // Target (t): +--+--+ // Base (b): +--+--+--+ // Result (b - t): +--+--+ { caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", target: newRangeSymbolNode('0', '1'), base: newRangeSymbolNode('1', '3'), result: newRangeSymbolNode('2', '3'), }, // |b.From - t.From| > 0 // |t.To - b.From| > 0 // |b.To - t.To| = 1 // // Target (t): +--+--+--+ // Base (b): +--+--+--+ // Result (b - t): +--+ { caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", target: newRangeSymbolNode('0', '2'), base: newRangeSymbolNode('1', '3'), result: newSymbolNode('3'), }, // |b.From - t.From| > 0 // |t.To - b.From| > 0 // |b.To - t.To| > 1 // // Target (t): +--+--+--+ // Base (b): +--+--+--+--+ // Result (b - t): +--+--+ { caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", target: newRangeSymbolNode('0', '2'), base: newRangeSymbolNode('1', '4'), result: newRangeSymbolNode('3', '4'), }, // t.From > b.From && t.From <= b.To && t.To >= b.To // |t.From - b.From| = 1 // |b.To - t.From| = 0 // |t.To - b.To| = 0 // // Target (t): +--+ // Base (b): +--+--+ // Result (b - t): +--+ { caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", target: newSymbolNode('1'), base: newRangeSymbolNode('0', '1'), result: newSymbolNode('0'), }, // |t.From - b.From| = 1 // |b.To - t.From| = 0 // |t.To - b.To| > 0 // // Target (t): +--+--+ // Base (b): +--+--+ // Result (b - t): +--+ { caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", target: newRangeSymbolNode('1', '2'), base: newRangeSymbolNode('0', '1'), result: newSymbolNode('0'), }, // |t.From - b.From| = 1 // |b.To - t.From| > 0 // |t.To - b.To| = 0 // // Target (t): +--+--+ // Base (b): +--+--+--+ // Result (b - t): +--+ { caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", target: newRangeSymbolNode('1', '2'), base: newRangeSymbolNode('0', '2'), result: newSymbolNode('0'), }, // |t.From - b.From| = 1 // |b.To - t.From| > 0 // |t.To - b.To| > 0 // // Target (t): +--+--+--+ // Base (b): +--+--+--+ // Result (b - t): +--+ { caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", target: newRangeSymbolNode('1', '3'), base: newRangeSymbolNode('0', '2'), result: newSymbolNode('0'), }, // |t.From - b.From| > 1 // |b.To - t.From| = 0 // |t.To - b.To| = 0 // // Target (t): +--+ // Base (b): +--+--+--+ // Result (b - t): +--+--+ { caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", target: newSymbolNode('2'), base: newRangeSymbolNode('0', '2'), result: newRangeSymbolNode('0', '1'), }, // |t.From - b.From| > 1 // |b.To - t.From| = 0 // |t.To - b.To| > 0 // // Target (t): +--+--+ // Base (b): +--+--+--+ // Result (b - t): +--+--+ { caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", target: newRangeSymbolNode('2', '3'), base: newRangeSymbolNode('0', '2'), result: newRangeSymbolNode('0', '1'), }, // |t.From - b.From| > 1 // |b.To - t.From| > 0 // |t.To - b.To| = 0 // // Target (t): +--+--+ // Base (b): +--+--+--+--+ // Result (b - t): +--+--+ { caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", target: newRangeSymbolNode('2', '3'), base: newRangeSymbolNode('0', '3'), result: newRangeSymbolNode('0', '1'), }, // |t.From - b.From| > 1 // |b.To - t.From| > 0 // |t.To - b.To| > 0 // // Target (t): +--+--+--+ // Base (b): +--+--+--+--+ // Result (b - t): +--+--+ { caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", target: newRangeSymbolNode('2', '4'), base: newRangeSymbolNode('0', '3'), result: newRangeSymbolNode('0', '1'), }, // t.From <= b.From && t.To >= b.To // |b.From - t.From| = 0 // |t.To - b.To| = 0 // // Target (t): +--+ // Base (b): +--+ // Result (b - t): N/A { caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", target: newSymbolNode('0'), base: newSymbolNode('0'), result: nil, }, // |b.From - t.From| = 0 // |t.To - b.To| > 0 // // Target (t): +--+--+ // Base (b): +--+ // Result (b - t): N/A { caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", target: newRangeSymbolNode('0', '1'), base: newSymbolNode('0'), result: nil, }, // |b.From - t.From| > 0 // |t.To - b.To| = 0 // // Target (t): +--+--+ // Base (b): +--+ // Result (b - t): N/A { caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", target: newRangeSymbolNode('0', '1'), base: newSymbolNode('1'), result: nil, }, // |b.From - t.From| > 0 // |t.To - b.To| > 0 // // Target (t): +--+--+--+ // Base (b): +--+ // Result (b - t): N/A { caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", target: newRangeSymbolNode('0', '2'), base: newSymbolNode('1'), result: nil, }, // Others // |b.From - t.From| = 1 // // Target (t): +--+ // Base (b): +--+ // Result (b - t): +--+ { caption: "|b.From - t.From| = 1", target: newSymbolNode('0'), base: newSymbolNode('1'), result: newSymbolNode('1'), }, // |b.From - t.From| > 1 // // Target (t): +--+ // Base (b): +--+ // Result (b - t): +--+ { caption: "|b.From - t.From| > 1", target: newSymbolNode('0'), base: newSymbolNode('2'), result: newSymbolNode('2'), }, // |t.To - b.To| = 1 // // Target (t): +--+ // Base (b): +--+ // Result (b - t): +--+ { caption: "|t.To - b.To| = 1", target: newSymbolNode('1'), base: newSymbolNode('0'), result: newSymbolNode('0'), }, // |t.To - b.To| > 1 // // Target (t): +--+ // Base (b): +--+ // Result (b - t): +--+ { caption: "|t.To - b.To| > 1", target: newSymbolNode('2'), base: newSymbolNode('0'), result: newSymbolNode('0'), }, } { t.Run(test.caption, func(t *testing.T) { r := exclude(test.target, test.base) testAST(t, test.result, r) }) } } func MainTest() { tests := []testing.InternalTest{ { "TestGenCharBlocksWellFormed", TestGenCharBlocksWellFormed }, { "TestGenCharBlocksIllFormed", TestGenCharBlocksIllFormed }, { "TestCompressor_Compress", TestCompressor_Compress }, { "TestValidateIdentifier", TestValidateIdentifier }, { "TestLexKindName_validate", TestLexKindName_validate }, { "TestLexModeName_validate", TestLexModeName_validate }, { "TestSnakeCaseToUpperCamelCase", TestSnakeCaseToUpperCamelCase }, { "TestFindSpellingInconsistencies", TestFindSpellingInconsistencies }, { "TestLexSpec_Validate", TestLexSpec_Validate }, { "TestLexer", TestLexer }, { "TestParse", TestParse }, { "TestParse_ContributoryPropertyIsNotExposed", TestParse_ContributoryPropertyIsNotExposed }, { "TestExclude", TestExclude }, } deps := testdeps.TestDeps{} benchmarks := []testing.InternalBenchmark {} fuzzTargets := []testing.InternalFuzzTarget{} examples := []testing.InternalExample {} m := testing.MainStart(deps, tests, benchmarks, fuzzTargets, examples) os.Exit(m.Run()) }