From db845a7717121fd03becad63ffba4f7b7c60ed9b Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Thu, 12 Dec 2024 10:44:14 -0300 Subject: Build and run existing test executables with no actual tests --- tests/unit/compressor/compressor.go | 125 + tests/unit/compressor/main.go | 7 + tests/unit/driver/lexer/lexer.go | 935 ++++++ tests/unit/driver/lexer/main.go | 7 + tests/unit/driver/parser/main.go | 7 + tests/unit/driver/parser/parser.go | 1975 ++++++++++++ tests/unit/grammar/grammar.go | 4650 +++++++++++++++++++++++++++ tests/unit/grammar/lexical/dfa/dfa.go | 445 +++ tests/unit/grammar/lexical/dfa/main.go | 7 + tests/unit/grammar/lexical/lexical.go | 341 ++ tests/unit/grammar/lexical/main.go | 7 + tests/unit/grammar/lexical/parser/main.go | 7 + tests/unit/grammar/lexical/parser/parser.go | 1910 +++++++++++ tests/unit/grammar/main.go | 7 + tests/unit/grammar/symbol/main.go | 7 + tests/unit/grammar/symbol/symbol.go | 162 + tests/unit/spec/grammar/parser/main.go | 7 + tests/unit/spec/grammar/parser/parser.go | 1442 +++++++++ tests/unit/spec/test/main.go | 7 + tests/unit/spec/test/test.go | 414 +++ tests/unit/tester/main.go | 7 + tests/unit/tester/tester.go | 172 + tests/unit/urubu/compressor.go | 122 - tests/unit/urubu/compressor.main.go | 7 - tests/unit/urubu/driver/lexer.go | 932 ------ tests/unit/urubu/driver/parser.go | 1972 ------------ tests/unit/urubu/grammar.go | 4647 -------------------------- tests/unit/urubu/grammar/lexical.go | 338 -- tests/unit/urubu/grammar/lexical/dfa.go | 442 --- tests/unit/urubu/grammar/lexical/parser.go | 1907 ----------- tests/unit/urubu/grammar/symbol.go | 159 - tests/unit/urubu/spec/grammar/parser.go | 1439 --------- tests/unit/urubu/spec/test.go | 411 --- tests/unit/urubu/tester.go | 169 - tests/unit/urubu/utf8.go | 181 -- tests/unit/utf8/main.go | 7 + tests/unit/utf8/utf8.go | 184 ++ 37 files changed, 12839 insertions(+), 12726 deletions(-) create mode 100644 tests/unit/compressor/compressor.go create mode 100644 tests/unit/compressor/main.go create mode 100644 tests/unit/driver/lexer/lexer.go create mode 100644 tests/unit/driver/lexer/main.go create mode 100644 tests/unit/driver/parser/main.go create mode 100644 tests/unit/driver/parser/parser.go create mode 100644 tests/unit/grammar/grammar.go create mode 100644 tests/unit/grammar/lexical/dfa/dfa.go create mode 100644 tests/unit/grammar/lexical/dfa/main.go create mode 100644 tests/unit/grammar/lexical/lexical.go create mode 100644 tests/unit/grammar/lexical/main.go create mode 100644 tests/unit/grammar/lexical/parser/main.go create mode 100644 tests/unit/grammar/lexical/parser/parser.go create mode 100644 tests/unit/grammar/main.go create mode 100644 tests/unit/grammar/symbol/main.go create mode 100644 tests/unit/grammar/symbol/symbol.go create mode 100644 tests/unit/spec/grammar/parser/main.go create mode 100644 tests/unit/spec/grammar/parser/parser.go create mode 100644 tests/unit/spec/test/main.go create mode 100644 tests/unit/spec/test/test.go create mode 100644 tests/unit/tester/main.go create mode 100644 tests/unit/tester/tester.go delete mode 100644 tests/unit/urubu/compressor.go delete mode 100644 tests/unit/urubu/compressor.main.go delete mode 100644 tests/unit/urubu/driver/lexer.go delete mode 100644 tests/unit/urubu/driver/parser.go delete mode 100644 tests/unit/urubu/grammar.go delete mode 100644 tests/unit/urubu/grammar/lexical.go delete mode 100644 tests/unit/urubu/grammar/lexical/dfa.go delete mode 100644 tests/unit/urubu/grammar/lexical/parser.go delete mode 100644 
tests/unit/urubu/grammar/symbol.go delete mode 100644 tests/unit/urubu/spec/grammar/parser.go delete mode 100644 tests/unit/urubu/spec/test.go delete mode 100644 tests/unit/urubu/tester.go delete mode 100644 tests/unit/urubu/utf8.go create mode 100644 tests/unit/utf8/main.go create mode 100644 tests/unit/utf8/utf8.go (limited to 'tests/unit') diff --git a/tests/unit/compressor/compressor.go b/tests/unit/compressor/compressor.go new file mode 100644 index 0000000..3b760ce --- /dev/null +++ b/tests/unit/compressor/compressor.go @@ -0,0 +1,125 @@ +package compressor + +import ( + "fmt" + "testing" +) + +func TestCompressor_Compress(t *testing.T) { + x := 0 // an empty value + + allCompressors := func() []Compressor { + return []Compressor{ + NewUniqueEntriesTable(), + NewRowDisplacementTable(x), + } + } + + tests := []struct { + original []int + rowCount int + colCount int + compressors []Compressor + }{ + { + original: []int{ + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + x, x, x, x, x, + x, x, x, x, x, + x, x, x, x, x, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, 1, 1, 1, 1, + x, x, x, x, x, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, x, 1, 1, 1, + 1, 1, x, 1, 1, + 1, 1, 1, x, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + } + for i, tt := range tests { + for _, comp := range tt.compressors { + t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { + dup := make([]int, len(tt.original)) + copy(dup, tt.original) + + orig, err := NewOriginalTable(tt.original, tt.colCount) + if err != nil { + t.Fatal(err) + } + err = comp.Compress(orig) + if err != nil { + t.Fatal(err) + } + rowCount, colCount := comp.OriginalTableSize() + if rowCount != tt.rowCount || colCount != tt.colCount { + t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) + } + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + v, err := comp.Lookup(i, j) + if err != nil { + t.Fatal(err) + } + expected := tt.original[i*tt.colCount+j] + if v != expected { + t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) + } + } + } + + // Calling with out-of-range indexes should be an error. + if _, err := comp.Lookup(0, -1); err == nil { + t.Fatalf("expected error didn't occur (0, -1)") + } + if _, err := comp.Lookup(-1, 0); err == nil { + t.Fatalf("expected error didn't occur (-1, 0)") + } + if _, err := comp.Lookup(rowCount-1, colCount); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) + } + if _, err := comp.Lookup(rowCount, colCount-1); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) + } + + // The compressor must not break the original table. 
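+ // A small sketch of the contract checked below (hedged: `entries`, `row`, and `col` are hypothetical names standing for any table data; `dup` is the copy taken above): a caller may keep reading the original slice after compression, e.g. + // + // orig, _ := NewOriginalTable(entries, colCount) + // _ = comp.Compress(orig) // must not mutate `entries` + // v := entries[row*colCount+col] // still the pre-compression value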
+ for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + idx := i*tt.colCount + j + if tt.original[idx] != dup[idx] { + t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) + } + } + } + }) + } + } +} + +func MainTest() { +} diff --git a/tests/unit/compressor/main.go b/tests/unit/compressor/main.go new file mode 100644 index 0000000..a1bc202 --- /dev/null +++ b/tests/unit/compressor/main.go @@ -0,0 +1,7 @@ +package main + +import "compressor" + +func main() { + compressor.MainTest() +} diff --git a/tests/unit/driver/lexer/lexer.go b/tests/unit/driver/lexer/lexer.go new file mode 100644 index 0000000..364a0f5 --- /dev/null +++ b/tests/unit/driver/lexer/lexer.go @@ -0,0 +1,935 @@ +package lexer + +import ( + "bytes" + "fmt" + "strings" + "testing" + + "urubu/grammar/lexical" + spec "urubu/spec/grammar" +) + +func newLexEntry(modes []string, kind string, pattern string, push string, pop bool) *lexical.LexEntry { + ms := []spec.LexModeName{} + for _, m := range modes { + ms = append(ms, spec.LexModeName(m)) + } + return &lexical.LexEntry{ + Kind: spec.LexKindName(kind), + Pattern: pattern, + Modes: ms, + Push: spec.LexModeName(push), + Pop: pop, + } +} + +func newLexEntryDefaultNOP(kind string, pattern string) *lexical.LexEntry { + return &lexical.LexEntry{ + Kind: spec.LexKindName(kind), + Pattern: pattern, + Modes: []spec.LexModeName{ + spec.LexModeNameDefault, + }, + } +} + +func newLexEntryFragment(kind string, pattern string) *lexical.LexEntry { + return &lexical.LexEntry{ + Kind: spec.LexKindName(kind), + Pattern: pattern, + Fragment: true, + } +} + +func newToken(modeID ModeID, kindID KindID, modeKindID ModeKindID, lexeme []byte) *Token { + return &Token{ + ModeID: modeID, + KindID: kindID, + ModeKindID: modeKindID, + Lexeme: lexeme, + } +} + +func newTokenDefault(kindID int, modeKindID int, lexeme []byte) *Token { + return newToken( + ModeID(spec.LexModeIDDefault.Int()), + KindID(spec.LexKindID(kindID).Int()), + ModeKindID(spec.LexModeKindID(modeKindID).Int()), + lexeme, + ) +} + +func newEOFToken(modeID ModeID, modeName string) *Token { + return &Token{ + ModeID: modeID, + ModeKindID: 0, + EOF: true, + } +} + +func newEOFTokenDefault() *Token { + return newEOFToken(ModeID(spec.LexModeIDDefault.Int()), spec.LexModeNameDefault.String()) +} + +func newInvalidTokenDefault(lexeme []byte) *Token { + return &Token{ + ModeID: ModeID(spec.LexModeIDDefault.Int()), + ModeKindID: 0, + Lexeme: lexeme, + Invalid: true, + } +} + +func withPos(tok *Token, bytePos int, byteLen int, row int, col int) *Token { + tok.BytePos = bytePos + tok.ByteLen = byteLen + tok.Row = row + tok.Col = col + return tok +} + +func TestLexer_Next(t *testing.T) { + test := []struct { + lspec *lexical.LexSpec + src string + tokens []*Token + passiveModeTran bool + tran func(l *Lexer, tok *Token) error + }{ + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "(a|b)*abb"), + newLexEntryDefaultNOP("t2", " +"), + }, + }, + src: "abb aabb aaabb babb bbabb abbbabb", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("abb")), 0, 3, 0, 0), + withPos(newTokenDefault(2, 2, []byte(" ")), 3, 1, 0, 3), + withPos(newTokenDefault(1, 1, []byte("aabb")), 4, 4, 0, 4), + withPos(newTokenDefault(2, 2, []byte(" ")), 8, 1, 0, 8), + withPos(newTokenDefault(1, 1, []byte("aaabb")), 9, 5, 0, 9), + withPos(newTokenDefault(2, 2, []byte(" ")), 14, 1, 0, 14), + withPos(newTokenDefault(1, 1, []byte("babb")), 15, 4, 0, 15), + 
withPos(newTokenDefault(2, 2, []byte(" ")), 19, 1, 0, 19), + withPos(newTokenDefault(1, 1, []byte("bbabb")), 20, 5, 0, 20), + withPos(newTokenDefault(2, 2, []byte(" ")), 25, 1, 0, 25), + withPos(newTokenDefault(1, 1, []byte("abbbabb")), 26, 7, 0, 26), + withPos(newEOFTokenDefault(), 33, 0, 0, 33), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "b?a+"), + newLexEntryDefaultNOP("t2", "(ab)?(cd)+"), + newLexEntryDefaultNOP("t3", " +"), + }, + }, + src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("ba")), 0, 2, 0, 0), + withPos(newTokenDefault(3, 3, []byte(" ")), 2, 1, 0, 2), + withPos(newTokenDefault(1, 1, []byte("baaa")), 3, 4, 0, 3), + withPos(newTokenDefault(3, 3, []byte(" ")), 7, 1, 0, 7), + withPos(newTokenDefault(1, 1, []byte("a")), 8, 1, 0, 8), + withPos(newTokenDefault(3, 3, []byte(" ")), 9, 1, 0, 9), + withPos(newTokenDefault(1, 1, []byte("aaa")), 10, 3, 0, 10), + withPos(newTokenDefault(3, 3, []byte(" ")), 13, 1, 0, 13), + withPos(newTokenDefault(2, 2, []byte("abcd")), 14, 4, 0, 14), + withPos(newTokenDefault(3, 3, []byte(" ")), 18, 1, 0, 18), + withPos(newTokenDefault(2, 2, []byte("abcdcdcd")), 19, 8, 0, 19), + withPos(newTokenDefault(3, 3, []byte(" ")), 27, 1, 0, 27), + withPos(newTokenDefault(2, 2, []byte("cd")), 28, 2, 0, 28), + withPos(newTokenDefault(3, 3, []byte(" ")), 30, 1, 0, 30), + withPos(newTokenDefault(2, 2, []byte("cdcdcd")), 31, 6, 0, 31), + withPos(newEOFTokenDefault(), 37, 0, 0, 37), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "."), + }, + }, + src: string([]byte{ + 0x00, + 0x7f, + 0xc2, 0x80, + 0xdf, 0xbf, + 0xe1, 0x80, 0x80, + 0xec, 0xbf, 0xbf, + 0xed, 0x80, 0x80, + 0xed, 0x9f, 0xbf, + 0xee, 0x80, 0x80, + 0xef, 0xbf, 0xbf, + 0xf0, 0x90, 0x80, 0x80, + 0xf0, 0xbf, 0xbf, 0xbf, + 0xf1, 0x80, 0x80, 0x80, + 0xf3, 0xbf, 0xbf, 0xbf, + 0xf4, 0x80, 0x80, 0x80, + 0xf4, 0x8f, 0xbf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0x00}), 0, 1, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0x7f}), 1, 1, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xc2, 0x80}), 2, 2, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbf}), 4, 2, 0, 3), + withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x80}), 6, 3, 0, 4), + withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbf}), 9, 3, 0, 5), + withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x80}), 12, 3, 0, 6), + withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbf}), 15, 3, 0, 7), + withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x80}), 18, 3, 0, 8), + withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbf}), 21, 3, 0, 9), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 24, 4, 0, 10), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 28, 4, 0, 11), + withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x80}), 32, 4, 0, 12), + withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbf}), 36, 4, 0, 13), + withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x80}), 40, 4, 0, 14), + withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), 44, 4, 0, 15), + withPos(newEOFTokenDefault(), 48, 0, 0, 16), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "[ab.*+?|()[\\]]"), + }, + }, + src: "ab.*+?|()[]", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("a")), 0, 1, 0, 0), + withPos(newTokenDefault(1, 1, []byte("b")), 1, 1, 0, 1), + 
withPos(newTokenDefault(1, 1, []byte(".")), 2, 1, 0, 2), + withPos(newTokenDefault(1, 1, []byte("*")), 3, 1, 0, 3), + withPos(newTokenDefault(1, 1, []byte("+")), 4, 1, 0, 4), + withPos(newTokenDefault(1, 1, []byte("?")), 5, 1, 0, 5), + withPos(newTokenDefault(1, 1, []byte("|")), 6, 1, 0, 6), + withPos(newTokenDefault(1, 1, []byte("(")), 7, 1, 0, 7), + withPos(newTokenDefault(1, 1, []byte(")")), 8, 1, 0, 8), + withPos(newTokenDefault(1, 1, []byte("[")), 9, 1, 0, 9), + withPos(newTokenDefault(1, 1, []byte("]")), 10, 1, 0, 10), + withPos(newEOFTokenDefault(), 11, 0, 0, 11), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // all 1 byte characters except the null character (U+0000) + // + // NOTE: + // vartan cannot handle the null character in patterns because lexical.lexer, + // specifically read() and restore(), treats the null character as meaning that no symbol exists. + // If a pattern needs a null character, use the code point expression \u{0000}. + newLexEntryDefaultNOP("char_1_byte", "[\x01-\x7f]"), + }, + }, + src: string([]byte{ + 0x01, + 0x02, + 0x7e, + 0x7f, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0x01}), 0, 1, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0x02}), 1, 1, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0x7e}), 2, 1, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0x7f}), 3, 1, 0, 3), + withPos(newEOFTokenDefault(), 4, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // all 2 byte characters + newLexEntryDefaultNOP("char_2_byte", "[\xc2\x80-\xdf\xbf]"), + }, + }, + src: string([]byte{ + 0xc2, 0x80, + 0xc2, 0x81, + 0xdf, 0xbe, + 0xdf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xc2, 0x80}), 0, 2, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xc2, 0x81}), 2, 2, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbe}), 4, 2, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbf}), 6, 2, 0, 3), + withPos(newEOFTokenDefault(), 8, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // All bytes are the same. + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"), + }, + }, + src: string([]byte{ + 0xe0, 0xa0, 0x80, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), + withPos(newEOFTokenDefault(), 3, 0, 0, 1), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // The first two bytes are the same. + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"), + }, + }, + src: string([]byte{ + 0xe0, 0xa0, 0x80, + 0xe0, 0xa0, 0x81, + 0xe0, 0xa0, 0xbe, + 0xe0, 0xa0, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0xbe}), 6, 3, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0xbf}), 9, 3, 0, 3), + withPos(newEOFTokenDefault(), 12, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // The first byte is the same.
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), + }, + }, + src: string([]byte{ + 0xe0, 0xa0, 0x80, + 0xe0, 0xa0, 0x81, + 0xe0, 0xbf, 0xbe, + 0xe0, 0xbf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbe}), 6, 3, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbf}), 9, 3, 0, 3), + withPos(newEOFTokenDefault(), 12, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // all 3 byte characters + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"), + }, + }, + src: string([]byte{ + 0xe0, 0xa0, 0x80, + 0xe0, 0xa0, 0x81, + 0xe0, 0xbf, 0xbe, + 0xe0, 0xbf, 0xbf, + 0xe1, 0x80, 0x80, + 0xe1, 0x80, 0x81, + 0xec, 0xbf, 0xbe, + 0xec, 0xbf, 0xbf, + 0xed, 0x80, 0x80, + 0xed, 0x80, 0x81, + 0xed, 0x9f, 0xbe, + 0xed, 0x9f, 0xbf, + 0xee, 0x80, 0x80, + 0xee, 0x80, 0x81, + 0xef, 0xbf, 0xbe, + 0xef, 0xbf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbe}), 6, 3, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbf}), 9, 3, 0, 3), + withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x80}), 12, 3, 0, 4), + withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x81}), 15, 3, 0, 5), + withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbe}), 18, 3, 0, 6), + withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbf}), 21, 3, 0, 7), + withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x80}), 24, 3, 0, 8), + withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x81}), 27, 3, 0, 9), + withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbe}), 30, 3, 0, 10), + withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbf}), 33, 3, 0, 11), + withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x80}), 36, 3, 0, 12), + withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x81}), 39, 3, 0, 13), + withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbe}), 42, 3, 0, 14), + withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbf}), 45, 3, 0, 15), + withPos(newEOFTokenDefault(), 48, 0, 0, 16), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // All bytes are the same. + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), + }, + }, + src: string([]byte{ + 0xf0, 0x90, 0x80, 0x80, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), + withPos(newEOFTokenDefault(), 4, 0, 0, 1), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // The first 3 bytes are the same. + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), + }, + }, + src: string([]byte{ + 0xf0, 0x90, 0x80, 0x80, + 0xf0, 0x90, 0x80, 0x81, + 0xf0, 0x90, 0x80, 0xbe, + 0xf0, 0x90, 0x80, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0xbe}), 8, 4, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0xbf}), 12, 4, 0, 3), + withPos(newEOFTokenDefault(), 16, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // The first 2 bytes are the same. 
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), + }, + }, + src: string([]byte{ + 0xf0, 0x90, 0x80, 0x80, + 0xf0, 0x90, 0x80, 0x81, + 0xf0, 0x90, 0xbf, 0xbe, + 0xf0, 0x90, 0xbf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0xbf, 0xbe}), 8, 4, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0xbf, 0xbf}), 12, 4, 0, 3), + withPos(newEOFTokenDefault(), 16, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // The first byte is the same. + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), + }, + }, + src: string([]byte{ + 0xf0, 0x90, 0x80, 0x80, + 0xf0, 0x90, 0x80, 0x81, + 0xf0, 0xbf, 0xbf, 0xbe, + 0xf0, 0xbf, 0xbf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbe}), 8, 4, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 12, 4, 0, 3), + withPos(newEOFTokenDefault(), 16, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // all 4 byte characters + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), + }, + }, + src: string([]byte{ + 0xf0, 0x90, 0x80, 0x80, + 0xf0, 0x90, 0x80, 0x81, + 0xf0, 0xbf, 0xbf, 0xbe, + 0xf0, 0xbf, 0xbf, 0xbf, + 0xf1, 0x80, 0x80, 0x80, + 0xf1, 0x80, 0x80, 0x81, + 0xf3, 0xbf, 0xbf, 0xbe, + 0xf3, 0xbf, 0xbf, 0xbf, + 0xf4, 0x80, 0x80, 0x80, + 0xf4, 0x80, 0x80, 0x81, + 0xf4, 0x8f, 0xbf, 0xbe, + 0xf4, 0x8f, 0xbf, 0xbf, + }), + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbe}), 8, 4, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 12, 4, 0, 3), + withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x80}), 16, 4, 0, 4), + withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x81}), 20, 4, 0, 5), + withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbe}), 24, 4, 0, 6), + withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbf}), 28, 4, 0, 7), + withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x80}), 32, 4, 0, 8), + withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x81}), 36, 4, 0, 9), + withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbe}), 40, 4, 0, 10), + withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), 44, 4, 0, 11), + withPos(newEOFTokenDefault(), 48, 0, 0, 12), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("non_number", "[^0-9]+[0-9]"), + }, + }, + src: "foo9", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("foo9")), 0, 4, 0, 0), + withPos(newEOFTokenDefault(), 4, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("char_1_byte", "\\u{006E}"), + newLexEntryDefaultNOP("char_2_byte", "\\u{03BD}"), + newLexEntryDefaultNOP("char_3_byte", "\\u{306B}"), + newLexEntryDefaultNOP("char_4_byte", "\\u{01F638}"), + }, + }, + src: "nνに😸", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0x6E}), 0, 1, 0, 0), + withPos(newTokenDefault(2, 2,
[]byte{0xCE, 0xBD}), 1, 2, 0, 1), + withPos(newTokenDefault(3, 3, []byte{0xE3, 0x81, 0xAB}), 3, 3, 0, 2), + withPos(newTokenDefault(4, 4, []byte{0xF0, 0x9F, 0x98, 0xB8}), 6, 4, 0, 3), + withPos(newEOFTokenDefault(), 10, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("code_points_alt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"), + }, + }, + src: "nνに😸", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte{0x6E}), 0, 1, 0, 0), + withPos(newTokenDefault(1, 1, []byte{0xCE, 0xBD}), 1, 2, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0xE3, 0x81, 0xAB}), 3, 3, 0, 2), + withPos(newTokenDefault(1, 1, []byte{0xF0, 0x9F, 0x98, 0xB8}), 6, 4, 0, 3), + withPos(newEOFTokenDefault(), 10, 0, 0, 4), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "\\f{a2c}\\f{d2f}+"), + newLexEntryFragment("a2c", "abc"), + newLexEntryFragment("d2f", "def"), + }, + }, + src: "abcdefdefabcdef", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("abcdefdef")), 0, 9, 0, 0), + withPos(newTokenDefault(1, 1, []byte("abcdef")), 9, 6, 0, 9), + withPos(newEOFTokenDefault(), 15, 0, 0, 15), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "(\\f{a2c}|\\f{d2f})+"), + newLexEntryFragment("a2c", "abc"), + newLexEntryFragment("d2f", "def"), + }, + }, + src: "abcdefdefabc", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("abcdefdefabc")), 0, 12, 0, 0), + withPos(newEOFTokenDefault(), 12, 0, 0, 12), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("t1", "\\f{a2c_or_d2f}+"), + newLexEntryFragment("a2c_or_d2f", "\\f{a2c}|\\f{d2f}"), + newLexEntryFragment("a2c", "abc"), + newLexEntryFragment("d2f", "def"), + }, + }, + src: "abcdefdefabc", + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte("abcdefdefabc")), 0, 12, 0, 0), + withPos(newEOFTokenDefault(), 12, 0, 0, 12), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("white_space", ` *`), + newLexEntry([]string{"default"}, "string_open", `"`, "string", false), + newLexEntry([]string{"string"}, "escape_sequence", `\\[n"\\]`, "", false), + newLexEntry([]string{"string"}, "char_sequence", `[^"\\]*`, "", false), + newLexEntry([]string{"string"}, "string_close", `"`, "", true), + }, + }, + src: `"" "Hello world.\n\"Hello world.\""`, + tokens: []*Token{ + withPos(newToken(1, 2, 2, []byte(`"`)), 0, 1, 0, 0), + withPos(newToken(2, 5, 3, []byte(`"`)), 1, 1, 0, 1), + withPos(newToken(1, 1, 1, []byte(` `)), 2, 1, 0, 2), + withPos(newToken(1, 2, 2, []byte(`"`)), 3, 1, 0, 3), + withPos(newToken(2, 4, 2, []byte(`Hello world.`)), 4, 12, 0, 4), + withPos(newToken(2, 3, 1, []byte(`\n`)), 16, 2, 0, 16), + withPos(newToken(2, 3, 1, []byte(`\"`)), 18, 2, 0, 18), + withPos(newToken(2, 4, 2, []byte(`Hello world.`)), 20, 12, 0, 20), + withPos(newToken(2, 3, 1, []byte(`\"`)), 32, 2, 0, 32), + withPos(newToken(2, 5, 3, []byte(`"`)), 34, 1, 0, 34), + withPos(newEOFTokenDefault(), 35, 0, 0, 35), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + // `white_space` is enabled in multiple modes. 
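+ // (As a reading aid: the same `white_space` kind is registered in default, state_a, and state_b below, which is why every expected space token carries kind ID 1 and mode-kind ID 1 while the mode IDs differ.)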
+ newLexEntry([]string{"default", "state_a", "state_b"}, "white_space", ` *`, "", false), + newLexEntry([]string{"default"}, "char_a", `a`, "state_a", false), + newLexEntry([]string{"state_a"}, "char_b", `b`, "state_b", false), + newLexEntry([]string{"state_a"}, "back_from_a", `<`, "", true), + newLexEntry([]string{"state_b"}, "back_from_b", `<`, "", true), + }, + }, + src: ` a b < < `, + tokens: []*Token{ + withPos(newToken(1, 1, 1, []byte(` `)), 0, 1, 0, 0), + withPos(newToken(1, 2, 2, []byte(`a`)), 1, 1, 0, 1), + withPos(newToken(2, 1, 1, []byte(` `)), 2, 1, 0, 2), + withPos(newToken(2, 3, 2, []byte(`b`)), 3, 1, 0, 3), + withPos(newToken(3, 1, 1, []byte(` `)), 4, 1, 0, 4), + withPos(newToken(3, 5, 2, []byte(`<`)), 5, 1, 0, 5), + withPos(newToken(2, 1, 1, []byte(` `)), 6, 1, 0, 6), + withPos(newToken(2, 4, 3, []byte(`<`)), 7, 1, 0, 7), + withPos(newToken(1, 1, 1, []byte(` `)), 8, 1, 0, 8), + withPos(newEOFTokenDefault(), 9, 0, 0, 9), + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false), + newLexEntry([]string{"default"}, "char", `.`, "", false), + newLexEntry([]string{"default"}, "push_1", `-> 1`, "", false), + newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false), + newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false), + newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", false), + }, + }, + src: `-> 1 -> 2 <- <- a`, + tokens: []*Token{ + withPos(newToken(1, 3, 3, []byte(`-> 1`)), 0, 4, 0, 0), + withPos(newToken(2, 1, 1, []byte(` `)), 4, 1, 0, 4), + withPos(newToken(2, 4, 2, []byte(`-> 2`)), 5, 4, 0, 5), + withPos(newToken(3, 1, 1, []byte(` `)), 9, 1, 0, 9), + withPos(newToken(3, 6, 2, []byte(`<-`)), 10, 2, 0, 10), + withPos(newToken(2, 1, 1, []byte(` `)), 12, 1, 0, 12), + withPos(newToken(2, 5, 3, []byte(`<-`)), 13, 2, 0, 13), + withPos(newToken(1, 1, 1, []byte(` `)), 15, 1, 0, 15), + withPos(newToken(1, 2, 2, []byte(`a`)), 16, 1, 0, 16), + withPos(newEOFTokenDefault(), 17, 0, 0, 17), + }, + passiveModeTran: true, + tran: func(l *Lexer, tok *Token) error { + switch l.spec.ModeName(l.Mode()) { + case "default": + switch tok.KindID { + case 3: // push_1 + l.PushMode(2) + } + case "mode_1": + switch tok.KindID { + case 4: // push_2 + l.PushMode(3) + case 5: // pop_1 + return l.PopMode() + } + case "mode_2": + switch tok.KindID { + case 6: // pop_2 + return l.PopMode() + } + } + return nil + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false), + newLexEntry([]string{"default"}, "char", `.`, "", false), + newLexEntry([]string{"default"}, "push_1", `-> 1`, "mode_1", false), + newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false), + newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false), + newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", true), + }, + }, + src: `-> 1 -> 2 <- <- a`, + tokens: []*Token{ + withPos(newToken(1, 3, 3, []byte(`-> 1`)), 0, 4, 0, 0), + withPos(newToken(2, 1, 1, []byte(` `)), 4, 1, 0, 4), + withPos(newToken(2, 4, 2, []byte(`-> 2`)), 5, 4, 0, 5), + withPos(newToken(3, 1, 1, []byte(` `)), 9, 1, 0, 9), + withPos(newToken(3, 6, 2, []byte(`<-`)), 10, 2, 0, 10), + withPos(newToken(2, 1, 1, []byte(` `)), 12, 1, 0, 12), + withPos(newToken(2, 5, 3, []byte(`<-`)), 13, 2, 0, 13), + withPos(newToken(1, 1, 1, []byte(` `)), 15, 1, 0, 15), + withPos(newToken(1, 2, 2, []byte(`a`)), 16, 1, 0, 16), + withPos(newEOFTokenDefault(), 17, 0, 0, 17), + }, + // 
Active mode transition and an external transition function can be used together. + passiveModeTran: false, + tran: func(l *Lexer, tok *Token) error { + switch l.spec.ModeName(l.Mode()) { + case "mode_1": + switch tok.KindID { + case 4: // push_2 + l.PushMode(3) + case 5: // pop_1 + return l.PopMode() + } + } + return nil + }, + }, + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("dot", spec.EscapePattern(`.`)), + newLexEntryDefaultNOP("star", spec.EscapePattern(`*`)), + newLexEntryDefaultNOP("plus", spec.EscapePattern(`+`)), + newLexEntryDefaultNOP("question", spec.EscapePattern(`?`)), + newLexEntryDefaultNOP("vbar", spec.EscapePattern(`|`)), + newLexEntryDefaultNOP("lparen", spec.EscapePattern(`(`)), + newLexEntryDefaultNOP("rparen", spec.EscapePattern(`)`)), + newLexEntryDefaultNOP("lbrace", spec.EscapePattern(`[`)), + newLexEntryDefaultNOP("backslash", spec.EscapePattern(`\`)), + }, + }, + src: `.*+?|()[\`, + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte(`.`)), 0, 1, 0, 0), + withPos(newTokenDefault(2, 2, []byte(`*`)), 1, 1, 0, 1), + withPos(newTokenDefault(3, 3, []byte(`+`)), 2, 1, 0, 2), + withPos(newTokenDefault(4, 4, []byte(`?`)), 3, 1, 0, 3), + withPos(newTokenDefault(5, 5, []byte(`|`)), 4, 1, 0, 4), + withPos(newTokenDefault(6, 6, []byte(`(`)), 5, 1, 0, 5), + withPos(newTokenDefault(7, 7, []byte(`)`)), 6, 1, 0, 6), + withPos(newTokenDefault(8, 8, []byte(`[`)), 7, 1, 0, 7), + withPos(newTokenDefault(9, 9, []byte(`\`)), 8, 1, 0, 8), + withPos(newEOFTokenDefault(), 9, 0, 0, 9), + }, + }, + // Character properties are available in a bracket expression. + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("letter", `[\p{Letter}]+`), + newLexEntryDefaultNOP("non_letter", `[^\p{Letter}]+`), + }, + }, + src: `foo123`, + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0), + withPos(newTokenDefault(2, 2, []byte(`123`)), 3, 3, 0, 3), + withPos(newEOFTokenDefault(), 6, 0, 0, 6), + }, + }, + // The driver can continue lexical analysis even after it detects an invalid token. + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("lower", `[a-z]+`), + }, + }, + src: `foo123bar`, + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0), + withPos(newInvalidTokenDefault([]byte(`123`)), 3, 3, 0, 3), + withPos(newTokenDefault(1, 1, []byte(`bar`)), 6, 3, 0, 6), + withPos(newEOFTokenDefault(), 9, 0, 0, 9), + }, + }, + // The driver can detect an invalid token immediately preceding an EOF. + { + lspec: &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("lower", `[a-z]+`), + }, + }, + src: `foo123`, + tokens: []*Token{ + withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0), + withPos(newInvalidTokenDefault([]byte(`123`)), 3, 3, 0, 3), + withPos(newEOFTokenDefault(), 6, 0, 0, 6), + }, + }, + } + for i, tt := range test { + for compLv := lexical.CompressionLevelMin; compLv <= lexical.CompressionLevelMax; compLv++ { + t.Run(fmt.Sprintf("#%v-%v", i, compLv), func(t *testing.T) { + clspec, err, cerrs := lexical.Compile(tt.lspec, compLv) + if err != nil { + for _, cerr := range cerrs { + t.Logf("%#v", cerr) + } + t.Fatalf("unexpected error: %v", err) + } + opts := []LexerOption{} + if tt.passiveModeTran { + opts = append(opts, DisableModeTransition()) + } + lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(tt.src), opts...) 
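+ // (strings.NewReader is just a convenient in-memory source; nothing in this test depends on the concrete reader type, so any io.Reader should presumably work here.)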
+ if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, eTok := range tt.tokens { + tok, err := lexer.Next() + if err != nil { + t.Log(err) + break + } + testToken(t, eTok, tok) + + if tok.EOF { + break + } + + if tt.tran != nil { + err := tt.tran(lexer, tok) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + } + }) + } + } +} + +func TestLexer_Next_WithPosition(t *testing.T) { + lspec := &lexical.LexSpec{ + Entries: []*lexical.LexEntry{ + newLexEntryDefaultNOP("newline", `\u{000A}+`), + newLexEntryDefaultNOP("any", `.`), + }, + } + + clspec, err, _ := lexical.Compile(lspec, lexical.CompressionLevelMax) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + src := string([]byte{ + 0x00, + 0x7F, + 0x0A, + + 0xC2, 0x80, + 0xDF, 0xBF, + 0x0A, + + 0xE0, 0xA0, 0x80, + 0xE0, 0xBF, 0xBF, + 0xE1, 0x80, 0x80, + 0xEC, 0xBF, 0xBF, + 0xED, 0x80, 0x80, + 0xED, 0x9F, 0xBF, + 0xEE, 0x80, 0x80, + 0xEF, 0xBF, 0xBF, + 0x0A, + + 0xF0, 0x90, 0x80, 0x80, + 0xF0, 0xBF, 0xBF, 0xBF, + 0xF1, 0x80, 0x80, 0x80, + 0xF3, 0xBF, 0xBF, 0xBF, + 0xF4, 0x80, 0x80, 0x80, + 0xF4, 0x8F, 0xBF, 0xBF, + 0x0A, + 0x0A, + 0x0A, + }) + + expected := []*Token{ + withPos(newTokenDefault(2, 2, []byte{0x00}), 0, 1, 0, 0), + withPos(newTokenDefault(2, 2, []byte{0x7F}), 1, 1, 0, 1), + withPos(newTokenDefault(1, 1, []byte{0x0A}), 2, 1, 0, 2), + + withPos(newTokenDefault(2, 2, []byte{0xC2, 0x80}), 3, 2, 1, 0), + withPos(newTokenDefault(2, 2, []byte{0xDF, 0xBF}), 5, 2, 1, 1), + withPos(newTokenDefault(1, 1, []byte{0x0A}), 7, 1, 1, 2), + + withPos(newTokenDefault(2, 2, []byte{0xE0, 0xA0, 0x80}), 8, 3, 2, 0), + withPos(newTokenDefault(2, 2, []byte{0xE0, 0xBF, 0xBF}), 11, 3, 2, 1), + withPos(newTokenDefault(2, 2, []byte{0xE1, 0x80, 0x80}), 14, 3, 2, 2), + withPos(newTokenDefault(2, 2, []byte{0xEC, 0xBF, 0xBF}), 17, 3, 2, 3), + withPos(newTokenDefault(2, 2, []byte{0xED, 0x80, 0x80}), 20, 3, 2, 4), + withPos(newTokenDefault(2, 2, []byte{0xED, 0x9F, 0xBF}), 23, 3, 2, 5), + withPos(newTokenDefault(2, 2, []byte{0xEE, 0x80, 0x80}), 26, 3, 2, 6), + withPos(newTokenDefault(2, 2, []byte{0xEF, 0xBF, 0xBF}), 29, 3, 2, 7), + withPos(newTokenDefault(1, 1, []byte{0x0A}), 32, 1, 2, 8), + + withPos(newTokenDefault(2, 2, []byte{0xF0, 0x90, 0x80, 0x80}), 33, 4, 3, 0), + withPos(newTokenDefault(2, 2, []byte{0xF0, 0xBF, 0xBF, 0xBF}), 37, 4, 3, 1), + withPos(newTokenDefault(2, 2, []byte{0xF1, 0x80, 0x80, 0x80}), 41, 4, 3, 2), + withPos(newTokenDefault(2, 2, []byte{0xF3, 0xBF, 0xBF, 0xBF}), 45, 4, 3, 3), + withPos(newTokenDefault(2, 2, []byte{0xF4, 0x80, 0x80, 0x80}), 49, 4, 3, 4), + withPos(newTokenDefault(2, 2, []byte{0xF4, 0x8F, 0xBF, 0xBF}), 53, 4, 3, 5), + // When a token contains multiple line breaks, the driver sets the token position to + // the line number where a lexeme first appears. 
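+ // Here the three trailing newlines form a single `newline` lexeme starting at row 3, even though rows 4 and 5 exist only inside it; the EOF that follows is then reported at row 6.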
+ withPos(newTokenDefault(1, 1, []byte{0x0A, 0x0A, 0x0A}), 57, 3, 3, 6), + + withPos(newEOFTokenDefault(), 60, 0, 6, 0), + } + + lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + for _, eTok := range expected { + tok, err := lexer.Next() + if err != nil { + t.Fatal(err) + } + + testToken(t, eTok, tok) + + if tok.EOF { + break + } + } +} + +func testToken(t *testing.T, expected, actual *Token) { + t.Helper() + + if actual.ModeID != expected.ModeID || + actual.KindID != expected.KindID || + actual.ModeKindID != expected.ModeKindID || + !bytes.Equal(actual.Lexeme, expected.Lexeme) || + actual.EOF != expected.EOF || + actual.Invalid != expected.Invalid { + t.Fatalf(`unexpected token; want: %+v, got: %+v`, expected, actual) + } + + if actual.BytePos != expected.BytePos || actual.ByteLen != expected.ByteLen || + actual.Row != expected.Row || actual.Col != expected.Col { + t.Fatalf(`unexpected token; want: %+v, got: %+v`, expected, actual) + } +} + + +func MainTest() {} diff --git a/tests/unit/driver/lexer/main.go b/tests/unit/driver/lexer/main.go new file mode 100644 index 0000000..5953d9f --- /dev/null +++ b/tests/unit/driver/lexer/main.go @@ -0,0 +1,7 @@ +package main + +import "lexer" + +func main() { + lexer.MainTest() +} diff --git a/tests/unit/driver/parser/main.go b/tests/unit/driver/parser/main.go new file mode 100644 index 0000000..a99bfc4 --- /dev/null +++ b/tests/unit/driver/parser/main.go @@ -0,0 +1,7 @@ +package main + +import "parser" + +func main() { + parser.MainTest() +} diff --git a/tests/unit/driver/parser/parser.go b/tests/unit/driver/parser/parser.go new file mode 100644 index 0000000..cdf32a3 --- /dev/null +++ b/tests/unit/driver/parser/parser.go @@ -0,0 +1,1975 @@ +package parser + +import ( + "fmt" + "sort" + "strings" + "testing" + + "urubu/grammar" + spec "urubu/spec/grammar" + "urubu/spec/grammar/parser" +) + +func TestParserWithConflicts(t *testing.T) { + tests := []struct { + caption string + specSrc string + src string + cst *Node + }{ + { + caption: "when a shift/reduce conflict occurred, we prioritize the shift action", + specSrc: ` +#name test; + +expr + : expr assign expr + | id + ; + +id: "[A-Za-z0-9_]+"; +assign: '='; +`, + src: `foo=bar=baz`, + cst: nonTermNode("expr", + nonTermNode("expr", + termNode("id", "foo"), + ), + termNode("assign", "="), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "bar"), + ), + termNode("assign", "="), + nonTermNode("expr", + termNode("id", "baz"), + ), + ), + ), + }, + { + caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar", + specSrc: ` +#name test; + +s + : a + | b + ; +a + : id + ; +b + : id + ; + +id: "[A-Za-z0-9_]+"; +`, + src: `foo`, + cst: nonTermNode("s", + nonTermNode("a", + termNode("id", "foo"), + ), + ), + }, + { + caption: "left associativities defined earlier in the grammar have higher precedence", + specSrc: ` +#name test; + +#prec ( + #left mul + #left add +); + +expr + : expr add expr + | expr mul expr + | id + ; + +id: "[A-Za-z0-9_]+"; +add: '+'; +mul: '*'; +`, + src: `a+b*c*d+e`, + cst: nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("add", "+"), + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "b"), + ), + termNode("mul", "*"), + nonTermNode("expr", + termNode("id", "c"), + ), + ), + termNode("mul", "*"), + nonTermNode("expr", + termNode("id", "d"), + ), + ), + ), + 
termNode("add", "+"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + }, + { + caption: "left associativities defined in the same line have the same precedence", + specSrc: ` +#name test; + +#prec ( + #left add sub +); + +expr + : expr add expr + | expr sub expr + | id + ; + +id: "[A-Za-z0-9_]+"; +add: '+'; +sub: '-'; +`, + src: `a-b+c+d-e`, + cst: nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("sub", "-"), + nonTermNode("expr", + termNode("id", "b"), + ), + ), + termNode("add", "+"), + nonTermNode("expr", + termNode("id", "c"), + ), + ), + termNode("add", "+"), + nonTermNode("expr", + termNode("id", "d"), + ), + ), + termNode("sub", "-"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + }, + { + caption: "right associativities defined earlier in the grammar have higher precedence", + specSrc: ` +#name test; + +#prec ( + #right r1 + #right r2 +); + +expr + : expr r2 expr + | expr r1 expr + | id + ; + +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +r1 + : 'r1'; +r2 + : 'r2'; +id + : "[A-Za-z0-9_]+"; +`, + src: `a r2 b r1 c r1 d r2 e`, + cst: nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("r2", "r2"), + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "b"), + ), + termNode("r1", "r1"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "c"), + ), + termNode("r1", "r1"), + nonTermNode("expr", + termNode("id", "d"), + ), + ), + ), + termNode("r2", "r2"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + ), + }, + { + caption: "right associativities defined in the same line have the same precedence", + specSrc: ` +#name test; + +#prec ( + #right r1 r2 +); + +expr + : expr r2 expr + | expr r1 expr + | id + ; + +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +r1 + : 'r1'; +r2 + : 'r2'; +id + : "[A-Za-z0-9_]+"; +`, + src: `a r2 b r1 c r1 d r2 e`, + cst: nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("r2", "r2"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "b"), + ), + termNode("r1", "r1"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "c"), + ), + termNode("r1", "r1"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "d"), + ), + termNode("r2", "r2"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + ), + ), + ), + }, + { + caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence", + specSrc: ` +#name test; + +#prec ( + #assign a1 + #assign a2 +); + +expr + : expr a2 expr + | expr a1 expr + | id + ; + +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +a1 + : 'a1'; +a2 + : 'a2'; +id + : "[A-Za-z0-9_]+"; +`, + src: `a a2 b a1 c a1 d a2 e`, + cst: nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("a2", "a2"), + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "b"), + ), + termNode("a1", "a1"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "c"), + ), + termNode("a1", "a1"), + nonTermNode("expr", + termNode("id", "d"), + ), + ), + ), + termNode("a2", "a2"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + ), + }, + { + caption: "terminal symbols with an #assign directive defined in the same line have the same precedence", + specSrc: ` +#name test; + +#prec ( + #assign a1 a2 +); + +expr + : expr a2 expr + | expr a1 expr + | id + ; + +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +a1 + : 'a1'; +a2 + 
: 'a2'; +id + : "[A-Za-z0-9_]+"; +`, + src: `a a2 b a1 c a1 d a2 e`, + cst: nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("a2", "a2"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "b"), + ), + termNode("a1", "a1"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "c"), + ), + termNode("a1", "a1"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "d"), + ), + termNode("a2", "a2"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + ), + ), + ), + }, + { + caption: "#left, #right, and #assign can be mixed", + specSrc: ` +#name test; + +#prec ( + #left mul div + #left add sub + #assign else + #assign then + #right assign +); + +expr + : expr add expr + | expr sub expr + | expr mul expr + | expr div expr + | expr assign expr + | if expr then expr + | if expr then expr else expr + | id + ; + +ws #skip: "[\u{0009}\u{0020}]+"; +if: 'if'; +then: 'then'; +else: 'else'; +id: "[A-Za-z0-9_]+"; +add: '+'; +sub: '-'; +mul: '*'; +div: '/'; +assign: '='; +`, + src: `x = y = a + b * c - d / e + if f then if g then h else i`, + cst: nonTermNode( + "expr", + nonTermNode("expr", + termNode("id", "x"), + ), + termNode("assign", "="), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "y"), + ), + termNode("assign", "="), + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "a"), + ), + termNode("add", "+"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "b"), + ), + termNode("mul", "*"), + nonTermNode("expr", + termNode("id", "c"), + ), + ), + ), + termNode("sub", "-"), + nonTermNode("expr", + nonTermNode("expr", + termNode("id", "d"), + ), + termNode("div", "/"), + nonTermNode("expr", + termNode("id", "e"), + ), + ), + ), + termNode("add", "+"), + nonTermNode("expr", + termNode("if", "if"), + nonTermNode("expr", + termNode("id", "f"), + ), + termNode("then", "then"), + nonTermNode("expr", + termNode("if", "if"), + nonTermNode("expr", + termNode("id", "g"), + ), + termNode("then", "then"), + nonTermNode("expr", + termNode("id", "h"), + ), + termNode("else", "else"), + nonTermNode("expr", + termNode("id", "i"), + ), + ), + ), + ), + ), + ), + }, + } + + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + cg, _, err := b.Build() + if err != nil { + t.Fatal(err) + } + + toks, err := NewTokenStream(cg, strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + + gram := NewGrammar(cg) + tb := NewDefaultSyntaxTreeBuilder() + p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb))) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + if tt.cst != nil { + testTree(t, tb.Tree(), tt.cst) + } + }) + } +} + +func TestParserWithLAC(t *testing.T) { + specSrc := ` +#name test; + +s + : t t + ; +t + : c t + | d + ; + +c: 'c'; +d: 'd'; +` + + src := `ccd` + + actLogWithLAC := []string{ + "shift/c", + "shift/c", + "shift/d", + "miss", + } + + actLogWithoutLAC := []string{ + "shift/c", + "shift/c", + "shift/d", + "reduce/t", + "reduce/t", + "reduce/t", + "miss", + } + + ast, err := parser.Parse(strings.NewReader(specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + gram, _, err := b.Build() + if err != nil { + t.Fatal(err) + } + + t.Run("LAC is enabled", func(t *testing.T) { + semAct := &testSemAct{ + 
gram: gram, + } + + toks, err := NewTokenStream(gram, strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct)) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + if len(semAct.actLog) != len(actLogWithLAC) { + t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog) + } + + for i, e := range actLogWithLAC { + if semAct.actLog[i] != e { + t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog) + } + } + }) + + t.Run("LAC is disabled", func(t *testing.T) { + semAct := &testSemAct{ + gram: gram, + } + + toks, err := NewTokenStream(gram, strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC()) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + if len(semAct.actLog) != len(actLogWithoutLAC) { + t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog) + } + + for i, e := range actLogWithoutLAC { + if semAct.actLog[i] != e { + t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog) + } + } + }) +} + +func termNode(kind string, text string, children ...*Node) *Node { + return &Node{ + Type: NodeTypeTerminal, + KindName: kind, + Text: text, + Children: children, + } +} + +func errorNode() *Node { + return &Node{ + Type: NodeTypeError, + KindName: "error", + } +} + +func nonTermNode(kind string, children ...*Node) *Node { + return &Node{ + Type: NodeTypeNonTerminal, + KindName: kind, + Children: children, + } +} + +func TestParser_Parse(t *testing.T) { + tests := []struct { + specSrc string + src string + synErr bool + cst *Node + ast *Node + }{ + { + specSrc: ` +#name test; + +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; + +add + : '+'; +mul + : '*'; +l_paren + : '('; +r_paren + : ')'; +id + : "[A-Za-z_][0-9A-Za-z_]*"; +`, + src: `(a+(b+c))*d+e`, + cst: nonTermNode("expr", + nonTermNode("expr", + nonTermNode("term", + nonTermNode("term", + nonTermNode("factor", + termNode("l_paren", "("), + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("term", + nonTermNode("factor", + termNode("id", "a"), + ), + ), + ), + termNode("add", "+"), + nonTermNode("term", + nonTermNode("factor", + termNode("l_paren", "("), + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("term", + nonTermNode("factor", + termNode("id", "b"), + ), + ), + ), + termNode("add", "+"), + nonTermNode("term", + nonTermNode("factor", + termNode("id", "c"), + ), + ), + ), + termNode("r_paren", ")"), + ), + ), + ), + termNode("r_paren", ")"), + ), + ), + termNode("mul", "*"), + nonTermNode("factor", + termNode("id", "d"), + ), + ), + ), + termNode("add", "+"), + nonTermNode("term", + nonTermNode("factor", + termNode("id", "e"), + ), + ), + ), + }, + // Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are + // not allowed in string literals. + { + specSrc: ` +#name test; + +s + : a b c + ; + +a + : '\f{foo}'; +b + : '\u{0000}'; +c + : '\p{gc=Letter}'; +`, + src: `\f{foo}\u{0000}\p{gc=Letter}`, + cst: nonTermNode("s", + termNode("a", `\f{foo}`), + termNode("b", `\u{0000}`), + termNode("c", `\p{gc=Letter}`), + ), + }, + // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node. 
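+ // (In the next case both `foo` and `bar` can derive the empty string, so even an empty source yields an `s` tree containing `foo` and `bar` nodes.)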
+ { + specSrc: ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar + : bar_text + | + ; +bar_text: "bar"; +`, + src: ``, + cst: nonTermNode("s", + nonTermNode("foo"), + nonTermNode("bar"), + ), + }, + // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node. + { + specSrc: ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar + : bar_text + | + ; + +bar_text + : "bar"; +`, + src: `bar`, + cst: nonTermNode("s", + nonTermNode("foo"), + nonTermNode("bar", + termNode("bar_text", "bar"), + ), + ), + }, + // A production can have multiple alternatives. + { + specSrc: ` +#name test; + +#prec ( + #assign $uminus + #left mul div + #left add sub +); + +expr + : expr add expr + | expr sub expr + | expr mul expr + | expr div expr + | int + | sub int #prec $uminus // This 'sub' means the unary minus symbol. + ; + +int + : "0|[1-9][0-9]*"; +add + : '+'; +sub + : '-'; +mul + : '*'; +div + : '/'; +`, + src: `-1*-2+3-4/5`, + ast: nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + nonTermNode("expr", + termNode("sub", "-"), + termNode("int", "1"), + ), + termNode("mul", "*"), + nonTermNode("expr", + termNode("sub", "-"), + termNode("int", "2"), + ), + ), + termNode("add", "+"), + nonTermNode("expr", + termNode("int", "3"), + ), + ), + termNode("sub", "-"), + nonTermNode("expr", + nonTermNode("expr", + termNode("int", "4"), + ), + termNode("div", "/"), + nonTermNode("expr", + termNode("int", "5"), + ), + ), + ), + }, + // A lexical production can have multiple production directives. + { + specSrc: ` +#name test; + +s + : push_a push_b pop pop + ; + +push_a #mode default #push a + : '->a'; +push_b #mode a #push b + : '->b'; +pop #mode a b #pop + : '<-'; +`, + src: `->a->b<-<-`, + ast: nonTermNode("s", + termNode("push_a", "->a"), + termNode("push_b", "->b"), + termNode("pop", "<-"), + termNode("pop", "<-"), + ), + }, + { + specSrc: ` +#name test; + +mode_tran_seq + : mode_tran_seq mode_tran + | mode_tran + ; +mode_tran + : push_m1 + | push_m2 + | pop_m1 + | pop_m2 + ; + +push_m1 #push m1 + : "->"; +push_m2 #mode m1 #push m2 + : "-->"; +pop_m1 #mode m1 #pop + : "<-"; +pop_m2 #mode m2 #pop + : "<--"; +whitespace #mode default m1 m2 #skip + : "\u{0020}+"; +`, + src: ` -> --> <-- <- `, + }, + { + specSrc: ` +#name test; + +s + : foo bar + ; + +foo + : "foo"; +bar #mode default + : "bar"; +`, + src: `foobar`, + }, + // When #push and #pop are applied to the same symbol, #pop will run first, then #push. + { + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push m1 + : 'foo'; +bar #mode m1 #pop #push m2 + : 'bar'; +baz #mode m2 + : 'baz'; +`, + src: `foobarbaz`, + ast: nonTermNode("s", + termNode("foo", "foo"), + termNode("bar", "bar"), + termNode("baz", "baz"), + ), + }, + // When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first + // in a definition. That is, the order in which #push and #pop appear in a grammar has nothing to do with the order in which + // they are executed. + { + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push m1 + : 'foo'; +bar #mode m1 #push m2 #pop + : 'bar'; +baz #mode m2 + : 'baz'; +`, + src: `foobarbaz`, + ast: nonTermNode("s", + termNode("foo", "foo"), + termNode("bar", "bar"), + termNode("baz", "baz"), + ), + }, + // The parser can skip specified tokens.
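+ // (In the next spec the `white_space` entry is marked #skip, so its lexemes are consumed during lexing and never reach the parser.)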
+ { + specSrc: ` +#name test; + +s + : foo bar + ; + +foo + : "foo"; +bar + : "bar"; +white_space #skip + : "[\u{0009}\u{0020}]+"; +`, + src: `foo bar`, + }, + // A grammar can contain fragments. + { + specSrc: ` +#name test; + +s + : tagline + ; +tagline + : "\f{words} IS OUT THERE."; +fragment words + : "[A-Za-z\u{0020}]+"; +`, + src: `THE TRUTH IS OUT THERE.`, + }, + // A grammar can contain ast actions. + { + specSrc: ` +#name test; + +list + : l_bracket elems r_bracket #ast elems... + ; +elems + : elems comma id #ast elems... id + | id + ; + +whitespace #skip + : "\u{0020}+"; +l_bracket + : '['; +r_bracket + : ']'; +comma + : ','; +id + : "[A-Za-z]+"; +`, + src: `[Byers, Frohike, Langly]`, + cst: nonTermNode("list", + termNode("x_1", "["), + nonTermNode("elems", + nonTermNode("elems", + nonTermNode("elems", + termNode("id", "Byers"), + ), + termNode("x_3", ","), + termNode("id", "Frohike"), + ), + termNode("x_3", ","), + termNode("id", "Langly"), + ), + termNode("x_2", "]"), + ), + ast: nonTermNode("list", + termNode("id", "Byers"), + termNode("id", "Frohike"), + termNode("id", "Langly"), + ), + }, + // The '...' operator can expand child nodes. + { + specSrc: ` +#name test; + +s + : a #ast a... + ; +a + : a comma foo #ast a... foo + | foo + ; + +comma + : ','; +foo + : 'foo'; +`, + src: `foo,foo,foo`, + ast: nonTermNode("s", + termNode("foo", "foo"), + termNode("foo", "foo"), + termNode("foo", "foo"), + ), + }, + // The '...' operator can also be applied to an element that has no children. + { + specSrc: ` +#name test; + +s + : a semi_colon #ast a... + ; +a + : + ; + +semi_colon + : ';'; +`, + src: `;`, + ast: nonTermNode("s"), + }, + // A label can be a parameter of the #ast directive. + { + specSrc: ` +#name test; + +#prec ( + #left add sub +); + +expr + : expr@lhs add expr@rhs #ast add lhs rhs + | expr@lhs sub expr@rhs #ast sub lhs rhs + | num + ; + +add + : '+'; +sub + : '-'; +num + : "0|[1-9][0-9]*"; +`, + src: `1+2-3`, + ast: nonTermNode("expr", + termNode("sub", "-"), + nonTermNode("expr", + termNode("add", "+"), + nonTermNode("expr", + termNode("num", "1"), + ), + nonTermNode("expr", + termNode("num", "2"), + ), + ), + nonTermNode("expr", + termNode("num", "3"), + ), + ), + }, + // An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed. + { + specSrc: ` +#name test; + +s + : foo@x semi_colon #ast foo + ; + +semi_colon + : ';'; +foo + : 'foo'; +`, + src: `foo;`, + ast: nonTermNode("s", + termNode("foo", "foo"), + ), + }, + // A production has the same precedence and associativity as the right-most terminal symbol. + { + specSrc: ` +#name test; + +#prec ( + #left add +); + +expr + : expr add expr // This alternative has the same precedence and associativity as 'add'. + | int + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +int + : "0|[1-9][0-9]*"; +add + : '+'; +`, + // This source is recognized as the following structure because the production `expr → expr add expr` has the same + // precedence and associativity as the symbol 'add'. + // + // ((1+2)+3) + // + // If the symbol didn't have the precedence and left associativity, the production wouldn't have them either, and + // this source would be recognized as the following structure.
+ //
+ // (1+(2+3))
+ src: `1+2+3`,
+ ast: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("int", "1"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "2"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "3"),
+ ),
+ ),
+ },
+ // The '#prec' directive can set the precedence of a production.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $uminus
+ #left mul div
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | int
+ | sub int #prec $uminus // This 'sub' means a unary minus symbol.
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+sub
+ : '-';
+mul
+ : '*';
+div
+ : '/';
+`,
+ // This source is recognized as the following structure because the production `expr → sub int`
+ // has the `#prec $uminus` directive and therefore has the same precedence as the ordered symbol `$uminus`.
+ //
+ // (((-1) * 20) / 5)
+ //
+ // If the production doesn't have the `#prec` directive, this source will be recognized as
+ // the following structure.
+ //
+ // (- ((1 * 20) / 5))
+ src: `-1*20/5`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "1"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("int", "20"),
+ ),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("int", "5"),
+ ),
+ ),
+ },
+ // The grammar can contain the 'error' symbol.
+ {
+ specSrc: `
+#name test;
+
+s
+ : id id id semi_colon
+ | error semi_colon
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+id
+ : "[A-Za-z_]+";
+`,
+ src: `foo bar baz ;`,
+ },
+ // The 'error' symbol can appear in an #ast directive.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo semi_colon
+ | error semi_colon #ast error
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `bar;`,
+ synErr: true,
+ ast: nonTermNode("s",
+ errorNode(),
+ ),
+ },
+ // The 'error' symbol can have a label, and an #ast directive can reference it.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo semi_colon
+ | error@e semi_colon #ast e
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `bar;`,
+ synErr: true,
+ ast: nonTermNode("s",
+ errorNode(),
+ ),
+ },
+ // The grammar can contain the '#recover' directive.
+ {
+ specSrc: `
+#name test;
+
+seq
+ : seq elem
+ | elem
+ ;
+elem
+ : id id id semi_colon
+ | error semi_colon #recover
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+id
+ : "[A-Za-z_]+";
+`,
+ src: `a b c ; d e f ;`,
+ },
+ // The same label can be used between different alternatives.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo@x bar
+ | foo@x
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ src: `foo`,
+ },
+ }
+
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ cg, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram := NewGrammar(cg)
+ tb := NewDefaultSyntaxTreeBuilder()
+ var opt []ParserOption
+ switch {
+ case tt.ast != nil:
+ opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
+ case tt.cst != nil:
+ opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
+ }
+ p, err := NewParser(toks, gram, opt...)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if !tt.synErr && len(p.SyntaxErrors()) > 0 {
+ for _, synErr := range p.SyntaxErrors() {
+ t.Fatalf("unexpected syntax errors occurred: %v", synErr)
+ }
+ }
+
+ switch {
+ case tt.ast != nil:
+ testTree(t, tb.Tree(), tt.ast)
+ case tt.cst != nil:
+ testTree(t, tb.Tree(), tt.cst)
+ }
+ })
+ }
+}
+
+func testTree(t *testing.T, node, expected *Node) {
+ t.Helper()
+
+ if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
+ t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
+ }
+ if len(node.Children) != len(expected.Children) {
+ t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
+ }
+ for i, c := range node.Children {
+ testTree(t, c, expected.Children[i])
+ }
+}
+
+type testSemAct struct {
+ gram *spec.CompiledGrammar
+ actLog []string
+}
+
+func (a *testSemAct) Shift(tok VToken, recovered bool) {
+ t := a.gram.Syntactic.Terminals[tok.TerminalID()]
+ if recovered {
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
+ } else {
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
+ }
+}
+
+func (a *testSemAct) Reduce(prodNum int, recovered bool) {
+ lhsSym := a.gram.Syntactic.LHSSymbols[prodNum]
+ lhsText := a.gram.Syntactic.NonTerminals[lhsSym]
+ if recovered {
+ a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText))
+ } else {
+ a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText))
+ }
+}
+
+func (a *testSemAct) Accept() {
+ a.actLog = append(a.actLog, "accept")
+}
+
+func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
+ a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
+}
+
+func (a *testSemAct) MissError(cause VToken) {
+ a.actLog = append(a.actLog, "miss")
+}
+
+func TestParserWithSemanticAction(t *testing.T) {
+ specSrcWithErrorProd := `
+#name test;
+
+seq
+ : seq elem semicolon
+ | elem semicolon
+ | error star star semicolon
+ | error semicolon #recover
+ ;
+elem
+ : char char char
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semicolon
+ : ';';
+star
+ : '*';
+char
+ : "[a-z]";
+`
+
+ specSrcWithoutErrorProd := `
+#name test;
+
+seq
+ : seq elem semicolon
+ | elem semicolon
+ ;
+elem
+ : char char char
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semicolon
+ : ';';
+char
+ : "[a-z]";
+`
+
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ actLog []string
+ }{
+ {
+ caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a b c; d e f;`,
+ actLog: []string{
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ "accept",
+ },
+ },
+ {
+ caption: "when a grammar has an `error` symbol, the driver calls `TrapAndShiftError`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a; b !; c d !; e ! * *; h i j;`,
+ actLog: []string{
+ "shift/char",
+ "trap/1/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "trap/2/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "shift/char",
+ "trap/3/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "trap/2/shift/error",
+ "shift/star",
+ "shift/star",
+ // When the driver shifts three times, it recovers from an error.
+ "shift/semicolon/recovered", + "reduce/seq", + + "shift/char", + "shift/char", + "shift/char", + "reduce/elem", + "shift/semicolon", + "reduce/seq", + + // Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted + // according to the error production. + "accept", + }, + }, + { + caption: "when the input doesn't meet the error production, the driver calls `MissError`.", + specSrc: specSrcWithErrorProd, + src: `a !`, + actLog: []string{ + "shift/char", + "trap/1/shift/error", + + "miss", + }, + }, + { + caption: "when a syntax error isn't trapped, the driver calls `MissError`.", + specSrc: specSrcWithoutErrorProd, + src: `a !`, + actLog: []string{ + "shift/char", + + "miss", + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + gram, _, err := b.Build() + if err != nil { + t.Fatal(err) + } + + toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + + semAct := &testSemAct{ + gram: gram, + } + p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct)) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + if len(semAct.actLog) != len(tt.actLog) { + t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog) + } + + for i, e := range tt.actLog { + if semAct.actLog[i] != e { + t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog) + } + } + }) + } +} + +func TestParserWithSyntaxErrors(t *testing.T) { + tests := []struct { + caption string + specSrc string + src string + synErrCount int + }{ + { + caption: "the parser can report a syntax error", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +`, + src: `bar`, + synErrCount: 1, + }, + { + caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state", + specSrc: ` +#name test; + +seq + : seq elem semi_colon + | elem semi_colon + | error semi_colon #recover + ; +elem + : a b c + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +semi_colon + : ';'; +a + : 'a'; +b + : 'b'; +c + : 'c'; +`, + src: `!; a!; ab!;`, + synErrCount: 3, + }, + { + caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears", + specSrc: ` +#name test; + +seq + : seq elem semi_colon + | elem semi_colon + | error semi_colon #recover + ; +elem + : a b c + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +semi_colon + : ';'; +a + : 'a'; +b + : 'b'; +c + : 'c'; +`, + // After the parser trasits to the error state reading the first invalid symbol ('!'), + // the second and third invalid symbols ('!') are ignored. + src: `! ! 
!; a!; ab!;`, + synErrCount: 3, + }, + { + caption: "when the parser performs shift three times, the parser recovers from the error state", + specSrc: ` +#name test; + +seq + : seq elem semi_colon + | elem semi_colon + | error star star semi_colon + ; +elem + : a b c + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +semi_colon + : ';'; +star + : '*'; +a + : 'a'; +b + : 'b'; +c + : 'c'; +`, + src: `!**; a!**; ab!**; abc!`, + synErrCount: 4, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + gram, _, err := b.Build() + if err != nil { + t.Fatal(err) + } + + toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + + p, err := NewParser(toks, NewGrammar(gram)) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + synErrs := p.SyntaxErrors() + if len(synErrs) != tt.synErrCount { + t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs)) + } + }) + } +} + +func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) { + tests := []struct { + caption string + specSrc string + src string + cause string + expected []string + }{ + { + caption: "the parser reports an expected lookahead symbol", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +`, + src: `bar`, + cause: `bar`, + expected: []string{ + "foo", + }, + }, + { + caption: "the parser reports expected lookahead symbols", + specSrc: ` +#name test; + +s + : foo + | bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + src: `baz`, + cause: `baz`, + expected: []string{ + "foo", + "bar", + }, + }, + { + caption: "the parser may report the EOF as an expected lookahead symbol", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +`, + src: `foobar`, + cause: `bar`, + expected: []string{ + "", + }, + }, + { + caption: "the parser may report the EOF and others as expected lookahead symbols", + specSrc: ` +#name test; + +s + : foo + | + ; + +foo + : 'foo'; +`, + src: `bar`, + cause: `bar`, + expected: []string{ + "foo", + "", + }, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + gram, _, err := b.Build() + if err != nil { + t.Fatal(err) + } + + toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + + p, err := NewParser(toks, NewGrammar(gram)) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + synErrs := p.SyntaxErrors() + if synErrs == nil { + t.Fatalf("expected one syntax error, but it didn't occur") + } + if len(synErrs) != 1 { + t.Fatalf("too many syntax errors: %v errors", len(synErrs)) + } + synErr := synErrs[0] + if string(synErr.Token.Lexeme()) != tt.cause { + t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme())) + } + if len(synErr.ExpectedTerminals) != len(tt.expected) { + t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals) + } + sort.Slice(tt.expected, func(i, j int) bool { + return tt.expected[i] < tt.expected[j] + }) + sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool { + return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j] + }) + for i, e 
:= range tt.expected { + if synErr.ExpectedTerminals[i] != e { + t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i]) + } + } + }) + } +} + + +func MainTest() {} diff --git a/tests/unit/grammar/grammar.go b/tests/unit/grammar/grammar.go new file mode 100644 index 0000000..fe66976 --- /dev/null +++ b/tests/unit/grammar/grammar.go @@ -0,0 +1,4650 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + verr "urubu/error" + "urubu/grammar/symbol" + "urubu/spec/grammar/parser" +) + +type first struct { + lhs string + num int + dot int + symbols []string + empty bool +} + +func TestGenFirst(t *testing.T) { + tests := []struct { + caption string + src string + first []first + }{ + { + caption: "productions contain only non-empty productions", + src: ` +#name test; + +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +`, + first: []first{ + {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, + {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, + {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, + {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, + {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, + {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, + }, + }, + { + caption: "productions contain the empty start production", + src: ` +#name test; + +s + : + ; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "productions contain an empty production", + src: ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar: "bar"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, + {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "a start production contains a non-empty alternative and empty alternative", + src: ` +#name test; + +s + : foo + | + ; +foo: "foo"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, + {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "a production contains non-empty alternative and empty alternative", + src: ` +#name test; + +s + : foo + ; +foo + : bar + | + ; +bar: "bar"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, + {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, + {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + fst, gram := genActualFirst(t, tt.src) + + for _, ttFirst := range tt.first { + lhsSym, ok := 
gram.symbolTable.ToSymbol(ttFirst.lhs)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
+ }
+
+ prod, ok := gram.productionSet.findByLHS(lhsSym)
+ if !ok {
+ t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
+ }
+
+ actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
+ if err != nil {
+ t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
+ }
+
+ expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
+
+ testFirst(t, actualFirst, expectedFirst)
+ }
+ })
+ }
+}
+
+func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err := b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+ fst, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if fst == nil {
+ t.Fatal("genFirstSet returned nil without any error")
+ }
+
+ return fst, gram
+}
+
+func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
+ t.Helper()
+
+ entry := newFirstEntry()
+ if empty {
+ entry.addEmpty()
+ }
+ for _, sym := range symbols {
+ symSym, ok := symTab.ToSymbol(sym)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", sym)
+ }
+ entry.add(symSym)
+ }
+
+ return entry
+}
+
+func testFirst(t *testing.T, actual, expected *firstEntry) {
+ if actual.empty != expected.empty {
+ t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
+ }
+
+ if len(actual.symbols) != len(expected.symbols) {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+
+ for eSym := range expected.symbols {
+ if _, ok := actual.symbols[eSym]; !ok {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+ }
+}
+
+func TestGrammarBuilderOK(t *testing.T) {
+ type okTest struct {
+ caption string
+ specSrc string
+ validate func(t *testing.T, g *Grammar)
+ }
+
+ nameTests := []*okTest{
+ {
+ caption: "the `#name` can be the same identifier as a non-terminal symbol",
+ specSrc: `
+#name s;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "s"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ {
+ caption: "the `#name` can be the same identifier as a terminal symbol",
+ specSrc: `
+#name foo;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "foo"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ {
+ caption: "the `#name` can be the same identifier as the error symbol",
+ specSrc: `
+#name error;
+
+s
+ : foo
+ | error
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "error"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ {
+ caption: "the `#name` can be the same identifier as a fragment",
+ specSrc: `
+#name f;
+
+s
+ : foo
+ ;
+
+foo
+ : "\f{f}";
+fragment f
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ expected := "f"
+ if g.name != expected {
+ t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
+ }
+ },
+ },
+ }
+
+ modeTests := []*okTest{
+ {
+ caption: "a `#mode` can be the same identifier as a non-terminal symbol",
+
specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push s + : 'foo'; +bar #mode s + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "s" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + { + caption: "a `#mode` can be the same identifier as a terminal symbol", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push bar + : 'foo'; +bar #mode bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "bar" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + { + caption: "a `#mode` can be the same identifier as the error symbol", + specSrc: ` +#name test; + +s + : foo bar + | error + ; + +foo #push error + : 'foo'; +bar #mode error + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "error" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + { + caption: "a `#mode` can be the same identifier as a fragment", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push f + : "\f{f}"; +bar #mode f + : 'bar'; +fragment f + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "f" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + } + + precTests := []*okTest{ + { + caption: "a `#prec` allows the empty directive group", + specSrc: ` +#name test; + +#prec (); + +s + : foo + ; + +foo + : 'foo'; +`, + }, + { + caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols", + specSrc: ` +#name test; + +#prec ( + #left foo bar +); + +s + : foo bar baz + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) + } + var bazPrec int + var bazAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("baz") + bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if bazPrec != precNil || bazAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and 
associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) + } + }, + }, + { + caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols", + specSrc: ` +#name test; + +#prec ( + #right foo bar +); + +s + : foo bar baz + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc) + } + var bazPrec int + var bazAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("baz") + bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if bazPrec != precNil || bazAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) + } + }, + }, + { + caption: "an `#assign` directive gives only a precedence to specified terminal symbols", + specSrc: ` +#name test; + +#prec ( + #assign foo bar +); + +s + : foo bar baz + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc) + } + var bazPrec int + var bazAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("baz") + bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if bazPrec != precNil || bazAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) + } + }, + }, + { + caption: "a production has the same precedence and associativity as the right-most terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left foo +); + +s + : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'. 
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if barPrec != precNil || barAssoc != assocTypeNil {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc)
+ }
+ if sPrec != barPrec || sAssoc != barAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "a production has the same precedence and associativity as the right-most terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+);
+
+s
+ : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'.
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var barPrec int
+ var barAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if barPrec != 2 || barAssoc != assocTypeRight {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
+ }
+ if sPrec != barPrec || sAssoc != barAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "even if a non-terminal symbol appears after a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+);
+
+s
+ : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'.
+ ; +a + : bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var aPrec int + var aAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("a") + ps, _ := g.productionSet.findByLHS(s) + aPrec = g.precAndAssoc.productionPredence(ps[0].num) + aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + if aPrec != barPrec || aAssoc != barAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc) + } + if sPrec != fooPrec || sAssoc != fooAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc) + } + }, + }, + { + caption: "each alternative in the same production can have its own precedence and associativity", + specSrc: ` +#name test; + +#prec ( + #left foo + #right bar + #assign baz +); + +s + : foo + | bar + | baz + | bra + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +bra + : 'bra'; +`, + validate: func(t *testing.T, g *Grammar) { + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + var alt3Prec int + var alt3Assoc assocType + var alt4Prec int + var alt4Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + alt3Prec = g.precAndAssoc.productionPredence(ps[2].num) + alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num) + alt4Prec = g.precAndAssoc.productionPredence(ps[3].num) + alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeLeft { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc) + } + if alt2Prec != 2 || alt2Assoc != assocTypeRight { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc) + } + if alt3Prec != 3 || alt3Assoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, 
alt3Prec, alt3Assoc)
+ }
+ if alt4Prec != precNil || alt4Assoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc)
+ }
+ },
+ },
+ {
+ caption: "when a production contains no terminal symbols, the production will not have precedence and associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+);
+
+s
+ : a
+ ;
+a
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var aPrec int
+ var aAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("a")
+ ps, _ := g.productionSet.findByLHS(s)
+ aPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if aPrec != fooPrec || aAssoc != fooAssoc {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc)
+ }
+ if sPrec != precNil || sAssoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+);
+
+s
+ : foo bar #prec foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var sPrec int
+ var sAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("s")
+ ps, _ := g.productionSet.findByLHS(s)
+ sPrec = g.precAndAssoc.productionPredence(ps[0].num)
+ sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
+ }
+ if fooPrec != 1 || fooAssoc != assocTypeLeft {
+ t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
+ }
+ if sPrec != fooPrec || sAssoc != assocTypeNil {
+ t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc)
+ }
+ },
+ },
+ {
+ caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity",
+ specSrc: `
+#name test;
+
+#prec (
+ #left foo
+ #right bar
+);
+
+s
+ : foo bar #prec foo
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ validate: func(t *testing.T, g *Grammar) {
+ var fooPrec int
+ var fooAssoc assocType
+ {
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
+ }
+ var barPrec int
+ var barAssoc assocType
+ { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + if sPrec != fooPrec || sAssoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) + } + }, + }, + { + caption: "an ordered symbol can appear in a `#left` directive", + specSrc: ` +#name test; + +#prec ( + #left $high + #right foo bar + #left $low +); + +s + : foo #prec $high + | bar #prec $low + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 2 || fooAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 3 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "an ordered symbol can appear in a `#right` directive", + specSrc: ` +#name test; + +#prec ( + #right $high + #left foo bar + #right $low +); + +s + : foo #prec $high + | bar #prec $low + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 2 || fooAssoc != 
assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 2 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 3 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "an ordered symbol can appear in a `#assign` directive", + specSrc: ` +#name test; + +#prec ( + #assign $high + #left foo + #right bar + #assign $low +); + +s + : foo #prec $high + | bar #prec $low + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 2 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 3 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 4 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "names of an ordered symbol and a terminal symbol can duplicate", + specSrc: ` +#name test; + +#prec ( + 
#left foo bar + #right $foo +); + +s + : foo + | bar #prec $foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if barPrec != 1 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != fooPrec || alt1Assoc != fooAssoc { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc) + } + if alt2Prec != 2 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "names of an ordered symbol and a non-terminal symbol can duplicate", + specSrc: ` +#name test; + +#prec ( + #left foo bar + #right $a +); + +s + : a + | bar #prec $a + ; +a + : foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != precNil || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 2 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + } + + var tests []*okTest + tests = append(tests, nameTests...) + tests = append(tests, modeTests...) + tests = append(tests, precTests...) 
+
+ for _, test := range tests {
+ t.Run(test.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(test.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ g, err := b.build()
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if test.validate != nil {
+ test.validate(t, g)
+ }
+ })
+ }
+}
+
+func TestGrammarBuilderSpecError(t *testing.T) {
+ type specErrTest struct {
+ caption string
+ specSrc string
+ errs []error
+ }
+
+ spellingInconsistenciesTests := []*specErrTest{
+ {
+ caption: "a spelling inconsistency appears among non-terminal symbols",
+ specSrc: `
+#name test;
+
+a1
+ : a_1
+ ;
+a_1
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among terminal symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo1 foo_1
+ ;
+
+foo1
+ : 'foo1';
+foo_1
+ : 'foo_1';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among non-terminal and terminal symbols",
+ specSrc: `
+#name test;
+
+a1
+ : a_1
+ ;
+
+a_1
+ : 'a_1';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $p1 $p_1
+);
+
+s
+ : foo #prec $p1
+ | bar #prec $p_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $p1
+ #assign $p_1
+);
+
+s
+ : foo #prec $p1
+ | bar #prec $p_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among labels the same alternative contains",
+ specSrc: `
+#name test;
+
+s
+ : foo@l1 foo@l_1
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among labels the same production contains",
+ specSrc: `
+#name test;
+
+s
+ : foo@l1
+ | bar@l_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ {
+ caption: "a spelling inconsistency appears among labels different productions contain",
+ specSrc: `
+#name test;
+
+s
+ : foo@l1
+ ;
+a
+ : bar@l_1
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ errs: []error{semErrSpellingInconsistency},
+ },
+ }
+
+ prodTests := []*specErrTest{
+ {
+ caption: "a production `b` is unused",
+ specSrc: `
+#name test;
+
+a
+ : foo
+ ;
+b
+ : foo
+ ;
+
+foo
+ : "foo";
+`,
+ errs: []error{semErrUnusedProduction},
+ },
+ {
+ caption: "a terminal symbol `bar` is unused",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{semErrUnusedTerminal},
+ },
+ {
+ caption: "a production `b` and a terminal symbol `bar` are unused",
+ specSrc: `
+#name test;
+
+a
+ : foo
+ ;
+b
+ : bar
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+`,
+ errs: []error{
+ semErrUnusedProduction,
+ semErrUnusedTerminal,
+ },
+ },
+ {
+ caption: "a production cannot have production directives",
+ specSrc: `
+#name test;
+
+s #prec foo
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ errs: []error{semErrInvalidProdDir},
+ },
+ {
+ caption: "a lexical production cannot have alternative directives",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo' #skip;
+`,
+ errs: []error{semErrInvalidAltDir},
+ },
+ {
+ caption: "a production directive must not
be duplicated", + specSrc: ` +#name test; + +s + : foo + ; + +foo #skip #skip + : 'foo'; +`, + errs: []error{semErrDuplicateDir}, + }, + { + caption: "an alternative directive must not be duplicated", + specSrc: ` +#name test; + +s + : foo bar #ast foo bar #ast foo bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDuplicateDir}, + }, + { + caption: "a production must not have a duplicate alternative (non-empty alternatives)", + specSrc: ` +#name test; + +s + : foo + | foo + ; + +foo + : "foo"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a production must not have a duplicate alternative (non-empty and split alternatives)", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : bar + ; +s + : foo + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a production must not have a duplicate alternative (empty alternatives)", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : + | + ; + +foo + : "foo"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a production must not have a duplicate alternative (empty and split alternatives)", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : + | foo + ; +a + : + ; + +foo + : "foo"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : "foo"; +s + : "a"; +`, + errs: []error{semErrDuplicateName}, + }, + { + caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : bar + ; + +foo + : "foo"; +bar + : "bar"; +a + : "a"; +`, + errs: []error{semErrDuplicateName}, + }, + { + caption: "an invalid top-level directive", + specSrc: ` +#name test; + +#foo; + +s + : a + ; + +a + : 'a'; +`, + errs: []error{semErrDirInvalidName}, + }, + { + caption: "a label must be unique in an alternative", + specSrc: ` +#name test; + +s + : foo@x bar@x + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDuplicateLabel}, + }, + { + caption: "a label cannot be the same name as terminal symbols", + specSrc: ` +#name test; + +s + : foo bar@foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDuplicateLabel}, + }, + { + caption: "a label cannot be the same name as non-terminal symbols", + specSrc: ` +#name test; + +s + : foo@a + | a + ; +a + : bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{ + semErrInvalidLabel, + }, + }, + } + + nameDirTests := []*specErrTest{ + { + caption: "the `#name` directive is required", + specSrc: ` +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrNoGrammarName}, + }, + { + caption: "the `#name` directive needs an ID parameter", + specSrc: ` +#name; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive cannot take a pattern parameter", + specSrc: ` +#name "test"; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive cannot take a string parameter", + specSrc: ` +#name 'test'; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive takes just one parameter", + specSrc: ` +#name test1 test2; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + precDirTests := []*specErrTest{ + { + caption: "the `#prec` directive needs a 
directive group parameter", + specSrc: ` +#name test; + +#prec; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an ID parameter", + specSrc: ` +#name test; + +#prec foo; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec $x; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec "foo"; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec 'foo'; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive takes just one directive group parameter", + specSrc: ` +#name test; + +#prec () (); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + leftDirTests := []*specErrTest{ + { + caption: "the `#left` directive needs ID parameters", + specSrc: ` +#name test; + +#prec ( + #left +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +#prec ( + #left error +); + +s + : foo semi_colon + | error semi_colon + ; + +foo + : 'foo'; +semi_colon + : ';'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +#prec ( + #left x +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left s +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec ( + #left "foo" +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec ( + #left 'foo' +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a directive parameter", + specSrc: ` +#name test; + +#prec ( + #left () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left foo foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol", + specSrc: ` +#name test; + +#prec ( + #left $x $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #left foo + #left foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #left $x + #left $x +); + +s + : foo #prec $x + ; + +foo 
+ : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #right foo + #left foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #right $x + #left $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + } + + rightDirTests := []*specErrTest{ + { + caption: "the `#right` directive needs ID parameters", + specSrc: ` +#name test; + +#prec ( + #right +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +#prec ( + #right error +); + +s + : foo semi_colon + | error semi_colon + ; + +foo + : 'foo'; +semi_colon + : ';'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +#prec ( + #right x +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #right s +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec ( + #right "foo" +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec ( + #right 'foo' +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +#prec ( + #right () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot be specified multiple times for a terminal symbol", + specSrc: ` +#name test; + +#prec ( + #right foo foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "the `#right` directive cannot be specified multiple times for an ordered symbol", + specSrc: ` +#name test; + +#prec ( + #right $x $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #right foo + #right foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #right $x + #right $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #left foo + #right foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #left $x + #right $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + } + + assignDirTests := []*specErrTest{ + { + caption: "the `#assign` directive needs ID 
parameters", + specSrc: ` +#name test; + +#prec ( + #assign +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +#prec ( + #assign error +); + +s + : foo semi_colon + | error semi_colon + ; + +foo + : 'foo'; +semi_colon + : ';'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +#prec ( + #assign x +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #assign s +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec ( + #assign "foo" +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec ( + #assign 'foo' +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a directive parameter", + specSrc: ` +#name test; + +#prec ( + #assign () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol", + specSrc: ` +#name test; + +#prec ( + #assign foo foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol", + specSrc: ` +#name test; + +#prec ( + #assign $x $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #assign foo + #assign foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #assign $x + #assign $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #assign foo + #left foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #assign $x + #left $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + } + + errorSymTests := []*specErrTest{ + { + caption: "cannot use the error symbol as a non-terminal symbol", + specSrc: ` +#name test; + +s + : error + ; +error + : foo + ; + +foo: 'foo'; +`, + errs: []error{ + semErrErrSymIsReserved, + semErrDuplicateName, + }, + }, + { + caption: "cannot use the error symbol as a terminal symbol", + specSrc: ` +#name test; + +s + : error + ; + +error: 'error'; +`, + errs: []error{semErrErrSymIsReserved}, + }, + { + caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +error #skip + : 'error'; +`, + errs: 
[]error{semErrErrSymIsReserved}, + }, + } + + astDirTests := []*specErrTest{ + { + caption: "the `#ast` directive needs ID or label parameters", + specSrc: ` +#name test; + +s + : foo #ast + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo #ast $x + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo #ast "foo" + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo #ast 'foo' + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo #ast () + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative", + specSrc: ` +#name test; + +s + : foo bar #ast foo x + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive", + specSrc: ` +#name test; + +s + : foo #ast bar + | bar + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a label in a different alternative cannot be a parameter of the `#ast` directive", + specSrc: ` +#name test; + +s + : foo #ast b + | bar@b + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a symbol can appear in the `#ast` directive only once", + specSrc: ` +#name test; + +s + : foo #ast foo foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateElem}, + }, + { + caption: "a label can appear in the `#ast` directive only once", + specSrc: ` +#name test; + +s + : foo@x #ast x x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateElem}, + }, + { + caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label", + specSrc: ` +#name test; + +s + : foo@x #ast foo x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateElem}, + }, + { + caption: "symbol `foo` is ambiguous because it appears in an alternative twice", + specSrc: ` +#name test; + +s + : foo foo #ast foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrAmbiguousElem}, + }, + { + caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label", + specSrc: ` +#name test; + +s + : foo@x foo #ast foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrAmbiguousElem}, + }, + { + caption: "the expansion operator cannot be applied to a terminal symbol", + specSrc: ` +#name test; + +s + : foo #ast foo... 
+ ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + altPrecDirTests := []*specErrTest{ + { + caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter", + specSrc: ` +#name test; + +s + : foo #prec + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +s + : foo #prec error + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +s + : foo #prec x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +s + : a #prec b + | b + ; +a + : foo + ; +b + : bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an undefined ordered symbol parameter", + specSrc: ` +#name test; + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrUndefinedOrdSym}, + }, + { + caption: "the `#prec` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo #prec "foo" + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo #prec 'foo' + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a directive parameter", + specSrc: ` +#name test; + +s + : foo #prec () + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a symbol the `#prec` directive takes must be given precedence explicitly", + specSrc: ` +#name test; + +s + : foo bar #prec foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrUndefinedPrec}, + }, + } + + recoverDirTests := []*specErrTest{ + { + caption: "the `#recover` directive cannot take an ID parameter", + specSrc: ` +#name test; + +s + : foo #recover foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo #recover $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo #recover "foo" + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo #recover 'foo' + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo #recover () + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + fragmentTests := []*specErrTest{ + { + caption: "a production cannot contain a fragment", + specSrc: ` +#name test; + +s + : f + ; + +fragment f + : 'fragment'; +`, + errs: []error{semErrUndefinedSym}, + }, + { + caption: "fragments cannot be duplicated", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : "\f{f}"; +fragment f + : 'fragment 1'; +fragment f + : 'fragment 2'; +`, + errs: []error{semErrDuplicateFragment}, + }, + } + + modeDirTests := []*specErrTest{ + { + caption: "the `#mode` 
directive needs an ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar + ; + +foo + : 'foo'; +bar #mode $x + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode "mode_1" + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode 'mode_1' + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode () + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + pushDirTests := []*specErrTest{ + { + caption: "the `#push` directive needs an ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive takes just one ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 mode_2 + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar + ; + +foo #push $x + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push "mode_1" + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push 'mode_1' + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push () + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + popDirTests := []*specErrTest{ + { + caption: "the `#pop` directive cannot take an ID parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop mode_1 + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop $x + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop "mode_1" + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode 
mode_1 + : 'bar'; +baz #pop 'mode_1' + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a directive parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop () + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + skipDirTests := []*specErrTest{ + { + caption: "the `#skip` directive cannot take an ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip bar + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar + ; + +foo #skip $x + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip "bar" + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip 'bar' + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip () + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a terminal symbol used in productions cannot have the skip directive", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrTermCannotBeSkipped}, + }, + } + + var tests []*specErrTest + tests = append(tests, spellingInconsistenciesTests...) + tests = append(tests, prodTests...) + tests = append(tests, nameDirTests...) + tests = append(tests, precDirTests...) + tests = append(tests, leftDirTests...) + tests = append(tests, rightDirTests...) + tests = append(tests, assignDirTests...) + tests = append(tests, errorSymTests...) + tests = append(tests, astDirTests...) + tests = append(tests, altPrecDirTests...) + tests = append(tests, recoverDirTests...) + tests = append(tests, fragmentTests...) + tests = append(tests, modeDirTests...) + tests = append(tests, pushDirTests...) + tests = append(tests, popDirTests...) + tests = append(tests, skipDirTests...) + for _, test := range tests { + t.Run(test.caption, func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(test.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := GrammarBuilder{ + AST: ast, + } + _, err = b.build() + if err == nil { + t.Fatal("an expected error didn't occur") + } + specErrs, ok := err.(verr.SpecErrors) + if !ok { + t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err) + } + if len(specErrs) != len(test.errs) { + t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs) + } + for _, expected := range test.errs { + for _, actual := range specErrs { + if actual.Cause == expected { + return + } + } + } + t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs) + }) + } +} + +func TestGenLALR1Automaton(t *testing.T) { + // This grammar belongs to LALR(1) class, not SLR(1). 
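+ // In an SLR(1) table, the state containing both "s -> l . eq r" and "r -> l ." + // gets a shift/reduce conflict on eq, because eq is in FOLLOW(r); the LALR(1) + // look-ahead sets restrict that reduction to EOF, so the conflict disappears.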
+ src := ` +#name test; + +s: l eq r | r; +l: ref r | id; +r: l; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var gram *Grammar + var automaton *lalr1Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatalf("failed to create a FIRST set: %v", err) + } + + automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) + if err != nil { + t.Fatalf("failed to create a LALR1 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLALR1Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), + }, + 1: { + withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), + }, + 2: { + withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), + withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), + }, + 3: { + withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), + }, + 4: { + withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 5: { + withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), + }, + 6: { + withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), + }, + 7: { + withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 8: { + withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), + }, + 9: { + withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("l"): expectedKernels[2], + genSym("r"): expectedKernels[3], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s'", "s"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("eq"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("r", "l"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "r"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[7], + genSym("l"): expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("l", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[9], + genSym("l"): 
expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("l", "ref", "r"), + }, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("r", "l"), + }, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "l", "eq", "r"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton.lr0Automaton) +} + +type expectedLRState struct { + kernelItems []*lrItem + nextStates map[symbol.Symbol][]*lrItem + reducibleProds []*production + emptyProdItems []*lrItem +} + +func TestGenLR0Automaton(t *testing.T) { + src := ` +#name test; + +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + var gram *Grammar + var automaton *lr0Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + genLR0Item("expr'", 1, "expr"), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + genLR0Item("expr", 1, "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + genLR0Item("term", 1, "factor"), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + genLR0Item("factor", 1, "id"), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + genLR0Item("expr", 3, "expr", "add", "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 10: { + genLR0Item("term", 3, "term", "mul", "factor"), + }, + 11: { + genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("expr'", "expr"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], 
+ }, + reducibleProds: []*production{ + genProd("expr", "term"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "factor"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("factor"): expectedKernels[10], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + genSym("r_paren"): expectedKernels[11], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "expr", "add", "term"), + }, + }, + { + kernelItems: expectedKernels[10], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "term", "mul", "factor"), + }, + }, + { + kernelItems: expectedKernels[11], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton) +} + +func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { + src := ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar + : b + | + ; + +b: "bar"; +` + + var gram *Grammar + var automaton *lr0Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("s'", 0, "s"), + }, + 1: { + genLR0Item("s'", 1, "s"), + }, + 2: { + genLR0Item("s", 1, "foo", "bar"), + }, + 3: { + genLR0Item("s", 2, "foo", "bar"), + }, + 4: { + genLR0Item("bar", 1, "b"), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("foo"): expectedKernels[2], + }, + reducibleProds: 
[]*production{ + genProd("foo"), + }, + emptyProdItems: []*lrItem{ + genLR0Item("foo", 0), + }, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s'", "s"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("bar"): expectedKernels[3], + genSym("b"): expectedKernels[4], + }, + reducibleProds: []*production{ + genProd("bar"), + }, + emptyProdItems: []*lrItem{ + genLR0Item("bar", 0), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "foo", "bar"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("bar", "b"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton) +} + +func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) { + if len(automaton.states) != len(expected) { + t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states)) + } + + for i, eState := range expected { + t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("a kernel was not found: %v", k.id) + } + + // test look-ahead symbols + { + if len(state.kernel.items) != len(eState.kernelItems) { + t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) + } + for _, eKItem := range eState.kernelItems { + var kItem *lrItem + for _, it := range state.kernel.items { + if it.id != eKItem.id { + continue + } + kItem = it + break + } + if kItem == nil { + t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) + } + + if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { + t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) + } + + for eSym := range eKItem.lookAhead.symbols { + if _, ok := kItem.lookAhead.symbols[eSym]; !ok { + t.Errorf("look-ahead symbol not found: %v", eSym) + } + } + } + } + + // test next states + { + if len(state.next) != len(eState.nextStates) { + t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) + } + for eSym, eKItems := range eState.nextStates { + nextStateKernel, err := newKernel(eKItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + nextState, ok := state.next[eSym] + if !ok { + t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) + } + if nextState != nextStateKernel.id { + t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) + } + } + } + + // test reducible productions + { + if len(state.reducible) != len(eState.reducibleProds) { + t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) + } + for _, eProd := range eState.reducibleProds { + if _, ok := state.reducible[eProd.id]; !ok { + t.Errorf("reducible production was not found: %v", eProd.id) + } + } + + if len(state.emptyProdItems) != len(eState.emptyProdItems) { + t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) + } + 
for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } + } + }) + } +} + +type expectedState struct { + kernelItems []*lrItem + acts map[symbol.Symbol]testActionEntry + goTos map[symbol.Symbol][]*lrItem +} + +func TestGenLALRParsingTable(t *testing.T) { + src := ` +#name test; + +s: l eq r | r; +l: ref r | id; +r: l; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var ptab *ParsingTable + var automaton *lalr1Automaton + var gram *Grammar + var nonTermCount int + var termCount int + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + first, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatal(err) + } + automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) + if err != nil { + t.Fatal(err) + } + + nonTermTexts, err := gram.symbolTable.NonTerminalTexts() + if err != nil { + t.Fatal(err) + } + termTexts, err := gram.symbolTable.TerminalTexts() + if err != nil { + t.Fatal(err) + } + nonTermCount = len(nonTermTexts) + termCount = len(termTexts) + + lalr := &lrTableBuilder{ + automaton: automaton.lr0Automaton, + prods: gram.productionSet, + termCount: termCount, + nonTermCount: nonTermCount, + symTab: gram.symbolTable, + } + ptab, err = lalr.build() + if err != nil { + t.Fatalf("failed to create a LALR parsing table: %v", err) + } + if ptab == nil { + t.Fatal("genLALRParsingTable returns nil without any error") + } + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), + }, + 1: { + withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), + }, + 2: { + withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), + withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), + }, + 3: { + withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), + }, + 4: { + withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 5: { + withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), + }, + 6: { + withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), + }, + 7: { + withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 8: { + withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), + }, + 9: { + withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), + }, + } + + expectedStates := []expectedState{ + { + kernelItems: expectedKernels[0], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("l"): expectedKernels[2], + genSym("r"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[1], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s'", "s"), + }, + }, + }, + { + 
kernelItems: expectedKernels[2], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("r", "l"), + }, + }, + }, + { + kernelItems: expectedKernels[3], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s", "r"), + }, + }, + }, + { + kernelItems: expectedKernels[4], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[7], + genSym("l"): expectedKernels[8], + }, + }, + { + kernelItems: expectedKernels[5], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("l", "id"), + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("l", "id"), + }, + }, + }, + { + kernelItems: expectedKernels[6], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("l"): expectedKernels[8], + genSym("r"): expectedKernels[9], + }, + }, + { + kernelItems: expectedKernels[7], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("l", "ref", "r"), + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("l", "ref", "r"), + }, + }, + }, + { + kernelItems: expectedKernels[8], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("r", "l"), + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("r", "l"), + }, + }, + }, + { + kernelItems: expectedKernels[9], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s", "l", "eq", "r"), + }, + }, + }, + } + + t.Run("initial state", func(t *testing.T) { + iniState := findStateByNum(automaton.states, ptab.InitialState) + if iniState == nil { + t.Fatalf("the initial state was not found: #%v", ptab.InitialState) + } + eIniState, err := newKernel(expectedKernels[0]) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + if iniState.id != eIniState.id { + t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) + } + }) + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("state was not found: %v", k.id) + } + + testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) + testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) + }) + } +} + +func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { + nonEmptyEntries := map[symbol.SymbolNum]struct{}{} + for eSym, eAct := range expectedState.acts { + nonEmptyEntries[eSym.Num()] = struct{}{} + + ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num()) + if ty != eAct.ty { + t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) + } + switch 
eAct.ty { + case ActionTypeShift: + eNextState, err := newKernel(eAct.nextState) + if err != nil { + t.Fatal(err) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found; state: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + case ActionTypeReduce: + prod := findProductionByNum(gram.productionSet, prodNum) + if prod == nil { + t.Fatalf("production was not found: #%v", prodNum) + } + if prod.id != eAct.production.id { + t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) + } + } + } + for symNum := 0; symNum < termCount; symNum++ { + if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { + continue + } + ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum)) + if ty != ActionTypeError { + t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, production: #%v", state.num, symNum, ty, stateNum, prodNum) + } + } +} + +func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { + nonEmptyEntries := map[symbol.SymbolNum]struct{}{} + for eSym, eGoTo := range expectedState.goTos { + nonEmptyEntries[eSym.Num()] = struct{}{} + + eNextState, err := newKernel(eGoTo) + if err != nil { + t.Fatal(err) + } + ty, stateNum := ptab.getGoTo(state.num, eSym.Num()) + if ty != GoToTypeRegistered { + t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + } + for symNum := 0; symNum < nonTermCount; symNum++ { + if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { + continue + } + ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum)) + if ty != GoToTypeError { + t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) + } + } +} + +type testActionEntry struct { + ty ActionType + nextState []*lrItem + production *production +} + +func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { + for _, state := range states { + if state.num == num { + return state + } + } + return nil +} + +func findProductionByNum(prods *productionSet, num productionNum) *production { + for _, prod := range prods.getAllProductions() { + if prod.num == num { + return prod + } + } + return nil +} + +type testSymbolGenerator func(text string) symbol.Symbol + +func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator { + return func(text string) symbol.Symbol { + t.Helper() + + sym, ok := symTab.ToSymbol(text) + if !ok { + t.Fatalf("symbol was not found: %v", text) + } + return sym + } +} + +type testProductionGenerator func(lhs string, rhs ...string) *production + +func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator { + return func(lhs string, rhs ...string) *production { + t.Helper() + + rhsSym := []symbol.Symbol{} + for _, text := range rhs { + rhsSym = append(rhsSym, genSym(text)) + } + prod, err := newProduction(genSym(lhs), rhsSym) + if err != nil { + t.Fatalf("failed to create a production: %v", err) + } + + 
return prod + } +} + +type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem + +func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { + return func(lhs string, dot int, rhs ...string) *lrItem { + t.Helper() + + prod := genProd(lhs, rhs...) + item, err := newLR0Item(prod, dot) + if err != nil { + t.Fatalf("failed to create a LR0 item: %v", err) + } + + return item + } +} + +func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem { + if item.lookAhead.symbols == nil { + item.lookAhead.symbols = map[symbol.Symbol]struct{}{} + } + + for _, a := range lookAhead { + item.lookAhead.symbols[a] = struct{}{} + } + + return item +} + + +func MainTest() {} diff --git a/tests/unit/grammar/lexical/dfa/dfa.go b/tests/unit/grammar/lexical/dfa/dfa.go new file mode 100644 index 0000000..3233969 --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/dfa.go @@ -0,0 +1,445 @@ +package dfa + +import ( + "fmt" + "strings" + "testing" + + "urubu/grammar/lexical/parser" + spec "urubu/spec/grammar" +) + +func TestGenDFA(t *testing.T) { + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + dfa := GenDFA(bt, symTab) + if dfa == nil { + t.Fatalf("DFA is nil") + } + + symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) + s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) + s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) + s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) + + rune2Int := func(char rune, index int) uint8 { + return uint8([]byte(string(char))[index]) + } + + tranS0 := [256]string{} + tranS0[rune2Int('a', 0)] = s1.hash() + tranS0[rune2Int('b', 0)] = s0.hash() + + tranS1 := [256]string{} + tranS1[rune2Int('a', 0)] = s1.hash() + tranS1[rune2Int('b', 0)] = s2.hash() + + tranS2 := [256]string{} + tranS2[rune2Int('a', 0)] = s1.hash() + tranS2[rune2Int('b', 0)] = s3.hash() + + tranS3 := [256]string{} + tranS3[rune2Int('a', 0)] = s1.hash() + tranS3[rune2Int('b', 0)] = s0.hash() + + expectedTranTab := map[string][256]string{ + s0.hash(): tranS0, + s1.hash(): tranS1, + s2.hash(): tranS2, + s3.hash(): tranS3, + } + if len(dfa.TransitionTable) != len(expectedTranTab) { + t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) + } + for h, eTranTab := range expectedTranTab { + tranTab, ok := dfa.TransitionTable[h] + if !ok { + t.Errorf("no entry; hash: %v", h) + continue + } + if len(tranTab) != len(eTranTab) { + t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) + } + for c, eNext := range eTranTab { + if eNext == "" { + continue + } + + next := tranTab[c] + if next == "" { + t.Errorf("no entry: hash: %v, char: %v", h, c) + } + if next != eNext { + t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) + } + } + } + + if dfa.InitialState != 
s0.hash() { + t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) + } + + accTab := map[string]spec.LexModeKindID{ + s3.hash(): 1, + } + if len(dfa.AcceptingStatesTable) != len(accTab) { + t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) + } + for eState, eID := range accTab { + id, ok := dfa.AcceptingStatesTable[eState] + if !ok { + t.Errorf("accepting state is not found: state: %v", eState) + } + if id != eID { + t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) + } + } +} + +func TestNewSymbolPosition(t *testing.T) { + tests := []struct { + n uint16 + endMark bool + err bool + }{ + { + n: 0, + endMark: false, + err: true, + }, + { + n: 0, + endMark: true, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: true, + err: true, + }, + { + n: symbolPositionMin, + endMark: false, + }, + { + n: symbolPositionMin, + endMark: true, + }, + { + n: symbolPositionMax, + endMark: false, + }, + { + n: symbolPositionMax, + endMark: true, + }, + { + n: symbolPositionMax + 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMax + 1, + endMark: true, + err: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { + pos, err := newSymbolPosition(tt.n, tt.endMark) + if tt.err { + if err == nil { + t.Fatal("err is nil") + } + return + } + if err != nil { + t.Fatal(err) + } + n, endMark := pos.describe() + if n != tt.n || endMark != tt.endMark { + t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) + } + }) + } +} + +func TestByteTree(t *testing.T) { + tests := []struct { + root byteTree + nullable bool + first *symbolPositionSet + last *symbolPositionSet + }{ + { + root: newSymbolNodeWithPos(0, 1), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newEndMarkerNodeWithPos(1, 1), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newConcatNode( + newSymbolNodeWithPos(0, 1), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(2), + }, + { + root: newConcatNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(2), + }, + { + root: newConcatNode( + newSymbolNodeWithPos(0, 1), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newConcatNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newSymbolNodeWithPos(0, 1), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newSymbolNodeWithPos(0, 2), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + 
newSymbolNodeWithPos(0, 1), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newRepeatNode(newSymbolNodeWithPos(0, 1)), + nullable: true, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newOptionNode(newSymbolNodeWithPos(0, 1)), + nullable: true, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + if tt.root.nullable() != tt.nullable { + t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) + } + if tt.first.hash() != tt.root.first().hash() { + t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first()) + } + if tt.last.hash() != tt.root.last().hash() { + t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) + } + }) + } +} + +func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { + n := newSymbolNode(v) + n.pos = pos + return n +} + +func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { + n := newEndMarkerNode(spec.LexModeKindID(id)) + n.pos = pos + return n +} + +func TestFollowAndSymbolTable(t *testing.T) { + symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + + { + followTab := genFollowTable(bt) + if followTab == nil { + t.Fatal("follow table is nil") + } + expectedFollowTab := followTable{ + 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 3: newSymbolPositionSet().add(symPos(4)), + 4: newSymbolPositionSet().add(symPos(5)), + 5: newSymbolPositionSet().add(endPos(6)), + } + testFollowTable(t, expectedFollowTab, followTab) + } + + { + entry := func(v byte) byteRange { + return byteRange{ + from: v, + to: v, + } + } + + expectedSymTab := &symbolTable{ + symPos2Byte: map[symbolPosition]byteRange{ + symPos(1): entry(byte('a')), + symPos(2): entry(byte('b')), + symPos(3): entry(byte('a')), + symPos(4): entry(byte('b')), + symPos(5): entry(byte('b')), + }, + endPos2ID: map[symbolPosition]spec.LexModeKindID{ + endPos(6): 1, + }, + } + testSymbolTable(t, expectedSymTab, symTab) + } +} + +func testFollowTable(t *testing.T, expected, actual followTable) { + if len(actual) != len(expected) { + t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) + } + for ePos, eSet := range expected { + aSet, ok := actual[ePos] + if !ok { + t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) + } + if aSet.hash() != eSet.hash() { + t.Fatalf("follow entry of 
position %v is mismatched: want: %v, got: %v", ePos, eSet, aSet) + } + } +} + +func testSymbolTable(t *testing.T, expected, actual *symbolTable) { + t.Helper() + + if len(actual.symPos2Byte) != len(expected.symPos2Byte) { + t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) + } + for ePos, eByte := range expected.symPos2Byte { + b, ok := actual.symPos2Byte[ePos] + if !ok { + t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) + continue + } + if b.from != eByte.from || b.to != eByte.to { + t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, b) + } + } + + if len(actual.endPos2ID) != len(expected.endPos2ID) { + t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) + } + for ePos, eID := range expected.endPos2ID { + id, ok := actual.endPos2ID[ePos] + if !ok { + t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) + continue + } + if id != eID { + t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) + } + } +} + + +func MainTest() {} diff --git a/tests/unit/grammar/lexical/dfa/main.go b/tests/unit/grammar/lexical/dfa/main.go new file mode 100644 index 0000000..e0b2fd0 --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/main.go @@ -0,0 +1,7 @@ +package main + +import "dfa" + +func main() { + dfa.MainTest() +} diff --git a/tests/unit/grammar/lexical/lexical.go b/tests/unit/grammar/lexical/lexical.go new file mode 100644 index 0000000..54cace4 --- /dev/null +++ b/tests/unit/grammar/lexical/lexical.go @@ -0,0 +1,341 @@ +package lexical + +import ( + "encoding/json" + "fmt" + "testing" + + spec "urubu/spec/grammar" +) + +func TestLexSpec_Validate(t *testing.T) { + // We expect that the spelling inconsistency error will occur. + spec := &LexSpec{ + Entries: []*LexEntry{ + { + Modes: []spec.LexModeName{ + // 'Default' is a spelling inconsistency because 'default' is predefined. 
+ "Default", + }, + Kind: "foo", + Pattern: "foo", + }, + }, + } + err := spec.Validate() + if err == nil { + t.Fatalf("expected error didn't occur") + } +} + +func TestSnakeCaseToUpperCamelCase(t *testing.T) { + tests := []struct { + snake string + camel string + }{ + { + snake: "foo", + camel: "Foo", + }, + { + snake: "foo_bar", + camel: "FooBar", + }, + { + snake: "foo_bar_baz", + camel: "FooBarBaz", + }, + { + snake: "Foo", + camel: "Foo", + }, + { + snake: "fooBar", + camel: "FooBar", + }, + { + snake: "FOO", + camel: "FOO", + }, + { + snake: "FOO_BAR", + camel: "FOOBAR", + }, + { + snake: "_foo_bar_", + camel: "FooBar", + }, + { + snake: "___foo___bar___", + camel: "FooBar", + }, + } + for _, tt := range tests { + c := SnakeCaseToUpperCamelCase(tt.snake) + if c != tt.camel { + t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) + } + } +} + +func TestFindSpellingInconsistencies(t *testing.T) { + tests := []struct { + ids []string + duplicated [][]string + }{ + { + ids: []string{"foo", "foo"}, + duplicated: nil, + }, + { + ids: []string{"foo", "Foo"}, + duplicated: [][]string{{"Foo", "foo"}}, + }, + { + ids: []string{"foo", "foo", "Foo"}, + duplicated: [][]string{{"Foo", "foo"}}, + }, + { + ids: []string{"foo_bar_baz", "FooBarBaz"}, + duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + duplicated := FindSpellingInconsistencies(tt.ids) + if len(duplicated) != len(tt.duplicated) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) + } + for i, dupIDs := range duplicated { + if len(dupIDs) != len(tt.duplicated[i]) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + for j, id := range dupIDs { + if id != tt.duplicated[i][j] { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + } + } + }) + } +} + +func TestCompile(t *testing.T) { + tests := []struct { + Caption string + Spec string + Err bool + }{ + { + Caption: "allow duplicates names between fragments and non-fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "\\f{a2z}" + }, + { + "fragment": true, + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + }, + { + Caption: "don't allow duplicates names in non-fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" + }, + { + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow duplicates names in fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "\\f{a2z}" + }, + { + "fragments": true, + "kind": "a2z", + "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" + }, + { + "fragments": true, + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow kind names in the same mode to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow kind names across modes to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + 
"entries": [ + { + "modes": ["default"], + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "modes": ["other_mode"], + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow mode names to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + "entries": [ + { + "modes": ["foo_1"], + "kind": "a", + "pattern": "a" + }, + { + "modes": ["foo1"], + "kind": "b", + "pattern": "b" + } + ] +} +`, + Err: true, + }, + { + Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a", + "pattern": "a" + }, + { + "fragment": true, + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "fragment": true, + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + }, + { + Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files", + Spec: ` +{ + "name": "test", + "entries": [ + { + "modes": ["default"], + "kind": "a", + "pattern": "a" + }, + { + "modes": ["default"], + "fragment": true, + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "modes": ["other_mode"], + "fragment": true, + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) { + lspec := &LexSpec{} + err := json.Unmarshal([]byte(tt.Spec), lspec) + if err != nil { + t.Fatalf("%v", err) + } + clspec, err, _ := Compile(lspec, CompressionLevelMin) + if tt.Err { + if err == nil { + t.Fatalf("expected an error") + } + if clspec != nil { + t.Fatalf("Compile function mustn't return a compiled specification") + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if clspec == nil { + t.Fatalf("Compile function must return a compiled specification") + } + } + }) + } +} + + +func MainTest() {} diff --git a/tests/unit/grammar/lexical/main.go b/tests/unit/grammar/lexical/main.go new file mode 100644 index 0000000..19fcce8 --- /dev/null +++ b/tests/unit/grammar/lexical/main.go @@ -0,0 +1,7 @@ +package main + +import "lexical" + +func main() { + lexical.MainTest() +} diff --git a/tests/unit/grammar/lexical/parser/main.go b/tests/unit/grammar/lexical/parser/main.go new file mode 100644 index 0000000..a99bfc4 --- /dev/null +++ b/tests/unit/grammar/lexical/parser/main.go @@ -0,0 +1,7 @@ +package main + +import "parser" + +func main() { + parser.MainTest() +} diff --git a/tests/unit/grammar/lexical/parser/parser.go b/tests/unit/grammar/lexical/parser/parser.go new file mode 100644 index 0000000..288de95 --- /dev/null +++ b/tests/unit/grammar/lexical/parser/parser.go @@ -0,0 +1,1910 @@ +package parser + +import ( + "fmt" + "reflect" + "strings" + "testing" + + spec "urubu/spec/grammar" + "urubu/ucd" +) + +func TestLexer(t *testing.T) { + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "lexer can recognize ordinaly characters", + src: "123abcいろは", + tokens: []*token{ + newToken(tokenKindChar, '1'), + newToken(tokenKindChar, '2'), + newToken(tokenKindChar, '3'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, 'b'), + newToken(tokenKindChar, 'c'), + newToken(tokenKindChar, 'い'), + newToken(tokenKindChar, 'ろ'), + newToken(tokenKindChar, 'は'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in default mode", + src: ".*+?|()[\\u", + tokens: []*token{ + newToken(tokenKindAnyChar, 
nullChar), + newToken(tokenKindRepeat, nullChar), + newToken(tokenKindRepeatOneOrMore, nullChar), + newToken(tokenKindOption, nullChar), + newToken(tokenKindAlt, nullChar), + newToken(tokenKindGroupOpen, nullChar), + newToken(tokenKindGroupClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in default mode", + src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", + tokens: []*token{ + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "], {, and } are treated as an ordinary character in default mode", + src: "]{}", + tokens: []*token{ + newToken(tokenKindChar, ']'), + newToken(tokenKindChar, '{'), + newToken(tokenKindChar, '}'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in bracket expression mode", + src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09AF"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09abcf"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in bracket expression mode", + src: "[\\^a\\-z]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "in a bracket expression, the special characters are also handled as normal characters", + src: "[\\\\.*+?|()[", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // [...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character-range expression. + // b. *-, -*, or - are handled as ordinary characters. 
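+ // For example, "[a-]" below lexes as an ordinary 'a' followed by an ordinary '-', while in "[---]" the middle '-' acts as the range symbol between two ordinary '-' characters.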
+ src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", + // [^...^...][^] + // ~~ ~ ~~ + // ^ ^ ^^ + // | | |`-- Ordinary Character (c) + // | | `-- Bracket Expression + // | `-- Ordinary Character (b) + // `-- Inverse Bracket Expression (a) + // + // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. + // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. + // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. 
+ src: "[^^][^]", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "\\@", + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "\\", + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "[\\@", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "[\\", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer can recognize the special characters and code points in code point expression mode", + src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + 
newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a one digit hex string isn't a valid code point", + src: "\\u{0", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a two digits hex string isn't a valid code point", + src: "\\u{01", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a three digits hex string isn't a valid code point", + src: "\\u{012", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a four digits hex string is a valid code point", + src: "\\u{0123}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a five digits hex string isn't a valid code point", + src: "\\u{01234", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a six digits hex string is a valid code point", + src: "\\u{012345}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("012345"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a seven digits hex string isn't a valid code point", + src: "\\u{0123456", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{g", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{G", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "lexer can recognize the special characters and symbols in character property expression mode", + src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", + tokens: []*token{ + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + 
newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters and symbols in fragment expression mode", + src: "\\f{integer}", + tokens: []*token{ + newToken(tokenKindFragmentLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newFragmentSymbolToken("integer"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a fragment expression is not supported in a bracket expression", + src: "[\\f", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "a fragment expression is not supported in an inverse bracket expression", + src: "[^\\f", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + lex := newLexer(strings.NewReader(tt.src)) + var err error + var tok *token + i := 0 + for { + tok, err = lex.next() + if err != nil { + break + } + if i >= len(tt.tokens) { + break + } + eTok := tt.tokens[i] + i++ + testToken(t, tok, eTok) + + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + detail, cause := lex.error() + if cause != tt.err { + t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + if i < len(tt.tokens) { + t.Fatalf("expected more tokens") + } + }) + } +} + +func testToken(t *testing.T, a, e *token) { + t.Helper() + if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { + t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) + } +} + +func TestParse(t *testing.T) { + tests := []struct { + pattern string + fragments map[spec.LexKindName]string + ast CPTree + syntaxError error + + // When an AST is large, as with patterns containing a character property expression, this test only checks + // that the pattern is parsable. The validity of such an AST is verified by checking that the pattern + // can be matched correctly using the driver. 
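+ // skipTestAST marks cases that are only checked for parsability, not for AST shape.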
+ skipTestAST bool + }{ + { + pattern: "a", + ast: newSymbolNode('a'), + }, + { + pattern: "abc", + ast: genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + }, + { + pattern: "a?", + ast: newOptionNode( + newSymbolNode('a'), + ), + }, + { + pattern: "[abc]?", + ast: newOptionNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\u{3042}?", + ast: newOptionNode( + newSymbolNode('\u3042'), + ), + }, + { + pattern: "\\p{Letter}?", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}?", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newOptionNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "(a)?", + ast: newOptionNode( + newSymbolNode('a'), + ), + }, + { + pattern: "((a?)?)?", + ast: newOptionNode( + newOptionNode( + newOptionNode( + newSymbolNode('a'), + ), + ), + ), + }, + { + pattern: "(abc)?", + ast: newOptionNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "(a|b)?", + ast: newOptionNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + }, + { + pattern: "?", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(?)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|?", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "?|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a??", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a*", + ast: newRepeatNode( + newSymbolNode('a'), + ), + }, + { + pattern: "[abc]*", + ast: newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\u{3042}*", + ast: newRepeatNode( + newSymbolNode('\u3042'), + ), + }, + { + pattern: "\\p{Letter}*", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}*", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newRepeatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "((a*)*)*", + ast: newRepeatNode( + newRepeatNode( + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + }, + { + pattern: "(abc)*", + ast: newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "(a|b)*", + ast: newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + }, + { + pattern: "*", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(*)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|*", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "*|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a**", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a+", + ast: genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + }, + { + pattern: "[abc]+", + ast: genConcatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "\\u{3042}+", + ast: genConcatNode( + newSymbolNode('\u3042'), + newRepeatNode( + newSymbolNode('\u3042'), + ), + ), + }, + { + pattern: "\\p{Letter}+", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}+", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: genConcatNode( + 
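+ // "+" desugars into one occurrence followed by a repetition, so \f{a2c}+ builds the same tree as \f{a2c}\f{a2c}*.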
newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + newRepeatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + ), + }, + { + pattern: "((a+)+)+", + ast: genConcatNode( + genConcatNode( + genConcatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + ), + newRepeatNode( + genConcatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + ), + ), + ), + ), + }, + { + pattern: "(abc)+", + ast: genConcatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "(a|b)+", + ast: genConcatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + ), + }, + { + pattern: "+", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(+)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|+", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "+|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a++", + syntaxError: synErrRepNoTarget, + }, + { + pattern: ".", + ast: newRangeSymbolNode(0x00, 0x10FFFF), + }, + { + pattern: "[a]", + ast: newSymbolNode('a'), + }, + { + pattern: "[abc]", + ast: genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + }, + { + pattern: "[a-z]", + ast: newRangeSymbolNode('a', 'z'), + }, + { + pattern: "[A-Za-z]", + ast: genAltNode( + newRangeSymbolNode('A', 'Z'), + newRangeSymbolNode('a', 'z'), + ), + }, + { + pattern: "[\\u{004E}]", + ast: newSymbolNode('N'), + }, + { + pattern: "[\\u{0061}-\\u{007A}]", + ast: newRangeSymbolNode('a', 'z'), + }, + { + pattern: "[\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[z-a]", + syntaxError: synErrRangeInvalidOrder, + }, + { + pattern: "a[]", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[]a", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[]", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[^\\u{004E}]", + ast: genAltNode( + newRangeSymbolNode(0x00, '\u004E'-1), + newRangeSymbolNode('\u004E'+1, 0x10FFFF), + ), + }, + { + pattern: "[^\\u{0061}-\\u{007A}]", + ast: genAltNode( + newRangeSymbolNode(0x00, '\u0061'-1), + newRangeSymbolNode('\u007A'+1, 0x10FFFF), + ), + }, + { + pattern: "[^\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[^a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\u{0000}-\\u{10FFFF}]", + syntaxError: synErrUnmatchablePattern, + }, + { + pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", + syntaxError: synErrUnmatchablePattern, + }, + { + pattern: "[^]", + ast: newSymbolNode('^'), + }, + { + 
pattern: "[", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "]", + ast: newSymbolNode(']'), + }, + { + pattern: "(]", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "a]", + ast: genConcatNode( + newSymbolNode('a'), + newSymbolNode(']'), + ), + }, + { + pattern: "(a]", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "([)", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a)", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a-]", + ast: genAltNode( + newSymbolNode('a'), + newSymbolNode('-'), + ), + }, + { + pattern: "[^a-]", + ast: genAltNode( + newRangeSymbolNode(0x00, 0x2C), + newRangeSymbolNode(0x2E, 0x60), + newRangeSymbolNode(0x62, 0x10FFFF), + ), + }, + { + pattern: "[-z]", + ast: genAltNode( + newSymbolNode('-'), + newSymbolNode('z'), + ), + }, + { + pattern: "[^-z]", + ast: newAltNode( + newRangeSymbolNode(0x00, 0x2C), + newAltNode( + newRangeSymbolNode(0x2E, 0x79), + newRangeSymbolNode(0x7B, 0x10FFFF), + ), + ), + }, + { + pattern: "[-]", + ast: newSymbolNode('-'), + }, + { + pattern: "[^-]", + ast: genAltNode( + newRangeSymbolNode(0x00, 0x2C), + newRangeSymbolNode(0x2E, 0x10FFFF), + ), + }, + { + pattern: "[^01]", + ast: genAltNode( + newRangeSymbolNode(0x00, '0'-1), + newRangeSymbolNode('1'+1, 0x10FFFF), + ), + }, + { + pattern: "[^10]", + ast: genAltNode( + newRangeSymbolNode(0x00, '0'-1), + newRangeSymbolNode('1'+1, 0x10FFFF), + ), + }, + { + pattern: "[^a-z]", + ast: genAltNode( + newRangeSymbolNode(0x00, 'a'-1), + newRangeSymbolNode('z'+1, 0x10FFFF), + ), + }, + { + pattern: "[^az]", + ast: genAltNode( + newRangeSymbolNode(0x00, 'a'-1), + genAltNode( + newRangeSymbolNode('a'+1, 'z'-1), + newRangeSymbolNode('z'+1, 0x10FFFF), + ), + ), + }, + { + pattern: "\\u{006E}", + ast: newSymbolNode('\u006E'), + }, + { + pattern: "\\u{03BD}", + ast: newSymbolNode('\u03BD'), + }, + { + pattern: "\\u{306B}", + ast: newSymbolNode('\u306B'), + }, + { + pattern: "\\u{01F638}", + ast: newSymbolNode('\U0001F638'), + }, + { + pattern: "\\u{0000}", + ast: newSymbolNode('\u0000'), + }, + { + pattern: "\\u{10FFFF}", + ast: newSymbolNode('\U0010FFFF'), + }, + { + pattern: "\\u{110000}", + syntaxError: synErrCPExpOutOfRange, + }, + { + pattern: "\\u", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{03BD", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{}", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\p{Letter}", + skipTestAST: true, + }, + { + pattern: "\\p{General_Category=Letter}", + skipTestAST: true, + }, + { + pattern: "\\p{ Letter }", + skipTestAST: true, + }, + { + pattern: "\\p{ General_Category = Letter }", + skipTestAST: true, + }, + { + pattern: "\\p", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{", + syntaxError: 
synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{Letter", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{General_Category=}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{General_Category= }", + syntaxError: synErrCharPropInvalidSymbol, + }, + { + pattern: "\\p{=Letter}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{ =Letter}", + syntaxError: synErrCharPropInvalidSymbol, + }, + { + pattern: "\\p{=}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\f{a2c}", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\f{ a2c }", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\f", + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "\\f{", + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "\\f{a2c", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "(a)", + ast: newSymbolNode('a'), + }, + { + pattern: "(((a)))", + ast: newSymbolNode('a'), + }, + { + pattern: "a()", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "()a", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "()", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "(", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "a(", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "(a", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "((", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "((a)", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: ")", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "a)", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: ")a", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "))", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "(a))", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "Mulder|Scully", + ast: genAltNode( + genConcatNode( + newSymbolNode('M'), + newSymbolNode('u'), + newSymbolNode('l'), + newSymbolNode('d'), + newSymbolNode('e'), + newSymbolNode('r'), + ), + genConcatNode( + newSymbolNode('S'), + newSymbolNode('c'), + newSymbolNode('u'), + newSymbolNode('l'), + newSymbolNode('l'), + newSymbolNode('y'), + ), + ), + }, + { + pattern: "Langly|Frohike|Byers", + ast: genAltNode( + genConcatNode( + newSymbolNode('L'), + newSymbolNode('a'), + newSymbolNode('n'), + newSymbolNode('g'), + newSymbolNode('l'), + newSymbolNode('y'), + ), + genConcatNode( + newSymbolNode('F'), + newSymbolNode('r'), + newSymbolNode('o'), + newSymbolNode('h'), + newSymbolNode('i'), + newSymbolNode('k'), + newSymbolNode('e'), + ), + genConcatNode( + newSymbolNode('B'), + newSymbolNode('y'), + newSymbolNode('e'), + newSymbolNode('r'), + newSymbolNode('s'), + ), + ), + }, + { + pattern: "|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "||", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Mulder|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Scully", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Langly|Frohike|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Langly||Byers", + syntaxError: 
synErrAltLackOfOperand, + }, + { + pattern: "|Frohike|Byers", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Frohike|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Fox(|)Mulder", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "(Fox|)Mulder", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Fox(|Mulder)", + syntaxError: synErrAltLackOfOperand, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { + fragmentTrees := map[spec.LexKindName]CPTree{} + for kind, pattern := range tt.fragments { + p := NewParser(kind, strings.NewReader(pattern)) + root, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + fragmentTrees[kind] = root + } + err := CompleteFragments(fragmentTrees) + if err != nil { + t.Fatal(err) + } + + p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) + root, err := p.Parse() + if tt.syntaxError != nil { + // printCPTree(os.Stdout, root, "", "") + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + _, synErr := p.Error() + if synErr != tt.syntaxError { + t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) + } + if root != nil { + t.Fatalf("tree must be nil") + } + } else { + if err != nil { + detail, cause := p.Error() + t.Fatalf("%v: %v: %v", err, cause, detail) + } + if root == nil { + t.Fatal("tree must be non-nil") + } + + complete, err := ApplyFragments(root, fragmentTrees) + if err != nil { + t.Fatal(err) + } + if !complete { + t.Fatalf("incomplete fragments") + } + + // printCPTree(os.Stdout, root, "", "") + if !tt.skipTestAST { + r := root.(*rootNode) + testAST(t, tt.ast, r.tree) + } + } + }) + } +} + +func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { + for _, cProp := range ucd.ContributoryProperties() { + t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { + p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) + root, err := p.Parse() + if err == nil { + t.Fatalf("expected syntax error: got: nil") + } + _, synErr := p.Error() + if synErr != synErrCharPropUnsupported { + t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) + } + if root != nil { + t.Fatalf("tree is not nil") + } + }) + } +} + +func TestExclude(t *testing.T) { + for _, test := range []struct { + caption string + target CPTree + base CPTree + result CPTree + }{ + // t.From > b.From && t.To < b.To + + // |t.From - b.From| = 1 + // |b.To - t.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", + target: newSymbolNode('1'), + base: newRangeSymbolNode('0', '2'), + result: newAltNode( + newSymbolNode('0'), + newSymbolNode('2'), + ), + }, + // |t.From - b.From| > 1 + // |b.To - t.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+--+--+ + // Result (b - t): +--+--+ +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", + target: newSymbolNode('2'), + base: newRangeSymbolNode('0', '4'), + result: newAltNode( + newRangeSymbolNode('0', '1'), + newRangeSymbolNode('3', '4'), + ), + }, + + // t.From <= b.From && t.To >= b.From && t.To < b.To + + // |b.From - t.From| = 0 + // |t.To - b.From| = 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", + target: newSymbolNode('0'), + base: 
newRangeSymbolNode('0', '1'), + result: newSymbolNode('1'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| = 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", + target: newSymbolNode('0'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('1', '2'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| > 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('2'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| > 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('2', '3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| = 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('1', '2'), + result: newSymbolNode('2'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| = 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('1', '3'), + result: newRangeSymbolNode('2', '3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| > 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '2'), + base: newRangeSymbolNode('1', '3'), + result: newSymbolNode('3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| > 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '2'), + base: newRangeSymbolNode('1', '4'), + result: newRangeSymbolNode('3', '4'), + }, + + // t.From > b.From && t.From <= b.To && t.To >= b.To + + // |t.From - b.From| = 1 + // |b.To - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('1'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('1', '2'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| > 
0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('1', '2'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('1', '3'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('2'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('2', '3'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('2', '3'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('2', '4'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('0', '1'), + }, + + // t.From <= b.From && t.To >= b.To + + // |b.From - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('0'), + base: newSymbolNode('0'), + result: nil, + }, + // |b.From - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('0', '1'), + base: newSymbolNode('0'), + result: nil, + }, + // |b.From - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('0', '1'), + base: newSymbolNode('1'), + result: nil, + }, + // |b.From - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('0', '2'), + base: newSymbolNode('1'), + result: nil, + }, + + // Others + + // |b.From - t.From| = 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 1", + target: newSymbolNode('0'), + base: newSymbolNode('1'), + result: newSymbolNode('1'), + }, + // |b.From - t.From| > 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 1", + target: newSymbolNode('0'), + base: newSymbolNode('2'), + result: 
newSymbolNode('2'), + }, + // |t.To - b.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|t.To - b.To| = 1", + target: newSymbolNode('1'), + base: newSymbolNode('0'), + result: newSymbolNode('0'), + }, + // |t.To - b.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|t.To - b.To| > 1", + target: newSymbolNode('2'), + base: newSymbolNode('0'), + result: newSymbolNode('0'), + }, + } { + t.Run(test.caption, func(t *testing.T) { + r := exclude(test.target, test.base) + testAST(t, test.result, r) + }) + } +} + +func testAST(t *testing.T, expected, actual CPTree) { + t.Helper() + + aTy := reflect.TypeOf(actual) + eTy := reflect.TypeOf(expected) + if eTy != aTy { + t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) + } + + if actual == nil { + return + } + + switch e := expected.(type) { + case *symbolNode: + a := actual.(*symbolNode) + if a.From != e.From || a.To != e.To { + t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) + } + } + eLeft, eRight := expected.children() + aLeft, aRight := actual.children() + testAST(t, eLeft, aLeft) + testAST(t, eRight, aRight) +} + + +func MainTest() {} diff --git a/tests/unit/grammar/main.go b/tests/unit/grammar/main.go new file mode 100644 index 0000000..a68adfd --- /dev/null +++ b/tests/unit/grammar/main.go @@ -0,0 +1,7 @@ +package main + +import "grammar" + +func main() { + grammar.MainTest() +} diff --git a/tests/unit/grammar/symbol/main.go b/tests/unit/grammar/symbol/main.go new file mode 100644 index 0000000..721161d --- /dev/null +++ b/tests/unit/grammar/symbol/main.go @@ -0,0 +1,7 @@ +package main + +import "symbol" + +func main() { + symbol.MainTest() +} diff --git a/tests/unit/grammar/symbol/symbol.go b/tests/unit/grammar/symbol/symbol.go new file mode 100644 index 0000000..3e23c2b --- /dev/null +++ b/tests/unit/grammar/symbol/symbol.go @@ -0,0 +1,162 @@ +package symbol + +import "testing" + +func TestSymbol(t *testing.T) { + tab := NewSymbolTable() + w := tab.Writer() + _, _ = w.RegisterStartSymbol("expr'") + _, _ = w.RegisterNonTerminalSymbol("expr") + _, _ = w.RegisterNonTerminalSymbol("term") + _, _ = w.RegisterNonTerminalSymbol("factor") + _, _ = w.RegisterTerminalSymbol("id") + _, _ = w.RegisterTerminalSymbol("add") + _, _ = w.RegisterTerminalSymbol("mul") + _, _ = w.RegisterTerminalSymbol("l_paren") + _, _ = w.RegisterTerminalSymbol("r_paren") + + nonTermTexts := []string{ + "", // Nil + "expr'", + "expr", + "term", + "factor", + } + + termTexts := []string{ + "", // Nil + symbolNameEOF, // EOF + "id", + "add", + "mul", + "l_paren", + "r_paren", + } + + tests := []struct { + text string + isNil bool + isStart bool + isEOF bool + isNonTerminal bool + isTerminal bool + }{ + { + text: "expr'", + isStart: true, + isNonTerminal: true, + }, + { + text: "expr", + isNonTerminal: true, + }, + { + text: "term", + isNonTerminal: true, + }, + { + text: "factor", + isNonTerminal: true, + }, + { + text: "id", + isTerminal: true, + }, + { + text: "add", + isTerminal: true, + }, + { + text: "mul", + isTerminal: true, + }, + { + text: "l_paren", + isTerminal: true, + }, + { + text: "r_paren", + isTerminal: true, + }, + } + for _, tt := range tests { + t.Run(tt.text, func(t *testing.T) { + r := tab.Reader() + sym, ok := r.ToSymbol(tt.text) + if !ok { + t.Fatalf("symbol was not found") + } + testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal) + text, ok := r.ToText(sym) + if !ok { + t.Fatalf("text was not found") + } 
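+ // Round-trip check: the text recovered via ToText must match the text originally passed to ToSymbol.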
+ if text != tt.text { + t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text) + } + }) + } + + t.Run("EOF", func(t *testing.T) { + testSymbolProperty(t, SymbolEOF, false, false, true, false, true) + }) + + t.Run("Nil", func(t *testing.T) { + testSymbolProperty(t, SymbolNil, true, false, false, false, false) + }) + + t.Run("texts of non-terminals", func(t *testing.T) { + r := tab.Reader() + ts, err := r.NonTerminalTexts() + if err != nil { + t.Fatal(err) + } + if len(ts) != len(nonTermTexts) { + t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts) + } + for i, text := range ts { + if text != nonTermTexts[i] { + t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text) + } + } + }) + + t.Run("texts of terminals", func(t *testing.T) { + r := tab.Reader() + ts, err := r.TerminalTexts() + if err != nil { + t.Fatal(err) + } + if len(ts) != len(termTexts) { + t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts) + } + for i, text := range ts { + if text != termTexts[i] { + t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text) + } + } + }) +} + +func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) { + t.Helper() + + if v := sym.IsNil(); v != isNil { + t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v) + } + if v := sym.IsStart(); v != isStart { + t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v) + } + if v := sym.isEOF(); v != isEOF { + t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v) + } + if v := sym.isNonTerminal(); v != isNonTerminal { + t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v) + } + if v := sym.IsTerminal(); v != isTerminal { + t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v) + } +} + + +func MainTest() {} diff --git a/tests/unit/spec/grammar/parser/main.go b/tests/unit/spec/grammar/parser/main.go new file mode 100644 index 0000000..a99bfc4 --- /dev/null +++ b/tests/unit/spec/grammar/parser/main.go @@ -0,0 +1,7 @@ +package main + +import "parser" + +func main() { + parser.MainTest() +} diff --git a/tests/unit/spec/grammar/parser/parser.go b/tests/unit/spec/grammar/parser/parser.go new file mode 100644 index 0000000..8170518 --- /dev/null +++ b/tests/unit/spec/grammar/parser/parser.go @@ -0,0 +1,1442 @@ +package parser + +import ( + "strings" + "testing" + + verr "urubu/error" +) + +func TestLexer_Run(t *testing.T) { + idTok := func(text string) *token { + return newIDToken(text, newPosition(1, 0)) + } + + termPatTok := func(text string) *token { + return newTerminalPatternToken(text, newPosition(1, 0)) + } + + strTok := func(text string) *token { + return newStringLiteralToken(text, newPosition(1, 0)) + } + + symTok := func(kind tokenKind) *token { + return newSymbolToken(kind, newPosition(1, 0)) + } + + invalidTok := func(text string) *token { + return newInvalidToken(text, newPosition(1, 0)) + } + + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "the lexer can recognize all kinds of tokens", + src: `id"terminal"'string':|;@...#$()`, + tokens: []*token{ + idTok("id"), + termPatTok("terminal"), + strTok(`string`), + symTok(tokenKindColon), + symTok(tokenKindOr), + symTok(tokenKindSemicolon), + symTok(tokenKindLabelMarker), + symTok(tokenKindExpantion), + 
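+ // '#' marks a directive and '$' an ordered symbol; '...' just above is the expansion operator.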
symTok(tokenKindDirectiveMarker), + symTok(tokenKindOrderedSymbolMarker), + symTok(tokenKindLParen), + symTok(tokenKindRParen), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize keywords", + src: `fragment`, + tokens: []*token{ + symTok(tokenKindKWFragment), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize character sequences and escape sequences in a terminal", + src: `"abc\"\\"`, + tokens: []*token{ + termPatTok(`abc"\\`), + newEOFToken(), + }, + }, + { + caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", + src: `'\\\'`, + tokens: []*token{ + strTok(`\\\`), + newEOFToken(), + }, + }, + { + caption: "a pattern must include at least one character", + src: `""`, + err: synErrEmptyPattern, + }, + { + caption: "a string must include at least one character", + src: `''`, + err: synErrEmptyString, + }, + { + caption: "the lexer can recognize newlines and combine consecutive newlines into one", + src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", + tokens: []*token{ + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "the lexer ignores line comments", + src: ` +// This is the first comment. +foo +// This is the second comment. +// This is the third comment. +bar // This is the fourth comment. +`, + tokens: []*token{ + symTok(tokenKindNewline), + idTok("foo"), + symTok(tokenKindNewline), + idTok("bar"), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "an identifier cannot contain capital-case letters", + src: `Abc`, + err: synErrIDInvalidChar, + }, + { + caption: "an identifier cannot contain capital-case letters", + src: `Zyx`, + err: synErrIDInvalidChar, + }, + { + caption: "the underscore cannot be placed at the beginning of an identifier", + src: `_abc`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed at the end of an identifier", + src: `abc_`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed consecutively", + src: `a__b`, + err: synErrIDConsecutiveUnderscores, + }, + { + caption: "digits cannot be placed at the beginning of an identifier", + src: `0abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "digits cannot be placed at the beginning of an identifier", + src: `9abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "an unclosed terminal is not a valid token", + src: `"abc`, + err: synErrUnclosedTerminal, + }, + { + caption: "an incomplete escape sequence in a pattern is not a valid token", + src: `"\`, + err: synErrIncompletedEscSeq, + }, + { + caption: "an unclosed string is not a valid token", + src: `'abc`, + err: synErrUnclosedString, + }, + { + caption: "the lexer can recognize valid tokens following an invalid token", + src: `abc!!!def`, + tokens: []*token{ + idTok("abc"), + invalidTok("!!!"), + idTok("def"), + newEOFToken(), + }, + }, + { + caption: "the lexer skips white spaces", + // \u0009: HT + // \u0020: SP + src: "a\u0009b\u0020c", + tokens: []*token{ + idTok("a"), + idTok("b"), + idTok("c"), + newEOFToken(), + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + l, err := newLexer(strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + n := 0 + for { + var tok *token + tok, err = l.next() + if err != nil { 
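+ // Stop at the first lexing error; whether it was the expected one is asserted after the loop.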
+ break + } + testToken(t, tok, tt.tokens[n]) + n++ + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + synErr, ok := err.(*verr.SpecError) + if !ok { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + if tt.err != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) + } + } else { + if err != nil { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + } + }) + } +} + +func testToken(t *testing.T, tok, expected *token) { + t.Helper() + if tok.kind != expected.kind || tok.text != expected.text { + t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) + } +} + +func TestParse(t *testing.T) { + name := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "name", + Parameters: []*ParameterNode{param}, + } + } + prec := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "prec", + Parameters: []*ParameterNode{param}, + } + } + leftAssoc := func(params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "left", + Parameters: params, + } + } + rightAssoc := func(params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "right", + Parameters: params, + } + } + assign := func(params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "assign", + Parameters: params, + } + } + prod := func(lhs string, alts ...*AlternativeNode) *ProductionNode { + return &ProductionNode{ + LHS: lhs, + RHS: alts, + } + } + withProdPos := func(prod *ProductionNode, pos Position) *ProductionNode { + prod.Pos = pos + return prod + } + withProdDir := func(prod *ProductionNode, dirs ...*DirectiveNode) *ProductionNode { + prod.Directives = dirs + return prod + } + alt := func(elems ...*ElementNode) *AlternativeNode { + return &AlternativeNode{ + Elements: elems, + } + } + withAltPos := func(alt *AlternativeNode, pos Position) *AlternativeNode { + alt.Pos = pos + return alt + } + withAltDir := func(alt *AlternativeNode, dirs ...*DirectiveNode) *AlternativeNode { + alt.Directives = dirs + return alt + } + dir := func(name string, params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: name, + Parameters: params, + } + } + withDirPos := func(dir *DirectiveNode, pos Position) *DirectiveNode { + dir.Pos = pos + return dir + } + idParam := func(id string) *ParameterNode { + return &ParameterNode{ + ID: id, + } + } + ordSymParam := func(id string) *ParameterNode { + return &ParameterNode{ + OrderedSymbol: id, + } + } + exp := func(param *ParameterNode) *ParameterNode { + param.Expansion = true + return param + } + group := func(dirs ...*DirectiveNode) *ParameterNode { + return &ParameterNode{ + Group: dirs, + } + } + withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { + param.Pos = pos + return param + } + id := func(id string) *ElementNode { + return &ElementNode{ + ID: id, + } + } + pat := func(p string) *ElementNode { + return &ElementNode{ + Pattern: p, + } + } + label := func(name string) *LabelNode { + return &LabelNode{ + Name: name, + } + } + withLabelPos := func(label *LabelNode, pos Position) *LabelNode { + label.Pos = pos + return label + } + withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode { + elem.Label = label + return elem + } + withElemPos := func(elem *ElementNode, pos Position) *ElementNode { + elem.Pos = pos + return elem + } + frag := func(lhs string, rhs string) *FragmentNode { + return &FragmentNode{ + LHS: lhs, + RHS: rhs, + } + } + withFragmentPos := 
func(frag *FragmentNode, pos Position) *FragmentNode {
+		frag.Pos = pos
+		return frag
+	}
+	newPos := func(row int) Position {
+		return Position{
+			Row: row,
+			Col: 0,
+		}
+	}
+
+	tests := []struct {
+		caption       string
+		src           string
+		checkPosition bool
+		ast           *RootNode
+		synErr        *SyntaxError
+	}{
+		{
+			caption: "a grammar can contain top-level directives",
+			src: `
+#name test;
+
+#prec (
+    #left a b $x1
+    #right c d $x2
+    #assign e f $x3
+);
+`,
+			ast: &RootNode{
+				Directives: []*DirectiveNode{
+					withDirPos(
+						name(
+							withParamPos(
+								idParam("test"),
+								newPos(2),
+							),
+						),
+						newPos(2),
+					),
+					withDirPos(
+						prec(
+							withParamPos(
+								group(
+									withDirPos(
+										leftAssoc(
+											withParamPos(
+												idParam("a"),
+												newPos(5),
+											),
+											withParamPos(
+												idParam("b"),
+												newPos(5),
+											),
+											withParamPos(
+												ordSymParam("x1"),
+												newPos(5),
+											),
+										),
+										newPos(5),
+									),
+									withDirPos(
+										rightAssoc(
+											withParamPos(
+												idParam("c"),
+												newPos(6),
+											),
+											withParamPos(
+												idParam("d"),
+												newPos(6),
+											),
+											withParamPos(
+												ordSymParam("x2"),
+												newPos(6),
+											),
+										),
+										newPos(6),
+									),
+									withDirPos(
+										assign(
+											withParamPos(
+												idParam("e"),
+												newPos(7),
+											),
+											withParamPos(
+												idParam("f"),
+												newPos(7),
+											),
+											withParamPos(
+												ordSymParam("x3"),
+												newPos(7),
+											),
+										),
+										newPos(7),
+									),
+								),
+								newPos(4),
+							),
+						),
+						newPos(4),
+					),
+				},
+			},
+		},
+		{
+			caption: "a top-level directive must be followed by ';'",
+			src: `
+#name test
+`,
+			synErr: synErrTopLevelDirNoSemicolon,
+		},
+		{
+			caption: "a directive group must be closed by ')'",
+			src: `
+#prec (
+    #left a b
+;
+`,
+			synErr: synErrUnclosedDirGroup,
+		},
+		{
+			caption: "an ordered symbol marker '$' must be followed by an ID",
+			src: `
+#prec (
+    #assign $
+);
+`,
+			synErr: synErrNoOrderedSymbolName,
+		},
+		{
+			caption: "a single production is a valid grammar",
+			src:     `a: "a";`,
+			ast: &RootNode{
+				LexProductions: []*ProductionNode{
+					prod("a", alt(pat("a"))),
+				},
+			},
+		},
+		{
+			caption: "multiple productions are a valid grammar",
+			src: `
+e
+    : e add t
+    | e sub t
+    | t
+    ;
+t
+    : t mul f
+    | t div f
+    | f
+    ;
+f
+    : l_paren e r_paren
+    | id
+    ;
+
+add
+    : '+';
+sub
+    : '-';
+mul
+    : '*';
+div
+    : '/';
+l_paren
+    : '(';
+r_paren
+    : ')';
+id
+    : "[A-Za-z_][0-9A-Za-z_]*";
+`,
+			ast: &RootNode{
+				Productions: []*ProductionNode{
+					prod("e",
+						alt(id("e"), id("add"), id("t")),
+						alt(id("e"), id("sub"), id("t")),
+						alt(id("t")),
+					),
+					prod("t",
+						alt(id("t"), id("mul"), id("f")),
+						alt(id("t"), id("div"), id("f")),
+						alt(id("f")),
+					),
+					prod("f",
+						alt(id("l_paren"), id("e"), id("r_paren")),
+						alt(id("id")),
+					),
+				},
+				LexProductions: []*ProductionNode{
+					prod("add", alt(pat(`+`))),
+					prod("sub", alt(pat(`-`))),
+					prod("mul", alt(pat(`*`))),
+					prod("div", alt(pat(`/`))),
+					prod("l_paren", alt(pat(`(`))),
+					prod("r_paren", alt(pat(`)`))),
+					prod("id", alt(pat(`[A-Za-z_][0-9A-Za-z_]*`))),
+				},
+			},
+		},
+		{
+			caption: "productions can contain the empty alternative",
+			src: `
+a
+    : foo
+    |
+    ;
+b
+    :
+    | bar
+    ;
+c
+    :
+    ;
+
+foo
+    : 'foo';
+bar
+    : 'bar';
+`,
+			ast: &RootNode{
+				Productions: []*ProductionNode{
+					prod("a",
+						alt(id("foo")),
+						alt(),
+					),
+					prod("b",
+						alt(),
+						alt(id("bar")),
+					),
+					prod("c",
+						alt(),
+					),
+				},
+				LexProductions: []*ProductionNode{
+					prod("foo", alt(pat(`foo`))),
+					prod("bar", alt(pat(`bar`))),
+				},
+			},
+		},
+		{
+			caption: "a production cannot contain an ordered symbol",
+			src: `
+a: $x;
+`,
+			synErr: synErrNoSemicolon,
+		},
+		{
+			caption: "an alternative cannot contain a pattern directly",
+			src: `
+s
+    : "foo" bar
+    ;
+
+bar
+    : "bar";
+`,
+			synErr: synErrPatternInAlt,
+		},
+		{
+			caption: "an alternative cannot contain a string directly",
+			src: `
+s
+    : 'foo' bar
+    ;
+bar
+    : "bar";
+`,
+			synErr: synErrPatternInAlt,
+		},
+		{
+			caption: "a terminal symbol can be defined using a string literal",
+			src: `
+foo
+    : 'foo';
+`,
+			ast: &RootNode{
+				LexProductions: []*ProductionNode{
+					prod("foo",
+						alt(pat(`foo`)),
+					),
+				},
+			},
+		},
+		{
+			caption: "a terminal symbol can be defined using a pattern",
+			src: `
+foo
+    : "foo";
+`,
+			ast: &RootNode{
+				LexProductions: []*ProductionNode{
+					prod("foo",
+						alt(pat(`foo`)),
+					),
+				},
+			},
+		},
+		{
+			caption: "`fragment` is a reserved word",
+			src:     `fragment: 'fragment';`,
+			synErr:  synErrNoProductionName,
+		},
+		{
+			caption: "when a source contains an unknown token, the parser raises a syntax error",
+			src:     `a: !;`,
+			synErr:  synErrInvalidToken,
+		},
+		{
+			caption: "a production must have its name as the first element",
+			src:     `: "a";`,
+			synErr:  synErrNoProductionName,
+		},
+		{
+			caption: "':' must precede an alternative",
+			src:     `a "a";`,
+			synErr:  synErrNoColon,
+		},
+		{
+			caption: "';' must follow a production",
+			src:     `a: "a"`,
+			synErr:  synErrNoSemicolon,
+		},
+		{
+			caption: "';' can only appear at the end of a production",
+			src:     `;`,
+			synErr:  synErrNoProductionName,
+		},
+		{
+			caption: "a grammar can contain fragments",
+			src: `
+s
+    : tagline
+    ;
+tagline: "\f{words} IS OUT THERE.";
+fragment words: "[A-Za-z\u{0020}]+";
+`,
+			ast: &RootNode{
+				Productions: []*ProductionNode{
+					prod("s",
+						alt(id("tagline")),
+					),
+				},
+				LexProductions: []*ProductionNode{
+					prod("tagline",
+						alt(pat(`\f{words} IS OUT THERE.`)),
+					),
+				},
+				Fragments: []*FragmentNode{
+					frag("words", `[A-Za-z\u{0020}]+`),
+				},
+			},
+		},
+		{
+			caption: "the lexer treats consecutive newlines as a single token but can count lines correctly",
+			src: `// This line precedes line comments and blank lines.
+// This is a line comment.
+
+
+s
+    : foo
+    ;
+
+
+// This line is sandwiched between blank lines.
+ + +foo: 'foo'; +`, + checkPosition: true, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("s", + withAltPos( + alt( + withElemPos( + id("foo"), + newPos(6), + ), + ), + newPos(6), + ), + ), + newPos(5), + ), + }, + LexProductions: []*ProductionNode{ + withProdPos( + prod("foo", + withAltPos( + alt( + withElemPos( + pat(`foo`), + newPos(13), + ), + ), + newPos(13), + ), + ), + newPos(13), + ), + }, + }, + }, + { + caption: "a grammar can contain production directives and alternative directives", + src: ` +mode_tran_seq + : mode_tran_seq mode_tran + | mode_tran + ; +mode_tran + : push_m1 + | push_m2 + | pop_m1 + | pop_m2 + ; + +push_m1 #push m1 + : "->"; +push_m2 #mode m1 #push m2 + : "-->"; +pop_m1 #mode m1 #pop + : "<-"; +pop_m2 #mode m2 #pop + : "<--"; +whitespace #mode default m1 m2 #skip + : "\u{0020}+"; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("mode_tran_seq", + alt(id("mode_tran_seq"), id("mode_tran")), + alt(id("mode_tran")), + ), + prod("mode_tran", + alt(id("push_m1")), + alt(id("push_m2")), + alt(id("pop_m1")), + alt(id("pop_m2")), + ), + }, + LexProductions: []*ProductionNode{ + withProdDir( + prod("push_m1", + alt(pat(`->`)), + ), + dir("push", idParam("m1")), + ), + withProdDir( + prod("push_m2", + alt(pat(`-->`)), + ), + dir("mode", idParam("m1")), + dir("push", idParam("m2")), + ), + withProdDir( + prod("pop_m1", + alt(pat(`<-`)), + ), + dir("mode", idParam("m1")), + dir("pop"), + ), + withProdDir( + prod("pop_m2", + alt(pat(`<--`)), + ), + dir("mode", idParam("m2")), + dir("pop"), + ), + withProdDir( + prod("whitespace", + alt(pat(`\u{0020}+`)), + ), + dir("mode", idParam("default"), idParam("m1"), idParam("m2")), + dir("skip"), + ), + }, + }, + }, + { + caption: "an alternative of a production can have multiple alternative directives", + src: ` +s + : foo bar #prec baz #ast foo bar + ; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("s", + withAltDir( + alt(id("foo"), id("bar")), + dir("prec", idParam("baz")), + dir("ast", idParam("foo"), idParam("bar")), + ), + ), + }, + }, + }, + { + caption: "a lexical production can have multiple production directives", + src: ` +foo #mode a #push b + : 'foo'; +`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + withProdDir( + prod("foo", + alt(pat("foo")), + ), + dir("mode", idParam("a")), + dir("push", idParam("b")), + ), + }, + }, + }, + { + caption: "a production must be followed by a newline", + src: ` +s: foo; foo: "foo"; +`, + synErr: synErrSemicolonNoNewline, + }, + { + caption: "a grammar can contain 'ast' directives and expansion operator", + src: ` +s + : foo bar_list #ast foo bar_list + ; +bar_list + : bar_list bar #ast bar_list... bar + | bar #ast bar + ; +foo: "foo"; +bar: "bar"; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("s", + withAltDir( + alt(id("foo"), id("bar_list")), + dir("ast", idParam("foo"), idParam("bar_list")), + ), + ), + prod("bar_list", + withAltDir( + alt(id("bar_list"), id("bar")), + dir("ast", exp(idParam("bar_list")), idParam("bar")), + ), + withAltDir( + alt(id("bar")), + dir("ast", idParam("bar")), + ), + ), + }, + LexProductions: []*ProductionNode{ + prod("foo", + alt(pat("foo")), + ), + prod("bar", + alt(pat("bar")), + ), + }, + }, + }, + { + caption: "an expansion operator must be preceded by an identifier", + src: ` +s + : foo #ast ... + ; +`, + synErr: synErrStrayExpOp, + }, + { + caption: "an expansion operator must be preceded by an identifier", + src: ` +a + : foo #ast ... 
foo + ; +`, + synErr: synErrStrayExpOp, + }, + { + caption: "an expansion operator cannot be applied to a pattern", + src: ` +a + : "foo" #ast "foo"... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an expansion operator cannot be applied to a string", + src: ` +a + : 'foo' #ast 'foo'... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an expansion operator cannot be applied to an ordered symbol", + src: ` +a + : foo #ast $foo... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an expansion operator cannot be applied to a directive group", + src: ` +a + : foo #ast ()... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an AST has node positions", + src: ` +exp + : exp add id #ast exp id + | id + ; + +whitespace #skip + : "\u{0020}+"; +add + : '+'; +id + : "\f{letter}(\f{letter}|\f{number})*"; +fragment letter + : "[A-Za-z_]"; +fragment number + : "[0-9]"; +`, + checkPosition: true, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("exp", + withAltPos( + withAltDir( + alt( + withElemPos(id("exp"), newPos(3)), + withElemPos(id("add"), newPos(3)), + withElemPos(id("id"), newPos(3)), + ), + withDirPos( + dir("ast", + withParamPos(idParam("exp"), newPos(3)), + withParamPos(idParam("id"), newPos(3)), + ), + newPos(3), + ), + ), + newPos(3), + ), + withAltPos( + alt( + withElemPos(id("id"), newPos(4)), + ), + newPos(4), + ), + ), + newPos(2), + ), + }, + LexProductions: []*ProductionNode{ + withProdPos( + withProdDir( + prod("whitespace", + withAltPos( + alt( + withElemPos( + pat(`\u{0020}+`), + newPos(8), + ), + ), + newPos(8), + ), + ), + withDirPos( + dir("skip"), + newPos(7), + ), + ), + newPos(7), + ), + withProdPos( + prod("add", + withAltPos( + alt( + withElemPos( + pat(`+`), + newPos(10), + ), + ), + newPos(10), + ), + ), + newPos(9), + ), + withProdPos( + prod("id", + withAltPos( + alt( + withElemPos( + pat(`\f{letter}(\f{letter}|\f{number})*`), + newPos(12), + ), + ), + newPos(12), + ), + ), + newPos(11), + ), + }, + Fragments: []*FragmentNode{ + withFragmentPos( + frag("letter", "[A-Za-z_]"), + newPos(13), + ), + withFragmentPos( + frag("number", "[0-9]"), + newPos(15), + ), + }, + }, + }, + { + caption: "a symbol can have a label", + src: ` +expr + : term@lhs add term@rhs + ; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("expr", + withAltPos( + alt( + withElemPos( + withLabel( + id("term"), + withLabelPos( + label("lhs"), + newPos(3), + ), + ), + newPos(3), + ), + withElemPos( + id("add"), + newPos(3), + ), + withElemPos( + withLabel( + id("term"), + withLabelPos( + label("rhs"), + newPos(3), + ), + ), + newPos(3), + ), + ), + newPos(3), + ), + ), + newPos(2), + ), + }, + }, + }, + { + caption: "a label must be an identifier, not a string", + src: ` +foo + : bar@'baz' + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "a label must be an identifier, not a pattern", + src: ` +foo + : bar@"baz" + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "the symbol marker @ must be followed by an identifier", + src: ` +foo + : bar@ + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "a symbol cannot have more than or equal to two labels", + src: ` +foo + : bar@baz@bra + ; +`, + synErr: synErrLabelWithNoSymbol, + }, + { + caption: "a label must follow a symbol", + src: ` +foo + : @baz + ; +`, + synErr: synErrLabelWithNoSymbol, + }, + { + caption: "a grammar can contain left and right associativities", + src: ` +#prec ( + #left l1 l2 + #left l3 + #right r1 r2 + #right r3 +); + 
+s + : id l1 id l2 id l3 id + | id r1 id r2 id r3 id + ; + +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +l1 + : 'l1'; +l2 + : 'l2'; +l3 + : 'l3'; +r1 + : 'r1'; +r2 + : 'r2'; +r3 + : 'r3'; +id + : "[A-Za-z0-9_]+"; +`, + ast: &RootNode{ + Directives: []*DirectiveNode{ + withDirPos( + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos(idParam("l1"), newPos(3)), + withParamPos(idParam("l2"), newPos(3)), + ), + newPos(3), + ), + withDirPos( + leftAssoc( + withParamPos(idParam("l3"), newPos(4)), + ), + newPos(4), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r1"), newPos(5)), + withParamPos(idParam("r2"), newPos(5)), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r3"), newPos(6)), + ), + newPos(6), + ), + ), + newPos(2), + ), + ), + newPos(2), + ), + }, + Productions: []*ProductionNode{ + prod("s", + alt(id(`id`), id(`l1`), id(`id`), id(`l2`), id(`id`), id(`l3`), id(`id`)), + alt(id(`id`), id(`r1`), id(`id`), id(`r2`), id(`id`), id(`r3`), id(`id`)), + ), + }, + LexProductions: []*ProductionNode{ + withProdDir( + prod("whitespaces", + alt(pat(`[\u{0009}\u{0020}]+`)), + ), + dir("skip"), + ), + prod("l1", alt(pat(`l1`))), + prod("l2", alt(pat(`l2`))), + prod("l3", alt(pat(`l3`))), + prod("r1", alt(pat(`r1`))), + prod("r2", alt(pat(`r2`))), + prod("r3", alt(pat(`r3`))), + prod("id", alt(pat(`[A-Za-z0-9_]+`))), + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + ast, err := Parse(strings.NewReader(tt.src)) + if tt.synErr != nil { + synErrs, ok := err.(verr.SpecErrors) + if !ok { + t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err) + } + synErr := synErrs[0] + if tt.synErr != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause) + } + if ast != nil { + t.Fatalf("AST must be nil") + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ast == nil { + t.Fatalf("AST must be non-nil") + } + testRootNode(t, ast, tt.ast, tt.checkPosition) + } + }) + } +} + +func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { + t.Helper() + if len(root.Productions) != len(expected.Productions) { + t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) + } + if len(root.Directives) != len(expected.Directives) { + t.Fatalf("unexpected length of top-level directives; want: %v, got: %v", len(expected.Directives), len(root.Directives)) + } + for i, dir := range root.Directives { + testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true) + } + for i, prod := range root.Productions { + testProductionNode(t, prod, expected.Productions[i], checkPosition) + } + for i, prod := range root.LexProductions { + testProductionNode(t, prod, expected.LexProductions[i], checkPosition) + } + for i, frag := range root.Fragments { + testFragmentNode(t, frag, expected.Fragments[i], checkPosition) + } +} + +func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) { + t.Helper() + if len(expected.Directives) != len(prod.Directives) { + t.Fatalf("unexpected directive count; want: %v directives, got: %v directives", len(expected.Directives), len(prod.Directives)) + } + if len(expected.Directives) > 0 { + testDirectives(t, prod.Directives, expected.Directives, checkPosition) + } + if prod.LHS != expected.LHS { + t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS) + } + if len(prod.RHS) != len(expected.RHS) { + 
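// An RHS count mismatch would misalign the per-alternative checks below, so fail fast.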
+		t.Fatalf("unexpected length of an RHS; want: %v, got: %v", len(expected.RHS), len(prod.RHS))
+	}
+	for i, alt := range prod.RHS {
+		testAlternativeNode(t, alt, expected.RHS[i], checkPosition)
+	}
+	if checkPosition {
+		testPosition(t, prod.Pos, expected.Pos)
+	}
+}
+
+func testFragmentNode(t *testing.T, frag, expected *FragmentNode, checkPosition bool) {
+	t.Helper()
+	if frag.LHS != expected.LHS {
+		t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, frag.LHS)
+	}
+	if frag.RHS != expected.RHS {
+		t.Fatalf("unexpected RHS; want: %v, got: %v", expected.RHS, frag.RHS)
+	}
+	if checkPosition {
+		testPosition(t, frag.Pos, expected.Pos)
+	}
+}
+
+func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosition bool) {
+	t.Helper()
+	if len(alt.Elements) != len(expected.Elements) {
+		t.Fatalf("unexpected length of elements; want: %v, got: %v", len(expected.Elements), len(alt.Elements))
+	}
+	for i, elem := range alt.Elements {
+		testElementNode(t, elem, expected.Elements[i], checkPosition)
+	}
+	if len(alt.Directives) != len(expected.Directives) {
+		t.Fatalf("unexpected alternative directive count; want: %v directives, got: %v directives", len(expected.Directives), len(alt.Directives))
+	}
+	if len(alt.Directives) > 0 {
+		testDirectives(t, alt.Directives, expected.Directives, checkPosition)
+	}
+	if checkPosition {
+		testPosition(t, alt.Pos, expected.Pos)
+	}
+}
+
+func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bool) {
+	t.Helper()
+	if elem.ID != expected.ID {
+		t.Fatalf("unexpected ID; want: %v, got: %v", expected.ID, elem.ID)
+	}
+	if elem.Pattern != expected.Pattern {
+		t.Fatalf("unexpected pattern; want: %v, got: %v", expected.Pattern, elem.Pattern)
+	}
+	if checkPosition {
+		testPosition(t, elem.Pos, expected.Pos)
+	}
+}
+
+func testDirectives(t *testing.T, dirs, expected []*DirectiveNode, checkPosition bool) {
+	t.Helper()
+	for i, exp := range expected {
+		dir := dirs[i]
+
+		if exp.Name != dir.Name {
+			t.Fatalf("unexpected directive name; want: %+v, got: %+v", exp.Name, dir.Name)
+		}
+		if len(exp.Parameters) != len(dir.Parameters) {
+			t.Fatalf("unexpected directive parameters; want: %+v, got: %+v", exp.Parameters, dir.Parameters)
+		}
+		for j, expParam := range exp.Parameters {
+			testParameter(t, dir.Parameters[j], expParam, checkPosition)
+		}
+		if checkPosition {
+			testPosition(t, dir.Pos, exp.Pos)
+		}
+	}
+}
+
+func testParameter(t *testing.T, param, expected *ParameterNode, checkPosition bool) {
+	t.Helper()
+	if param.ID != expected.ID {
+		t.Fatalf("unexpected ID parameter; want: %v, got: %v", expected.ID, param.ID)
+	}
+	if param.String != expected.String {
+		t.Fatalf("unexpected string parameter; want: %v, got: %v", expected.String, param.String)
+	}
+	if param.Expansion != expected.Expansion {
+		t.Fatalf("unexpected expansion; want: %v, got: %v", expected.Expansion, param.Expansion)
+	}
+	if checkPosition {
+		testPosition(t, param.Pos, expected.Pos)
+	}
+}
+
+func testPosition(t *testing.T, pos, expected Position) {
+	t.Helper()
+	if pos.Row != expected.Row {
+		t.Fatalf("unexpected position; want: %+v, got: %+v", expected, pos)
+	}
+}
+
+
+func MainTest() {}
diff --git a/tests/unit/spec/test/main.go b/tests/unit/spec/test/main.go
new file mode 100644
index 0000000..0f9aed7
--- /dev/null
+++ b/tests/unit/spec/test/main.go
@@ -0,0 +1,7 @@
+package main
+
+import "test"
+
+func main() {
+	test.MainTest()
+}
diff --git a/tests/unit/spec/test/test.go b/tests/unit/spec/test/test.go
new file mode 100644
index 0000000..c6ec8f1
--- /dev/null
+++ b/tests/unit/spec/test/test.go @@ -0,0 +1,414 @@ +package test + +import ( + "fmt" + "reflect" + "strings" + "testing" +) + +func TestTree_Format(t *testing.T) { + expected := `(a + (b + (c)) + (d) + (e))` + tree := NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + NewNonTerminalTree("d"), + NewNonTerminalTree("e"), + ) + actual := string(tree.Format()) + if actual != expected { + t.Fatalf("unexpected format:\n%v", actual) + } +} + +func TestDiffTree(t *testing.T) { + tests := []struct { + t1 *Tree + t2 *Tree + different bool + }{ + { + t1: NewTerminalNode("a", "a"), + t2: NewTerminalNode("a", "a"), + }, + { + t1: NewTerminalNode("a", "a"), + t2: NewTerminalNode("a", "A"), + different: true, + }, + { + t1: NewTerminalNode("a", "a"), + t2: NewTerminalNode("A", "a"), + different: true, + }, + { + t1: NewNonTerminalTree("a"), + t2: NewNonTerminalTree("a"), + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + NewNonTerminalTree("d", + NewNonTerminalTree("d"), + ), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + NewNonTerminalTree("d", + NewNonTerminalTree("d"), + ), + ), + }, + { + t1: NewNonTerminalTree("a"), + t2: NewNonTerminalTree("b"), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + t2: NewNonTerminalTree("a"), + different: true, + }, + { + t1: NewNonTerminalTree("a"), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("c"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("d"), + ), + ), + different: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + diffs := DiffTree(tt.t1, tt.t2) + if tt.different && len(diffs) == 0 { + t.Fatalf("unexpected result") + } else if !tt.different && len(diffs) > 0 { + t.Fatalf("unexpected result") + } + }) + } +} + +func TestParseTestCase(t *testing.T) { + tests := []struct { + src string + tc *TestCase + parseErr bool + }{ + { + src: `test +--- +foo +--- +(foo) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + { + src: ` +test + +--- + +foo + +--- + +(foo) + +`, + tc: &TestCase{ + Description: "\ntest\n", + Source: []byte("\nfoo\n"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // The length of a part delimiter may be greater than 
3. + { + src: ` +test +---- +foo +---- +(foo) +`, + tc: &TestCase{ + Description: "\ntest", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // The description part may be empty. + { + src: `---- +foo +---- +(foo) +`, + tc: &TestCase{ + Description: "", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // The source part may be empty. + { + src: `test +--- +--- +(foo) +`, + tc: &TestCase{ + Description: "test", + Source: []byte{}, + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error, + // but we allow it to simplify the implementation of the parser. + { + src: `test +---- +foo +---- +(foo) +--- +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + { + src: ``, + parseErr: true, + }, + { + src: `test +--- +`, + parseErr: true, + }, + { + src: `test +--- +foo +`, + parseErr: true, + }, + { + src: `test +--- +foo +--- +`, + parseErr: true, + }, + { + src: `test +-- +foo +-- +(foo) +`, + parseErr: true, + }, + // A node may have just one string node. + { + src: `test +---- +foo bar +---- +(foo (bar 'bar')) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo bar"), + Output: NewNonTerminalTree("foo", + NewTerminalNode("bar", "bar"), + ).Fill(), + }, + }, + // A node may have just one pattern node. + { + src: `test +---- +foo bar +---- +(foo (bar "bar")) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo bar"), + Output: NewNonTerminalTree("foo", + NewTerminalNode("bar", "bar"), + ).Fill(), + }, + }, + // A node may be the error node. + { + src: `test +---- +foo x +---- +(foo (error)) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo x"), + Output: NewNonTerminalTree("foo", + NewTerminalNode("error", ""), + ).Fill(), + }, + }, + // The error node cannot have a string node. + { + src: `test +---- +foo x +---- +(foo (error 'x')) +`, + parseErr: true, + }, + // The error node cannot have a pattern node. + { + src: `test +---- +foo x +---- +(foo (error "x")) +`, + parseErr: true, + }, + // The error node cannot have another node. + { + src: `test +---- +foo x +---- +(foo (error (a))) +`, + parseErr: true, + }, + { + src: `test +--- +foo +--- +? 
+`, + parseErr: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + tc, err := ParseTestCase(strings.NewReader(tt.src)) + if tt.parseErr { + if err == nil { + t.Fatalf("an expected error didn't occur") + } + } else { + if err != nil { + t.Fatal(err) + } + testTestCase(t, tt.tc, tc) + } + }) + } +} + +func testTestCase(t *testing.T, expected, actual *TestCase) { + t.Helper() + + if expected.Description != actual.Description || + !reflect.DeepEqual(expected.Source, actual.Source) || + len(DiffTree(expected.Output, actual.Output)) > 0 { + t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual) + } +} + + +func MainTest() {} diff --git a/tests/unit/tester/main.go b/tests/unit/tester/main.go new file mode 100644 index 0000000..4a85b25 --- /dev/null +++ b/tests/unit/tester/main.go @@ -0,0 +1,7 @@ +package main + +import "tester" + +func main() { + tester.MainTest() +} diff --git a/tests/unit/tester/tester.go b/tests/unit/tester/tester.go new file mode 100644 index 0000000..9245cc4 --- /dev/null +++ b/tests/unit/tester/tester.go @@ -0,0 +1,172 @@ +package tester + +import ( + "fmt" + "strings" + "testing" + + "urubu/grammar" + "urubu/spec/grammar/parser" + tspec "urubu/spec/test" +) + +func TestTester_Run(t *testing.T) { + grammarSrc1 := ` +#name test; + +s + : foo bar baz + | foo error baz #recover + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +` + + grammarSrc2 := ` +#name test; + +s + : foos + ; +foos + : foos foo #ast foos... foo + | foo + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +foo + : 'foo'; +` + + tests := []struct { + grammarSrc string + testSrc string + error bool + }{ + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s + (foo 'foo') (bar 'bar') (baz 'baz')) +`, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo ? 
baz +--- +(s + (foo 'foo') (error) (baz 'baz')) +`, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s) +`, + error: true, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s + (foo) (bar)) +`, + error: true, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s + (foo) (bar) (xxx)) +`, + error: true, + }, + { + grammarSrc: grammarSrc2, + testSrc: ` +Test +--- +foo foo foo +--- +(s + (foos + (foo 'foo') (foo 'foo') (foo 'foo'))) +`, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(tt.grammarSrc)) + if err != nil { + t.Fatal(err) + } + b := grammar.GrammarBuilder{ + AST: ast, + } + cg, _, err := b.Build() + if err != nil { + t.Fatal(err) + } + c, err := tspec.ParseTestCase(strings.NewReader(tt.testSrc)) + if err != nil { + t.Fatal(err) + } + tester := &Tester{ + Grammar: cg, + Cases: []*TestCaseWithMetadata{ + { + TestCase: c, + }, + }, + } + rs := tester.Run() + if tt.error { + errOccurred := false + for _, r := range rs { + if r.Error != nil { + errOccurred = true + } + } + if !errOccurred { + t.Fatal("this test must fail, but it passed") + } + } else { + for _, r := range rs { + if r.Error != nil { + t.Fatalf("unexpected error occurred: %v", r.Error) + } + } + } + }) + } +} + + +func MainTest() {} diff --git a/tests/unit/urubu/compressor.go b/tests/unit/urubu/compressor.go deleted file mode 100644 index 621b731..0000000 --- a/tests/unit/urubu/compressor.go +++ /dev/null @@ -1,122 +0,0 @@ -package compressor - -import ( - "fmt" - "testing" -) - -func TestCompressor_Compress(t *testing.T) { - x := 0 // an empty value - - allCompressors := func() []Compressor { - return []Compressor{ - NewUniqueEntriesTable(), - NewRowDisplacementTable(x), - } - } - - tests := []struct { - original []int - rowCount int - colCount int - compressors []Compressor - }{ - { - original: []int{ - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - x, x, x, x, x, - x, x, x, x, x, - x, x, x, x, x, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - 1, 1, 1, 1, 1, - x, x, x, x, x, - 1, 1, 1, 1, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - 1, x, 1, 1, 1, - 1, 1, x, 1, 1, - 1, 1, 1, x, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - } - for i, tt := range tests { - for _, comp := range tt.compressors { - t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { - dup := make([]int, len(tt.original)) - copy(dup, tt.original) - - orig, err := NewOriginalTable(tt.original, tt.colCount) - if err != nil { - t.Fatal(err) - } - err = comp.Compress(orig) - if err != nil { - t.Fatal(err) - } - rowCount, colCount := comp.OriginalTableSize() - if rowCount != tt.rowCount || colCount != tt.colCount { - t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) - } - for i := 0; i < tt.rowCount; i++ { - for j := 0; j < tt.colCount; j++ { - v, err := comp.Lookup(i, j) - if err != nil { - t.Fatal(err) - } - expected := tt.original[i*tt.colCount+j] - if v != expected { - t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) - } - } - } - - // Calling with out-of-range indexes should be an error. 
- if _, err := comp.Lookup(0, -1); err == nil { - t.Fatalf("expected error didn't occur (0, -1)") - } - if _, err := comp.Lookup(-1, 0); err == nil { - t.Fatalf("expected error didn't occur (-1, 0)") - } - if _, err := comp.Lookup(rowCount-1, colCount); err == nil { - t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) - } - if _, err := comp.Lookup(rowCount, colCount-1); err == nil { - t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) - } - - // The compressor must not break the original table. - for i := 0; i < tt.rowCount; i++ { - for j := 0; j < tt.colCount; j++ { - idx := i*tt.colCount + j - if tt.original[idx] != dup[idx] { - t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) - } - } - } - }) - } - } -} diff --git a/tests/unit/urubu/compressor.main.go b/tests/unit/urubu/compressor.main.go deleted file mode 100644 index a1bc202..0000000 --- a/tests/unit/urubu/compressor.main.go +++ /dev/null @@ -1,7 +0,0 @@ -package main - -import "compressor" - -func main() { - compressor.MainTest() -} diff --git a/tests/unit/urubu/driver/lexer.go b/tests/unit/urubu/driver/lexer.go deleted file mode 100644 index a3d0231..0000000 --- a/tests/unit/urubu/driver/lexer.go +++ /dev/null @@ -1,932 +0,0 @@ -package lexer - -import ( - "bytes" - "fmt" - "strings" - "testing" - - "urubu/grammar/lexical" - spec "urubu/spec/grammar" -) - -func newLexEntry(modes []string, kind string, pattern string, push string, pop bool) *lexical.LexEntry { - ms := []spec.LexModeName{} - for _, m := range modes { - ms = append(ms, spec.LexModeName(m)) - } - return &lexical.LexEntry{ - Kind: spec.LexKindName(kind), - Pattern: pattern, - Modes: ms, - Push: spec.LexModeName(push), - Pop: pop, - } -} - -func newLexEntryDefaultNOP(kind string, pattern string) *lexical.LexEntry { - return &lexical.LexEntry{ - Kind: spec.LexKindName(kind), - Pattern: pattern, - Modes: []spec.LexModeName{ - spec.LexModeNameDefault, - }, - } -} - -func newLexEntryFragment(kind string, pattern string) *lexical.LexEntry { - return &lexical.LexEntry{ - Kind: spec.LexKindName(kind), - Pattern: pattern, - Fragment: true, - } -} - -func newToken(modeID ModeID, kindID KindID, modeKindID ModeKindID, lexeme []byte) *Token { - return &Token{ - ModeID: modeID, - KindID: kindID, - ModeKindID: modeKindID, - Lexeme: lexeme, - } -} - -func newTokenDefault(kindID int, modeKindID int, lexeme []byte) *Token { - return newToken( - ModeID(spec.LexModeIDDefault.Int()), - KindID(spec.LexKindID(kindID).Int()), - ModeKindID(spec.LexModeKindID(modeKindID).Int()), - lexeme, - ) -} - -func newEOFToken(modeID ModeID, modeName string) *Token { - return &Token{ - ModeID: modeID, - ModeKindID: 0, - EOF: true, - } -} - -func newEOFTokenDefault() *Token { - return newEOFToken(ModeID(spec.LexModeIDDefault.Int()), spec.LexModeNameDefault.String()) -} - -func newInvalidTokenDefault(lexeme []byte) *Token { - return &Token{ - ModeID: ModeID(spec.LexModeIDDefault.Int()), - ModeKindID: 0, - Lexeme: lexeme, - Invalid: true, - } -} - -func withPos(tok *Token, bytePos int, byteLen int, row int, col int) *Token { - tok.BytePos = bytePos - tok.ByteLen = byteLen - tok.Row = row - tok.Col = col - return tok -} - -func TestLexer_Next(t *testing.T) { - test := []struct { - lspec *lexical.LexSpec - src string - tokens []*Token - passiveModeTran bool - tran func(l *Lexer, tok *Token) error - }{ - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "(a|b)*abb"), - 
newLexEntryDefaultNOP("t2", " +"), - }, - }, - src: "abb aabb aaabb babb bbabb abbbabb", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("abb")), 0, 3, 0, 0), - withPos(newTokenDefault(2, 2, []byte(" ")), 3, 1, 0, 3), - withPos(newTokenDefault(1, 1, []byte("aabb")), 4, 4, 0, 4), - withPos(newTokenDefault(2, 2, []byte(" ")), 8, 1, 0, 8), - withPos(newTokenDefault(1, 1, []byte("aaabb")), 9, 5, 0, 9), - withPos(newTokenDefault(2, 2, []byte(" ")), 14, 1, 0, 14), - withPos(newTokenDefault(1, 1, []byte("babb")), 15, 4, 0, 15), - withPos(newTokenDefault(2, 2, []byte(" ")), 19, 1, 0, 19), - withPos(newTokenDefault(1, 1, []byte("bbabb")), 20, 5, 0, 20), - withPos(newTokenDefault(2, 2, []byte(" ")), 25, 1, 0, 25), - withPos(newTokenDefault(1, 1, []byte("abbbabb")), 26, 7, 0, 26), - withPos(newEOFTokenDefault(), 33, 0, 0, 33), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "b?a+"), - newLexEntryDefaultNOP("t2", "(ab)?(cd)+"), - newLexEntryDefaultNOP("t3", " +"), - }, - }, - src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("ba")), 0, 2, 0, 0), - withPos(newTokenDefault(3, 3, []byte(" ")), 2, 1, 0, 2), - withPos(newTokenDefault(1, 1, []byte("baaa")), 3, 4, 0, 3), - withPos(newTokenDefault(3, 3, []byte(" ")), 7, 1, 0, 7), - withPos(newTokenDefault(1, 1, []byte("a")), 8, 1, 0, 8), - withPos(newTokenDefault(3, 3, []byte(" ")), 9, 1, 0, 9), - withPos(newTokenDefault(1, 1, []byte("aaa")), 10, 3, 0, 10), - withPos(newTokenDefault(3, 3, []byte(" ")), 13, 1, 0, 13), - withPos(newTokenDefault(2, 2, []byte("abcd")), 14, 4, 0, 14), - withPos(newTokenDefault(3, 3, []byte(" ")), 18, 1, 0, 18), - withPos(newTokenDefault(2, 2, []byte("abcdcdcd")), 19, 8, 0, 19), - withPos(newTokenDefault(3, 3, []byte(" ")), 27, 1, 0, 27), - withPos(newTokenDefault(2, 2, []byte("cd")), 28, 2, 0, 28), - withPos(newTokenDefault(3, 3, []byte(" ")), 30, 1, 0, 30), - withPos(newTokenDefault(2, 2, []byte("cdcdcd")), 31, 6, 0, 31), - withPos(newEOFTokenDefault(), 37, 0, 0, 37), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "."), - }, - }, - src: string([]byte{ - 0x00, - 0x7f, - 0xc2, 0x80, - 0xdf, 0xbf, - 0xe1, 0x80, 0x80, - 0xec, 0xbf, 0xbf, - 0xed, 0x80, 0x80, - 0xed, 0x9f, 0xbf, - 0xee, 0x80, 0x80, - 0xef, 0xbf, 0xbf, - 0xf0, 0x90, 0x80, 0x80, - 0xf0, 0xbf, 0xbf, 0xbf, - 0xf1, 0x80, 0x80, 0x80, - 0xf3, 0xbf, 0xbf, 0xbf, - 0xf4, 0x80, 0x80, 0x80, - 0xf4, 0x8f, 0xbf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0x00}), 0, 1, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0x7f}), 1, 1, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xc2, 0x80}), 2, 2, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbf}), 4, 2, 0, 3), - withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x80}), 6, 3, 0, 4), - withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbf}), 9, 3, 0, 5), - withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x80}), 12, 3, 0, 6), - withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbf}), 15, 3, 0, 7), - withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x80}), 18, 3, 0, 8), - withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbf}), 21, 3, 0, 9), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 24, 4, 0, 10), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 28, 4, 0, 11), - withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x80}), 32, 4, 0, 12), - withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 
0xbf}), 36, 4, 0, 13), - withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x80}), 40, 4, 0, 14), - withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), 44, 4, 0, 15), - withPos(newEOFTokenDefault(), 48, 0, 0, 16), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "[ab.*+?|()[\\]]"), - }, - }, - src: "ab.*+?|()[]", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("a")), 0, 1, 0, 0), - withPos(newTokenDefault(1, 1, []byte("b")), 1, 1, 0, 1), - withPos(newTokenDefault(1, 1, []byte(".")), 2, 1, 0, 2), - withPos(newTokenDefault(1, 1, []byte("*")), 3, 1, 0, 3), - withPos(newTokenDefault(1, 1, []byte("+")), 4, 1, 0, 4), - withPos(newTokenDefault(1, 1, []byte("?")), 5, 1, 0, 5), - withPos(newTokenDefault(1, 1, []byte("|")), 6, 1, 0, 6), - withPos(newTokenDefault(1, 1, []byte("(")), 7, 1, 0, 7), - withPos(newTokenDefault(1, 1, []byte(")")), 8, 1, 0, 8), - withPos(newTokenDefault(1, 1, []byte("[")), 9, 1, 0, 9), - withPos(newTokenDefault(1, 1, []byte("]")), 10, 1, 0, 10), - withPos(newEOFTokenDefault(), 11, 0, 0, 11), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // all 1 byte characters except null character (U+0000) - // - // NOTE: - // vartan cannot handle the null character in patterns because lexical.lexer, - // specifically read() and restore(), recognizes the null characters as that a symbol doesn't exist. - // If a pattern needs a null character, use code point expression \u{0000}. - newLexEntryDefaultNOP("char_1_byte", "[\x01-\x7f]"), - }, - }, - src: string([]byte{ - 0x01, - 0x02, - 0x7e, - 0x7f, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0x01}), 0, 1, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0x02}), 1, 1, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0x7e}), 2, 1, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0x7f}), 3, 1, 0, 3), - withPos(newEOFTokenDefault(), 4, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // all 2 byte characters - newLexEntryDefaultNOP("char_2_byte", "[\xc2\x80-\xdf\xbf]"), - }, - }, - src: string([]byte{ - 0xc2, 0x80, - 0xc2, 0x81, - 0xdf, 0xbe, - 0xdf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xc2, 0x80}), 0, 2, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xc2, 0x81}), 2, 2, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbe}), 4, 2, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xdf, 0xbf}), 6, 2, 0, 3), - withPos(newEOFTokenDefault(), 8, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // All bytes are the same. - newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"), - }, - }, - src: string([]byte{ - 0xe0, 0xa0, 0x80, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), - withPos(newEOFTokenDefault(), 3, 0, 0, 1), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // The first two bytes are the same. 
- newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"), - }, - }, - src: string([]byte{ - 0xe0, 0xa0, 0x80, - 0xe0, 0xa0, 0x81, - 0xe0, 0xa0, 0xbe, - 0xe0, 0xa0, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0xbe}), 6, 3, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0xbf}), 9, 3, 0, 3), - withPos(newEOFTokenDefault(), 12, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // The first byte are the same. - newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), - }, - }, - src: string([]byte{ - 0xe0, 0xa0, 0x80, - 0xe0, 0xa0, 0x81, - 0xe0, 0xbf, 0xbe, - 0xe0, 0xbf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbe}), 6, 3, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbf}), 9, 3, 0, 3), - withPos(newEOFTokenDefault(), 12, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // all 3 byte characters - newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"), - }, - }, - src: string([]byte{ - 0xe0, 0xa0, 0x80, - 0xe0, 0xa0, 0x81, - 0xe0, 0xbf, 0xbe, - 0xe0, 0xbf, 0xbf, - 0xe1, 0x80, 0x80, - 0xe1, 0x80, 0x81, - 0xec, 0xbf, 0xbe, - 0xec, 0xbf, 0xbf, - 0xed, 0x80, 0x80, - 0xed, 0x80, 0x81, - 0xed, 0x9f, 0xbe, - 0xed, 0x9f, 0xbf, - 0xee, 0x80, 0x80, - 0xee, 0x80, 0x81, - 0xef, 0xbf, 0xbe, - 0xef, 0xbf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x80}), 0, 3, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xa0, 0x81}), 3, 3, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbe}), 6, 3, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xe0, 0xbf, 0xbf}), 9, 3, 0, 3), - withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x80}), 12, 3, 0, 4), - withPos(newTokenDefault(1, 1, []byte{0xe1, 0x80, 0x81}), 15, 3, 0, 5), - withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbe}), 18, 3, 0, 6), - withPos(newTokenDefault(1, 1, []byte{0xec, 0xbf, 0xbf}), 21, 3, 0, 7), - withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x80}), 24, 3, 0, 8), - withPos(newTokenDefault(1, 1, []byte{0xed, 0x80, 0x81}), 27, 3, 0, 9), - withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbe}), 30, 3, 0, 10), - withPos(newTokenDefault(1, 1, []byte{0xed, 0x9f, 0xbf}), 33, 3, 0, 11), - withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x80}), 36, 3, 0, 12), - withPos(newTokenDefault(1, 1, []byte{0xee, 0x80, 0x81}), 39, 3, 0, 13), - withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbe}), 42, 3, 0, 14), - withPos(newTokenDefault(1, 1, []byte{0xef, 0xbf, 0xbf}), 45, 3, 0, 15), - withPos(newEOFTokenDefault(), 48, 0, 0, 16), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // All bytes are the same. - newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), - }, - }, - src: string([]byte{ - 0xf0, 0x90, 0x80, 0x80, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), - withPos(newEOFTokenDefault(), 4, 0, 0, 1), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // The first 3 bytes are the same. 
- newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), - }, - }, - src: string([]byte{ - 0xf0, 0x90, 0x80, 0x80, - 0xf0, 0x90, 0x80, 0x81, - 0xf0, 0x90, 0x80, 0xbe, - 0xf0, 0x90, 0x80, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0xbe}), 8, 4, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0xbf}), 12, 4, 0, 3), - withPos(newEOFTokenDefault(), 16, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // The first 2 bytes are the same. - newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), - }, - }, - src: string([]byte{ - 0xf0, 0x90, 0x80, 0x80, - 0xf0, 0x90, 0x80, 0x81, - 0xf0, 0x90, 0xbf, 0xbe, - 0xf0, 0x90, 0xbf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0xbf, 0xbe}), 8, 4, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0xbf, 0xbf}), 12, 4, 0, 3), - withPos(newEOFTokenDefault(), 16, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // The first byte are the same. - newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), - }, - }, - src: string([]byte{ - 0xf0, 0x90, 0x80, 0x80, - 0xf0, 0x90, 0x80, 0x81, - 0xf0, 0xbf, 0xbf, 0xbe, - 0xf0, 0xbf, 0xbf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbe}), 8, 4, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 12, 4, 0, 3), - withPos(newEOFTokenDefault(), 16, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // all 4 byte characters - newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), - }, - }, - src: string([]byte{ - 0xf0, 0x90, 0x80, 0x80, - 0xf0, 0x90, 0x80, 0x81, - 0xf0, 0xbf, 0xbf, 0xbe, - 0xf0, 0xbf, 0xbf, 0xbf, - 0xf1, 0x80, 0x80, 0x80, - 0xf1, 0x80, 0x80, 0x81, - 0xf3, 0xbf, 0xbf, 0xbe, - 0xf3, 0xbf, 0xbf, 0xbf, - 0xf4, 0x80, 0x80, 0x80, - 0xf4, 0x80, 0x80, 0x81, - 0xf4, 0x8f, 0xbf, 0xbe, - 0xf4, 0x8f, 0xbf, 0xbf, - }), - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x80}), 0, 4, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0x90, 0x80, 0x81}), 4, 4, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbe}), 8, 4, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), 12, 4, 0, 3), - withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x80}), 16, 4, 0, 4), - withPos(newTokenDefault(1, 1, []byte{0xf1, 0x80, 0x80, 0x81}), 20, 4, 0, 5), - withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbe}), 24, 4, 0, 6), - withPos(newTokenDefault(1, 1, []byte{0xf3, 0xbf, 0xbf, 0xbf}), 28, 4, 0, 7), - withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x80}), 32, 4, 0, 8), - withPos(newTokenDefault(1, 1, []byte{0xf4, 0x80, 0x80, 0x81}), 36, 4, 0, 9), - withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbe}), 40, 4, 0, 10), - withPos(newTokenDefault(1, 1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), 44, 4, 0, 11), - withPos(newEOFTokenDefault(), 48, 0, 0, 
12), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("non_number", "[^0-9]+[0-9]"), - }, - }, - src: "foo9", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("foo9")), 0, 4, 0, 0), - withPos(newEOFTokenDefault(), 4, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("char_1_byte", "\\u{006E}"), - newLexEntryDefaultNOP("char_2_byte", "\\u{03BD}"), - newLexEntryDefaultNOP("char_3_byte", "\\u{306B}"), - newLexEntryDefaultNOP("char_4_byte", "\\u{01F638}"), - }, - }, - src: "nνに😸", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0x6E}), 0, 1, 0, 0), - withPos(newTokenDefault(2, 2, []byte{0xCE, 0xBD}), 1, 2, 0, 1), - withPos(newTokenDefault(3, 3, []byte{0xE3, 0x81, 0xAB}), 3, 3, 0, 2), - withPos(newTokenDefault(4, 4, []byte{0xF0, 0x9F, 0x98, 0xB8}), 6, 4, 0, 3), - withPos(newEOFTokenDefault(), 10, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("code_points_alt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"), - }, - }, - src: "nνに😸", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte{0x6E}), 0, 1, 0, 0), - withPos(newTokenDefault(1, 1, []byte{0xCE, 0xBD}), 1, 2, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0xE3, 0x81, 0xAB}), 3, 3, 0, 2), - withPos(newTokenDefault(1, 1, []byte{0xF0, 0x9F, 0x98, 0xB8}), 6, 4, 0, 3), - withPos(newEOFTokenDefault(), 10, 0, 0, 4), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "\\f{a2c}\\f{d2f}+"), - newLexEntryFragment("a2c", "abc"), - newLexEntryFragment("d2f", "def"), - }, - }, - src: "abcdefdefabcdef", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("abcdefdef")), 0, 9, 0, 0), - withPos(newTokenDefault(1, 1, []byte("abcdef")), 9, 6, 0, 9), - withPos(newEOFTokenDefault(), 15, 0, 0, 15), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "(\\f{a2c}|\\f{d2f})+"), - newLexEntryFragment("a2c", "abc"), - newLexEntryFragment("d2f", "def"), - }, - }, - src: "abcdefdefabc", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("abcdefdefabc")), 0, 12, 0, 0), - withPos(newEOFTokenDefault(), 12, 0, 0, 12), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("t1", "\\f{a2c_or_d2f}+"), - newLexEntryFragment("a2c_or_d2f", "\\f{a2c}|\\f{d2f}"), - newLexEntryFragment("a2c", "abc"), - newLexEntryFragment("d2f", "def"), - }, - }, - src: "abcdefdefabc", - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte("abcdefdefabc")), 0, 12, 0, 0), - withPos(newEOFTokenDefault(), 12, 0, 0, 12), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("white_space", ` *`), - newLexEntry([]string{"default"}, "string_open", `"`, "string", false), - newLexEntry([]string{"string"}, "escape_sequence", `\\[n"\\]`, "", false), - newLexEntry([]string{"string"}, "char_sequence", `[^"\\]*`, "", false), - newLexEntry([]string{"string"}, "string_close", `"`, "", true), - }, - }, - src: `"" "Hello world.\n\"Hello world.\""`, - tokens: []*Token{ - withPos(newToken(1, 2, 2, []byte(`"`)), 0, 1, 0, 0), - withPos(newToken(2, 5, 3, []byte(`"`)), 1, 1, 0, 1), - withPos(newToken(1, 1, 1, []byte(` `)), 2, 1, 0, 2), - withPos(newToken(1, 2, 2, []byte(`"`)), 3, 1, 0, 3), - withPos(newToken(2, 4, 2, []byte(`Hello world.`)), 4, 12, 0, 4), - withPos(newToken(2, 3, 1, []byte(`\n`)), 16, 2, 0, 16), - withPos(newToken(2, 3, 
1, []byte(`\"`)), 18, 2, 0, 18), - withPos(newToken(2, 4, 2, []byte(`Hello world.`)), 20, 12, 0, 20), - withPos(newToken(2, 3, 1, []byte(`\"`)), 32, 2, 0, 32), - withPos(newToken(2, 5, 3, []byte(`"`)), 34, 1, 0, 34), - withPos(newEOFTokenDefault(), 35, 0, 0, 35), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - // `white_space` is enabled in multiple modes. - newLexEntry([]string{"default", "state_a", "state_b"}, "white_space", ` *`, "", false), - newLexEntry([]string{"default"}, "char_a", `a`, "state_a", false), - newLexEntry([]string{"state_a"}, "char_b", `b`, "state_b", false), - newLexEntry([]string{"state_a"}, "back_from_a", `<`, "", true), - newLexEntry([]string{"state_b"}, "back_from_b", `<`, "", true), - }, - }, - src: ` a b < < `, - tokens: []*Token{ - withPos(newToken(1, 1, 1, []byte(` `)), 0, 1, 0, 0), - withPos(newToken(1, 2, 2, []byte(`a`)), 1, 1, 0, 1), - withPos(newToken(2, 1, 1, []byte(` `)), 2, 1, 0, 2), - withPos(newToken(2, 3, 2, []byte(`b`)), 3, 1, 0, 3), - withPos(newToken(3, 1, 1, []byte(` `)), 4, 1, 0, 4), - withPos(newToken(3, 5, 2, []byte(`<`)), 5, 1, 0, 5), - withPos(newToken(2, 1, 1, []byte(` `)), 6, 1, 0, 6), - withPos(newToken(2, 4, 3, []byte(`<`)), 7, 1, 0, 7), - withPos(newToken(1, 1, 1, []byte(` `)), 8, 1, 0, 8), - withPos(newEOFTokenDefault(), 9, 0, 0, 9), - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false), - newLexEntry([]string{"default"}, "char", `.`, "", false), - newLexEntry([]string{"default"}, "push_1", `-> 1`, "", false), - newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false), - newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false), - newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", false), - }, - }, - src: `-> 1 -> 2 <- <- a`, - tokens: []*Token{ - withPos(newToken(1, 3, 3, []byte(`-> 1`)), 0, 4, 0, 0), - withPos(newToken(2, 1, 1, []byte(` `)), 4, 1, 0, 4), - withPos(newToken(2, 4, 2, []byte(`-> 2`)), 5, 4, 0, 5), - withPos(newToken(3, 1, 1, []byte(` `)), 9, 1, 0, 9), - withPos(newToken(3, 6, 2, []byte(`<-`)), 10, 2, 0, 10), - withPos(newToken(2, 1, 1, []byte(` `)), 12, 1, 0, 12), - withPos(newToken(2, 5, 3, []byte(`<-`)), 13, 2, 0, 13), - withPos(newToken(1, 1, 1, []byte(` `)), 15, 1, 0, 15), - withPos(newToken(1, 2, 2, []byte(`a`)), 16, 1, 0, 16), - withPos(newEOFTokenDefault(), 17, 0, 0, 17), - }, - passiveModeTran: true, - tran: func(l *Lexer, tok *Token) error { - switch l.spec.ModeName(l.Mode()) { - case "default": - switch tok.KindID { - case 3: // push_1 - l.PushMode(2) - } - case "mode_1": - switch tok.KindID { - case 4: // push_2 - l.PushMode(3) - case 5: // pop_1 - return l.PopMode() - } - case "mode_2": - switch tok.KindID { - case 6: // pop_2 - return l.PopMode() - } - } - return nil - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntry([]string{"default", "mode_1", "mode_2"}, "white_space", ` *`, "", false), - newLexEntry([]string{"default"}, "char", `.`, "", false), - newLexEntry([]string{"default"}, "push_1", `-> 1`, "mode_1", false), - newLexEntry([]string{"mode_1"}, "push_2", `-> 2`, "", false), - newLexEntry([]string{"mode_1"}, "pop_1", `<-`, "", false), - newLexEntry([]string{"mode_2"}, "pop_2", `<-`, "", true), - }, - }, - src: `-> 1 -> 2 <- <- a`, - tokens: []*Token{ - withPos(newToken(1, 3, 3, []byte(`-> 1`)), 0, 4, 0, 0), - withPos(newToken(2, 1, 1, []byte(` `)), 4, 1, 0, 4), - withPos(newToken(2, 4, 2, []byte(`-> 2`)), 5, 4, 0, 5), - 
withPos(newToken(3, 1, 1, []byte(` `)), 9, 1, 0, 9), - withPos(newToken(3, 6, 2, []byte(`<-`)), 10, 2, 0, 10), - withPos(newToken(2, 1, 1, []byte(` `)), 12, 1, 0, 12), - withPos(newToken(2, 5, 3, []byte(`<-`)), 13, 2, 0, 13), - withPos(newToken(1, 1, 1, []byte(` `)), 15, 1, 0, 15), - withPos(newToken(1, 2, 2, []byte(`a`)), 16, 1, 0, 16), - withPos(newEOFTokenDefault(), 17, 0, 0, 17), - }, - // Active mode transition and an external transition function can be used together. - passiveModeTran: false, - tran: func(l *Lexer, tok *Token) error { - switch l.spec.ModeName(l.Mode()) { - case "mode_1": - switch tok.KindID { - case 4: // push_2 - l.PushMode(3) - case 5: // pop_1 - return l.PopMode() - } - } - return nil - }, - }, - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("dot", spec.EscapePattern(`.`)), - newLexEntryDefaultNOP("star", spec.EscapePattern(`*`)), - newLexEntryDefaultNOP("plus", spec.EscapePattern(`+`)), - newLexEntryDefaultNOP("question", spec.EscapePattern(`?`)), - newLexEntryDefaultNOP("vbar", spec.EscapePattern(`|`)), - newLexEntryDefaultNOP("lparen", spec.EscapePattern(`(`)), - newLexEntryDefaultNOP("rparen", spec.EscapePattern(`)`)), - newLexEntryDefaultNOP("lbrace", spec.EscapePattern(`[`)), - newLexEntryDefaultNOP("backslash", spec.EscapePattern(`\`)), - }, - }, - src: `.*+?|()[\`, - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte(`.`)), 0, 1, 0, 0), - withPos(newTokenDefault(2, 2, []byte(`*`)), 1, 1, 0, 1), - withPos(newTokenDefault(3, 3, []byte(`+`)), 2, 1, 0, 2), - withPos(newTokenDefault(4, 4, []byte(`?`)), 3, 1, 0, 3), - withPos(newTokenDefault(5, 5, []byte(`|`)), 4, 1, 0, 4), - withPos(newTokenDefault(6, 6, []byte(`(`)), 5, 1, 0, 5), - withPos(newTokenDefault(7, 7, []byte(`)`)), 6, 1, 0, 6), - withPos(newTokenDefault(8, 8, []byte(`[`)), 7, 1, 0, 7), - withPos(newTokenDefault(9, 9, []byte(`\`)), 8, 1, 0, 8), - withPos(newEOFTokenDefault(), 9, 0, 0, 9), - }, - }, - // Character properties are available in a bracket expression. - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("letter", `[\p{Letter}]+`), - newLexEntryDefaultNOP("non_letter", `[^\p{Letter}]+`), - }, - }, - src: `foo123`, - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0), - withPos(newTokenDefault(2, 2, []byte(`123`)), 3, 3, 0, 3), - withPos(newEOFTokenDefault(), 6, 0, 0, 6), - }, - }, - // The driver can continue lexical analysis even after it detects an invalid token. - { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("lower", `[a-z]+`), - }, - }, - src: `foo123bar`, - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0), - withPos(newInvalidTokenDefault([]byte(`123`)), 3, 3, 0, 3), - withPos(newTokenDefault(1, 1, []byte(`bar`)), 6, 3, 0, 6), - withPos(newEOFTokenDefault(), 9, 0, 0, 9), - }, - }, - // The driver can detect an invalid token immediately preceding an EOF. 
- { - lspec: &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("lower", `[a-z]+`), - }, - }, - src: `foo123`, - tokens: []*Token{ - withPos(newTokenDefault(1, 1, []byte(`foo`)), 0, 3, 0, 0), - withPos(newInvalidTokenDefault([]byte(`123`)), 3, 3, 0, 3), - withPos(newEOFTokenDefault(), 6, 0, 0, 6), - }, - }, - } - for i, tt := range test { - for compLv := lexical.CompressionLevelMin; compLv <= lexical.CompressionLevelMax; compLv++ { - t.Run(fmt.Sprintf("#%v-%v", i, compLv), func(t *testing.T) { - clspec, err, cerrs := lexical.Compile(tt.lspec, compLv) - if err != nil { - for _, cerr := range cerrs { - t.Logf("%#v", cerr) - } - t.Fatalf("unexpected error: %v", err) - } - opts := []LexerOption{} - if tt.passiveModeTran { - opts = append(opts, DisableModeTransition()) - } - lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(tt.src), opts...) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - for _, eTok := range tt.tokens { - tok, err := lexer.Next() - if err != nil { - t.Log(err) - break - } - testToken(t, eTok, tok) - - if tok.EOF { - break - } - - if tt.tran != nil { - err := tt.tran(lexer, tok) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - } - }) - } - } -} - -func TestLexer_Next_WithPosition(t *testing.T) { - lspec := &lexical.LexSpec{ - Entries: []*lexical.LexEntry{ - newLexEntryDefaultNOP("newline", `\u{000A}+`), - newLexEntryDefaultNOP("any", `.`), - }, - } - - clspec, err, _ := lexical.Compile(lspec, lexical.CompressionLevelMax) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - src := string([]byte{ - 0x00, - 0x7F, - 0x0A, - - 0xC2, 0x80, - 0xDF, 0xBF, - 0x0A, - - 0xE0, 0xA0, 0x80, - 0xE0, 0xBF, 0xBF, - 0xE1, 0x80, 0x80, - 0xEC, 0xBF, 0xBF, - 0xED, 0x80, 0x80, - 0xED, 0x9F, 0xBF, - 0xEE, 0x80, 0x80, - 0xEF, 0xBF, 0xBF, - 0x0A, - - 0xF0, 0x90, 0x80, 0x80, - 0xF0, 0xBF, 0xBF, 0xBF, - 0xF1, 0x80, 0x80, 0x80, - 0xF3, 0xBF, 0xBF, 0xBF, - 0xF4, 0x80, 0x80, 0x80, - 0xF4, 0x8F, 0xBF, 0xBF, - 0x0A, - 0x0A, - 0x0A, - }) - - expected := []*Token{ - withPos(newTokenDefault(2, 2, []byte{0x00}), 0, 1, 0, 0), - withPos(newTokenDefault(2, 2, []byte{0x7F}), 1, 1, 0, 1), - withPos(newTokenDefault(1, 1, []byte{0x0A}), 2, 1, 0, 2), - - withPos(newTokenDefault(2, 2, []byte{0xC2, 0x80}), 3, 2, 1, 0), - withPos(newTokenDefault(2, 2, []byte{0xDF, 0xBF}), 5, 2, 1, 1), - withPos(newTokenDefault(1, 1, []byte{0x0A}), 7, 1, 1, 2), - - withPos(newTokenDefault(2, 2, []byte{0xE0, 0xA0, 0x80}), 8, 3, 2, 0), - withPos(newTokenDefault(2, 2, []byte{0xE0, 0xBF, 0xBF}), 11, 3, 2, 1), - withPos(newTokenDefault(2, 2, []byte{0xE1, 0x80, 0x80}), 14, 3, 2, 2), - withPos(newTokenDefault(2, 2, []byte{0xEC, 0xBF, 0xBF}), 17, 3, 2, 3), - withPos(newTokenDefault(2, 2, []byte{0xED, 0x80, 0x80}), 20, 3, 2, 4), - withPos(newTokenDefault(2, 2, []byte{0xED, 0x9F, 0xBF}), 23, 3, 2, 5), - withPos(newTokenDefault(2, 2, []byte{0xEE, 0x80, 0x80}), 26, 3, 2, 6), - withPos(newTokenDefault(2, 2, []byte{0xEF, 0xBF, 0xBF}), 29, 3, 2, 7), - withPos(newTokenDefault(1, 1, []byte{0x0A}), 32, 1, 2, 8), - - withPos(newTokenDefault(2, 2, []byte{0xF0, 0x90, 0x80, 0x80}), 33, 4, 3, 0), - withPos(newTokenDefault(2, 2, []byte{0xF0, 0xBF, 0xBF, 0xBF}), 37, 4, 3, 1), - withPos(newTokenDefault(2, 2, []byte{0xF1, 0x80, 0x80, 0x80}), 41, 4, 3, 2), - withPos(newTokenDefault(2, 2, []byte{0xF3, 0xBF, 0xBF, 0xBF}), 45, 4, 3, 3), - withPos(newTokenDefault(2, 2, []byte{0xF4, 0x80, 0x80, 0x80}), 49, 4, 3, 4), - withPos(newTokenDefault(2, 2, []byte{0xF4, 0x8F, 0xBF, 0xBF}), 53, 4, 3, 5), 
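// All of these cases flow through the same driver loop. A trimmed-down
// version of that loop, assuming the package-local API used above
// (lexical.Compile, NewLexSpec, NewLexer) and the Token fields checked by
// testToken (EOF, Invalid, Lexeme); invalid input is recorded rather than
// aborting the scan, mirroring how the driver keeps lexing after an invalid
// token.
func collectLexemes(lspec *lexical.LexSpec, src string) (valid, invalid []string, err error) {
	clspec, err, _ := lexical.Compile(lspec, lexical.CompressionLevelMax)
	if err != nil {
		return nil, nil, err
	}
	lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src))
	if err != nil {
		return nil, nil, err
	}
	for {
		tok, err := lexer.Next()
		if err != nil {
			return nil, nil, err
		}
		if tok.EOF {
			return valid, invalid, nil
		}
		if tok.Invalid {
			invalid = append(invalid, string(tok.Lexeme))
			continue
		}
		valid = append(valid, string(tok.Lexeme))
	}
}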
- // When a token contains multiple line breaks, the driver sets the token position to - // the line number where a lexeme first appears. - withPos(newTokenDefault(1, 1, []byte{0x0A, 0x0A, 0x0A}), 57, 3, 3, 6), - - withPos(newEOFTokenDefault(), 60, 0, 6, 0), - } - - lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src)) - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - - for _, eTok := range expected { - tok, err := lexer.Next() - if err != nil { - t.Fatal(err) - } - - testToken(t, eTok, tok) - - if tok.EOF { - break - } - } -} - -func testToken(t *testing.T, expected, actual *Token) { - t.Helper() - - if actual.ModeID != expected.ModeID || - actual.KindID != expected.KindID || - actual.ModeKindID != expected.ModeKindID || - !bytes.Equal(actual.Lexeme, expected.Lexeme) || - actual.EOF != expected.EOF || - actual.Invalid != expected.Invalid { - t.Fatalf(`unexpected token; want: %+v, got: %+v`, expected, actual) - } - - if actual.BytePos != expected.BytePos || actual.ByteLen != expected.ByteLen || - actual.Row != expected.Row || actual.Col != expected.Col { - t.Fatalf(`unexpected token; want: %+v, got: %+v`, expected, actual) - } -} diff --git a/tests/unit/urubu/driver/parser.go b/tests/unit/urubu/driver/parser.go deleted file mode 100644 index 31fec45..0000000 --- a/tests/unit/urubu/driver/parser.go +++ /dev/null @@ -1,1972 +0,0 @@ -package parser - -import ( - "fmt" - "sort" - "strings" - "testing" - - "urubu/grammar" - spec "urubu/spec/grammar" - "urubu/spec/grammar/parser" -) - -func TestParserWithConflicts(t *testing.T) { - tests := []struct { - caption string - specSrc string - src string - cst *Node - }{ - { - caption: "when a shift/reduce conflict occurred, we prioritize the shift action", - specSrc: ` -#name test; - -expr - : expr assign expr - | id - ; - -id: "[A-Za-z0-9_]+"; -assign: '='; -`, - src: `foo=bar=baz`, - cst: nonTermNode("expr", - nonTermNode("expr", - termNode("id", "foo"), - ), - termNode("assign", "="), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "bar"), - ), - termNode("assign", "="), - nonTermNode("expr", - termNode("id", "baz"), - ), - ), - ), - }, - { - caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar", - specSrc: ` -#name test; - -s - : a - | b - ; -a - : id - ; -b - : id - ; - -id: "[A-Za-z0-9_]+"; -`, - src: `foo`, - cst: nonTermNode("s", - nonTermNode("a", - termNode("id", "foo"), - ), - ), - }, - { - caption: "left associativities defined earlier in the grammar have higher precedence", - specSrc: ` -#name test; - -#prec ( - #left mul - #left add -); - -expr - : expr add expr - | expr mul expr - | id - ; - -id: "[A-Za-z0-9_]+"; -add: '+'; -mul: '*'; -`, - src: `a+b*c*d+e`, - cst: nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("add", "+"), - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "b"), - ), - termNode("mul", "*"), - nonTermNode("expr", - termNode("id", "c"), - ), - ), - termNode("mul", "*"), - nonTermNode("expr", - termNode("id", "d"), - ), - ), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - }, - { - caption: "left associativities defined in the same line have the same precedence", - specSrc: ` -#name test; - -#prec ( - #left add sub -); - -expr - : expr add expr - | expr sub expr - | id - ; - -id: "[A-Za-z0-9_]+"; -add: '+'; -sub: '-'; -`, - src: `a-b+c+d-e`, - cst: nonTermNode("expr", - nonTermNode("expr", - 
nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("sub", "-"), - nonTermNode("expr", - termNode("id", "b"), - ), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("id", "c"), - ), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("id", "d"), - ), - ), - termNode("sub", "-"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - }, - { - caption: "right associativities defined earlier in the grammar have higher precedence", - specSrc: ` -#name test; - -#prec ( - #right r1 - #right r2 -); - -expr - : expr r2 expr - | expr r1 expr - | id - ; - -whitespaces #skip - : "[\u{0009}\u{0020}]+"; -r1 - : 'r1'; -r2 - : 'r2'; -id - : "[A-Za-z0-9_]+"; -`, - src: `a r2 b r1 c r1 d r2 e`, - cst: nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("r2", "r2"), - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "b"), - ), - termNode("r1", "r1"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "c"), - ), - termNode("r1", "r1"), - nonTermNode("expr", - termNode("id", "d"), - ), - ), - ), - termNode("r2", "r2"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - ), - }, - { - caption: "right associativities defined in the same line have the same precedence", - specSrc: ` -#name test; - -#prec ( - #right r1 r2 -); - -expr - : expr r2 expr - | expr r1 expr - | id - ; - -whitespaces #skip - : "[\u{0009}\u{0020}]+"; -r1 - : 'r1'; -r2 - : 'r2'; -id - : "[A-Za-z0-9_]+"; -`, - src: `a r2 b r1 c r1 d r2 e`, - cst: nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("r2", "r2"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "b"), - ), - termNode("r1", "r1"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "c"), - ), - termNode("r1", "r1"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "d"), - ), - termNode("r2", "r2"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - ), - ), - ), - }, - { - caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence", - specSrc: ` -#name test; - -#prec ( - #assign a1 - #assign a2 -); - -expr - : expr a2 expr - | expr a1 expr - | id - ; - -whitespaces #skip - : "[\u{0009}\u{0020}]+"; -a1 - : 'a1'; -a2 - : 'a2'; -id - : "[A-Za-z0-9_]+"; -`, - src: `a a2 b a1 c a1 d a2 e`, - cst: nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("a2", "a2"), - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "b"), - ), - termNode("a1", "a1"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "c"), - ), - termNode("a1", "a1"), - nonTermNode("expr", - termNode("id", "d"), - ), - ), - ), - termNode("a2", "a2"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - ), - }, - { - caption: "terminal symbols with an #assign directive defined in the same line have the same precedence", - specSrc: ` -#name test; - -#prec ( - #assign a1 a2 -); - -expr - : expr a2 expr - | expr a1 expr - | id - ; - -whitespaces #skip - : "[\u{0009}\u{0020}]+"; -a1 - : 'a1'; -a2 - : 'a2'; -id - : "[A-Za-z0-9_]+"; -`, - src: `a a2 b a1 c a1 d a2 e`, - cst: nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("a2", "a2"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "b"), - ), - termNode("a1", "a1"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "c"), - ), - termNode("a1", "a1"), - nonTermNode("expr", - 
nonTermNode("expr", - termNode("id", "d"), - ), - termNode("a2", "a2"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - ), - ), - ), - }, - { - caption: "#left, #right, and #assign can be mixed", - specSrc: ` -#name test; - -#prec ( - #left mul div - #left add sub - #assign else - #assign then - #right assign -); - -expr - : expr add expr - | expr sub expr - | expr mul expr - | expr div expr - | expr assign expr - | if expr then expr - | if expr then expr else expr - | id - ; - -ws #skip: "[\u{0009}\u{0020}]+"; -if: 'if'; -then: 'then'; -else: 'else'; -id: "[A-Za-z0-9_]+"; -add: '+'; -sub: '-'; -mul: '*'; -div: '/'; -assign: '='; -`, - src: `x = y = a + b * c - d / e + if f then if g then h else i`, - cst: nonTermNode( - "expr", - nonTermNode("expr", - termNode("id", "x"), - ), - termNode("assign", "="), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "y"), - ), - termNode("assign", "="), - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "a"), - ), - termNode("add", "+"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "b"), - ), - termNode("mul", "*"), - nonTermNode("expr", - termNode("id", "c"), - ), - ), - ), - termNode("sub", "-"), - nonTermNode("expr", - nonTermNode("expr", - termNode("id", "d"), - ), - termNode("div", "/"), - nonTermNode("expr", - termNode("id", "e"), - ), - ), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("if", "if"), - nonTermNode("expr", - termNode("id", "f"), - ), - termNode("then", "then"), - nonTermNode("expr", - termNode("if", "if"), - nonTermNode("expr", - termNode("id", "g"), - ), - termNode("then", "then"), - nonTermNode("expr", - termNode("id", "h"), - ), - termNode("else", "else"), - nonTermNode("expr", - termNode("id", "i"), - ), - ), - ), - ), - ), - ), - }, - } - - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - cg, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(cg, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - gram := NewGrammar(cg) - tb := NewDefaultSyntaxTreeBuilder() - p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb))) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - if tt.cst != nil { - testTree(t, tb.Tree(), tt.cst) - } - }) - } -} - -func TestParserWithLAC(t *testing.T) { - specSrc := ` -#name test; - -s - : t t - ; -t - : c t - | d - ; - -c: 'c'; -d: 'd'; -` - - src := `ccd` - - actLogWithLAC := []string{ - "shift/c", - "shift/c", - "shift/d", - "miss", - } - - actLogWithoutLAC := []string{ - "shift/c", - "shift/c", - "shift/d", - "reduce/t", - "reduce/t", - "reduce/t", - "miss", - } - - ast, err := parser.Parse(strings.NewReader(specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - gram, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - t.Run("LAC is enabled", func(t *testing.T) { - semAct := &testSemAct{ - gram: gram, - } - - toks, err := NewTokenStream(gram, strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - - p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct)) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - if len(semAct.actLog) != len(actLogWithLAC) { - t.Fatalf("unexpected action log; want: %+v, got: %+v", 
actLogWithLAC, semAct.actLog) - } - - for i, e := range actLogWithLAC { - if semAct.actLog[i] != e { - t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog) - } - } - }) - - t.Run("LAC is disabled", func(t *testing.T) { - semAct := &testSemAct{ - gram: gram, - } - - toks, err := NewTokenStream(gram, strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - - p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC()) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - if len(semAct.actLog) != len(actLogWithoutLAC) { - t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog) - } - - for i, e := range actLogWithoutLAC { - if semAct.actLog[i] != e { - t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog) - } - } - }) -} - -func termNode(kind string, text string, children ...*Node) *Node { - return &Node{ - Type: NodeTypeTerminal, - KindName: kind, - Text: text, - Children: children, - } -} - -func errorNode() *Node { - return &Node{ - Type: NodeTypeError, - KindName: "error", - } -} - -func nonTermNode(kind string, children ...*Node) *Node { - return &Node{ - Type: NodeTypeNonTerminal, - KindName: kind, - Children: children, - } -} - -func TestParser_Parse(t *testing.T) { - tests := []struct { - specSrc string - src string - synErr bool - cst *Node - ast *Node - }{ - { - specSrc: ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; - -add - : '+'; -mul - : '*'; -l_paren - : '('; -r_paren - : ')'; -id - : "[A-Za-z_][0-9A-Za-z_]*"; -`, - src: `(a+(b+c))*d+e`, - cst: nonTermNode("expr", - nonTermNode("expr", - nonTermNode("term", - nonTermNode("term", - nonTermNode("factor", - termNode("l_paren", "("), - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("term", - nonTermNode("factor", - termNode("id", "a"), - ), - ), - ), - termNode("add", "+"), - nonTermNode("term", - nonTermNode("factor", - termNode("l_paren", "("), - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("term", - nonTermNode("factor", - termNode("id", "b"), - ), - ), - ), - termNode("add", "+"), - nonTermNode("term", - nonTermNode("factor", - termNode("id", "c"), - ), - ), - ), - termNode("r_paren", ")"), - ), - ), - ), - termNode("r_paren", ")"), - ), - ), - termNode("mul", "*"), - nonTermNode("factor", - termNode("id", "d"), - ), - ), - ), - termNode("add", "+"), - nonTermNode("term", - nonTermNode("factor", - termNode("id", "e"), - ), - ), - ), - }, - // Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are - // not allowed in string literals. - { - specSrc: ` -#name test; - -s - : a b c - ; - -a - : '\f{foo}'; -b - : '\u{0000}'; -c - : '\p{gc=Letter}'; -`, - src: `\f{foo}\u{0000}\p{gc=Letter}`, - cst: nonTermNode("s", - termNode("a", `\f{foo}`), - termNode("b", `\u{0000}`), - termNode("c", `\p{gc=Letter}`), - ), - }, - // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node. - { - specSrc: ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar - : bar_text - | - ; -bar_text: "bar"; -`, - src: ``, - cst: nonTermNode("s", - nonTermNode("foo"), - nonTermNode("bar"), - ), - }, - // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node. 
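// Returning to TestParserWithLAC above: its two subtests differ only in the
// DisableLAC option. A condensed sketch of that comparison, assuming the same
// package-local API (parser.Parse, grammar.GrammarBuilder, NewTokenStream,
// NewGrammar, NewParser, ParserOption) and the testSemAct recorder defined
// later in this file.
func compareLAC(specSrc, src string) (withLAC, withoutLAC []string, err error) {
	ast, err := parser.Parse(strings.NewReader(specSrc))
	if err != nil {
		return nil, nil, err
	}
	b := grammar.GrammarBuilder{AST: ast}
	gram, _, err := b.Build()
	if err != nil {
		return nil, nil, err
	}
	for _, disable := range []bool{false, true} {
		toks, err := NewTokenStream(gram, strings.NewReader(src))
		if err != nil {
			return nil, nil, err
		}
		semAct := &testSemAct{gram: gram}
		opts := []ParserOption{SemanticAction(semAct)}
		if disable {
			opts = append(opts, DisableLAC())
		}
		p, err := NewParser(toks, NewGrammar(gram), opts...)
		if err != nil {
			return nil, nil, err
		}
		if err := p.Parse(); err != nil {
			return nil, nil, err
		}
		if disable {
			withoutLAC = semAct.actLog
		} else {
			withLAC = semAct.actLog
		}
	}
	return withLAC, withoutLAC, nil
}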
- { - specSrc: ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar - : bar_text - | - ; - -bar_text - : "bar"; -`, - src: `bar`, - cst: nonTermNode("s", - nonTermNode("foo"), - nonTermNode("bar", - termNode("bar_text", "bar"), - ), - ), - }, - // A production can have multiple alternative productions. - { - specSrc: ` -#name test; - -#prec ( - #assign $uminus - #left mul div - #left add sub -); - -expr - : expr add expr - | expr sub expr - | expr mul expr - | expr div expr - | int - | sub int #prec $uminus // This 'sub' means the unary minus symbol. - ; - -int - : "0|[1-9][0-9]*"; -add - : '+'; -sub - : '-'; -mul - : '*'; -div - : '/'; -`, - src: `-1*-2+3-4/5`, - ast: nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("sub", "-"), - termNode("int", "1"), - ), - termNode("mul", "*"), - nonTermNode("expr", - termNode("sub", "-"), - termNode("int", "2"), - ), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("int", "3"), - ), - ), - termNode("sub", "-"), - nonTermNode("expr", - nonTermNode("expr", - termNode("int", "4"), - ), - termNode("div", "/"), - nonTermNode("expr", - termNode("int", "5"), - ), - ), - ), - }, - // A lexical production can have multiple production directives. - { - specSrc: ` -#name test; - -s - : push_a push_b pop pop - ; - -push_a #mode default #push a - : '->a'; -push_b #mode a #push b - : '->b'; -pop #mode a b #pop - : '<-'; -`, - src: `->a->b<-<-`, - ast: nonTermNode("s", - termNode("push_a", "->a"), - termNode("push_b", "->b"), - termNode("pop", "<-"), - termNode("pop", "<-"), - ), - }, - { - specSrc: ` -#name test; - -mode_tran_seq - : mode_tran_seq mode_tran - | mode_tran - ; -mode_tran - : push_m1 - | push_m2 - | pop_m1 - | pop_m2 - ; - -push_m1 #push m1 - : "->"; -push_m2 #mode m1 #push m2 - : "-->"; -pop_m1 #mode m1 #pop - : "<-"; -pop_m2 #mode m2 #pop - : "<--"; -whitespace #mode default m1 m2 #skip - : "\u{0020}+"; -`, - src: ` -> --> <-- <- `, - }, - { - specSrc: ` -#name test; - -s - : foo bar - ; - -foo - : "foo"; -bar #mode default - : "bar"; -`, - src: `foobar`, - }, - // When #push and #pop are applied to the same symbol, #pop will run first, then #push. - { - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push m1 - : 'foo'; -bar #mode m1 #pop #push m2 - : 'bar'; -baz #mode m2 - : 'baz'; -`, - src: `foobarbaz`, - ast: nonTermNode("s", - termNode("foo", "foo"), - termNode("bar", "bar"), - termNode("baz", "baz"), - ), - }, - // When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first - // in a definition. That is, the order in which #push and #pop appear in a grammar has nothing to do with the order in which - // they are executed. - { - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push m1 - : 'foo'; -bar #mode m1 #push m2 #pop - : 'bar'; -baz #mode m2 - : 'baz'; -`, - src: `foobarbaz`, - ast: nonTermNode("s", - termNode("foo", "foo"), - termNode("bar", "bar"), - termNode("baz", "baz"), - ), - }, - // The parser can skip specified tokens. - { - specSrc: ` -#name test; - -s - : foo bar - ; - -foo - : "foo"; -bar - : "bar"; -white_space #skip - : "[\u{0009}\u{0020}]+"; -`, - src: `foo bar`, - }, - // A grammar can contain fragments. - { - specSrc: ` -#name test; - -s - : tagline - ; -tagline - : "\f{words} IS OUT THERE."; -fragment words - : "[A-Za-z\u{0020}]+"; -`, - src: `THE TRUTH IS OUT THERE.`, - },
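// The #ast cases that follow all run the same end-to-end pipeline: parse the
// grammar spec, build a compiled grammar, lex the input, and drive the parser
// with a syntax-tree builder. A condensed sketch of that pipeline, assuming
// the package-local API used throughout these tests (NewTokenStream,
// NewGrammar, NewDefaultSyntaxTreeBuilder, NewASTActionSet, NewParser).
func buildAST(specSrc, src string) (*Node, error) {
	ast, err := parser.Parse(strings.NewReader(specSrc))
	if err != nil {
		return nil, err
	}
	b := grammar.GrammarBuilder{AST: ast}
	cg, _, err := b.Build()
	if err != nil {
		return nil, err
	}
	toks, err := NewTokenStream(cg, strings.NewReader(src))
	if err != nil {
		return nil, err
	}
	gram := NewGrammar(cg)
	tb := NewDefaultSyntaxTreeBuilder()
	p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb)))
	if err != nil {
		return nil, err
	}
	if err := p.Parse(); err != nil {
		return nil, err
	}
	// Tree returns the root of the AST assembled by the action set.
	return tb.Tree(), nil
}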
- // A grammar can contain ast actions. - { - specSrc: ` -#name test; - -list - : l_bracket elems r_bracket #ast elems... - ; -elems - : elems comma id #ast elems... id - | id - ; - -whitespace #skip - : "\u{0020}+"; -l_bracket - : '['; -r_bracket - : ']'; -comma - : ','; -id - : "[A-Za-z]+"; -`, - src: `[Byers, Frohike, Langly]`, - cst: nonTermNode("list", - termNode("x_1", "["), - nonTermNode("elems", - nonTermNode("elems", - nonTermNode("elems", - termNode("id", "Byers"), - ), - termNode("x_3", ","), - termNode("id", "Frohike"), - ), - termNode("x_3", ","), - termNode("id", "Langly"), - ), - termNode("x_2", "]"), - ), - ast: nonTermNode("list", - termNode("id", "Byers"), - termNode("id", "Frohike"), - termNode("id", "Langly"), - ), - }, - // The '...' operator can expand child nodes. - { - specSrc: ` -#name test; - -s - : a #ast a... - ; -a - : a comma foo #ast a... foo - | foo - ; - -comma - : ','; -foo - : 'foo'; -`, - src: `foo,foo,foo`, - ast: nonTermNode("s", - termNode("foo", "foo"), - termNode("foo", "foo"), - termNode("foo", "foo"), - ), - }, - // The '...' operator can also be applied to an element having no children. - { - specSrc: ` -#name test; - -s - : a semi_colon #ast a... - ; -a - : - ; - -semi_colon - : ';'; -`, - src: `;`, - ast: nonTermNode("s"), - }, - // A label can be a parameter of an #ast directive. - { - specSrc: ` -#name test; - -#prec ( - #left add sub -); - -expr - : expr@lhs add expr@rhs #ast add lhs rhs - | expr@lhs sub expr@rhs #ast sub lhs rhs - | num - ; - -add - : '+'; -sub - : '-'; -num - : "0|[1-9][0-9]*"; -`, - src: `1+2-3`, - ast: nonTermNode("expr", - termNode("sub", "-"), - nonTermNode("expr", - termNode("add", "+"), - nonTermNode("expr", - termNode("num", "1"), - ), - nonTermNode("expr", - termNode("num", "2"), - ), - ), - nonTermNode("expr", - termNode("num", "3"), - ), - ), - }, - // An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed. - { - specSrc: ` -#name test; - -s - : foo@x semi_colon #ast foo - ; - -semi_colon - : ';'; -foo - : 'foo'; -`, - src: `foo;`, - ast: nonTermNode("s", - termNode("foo", "foo"), - ), - }, - // A production has the same precedence and associativity as the right-most terminal symbol. - { - specSrc: ` -#name test; - -#prec ( - #left add -); - -expr - : expr add expr // This alternative has the same precedence and associativity as 'add'. - | int - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -int - : "0|[1-9][0-9]*"; -add - : '+'; -`, - // This source is recognized as the following structure because the production `expr → expr add expr` has the same - // precedence and associativity as the symbol 'add'. - // - // ((1+2)+3) - // - // If the symbol didn't have the precedence and left associativity, the production also wouldn't have them, - // and this source would be recognized as the following structure. - // - // (1+(2+3)) - src: `1+2+3`, - ast: nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("int", "1"), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("int", "2"), - ), - ), - termNode("add", "+"), - nonTermNode("expr", - termNode("int", "3"), - ), - ), - }, - // The '#prec' directive can set the precedence of a production. - { - specSrc: ` -#name test; - -#prec ( - #assign $uminus - #left mul div - #left add sub -); - -expr - : expr add expr - | expr sub expr - | expr mul expr - | expr div expr - | int - | sub int #prec $uminus // This 'sub' means a unary minus symbol.
- ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -int - : "0|[1-9][0-9]*"; -add - : '+'; -sub - : '-'; -mul - : '*'; -div - : '/'; -`, - // This source is recognized as the following structure because the production `expr → sub expr` - // has the `#prec mul` directive and has the same precedence of the symbol `mul`. - // - // (((-1) * 20) / 5) - // - // If the production doesn't have the `#prec` directive, this source will be recognized as - // the following structure. - // - // (- ((1 * 20) / 5)) - src: `-1*20/5`, - cst: nonTermNode("expr", - nonTermNode("expr", - nonTermNode("expr", - termNode("sub", "-"), - termNode("int", "1"), - ), - termNode("mul", "*"), - nonTermNode("expr", - termNode("int", "20"), - ), - ), - termNode("div", "/"), - nonTermNode("expr", - termNode("int", "5"), - ), - ), - }, - // The grammar can contain the 'error' symbol. - { - specSrc: ` -#name test; - -s - : id id id semi_colon - | error semi_colon - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semi_colon - : ';'; -id - : "[A-Za-z_]+"; -`, - src: `foo bar baz ;`, - }, - // The 'error' symbol can appear in an #ast directive. - { - specSrc: ` -#name test; - -s - : foo semi_colon - | error semi_colon #ast error - ; - -semi_colon - : ';'; -foo - : 'foo'; -`, - src: `bar;`, - synErr: true, - ast: nonTermNode("s", - errorNode(), - ), - }, - // The 'error' symbol can have a label, and an #ast can reference it. - { - specSrc: ` -#name test; - -s - : foo semi_colon - | error@e semi_colon #ast e - ; - -semi_colon - : ';'; -foo - : 'foo'; -`, - src: `bar;`, - synErr: true, - ast: nonTermNode("s", - errorNode(), - ), - }, - // The grammar can contain the 'recover' directive. - { - specSrc: ` -#name test; - -seq - : seq elem - | elem - ; -elem - : id id id semi_colon - | error semi_colon #recover - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semi_colon - : ';'; -id - : "[A-Za-z_]+"; -`, - src: `a b c ; d e f ;`, - }, - // The same label can be used between different alternatives. - { - specSrc: ` -#name test; - -s - : foo@x bar - | foo@x - ; - -foo: 'foo'; -bar: 'bar'; -`, - src: `foo`, - }, - } - - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - cg, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(cg, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - gram := NewGrammar(cg) - tb := NewDefaultSyntaxTreeBuilder() - var opt []ParserOption - switch { - case tt.ast != nil: - opt = append(opt, SemanticAction(NewASTActionSet(gram, tb))) - case tt.cst != nil: - opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb))) - } - p, err := NewParser(toks, gram, opt...) 
- if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - if !tt.synErr && len(p.SyntaxErrors()) > 0 { - for _, synErr := range p.SyntaxErrors() { - t.Fatalf("unexpected syntax errors occurred: %v", synErr) - } - } - - switch { - case tt.ast != nil: - testTree(t, tb.Tree(), tt.ast) - case tt.cst != nil: - testTree(t, tb.Tree(), tt.cst) - } - }) - } -} - -func testTree(t *testing.T, node, expected *Node) { - t.Helper() - - if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text { - t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node) - } - if len(node.Children) != len(expected.Children) { - t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children)) - } - for i, c := range node.Children { - testTree(t, c, expected.Children[i]) - } -} - -type testSemAct struct { - gram *spec.CompiledGrammar - actLog []string -} - -func (a *testSemAct) Shift(tok VToken, recovered bool) { - t := a.gram.Syntactic.Terminals[tok.TerminalID()] - if recovered { - a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t)) - } else { - a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t)) - } -} - -func (a *testSemAct) Reduce(prodNum int, recovered bool) { - lhsSym := a.gram.Syntactic.LHSSymbols[prodNum] - lhsText := a.gram.Syntactic.NonTerminals[lhsSym] - if recovered { - a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText)) - } else { - a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText)) - } -} - -func (a *testSemAct) Accept() { - a.actLog = append(a.actLog, "accept") -} - -func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) { - a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped)) -} - -func (a *testSemAct) MissError(cause VToken) { - a.actLog = append(a.actLog, "miss") -} - -func TestParserWithSemanticAction(t *testing.T) { - specSrcWithErrorProd := ` -#name test; - -seq - : seq elem semicolon - | elem semicolon - | error star star semicolon - | error semicolon #recover - ; -elem - : char char char - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semicolon - : ';'; -star - : '*'; -char - : "[a-z]"; -` - - specSrcWithoutErrorProd := ` -#name test; - -seq - : seq elem semicolon - | elem semicolon - ; -elem - : char char char - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semicolon - : ';'; -char - : "[a-z]"; -` - - tests := []struct { - caption string - specSrc string - src string - actLog []string - }{ - { - caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.", - specSrc: specSrcWithErrorProd, - src: `a b c; d e f;`, - actLog: []string{ - "shift/char", - "shift/char", - "shift/char", - "reduce/elem", - "shift/semicolon", - "reduce/seq", - - "shift/char", - "shift/char", - "shift/char", - "reduce/elem", - "shift/semicolon", - "reduce/seq", - - "accept", - }, - }, - { - caption: "when a grammar has `error` symbol, the driver calls `TrapAndShiftError`.", - specSrc: specSrcWithErrorProd, - src: `a; b !; c d !; e ! * *; h i j;`, - actLog: []string{ - "shift/char", - "trap/1/shift/error", - "shift/semicolon", - "reduce/seq/recovered", - - "shift/char", - "trap/2/shift/error", - "shift/semicolon", - "reduce/seq/recovered", - - "shift/char", - "shift/char", - "trap/3/shift/error", - "shift/semicolon", - "reduce/seq/recovered", - - "shift/char", - "trap/2/shift/error", - "shift/star", - "shift/star", - // When the driver shifts three times, it recovers from an error. 
- "shift/semicolon/recovered", - "reduce/seq", - - "shift/char", - "shift/char", - "shift/char", - "reduce/elem", - "shift/semicolon", - "reduce/seq", - - // Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted - // according to the error production. - "accept", - }, - }, - { - caption: "when the input doesn't meet the error production, the driver calls `MissError`.", - specSrc: specSrcWithErrorProd, - src: `a !`, - actLog: []string{ - "shift/char", - "trap/1/shift/error", - - "miss", - }, - }, - { - caption: "when a syntax error isn't trapped, the driver calls `MissError`.", - specSrc: specSrcWithoutErrorProd, - src: `a !`, - actLog: []string{ - "shift/char", - - "miss", - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - gram, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - semAct := &testSemAct{ - gram: gram, - } - p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct)) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - if len(semAct.actLog) != len(tt.actLog) { - t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog) - } - - for i, e := range tt.actLog { - if semAct.actLog[i] != e { - t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog) - } - } - }) - } -} - -func TestParserWithSyntaxErrors(t *testing.T) { - tests := []struct { - caption string - specSrc string - src string - synErrCount int - }{ - { - caption: "the parser can report a syntax error", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -`, - src: `bar`, - synErrCount: 1, - }, - { - caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state", - specSrc: ` -#name test; - -seq - : seq elem semi_colon - | elem semi_colon - | error semi_colon #recover - ; -elem - : a b c - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semi_colon - : ';'; -a - : 'a'; -b - : 'b'; -c - : 'c'; -`, - src: `!; a!; ab!;`, - synErrCount: 3, - }, - { - caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears", - specSrc: ` -#name test; - -seq - : seq elem semi_colon - | elem semi_colon - | error semi_colon #recover - ; -elem - : a b c - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semi_colon - : ';'; -a - : 'a'; -b - : 'b'; -c - : 'c'; -`, - // After the parser trasits to the error state reading the first invalid symbol ('!'), - // the second and third invalid symbols ('!') are ignored. - src: `! ! 
!; a!; ab!;`, - synErrCount: 3, - }, - { - caption: "when the parser performs shift three times, the parser recovers from the error state", - specSrc: ` -#name test; - -seq - : seq elem semi_colon - | elem semi_colon - | error star star semi_colon - ; -elem - : a b c - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semi_colon - : ';'; -star - : '*'; -a - : 'a'; -b - : 'b'; -c - : 'c'; -`, - src: `!**; a!**; ab!**; abc!`, - synErrCount: 4, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - gram, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - p, err := NewParser(toks, NewGrammar(gram)) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - synErrs := p.SyntaxErrors() - if len(synErrs) != tt.synErrCount { - t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs)) - } - }) - } -} - -func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) { - tests := []struct { - caption string - specSrc string - src string - cause string - expected []string - }{ - { - caption: "the parser reports an expected lookahead symbol", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -`, - src: `bar`, - cause: `bar`, - expected: []string{ - "foo", - }, - }, - { - caption: "the parser reports expected lookahead symbols", - specSrc: ` -#name test; - -s - : foo - | bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - src: `baz`, - cause: `baz`, - expected: []string{ - "foo", - "bar", - }, - }, - { - caption: "the parser may report the EOF as an expected lookahead symbol", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -`, - src: `foobar`, - cause: `bar`, - expected: []string{ - "", - }, - }, - { - caption: "the parser may report the EOF and others as expected lookahead symbols", - specSrc: ` -#name test; - -s - : foo - | - ; - -foo - : 'foo'; -`, - src: `bar`, - cause: `bar`, - expected: []string{ - "foo", - "", - }, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - gram, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - p, err := NewParser(toks, NewGrammar(gram)) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - synErrs := p.SyntaxErrors() - if synErrs == nil { - t.Fatalf("expected one syntax error, but it didn't occur") - } - if len(synErrs) != 1 { - t.Fatalf("too many syntax errors: %v errors", len(synErrs)) - } - synErr := synErrs[0] - if string(synErr.Token.Lexeme()) != tt.cause { - t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme())) - } - if len(synErr.ExpectedTerminals) != len(tt.expected) { - t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals) - } - sort.Slice(tt.expected, func(i, j int) bool { - return tt.expected[i] < tt.expected[j] - }) - sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool { - return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j] - }) - for i, e 
:= range tt.expected { - if synErr.ExpectedTerminals[i] != e { - t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i]) - } - } - }) - } -} diff --git a/tests/unit/urubu/grammar.go b/tests/unit/urubu/grammar.go deleted file mode 100644 index 3743b23..0000000 --- a/tests/unit/urubu/grammar.go +++ /dev/null @@ -1,4647 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - verr "urubu/error" - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type first struct { - lhs string - num int - dot int - symbols []string - empty bool -} - -func TestGenFirst(t *testing.T) { - tests := []struct { - caption string - src string - first []first - }{ - { - caption: "productions contain only non-empty productions", - src: ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -`, - first: []first{ - {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, - {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, - {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, - {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, - {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, - }, - }, - { - caption: "productions contain the empty start production", - src: ` -#name test; - -s - : - ; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "productions contain an empty production", - src: ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a start production contains a non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - | - ; -foo: "foo"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, - {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a production contains non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - ; -foo - : bar - | - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, - {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - fst, gram := genActualFirst(t, tt.src) - - for _, ttFirst := range tt.first { - lhsSym, ok := 
gram.symbolTable.ToSymbol(ttFirst.lhs) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs) - } - - prod, ok := gram.productionSet.findByLHS(lhsSym) - if !ok { - t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym) - } - - actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot) - if err != nil { - t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err) - } - - expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable) - - testFirst(t, actualFirst, expectedFirst) - } - }) - } -} - -func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err := b.build() - if err != nil { - t.Fatal(err) - } - fst, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - if fst == nil { - t.Fatal("genFirstSet returned nil without any error") - } - - return fst, gram -} - -func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry { - t.Helper() - - entry := newFirstEntry() - if empty { - entry.addEmpty() - } - for _, sym := range symbols { - symSym, ok := symTab.ToSymbol(sym) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", sym) - } - entry.add(symSym) - } - - return entry -} - -func testFirst(t *testing.T, actual, expected *firstEntry) { - if actual.empty != expected.empty { - t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty) - } - - if len(actual.symbols) != len(expected.symbols) { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - - for eSym := range expected.symbols { - if _, ok := actual.symbols[eSym]; !ok { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - } -} - -func TestGrammarBuilderOK(t *testing.T) { - type okTest struct { - caption string - specSrc string - validate func(t *testing.T, g *Grammar) - } - - nameTests := []*okTest{ - { - caption: "the `#name` can be the same identifier as a non-terminal symbol", - specSrc: ` -#name s; - -s - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "s" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as a terminal symbol", - specSrc: ` -#name foo; - -s - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "foo" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as the error symbol", - specSrc: ` -#name error; - -s - : foo - | error - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "error" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as a fragment", - specSrc: ` -#name f; - -s - : foo - ; - -foo - : "\f{f}"; -fragment f - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "f" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - } - - modeTests := []*okTest{ - { - caption: "a `#mode` can be the same identifier as a non-terminal symbol", -
specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push s - : 'foo'; -bar #mode s - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "s" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as a terminal symbol", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push bar - : 'foo'; -bar #mode bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "bar" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as the error symbol", - specSrc: ` -#name test; - -s - : foo bar - | error - ; - -foo #push error - : 'foo'; -bar #mode error - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "error" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as a fragment", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push f - : "\f{f}"; -bar #mode f - : 'bar'; -fragment f - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "f" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - } - - precTests := []*okTest{ - { - caption: "a `#prec` allows the empty directive group", - specSrc: ` -#name test; - -#prec (); - -s - : foo - ; - -foo - : 'foo'; -`, - }, - { - caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #left foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and 
associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #right foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "an `#assign` directive gives only a precedence to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #assign foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "a production has the same precedence and associativity as the right-most terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'. 
- ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if barPrec != precNil || barAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc) - } - if sPrec != barPrec || sAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "a production has the same precedence and associativity as the right-most terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'. - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if sPrec != barPrec || sAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "even if a non-terminal symbol appears to the right of a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'.
- ; -a - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var aPrec int - var aAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("a") - ps, _ := g.productionSet.findByLHS(s) - aPrec = g.precAndAssoc.productionPredence(ps[0].num) - aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if aPrec != barPrec || aAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc) - } - if sPrec != fooPrec || sAssoc != fooAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "each alternative in the same production can have its own precedence and associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar - #assign baz -); - -s - : foo - | bar - | baz - | bra - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -bra - : 'bra'; -`, - validate: func(t *testing.T, g *Grammar) { - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - var alt3Prec int - var alt3Assoc assocType - var alt4Prec int - var alt4Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - alt3Prec = g.precAndAssoc.productionPredence(ps[2].num) - alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num) - alt4Prec = g.precAndAssoc.productionPredence(ps[3].num) - alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeLeft { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeRight { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc) - } - if alt3Prec != 3 || alt3Assoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, 
alt3Prec, alt3Assoc) - } - if alt4Prec != precNil || alt4Assoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc) - } - }, - }, - { - caption: "when a production contains no terminal symbols, the production will not have precedence and associativiry", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : a - ; -a - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var aPrec int - var aAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("a") - ps, _ := g.productionSet.findByLHS(s) - aPrec = g.precAndAssoc.productionPredence(ps[0].num) - aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if aPrec != fooPrec || aAssoc != fooAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc) - } - if sPrec != precNil || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if sPrec != fooPrec || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType 
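+// NOTE (editor): the two `#prec`-on-alternative tests (this one and the one
+// above) pin down an asymmetry: `... #prec foo` copies only foo's precedence
+// onto the alternative, while the alternative's associativity stays
+// assocTypeNil instead of inheriting foo's. A sketch combining this override
+// with the default right-most-terminal rule, reusing the hypothetical types
+// and helper from the earlier sketches:
+//
+//	func sketchAltPrecAssoc(rhs []string, precDirSym string, isTerminal func(string) bool, prec map[string]int, assoc map[string]sketchAssoc) (int, sketchAssoc) {
+//		if precDirSym != "" {
+//			return prec[precDirSym], sketchAssocNil // explicit #prec: level only, no associativity
+//		}
+//		if sym, ok := sketchAltPrecSource(rhs, isTerminal); ok {
+//			return prec[sym], assoc[sym] // default: inherit both from the right-most terminal
+//		}
+//		return 0, sketchAssocNil // no terminal and no #prec: precNil/assocTypeNil
+//	}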
- { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if sPrec != fooPrec || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#left` directive", - specSrc: ` -#name test; - -#prec ( - #left $high - #right foo bar - #left $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 3 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#right` directive", - specSrc: ` -#name test; - -#prec ( - #right $high - #left foo bar - #right $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != 
assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 2 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 3 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#assign` directive", - specSrc: ` -#name test; - -#prec ( - #assign $high - #left foo - #right bar - #assign $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 3 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 4 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "names of an ordered symbol and a terminal symbol can duplicate", - specSrc: ` -#name test; - -#prec ( - 
#left foo bar - #right $foo -); - -s - : foo - | bar #prec $foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != fooPrec || alt1Assoc != fooAssoc { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "names of an ordered symbol and a non-terminal symbol can duplicate", - specSrc: ` -#name test; - -#prec ( - #left foo bar - #right $a -); - -s - : a - | bar #prec $a - ; -a - : foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != precNil || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - } - - var tests []*okTest - tests = append(tests, nameTests...) - tests = append(tests, modeTests...) - tests = append(tests, precTests...) 
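+// NOTE (editor): the last few precTests above establish that ordered symbols
+// (`$name`) form their own namespace: they occupy precedence levels inside
+// `#prec (...)` exactly like terminal rows, a `#prec $name` reference hands an
+// alternative only that level (never an associativity), and `$foo` may coexist
+// with a terminal or non-terminal also called `foo`. For example, with
+// `#right $high / #left foo bar / #right $low`, the levels are $high=1,
+// foo=bar=2, $low=3, and `foo #prec $high` yields (prec 1, assoc nil) — the
+// exact numbers asserted above. A sketch of the lookup side, with a separate
+// hypothetical map for the `$` namespace:
+//
+//	func sketchResolvePrecRef(name string, isOrdered bool, ordPrec, termPrec map[string]int) (int, sketchAssoc, bool) {
+//		if isOrdered {
+//			p, ok := ordPrec[name] // `$name` never collides with terminal `name`
+//			return p, sketchAssocNil, ok
+//		}
+//		p, ok := termPrec[name]
+//		return p, sketchAssocNil, ok // #prec always strips associativity
+//	}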
- - for _, test := range tests { - t.Run(test.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(test.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - g, err := b.build() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if test.validate != nil { - test.validate(t, g) - } - }) - } -} - -func TestGrammarBuilderSpecError(t *testing.T) { - type specErrTest struct { - caption string - specSrc string - errs []error - } - - spellingInconsistenciesTests := []*specErrTest{ - { - caption: "a spelling inconsistency appears among non-terminal symbols", - specSrc: ` -#name test; - -a1 - : a_1 - ; -a_1 - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among terminal symbols", - specSrc: ` -#name test; - -s - : foo1 foo_1 - ; - -foo1 - : 'foo1'; -foo_1 - : 'foo_1'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among non-terminal and terminal symbols", - specSrc: ` -#name test; - -a1 - : a_1 - ; - -a_1 - : 'a_1'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same", - specSrc: ` -#name test; - -#prec ( - #assign $p1 $p_1 -); - -s - : foo #prec $p1 - | bar #prec $p_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same", - specSrc: ` -#name test; - -#prec ( - #assign $p1 - #assign $p_1 -); - -s - : foo #prec $p1 - | bar #prec $p_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels the same alternative contains", - specSrc: ` -#name test; - -s - : foo@l1 foo@l_1 - ; - -foo - : 'foo'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels the same production contains", - specSrc: ` -#name test; - -s - : foo@l1 - | bar@l_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels different productions contain", - specSrc: ` -#name test; - -s - : foo@l1 - ; -a - : bar@l_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - } - - prodTests := []*specErrTest{ - { - caption: "a production `b` is unused", - specSrc: ` -#name test; - -a - : foo - ; -b - : foo - ; - -foo - : "foo"; -`, - errs: []error{semErrUnusedProduction}, - }, - { - caption: "a terminal symbol `bar` is unused", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrUnusedTerminal}, - }, - { - caption: "a production `b` and terminal symbol `bar` is unused", - specSrc: ` -#name test; - -a - : foo - ; -b - : bar - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{ - semErrUnusedProduction, - semErrUnusedTerminal, - }, - }, - { - caption: "a production cannot have production directives", - specSrc: ` -#name test; - -s #prec foo - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrInvalidProdDir}, - }, - { - caption: "a lexical production cannot have alternative directives", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo' #skip; -`, - errs: []error{semErrInvalidAltDir}, - }, - { - caption: "a production directive must not 
be duplicated", - specSrc: ` -#name test; - -s - : foo - ; - -foo #skip #skip - : 'foo'; -`, - errs: []error{semErrDuplicateDir}, - }, - { - caption: "an alternative directive must not be duplicated", - specSrc: ` -#name test; - -s - : foo bar #ast foo bar #ast foo bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateDir}, - }, - { - caption: "a production must not have a duplicate alternative (non-empty alternatives)", - specSrc: ` -#name test; - -s - : foo - | foo - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (non-empty and split alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : bar - ; -s - : foo - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (empty alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : - | - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (empty and split alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : - | foo - ; -a - : - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "foo"; -s - : "a"; -`, - errs: []error{semErrDuplicateName}, - }, - { - caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : bar - ; - -foo - : "foo"; -bar - : "bar"; -a - : "a"; -`, - errs: []error{semErrDuplicateName}, - }, - { - caption: "an invalid top-level directive", - specSrc: ` -#name test; - -#foo; - -s - : a - ; - -a - : 'a'; -`, - errs: []error{semErrDirInvalidName}, - }, - { - caption: "a label must be unique in an alternative", - specSrc: ` -#name test; - -s - : foo@x bar@x - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateLabel}, - }, - { - caption: "a label cannot be the same name as terminal symbols", - specSrc: ` -#name test; - -s - : foo bar@foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateLabel}, - }, - { - caption: "a label cannot be the same name as non-terminal symbols", - specSrc: ` -#name test; - -s - : foo@a - | a - ; -a - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{ - semErrInvalidLabel, - }, - }, - } - - nameDirTests := []*specErrTest{ - { - caption: "the `#name` directive is required", - specSrc: ` -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrNoGrammarName}, - }, - { - caption: "the `#name` directive needs an ID parameter", - specSrc: ` -#name; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive cannot take a pattern parameter", - specSrc: ` -#name "test"; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive cannot take a string parameter", - specSrc: ` -#name 'test'; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive takes just one parameter", - specSrc: ` -#name test1 test2; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - precDirTests := []*specErrTest{ - { - caption: "the `#prec` directive needs a 
directive group parameter", - specSrc: ` -#name test; - -#prec; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an ID parameter", - specSrc: ` -#name test; - -#prec foo; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec $x; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec "foo"; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec 'foo'; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive takes just one directive group parameter", - specSrc: ` -#name test; - -#prec () (); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - leftDirTests := []*specErrTest{ - { - caption: "the `#left` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #left -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #left error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #left x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #left "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #left 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a directive parameter", - specSrc: ` -#name test; - -#prec ( - #left () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #left $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #left foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #left $x - #left $x -); - -s - : foo #prec $x - ; - -foo 
- : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #right foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #right $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - rightDirTests := []*specErrTest{ - { - caption: "the `#right` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #right -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #right error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #right x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #right s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #right "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #right 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -#prec ( - #right () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #right foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#right` directive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #right $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #right foo - #right foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #right $x - #right $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #left $x - #right $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - assignDirTests := []*specErrTest{ - { - caption: "the `#assign` directive needs ID 
parameters", - specSrc: ` -#name test; - -#prec ( - #assign -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #assign error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #assign x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #assign s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #assign "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #assign 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a directive parameter", - specSrc: ` -#name test; - -#prec ( - #assign () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #assign foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #assign $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #assign foo - #assign foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #assign $x - #assign $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #assign foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #assign $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - errorSymTests := []*specErrTest{ - { - caption: "cannot use the error symbol as a non-terminal symbol", - specSrc: ` -#name test; - -s - : error - ; -error - : foo - ; - -foo: 'foo'; -`, - errs: []error{ - semErrErrSymIsReserved, - semErrDuplicateName, - }, - }, - { - caption: "cannot use the error symbol as a terminal symbol", - specSrc: ` -#name test; - -s - : error - ; - -error: 'error'; -`, - errs: []error{semErrErrSymIsReserved}, - }, - { - caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -error #skip - : 'error'; -`, - errs: 
[]error{semErrErrSymIsReserved}, - }, - } - - astDirTests := []*specErrTest{ - { - caption: "the `#ast` directive needs ID or label prameters", - specSrc: ` -#name test; - -s - : foo #ast - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo #ast $x - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #ast "foo" - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #ast 'foo' - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo #ast () - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative", - specSrc: ` -#name test; - -s - : foo bar #ast foo x - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive", - specSrc: ` -#name test; - -s - : foo #ast bar - | bar - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a label in a different alternative cannot be a parameter of the `#ast` directive", - specSrc: ` -#name test; - -s - : foo #ast b - | bar@b - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol can appear in the `#ast` directive only once", - specSrc: ` -#name test; - -s - : foo #ast foo foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "a label can appear in the `#ast` directive only once", - specSrc: ` -#name test; - -s - : foo@x #ast x x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label", - specSrc: ` -#name test; - -s - : foo@x #ast foo x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "symbol `foo` is ambiguous because it appears in an alternative twice", - specSrc: ` -#name test; - -s - : foo foo #ast foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrAmbiguousElem}, - }, - { - caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label", - specSrc: ` -#name test; - -s - : foo@x foo #ast foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrAmbiguousElem}, - }, - { - caption: "the expansion operator cannot be applied to a terminal symbol", - specSrc: ` -#name test; - -s - : foo #ast foo... 
- ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - altPrecDirTests := []*specErrTest{ - { - caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter", - specSrc: ` -#name test; - -s - : foo #prec - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -s - : foo #prec error - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -s - : foo #prec x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -s - : a #prec b - | b - ; -a - : foo - ; -b - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an undefined ordered symbol parameter", - specSrc: ` -#name test; - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrUndefinedOrdSym}, - }, - { - caption: "the `#prec` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #prec "foo" - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #prec 'foo' - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a directive parameter", - specSrc: ` -#name test; - -s - : foo #prec () - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol the `#prec` directive takes must be given precedence explicitly", - specSrc: ` -#name test; - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrUndefinedPrec}, - }, - } - - recoverDirTests := []*specErrTest{ - { - caption: "the `#recover` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo #recover foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo #recover $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #recover "foo" - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #recover 'foo' - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo #recover () - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - fragmentTests := []*specErrTest{ - { - caption: "a production cannot contain a fragment", - specSrc: ` -#name test; - -s - : f - ; - -fragment f - : 'fragment'; -`, - errs: []error{semErrUndefinedSym}, - }, - { - caption: "fragments cannot be duplicated", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "\f{f}"; -fragment f - : 'fragment 1'; -fragment f - : 'fragment 2'; -`, - errs: []error{semErrDuplicateFragment}, - }, - } - - modeDirTests := []*specErrTest{ - { - caption: "the `#mode` 
directive needs an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo - : 'foo'; -bar #mode $x - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode "mode_1" - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode 'mode_1' - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode () - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - pushDirTests := []*specErrTest{ - { - caption: "the `#push` directive needs an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive takes just one ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 mode_2 - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo #push $x - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push "mode_1" - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push 'mode_1' - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push () - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - popDirTests := []*specErrTest{ - { - caption: "the `#pop` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop mode_1 - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop $x - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop "mode_1" - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode 
mode_1 - : 'bar'; -baz #pop 'mode_1' - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a directive parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop () - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - skipDirTests := []*specErrTest{ - { - caption: "the `#skip` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip bar - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo #skip $x - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip "bar" - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip 'bar' - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip () - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a terminal symbol used in productions cannot have the skip directive", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrTermCannotBeSkipped}, - }, - } - - var tests []*specErrTest - tests = append(tests, spellingInconsistenciesTests...) - tests = append(tests, prodTests...) - tests = append(tests, nameDirTests...) - tests = append(tests, precDirTests...) - tests = append(tests, leftDirTests...) - tests = append(tests, rightDirTests...) - tests = append(tests, assignDirTests...) - tests = append(tests, errorSymTests...) - tests = append(tests, astDirTests...) - tests = append(tests, altPrecDirTests...) - tests = append(tests, recoverDirTests...) - tests = append(tests, fragmentTests...) - tests = append(tests, modeDirTests...) - tests = append(tests, pushDirTests...) - tests = append(tests, popDirTests...) - tests = append(tests, skipDirTests...) - for _, test := range tests { - t.Run(test.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(test.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - _, err = b.build() - if err == nil { - t.Fatal("an expected error didn't occur") - } - specErrs, ok := err.(verr.SpecErrors) - if !ok { - t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err) - } - if len(specErrs) != len(test.errs) { - t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs) - } - for _, expected := range test.errs { - for _, actual := range specErrs { - if actual.Cause == expected { - return - } - } - } - t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs) - }) - } -} - -func TestGenLALR1Automaton(t *testing.T) { - // This grammar belongs to LALR(1) class, not SLR(1). 
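+// NOTE (editor): why this grammar is LALR(1) but not SLR(1): the automaton
+// built below contains a state whose kernel is { s -> l . eq r, r -> l . }
+// (expectedKernels[2]). FOLLOW(r) contains 'eq', because r ends the body of
+// l -> ref r and therefore inherits FOLLOW(l), and 'eq' follows l in
+// s -> l eq r. So an SLR(1) table would both shift 'eq' and reduce by r -> l
+// on 'eq' in that state — a shift/reduce conflict. LALR(1) instead computes
+// the state-specific lookahead of r -> l there as {EOF} only, which is
+// exactly what the withLookAhead expectation for expectedKernels[2] asserts.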
- src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var gram *Grammar - var automaton *lalr1Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - - firstSet, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatalf("failed to create a FIRST set: %v", err) - } - - automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) - if err != nil { - t.Fatalf("failed to create a LALR1 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLALR1Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("eq"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "r"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[9], - genSym("l"): 
expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "ref", "r"), - }, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "l", "eq", "r"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton.lr0Automaton) -} - -type expectedLRState struct { - kernelItems []*lrItem - nextStates map[symbol.Symbol][]*lrItem - reducibleProds []*production - emptyProdItems []*lrItem -} - -func TestGenLR0Automaton(t *testing.T) { - src := ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("expr'", 0, "expr"), - }, - 1: { - genLR0Item("expr'", 1, "expr"), - genLR0Item("expr", 1, "expr", "add", "term"), - }, - 2: { - genLR0Item("expr", 1, "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 3: { - genLR0Item("term", 1, "factor"), - }, - 4: { - genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), - }, - 5: { - genLR0Item("factor", 1, "id"), - }, - 6: { - genLR0Item("expr", 2, "expr", "add", "term"), - }, - 7: { - genLR0Item("term", 2, "term", "mul", "factor"), - }, - 8: { - genLR0Item("expr", 1, "expr", "add", "term"), - genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), - }, - 9: { - genLR0Item("expr", 3, "expr", "add", "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 10: { - genLR0Item("term", 3, "term", "mul", "factor"), - }, - 11: { - genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[1], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("expr'", "expr"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], 
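+// NOTE (editor): the nextStates maps in these expectations encode GOTO edges:
+// from a state's closure, advancing the dot over a symbol X yields the kernel
+// of the successor state. A compact sketch of that single step (hypothetical
+// item type, not the library's lrItem):
+//
+//	type sketchItem struct {
+//		prod string   // production name, e.g. "expr"
+//		dot  int      // dot position within rhs
+//		rhs  []string // right-hand-side symbols
+//	}
+//
+//	func sketchGoto(closure []sketchItem, x string) []sketchItem {
+//		var kernel []sketchItem
+//		for _, it := range closure {
+//			if it.dot < len(it.rhs) && it.rhs[it.dot] == x {
+//				moved := it
+//				moved.dot++ // advance the dot over X
+//				kernel = append(kernel, moved)
+//			}
+//		}
+//		return kernel // this kernel identifies the next state
+//	}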
- }, - reducibleProds: []*production{ - genProd("expr", "term"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "factor"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[8], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("term"): expectedKernels[9], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("factor"): expectedKernels[10], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - genSym("r_paren"): expectedKernels[11], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "expr", "add", "term"), - }, - }, - { - kernelItems: expectedKernels[10], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "term", "mul", "factor"), - }, - }, - { - kernelItems: expectedKernels[11], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "l_paren", "expr", "r_paren"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { - src := ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar - : b - | - ; - -b: "bar"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("s'", 0, "s"), - }, - 1: { - genLR0Item("s'", 1, "s"), - }, - 2: { - genLR0Item("s", 1, "foo", "bar"), - }, - 3: { - genLR0Item("s", 2, "foo", "bar"), - }, - 4: { - genLR0Item("bar", 1, "b"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("foo"): expectedKernels[2], - }, - reducibleProds: 
[]*production{ - genProd("foo"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("foo", 0), - }, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("bar"): expectedKernels[3], - genSym("b"): expectedKernels[4], - }, - reducibleProds: []*production{ - genProd("bar"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("bar", 0), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "foo", "bar"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("bar", "b"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) { - if len(automaton.states) != len(expected) { - t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states)) - } - - for i, eState := range expected { - t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("a kernel was not found: %v", k.id) - } - - // test look-ahead symbols - { - if len(state.kernel.items) != len(eState.kernelItems) { - t.Errorf("kernel count is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) - } - for _, eKItem := range eState.kernelItems { - var kItem *lrItem - for _, it := range state.kernel.items { - if it.id != eKItem.id { - continue - } - kItem = it - break - } - if kItem == nil { - t.Fatalf("kernel item not found: %v", eKItem.id) - } - - if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { - t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) - } - - for eSym := range eKItem.lookAhead.symbols { - if _, ok := kItem.lookAhead.symbols[eSym]; !ok { - t.Errorf("look-ahead symbol not found: %v", eSym) - } - } - } - } - - // test next states - { - if len(state.next) != len(eState.nextStates) { - t.Errorf("next state count is mismatched; want: %v, got: %v", len(eState.nextStates), len(state.next)) - } - for eSym, eKItems := range eState.nextStates { - nextStateKernel, err := newKernel(eKItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - nextState, ok := state.next[eSym] - if !ok { - t.Fatalf("next state was not found; state: %v, symbol: %v", state.id, eSym) - } - if nextState != nextStateKernel.id { - t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) - } - } - } - - // test reducible productions - { - if len(state.reducible) != len(eState.reducibleProds) { - t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) - } - for _, eProd := range eState.reducibleProds { - if _, ok := state.reducible[eProd.id]; !ok { - t.Errorf("reducible production was not found: %v", eProd.id) - } - } - - if len(state.emptyProdItems) != len(eState.emptyProdItems) { - t.Errorf("empty production item count is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) - } - 
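// NOTE (editor): The emptyProdItems expectations above exist because closure
// can introduce an item that is already complete: for an empty production
// A -> , the item A -> . has its dot at the end the moment it is added, so the
// state is immediately reducible by it. A minimal, hypothetical closure sketch
// over plain string symbols -- not urubu's lrItem/kernel API; assumes
// import "strings" and that prods maps a nonterminal to its alternatives
// (an empty production is an empty RHS slice):
type toyItem struct {
	lhs string
	rhs []string
	dot int
}

func toyClosure(kernel []toyItem, prods map[string][][]string) []toyItem {
	items := append([]toyItem(nil), kernel...)
	for i := 0; i < len(items); i++ {
		it := items[i]
		if it.dot >= len(it.rhs) {
			continue // complete item; contributes a reduction, not new items
		}
		for _, alt := range prods[it.rhs[it.dot]] { // no-op for terminals
			cand := toyItem{lhs: it.rhs[it.dot], rhs: alt}
			if !containsToyItem(items, cand) {
				items = append(items, cand)
			}
		}
	}
	return items
}

func containsToyItem(items []toyItem, c toyItem) bool {
	for _, it := range items {
		if it.lhs == c.lhs && it.dot == c.dot && strings.Join(it.rhs, " ") == strings.Join(c.rhs, " ") {
			return true
		}
	}
	return false
}

// For the grammar in this test (s -> foo bar; foo -> ; bar -> b | ), the
// closure of {s' -> . s} contains the complete item foo -> . , matching the
// genLR0Item("foo", 0) expectation for state 0.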
for _, eItem := range eState.emptyProdItems { - found := false - for _, item := range state.emptyProdItems { - if item.id != eItem.id { - continue - } - found = true - break - } - if !found { - t.Errorf("empty production item not found: %v", eItem.id) - } - } - } - }) - } -} - -type expectedState struct { - kernelItems []*lrItem - acts map[symbol.Symbol]testActionEntry - goTos map[symbol.Symbol][]*lrItem -} - -func TestGenLALRParsingTable(t *testing.T) { - src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var ptab *ParsingTable - var automaton *lalr1Automaton - var gram *Grammar - var nonTermCount int - var termCount int - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - first, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatal(err) - } - automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) - if err != nil { - t.Fatal(err) - } - - nonTermTexts, err := gram.symbolTable.NonTerminalTexts() - if err != nil { - t.Fatal(err) - } - termTexts, err := gram.symbolTable.TerminalTexts() - if err != nil { - t.Fatal(err) - } - nonTermCount = len(nonTermTexts) - termCount = len(termTexts) - - lalr := &lrTableBuilder{ - automaton: automaton.lr0Automaton, - prods: gram.productionSet, - termCount: termCount, - nonTermCount: nonTermCount, - symTab: gram.symbolTable, - } - ptab, err = lalr.build() - if err != nil { - t.Fatalf("failed to create a LALR parsing table: %v", err) - } - if ptab == nil { - t.Fatal("genLALRParsingTable returns nil without any error") - } - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []expectedState{ - { - kernelItems: expectedKernels[0], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[1], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s'", "s"), - }, - }, - }, - { - 
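// NOTE (editor): testLRAutomaton above, and the LALR expectations below, look
// states up via newKernel(...).id -- an identity derived from the kernel's
// item set, independent of item order. A toy sketch of such a fingerprint,
// assuming items carry comparable string IDs (urubu's actual ID type and
// hashing may differ); assumes imports "crypto/sha256", "encoding/hex",
// "sort", "strings":
func kernelFingerprint(itemIDs []string) string {
	ids := append([]string(nil), itemIDs...)
	sort.Strings(ids) // identity must not depend on insertion order
	sum := sha256.Sum256([]byte(strings.Join(ids, "\x00")))
	return hex.EncodeToString(sum[:])
}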
kernelItems: expectedKernels[2], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeShift, - nextState: expectedKernels[6], - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[3], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[4], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - }, - }, - { - kernelItems: expectedKernels[5], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - }, - }, - { - kernelItems: expectedKernels[6], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("l"): expectedKernels[8], - genSym("r"): expectedKernels[9], - }, - }, - { - kernelItems: expectedKernels[7], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[8], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[9], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "l", "eq", "r"), - }, - }, - }, - } - - t.Run("initial state", func(t *testing.T) { - iniState := findStateByNum(automaton.states, ptab.InitialState) - if iniState == nil { - t.Fatalf("the initial state was not found: #%v", ptab.InitialState) - } - eIniState, err := newKernel(expectedKernels[0]) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - if iniState.id != eIniState.id { - t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) - } - }) - - for i, eState := range expectedStates { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("state was not found: #%v", 0) - } - - testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) - testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) - }) - } -} - -func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eAct := range expectedState.acts { - nonEmptyEntries[eSym.Num()] = struct{}{} - - ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num()) - if ty != eAct.ty { - t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) - } - switch 
eAct.ty { - case ActionTypeShift: - eNextState, err := newKernel(eAct.nextState) - if err != nil { - t.Fatal(err) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found; state: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - case ActionTypeReduce: - prod := findProductionByNum(gram.productionSet, prodNum) - if prod == nil { - t.Fatalf("production was not found: #%v", prodNum) - } - if prod.id != eAct.production.id { - t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) - } - } - } - for symNum := 0; symNum < termCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum)) - if ty != ActionTypeError { - t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, production: #%v", state.num, symNum, ty, stateNum, prodNum) - } - } -} - -func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eGoTo := range expectedState.goTos { - nonEmptyEntries[eSym.Num()] = struct{}{} - - eNextState, err := newKernel(eGoTo) - if err != nil { - t.Fatal(err) - } - ty, stateNum := ptab.getGoTo(state.num, eSym.Num()) - if ty != GoToTypeRegistered { - t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - } - for symNum := 0; symNum < nonTermCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum)) - if ty != GoToTypeError { - t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) - } - } -} - -type testActionEntry struct { - ty ActionType - nextState []*lrItem - production *production -} - -func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { - for _, state := range states { - if state.num == num { - return state - } - } - return nil -} - -func findProductionByNum(prods *productionSet, num productionNum) *production { - for _, prod := range prods.getAllProductions() { - if prod.num == num { - return prod - } - } - return nil -} - -type testSymbolGenerator func(text string) symbol.Symbol - -func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator { - return func(text string) symbol.Symbol { - t.Helper() - - sym, ok := symTab.ToSymbol(text) - if !ok { - t.Fatalf("symbol was not found: %v", text) - } - return sym - } -} - -type testProductionGenerator func(lhs string, rhs ...string) *production - -func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator { - return func(lhs string, rhs ...string) *production { - t.Helper() - - rhsSym := []symbol.Symbol{} - for _, text := range rhs { - rhsSym = append(rhsSym, genSym(text)) - } - prod, err := newProduction(genSym(lhs), rhsSym) - if err != nil { - t.Fatalf("failed to create a production: %v", err) - } - - 
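// NOTE (editor): testAction and testGoTo above sweep every terminal and
// nonterminal column of a state's row and demand ActionTypeError/GoToTypeError
// for unlisted entries, which implies dense, fully materialized tables. A
// minimal sketch of a row-major ACTION layout (hypothetical encoding; urubu's
// ParsingTable differs in detail):
type toyActionEntry int32 // 0: error, n > 0: shift to state n-1, n < 0: reduce by production -n-1

func toyGetAction(table []toyActionEntry, termCount, state, term int) toyActionEntry {
	return table[state*termCount+term] // row = state, column = terminal
}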
return prod - } -} - -type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem - -func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { - return func(lhs string, dot int, rhs ...string) *lrItem { - t.Helper() - - prod := genProd(lhs, rhs...) - item, err := newLR0Item(prod, dot) - if err != nil { - t.Fatalf("failed to create a LR0 item: %v", err) - } - - return item - } -} - -func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem { - if item.lookAhead.symbols == nil { - item.lookAhead.symbols = map[symbol.Symbol]struct{}{} - } - - for _, a := range lookAhead { - item.lookAhead.symbols[a] = struct{}{} - } - - return item -} diff --git a/tests/unit/urubu/grammar/lexical.go b/tests/unit/urubu/grammar/lexical.go deleted file mode 100644 index b621cd2..0000000 --- a/tests/unit/urubu/grammar/lexical.go +++ /dev/null @@ -1,338 +0,0 @@ -package lexical - -import ( - "encoding/json" - "fmt" - "testing" - - spec "urubu/spec/grammar" -) - -func TestLexSpec_Validate(t *testing.T) { - // We expect that the spelling inconsistency error will occur. - spec := &LexSpec{ - Entries: []*LexEntry{ - { - Modes: []spec.LexModeName{ - // 'Default' is the spelling inconsistency because 'default' is predefined. - "Default", - }, - Kind: "foo", - Pattern: "foo", - }, - }, - } - err := spec.Validate() - if err == nil { - t.Fatalf("expected error didn't occur") - } -} - -func TestSnakeCaseToUpperCamelCase(t *testing.T) { - tests := []struct { - snake string - camel string - }{ - { - snake: "foo", - camel: "Foo", - }, - { - snake: "foo_bar", - camel: "FooBar", - }, - { - snake: "foo_bar_baz", - camel: "FooBarBaz", - }, - { - snake: "Foo", - camel: "Foo", - }, - { - snake: "fooBar", - camel: "FooBar", - }, - { - snake: "FOO", - camel: "FOO", - }, - { - snake: "FOO_BAR", - camel: "FOOBAR", - }, - { - snake: "_foo_bar_", - camel: "FooBar", - }, - { - snake: "___foo___bar___", - camel: "FooBar", - }, - } - for _, tt := range tests { - c := SnakeCaseToUpperCamelCase(tt.snake) - if c != tt.camel { - t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) - } - } -} - -func TestFindSpellingInconsistencies(t *testing.T) { - tests := []struct { - ids []string - duplicated [][]string - }{ - { - ids: []string{"foo", "foo"}, - duplicated: nil, - }, - { - ids: []string{"foo", "Foo"}, - duplicated: [][]string{{"Foo", "foo"}}, - }, - { - ids: []string{"foo", "foo", "Foo"}, - duplicated: [][]string{{"Foo", "foo"}}, - }, - { - ids: []string{"foo_bar_baz", "FooBarBaz"}, - duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, - }, - { - ids: []string{"foo", "Foo", "bar", "Bar"}, - duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, - }, - { - ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, - duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - duplicated := FindSpellingInconsistencies(tt.ids) - if len(duplicated) != len(tt.duplicated) { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) - } - for i, dupIDs := range duplicated { - if len(dupIDs) != len(tt.duplicated[i]) { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) - } - for j, id := range dupIDs { - if id != tt.duplicated[i][j] { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) - } - } - } - }) - } -} - -func TestCompile(t *testing.T) { - tests := []struct { - Caption string - Spec string - Err bool - }{ - { - 
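// NOTE (editor): The TestSnakeCaseToUpperCamelCase and
// TestFindSpellingInconsistencies tables above pin the normalization down:
// split on '_', drop empty segments, upper-case each segment's first rune,
// and report identifier groups that collide after normalization. A
// behavior-compatible sketch reproducing those tables -- not the library's
// implementation; assumes imports "sort", "strings", "unicode":
func snakeToUpperCamel(s string) string {
	var b strings.Builder
	for _, part := range strings.Split(s, "_") {
		if part == "" {
			continue // tolerates leading, trailing, and repeated underscores
		}
		r := []rune(part)
		b.WriteRune(unicode.ToUpper(r[0]))
		b.WriteString(string(r[1:])) // tail keeps its case, so "FOO" stays "FOO"
	}
	return b.String()
}

func spellingClashes(ids []string) [][]string {
	byNorm := map[string]map[string]struct{}{}
	for _, id := range ids {
		n := snakeToUpperCamel(id)
		if byNorm[n] == nil {
			byNorm[n] = map[string]struct{}{}
		}
		byNorm[n][id] = struct{}{}
	}
	var clashes [][]string
	for _, group := range byNorm {
		if len(group) < 2 {
			continue // one spelling, even repeated, is consistent
		}
		var g []string
		for id := range group {
			g = append(g, id)
		}
		sort.Strings(g)
		clashes = append(clashes, g)
	}
	sort.Slice(clashes, func(i, j int) bool { return clashes[i][0] < clashes[j][0] })
	return clashes
}

// spellingClashes([]string{"foo", "Foo", "bar", "Bar"}) yields
// [["Bar", "bar"], ["Foo", "foo"]], matching the expectations above.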
Caption: "allow duplicates names between fragments and non-fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "\\f{a2z}" - }, - { - "fragment": true, - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - }, - { - Caption: "don't allow duplicates names in non-fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" - }, - { - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow duplicates names in fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "\\f{a2z}" - }, - { - "fragments": true, - "kind": "a2z", - "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" - }, - { - "fragments": true, - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow kind names in the same mode to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow kind names across modes to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["default"], - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "modes": ["other_mode"], - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow mode names to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["foo_1"], - "kind": "a", - "pattern": "a" - }, - { - "modes": ["foo1"], - "kind": "b", - "pattern": "b" - } - ] -} -`, - Err: true, - }, - { - Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a", - "pattern": "a" - }, - { - "fragment": true, - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "fragment": true, - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - }, - { - Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["default"], - "kind": "a", - "pattern": "a" - }, - { - "modes": ["default"], - "fragment": true, - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "modes": ["other_mode"], - "fragment": true, - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) { - lspec := &LexSpec{} - err := json.Unmarshal([]byte(tt.Spec), lspec) - if err != nil { - t.Fatalf("%v", err) - } - clspec, err, _ := Compile(lspec, CompressionLevelMin) - if tt.Err { - if err == nil { - t.Fatalf("expected an error") - } - if clspec != nil { - t.Fatalf("Compile function mustn't return a compiled specification") - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if clspec == nil { - t.Fatalf("Compile function must return a compiled specification") - } - } - }) - } -} diff --git a/tests/unit/urubu/grammar/lexical/dfa.go b/tests/unit/urubu/grammar/lexical/dfa.go deleted file mode 100644 index 1a3e16a..0000000 --- a/tests/unit/urubu/grammar/lexical/dfa.go +++ /dev/null @@ -1,442 +0,0 @@ -package dfa - -import ( - "fmt" - "strings" - "testing" - - "urubu/grammar/lexical/parser" - spec "urubu/spec/grammar" -) - -func 
TestGenDFA(t *testing.T) { - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - dfa := GenDFA(bt, symTab) - if dfa == nil { - t.Fatalf("DFA is nil") - } - - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) - s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) - s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) - s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) - - rune2Int := func(char rune, index int) uint8 { - return uint8([]byte(string(char))[index]) - } - - tranS0 := [256]string{} - tranS0[rune2Int('a', 0)] = s1.hash() - tranS0[rune2Int('b', 0)] = s0.hash() - - tranS1 := [256]string{} - tranS1[rune2Int('a', 0)] = s1.hash() - tranS1[rune2Int('b', 0)] = s2.hash() - - tranS2 := [256]string{} - tranS2[rune2Int('a', 0)] = s1.hash() - tranS2[rune2Int('b', 0)] = s3.hash() - - tranS3 := [256]string{} - tranS3[rune2Int('a', 0)] = s1.hash() - tranS3[rune2Int('b', 0)] = s0.hash() - - expectedTranTab := map[string][256]string{ - s0.hash(): tranS0, - s1.hash(): tranS1, - s2.hash(): tranS2, - s3.hash(): tranS3, - } - if len(dfa.TransitionTable) != len(expectedTranTab) { - t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) - } - for h, eTranTab := range expectedTranTab { - tranTab, ok := dfa.TransitionTable[h] - if !ok { - t.Errorf("no entry; hash: %v", h) - continue - } - if len(tranTab) != len(eTranTab) { - t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) - } - for c, eNext := range eTranTab { - if eNext == "" { - continue - } - - next := tranTab[c] - if next == "" { - t.Errorf("no entry: hash: %v, char: %v", h, c) - } - if next != eNext { - t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) - } - } - } - - if dfa.InitialState != s0.hash() { - t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) - } - - accTab := map[string]spec.LexModeKindID{ - s3.hash(): 1, - } - if len(dfa.AcceptingStatesTable) != len(accTab) { - t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) - } - for eState, eID := range accTab { - id, ok := dfa.AcceptingStatesTable[eState] - if !ok { - t.Errorf("accepting state is not found: state: %v", eState) - } - if id != eID { - t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) - } - } -} - -func TestNewSymbolPosition(t *testing.T) { - tests := []struct { - n uint16 - endMark bool - err bool - }{ - { - n: 0, - endMark: false, - err: true, - }, - { - n: 0, - endMark: true, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: true, - err: true, - }, - { - n: symbolPositionMin, - endMark: false, - }, - { - n: symbolPositionMin, - endMark: true, - }, - { - n: 
symbolPositionMax, - endMark: false, - }, - { - n: symbolPositionMax, - endMark: true, - }, - { - n: symbolPositionMax + 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMax + 1, - endMark: true, - err: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { - pos, err := newSymbolPosition(tt.n, tt.endMark) - if tt.err { - if err == nil { - t.Fatal("err is nil") - } - return - } - if err != nil { - t.Fatal(err) - } - n, endMark := pos.describe() - if n != tt.n || endMark != tt.endMark { - t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) - } - }) - } -} - -func TestByteTree(t *testing.T) { - tests := []struct { - root byteTree - nullable bool - first *symbolPositionSet - last *symbolPositionSet - }{ - { - root: newSymbolNodeWithPos(0, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newEndMarkerNodeWithPos(1, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newRepeatNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newOptionNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - if tt.root.nullable() != tt.nullable { - t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) - } - if tt.first.hash() != tt.root.first().hash() { - t.Errorf("unexpected first positions attribute; want: %v, 
got: %v", tt.first, tt.root.first()) - } - if tt.last.hash() != tt.root.last().hash() { - t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) - } - }) - } -} - -func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { - n := newSymbolNode(v) - n.pos = pos - return n -} - -func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { - n := newEndMarkerNode(spec.LexModeKindID(id)) - n.pos = pos - return n -} - -func TestFollowAndSymbolTable(t *testing.T) { - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - - { - followTab := genFollowTable(bt) - if followTab == nil { - t.Fatal("follow table is nil") - } - expectedFollowTab := followTable{ - 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 3: newSymbolPositionSet().add(symPos(4)), - 4: newSymbolPositionSet().add(symPos(5)), - 5: newSymbolPositionSet().add(endPos(6)), - } - testFollowTable(t, expectedFollowTab, followTab) - } - - { - entry := func(v byte) byteRange { - return byteRange{ - from: v, - to: v, - } - } - - expectedSymTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{ - symPos(1): entry(byte('a')), - symPos(2): entry(byte('b')), - symPos(3): entry(byte('a')), - symPos(4): entry(byte('b')), - symPos(5): entry(byte('b')), - }, - endPos2ID: map[symbolPosition]spec.LexModeKindID{ - endPos(6): 1, - }, - } - testSymbolTable(t, expectedSymTab, symTab) - } -} - -func testFollowTable(t *testing.T, expected, actual followTable) { - if len(actual) != len(expected) { - t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) - } - for ePos, eSet := range expected { - aSet, ok := actual[ePos] - if !ok { - t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) - } - if aSet.hash() != eSet.hash() { - t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet) - } - } -} - -func testSymbolTable(t *testing.T, expected, actual *symbolTable) { - t.Helper() - - if len(actual.symPos2Byte) != len(expected.symPos2Byte) { - t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) - } - for ePos, eByte := range expected.symPos2Byte { - byte, ok := actual.symPos2Byte[ePos] - if !ok { - t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) - continue - } - if byte.from != eByte.from || byte.to != eByte.to { - t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte) - } - } - - if len(actual.endPos2ID) != len(expected.endPos2ID) { - t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) - } - for ePos, eID := range expected.endPos2ID { - id, ok := actual.endPos2ID[ePos] - if !ok { - t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) - continue - } - 
if id != eID { - t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) - } - } -} diff --git a/tests/unit/urubu/grammar/lexical/parser.go b/tests/unit/urubu/grammar/lexical/parser.go deleted file mode 100644 index d5d7039..0000000 --- a/tests/unit/urubu/grammar/lexical/parser.go +++ /dev/null @@ -1,1907 +0,0 @@ -package parser - -import ( - "fmt" - "reflect" - "strings" - "testing" - - spec "urubu/spec/grammar" - "urubu/ucd" -) - -func TestLexer(t *testing.T) { - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "lexer can recognize ordinary characters", - src: "123abcいろは", - tokens: []*token{ - newToken(tokenKindChar, '1'), - newToken(tokenKindChar, '2'), - newToken(tokenKindChar, '3'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, 'b'), - newToken(tokenKindChar, 'c'), - newToken(tokenKindChar, 'い'), - newToken(tokenKindChar, 'ろ'), - newToken(tokenKindChar, 'は'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in default mode", - src: ".*+?|()[\\u", - tokens: []*token{ - newToken(tokenKindAnyChar, nullChar), - newToken(tokenKindRepeat, nullChar), - newToken(tokenKindRepeatOneOrMore, nullChar), - newToken(tokenKindOption, nullChar), - newToken(tokenKindAlt, nullChar), - newToken(tokenKindGroupOpen, nullChar), - newToken(tokenKindGroupClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in default mode", - src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", - tokens: []*token{ - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "], {, and } are treated as ordinary characters in default mode", - src: "]{}", - tokens: []*token{ - newToken(tokenKindChar, ']'), - newToken(tokenKindChar, '{'), - newToken(tokenKindChar, '}'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in bracket expression mode", - src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09AF"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09abcf"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in bracket expression mode", - src: "[\\^a\\-z]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, 
nullChar), - }, - }, - { - caption: "in a bracket expression, the special characters are also handled as normal characters", - src: "[\\\\.*+?|()[", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", - // [...-...][...-][-...][-] - // ~~~~~~~ ~ ~ ~ - // ^ ^ ^ ^ - // | | | `-- Ordinary Character (b) - // | | `-- Ordinary Character (b) - // | `-- Ordinary Character (b) - // `-- Character Range (a) - // - // a. *-* is handled as a character-range expression. - // b. *-, -*, or - are handled as ordinary characters. - src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", - // [^...^...][^] - // ~~ ~ ~~ - // ^ ^ ^^ - // | | |`-- Ordinary Character (c) - // | | `-- Bracket Expression - // | `-- Ordinary Character (b) - // `-- Inverse Bracket Expression (a) - // - // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. - // b. 
caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. - // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. - src: "[^^][^]", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "\\@", - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "\\", - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "[\\@", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "[\\", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer can recognize the special characters and code points in code point expression mode", - src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - 
newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a one digit hex string isn't a valid code point", - src: "\\u{0", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a two digits hex string isn't a valid code point", - src: "\\u{01", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a three digits hex string isn't a valid code point", - src: "\\u{012", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a four digits hex string is a valid code point", - src: "\\u{0123}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a five digits hex string isn't a valid code point", - src: "\\u{01234", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a six digits hex string is a valid code point", - src: "\\u{012345}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("012345"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a seven digits hex string isn't a valid code point", - src: "\\u{0123456", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{g", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{G", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "lexer can recognize the special characters and symbols in character property expression mode", - src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", - tokens: []*token{ - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - 
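// NOTE (editor): The cases above fix the \u{...} contract: the lexer accepts
// exactly four or six hex digits (synErrInvalidCodePoint otherwise), and the
// parser additionally rejects values beyond U+10FFFF (synErrCPExpOutOfRange,
// see TestParse below). A sketch of the combined check (hypothetical helper;
// assumes import "strconv"):
func validCodePoint(hexDigits string) bool {
	if len(hexDigits) != 4 && len(hexDigits) != 6 {
		return false // \u{0}, \u{012}, \u{01234}, \u{0123456} are rejected
	}
	n, err := strconv.ParseUint(hexDigits, 16, 32)
	return err == nil && n <= 0x10FFFF // \u{110000} is out of range
}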
newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters and symbols in fragment expression mode", - src: "\\f{integer}", - tokens: []*token{ - newToken(tokenKindFragmentLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newFragmentSymbolToken("integer"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a fragment expression is not supported in a bracket expression", - src: "[\\f", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "a fragment expression is not supported in an inverse bracket expression", - src: "[^\\f", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - lex := newLexer(strings.NewReader(tt.src)) - var err error - var tok *token - i := 0 - for { - tok, err = lex.next() - if err != nil { - break - } - if i >= len(tt.tokens) { - break - } - eTok := tt.tokens[i] - i++ - testToken(t, tok, eTok) - - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - detail, cause := lex.error() - if cause != tt.err { - t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - if i < len(tt.tokens) { - t.Fatalf("expected more tokens") - } - }) - } -} - -func testToken(t *testing.T, a, e *token) { - t.Helper() - if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { - t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) - } -} - -func TestParse(t *testing.T) { - tests := []struct { - pattern string - fragments map[spec.LexKindName]string - ast CPTree - syntaxError error - - // When an AST is large, such as patterns containing a character property expression, this test only checks - // that the pattern is parsable. The validity of such an AST is verified by checking that it - // can be matched correctly using the driver. 
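// NOTE (editor): In the expected trees below, e+ desugars into the
// concatenation of e and a repeat (Kleene star) of e, and e? into an option
// node, so the operand subtree appears twice for '+'. A toy sketch of that
// rewrite over a minimal node type -- not urubu's CPTree API:
type toyNode interface{}
type toyConcat struct{ left, right toyNode }
type toyRepeat struct{ body toyNode }

func desugarOneOrMore(e toyNode) toyNode {
	// e+ == e e*; a real implementation clones e so the two occurrences can
	// carry distinct symbol positions in the later DFA construction.
	return toyConcat{left: e, right: toyRepeat{body: e}}
}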
- skipTestAST bool - }{ - { - pattern: "a", - ast: newSymbolNode('a'), - }, - { - pattern: "abc", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "a?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}?", - ast: newOptionNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}?", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}?", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newOptionNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a)?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "((a?)?)?", - ast: newOptionNode( - newOptionNode( - newOptionNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)?", - ast: newOptionNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(?)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "?|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a??", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a*", - ast: newRepeatNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}*", - ast: newRepeatNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}*", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}*", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "((a*)*)*", - ast: newRepeatNode( - newRepeatNode( - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)*", - ast: newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(*)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "*|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a**", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a+", - ast: genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - }, - { - pattern: "[abc]+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "\\u{3042}+", - ast: genConcatNode( - newSymbolNode('\u3042'), - newRepeatNode( - newSymbolNode('\u3042'), - ), - ), - }, - { - pattern: "\\p{Letter}+", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}+", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: genConcatNode( - 
newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - ), - }, - { - pattern: "((a+)+)+", - ast: genConcatNode( - genConcatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - newRepeatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - ), - ), - ), - }, - { - pattern: "(abc)+", - ast: genConcatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a|b)+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - ), - }, - { - pattern: "+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(+)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "+|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a++", - syntaxError: synErrRepNoTarget, - }, - { - pattern: ".", - ast: newRangeSymbolNode(0x00, 0x10FFFF), - }, - { - pattern: "[a]", - ast: newSymbolNode('a'), - }, - { - pattern: "[abc]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "[a-z]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[A-Za-z]", - ast: genAltNode( - newRangeSymbolNode('A', 'Z'), - newRangeSymbolNode('a', 'z'), - ), - }, - { - pattern: "[\\u{004E}]", - ast: newSymbolNode('N'), - }, - { - pattern: "[\\u{0061}-\\u{007A}]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[z-a]", - syntaxError: synErrRangeInvalidOrder, - }, - { - pattern: "a[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]a", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[^\\u{004E}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u004E'-1), - newRangeSymbolNode('\u004E'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\u{0061}-\\u{007A}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u0061'-1), - newRangeSymbolNode('\u007A'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[^a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\u{0000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^]", - ast: newSymbolNode('^'), - }, - { - 
pattern: "[", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "]", - ast: newSymbolNode(']'), - }, - { - pattern: "(]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a]", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode(']'), - ), - }, - { - pattern: "(a]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "([)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('-'), - ), - }, - { - pattern: "[^a-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x60), - newRangeSymbolNode(0x62, 0x10FFFF), - ), - }, - { - pattern: "[-z]", - ast: genAltNode( - newSymbolNode('-'), - newSymbolNode('z'), - ), - }, - { - pattern: "[^-z]", - ast: newAltNode( - newRangeSymbolNode(0x00, 0x2C), - newAltNode( - newRangeSymbolNode(0x2E, 0x79), - newRangeSymbolNode(0x7B, 0x10FFFF), - ), - ), - }, - { - pattern: "[-]", - ast: newSymbolNode('-'), - }, - { - pattern: "[^-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x10FFFF), - ), - }, - { - pattern: "[^01]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^10]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^a-z]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - }, - { - pattern: "[^az]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - genAltNode( - newRangeSymbolNode('a'+1, 'z'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - ), - }, - { - pattern: "\\u{006E}", - ast: newSymbolNode('\u006E'), - }, - { - pattern: "\\u{03BD}", - ast: newSymbolNode('\u03BD'), - }, - { - pattern: "\\u{306B}", - ast: newSymbolNode('\u306B'), - }, - { - pattern: "\\u{01F638}", - ast: newSymbolNode('\U0001F638'), - }, - { - pattern: "\\u{0000}", - ast: newSymbolNode('\u0000'), - }, - { - pattern: "\\u{10FFFF}", - ast: newSymbolNode('\U0010FFFF'), - }, - { - pattern: "\\u{110000}", - syntaxError: synErrCPExpOutOfRange, - }, - { - pattern: "\\u", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{03BD", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{}", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\p{Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{General_Category=Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{ Letter }", - skipTestAST: true, - }, - { - pattern: "\\p{ General_Category = Letter }", - skipTestAST: true, - }, - { - pattern: "\\p", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{", - syntaxError: 
synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{Letter", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category= }", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=Letter}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{ =Letter}", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\f{a2c}", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f{ a2c }", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{a2c", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "(a)", - ast: newSymbolNode('a'), - }, - { - pattern: "(((a)))", - ast: newSymbolNode('a'), - }, - { - pattern: "a()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()a", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "(a", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((a)", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: ")", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "a)", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: ")a", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "(a))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "Mulder|Scully", - ast: genAltNode( - genConcatNode( - newSymbolNode('M'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('d'), - newSymbolNode('e'), - newSymbolNode('r'), - ), - genConcatNode( - newSymbolNode('S'), - newSymbolNode('c'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - ), - }, - { - pattern: "Langly|Frohike|Byers", - ast: genAltNode( - genConcatNode( - newSymbolNode('L'), - newSymbolNode('a'), - newSymbolNode('n'), - newSymbolNode('g'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - genConcatNode( - newSymbolNode('F'), - newSymbolNode('r'), - newSymbolNode('o'), - newSymbolNode('h'), - newSymbolNode('i'), - newSymbolNode('k'), - newSymbolNode('e'), - ), - genConcatNode( - newSymbolNode('B'), - newSymbolNode('y'), - newSymbolNode('e'), - newSymbolNode('r'), - newSymbolNode('s'), - ), - ), - }, - { - pattern: "|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "||", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Mulder|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Scully", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly||Byers", - syntaxError: 
synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "(Fox|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|Mulder)", - syntaxError: synErrAltLackOfOperand, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { - fragmentTrees := map[spec.LexKindName]CPTree{} - for kind, pattern := range tt.fragments { - p := NewParser(kind, strings.NewReader(pattern)) - root, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - fragmentTrees[kind] = root - } - err := CompleteFragments(fragmentTrees) - if err != nil { - t.Fatal(err) - } - - p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) - root, err := p.Parse() - if tt.syntaxError != nil { - // printCPTree(os.Stdout, root, "", "") - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - _, synErr := p.Error() - if synErr != tt.syntaxError { - t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) - } - if root != nil { - t.Fatalf("tree must be nil") - } - } else { - if err != nil { - detail, cause := p.Error() - t.Fatalf("%v: %v: %v", err, cause, detail) - } - if root == nil { - t.Fatal("tree must be non-nil") - } - - complete, err := ApplyFragments(root, fragmentTrees) - if err != nil { - t.Fatal(err) - } - if !complete { - t.Fatalf("incomplete fragments") - } - - // printCPTree(os.Stdout, root, "", "") - if !tt.skipTestAST { - r := root.(*rootNode) - testAST(t, tt.ast, r.tree) - } - } - }) - } -} - -func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { - for _, cProp := range ucd.ContributoryProperties() { - t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { - p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) - root, err := p.Parse() - if err == nil { - t.Fatalf("expected syntax error: got: nil") - } - _, synErr := p.Error() - if synErr != synErrCharPropUnsupported { - t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) - } - if root != nil { - t.Fatalf("tree is not nil") - } - }) - } -} - -func TestExclude(t *testing.T) { - for _, test := range []struct { - caption string - target CPTree - base CPTree - result CPTree - }{ - // t.From > b.From && t.To < b.To - - // |t.From - b.From| = 1 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '2'), - result: newAltNode( - newSymbolNode('0'), - newSymbolNode('2'), - ), - }, - // |t.From - b.From| > 1 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+--+--+ - // Result (b - t): +--+--+ +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '4'), - result: newAltNode( - newRangeSymbolNode('0', '1'), - newRangeSymbolNode('3', '4'), - ), - }, - - // t.From <= b.From && t.To >= b.From && t.To < b.To - - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newSymbolNode('0'), - base: 
newRangeSymbolNode('0', '1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('1', '2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '3'), - result: newSymbolNode('3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '4'), - result: newRangeSymbolNode('3', '4'), - }, - - // t.From > b.From && t.From <= b.To && t.To >= b.To - - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 
0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '3'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '4'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - - // t.From <= b.From && t.To >= b.To - - // |b.From - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('0'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('1'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '2'), - base: newSymbolNode('1'), - result: nil, - }, - - // Others - - // |b.From - t.From| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 1", - target: newSymbolNode('0'), - base: newSymbolNode('1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 1", - target: newSymbolNode('0'), - base: newSymbolNode('2'), - result: 
newSymbolNode('2'), - }, - // |t.To - b.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| = 1", - target: newSymbolNode('1'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - // |t.To - b.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| > 1", - target: newSymbolNode('2'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - } { - t.Run(test.caption, func(t *testing.T) { - r := exclude(test.target, test.base) - testAST(t, test.result, r) - }) - } -} - -func testAST(t *testing.T, expected, actual CPTree) { - t.Helper() - - aTy := reflect.TypeOf(actual) - eTy := reflect.TypeOf(expected) - if eTy != aTy { - t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) - } - - if actual == nil { - return - } - - switch e := expected.(type) { - case *symbolNode: - a := actual.(*symbolNode) - if a.From != e.From || a.To != e.To { - t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) - } - } - eLeft, eRight := expected.children() - aLeft, aRight := actual.children() - testAST(t, eLeft, aLeft) - testAST(t, eRight, aRight) -} diff --git a/tests/unit/urubu/grammar/symbol.go b/tests/unit/urubu/grammar/symbol.go deleted file mode 100644 index 31c3edd..0000000 --- a/tests/unit/urubu/grammar/symbol.go +++ /dev/null @@ -1,159 +0,0 @@ -package symbol - -import "testing" - -func TestSymbol(t *testing.T) { - tab := NewSymbolTable() - w := tab.Writer() - _, _ = w.RegisterStartSymbol("expr'") - _, _ = w.RegisterNonTerminalSymbol("expr") - _, _ = w.RegisterNonTerminalSymbol("term") - _, _ = w.RegisterNonTerminalSymbol("factor") - _, _ = w.RegisterTerminalSymbol("id") - _, _ = w.RegisterTerminalSymbol("add") - _, _ = w.RegisterTerminalSymbol("mul") - _, _ = w.RegisterTerminalSymbol("l_paren") - _, _ = w.RegisterTerminalSymbol("r_paren") - - nonTermTexts := []string{ - "", // Nil - "expr'", - "expr", - "term", - "factor", - } - - termTexts := []string{ - "", // Nil - symbolNameEOF, // EOF - "id", - "add", - "mul", - "l_paren", - "r_paren", - } - - tests := []struct { - text string - isNil bool - isStart bool - isEOF bool - isNonTerminal bool - isTerminal bool - }{ - { - text: "expr'", - isStart: true, - isNonTerminal: true, - }, - { - text: "expr", - isNonTerminal: true, - }, - { - text: "term", - isNonTerminal: true, - }, - { - text: "factor", - isNonTerminal: true, - }, - { - text: "id", - isTerminal: true, - }, - { - text: "add", - isTerminal: true, - }, - { - text: "mul", - isTerminal: true, - }, - { - text: "l_paren", - isTerminal: true, - }, - { - text: "r_paren", - isTerminal: true, - }, - } - for _, tt := range tests { - t.Run(tt.text, func(t *testing.T) { - r := tab.Reader() - sym, ok := r.ToSymbol(tt.text) - if !ok { - t.Fatalf("symbol was not found") - } - testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal) - text, ok := r.ToText(sym) - if !ok { - t.Fatalf("text was not found") - } - if text != tt.text { - t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text) - } - }) - } - - t.Run("EOF", func(t *testing.T) { - testSymbolProperty(t, SymbolEOF, false, false, true, false, true) - }) - - t.Run("Nil", func(t *testing.T) { - testSymbolProperty(t, SymbolNil, true, false, false, false, false) - }) - - t.Run("texts of non-terminals", func(t *testing.T) { - r := tab.Reader() - ts, err := r.NonTerminalTexts() - if err != nil { - t.Fatal(err) - } - if len(ts) != len(nonTermTexts) { - 
t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts) - } - for i, text := range ts { - if text != nonTermTexts[i] { - t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text) - } - } - }) - - t.Run("texts of terminals", func(t *testing.T) { - r := tab.Reader() - ts, err := r.TerminalTexts() - if err != nil { - t.Fatal(err) - } - if len(ts) != len(termTexts) { - t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts) - } - for i, text := range ts { - if text != termTexts[i] { - t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text) - } - } - }) -} - -func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) { - t.Helper() - - if v := sym.IsNil(); v != isNil { - t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v) - } - if v := sym.IsStart(); v != isStart { - t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v) - } - if v := sym.isEOF(); v != isEOF { - t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v) - } - if v := sym.isNonTerminal(); v != isNonTerminal { - t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v) - } - if v := sym.IsTerminal(); v != isTerminal { - t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v) - } -} diff --git a/tests/unit/urubu/spec/grammar/parser.go b/tests/unit/urubu/spec/grammar/parser.go deleted file mode 100644 index 773c466..0000000 --- a/tests/unit/urubu/spec/grammar/parser.go +++ /dev/null @@ -1,1439 +0,0 @@ -package parser - -import ( - "strings" - "testing" - - verr "urubu/error" -) - -func TestLexer_Run(t *testing.T) { - idTok := func(text string) *token { - return newIDToken(text, newPosition(1, 0)) - } - - termPatTok := func(text string) *token { - return newTerminalPatternToken(text, newPosition(1, 0)) - } - - strTok := func(text string) *token { - return newStringLiteralToken(text, newPosition(1, 0)) - } - - symTok := func(kind tokenKind) *token { - return newSymbolToken(kind, newPosition(1, 0)) - } - - invalidTok := func(text string) *token { - return newInvalidToken(text, newPosition(1, 0)) - } - - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "the lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;@...#$()`, - tokens: []*token{ - idTok("id"), - termPatTok("terminal"), - strTok(`string`), - symTok(tokenKindColon), - symTok(tokenKindOr), - symTok(tokenKindSemicolon), - symTok(tokenKindLabelMarker), - symTok(tokenKindExpantion), - symTok(tokenKindDirectiveMarker), - symTok(tokenKindOrderedSymbolMarker), - symTok(tokenKindLParen), - symTok(tokenKindRParen), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize keywords", - src: `fragment`, - tokens: []*token{ - symTok(tokenKindKWFragment), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize character sequences and escape sequences in a terminal", - src: `"abc\"\\"`, - tokens: []*token{ - termPatTok(`abc"\\`), - newEOFToken(), - }, - }, - { - caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", - src: `'\\\'`, - tokens: []*token{ - strTok(`\\\`), - newEOFToken(), - }, - }, - { - caption: "a pattern must include at least one character", - src: `""`, - err: synErrEmptyPattern, - }, - { - caption: "a string must include at least one character", 
- src: `''`, - err: synErrEmptyString, - }, - { - caption: "the lexer can recognize newlines and combine consecutive newlines into one", - src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", - tokens: []*token{ - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "the lexer ignores line comments", - src: ` -// This is the first comment. -foo -// This is the second comment. -// This is the third comment. -bar // This is the fourth comment. -`, - tokens: []*token{ - symTok(tokenKindNewline), - idTok("foo"), - symTok(tokenKindNewline), - idTok("bar"), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "an identifier cannot contain uppercase letters", - src: `Abc`, - err: synErrIDInvalidChar, - }, - { - caption: "an identifier cannot contain uppercase letters", - src: `Zyx`, - err: synErrIDInvalidChar, - }, - { - caption: "the underscore cannot be placed at the beginning of an identifier", - src: `_abc`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed at the end of an identifier", - src: `abc_`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed consecutively", - src: `a__b`, - err: synErrIDConsecutiveUnderscores, - }, - { - caption: "the digits cannot be placed at the beginning of an identifier", - src: `0abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "the digits cannot be placed at the beginning of an identifier", - src: `9abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "an unclosed terminal is not a valid token", - src: `"abc`, - err: synErrUnclosedTerminal, - }, - { - caption: "an incomplete escape sequence in a pattern is not a valid token", - src: `"\`, - err: synErrIncompletedEscSeq, - }, - { - caption: "an unclosed string is not a valid token", - src: `'abc`, - err: synErrUnclosedString, - }, - { - caption: "the lexer can recognize valid tokens following an invalid token", - src: `abc!!!def`, - tokens: []*token{ - idTok("abc"), - invalidTok("!!!"), - idTok("def"), - newEOFToken(), - }, - }, - { - caption: "the lexer skips white spaces", - // \u0009: HT - // \u0020: SP - src: "a\u0009b\u0020c", - tokens: []*token{ - idTok("a"), - idTok("b"), - idTok("c"), - newEOFToken(), - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - l, err := newLexer(strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - n := 0 - for { - var tok *token - tok, err = l.next() - if err != nil { - break - } - testToken(t, tok, tt.tokens[n]) - n++ - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - synErr, ok := err.(*verr.SpecError) - if !ok { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - if tt.err != synErr.Cause { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) - } - } else { - if err != nil { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - } - }) - } -} - -func testToken(t *testing.T, tok, expected *token) { - t.Helper() - if tok.kind != expected.kind || tok.text != expected.text { - t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) - } -} - -func TestParse(t *testing.T) { - name := func(param *ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "name", - Parameters: []*ParameterNode{param}, - } - } - prec :=
func(param *ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "prec", - Parameters: []*ParameterNode{param}, - } - } - leftAssoc := func(params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "left", - Parameters: params, - } - } - rightAssoc := func(params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "right", - Parameters: params, - } - } - assign := func(params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "assign", - Parameters: params, - } - } - prod := func(lhs string, alts ...*AlternativeNode) *ProductionNode { - return &ProductionNode{ - LHS: lhs, - RHS: alts, - } - } - withProdPos := func(prod *ProductionNode, pos Position) *ProductionNode { - prod.Pos = pos - return prod - } - withProdDir := func(prod *ProductionNode, dirs ...*DirectiveNode) *ProductionNode { - prod.Directives = dirs - return prod - } - alt := func(elems ...*ElementNode) *AlternativeNode { - return &AlternativeNode{ - Elements: elems, - } - } - withAltPos := func(alt *AlternativeNode, pos Position) *AlternativeNode { - alt.Pos = pos - return alt - } - withAltDir := func(alt *AlternativeNode, dirs ...*DirectiveNode) *AlternativeNode { - alt.Directives = dirs - return alt - } - dir := func(name string, params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: name, - Parameters: params, - } - } - withDirPos := func(dir *DirectiveNode, pos Position) *DirectiveNode { - dir.Pos = pos - return dir - } - idParam := func(id string) *ParameterNode { - return &ParameterNode{ - ID: id, - } - } - ordSymParam := func(id string) *ParameterNode { - return &ParameterNode{ - OrderedSymbol: id, - } - } - exp := func(param *ParameterNode) *ParameterNode { - param.Expansion = true - return param - } - group := func(dirs ...*DirectiveNode) *ParameterNode { - return &ParameterNode{ - Group: dirs, - } - } - withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { - param.Pos = pos - return param - } - id := func(id string) *ElementNode { - return &ElementNode{ - ID: id, - } - } - pat := func(p string) *ElementNode { - return &ElementNode{ - Pattern: p, - } - } - label := func(name string) *LabelNode { - return &LabelNode{ - Name: name, - } - } - withLabelPos := func(label *LabelNode, pos Position) *LabelNode { - label.Pos = pos - return label - } - withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode { - elem.Label = label - return elem - } - withElemPos := func(elem *ElementNode, pos Position) *ElementNode { - elem.Pos = pos - return elem - } - frag := func(lhs string, rhs string) *FragmentNode { - return &FragmentNode{ - LHS: lhs, - RHS: rhs, - } - } - withFragmentPos := func(frag *FragmentNode, pos Position) *FragmentNode { - frag.Pos = pos - return frag - } - newPos := func(row int) Position { - return Position{ - Row: row, - Col: 0, - } - } - - tests := []struct { - caption string - src string - checkPosition bool - ast *RootNode - synErr *SyntaxError - }{ - { - caption: "a grammar can contain top-level directives", - src: ` -#name test; - -#prec ( - #left a b $x1 - #right c d $x2 - #assign e f $x3 -); -`, - ast: &RootNode{ - Directives: []*DirectiveNode{ - withDirPos( - name( - withParamPos( - idParam("test"), - newPos(2), - ), - ), - newPos(2), - ), - withDirPos( - prec( - withParamPos( - group( - withDirPos( - leftAssoc( - withParamPos( - idParam("a"), - newPos(5), - ), - withParamPos( - idParam("b"), - newPos(5), - ), - withParamPos( - ordSymParam("x1"), - newPos(5), - ), - ), - newPos(5), - ), - 
withDirPos( - rightAssoc( - withParamPos( - idParam("c"), - newPos(6), - ), - withParamPos( - idParam("d"), - newPos(6), - ), - withParamPos( - ordSymParam("x2"), - newPos(6), - ), - ), - newPos(6), - ), - withDirPos( - assign( - withParamPos( - idParam("e"), - newPos(7), - ), - withParamPos( - idParam("f"), - newPos(7), - ), - withParamPos( - ordSymParam("x3"), - newPos(7), - ), - ), - newPos(7), - ), - ), - newPos(4), - ), - ), - newPos(4), - ), - }, - }, - }, - { - caption: "a top-level directive must be followed by ';'", - src: ` -#name test -`, - synErr: synErrTopLevelDirNoSemicolon, - }, - { - caption: "a directive group must be closed by ')'", - src: ` -#prec ( - #left a b -; -`, - synErr: synErrUnclosedDirGroup, - }, - { - caption: "an ordered symbol marker '$' must be followed by and ID", - src: ` -#prec ( - #assign $ -); -`, - synErr: synErrNoOrderedSymbolName, - }, - { - caption: "single production is a valid grammar", - src: `a: "a";`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - prod("a", alt(pat("a"))), - }, - }, - }, - { - caption: "multiple productions are a valid grammar", - src: ` -e - : e add t - | e sub t - | t - ; -t - : t mul f - | t div f - | f - ; -f - : l_paren e r_paren - | id - ; - -add - : '+'; -sub - : '-'; -mul - : '*'; -div - : '/'; -l_paren - : '('; -r_paren - : ')'; -id - : "[A-Za-z_][0-9A-Za-z_]*"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("e", - alt(id("e"), id("add"), id("t")), - alt(id("e"), id("sub"), id("t")), - alt(id("t")), - ), - prod("t", - alt(id("t"), id("mul"), id("f")), - alt(id("t"), id("div"), id("f")), - alt(id("f")), - ), - prod("f", - alt(id("l_paren"), id("e"), id("r_paren")), - alt(id("id")), - ), - }, - LexProductions: []*ProductionNode{ - prod("add", alt(pat(`+`))), - prod("sub", alt(pat(`-`))), - prod("mul", alt(pat(`*`))), - prod("div", alt(pat(`/`))), - prod("l_paren", alt(pat(`(`))), - prod("r_paren", alt(pat(`)`))), - prod("id", alt(pat(`[A-Za-z_][0-9A-Za-z_]*`))), - }, - }, - }, - { - caption: "productions can contain the empty alternative", - src: ` -a - : foo - | - ; -b - : - | bar - ; -c - : - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("a", - alt(id("foo")), - alt(), - ), - prod("b", - alt(), - alt(id("bar")), - ), - prod("c", - alt(), - ), - }, - LexProductions: []*ProductionNode{ - prod("foo", alt(pat(`foo`))), - prod("bar", alt(pat(`bar`))), - }, - }, - }, - { - caption: "a production cannot contain an ordered symbol", - src: ` -a: $x; -`, - synErr: synErrNoSemicolon, - }, - { - caption: "an alternative cannot contain a pattern directly", - src: ` -s - : "foo" bar - ; - -bar - : "bar"; -`, - synErr: synErrPatternInAlt, - }, - { - caption: "an alternative cannot contain a string directly", - src: ` -s - : 'foo' bar - ; -bar - : "bar"; -`, - synErr: synErrPatternInAlt, - }, - { - caption: "a terminal symbol can be defined using a string literal", - src: ` -foo - : 'foo'; -`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - prod("foo", - alt(pat(`foo`)), - ), - }, - }, - }, - { - caption: "a terminal symbol can be defined using a pattern", - src: ` -foo - : "foo"; -`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - prod("foo", - alt(pat(`foo`)), - ), - }, - }, - }, - { - caption: "`fragment` is a reserved word", - src: `fragment: 'fragment';`, - synErr: synErrNoProductionName, - }, - { - caption: "when a source contains an unknown token, the parser raises a syntax error", - src: `a: !;`, - synErr: synErrInvalidToken, - }, 
- { - caption: "a production must have its name as the first element", - src: `: "a";`, - synErr: synErrNoProductionName, - }, - { - caption: "':' must precede an alternative", - src: `a "a";`, - synErr: synErrNoColon, - }, - { - caption: "';' must follow a production", - src: `a: "a"`, - synErr: synErrNoSemicolon, - }, - { - caption: "';' can only appear at the end of a production", - src: `;`, - synErr: synErrNoProductionName, - }, - { - caption: "a grammar can contain fragments", - src: ` -s - : tagline - ; -tagline: "\f{words} IS OUT THERE."; -fragment words: "[A-Za-z\u{0020}]+"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("s", - alt(id("tagline")), - ), - }, - LexProductions: []*ProductionNode{ - prod("tagline", - alt(pat(`\f{words} IS OUT THERE.`)), - ), - }, - Fragments: []*FragmentNode{ - frag("words", `[A-Za-z\u{0020}]+`), - }, - }, - }, - { - caption: "the lexer treats consecutive lines as a single token but can count lines correctly", - src: `// This line precedes line comments and blank lines. -// This is a line comment. - - -s - : foo - ; - - -// This line is sandwiched between blank lines. - - -foo: 'foo'; -`, - checkPosition: true, - ast: &RootNode{ - Productions: []*ProductionNode{ - withProdPos( - prod("s", - withAltPos( - alt( - withElemPos( - id("foo"), - newPos(6), - ), - ), - newPos(6), - ), - ), - newPos(5), - ), - }, - LexProductions: []*ProductionNode{ - withProdPos( - prod("foo", - withAltPos( - alt( - withElemPos( - pat(`foo`), - newPos(13), - ), - ), - newPos(13), - ), - ), - newPos(13), - ), - }, - }, - }, - { - caption: "a grammar can contain production directives and alternative directives", - src: ` -mode_tran_seq - : mode_tran_seq mode_tran - | mode_tran - ; -mode_tran - : push_m1 - | push_m2 - | pop_m1 - | pop_m2 - ; - -push_m1 #push m1 - : "->"; -push_m2 #mode m1 #push m2 - : "-->"; -pop_m1 #mode m1 #pop - : "<-"; -pop_m2 #mode m2 #pop - : "<--"; -whitespace #mode default m1 m2 #skip - : "\u{0020}+"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("mode_tran_seq", - alt(id("mode_tran_seq"), id("mode_tran")), - alt(id("mode_tran")), - ), - prod("mode_tran", - alt(id("push_m1")), - alt(id("push_m2")), - alt(id("pop_m1")), - alt(id("pop_m2")), - ), - }, - LexProductions: []*ProductionNode{ - withProdDir( - prod("push_m1", - alt(pat(`->`)), - ), - dir("push", idParam("m1")), - ), - withProdDir( - prod("push_m2", - alt(pat(`-->`)), - ), - dir("mode", idParam("m1")), - dir("push", idParam("m2")), - ), - withProdDir( - prod("pop_m1", - alt(pat(`<-`)), - ), - dir("mode", idParam("m1")), - dir("pop"), - ), - withProdDir( - prod("pop_m2", - alt(pat(`<--`)), - ), - dir("mode", idParam("m2")), - dir("pop"), - ), - withProdDir( - prod("whitespace", - alt(pat(`\u{0020}+`)), - ), - dir("mode", idParam("default"), idParam("m1"), idParam("m2")), - dir("skip"), - ), - }, - }, - }, - { - caption: "an alternative of a production can have multiple alternative directives", - src: ` -s - : foo bar #prec baz #ast foo bar - ; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("s", - withAltDir( - alt(id("foo"), id("bar")), - dir("prec", idParam("baz")), - dir("ast", idParam("foo"), idParam("bar")), - ), - ), - }, - }, - }, - { - caption: "a lexical production can have multiple production directives", - src: ` -foo #mode a #push b - : 'foo'; -`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - withProdDir( - prod("foo", - alt(pat("foo")), - ), - dir("mode", idParam("a")), - dir("push", idParam("b")), - ), - }, - }, - }, - { - 
caption: "a production must be followed by a newline", - src: ` -s: foo; foo: "foo"; -`, - synErr: synErrSemicolonNoNewline, - }, - { - caption: "a grammar can contain 'ast' directives and expansion operator", - src: ` -s - : foo bar_list #ast foo bar_list - ; -bar_list - : bar_list bar #ast bar_list... bar - | bar #ast bar - ; -foo: "foo"; -bar: "bar"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("s", - withAltDir( - alt(id("foo"), id("bar_list")), - dir("ast", idParam("foo"), idParam("bar_list")), - ), - ), - prod("bar_list", - withAltDir( - alt(id("bar_list"), id("bar")), - dir("ast", exp(idParam("bar_list")), idParam("bar")), - ), - withAltDir( - alt(id("bar")), - dir("ast", idParam("bar")), - ), - ), - }, - LexProductions: []*ProductionNode{ - prod("foo", - alt(pat("foo")), - ), - prod("bar", - alt(pat("bar")), - ), - }, - }, - }, - { - caption: "an expansion operator must be preceded by an identifier", - src: ` -s - : foo #ast ... - ; -`, - synErr: synErrStrayExpOp, - }, - { - caption: "an expansion operator must be preceded by an identifier", - src: ` -a - : foo #ast ... foo - ; -`, - synErr: synErrStrayExpOp, - }, - { - caption: "an expansion operator cannot be applied to a pattern", - src: ` -a - : "foo" #ast "foo"... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an expansion operator cannot be applied to a string", - src: ` -a - : 'foo' #ast 'foo'... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an expansion operator cannot be applied to an ordered symbol", - src: ` -a - : foo #ast $foo... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an expansion operator cannot be applied to a directive group", - src: ` -a - : foo #ast ()... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an AST has node positions", - src: ` -exp - : exp add id #ast exp id - | id - ; - -whitespace #skip - : "\u{0020}+"; -add - : '+'; -id - : "\f{letter}(\f{letter}|\f{number})*"; -fragment letter - : "[A-Za-z_]"; -fragment number - : "[0-9]"; -`, - checkPosition: true, - ast: &RootNode{ - Productions: []*ProductionNode{ - withProdPos( - prod("exp", - withAltPos( - withAltDir( - alt( - withElemPos(id("exp"), newPos(3)), - withElemPos(id("add"), newPos(3)), - withElemPos(id("id"), newPos(3)), - ), - withDirPos( - dir("ast", - withParamPos(idParam("exp"), newPos(3)), - withParamPos(idParam("id"), newPos(3)), - ), - newPos(3), - ), - ), - newPos(3), - ), - withAltPos( - alt( - withElemPos(id("id"), newPos(4)), - ), - newPos(4), - ), - ), - newPos(2), - ), - }, - LexProductions: []*ProductionNode{ - withProdPos( - withProdDir( - prod("whitespace", - withAltPos( - alt( - withElemPos( - pat(`\u{0020}+`), - newPos(8), - ), - ), - newPos(8), - ), - ), - withDirPos( - dir("skip"), - newPos(7), - ), - ), - newPos(7), - ), - withProdPos( - prod("add", - withAltPos( - alt( - withElemPos( - pat(`+`), - newPos(10), - ), - ), - newPos(10), - ), - ), - newPos(9), - ), - withProdPos( - prod("id", - withAltPos( - alt( - withElemPos( - pat(`\f{letter}(\f{letter}|\f{number})*`), - newPos(12), - ), - ), - newPos(12), - ), - ), - newPos(11), - ), - }, - Fragments: []*FragmentNode{ - withFragmentPos( - frag("letter", "[A-Za-z_]"), - newPos(13), - ), - withFragmentPos( - frag("number", "[0-9]"), - newPos(15), - ), - }, - }, - }, - { - caption: "a symbol can have a label", - src: ` -expr - : term@lhs add term@rhs - ; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - withProdPos( - prod("expr", - withAltPos( - alt( - withElemPos( - withLabel( - 
id("term"), - withLabelPos( - label("lhs"), - newPos(3), - ), - ), - newPos(3), - ), - withElemPos( - id("add"), - newPos(3), - ), - withElemPos( - withLabel( - id("term"), - withLabelPos( - label("rhs"), - newPos(3), - ), - ), - newPos(3), - ), - ), - newPos(3), - ), - ), - newPos(2), - ), - }, - }, - }, - { - caption: "a label must be an identifier, not a string", - src: ` -foo - : bar@'baz' - ; -`, - synErr: synErrNoLabel, - }, - { - caption: "a label must be an identifier, not a pattern", - src: ` -foo - : bar@"baz" - ; -`, - synErr: synErrNoLabel, - }, - { - caption: "the symbol marker @ must be followed by an identifier", - src: ` -foo - : bar@ - ; -`, - synErr: synErrNoLabel, - }, - { - caption: "a symbol cannot have more than or equal to two labels", - src: ` -foo - : bar@baz@bra - ; -`, - synErr: synErrLabelWithNoSymbol, - }, - { - caption: "a label must follow a symbol", - src: ` -foo - : @baz - ; -`, - synErr: synErrLabelWithNoSymbol, - }, - { - caption: "a grammar can contain left and right associativities", - src: ` -#prec ( - #left l1 l2 - #left l3 - #right r1 r2 - #right r3 -); - -s - : id l1 id l2 id l3 id - | id r1 id r2 id r3 id - ; - -whitespaces #skip - : "[\u{0009}\u{0020}]+"; -l1 - : 'l1'; -l2 - : 'l2'; -l3 - : 'l3'; -r1 - : 'r1'; -r2 - : 'r2'; -r3 - : 'r3'; -id - : "[A-Za-z0-9_]+"; -`, - ast: &RootNode{ - Directives: []*DirectiveNode{ - withDirPos( - prec( - withParamPos( - group( - withDirPos( - leftAssoc( - withParamPos(idParam("l1"), newPos(3)), - withParamPos(idParam("l2"), newPos(3)), - ), - newPos(3), - ), - withDirPos( - leftAssoc( - withParamPos(idParam("l3"), newPos(4)), - ), - newPos(4), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r1"), newPos(5)), - withParamPos(idParam("r2"), newPos(5)), - ), - newPos(5), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r3"), newPos(6)), - ), - newPos(6), - ), - ), - newPos(2), - ), - ), - newPos(2), - ), - }, - Productions: []*ProductionNode{ - prod("s", - alt(id(`id`), id(`l1`), id(`id`), id(`l2`), id(`id`), id(`l3`), id(`id`)), - alt(id(`id`), id(`r1`), id(`id`), id(`r2`), id(`id`), id(`r3`), id(`id`)), - ), - }, - LexProductions: []*ProductionNode{ - withProdDir( - prod("whitespaces", - alt(pat(`[\u{0009}\u{0020}]+`)), - ), - dir("skip"), - ), - prod("l1", alt(pat(`l1`))), - prod("l2", alt(pat(`l2`))), - prod("l3", alt(pat(`l3`))), - prod("r1", alt(pat(`r1`))), - prod("r2", alt(pat(`r2`))), - prod("r3", alt(pat(`r3`))), - prod("id", alt(pat(`[A-Za-z0-9_]+`))), - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - ast, err := Parse(strings.NewReader(tt.src)) - if tt.synErr != nil { - synErrs, ok := err.(verr.SpecErrors) - if !ok { - t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err) - } - synErr := synErrs[0] - if tt.synErr != synErr.Cause { - t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause) - } - if ast != nil { - t.Fatalf("AST must be nil") - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if ast == nil { - t.Fatalf("AST must be non-nil") - } - testRootNode(t, ast, tt.ast, tt.checkPosition) - } - }) - } -} - -func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { - t.Helper() - if len(root.Productions) != len(expected.Productions) { - t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) - } - if len(root.Directives) != len(expected.Directives) { - t.Fatalf("unexpected length of top-level directives; want: %v, 
got: %v", len(expected.Directives), len(root.Directives)) - } - for i, dir := range root.Directives { - testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true) - } - for i, prod := range root.Productions { - testProductionNode(t, prod, expected.Productions[i], checkPosition) - } - for i, prod := range root.LexProductions { - testProductionNode(t, prod, expected.LexProductions[i], checkPosition) - } - for i, frag := range root.Fragments { - testFragmentNode(t, frag, expected.Fragments[i], checkPosition) - } -} - -func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) { - t.Helper() - if len(expected.Directives) != len(prod.Directives) { - t.Fatalf("unexpected directive count; want: %v directives, got: %v directives", len(expected.Directives), len(prod.Directives)) - } - if len(expected.Directives) > 0 { - testDirectives(t, prod.Directives, expected.Directives, checkPosition) - } - if prod.LHS != expected.LHS { - t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS) - } - if len(prod.RHS) != len(expected.RHS) { - t.Fatalf("unexpected length of an RHS; want: %v, got: %v", len(expected.RHS), len(prod.RHS)) - } - for i, alt := range prod.RHS { - testAlternativeNode(t, alt, expected.RHS[i], checkPosition) - } - if checkPosition { - testPosition(t, prod.Pos, expected.Pos) - } -} - -func testFragmentNode(t *testing.T, frag, expected *FragmentNode, checkPosition bool) { - t.Helper() - if frag.LHS != expected.LHS { - t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, frag.LHS) - } - if frag.RHS != expected.RHS { - t.Fatalf("unexpected RHS; want: %v, got: %v", expected.RHS, frag.RHS) - } - if checkPosition { - testPosition(t, frag.Pos, expected.Pos) - } -} - -func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosition bool) { - t.Helper() - if len(alt.Elements) != len(expected.Elements) { - t.Fatalf("unexpected length of elements; want: %v, got: %v", len(expected.Elements), len(alt.Elements)) - } - for i, elem := range alt.Elements { - testElementNode(t, elem, expected.Elements[i], checkPosition) - } - if len(alt.Directives) != len(expected.Directives) { - t.Fatalf("unexpected alternative directive count; want: %v directive, got: %v directive", len(expected.Directives), len(alt.Directives)) - } - if len(alt.Directives) > 0 { - testDirectives(t, alt.Directives, expected.Directives, checkPosition) - } - if checkPosition { - testPosition(t, alt.Pos, expected.Pos) - } -} - -func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bool) { - t.Helper() - if elem.ID != expected.ID { - t.Fatalf("unexpected ID; want: %v, got: %v", expected.ID, elem.ID) - } - if elem.Pattern != expected.Pattern { - t.Fatalf("unexpected pattern; want: %v, got: %v", expected.Pattern, elem.Pattern) - } - if checkPosition { - testPosition(t, elem.Pos, expected.Pos) - } -} - -func testDirectives(t *testing.T, dirs, expected []*DirectiveNode, checkPosition bool) { - t.Helper() - for i, exp := range expected { - dir := dirs[i] - - if exp.Name != dir.Name { - t.Fatalf("unexpected directive name; want: %+v, got: %+v", exp.Name, dir.Name) - } - if len(exp.Parameters) != len(dir.Parameters) { - t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", exp.Parameters, dir.Parameters) - } - for j, expParam := range exp.Parameters { - testParameter(t, dir.Parameters[j], expParam, checkPosition) - } - if checkPosition { - testPosition(t, dir.Pos, exp.Pos) - } - } -} - -func testParameter(t 
*testing.T, param, expected *ParameterNode, checkPosition bool) { - t.Helper() - if param.ID != expected.ID { - t.Fatalf("unexpected ID parameter; want: %v, got: %v", expected.ID, param.ID) - } - if param.String != expected.String { - t.Fatalf("unexpected string parameter; want: %v, got: %v", expected.String, param.String) - } - if param.Expansion != expected.Expansion { - t.Fatalf("unexpected expansion; want: %v, got: %v", expected.Expansion, param.Expansion) - } - if checkPosition { - testPosition(t, param.Pos, expected.Pos) - } -} - -func testPosition(t *testing.T, pos, expected Position) { - t.Helper() - if pos.Row != expected.Row { - t.Fatalf("unexpected position; want: %+v, got: %+v", expected, pos) - } -} diff --git a/tests/unit/urubu/spec/test.go b/tests/unit/urubu/spec/test.go deleted file mode 100644 index eddba92..0000000 --- a/tests/unit/urubu/spec/test.go +++ /dev/null @@ -1,411 +0,0 @@ -package test - -import ( - "fmt" - "reflect" - "strings" - "testing" -) - -func TestTree_Format(t *testing.T) { - expected := `(a - (b - (c)) - (d) - (e))` - tree := NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - NewNonTerminalTree("d"), - NewNonTerminalTree("e"), - ) - actual := string(tree.Format()) - if actual != expected { - t.Fatalf("unexpected format:\n%v", actual) - } -} - -func TestDiffTree(t *testing.T) { - tests := []struct { - t1 *Tree - t2 *Tree - different bool - }{ - { - t1: NewTerminalNode("a", "a"), - t2: NewTerminalNode("a", "a"), - }, - { - t1: NewTerminalNode("a", "a"), - t2: NewTerminalNode("a", "A"), - different: true, - }, - { - t1: NewTerminalNode("a", "a"), - t2: NewTerminalNode("A", "a"), - different: true, - }, - { - t1: NewNonTerminalTree("a"), - t2: NewNonTerminalTree("a"), - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - NewNonTerminalTree("d", - NewNonTerminalTree("d"), - ), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - NewNonTerminalTree("d", - NewNonTerminalTree("d"), - ), - ), - }, - { - t1: NewNonTerminalTree("a"), - t2: NewNonTerminalTree("b"), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - t2: NewNonTerminalTree("a"), - different: true, - }, - { - t1: NewNonTerminalTree("a"), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("c"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b", -
NewNonTerminalTree("d"), - ), - ), - different: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - diffs := DiffTree(tt.t1, tt.t2) - if tt.different && len(diffs) == 0 { - t.Fatalf("unexpected result") - } else if !tt.different && len(diffs) > 0 { - t.Fatalf("unexpected result") - } - }) - } -} - -func TestParseTestCase(t *testing.T) { - tests := []struct { - src string - tc *TestCase - parseErr bool - }{ - { - src: `test ---- -foo ---- -(foo) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - { - src: ` -test - ---- - -foo - ---- - -(foo) - -`, - tc: &TestCase{ - Description: "\ntest\n", - Source: []byte("\nfoo\n"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // The length of a part delimiter may be greater than 3. - { - src: ` -test ----- -foo ----- -(foo) -`, - tc: &TestCase{ - Description: "\ntest", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // The description part may be empty. - { - src: `---- -foo ----- -(foo) -`, - tc: &TestCase{ - Description: "", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // The source part may be empty. - { - src: `test ---- ---- -(foo) -`, - tc: &TestCase{ - Description: "test", - Source: []byte{}, - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error, - // but we allow it to simplify the implementation of the parser. - { - src: `test ----- -foo ----- -(foo) ---- -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - { - src: ``, - parseErr: true, - }, - { - src: `test ---- -`, - parseErr: true, - }, - { - src: `test ---- -foo -`, - parseErr: true, - }, - { - src: `test ---- -foo ---- -`, - parseErr: true, - }, - { - src: `test --- -foo --- -(foo) -`, - parseErr: true, - }, - // A node may have just one string node. - { - src: `test ----- -foo bar ----- -(foo (bar 'bar')) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo bar"), - Output: NewNonTerminalTree("foo", - NewTerminalNode("bar", "bar"), - ).Fill(), - }, - }, - // A node may have just one pattern node. - { - src: `test ----- -foo bar ----- -(foo (bar "bar")) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo bar"), - Output: NewNonTerminalTree("foo", - NewTerminalNode("bar", "bar"), - ).Fill(), - }, - }, - // A node may be the error node. - { - src: `test ----- -foo x ----- -(foo (error)) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo x"), - Output: NewNonTerminalTree("foo", - NewTerminalNode("error", ""), - ).Fill(), - }, - }, - // The error node cannot have a string node. - { - src: `test ----- -foo x ----- -(foo (error 'x')) -`, - parseErr: true, - }, - // The error node cannot have a pattern node. - { - src: `test ----- -foo x ----- -(foo (error "x")) -`, - parseErr: true, - }, - // The error node cannot have another node. - { - src: `test ----- -foo x ----- -(foo (error (a))) -`, - parseErr: true, - }, - { - src: `test ---- -foo ---- -? 
-`,
-			parseErr: true,
-		},
-	}
-	for i, tt := range tests {
-		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
-			tc, err := ParseTestCase(strings.NewReader(tt.src))
-			if tt.parseErr {
-				if err == nil {
-					t.Fatalf("an expected error didn't occur")
-				}
-			} else {
-				if err != nil {
-					t.Fatal(err)
-				}
-				testTestCase(t, tt.tc, tc)
-			}
-		})
-	}
-}
-
-func testTestCase(t *testing.T, expected, actual *TestCase) {
-	t.Helper()
-
-	if expected.Description != actual.Description ||
-		!reflect.DeepEqual(expected.Source, actual.Source) ||
-		len(DiffTree(expected.Output, actual.Output)) > 0 {
-		t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual)
-	}
-}
diff --git a/tests/unit/urubu/tester.go b/tests/unit/urubu/tester.go
deleted file mode 100644
index 3c6b1db..0000000
--- a/tests/unit/urubu/tester.go
+++ /dev/null
@@ -1,169 +0,0 @@
-package tester
-
-import (
-	"fmt"
-	"strings"
-	"testing"
-
-	"urubu/grammar"
-	"urubu/spec/grammar/parser"
-	tspec "urubu/spec/test"
-)
-
-func TestTester_Run(t *testing.T) {
-	grammarSrc1 := `
-#name test;
-
-s
-	: foo bar baz
-	| foo error baz #recover
-	;
-
-ws #skip
-	: "[\u{0009}\u{0020}]+";
-foo
-	: 'foo';
-bar
-	: 'bar';
-baz
-	: 'baz';
-`
-
-	grammarSrc2 := `
-#name test;
-
-s
-	: foos
-	;
-foos
-	: foos foo #ast foos... foo
-	| foo
-	;
-
-ws #skip
-	: "[\u{0009}\u{0020}]+";
-foo
-	: 'foo';
-`
-
-	tests := []struct {
-		grammarSrc string
-		testSrc    string
-		error      bool
-	}{
-		{
-			grammarSrc: grammarSrc1,
-			testSrc: `
-Test
----
-foo bar baz
----
-(s
-	(foo 'foo') (bar 'bar') (baz 'baz'))
-`,
-		},
-		{
-			grammarSrc: grammarSrc1,
-			testSrc: `
-Test
----
-foo ? baz
----
-(s
-	(foo 'foo') (error) (baz 'baz'))
-`,
-		},
-		{
-			grammarSrc: grammarSrc1,
-			testSrc: `
-Test
----
-foo bar baz
----
-(s)
-`,
-			error: true,
-		},
-		{
-			grammarSrc: grammarSrc1,
-			testSrc: `
-Test
----
-foo bar baz
----
-(s
-	(foo) (bar))
-`,
-			error: true,
-		},
-		{
-			grammarSrc: grammarSrc1,
-			testSrc: `
-Test
----
-foo bar baz
----
-(s
-	(foo) (bar) (xxx))
-`,
-			error: true,
-		},
-		{
-			grammarSrc: grammarSrc2,
-			testSrc: `
-Test
----
-foo foo foo
----
-(s
-	(foos
-		(foo 'foo') (foo 'foo') (foo 'foo')))
-`,
-		},
-	}
-	for i, tt := range tests {
-		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
-			ast, err := parser.Parse(strings.NewReader(tt.grammarSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-			b := grammar.GrammarBuilder{
-				AST: ast,
-			}
-			cg, _, err := b.Build()
-			if err != nil {
-				t.Fatal(err)
-			}
-			c, err := tspec.ParseTestCase(strings.NewReader(tt.testSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-			tester := &Tester{
-				Grammar: cg,
-				Cases: []*TestCaseWithMetadata{
-					{
-						TestCase: c,
-					},
-				},
-			}
-			rs := tester.Run()
-			if tt.error {
-				errOccurred := false
-				for _, r := range rs {
-					if r.Error != nil {
-						errOccurred = true
-					}
-				}
-				if !errOccurred {
-					t.Fatal("this test must fail, but it passed")
-				}
-			} else {
-				for _, r := range rs {
-					if r.Error != nil {
-						t.Fatalf("unexpected error occurred: %v", r.Error)
-					}
-				}
-			}
-		})
-	}
-}
diff --git a/tests/unit/urubu/utf8.go b/tests/unit/urubu/utf8.go
deleted file mode 100644
index 2dc8093..0000000
--- a/tests/unit/urubu/utf8.go
+++ /dev/null
@@ -1,181 +0,0 @@
-package utf8
-
-import (
-	"fmt"
-	"testing"
-)
-
-func TestGenCharBlocks_WellFormed(t *testing.T) {
-	cBlk := func(from []byte, to []byte) *CharBlock {
-		return &CharBlock{
-			From: from,
-			To:   to,
-		}
-	}
-
-	seq := func(b ...byte) []byte {
-		return b
-	}
-
-	tests := []struct {
-		from   rune
-		to     rune
-		blocks []*CharBlock
-	}{
-		{
-			from: '\u0000',
-			to:   '\u007f',
-			blocks: []*CharBlock{
-				cBlk(seq(0x00), seq(0x7f)),
-			},
-		},
-		{
-			from: '\u0080',
-			to:   '\u07ff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)),
-			},
-		},
-		{
-			from: '\u0800',
-			to:   '\u0fff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xe0, 0xa0, 0x80), seq(0xe0, 0xbf, 0xbf)),
-			},
-		},
-		{
-			from: '\u1000',
-			to:   '\ucfff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xe1, 0x80, 0x80), seq(0xec, 0xbf, 0xbf)),
-			},
-		},
-		{
-			from: '\ud000',
-			to:   '\ud7ff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xed, 0x80, 0x80), seq(0xed, 0x9f, 0xbf)),
-			},
-		},
-		{
-			from: '\ue000',
-			to:   '\uffff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xee, 0x80, 0x80), seq(0xef, 0xbf, 0xbf)),
-			},
-		},
-		{
-			from: '\U00010000',
-			to:   '\U0003ffff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xf0, 0x90, 0x80, 0x80), seq(0xf0, 0xbf, 0xbf, 0xbf)),
-			},
-		},
-		{
-			from: '\U00040000',
-			to:   '\U000fffff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xf1, 0x80, 0x80, 0x80), seq(0xf3, 0xbf, 0xbf, 0xbf)),
-			},
-		},
-		{
-			from: '\U00100000',
-			to:   '\U0010ffff',
-			blocks: []*CharBlock{
-				cBlk(seq(0xf4, 0x80, 0x80, 0x80), seq(0xf4, 0x8f, 0xbf, 0xbf)),
-			},
-		},
-		{
-			from: '\u0000',
-			to:   '\U0010ffff',
-			blocks: []*CharBlock{
-				cBlk(seq(0x00), seq(0x7f)),
-				cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)),
-				cBlk(seq(0xe0, 0xa0, 0x80), seq(0xe0, 0xbf, 0xbf)),
-				cBlk(seq(0xe1, 0x80, 0x80), seq(0xec, 0xbf, 0xbf)),
-				cBlk(seq(0xed, 0x80, 0x80), seq(0xed, 0x9f, 0xbf)),
-				cBlk(seq(0xee, 0x80, 0x80), seq(0xef, 0xbf, 0xbf)),
-				cBlk(seq(0xf0, 0x90, 0x80, 0x80), seq(0xf0, 0xbf, 0xbf, 0xbf)),
-				cBlk(seq(0xf1, 0x80, 0x80, 0x80), seq(0xf3, 0xbf, 0xbf, 0xbf)),
-				cBlk(seq(0xf4, 0x80, 0x80, 0x80), seq(0xf4, 0x8f, 0xbf, 0xbf)),
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(fmt.Sprintf("%v..%v", tt.from, tt.to), func(t *testing.T) {
-			blks, err := GenCharBlocks(tt.from, tt.to)
-			if err != nil {
-				t.Fatal(err)
-			}
-			if len(blks) != len(tt.blocks) {
-				t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
-			}
-			for i, blk := range blks {
-				if len(blk.From) != len(tt.blocks[i].From) || len(blk.To) != len(tt.blocks[i].To) {
-					t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
-				}
-				for j := 0; j < len(blk.From); j++ {
-					if blk.From[j] != tt.blocks[i].From[j] || blk.To[j] != tt.blocks[i].To[j] {
-						t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
-					}
-				}
-			}
-		})
-	}
-}
-
-func TestGenCharBlocks_IllFormed(t *testing.T) {
-	tests := []struct {
-		from rune
-		to   rune
-	}{
-		{
-			// from > to
-			from: '\u0001',
-			to:   '\u0000',
-		},
-		{
-			from: -1, // <U+0000
-			to:   '\u0000',
-		},
-		{
-			from: '\u0000',
-			to:   0x110000, // >U+10FFFF
-		},
-		{
-			from: 0xd800, // U+D800 (surrogate code point)
-			to:   '\ue000',
-		},
-		{
-			from: 0xdfff, // U+DFFF (surrogate code point)
-			to:   '\ue000',
-		},
-		{
-			from: '\ucfff',
-			to:   0xd800, // U+D800 (surrogate code point)
-		},
-		{
-			from: '\ucfff',
-			to:   0xdfff, // U+DFFF (surrogate code point)
-		},
-	}
-	for _, tt := range tests {
-		t.Run(fmt.Sprintf("%v..%v", tt.from, tt.to), func(t *testing.T) {
-			blks, err := GenCharBlocks(tt.from, tt.to)
-			if err == nil {
-				t.Fatal("expected error didn't occur")
-			}
-			if blks != nil {
-				t.Fatal("character blocks must be nil")
-			}
-		})
-	}
-}
diff --git a/tests/unit/utf8/main.go b/tests/unit/utf8/main.go
new file mode 100644
index 0000000..e8646d6
--- /dev/null
+++ b/tests/unit/utf8/main.go
@@ -0,0 +1,7 @@
+package main
+
+import "utf8"
+
+func main() {
+	utf8.MainTest()
+}
diff --git a/tests/unit/utf8/utf8.go b/tests/unit/utf8/utf8.go
new file mode 100644
index 0000000..278ed3e
--- /dev/null
+++ b/tests/unit/utf8/utf8.go
@@ -0,0 +1,184 @@
+package utf8
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestGenCharBlocks_WellFormed(t *testing.T) {
+	cBlk := func(from []byte, to []byte) *CharBlock {
+		return &CharBlock{
+			From: from,
+			To:   to,
+		}
+	}
+
+	seq := func(b ...byte) []byte {
+		return b
+	}
+
+	tests := []struct {
+		from   rune
+		to     rune
+		blocks []*CharBlock
+	}{
+		{
+			from: '\u0000',
+			to:   '\u007f',
+			blocks: []*CharBlock{
+				cBlk(seq(0x00), seq(0x7f)),
+			},
+		},
+		{
+			from: '\u0080',
+			to:   '\u07ff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)),
+			},
+		},
+		{
+			from: '\u0800',
+			to:   '\u0fff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xe0, 0xa0, 0x80), seq(0xe0, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\u1000',
+			to:   '\ucfff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xe1, 0x80, 0x80), seq(0xec, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\ud000',
+			to:   '\ud7ff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xed, 0x80, 0x80), seq(0xed, 0x9f, 0xbf)),
+			},
+		},
+		{
+			from: '\ue000',
+			to:   '\uffff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xee, 0x80, 0x80), seq(0xef, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\U00010000',
+			to:   '\U0003ffff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xf0, 0x90, 0x80, 0x80), seq(0xf0, 0xbf, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\U00040000',
+			to:   '\U000fffff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xf1, 0x80, 0x80, 0x80), seq(0xf3, 0xbf, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\U00100000',
+			to:   '\U0010ffff',
+			blocks: []*CharBlock{
+				cBlk(seq(0xf4, 0x80, 0x80, 0x80), seq(0xf4, 0x8f, 0xbf, 0xbf)),
+			},
+		},
+		{
+			from: '\u0000',
+			to:   '\U0010ffff',
+			blocks: []*CharBlock{
+				cBlk(seq(0x00), seq(0x7f)),
+				cBlk(seq(0xc2, 0x80), seq(0xdf, 0xbf)),
+				cBlk(seq(0xe0, 0xa0, 0x80), seq(0xe0, 0xbf, 0xbf)),
+				cBlk(seq(0xe1, 0x80, 0x80), seq(0xec, 0xbf, 0xbf)),
+				cBlk(seq(0xed, 0x80, 0x80), seq(0xed, 0x9f, 0xbf)),
+				cBlk(seq(0xee, 0x80, 0x80), seq(0xef, 0xbf, 0xbf)),
+				cBlk(seq(0xf0, 0x90, 0x80, 0x80), seq(0xf0, 0xbf, 0xbf, 0xbf)),
+				cBlk(seq(0xf1, 0x80, 0x80, 0x80), seq(0xf3, 0xbf, 0xbf, 0xbf)),
+				cBlk(seq(0xf4, 0x80, 0x80, 0x80), seq(0xf4, 0x8f, 0xbf, 0xbf)),
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(fmt.Sprintf("%v..%v", tt.from, tt.to), func(t *testing.T) {
+			blks, err := GenCharBlocks(tt.from, tt.to)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if len(blks) != len(tt.blocks) {
+				t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
+			}
+			for i, blk := range blks {
+				if len(blk.From) != len(tt.blocks[i].From) || len(blk.To) != len(tt.blocks[i].To) {
+					t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
+				}
+				for j := 0; j < len(blk.From); j++ {
+					if blk.From[j] != tt.blocks[i].From[j] || blk.To[j] != tt.blocks[i].To[j] {
+						t.Fatalf("unexpected character block: want: %+v, got: %+v", tt.blocks, blks)
+					}
+				}
+			}
+		})
+	}
+}
+
+func TestGenCharBlocks_IllFormed(t *testing.T) {
+	tests := []struct {
+		from rune
+		to   rune
+	}{
+		{
+			// from > to
+			from: '\u0001',
+			to:   '\u0000',
+		},
+		{
+			from: -1, // <U+0000
+			to:   '\u0000',
+		},
+		{
+			from: '\u0000',
+			to:   0x110000, // >U+10FFFF
+		},
+		{
+			from: 0xd800, // U+D800 (surrogate code point)
+			to:   '\ue000',
+		},
+		{
+			from: 0xdfff, // U+DFFF (surrogate code point)
+			to:   '\ue000',
+		},
+		{
+			from: '\ucfff',
+			to:   0xd800, // U+D800 (surrogate code point)
+		},
+		{
+			from: '\ucfff',
+			to:   0xdfff, // U+DFFF (surrogate code point)
+		},
+	}
+	for _, tt := range tests {
+		t.Run(fmt.Sprintf("%v..%v", tt.from, tt.to), func(t *testing.T) {
+			blks, err := GenCharBlocks(tt.from, tt.to)
+			if err == nil {
+				t.Fatal("expected error didn't occur")
+			}
+			if blks != nil {
+				t.Fatal("character blocks must be nil")
+			}
+		})
+	}
+}
+
+
+func MainTest() {}
-- 
cgit v1.2.3
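
Editor's note on the new layout: each test directory now builds into a standalone executable whose main.go does nothing but call the package's exported MainTest, and MainTest is an empty stub for now ("no actual tests"). The sketch below shows one way such a stub could later dispatch to the real test functions; the wiring through testing.Main is an assumption of this sketch, not something this commit does.

package utf8

import "testing"

// MainTest is what tests/unit/utf8/main.go invokes. Hypothetical wiring:
// register each Test* function with testing.Main so the standalone
// executable reports pass/fail like a regular test binary.
func MainTest() {
	testing.Main(
		// matchString: accept every name, i.e. run all registered tests.
		func(pat, str string) (bool, error) { return true, nil },
		[]testing.InternalTest{
			{Name: "TestGenCharBlocks_WellFormed", F: TestGenCharBlocks_WellFormed},
			{Name: "TestGenCharBlocks_IllFormed", F: TestGenCharBlocks_IllFormed},
		},
		nil, // no benchmarks
		nil, // no examples
	)
}

Wired this way, the executable would exit non-zero when any registered test fails (testing.Main calls os.Exit with the result), so the build script only needs to run each binary and check its status.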