Diffstat (limited to 'tests/unit/grammar')
-rw-r--r-- | tests/unit/grammar/first_test.go | 219
-rw-r--r-- | tests/unit/grammar/grammar_test.go | 3381
-rw-r--r-- | tests/unit/grammar/lalr1_test.go | 187
-rw-r--r-- | tests/unit/grammar/lexical/compiler_test.go | 338
-rw-r--r-- | tests/unit/grammar/lexical/dfa/dfa_test.go | 121
-rw-r--r-- | tests/unit/grammar/lexical/dfa/symbol_position_test.go | 79
-rw-r--r-- | tests/unit/grammar/lexical/dfa/tree_test.go | 257
-rw-r--r-- | tests/unit/grammar/lexical/parser/lexer_test.go | 524
-rw-r--r-- | tests/unit/grammar/lexical/parser/parser_test.go | 1389
-rw-r--r-- | tests/unit/grammar/lr0_test.go | 448
-rw-r--r-- | tests/unit/grammar/parsing_table_test.go | 387
-rw-r--r-- | tests/unit/grammar/symbol/symbol_test.go | 159
-rw-r--r-- | tests/unit/grammar/test_helper_test.go | 68
13 files changed, 7557 insertions, 0 deletions
diff --git a/tests/unit/grammar/first_test.go b/tests/unit/grammar/first_test.go
new file mode 100644
index 0000000..9625ef6
--- /dev/null
+++ b/tests/unit/grammar/first_test.go
@@ -0,0 +1,219 @@
+package grammar
+
+import (
+	"strings"
+	"testing"
+
+	"urubu/grammar/symbol"
+	"urubu/spec/grammar/parser"
+)
+
+type first struct {
+	lhs     string
+	num     int
+	dot     int
+	symbols []string
+	empty   bool
+}
+
+func TestGenFirst(t *testing.T) {
+	tests := []struct {
+		caption string
+		src     string
+		first   []first
+	}{
+		{
+			caption: "productions contain only non-empty productions",
+			src: `
+#name test;
+
+expr
+	: expr add term
+	| term
+	;
+term
+	: term mul factor
+	| factor
+	;
+factor
+	: l_paren expr r_paren
+	| id
+	;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`,
+			first: []first{
+				{lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+				{lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+				{lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}},
+				{lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+				{lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+				{lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+				{lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}},
+				{lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+				{lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+				{lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}},
+				{lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}},
+				{lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}},
+				{lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}},
+			},
+		},
+		{
+			caption: "productions contain the empty start production",
+			src: `
+#name test;
+
+s
+	:
+	;
+`,
+			first: []first{
+				{lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true},
+				{lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true},
+			},
+		},
+		{
+			caption: "productions contain an empty production",
+			src: `
+#name test;
+
+s
+	: foo bar
+	;
+foo
+	:
+	;
+bar: "bar";
+`,
+			first: []first{
+				{lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+				{lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+				{lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true},
+			},
+		},
+		{
+			caption: "a start production contains a non-empty alternative and empty alternative",
+			src: `
+#name test;
+
+s
+	: foo
+	|
+	;
+foo: "foo";
+`,
+			first: []first{
+				{lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true},
+				{lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}},
+				{lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true},
+			},
+		},
+		{
+			caption: "a production contains non-empty alternative and empty alternative",
+			src: `
+#name test;
+
+s
+	: foo
+	;
+foo
+	: bar
+	|
+	;
+bar: "bar";
+`,
+			first: []first{
+				{lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+				{lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+				{lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}},
+				{lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true},
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.caption, func(t *testing.T) {
+			fst, gram := genActualFirst(t, tt.src)
+
+			for _, ttFirst := range tt.first {
+				lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
+				if !ok {
+					t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
+				}
+
+				prod, ok := gram.productionSet.findByLHS(lhsSym)
+				if !ok {
+					t.Fatalf("a production was not found; LHS: %v 
(%v)", ttFirst.lhs, lhsSym) + } + + actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot) + if err != nil { + t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err) + } + + expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable) + + testFirst(t, actualFirst, expectedFirst) + } + }) + } +} + +func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err := b.build() + if err != nil { + t.Fatal(err) + } + fst, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + if fst == nil { + t.Fatal("genFiest returned nil without any error") + } + + return fst, gram +} + +func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry { + t.Helper() + + entry := newFirstEntry() + if empty { + entry.addEmpty() + } + for _, sym := range symbols { + symSym, ok := symTab.ToSymbol(sym) + if !ok { + t.Fatalf("a symbol was not found; symbol: %v", sym) + } + entry.add(symSym) + } + + return entry +} + +func testFirst(t *testing.T, actual, expected *firstEntry) { + if actual.empty != expected.empty { + t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty) + } + + if len(actual.symbols) != len(expected.symbols) { + t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) + } + + for eSym := range expected.symbols { + if _, ok := actual.symbols[eSym]; !ok { + t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) + } + } +} diff --git a/tests/unit/grammar/grammar_test.go b/tests/unit/grammar/grammar_test.go new file mode 100644 index 0000000..ddedb27 --- /dev/null +++ b/tests/unit/grammar/grammar_test.go @@ -0,0 +1,3381 @@ +package grammar + +import ( + "strings" + "testing" + + verr "urubu/error" + "urubu/spec/grammar/parser" +) + +func TestGrammarBuilderOK(t *testing.T) { + type okTest struct { + caption string + specSrc string + validate func(t *testing.T, g *Grammar) + } + + nameTests := []*okTest{ + { + caption: "the `#name` can be the same identifier as a non-terminal symbol", + specSrc: ` +#name s; + +s + : foo + ; + +foo + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + expected := "s" + if g.name != expected { + t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) + } + }, + }, + { + caption: "the `#name` can be the same identifier as a terminal symbol", + specSrc: ` +#name foo; + +s + : foo + ; + +foo + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + expected := "foo" + if g.name != expected { + t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) + } + }, + }, + { + caption: "the `#name` can be the same identifier as the error symbol", + specSrc: ` +#name error; + +s + : foo + | error + ; + +foo + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + expected := "error" + if g.name != expected { + t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) + } + }, + }, + { + caption: "the `#name` can be the same identifier as a fragment", + specSrc: ` +#name f; + +s + : foo + ; + +foo + : "\f{f}"; +fragment f + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + expected := "f" + if g.name != expected { + t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) + } + }, + }, + } + + modeTests := []*okTest{ 
+ { + caption: "a `#mode` can be the same identifier as a non-terminal symbol", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push s + : 'foo'; +bar #mode s + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "s" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + { + caption: "a `#mode` can be the same identifier as a terminal symbol", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push bar + : 'foo'; +bar #mode bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "bar" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + { + caption: "a `#mode` can be the same identifier as the error symbol", + specSrc: ` +#name test; + +s + : foo bar + | error + ; + +foo #push error + : 'foo'; +bar #mode error + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "error" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + { + caption: "a `#mode` can be the same identifier as a fragment", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push f + : "\f{f}"; +bar #mode f + : 'bar'; +fragment f + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + kind := "bar" + expectedMode := "f" + for _, e := range g.lexSpec.Entries { + if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { + return + } + } + t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) + }, + }, + } + + precTests := []*okTest{ + { + caption: "a `#prec` allows the empty directive group", + specSrc: ` +#name test; + +#prec (); + +s + : foo + ; + +foo + : 'foo'; +`, + }, + { + caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols", + specSrc: ` +#name test; + +#prec ( + #left foo bar +); + +s + : foo bar baz + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) + } + var bazPrec int + var bazAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("baz") + bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if bazPrec != precNil || 
bazAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) + } + }, + }, + { + caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols", + specSrc: ` +#name test; + +#prec ( + #right foo bar +); + +s + : foo bar baz + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc) + } + var bazPrec int + var bazAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("baz") + bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if bazPrec != precNil || bazAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) + } + }, + }, + { + caption: "an `#assign` directive gives only a precedence to specified terminal symbols", + specSrc: ` +#name test; + +#prec ( + #assign foo bar +); + +s + : foo bar baz + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc) + } + var bazPrec int + var bazAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("baz") + bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if bazPrec != precNil || bazAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) + } + }, + }, + { + caption: "a production has the same precedence and associativity as the right-most terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left foo +); + +s + : foo bar // This alternative has the same precedence and associativity as the 
right-most terminal symbol 'bar', not 'foo'. + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if barPrec != precNil || barAssoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc) + } + if sPrec != barPrec || sAssoc != barAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) + } + }, + }, + { + caption: "a production has the same precedence and associativity as the right-most terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left foo + #right bar +); + +s + : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'. + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + if sPrec != barPrec || sAssoc != barAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) + } + }, + }, + { + caption: "even if a non-terminal symbol apears to a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left foo + #right bar +); + +s + : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'. 
+ ; +a + : bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var aPrec int + var aAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("a") + ps, _ := g.productionSet.findByLHS(s) + aPrec = g.precAndAssoc.productionPredence(ps[0].num) + aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + if aPrec != barPrec || aAssoc != barAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc) + } + if sPrec != fooPrec || sAssoc != fooAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc) + } + }, + }, + { + caption: "each alternative in the same production can have its own precedence and associativity", + specSrc: ` +#name test; + +#prec ( + #left foo + #right bar + #assign baz +); + +s + : foo + | bar + | baz + | bra + ; + +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +bra + : 'bra'; +`, + validate: func(t *testing.T, g *Grammar) { + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + var alt3Prec int + var alt3Assoc assocType + var alt4Prec int + var alt4Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + alt3Prec = g.precAndAssoc.productionPredence(ps[2].num) + alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num) + alt4Prec = g.precAndAssoc.productionPredence(ps[3].num) + alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeLeft { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc) + } + if alt2Prec != 2 || alt2Assoc != assocTypeRight { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc) + } + if alt3Prec != 3 || alt3Assoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, 
alt3Prec, alt3Assoc) + } + if alt4Prec != precNil || alt4Assoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc) + } + }, + }, + { + caption: "when a production contains no terminal symbols, the production will not have precedence and associativiry", + specSrc: ` +#name test; + +#prec ( + #left foo +); + +s + : a + ; +a + : foo + ; + +foo + : 'foo'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var aPrec int + var aAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("a") + ps, _ := g.productionSet.findByLHS(s) + aPrec = g.precAndAssoc.productionPredence(ps[0].num) + aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if aPrec != fooPrec || aAssoc != fooAssoc { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc) + } + if sPrec != precNil || sAssoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc) + } + }, + }, + { + caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", + specSrc: ` +#name test; + +#prec ( + #left foo +); + +s + : foo bar #prec foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if sPrec != fooPrec || sAssoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) + } + }, + }, + { + caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", + specSrc: ` +#name test; + +#prec ( + #left foo + #right bar +); + +s + : foo bar #prec foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var barPrec int + var barAssoc assocType 
+ { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var sPrec int + var sAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + sPrec = g.precAndAssoc.productionPredence(ps[0].num) + sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + if sPrec != fooPrec || sAssoc != assocTypeNil { + t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) + } + }, + }, + { + caption: "an ordered symbol can appear in a `#left` directive", + specSrc: ` +#name test; + +#prec ( + #left $high + #right foo bar + #left $low +); + +s + : foo #prec $high + | bar #prec $low + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 2 || fooAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 2 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 3 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "an ordered symbol can appear in a `#right` directive", + specSrc: ` +#name test; + +#prec ( + #right $high + #left foo bar + #right $low +); + +s + : foo #prec $high + | bar #prec $low + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 2 || fooAssoc != 
assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 2 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 3 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "an ordered symbol can appear in a `#assign` directive", + specSrc: ` +#name test; + +#prec ( + #assign $high + #left foo + #right bar + #assign $low +); + +s + : foo #prec $high + | bar #prec $low + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 2 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 3 || barAssoc != assocTypeRight { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != 1 || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 4 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "names of an ordered symbol and a terminal symbol can duplicate", + specSrc: ` +#name test; + +#prec ( + 
#left foo bar + #right $foo +); + +s + : foo + | bar #prec $foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var fooPrec int + var fooAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("foo") + fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if fooPrec != 1 || fooAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) + } + if barPrec != 1 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != fooPrec || alt1Assoc != fooAssoc { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc) + } + if alt2Prec != 2 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + { + caption: "names of an ordered symbol and a non-terminal symbol can duplicate", + specSrc: ` +#name test; + +#prec ( + #left foo bar + #right $a +); + +s + : a + | bar #prec $a + ; +a + : foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + validate: func(t *testing.T, g *Grammar) { + var barPrec int + var barAssoc assocType + { + s, _ := g.symbolTable.ToSymbol("bar") + barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) + barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) + } + if barPrec != 1 || barAssoc != assocTypeLeft { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) + } + var alt1Prec int + var alt1Assoc assocType + var alt2Prec int + var alt2Assoc assocType + { + s, _ := g.symbolTable.ToSymbol("s") + ps, _ := g.productionSet.findByLHS(s) + alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) + alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) + alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) + alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) + } + if alt1Prec != precNil || alt1Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc) + } + if alt2Prec != 2 || alt2Assoc != assocTypeNil { + t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) + } + }, + }, + } + + var tests []*okTest + tests = append(tests, nameTests...) + tests = append(tests, modeTests...) + tests = append(tests, precTests...) 
+ + for _, test := range tests { + t.Run(test.caption, func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(test.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := GrammarBuilder{ + AST: ast, + } + g, err := b.build() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if test.validate != nil { + test.validate(t, g) + } + }) + } +} + +func TestGrammarBuilderSpecError(t *testing.T) { + type specErrTest struct { + caption string + specSrc string + errs []error + } + + spellingInconsistenciesTests := []*specErrTest{ + { + caption: "a spelling inconsistency appears among non-terminal symbols", + specSrc: ` +#name test; + +a1 + : a_1 + ; +a_1 + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among terminal symbols", + specSrc: ` +#name test; + +s + : foo1 foo_1 + ; + +foo1 + : 'foo1'; +foo_1 + : 'foo_1'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among non-terminal and terminal symbols", + specSrc: ` +#name test; + +a1 + : a_1 + ; + +a_1 + : 'a_1'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same", + specSrc: ` +#name test; + +#prec ( + #assign $p1 $p_1 +); + +s + : foo #prec $p1 + | bar #prec $p_1 + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same", + specSrc: ` +#name test; + +#prec ( + #assign $p1 + #assign $p_1 +); + +s + : foo #prec $p1 + | bar #prec $p_1 + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among labels the same alternative contains", + specSrc: ` +#name test; + +s + : foo@l1 foo@l_1 + ; + +foo + : 'foo'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among labels the same production contains", + specSrc: ` +#name test; + +s + : foo@l1 + | bar@l_1 + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + { + caption: "a spelling inconsistency appears among labels different productions contain", + specSrc: ` +#name test; + +s + : foo@l1 + ; +a + : bar@l_1 + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrSpellingInconsistency}, + }, + } + + prodTests := []*specErrTest{ + { + caption: "a production `b` is unused", + specSrc: ` +#name test; + +a + : foo + ; +b + : foo + ; + +foo + : "foo"; +`, + errs: []error{semErrUnusedProduction}, + }, + { + caption: "a terminal symbol `bar` is unused", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrUnusedTerminal}, + }, + { + caption: "a production `b` and terminal symbol `bar` is unused", + specSrc: ` +#name test; + +a + : foo + ; +b + : bar + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{ + semErrUnusedProduction, + semErrUnusedTerminal, + }, + }, + { + caption: "a production cannot have production directives", + specSrc: ` +#name test; + +s #prec foo + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrInvalidProdDir}, + }, + { + caption: "a lexical production cannot have alternative directives", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo' #skip; +`, + errs: []error{semErrInvalidAltDir}, + }, + { + caption: "a production directive must not 
be duplicated", + specSrc: ` +#name test; + +s + : foo + ; + +foo #skip #skip + : 'foo'; +`, + errs: []error{semErrDuplicateDir}, + }, + { + caption: "an alternative directive must not be duplicated", + specSrc: ` +#name test; + +s + : foo bar #ast foo bar #ast foo bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDuplicateDir}, + }, + { + caption: "a production must not have a duplicate alternative (non-empty alternatives)", + specSrc: ` +#name test; + +s + : foo + | foo + ; + +foo + : "foo"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a production must not have a duplicate alternative (non-empty and split alternatives)", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : bar + ; +s + : foo + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a production must not have a duplicate alternative (empty alternatives)", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : + | + ; + +foo + : "foo"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a production must not have a duplicate alternative (empty and split alternatives)", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : + | foo + ; +a + : + ; + +foo + : "foo"; +`, + errs: []error{semErrDuplicateProduction}, + }, + { + caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : "foo"; +s + : "a"; +`, + errs: []error{semErrDuplicateName}, + }, + { + caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates", + specSrc: ` +#name test; + +s + : foo + | a + ; +a + : bar + ; + +foo + : "foo"; +bar + : "bar"; +a + : "a"; +`, + errs: []error{semErrDuplicateName}, + }, + { + caption: "an invalid top-level directive", + specSrc: ` +#name test; + +#foo; + +s + : a + ; + +a + : 'a'; +`, + errs: []error{semErrDirInvalidName}, + }, + { + caption: "a label must be unique in an alternative", + specSrc: ` +#name test; + +s + : foo@x bar@x + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDuplicateLabel}, + }, + { + caption: "a label cannot be the same name as terminal symbols", + specSrc: ` +#name test; + +s + : foo bar@foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDuplicateLabel}, + }, + { + caption: "a label cannot be the same name as non-terminal symbols", + specSrc: ` +#name test; + +s + : foo@a + | a + ; +a + : bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{ + semErrInvalidLabel, + }, + }, + } + + nameDirTests := []*specErrTest{ + { + caption: "the `#name` directive is required", + specSrc: ` +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrNoGrammarName}, + }, + { + caption: "the `#name` directive needs an ID parameter", + specSrc: ` +#name; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive cannot take a pattern parameter", + specSrc: ` +#name "test"; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive cannot take a string parameter", + specSrc: ` +#name 'test'; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive takes just one parameter", + specSrc: ` +#name test1 test2; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + precDirTests := []*specErrTest{ + { + caption: "the `#prec` directive needs a 
directive group parameter", + specSrc: ` +#name test; + +#prec; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an ID parameter", + specSrc: ` +#name test; + +#prec foo; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec $x; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec "foo"; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec 'foo'; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive takes just one directive group parameter", + specSrc: ` +#name test; + +#prec () (); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + leftDirTests := []*specErrTest{ + { + caption: "the `#left` directive needs ID parameters", + specSrc: ` +#name test; + +#prec ( + #left +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +#prec ( + #left error +); + +s + : foo semi_colon + | error semi_colon + ; + +foo + : 'foo'; +semi_colon + : ';'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +#prec ( + #left x +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left s +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec ( + #left "foo" +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec ( + #left 'foo' +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` directive cannot take a directive parameter", + specSrc: ` +#name test; + +#prec ( + #left () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol", + specSrc: ` +#name test; + +#prec ( + #left foo foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol", + specSrc: ` +#name test; + +#prec ( + #left $x $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #left foo + #left foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #left $x + #left $x +); + +s + : foo #prec $x + ; + +foo 
+ : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #right foo + #left foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #right $x + #left $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + } + + rightDirTests := []*specErrTest{ + { + caption: "the `#right` directive needs ID parameters", + specSrc: ` +#name test; + +#prec ( + #right +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +#prec ( + #right error +); + +s + : foo semi_colon + | error semi_colon + ; + +foo + : 'foo'; +semi_colon + : ';'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +#prec ( + #right x +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #right s +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec ( + #right "foo" +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec ( + #right 'foo' +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +#prec ( + #right () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot be specified multiple times for a terminal symbol", + specSrc: ` +#name test; + +#prec ( + #right foo foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "the `#right` directive cannot be specified multiple times for an ordered symbol", + specSrc: ` +#name test; + +#prec ( + #right $x $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #right foo + #right foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #right $x + #right $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #left foo + #right foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #left $x + #right $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + } + + assignDirTests := []*specErrTest{ + { + caption: "the `#assign` directive needs ID 
parameters", + specSrc: ` +#name test; + +#prec ( + #assign +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +#prec ( + #assign error +); + +s + : foo semi_colon + | error semi_colon + ; + +foo + : 'foo'; +semi_colon + : ';'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +#prec ( + #assign x +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +#prec ( + #assign s +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +#prec ( + #assign "foo" +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec ( + #assign 'foo' +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` directive cannot take a directive parameter", + specSrc: ` +#name test; + +#prec ( + #assign () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol", + specSrc: ` +#name test; + +#prec ( + #assign foo foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol", + specSrc: ` +#name test; + +#prec ( + #assign $x $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #assign foo + #assign foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different precedence", + specSrc: ` +#name test; + +#prec ( + #assign $x + #assign $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "a terminal symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #assign foo + #left foo +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + { + caption: "an ordered symbol cannot have different associativity", + specSrc: ` +#name test; + +#prec ( + #assign $x + #left $x +); + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateAssoc}, + }, + } + + errorSymTests := []*specErrTest{ + { + caption: "cannot use the error symbol as a non-terminal symbol", + specSrc: ` +#name test; + +s + : error + ; +error + : foo + ; + +foo: 'foo'; +`, + errs: []error{ + semErrErrSymIsReserved, + semErrDuplicateName, + }, + }, + { + caption: "cannot use the error symbol as a terminal symbol", + specSrc: ` +#name test; + +s + : error + ; + +error: 'error'; +`, + errs: []error{semErrErrSymIsReserved}, + }, + { + caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +error #skip + : 'error'; +`, + errs: 
[]error{semErrErrSymIsReserved}, + }, + } + + astDirTests := []*specErrTest{ + { + caption: "the `#ast` directive needs ID or label prameters", + specSrc: ` +#name test; + +s + : foo #ast + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo #ast $x + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo #ast "foo" + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo #ast 'foo' + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo #ast () + ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative", + specSrc: ` +#name test; + +s + : foo bar #ast foo x + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive", + specSrc: ` +#name test; + +s + : foo #ast bar + | bar + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a label in a different alternative cannot be a parameter of the `#ast` directive", + specSrc: ` +#name test; + +s + : foo #ast b + | bar@b + ; + +foo + : "foo"; +bar + : "bar"; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a symbol can appear in the `#ast` directive only once", + specSrc: ` +#name test; + +s + : foo #ast foo foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateElem}, + }, + { + caption: "a label can appear in the `#ast` directive only once", + specSrc: ` +#name test; + +s + : foo@x #ast x x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateElem}, + }, + { + caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label", + specSrc: ` +#name test; + +s + : foo@x #ast foo x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDuplicateElem}, + }, + { + caption: "symbol `foo` is ambiguous because it appears in an alternative twice", + specSrc: ` +#name test; + +s + : foo foo #ast foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrAmbiguousElem}, + }, + { + caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label", + specSrc: ` +#name test; + +s + : foo@x foo #ast foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrAmbiguousElem}, + }, + { + caption: "the expansion operator cannot be applied to a terminal symbol", + specSrc: ` +#name test; + +s + : foo #ast foo... 
+ ; + +foo + : "foo"; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + altPrecDirTests := []*specErrTest{ + { + caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter", + specSrc: ` +#name test; + +s + : foo #prec + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot be applied to an error symbol", + specSrc: ` +#name test; + +s + : foo #prec error + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an undefined symbol", + specSrc: ` +#name test; + +s + : foo #prec x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a non-terminal symbol", + specSrc: ` +#name test; + +s + : a #prec b + | b + ; +a + : foo + ; +b + : bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take an undefined ordered symbol parameter", + specSrc: ` +#name test; + +s + : foo #prec $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrUndefinedOrdSym}, + }, + { + caption: "the `#prec` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo #prec "foo" + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo #prec 'foo' + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a directive parameter", + specSrc: ` +#name test; + +s + : foo #prec () + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a symbol the `#prec` directive takes must be given precedence explicitly", + specSrc: ` +#name test; + +s + : foo bar #prec foo + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrUndefinedPrec}, + }, + } + + recoverDirTests := []*specErrTest{ + { + caption: "the `#recover` directive cannot take an ID parameter", + specSrc: ` +#name test; + +s + : foo #recover foo + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo #recover $x + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo #recover "foo" + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo #recover 'foo' + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo #recover () + ; + +foo + : 'foo'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + fragmentTests := []*specErrTest{ + { + caption: "a production cannot contain a fragment", + specSrc: ` +#name test; + +s + : f + ; + +fragment f + : 'fragment'; +`, + errs: []error{semErrUndefinedSym}, + }, + { + caption: "fragments cannot be duplicated", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : "\f{f}"; +fragment f + : 'fragment 1'; +fragment f + : 'fragment 2'; +`, + errs: []error{semErrDuplicateFragment}, + }, + } + + modeDirTests := []*specErrTest{ + { + caption: "the `#mode` 
directive needs an ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar + ; + +foo + : 'foo'; +bar #mode $x + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode "mode_1" + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode 'mode_1' + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode () + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + pushDirTests := []*specErrTest{ + { + caption: "the `#push` directive needs an ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive takes just one ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 mode_2 + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar + ; + +foo #push $x + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push "mode_1" + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push 'mode_1' + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push () + : 'foo'; +bar #mode mode_1 + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + popDirTests := []*specErrTest{ + { + caption: "the `#pop` directive cannot take an ID parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop mode_1 + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop $x + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop "mode_1" + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode 
mode_1 + : 'bar'; +baz #pop 'mode_1' + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a directive parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop () + : 'baz'; +`, + errs: []error{semErrDirInvalidParam}, + }, + } + + skipDirTests := []*specErrTest{ + { + caption: "the `#skip` directive cannot take an ID parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip bar + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take an ordered symbol parameter", + specSrc: ` +#name test; + +#prec ( + #assign $x +); + +s + : foo bar + ; + +foo #skip $x + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a pattern parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip "bar" + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a string parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip 'bar' + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip () + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrDirInvalidParam}, + }, + { + caption: "a terminal symbol used in productions cannot have the skip directive", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip + : 'foo'; +bar + : 'bar'; +`, + errs: []error{semErrTermCannotBeSkipped}, + }, + } + + var tests []*specErrTest + tests = append(tests, spellingInconsistenciesTests...) + tests = append(tests, prodTests...) + tests = append(tests, nameDirTests...) + tests = append(tests, precDirTests...) + tests = append(tests, leftDirTests...) + tests = append(tests, rightDirTests...) + tests = append(tests, assignDirTests...) + tests = append(tests, errorSymTests...) + tests = append(tests, astDirTests...) + tests = append(tests, altPrecDirTests...) + tests = append(tests, recoverDirTests...) + tests = append(tests, fragmentTests...) + tests = append(tests, modeDirTests...) + tests = append(tests, pushDirTests...) + tests = append(tests, popDirTests...) + tests = append(tests, skipDirTests...) 
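+	// Each case below must fail to build: parse specSrc, run GrammarBuilder.build,
+	// require a verr.SpecErrors value whose length matches errs, and require that
+	// at least one of the expected causes appears among the reported errors.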
+ for _, test := range tests { + t.Run(test.caption, func(t *testing.T) { + ast, err := parser.Parse(strings.NewReader(test.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := GrammarBuilder{ + AST: ast, + } + _, err = b.build() + if err == nil { + t.Fatal("an expected error didn't occur") + } + specErrs, ok := err.(verr.SpecErrors) + if !ok { + t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err) + } + if len(specErrs) != len(test.errs) { + t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs) + } + for _, expected := range test.errs { + for _, actual := range specErrs { + if actual.Cause == expected { + return + } + } + } + t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs) + }) + } +} diff --git a/tests/unit/grammar/lalr1_test.go b/tests/unit/grammar/lalr1_test.go new file mode 100644 index 0000000..fd09333 --- /dev/null +++ b/tests/unit/grammar/lalr1_test.go @@ -0,0 +1,187 @@ +package grammar + +import ( + "strings" + "testing" + + "urubu/grammar/symbol" + "urubu/spec/grammar/parser" +) + +func TestGenLALR1Automaton(t *testing.T) { + // This grammar belongs to LALR(1) class, not SLR(1). + src := ` +#name test; + +s: l eq r | r; +l: ref r | id; +r: l; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var gram *Grammar + var automaton *lalr1Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatalf("failed to create a FIRST set: %v", err) + } + + automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) + if err != nil { + t.Fatalf("failed to create a LALR1 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLALR1Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), + }, + 1: { + withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), + }, + 2: { + withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), + withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), + }, + 3: { + withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), + }, + 4: { + withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 5: { + withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), + }, + 6: { + withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), + }, + 7: { + withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 8: { + withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), + }, + 9: { + withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("l"): 
expectedKernels[2], + genSym("r"): expectedKernels[3], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s'", "s"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("eq"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("r", "l"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "r"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[7], + genSym("l"): expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("l", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[9], + genSym("l"): expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("l", "ref", "r"), + }, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("r", "l"), + }, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "l", "eq", "r"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton.lr0Automaton) +} diff --git a/tests/unit/grammar/lexical/compiler_test.go b/tests/unit/grammar/lexical/compiler_test.go new file mode 100644 index 0000000..b621cd2 --- /dev/null +++ b/tests/unit/grammar/lexical/compiler_test.go @@ -0,0 +1,338 @@ +package lexical + +import ( + "encoding/json" + "fmt" + "testing" + + spec "urubu/spec/grammar" +) + +func TestLexSpec_Validate(t *testing.T) { + // We expect that the spelling inconsistency error will occur. + spec := &LexSpec{ + Entries: []*LexEntry{ + { + Modes: []spec.LexModeName{ + // 'Default' is the spelling inconsistency because 'default' is predefined. 
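+					// Both spellings presumably normalize to the same canonical name
+					// (see FindSpellingInconsistencies below), so Validate must fail here.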
+ "Default", + }, + Kind: "foo", + Pattern: "foo", + }, + }, + } + err := spec.Validate() + if err == nil { + t.Fatalf("expected error didn't occur") + } +} + +func TestSnakeCaseToUpperCamelCase(t *testing.T) { + tests := []struct { + snake string + camel string + }{ + { + snake: "foo", + camel: "Foo", + }, + { + snake: "foo_bar", + camel: "FooBar", + }, + { + snake: "foo_bar_baz", + camel: "FooBarBaz", + }, + { + snake: "Foo", + camel: "Foo", + }, + { + snake: "fooBar", + camel: "FooBar", + }, + { + snake: "FOO", + camel: "FOO", + }, + { + snake: "FOO_BAR", + camel: "FOOBAR", + }, + { + snake: "_foo_bar_", + camel: "FooBar", + }, + { + snake: "___foo___bar___", + camel: "FooBar", + }, + } + for _, tt := range tests { + c := SnakeCaseToUpperCamelCase(tt.snake) + if c != tt.camel { + t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) + } + } +} + +func TestFindSpellingInconsistencies(t *testing.T) { + tests := []struct { + ids []string + duplicated [][]string + }{ + { + ids: []string{"foo", "foo"}, + duplicated: nil, + }, + { + ids: []string{"foo", "Foo"}, + duplicated: [][]string{{"Foo", "foo"}}, + }, + { + ids: []string{"foo", "foo", "Foo"}, + duplicated: [][]string{{"Foo", "foo"}}, + }, + { + ids: []string{"foo_bar_baz", "FooBarBaz"}, + duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + duplicated := FindSpellingInconsistencies(tt.ids) + if len(duplicated) != len(tt.duplicated) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) + } + for i, dupIDs := range duplicated { + if len(dupIDs) != len(tt.duplicated[i]) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + for j, id := range dupIDs { + if id != tt.duplicated[i][j] { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + } + } + }) + } +} + +func TestCompile(t *testing.T) { + tests := []struct { + Caption string + Spec string + Err bool + }{ + { + Caption: "allow duplicates names between fragments and non-fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "\\f{a2z}" + }, + { + "fragment": true, + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + }, + { + Caption: "don't allow duplicates names in non-fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" + }, + { + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow duplicates names in fragments", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a2z", + "pattern": "\\f{a2z}" + }, + { + "fragments": true, + "kind": "a2z", + "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" + }, + { + "fragments": true, + "kind": "a2z", + "pattern": "[a-z]" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow kind names in the same mode to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow kind names across modes to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + 
"entries": [ + { + "modes": ["default"], + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "modes": ["other_mode"], + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + Err: true, + }, + { + Caption: "don't allow mode names to contain spelling inconsistencies", + Spec: ` +{ + "name": "test", + "entries": [ + { + "modes": ["foo_1"], + "kind": "a", + "pattern": "a" + }, + { + "modes": ["foo1"], + "kind": "b", + "pattern": "b" + } + ] +} +`, + Err: true, + }, + { + Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files", + Spec: ` +{ + "name": "test", + "entries": [ + { + "kind": "a", + "pattern": "a" + }, + { + "fragment": true, + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "fragment": true, + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + }, + { + Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files", + Spec: ` +{ + "name": "test", + "entries": [ + { + "modes": ["default"], + "kind": "a", + "pattern": "a" + }, + { + "modes": ["default"], + "fragment": true, + "kind": "foo_1", + "pattern": "foo_1" + }, + { + "modes": ["other_mode"], + "fragment": true, + "kind": "foo1", + "pattern": "foo1" + } + ] +} +`, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) { + lspec := &LexSpec{} + err := json.Unmarshal([]byte(tt.Spec), lspec) + if err != nil { + t.Fatalf("%v", err) + } + clspec, err, _ := Compile(lspec, CompressionLevelMin) + if tt.Err { + if err == nil { + t.Fatalf("expected an error") + } + if clspec != nil { + t.Fatalf("Compile function mustn't return a compiled specification") + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if clspec == nil { + t.Fatalf("Compile function must return a compiled specification") + } + } + }) + } +} diff --git a/tests/unit/grammar/lexical/dfa/dfa_test.go b/tests/unit/grammar/lexical/dfa/dfa_test.go new file mode 100644 index 0000000..38577cf --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/dfa_test.go @@ -0,0 +1,121 @@ +package dfa + +import ( + "strings" + "testing" + + "urubu/grammar/lexical/parser" + spec "urubu/spec/grammar" +) + +func TestGenDFA(t *testing.T) { + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + dfa := GenDFA(bt, symTab) + if dfa == nil { + t.Fatalf("DFA is nil") + } + + symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) + s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) + s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) + s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) + + rune2Int := func(char rune, index int) uint8 { + return uint8([]byte(string(char))[index]) + } + + tranS0 := [256]string{} + tranS0[rune2Int('a', 0)] = s1.hash() + tranS0[rune2Int('b', 0)] = s0.hash() + + tranS1 := [256]string{} + tranS1[rune2Int('a', 0)] = 
s1.hash() + tranS1[rune2Int('b', 0)] = s2.hash() + + tranS2 := [256]string{} + tranS2[rune2Int('a', 0)] = s1.hash() + tranS2[rune2Int('b', 0)] = s3.hash() + + tranS3 := [256]string{} + tranS3[rune2Int('a', 0)] = s1.hash() + tranS3[rune2Int('b', 0)] = s0.hash() + + expectedTranTab := map[string][256]string{ + s0.hash(): tranS0, + s1.hash(): tranS1, + s2.hash(): tranS2, + s3.hash(): tranS3, + } + if len(dfa.TransitionTable) != len(expectedTranTab) { + t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) + } + for h, eTranTab := range expectedTranTab { + tranTab, ok := dfa.TransitionTable[h] + if !ok { + t.Errorf("no entry; hash: %v", h) + continue + } + if len(tranTab) != len(eTranTab) { + t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) + } + for c, eNext := range eTranTab { + if eNext == "" { + continue + } + + next := tranTab[c] + if next == "" { + t.Errorf("no enatry: hash: %v, char: %v", h, c) + } + if next != eNext { + t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) + } + } + } + + if dfa.InitialState != s0.hash() { + t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) + } + + accTab := map[string]spec.LexModeKindID{ + s3.hash(): 1, + } + if len(dfa.AcceptingStatesTable) != len(accTab) { + t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) + } + for eState, eID := range accTab { + id, ok := dfa.AcceptingStatesTable[eState] + if !ok { + t.Errorf("accepting state is not found: state: %v", eState) + } + if id != eID { + t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) + } + } +} diff --git a/tests/unit/grammar/lexical/dfa/symbol_position_test.go b/tests/unit/grammar/lexical/dfa/symbol_position_test.go new file mode 100644 index 0000000..c867f64 --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/symbol_position_test.go @@ -0,0 +1,79 @@ +package dfa + +import ( + "fmt" + "testing" +) + +func TestNewSymbolPosition(t *testing.T) { + tests := []struct { + n uint16 + endMark bool + err bool + }{ + { + n: 0, + endMark: false, + err: true, + }, + { + n: 0, + endMark: true, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: true, + err: true, + }, + { + n: symbolPositionMin, + endMark: false, + }, + { + n: symbolPositionMin, + endMark: true, + }, + { + n: symbolPositionMax, + endMark: false, + }, + { + n: symbolPositionMax, + endMark: true, + }, + { + n: symbolPositionMax + 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMax + 1, + endMark: true, + err: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { + pos, err := newSymbolPosition(tt.n, tt.endMark) + if tt.err { + if err == nil { + t.Fatal("err is nil") + } + return + } + if err != nil { + t.Fatal(err) + } + n, endMark := pos.describe() + if n != tt.n || endMark != tt.endMark { + t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) + } + }) + } +} diff --git a/tests/unit/grammar/lexical/dfa/tree_test.go b/tests/unit/grammar/lexical/dfa/tree_test.go new file mode 100644 index 0000000..de3ebbb --- /dev/null +++ b/tests/unit/grammar/lexical/dfa/tree_test.go @@ -0,0 +1,257 @@ +package dfa + +import ( + "fmt" + 
"strings" + "testing" + + "urubu/grammar/lexical/parser" + spec "urubu/spec/grammar" +) + +func TestByteTree(t *testing.T) { + tests := []struct { + root byteTree + nullable bool + first *symbolPositionSet + last *symbolPositionSet + }{ + { + root: newSymbolNodeWithPos(0, 1), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newEndMarkerNodeWithPos(1, 1), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newConcatNode( + newSymbolNodeWithPos(0, 1), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(2), + }, + { + root: newConcatNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(2), + }, + { + root: newConcatNode( + newSymbolNodeWithPos(0, 1), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: false, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newConcatNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newSymbolNodeWithPos(0, 1), + newSymbolNodeWithPos(0, 2), + ), + nullable: false, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newSymbolNodeWithPos(0, 2), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newSymbolNodeWithPos(0, 1), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newAltNode( + newRepeatNode(newSymbolNodeWithPos(0, 1)), + newRepeatNode(newSymbolNodeWithPos(0, 2)), + ), + nullable: true, + first: newSymbolPositionSet().add(1).add(2), + last: newSymbolPositionSet().add(1).add(2), + }, + { + root: newRepeatNode(newSymbolNodeWithPos(0, 1)), + nullable: true, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + { + root: newOptionNode(newSymbolNodeWithPos(0, 1)), + nullable: true, + first: newSymbolPositionSet().add(1), + last: newSymbolPositionSet().add(1), + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + if tt.root.nullable() != tt.nullable { + t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) + } + if tt.first.hash() != tt.root.first().hash() { + t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first()) + } + if tt.last.hash() != tt.root.last().hash() { + t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) + } + }) + } +} + +func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { + n := newSymbolNode(v) + n.pos = pos + return n +} + +func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { + n := newEndMarkerNode(spec.LexModeKindID(id)) + n.pos = pos + return n +} + +func TestFollowAndSymbolTable(t *testing.T) { + symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + 
panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + + { + followTab := genFollowTable(bt) + if followTab == nil { + t.Fatal("follow table is nil") + } + expectedFollowTab := followTable{ + 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), + 3: newSymbolPositionSet().add(symPos(4)), + 4: newSymbolPositionSet().add(symPos(5)), + 5: newSymbolPositionSet().add(endPos(6)), + } + testFollowTable(t, expectedFollowTab, followTab) + } + + { + entry := func(v byte) byteRange { + return byteRange{ + from: v, + to: v, + } + } + + expectedSymTab := &symbolTable{ + symPos2Byte: map[symbolPosition]byteRange{ + symPos(1): entry(byte('a')), + symPos(2): entry(byte('b')), + symPos(3): entry(byte('a')), + symPos(4): entry(byte('b')), + symPos(5): entry(byte('b')), + }, + endPos2ID: map[symbolPosition]spec.LexModeKindID{ + endPos(6): 1, + }, + } + testSymbolTable(t, expectedSymTab, symTab) + } +} + +func testFollowTable(t *testing.T, expected, actual followTable) { + if len(actual) != len(expected) { + t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) + } + for ePos, eSet := range expected { + aSet, ok := actual[ePos] + if !ok { + t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) + } + if aSet.hash() != eSet.hash() { + t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet) + } + } +} + +func testSymbolTable(t *testing.T, expected, actual *symbolTable) { + t.Helper() + + if len(actual.symPos2Byte) != len(expected.symPos2Byte) { + t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) + } + for ePos, eByte := range expected.symPos2Byte { + byte, ok := actual.symPos2Byte[ePos] + if !ok { + t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) + continue + } + if byte.from != eByte.from || byte.to != eByte.to { + t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte) + } + } + + if len(actual.endPos2ID) != len(expected.endPos2ID) { + t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) + } + for ePos, eID := range expected.endPos2ID { + id, ok := actual.endPos2ID[ePos] + if !ok { + t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) + continue + } + if id != eID { + t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) + } + } +} diff --git a/tests/unit/grammar/lexical/parser/lexer_test.go b/tests/unit/grammar/lexical/parser/lexer_test.go new file mode 100644 index 0000000..055466e --- /dev/null +++ b/tests/unit/grammar/lexical/parser/lexer_test.go @@ -0,0 +1,524 @@ +package parser + +import ( + "strings" + "testing" +) + +func TestLexer(t *testing.T) { + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "lexer can recognize ordinaly characters", + src: "123abcいろは", + tokens: 
[]*token{ + newToken(tokenKindChar, '1'), + newToken(tokenKindChar, '2'), + newToken(tokenKindChar, '3'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, 'b'), + newToken(tokenKindChar, 'c'), + newToken(tokenKindChar, 'い'), + newToken(tokenKindChar, 'ろ'), + newToken(tokenKindChar, 'は'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in default mode", + src: ".*+?|()[\\u", + tokens: []*token{ + newToken(tokenKindAnyChar, nullChar), + newToken(tokenKindRepeat, nullChar), + newToken(tokenKindRepeatOneOrMore, nullChar), + newToken(tokenKindOption, nullChar), + newToken(tokenKindAlt, nullChar), + newToken(tokenKindGroupOpen, nullChar), + newToken(tokenKindGroupClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in default mode", + src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", + tokens: []*token{ + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "], {, and } are treated as an ordinary character in default mode", + src: "]{}", + tokens: []*token{ + newToken(tokenKindChar, ']'), + newToken(tokenKindChar, '{'), + newToken(tokenKindChar, '}'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in bracket expression mode", + src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09AF"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09abcf"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in bracket expression mode", + src: "[\\^a\\-z]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "in a bracket expression, the special characters are also handled as normal characters", + src: "[\\\\.*+?|()[", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // 
[...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character-range expression. + // b. *-, -*, or - are handled as ordinary characters. + src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", + // [^...^...][^] + // ~~ ~ ~~ + // ^ ^ ^^ + // | | |`-- Ordinary Character (c) + // | | `-- Bracket Expression + // | `-- Ordinary Character (b) + // `-- Inverse Bracket Expression (a) + // + // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. + // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. + // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. 
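+			// In this source, "[^^]" exercises (a) and (b): the leading caret opens an
+			// inverse expression and the second caret is an ordinary character, while
+			// "[^]" exercises (c): its only symbol is the ordinary character '^'.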
+ src: "[^^][^]", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "\\@", + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "\\", + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "[\\@", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "[\\", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer can recognize the special characters and code points in code point expression mode", + src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + 
newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a one digit hex string isn't a valid code point", + src: "\\u{0", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a two digits hex string isn't a valid code point", + src: "\\u{01", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a three digits hex string isn't a valid code point", + src: "\\u{012", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a four digits hex string is a valid code point", + src: "\\u{0123}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a five digits hex string isn't a valid code point", + src: "\\u{01234", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a six digits hex string is a valid code point", + src: "\\u{012345}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("012345"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a seven digits hex string isn't a valid code point", + src: "\\u{0123456", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{g", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{G", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "lexer can recognize the special characters and symbols in character property expression mode", + src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", + tokens: []*token{ + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + 
newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters and symbols in fragment expression mode", + src: "\\f{integer}", + tokens: []*token{ + newToken(tokenKindFragmentLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newFragmentSymbolToken("integer"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a fragment expression is not supported in a bracket expression", + src: "[\\f", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "a fragment expression is not supported in an inverse bracket expression", + src: "[^\\f", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + lex := newLexer(strings.NewReader(tt.src)) + var err error + var tok *token + i := 0 + for { + tok, err = lex.next() + if err != nil { + break + } + if i >= len(tt.tokens) { + break + } + eTok := tt.tokens[i] + i++ + testToken(t, tok, eTok) + + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + detail, cause := lex.error() + if cause != tt.err { + t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + if i < len(tt.tokens) { + t.Fatalf("expecte more tokens") + } + }) + } +} + +func testToken(t *testing.T, a, e *token) { + t.Helper() + if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { + t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) + } +} diff --git a/tests/unit/grammar/lexical/parser/parser_test.go b/tests/unit/grammar/lexical/parser/parser_test.go new file mode 100644 index 0000000..4c9557d --- /dev/null +++ b/tests/unit/grammar/lexical/parser/parser_test.go @@ -0,0 +1,1389 @@ +package parser + +import ( + "fmt" + "reflect" + "strings" + "testing" + + spec "urubu/spec/grammar" + "urubu/ucd" +) + +func TestParse(t *testing.T) { + tests := []struct { + pattern string + fragments map[spec.LexKindName]string + ast CPTree + syntaxError error + + // When an AST is large, as patterns containing a character property expression, this test only checks + // that the pattern is parsable. The check of the validity of such AST is performed by checking that it + // can be matched correctly using the driver. 
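+		// A character property expression such as \p{...} typically expands into an
+		// alternation over many code point ranges, so the exact tree shape is not
+		// asserted for those cases.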
+ skipTestAST bool + }{ + { + pattern: "a", + ast: newSymbolNode('a'), + }, + { + pattern: "abc", + ast: genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + }, + { + pattern: "a?", + ast: newOptionNode( + newSymbolNode('a'), + ), + }, + { + pattern: "[abc]?", + ast: newOptionNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\u{3042}?", + ast: newOptionNode( + newSymbolNode('\u3042'), + ), + }, + { + pattern: "\\p{Letter}?", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}?", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newOptionNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "(a)?", + ast: newOptionNode( + newSymbolNode('a'), + ), + }, + { + pattern: "((a?)?)?", + ast: newOptionNode( + newOptionNode( + newOptionNode( + newSymbolNode('a'), + ), + ), + ), + }, + { + pattern: "(abc)?", + ast: newOptionNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "(a|b)?", + ast: newOptionNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + }, + { + pattern: "?", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(?)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|?", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "?|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a??", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a*", + ast: newRepeatNode( + newSymbolNode('a'), + ), + }, + { + pattern: "[abc]*", + ast: newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\u{3042}*", + ast: newRepeatNode( + newSymbolNode('\u3042'), + ), + }, + { + pattern: "\\p{Letter}*", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}*", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newRepeatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "((a*)*)*", + ast: newRepeatNode( + newRepeatNode( + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + }, + { + pattern: "(abc)*", + ast: newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "(a|b)*", + ast: newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + }, + { + pattern: "*", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(*)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|*", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "*|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a**", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a+", + ast: genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + }, + { + pattern: "[abc]+", + ast: genConcatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "\\u{3042}+", + ast: genConcatNode( + newSymbolNode('\u3042'), + newRepeatNode( + newSymbolNode('\u3042'), + ), + ), + }, + { + pattern: "\\p{Letter}+", + skipTestAST: true, + }, + { + pattern: "\\f{a2c}+", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: genConcatNode( + 
newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + newRepeatNode( + newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + ), + }, + { + pattern: "((a+)+)+", + ast: genConcatNode( + genConcatNode( + genConcatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + ), + newRepeatNode( + genConcatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newRepeatNode( + newSymbolNode('a'), + ), + ), + ), + ), + ), + ), + ), + }, + { + pattern: "(abc)+", + ast: genConcatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + newRepeatNode( + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + ), + }, + { + pattern: "(a|b)+", + ast: genConcatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + newRepeatNode( + genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + ), + ), + ), + }, + { + pattern: "+", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "(+)", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a|+", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "+|b", + syntaxError: synErrRepNoTarget, + }, + { + pattern: "a++", + syntaxError: synErrRepNoTarget, + }, + { + pattern: ".", + ast: newRangeSymbolNode(0x00, 0x10FFFF), + }, + { + pattern: "[a]", + ast: newSymbolNode('a'), + }, + { + pattern: "[abc]", + ast: genAltNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + }, + { + pattern: "[a-z]", + ast: newRangeSymbolNode('a', 'z'), + }, + { + pattern: "[A-Za-z]", + ast: genAltNode( + newRangeSymbolNode('A', 'Z'), + newRangeSymbolNode('a', 'z'), + ), + }, + { + pattern: "[\\u{004E}]", + ast: newSymbolNode('N'), + }, + { + pattern: "[\\u{0061}-\\u{007A}]", + ast: newRangeSymbolNode('a', 'z'), + }, + { + pattern: "[\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[z-a]", + syntaxError: synErrRangeInvalidOrder, + }, + { + pattern: "a[]", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[]a", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[]", + syntaxError: synErrBExpNoElem, + }, + { + pattern: "[^\\u{004E}]", + ast: genAltNode( + newRangeSymbolNode(0x00, '\u004E'-1), + newRangeSymbolNode('\u004E'+1, 0x10FFFF), + ), + }, + { + pattern: "[^\\u{0061}-\\u{007A}]", + ast: genAltNode( + newRangeSymbolNode(0x00, '\u0061'-1), + newRangeSymbolNode('\u007A'+1, 0x10FFFF), + ), + }, + { + pattern: "[^\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[^a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\u{0000}-\\u{10FFFF}]", + syntaxError: synErrUnmatchablePattern, + }, + { + pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", + syntaxError: synErrUnmatchablePattern, + }, + { + pattern: "[^]", + ast: newSymbolNode('^'), + }, + { + 
pattern: "[", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^a", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[^a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([^a-", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "]", + ast: newSymbolNode(']'), + }, + { + pattern: "(]", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "a]", + ast: genConcatNode( + newSymbolNode('a'), + newSymbolNode(']'), + ), + }, + { + pattern: "(a]", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "([)", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "([a)", + syntaxError: synErrBExpUnclosed, + }, + { + pattern: "[a-]", + ast: genAltNode( + newSymbolNode('a'), + newSymbolNode('-'), + ), + }, + { + pattern: "[^a-]", + ast: genAltNode( + newRangeSymbolNode(0x00, 0x2C), + newRangeSymbolNode(0x2E, 0x60), + newRangeSymbolNode(0x62, 0x10FFFF), + ), + }, + { + pattern: "[-z]", + ast: genAltNode( + newSymbolNode('-'), + newSymbolNode('z'), + ), + }, + { + pattern: "[^-z]", + ast: newAltNode( + newRangeSymbolNode(0x00, 0x2C), + newAltNode( + newRangeSymbolNode(0x2E, 0x79), + newRangeSymbolNode(0x7B, 0x10FFFF), + ), + ), + }, + { + pattern: "[-]", + ast: newSymbolNode('-'), + }, + { + pattern: "[^-]", + ast: genAltNode( + newRangeSymbolNode(0x00, 0x2C), + newRangeSymbolNode(0x2E, 0x10FFFF), + ), + }, + { + pattern: "[^01]", + ast: genAltNode( + newRangeSymbolNode(0x00, '0'-1), + newRangeSymbolNode('1'+1, 0x10FFFF), + ), + }, + { + pattern: "[^10]", + ast: genAltNode( + newRangeSymbolNode(0x00, '0'-1), + newRangeSymbolNode('1'+1, 0x10FFFF), + ), + }, + { + pattern: "[^a-z]", + ast: genAltNode( + newRangeSymbolNode(0x00, 'a'-1), + newRangeSymbolNode('z'+1, 0x10FFFF), + ), + }, + { + pattern: "[^az]", + ast: genAltNode( + newRangeSymbolNode(0x00, 'a'-1), + genAltNode( + newRangeSymbolNode('a'+1, 'z'-1), + newRangeSymbolNode('z'+1, 0x10FFFF), + ), + ), + }, + { + pattern: "\\u{006E}", + ast: newSymbolNode('\u006E'), + }, + { + pattern: "\\u{03BD}", + ast: newSymbolNode('\u03BD'), + }, + { + pattern: "\\u{306B}", + ast: newSymbolNode('\u306B'), + }, + { + pattern: "\\u{01F638}", + ast: newSymbolNode('\U0001F638'), + }, + { + pattern: "\\u{0000}", + ast: newSymbolNode('\u0000'), + }, + { + pattern: "\\u{10FFFF}", + ast: newSymbolNode('\U0010FFFF'), + }, + { + pattern: "\\u{110000}", + syntaxError: synErrCPExpOutOfRange, + }, + { + pattern: "\\u", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{03BD", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\u{}", + syntaxError: synErrCPExpInvalidForm, + }, + { + pattern: "\\p{Letter}", + skipTestAST: true, + }, + { + pattern: "\\p{General_Category=Letter}", + skipTestAST: true, + }, + { + pattern: "\\p{ Letter }", + skipTestAST: true, + }, + { + pattern: "\\p{ General_Category = Letter }", + skipTestAST: true, + }, + { + pattern: "\\p", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{", + syntaxError: 
synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{Letter", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{General_Category=}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{General_Category= }", + syntaxError: synErrCharPropInvalidSymbol, + }, + { + pattern: "\\p{=Letter}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{ =Letter}", + syntaxError: synErrCharPropInvalidSymbol, + }, + { + pattern: "\\p{=}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\p{}", + syntaxError: synErrCharPropExpInvalidForm, + }, + { + pattern: "\\f{a2c}", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\f{ a2c }", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + ast: newFragmentNode("a2c", + genConcatNode( + newSymbolNode('a'), + newSymbolNode('b'), + newSymbolNode('c'), + ), + ), + }, + { + pattern: "\\f", + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "\\f{", + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "\\f{a2c", + fragments: map[spec.LexKindName]string{ + "a2c": "abc", + }, + syntaxError: synErrFragmentExpInvalidForm, + }, + { + pattern: "(a)", + ast: newSymbolNode('a'), + }, + { + pattern: "(((a)))", + ast: newSymbolNode('a'), + }, + { + pattern: "a()", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "()a", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "()", + syntaxError: synErrGroupNoElem, + }, + { + pattern: "(", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "a(", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "(a", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "((", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: "((a)", + syntaxError: synErrGroupUnclosed, + }, + { + pattern: ")", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "a)", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: ")a", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "))", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "(a))", + syntaxError: synErrGroupNoInitiator, + }, + { + pattern: "Mulder|Scully", + ast: genAltNode( + genConcatNode( + newSymbolNode('M'), + newSymbolNode('u'), + newSymbolNode('l'), + newSymbolNode('d'), + newSymbolNode('e'), + newSymbolNode('r'), + ), + genConcatNode( + newSymbolNode('S'), + newSymbolNode('c'), + newSymbolNode('u'), + newSymbolNode('l'), + newSymbolNode('l'), + newSymbolNode('y'), + ), + ), + }, + { + pattern: "Langly|Frohike|Byers", + ast: genAltNode( + genConcatNode( + newSymbolNode('L'), + newSymbolNode('a'), + newSymbolNode('n'), + newSymbolNode('g'), + newSymbolNode('l'), + newSymbolNode('y'), + ), + genConcatNode( + newSymbolNode('F'), + newSymbolNode('r'), + newSymbolNode('o'), + newSymbolNode('h'), + newSymbolNode('i'), + newSymbolNode('k'), + newSymbolNode('e'), + ), + genConcatNode( + newSymbolNode('B'), + newSymbolNode('y'), + newSymbolNode('e'), + newSymbolNode('r'), + newSymbolNode('s'), + ), + ), + }, + { + pattern: "|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "||", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Mulder|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Scully", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Langly|Frohike|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Langly||Byers", + syntaxError: 
synErrAltLackOfOperand, + }, + { + pattern: "|Frohike|Byers", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "|Frohike|", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Fox(|)Mulder", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "(Fox|)Mulder", + syntaxError: synErrAltLackOfOperand, + }, + { + pattern: "Fox(|Mulder)", + syntaxError: synErrAltLackOfOperand, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { + fragmentTrees := map[spec.LexKindName]CPTree{} + for kind, pattern := range tt.fragments { + p := NewParser(kind, strings.NewReader(pattern)) + root, err := p.Parse() + if err != nil { + t.Fatal(err) + } + + fragmentTrees[kind] = root + } + err := CompleteFragments(fragmentTrees) + if err != nil { + t.Fatal(err) + } + + p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) + root, err := p.Parse() + if tt.syntaxError != nil { + // printCPTree(os.Stdout, root, "", "") + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + _, synErr := p.Error() + if synErr != tt.syntaxError { + t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) + } + if root != nil { + t.Fatalf("tree must be nil") + } + } else { + if err != nil { + detail, cause := p.Error() + t.Fatalf("%v: %v: %v", err, cause, detail) + } + if root == nil { + t.Fatal("tree must be non-nil") + } + + complete, err := ApplyFragments(root, fragmentTrees) + if err != nil { + t.Fatal(err) + } + if !complete { + t.Fatalf("incomplete fragments") + } + + // printCPTree(os.Stdout, root, "", "") + if !tt.skipTestAST { + r := root.(*rootNode) + testAST(t, tt.ast, r.tree) + } + } + }) + } +} + +func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { + for _, cProp := range ucd.ContributoryProperties() { + t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { + p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) + root, err := p.Parse() + if err == nil { + t.Fatalf("expected syntax error: got: nil") + } + _, synErr := p.Error() + if synErr != synErrCharPropUnsupported { + t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) + } + if root != nil { + t.Fatalf("tree is not nil") + } + }) + } +} + +func TestExclude(t *testing.T) { + for _, test := range []struct { + caption string + target CPTree + base CPTree + result CPTree + }{ + // t.From > b.From && t.To < b.To + + // |t.From - b.From| = 1 + // |b.To - t.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", + target: newSymbolNode('1'), + base: newRangeSymbolNode('0', '2'), + result: newAltNode( + newSymbolNode('0'), + newSymbolNode('2'), + ), + }, + // |t.From - b.From| > 1 + // |b.To - t.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+--+--+ + // Result (b - t): +--+--+ +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", + target: newSymbolNode('2'), + base: newRangeSymbolNode('0', '4'), + result: newAltNode( + newRangeSymbolNode('0', '1'), + newRangeSymbolNode('3', '4'), + ), + }, + + // t.From <= b.From && t.To >= b.From && t.To < b.To + + // |b.From - t.From| = 0 + // |t.To - b.From| = 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", + target: newSymbolNode('0'), + base: 
newRangeSymbolNode('0', '1'), + result: newSymbolNode('1'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| = 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", + target: newSymbolNode('0'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('1', '2'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| > 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('2'), + }, + // |b.From - t.From| = 0 + // |t.To - b.From| > 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('2', '3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| = 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('1', '2'), + result: newSymbolNode('2'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| = 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '1'), + base: newRangeSymbolNode('1', '3'), + result: newRangeSymbolNode('2', '3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| > 0 + // |b.To - t.To| = 1 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", + target: newRangeSymbolNode('0', '2'), + base: newRangeSymbolNode('1', '3'), + result: newSymbolNode('3'), + }, + // |b.From - t.From| > 0 + // |t.To - b.From| > 0 + // |b.To - t.To| > 1 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", + target: newRangeSymbolNode('0', '2'), + base: newRangeSymbolNode('1', '4'), + result: newRangeSymbolNode('3', '4'), + }, + + // t.From > b.From && t.From <= b.To && t.To >= b.To + + // |t.From - b.From| = 1 + // |b.To - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('1'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('1', '2'), + base: newRangeSymbolNode('0', '1'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| > 
0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('1', '2'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| = 1 + // |b.To - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+ + { + caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('1', '3'), + base: newRangeSymbolNode('0', '2'), + result: newSymbolNode('0'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('2'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('2', '3'), + base: newRangeSymbolNode('0', '2'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('2', '3'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('0', '1'), + }, + // |t.From - b.From| > 1 + // |b.To - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+--+--+--+ + // Result (b - t): +--+--+ + { + caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('2', '4'), + base: newRangeSymbolNode('0', '3'), + result: newRangeSymbolNode('0', '1'), + }, + + // t.From <= b.From && t.To >= b.To + + // |b.From - t.From| = 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", + target: newSymbolNode('0'), + base: newSymbolNode('0'), + result: nil, + }, + // |b.From - t.From| = 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('0', '1'), + base: newSymbolNode('0'), + result: nil, + }, + // |b.From - t.From| > 0 + // |t.To - b.To| = 0 + // + // Target (t): +--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", + target: newRangeSymbolNode('0', '1'), + base: newSymbolNode('1'), + result: nil, + }, + // |b.From - t.From| > 0 + // |t.To - b.To| > 0 + // + // Target (t): +--+--+--+ + // Base (b): +--+ + // Result (b - t): N/A + { + caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", + target: newRangeSymbolNode('0', '2'), + base: newSymbolNode('1'), + result: nil, + }, + + // Others + + // |b.From - t.From| = 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| = 1", + target: newSymbolNode('0'), + base: newSymbolNode('1'), + result: newSymbolNode('1'), + }, + // |b.From - t.From| > 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|b.From - t.From| > 1", + target: newSymbolNode('0'), + base: newSymbolNode('2'), + result: 
newSymbolNode('2'), + }, + // |t.To - b.To| = 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|t.To - b.To| = 1", + target: newSymbolNode('1'), + base: newSymbolNode('0'), + result: newSymbolNode('0'), + }, + // |t.To - b.To| > 1 + // + // Target (t): +--+ + // Base (b): +--+ + // Result (b - t): +--+ + { + caption: "|t.To - b.To| > 1", + target: newSymbolNode('2'), + base: newSymbolNode('0'), + result: newSymbolNode('0'), + }, + } { + t.Run(test.caption, func(t *testing.T) { + r := exclude(test.target, test.base) + testAST(t, test.result, r) + }) + } +} + +func testAST(t *testing.T, expected, actual CPTree) { + t.Helper() + + aTy := reflect.TypeOf(actual) + eTy := reflect.TypeOf(expected) + if eTy != aTy { + t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) + } + + if actual == nil { + return + } + + switch e := expected.(type) { + case *symbolNode: + a := actual.(*symbolNode) + if a.From != e.From || a.To != e.To { + t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) + } + } + eLeft, eRight := expected.children() + aLeft, aRight := actual.children() + testAST(t, eLeft, aLeft) + testAST(t, eRight, aRight) +} diff --git a/tests/unit/grammar/lr0_test.go b/tests/unit/grammar/lr0_test.go new file mode 100644 index 0000000..0a9ec24 --- /dev/null +++ b/tests/unit/grammar/lr0_test.go @@ -0,0 +1,448 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "urubu/grammar/symbol" + "urubu/spec/grammar/parser" +) + +type expectedLRState struct { + kernelItems []*lrItem + nextStates map[symbol.Symbol][]*lrItem + reducibleProds []*production + emptyProdItems []*lrItem +} + +func TestGenLR0Automaton(t *testing.T) { + src := ` +#name test; + +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + var gram *Grammar + var automaton *lr0Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + genLR0Item("expr'", 1, "expr"), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + genLR0Item("expr", 1, "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + genLR0Item("term", 1, "factor"), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + genLR0Item("factor", 1, "id"), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + genLR0Item("expr", 3, "expr", "add", "term"), + genLR0Item("term", 1, "term", "mul", 
"factor"), + }, + 10: { + genLR0Item("term", 3, "term", "mul", "factor"), + }, + 11: { + genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("expr'", "expr"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "term"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "factor"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("factor"): expectedKernels[10], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + genSym("r_paren"): expectedKernels[11], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "expr", "add", "term"), + }, + }, + { + kernelItems: expectedKernels[10], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "term", "mul", "factor"), + }, + }, + { + kernelItems: expectedKernels[11], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton) +} + +func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { + src := ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar + : b + | + ; + +b: "bar"; +` + + var gram *Grammar + var automaton *lr0Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + 
} + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get the initial state: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("s'", 0, "s"), + }, + 1: { + genLR0Item("s'", 1, "s"), + }, + 2: { + genLR0Item("s", 1, "foo", "bar"), + }, + 3: { + genLR0Item("s", 2, "foo", "bar"), + }, + 4: { + genLR0Item("bar", 1, "b"), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("foo"): expectedKernels[2], + }, + reducibleProds: []*production{ + genProd("foo"), + }, + emptyProdItems: []*lrItem{ + genLR0Item("foo", 0), + }, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s'", "s"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("bar"): expectedKernels[3], + genSym("b"): expectedKernels[4], + }, + reducibleProds: []*production{ + genProd("bar"), + }, + emptyProdItems: []*lrItem{ + genLR0Item("bar", 0), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "foo", "bar"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("bar", "b"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton) +} + +func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) { + if len(automaton.states) != len(expected) { + t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states)) + } + + for i, eState := range expected { + t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("a kernel was not found: %v", k.id) + } + + // test look-ahead symbols + { + if len(state.kernel.items) != len(eState.kernelItems) { + t.Errorf("kernel item count is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) + } + for _, eKItem := range eState.kernelItems { + var kItem *lrItem + for _, it := range state.kernel.items { + if it.id != eKItem.id { + continue + } + kItem = it + break + } + if kItem == nil { + t.Fatalf("kernel item was not found: %v", eKItem.id) + } + + if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { + t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) + } + + for eSym := range eKItem.lookAhead.symbols { + if _, ok := kItem.lookAhead.symbols[eSym]; !ok { + t.Errorf("look-ahead symbol not found: %v", eSym) + } + } + } + } + + // test next states + { + if len(state.next) != len(eState.nextStates) { + t.Errorf("next state count is mismatched; want: %v, got: %v", len(eState.nextStates), len(state.next)) + } + for eSym, eKItems := range eState.nextStates { + nextStateKernel, err := newKernel(eKItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + nextState, ok := state.next[eSym] + if !ok { + t.Fatalf("next state 
was not found; state: %v, symbol: %v", state.id, eSym) + } + if nextState != nextStateKernel.id { + t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) + } + } + } + + // test reducible productions + { + if len(state.reducible) != len(eState.reducibleProds) { + t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) + } + for _, eProd := range eState.reducibleProds { + if _, ok := state.reducible[eProd.id]; !ok { + t.Errorf("reducible production was not found: %v", eProd.id) + } + } + + if len(state.emptyProdItems) != len(eState.emptyProdItems) { + t.Errorf("empty production item count is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) + } + for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } + } + }) + } +} diff --git a/tests/unit/grammar/parsing_table_test.go b/tests/unit/grammar/parsing_table_test.go new file mode 100644 index 0000000..342e187 --- /dev/null +++ b/tests/unit/grammar/parsing_table_test.go @@ -0,0 +1,387 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "urubu/grammar/symbol" + "urubu/spec/grammar/parser" +) + +type expectedState struct { + kernelItems []*lrItem + acts map[symbol.Symbol]testActionEntry + goTos map[symbol.Symbol][]*lrItem +} + +func TestGenLALRParsingTable(t *testing.T) { + src := ` +#name test; + +s: l eq r | r; +l: ref r | id; +r: l; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var ptab *ParsingTable + var automaton *lalr1Automaton + var gram *Grammar + var nonTermCount int + var termCount int + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + first, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatal(err) + } + automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) + if err != nil { + t.Fatal(err) + } + + nonTermTexts, err := gram.symbolTable.NonTerminalTexts() + if err != nil { + t.Fatal(err) + } + termTexts, err := gram.symbolTable.TerminalTexts() + if err != nil { + t.Fatal(err) + } + nonTermCount = len(nonTermTexts) + termCount = len(termTexts) + + lalr := &lrTableBuilder{ + automaton: automaton.lr0Automaton, + prods: gram.productionSet, + termCount: termCount, + nonTermCount: nonTermCount, + symTab: gram.symbolTable, + } + ptab, err = lalr.build() + if err != nil { + t.Fatalf("failed to create a LALR parsing table: %v", err) + } + if ptab == nil { + t.Fatal("genLALRParsingTable returns nil without any error") + } + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), + }, + 1: { + withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), + }, + 2: { + withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), + withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), + }, + 3: { + withLookAhead(genLR0Item("s", 1, "r"), 
symbol.SymbolEOF), + }, + 4: { + withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 5: { + withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), + }, + 6: { + withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), + }, + 7: { + withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 8: { + withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), + }, + 9: { + withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), + }, + } + + expectedStates := []expectedState{ + { + kernelItems: expectedKernels[0], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("l"): expectedKernels[2], + genSym("r"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[1], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s'", "s"), + }, + }, + }, + { + kernelItems: expectedKernels[2], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("r", "l"), + }, + }, + }, + { + kernelItems: expectedKernels[3], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s", "r"), + }, + }, + }, + { + kernelItems: expectedKernels[4], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[7], + genSym("l"): expectedKernels[8], + }, + }, + { + kernelItems: expectedKernels[5], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("l", "id"), + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("l", "id"), + }, + }, + }, + { + kernelItems: expectedKernels[6], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("l"): expectedKernels[8], + genSym("r"): expectedKernels[9], + }, + }, + { + kernelItems: expectedKernels[7], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("l", "ref", "r"), + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("l", "ref", "r"), + }, + }, + }, + { + kernelItems: expectedKernels[8], + acts: map[symbol.Symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("r", "l"), + }, + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("r", "l"), + }, + }, + }, + { + kernelItems: expectedKernels[9], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s", "l", "eq", "r"), + }, + }, + }, + } + + t.Run("initial state", func(t *testing.T) { + iniState := findStateByNum(automaton.states, ptab.InitialState) + if iniState == nil { + t.Fatalf("the initial state was not found: #%v", ptab.InitialState) + } + eIniState, err := 
newKernel(expectedKernels[0]) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + if iniState.id != eIniState.id { + t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) + } + }) + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("state was not found: #%v", i) + } + + testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) + testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) + }) + } +} + +func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { + nonEmptyEntries := map[symbol.SymbolNum]struct{}{} + for eSym, eAct := range expectedState.acts { + nonEmptyEntries[eSym.Num()] = struct{}{} + + ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num()) + if ty != eAct.ty { + t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) + } + switch eAct.ty { + case ActionTypeShift: + eNextState, err := newKernel(eAct.nextState) + if err != nil { + t.Fatal(err) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found; state: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + case ActionTypeReduce: + prod := findProductionByNum(gram.productionSet, prodNum) + if prod == nil { + t.Fatalf("production was not found: #%v", prodNum) + } + if prod.id != eAct.production.id { + t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) + } + } + } + for symNum := 0; symNum < termCount; symNum++ { + if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { + continue + } + ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum)) + if ty != ActionTypeError { + t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, production: #%v", state.num, symNum, ty, stateNum, prodNum) + } + } +} + +func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { + nonEmptyEntries := map[symbol.SymbolNum]struct{}{} + for eSym, eGoTo := range expectedState.goTos { + nonEmptyEntries[eSym.Num()] = struct{}{} + + eNextState, err := newKernel(eGoTo) + if err != nil { + t.Fatal(err) + } + ty, stateNum := ptab.getGoTo(state.num, eSym.Num()) + if ty != GoToTypeRegistered { + t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + } + for symNum := 0; symNum < nonTermCount; symNum++ { + if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { + continue + } + ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum)) + if ty != GoToTypeError { + t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) + } + } +} + +type testActionEntry struct { + ty ActionType + nextState []*lrItem + production 
*production +} + +func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { + for _, state := range states { + if state.num == num { + return state + } + } + return nil +} + +func findProductionByNum(prods *productionSet, num productionNum) *production { + for _, prod := range prods.getAllProductions() { + if prod.num == num { + return prod + } + } + return nil +} diff --git a/tests/unit/grammar/symbol/symbol_test.go b/tests/unit/grammar/symbol/symbol_test.go new file mode 100644 index 0000000..31c3edd --- /dev/null +++ b/tests/unit/grammar/symbol/symbol_test.go @@ -0,0 +1,159 @@ +package symbol + +import "testing" + +func TestSymbol(t *testing.T) { + tab := NewSymbolTable() + w := tab.Writer() + _, _ = w.RegisterStartSymbol("expr'") + _, _ = w.RegisterNonTerminalSymbol("expr") + _, _ = w.RegisterNonTerminalSymbol("term") + _, _ = w.RegisterNonTerminalSymbol("factor") + _, _ = w.RegisterTerminalSymbol("id") + _, _ = w.RegisterTerminalSymbol("add") + _, _ = w.RegisterTerminalSymbol("mul") + _, _ = w.RegisterTerminalSymbol("l_paren") + _, _ = w.RegisterTerminalSymbol("r_paren") + + nonTermTexts := []string{ + "", // Nil + "expr'", + "expr", + "term", + "factor", + } + + termTexts := []string{ + "", // Nil + symbolNameEOF, // EOF + "id", + "add", + "mul", + "l_paren", + "r_paren", + } + + tests := []struct { + text string + isNil bool + isStart bool + isEOF bool + isNonTerminal bool + isTerminal bool + }{ + { + text: "expr'", + isStart: true, + isNonTerminal: true, + }, + { + text: "expr", + isNonTerminal: true, + }, + { + text: "term", + isNonTerminal: true, + }, + { + text: "factor", + isNonTerminal: true, + }, + { + text: "id", + isTerminal: true, + }, + { + text: "add", + isTerminal: true, + }, + { + text: "mul", + isTerminal: true, + }, + { + text: "l_paren", + isTerminal: true, + }, + { + text: "r_paren", + isTerminal: true, + }, + } + for _, tt := range tests { + t.Run(tt.text, func(t *testing.T) { + r := tab.Reader() + sym, ok := r.ToSymbol(tt.text) + if !ok { + t.Fatalf("symbol was not found") + } + testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal) + text, ok := r.ToText(sym) + if !ok { + t.Fatalf("text was not found") + } + if text != tt.text { + t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text) + } + }) + } + + t.Run("EOF", func(t *testing.T) { + testSymbolProperty(t, SymbolEOF, false, false, true, false, true) + }) + + t.Run("Nil", func(t *testing.T) { + testSymbolProperty(t, SymbolNil, true, false, false, false, false) + }) + + t.Run("texts of non-terminals", func(t *testing.T) { + r := tab.Reader() + ts, err := r.NonTerminalTexts() + if err != nil { + t.Fatal(err) + } + if len(ts) != len(nonTermTexts) { + t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts) + } + for i, text := range ts { + if text != nonTermTexts[i] { + t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text) + } + } + }) + + t.Run("texts of terminals", func(t *testing.T) { + r := tab.Reader() + ts, err := r.TerminalTexts() + if err != nil { + t.Fatal(err) + } + if len(ts) != len(termTexts) { + t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts) + } + for i, text := range ts { + if text != termTexts[i] { + t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text) + } + } + }) +} + +func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, 
isTerminal bool) { + t.Helper() + + if v := sym.IsNil(); v != isNil { + t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v) + } + if v := sym.IsStart(); v != isStart { + t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v) + } + if v := sym.isEOF(); v != isEOF { + t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v) + } + if v := sym.isNonTerminal(); v != isNonTerminal { + t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v) + } + if v := sym.IsTerminal(); v != isTerminal { + t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v) + } +} diff --git a/tests/unit/grammar/test_helper_test.go b/tests/unit/grammar/test_helper_test.go new file mode 100644 index 0000000..546d2c1 --- /dev/null +++ b/tests/unit/grammar/test_helper_test.go @@ -0,0 +1,68 @@ +package grammar + +import ( + "testing" + + "urubu/grammar/symbol" +) + +type testSymbolGenerator func(text string) symbol.Symbol + +func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator { + return func(text string) symbol.Symbol { + t.Helper() + + sym, ok := symTab.ToSymbol(text) + if !ok { + t.Fatalf("symbol was not found: %v", text) + } + return sym + } +} + +type testProductionGenerator func(lhs string, rhs ...string) *production + +func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator { + return func(lhs string, rhs ...string) *production { + t.Helper() + + rhsSym := []symbol.Symbol{} + for _, text := range rhs { + rhsSym = append(rhsSym, genSym(text)) + } + prod, err := newProduction(genSym(lhs), rhsSym) + if err != nil { + t.Fatalf("failed to create a production: %v", err) + } + + return prod + } +} + +type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem + +func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { + return func(lhs string, dot int, rhs ...string) *lrItem { + t.Helper() + + prod := genProd(lhs, rhs...) + item, err := newLR0Item(prod, dot) + if err != nil { + t.Fatalf("failed to create a LR0 item: %v", err) + } + + return item + } +} + +func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem { + if item.lookAhead.symbols == nil { + item.lookAhead.symbols = map[symbol.Symbol]struct{}{} + } + + for _, a := range lookAhead { + item.lookAhead.symbols[a] = struct{}{} + } + + return item +} |
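The helpers in test_helper_test.go are meant to be composed when writing expectations for the automaton and parsing-table tests above. As a rough illustrative sketch only (not part of the change; it assumes a test context with a *testing.T named t and an already-built Grammar named gram), an expected LALR(1) kernel can be constructed like this:

	// Sketch: compose the test helpers defined in this change.
	genSym := newTestSymbolGenerator(t, gram.symbolTable)
	genProd := newTestProductionGenerator(t, genSym)
	genLR0Item := newTestLR0ItemGenerator(t, genProd)

	// "s' -> s ." with end-of-input as its only look-ahead symbol.
	item := withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF)

	// Expected items are grouped into a kernel per state and compared
	// against the states of the generated automaton.
	k, err := newKernel([]*lrItem{item})
	if err != nil {
		t.Fatalf("failed to create a kernel: %v", err)
	}
	_ = k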