diff options
author | EuAndreh <eu@euandre.org> | 2024-12-11 16:48:12 -0300 |
---|---|---|
committer | EuAndreh <eu@euandre.org> | 2024-12-11 16:48:12 -0300 |
commit | 27b4729bd1a57740ea68e774d58d9cb3f45c5589 (patch) | |
tree | 152ff5686ade087e29e102cbbd39c0405cb63c02 /tests/unit/grammar | |
parent | Consolidate packages spread across multiple files into single one (diff) | |
download | cotia-27b4729bd1a57740ea68e774d58d9cb3f45c5589.tar.gz cotia-27b4729bd1a57740ea68e774d58d9cb3f45c5589.tar.xz |
Do the same single file consolidation on tests
Diffstat (limited to 'tests/unit/grammar')
-rw-r--r-- | tests/unit/grammar/first_test.go | 219 | ||||
-rw-r--r-- | tests/unit/grammar/grammar_test.go | 3381 | ||||
-rw-r--r-- | tests/unit/grammar/lalr1_test.go | 187 | ||||
-rw-r--r-- | tests/unit/grammar/lexical.go (renamed from tests/unit/grammar/lexical/compiler_test.go) | 0 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/dfa.go (renamed from tests/unit/grammar/lexical/dfa/tree_test.go) | 185 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/dfa/dfa_test.go | 121 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/dfa/symbol_position_test.go | 79 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/parser.go (renamed from tests/unit/grammar/lexical/parser/parser_test.go) | 518 | ||||
-rw-r--r-- | tests/unit/grammar/lexical/parser/lexer_test.go | 524 | ||||
-rw-r--r-- | tests/unit/grammar/lr0_test.go | 448 | ||||
-rw-r--r-- | tests/unit/grammar/parsing_table_test.go | 387 | ||||
-rw-r--r-- | tests/unit/grammar/symbol.go (renamed from tests/unit/grammar/symbol/symbol_test.go) | 0 | ||||
-rw-r--r-- | tests/unit/grammar/test_helper_test.go | 68 |
13 files changed, 703 insertions, 5414 deletions
diff --git a/tests/unit/grammar/first_test.go b/tests/unit/grammar/first_test.go deleted file mode 100644 index 9625ef6..0000000 --- a/tests/unit/grammar/first_test.go +++ /dev/null @@ -1,219 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type first struct { - lhs string - num int - dot int - symbols []string - empty bool -} - -func TestGenFirst(t *testing.T) { - tests := []struct { - caption string - src string - first []first - }{ - { - caption: "productions contain only non-empty productions", - src: ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -`, - first: []first{ - {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, - {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, - {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, - {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, - {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, - }, - }, - { - caption: "productions contain the empty start production", - src: ` -#name test; - -s - : - ; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "productions contain an empty production", - src: ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a start production contains a non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - | - ; -foo: "foo"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, - {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a production contains non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - ; -foo - : bar - | - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, - {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - fst, gram := genActualFirst(t, tt.src) - - for _, ttFirst := range tt.first { - lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs) - } - - prod, ok := gram.productionSet.findByLHS(lhsSym) - if !ok { - t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym) - } - - actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot) - if err != nil { - t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err) - } - - expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable) - - testFirst(t, actualFirst, expectedFirst) - } - }) - } -} - -func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err := b.build() - if err != nil { - t.Fatal(err) - } - fst, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - if fst == nil { - t.Fatal("genFiest returned nil without any error") - } - - return fst, gram -} - -func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry { - t.Helper() - - entry := newFirstEntry() - if empty { - entry.addEmpty() - } - for _, sym := range symbols { - symSym, ok := symTab.ToSymbol(sym) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", sym) - } - entry.add(symSym) - } - - return entry -} - -func testFirst(t *testing.T, actual, expected *firstEntry) { - if actual.empty != expected.empty { - t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty) - } - - if len(actual.symbols) != len(expected.symbols) { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - - for eSym := range expected.symbols { - if _, ok := actual.symbols[eSym]; !ok { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - } -} diff --git a/tests/unit/grammar/grammar_test.go b/tests/unit/grammar/grammar_test.go deleted file mode 100644 index ddedb27..0000000 --- a/tests/unit/grammar/grammar_test.go +++ /dev/null @@ -1,3381 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - verr "urubu/error" - "urubu/spec/grammar/parser" -) - -func TestGrammarBuilderOK(t *testing.T) { - type okTest struct { - caption string - specSrc string - validate func(t *testing.T, g *Grammar) - } - - nameTests := []*okTest{ - { - caption: "the `#name` can be the same identifier as a non-terminal symbol", - specSrc: ` -#name s; - -s - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "s" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as a terminal symbol", - specSrc: ` -#name foo; - -s - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "foo" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as the error symbol", - specSrc: ` -#name error; - -s - : foo - | error - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "error" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as a fragment", - specSrc: ` -#name f; - -s - : foo - ; - -foo - : "\f{f}"; -fragment f - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "f" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - } - - modeTests := []*okTest{ - { - caption: "a `#mode` can be the same identifier as a non-terminal symbol", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push s - : 'foo'; -bar #mode s - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "s" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as a terminal symbol", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push bar - : 'foo'; -bar #mode bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "bar" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as the error symbol", - specSrc: ` -#name test; - -s - : foo bar - | error - ; - -foo #push error - : 'foo'; -bar #mode error - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "error" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as a fragment", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push f - : "\f{f}"; -bar #mode f - : 'bar'; -fragment f - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "f" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - } - - precTests := []*okTest{ - { - caption: "a `#prec` allows the empty directive group", - specSrc: ` -#name test; - -#prec (); - -s - : foo - ; - -foo - : 'foo'; -`, - }, - { - caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #left foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #right foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "an `#assign` directive gives only a precedence to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #assign foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "a production has the same precedence and associativity as the right-most terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'. - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if barPrec != precNil || barAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc) - } - if sPrec != barPrec || sAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "a production has the same precedence and associativity as the right-most terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'. - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if sPrec != barPrec || sAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "even if a non-terminal symbol apears to a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'. - ; -a - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var aPrec int - var aAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("a") - ps, _ := g.productionSet.findByLHS(s) - aPrec = g.precAndAssoc.productionPredence(ps[0].num) - aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if aPrec != barPrec || aAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc) - } - if sPrec != fooPrec || sAssoc != fooAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "each alternative in the same production can have its own precedence and associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar - #assign baz -); - -s - : foo - | bar - | baz - | bra - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -bra - : 'bra'; -`, - validate: func(t *testing.T, g *Grammar) { - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - var alt3Prec int - var alt3Assoc assocType - var alt4Prec int - var alt4Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - alt3Prec = g.precAndAssoc.productionPredence(ps[2].num) - alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num) - alt4Prec = g.precAndAssoc.productionPredence(ps[3].num) - alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeLeft { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeRight { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc) - } - if alt3Prec != 3 || alt3Assoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt3Prec, alt3Assoc) - } - if alt4Prec != precNil || alt4Assoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc) - } - }, - }, - { - caption: "when a production contains no terminal symbols, the production will not have precedence and associativiry", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : a - ; -a - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var aPrec int - var aAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("a") - ps, _ := g.productionSet.findByLHS(s) - aPrec = g.precAndAssoc.productionPredence(ps[0].num) - aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if aPrec != fooPrec || aAssoc != fooAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc) - } - if sPrec != precNil || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if sPrec != fooPrec || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if sPrec != fooPrec || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#left` directive", - specSrc: ` -#name test; - -#prec ( - #left $high - #right foo bar - #left $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 3 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#right` directive", - specSrc: ` -#name test; - -#prec ( - #right $high - #left foo bar - #right $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 2 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 3 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#assign` directive", - specSrc: ` -#name test; - -#prec ( - #assign $high - #left foo - #right bar - #assign $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 3 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 4 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "names of an ordered symbol and a terminal symbol can duplicate", - specSrc: ` -#name test; - -#prec ( - #left foo bar - #right $foo -); - -s - : foo - | bar #prec $foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != fooPrec || alt1Assoc != fooAssoc { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "names of an ordered symbol and a non-terminal symbol can duplicate", - specSrc: ` -#name test; - -#prec ( - #left foo bar - #right $a -); - -s - : a - | bar #prec $a - ; -a - : foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != precNil || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - } - - var tests []*okTest - tests = append(tests, nameTests...) - tests = append(tests, modeTests...) - tests = append(tests, precTests...) - - for _, test := range tests { - t.Run(test.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(test.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - g, err := b.build() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if test.validate != nil { - test.validate(t, g) - } - }) - } -} - -func TestGrammarBuilderSpecError(t *testing.T) { - type specErrTest struct { - caption string - specSrc string - errs []error - } - - spellingInconsistenciesTests := []*specErrTest{ - { - caption: "a spelling inconsistency appears among non-terminal symbols", - specSrc: ` -#name test; - -a1 - : a_1 - ; -a_1 - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among terminal symbols", - specSrc: ` -#name test; - -s - : foo1 foo_1 - ; - -foo1 - : 'foo1'; -foo_1 - : 'foo_1'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among non-terminal and terminal symbols", - specSrc: ` -#name test; - -a1 - : a_1 - ; - -a_1 - : 'a_1'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same", - specSrc: ` -#name test; - -#prec ( - #assign $p1 $p_1 -); - -s - : foo #prec $p1 - | bar #prec $p_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same", - specSrc: ` -#name test; - -#prec ( - #assign $p1 - #assign $p_1 -); - -s - : foo #prec $p1 - | bar #prec $p_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels the same alternative contains", - specSrc: ` -#name test; - -s - : foo@l1 foo@l_1 - ; - -foo - : 'foo'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels the same production contains", - specSrc: ` -#name test; - -s - : foo@l1 - | bar@l_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels different productions contain", - specSrc: ` -#name test; - -s - : foo@l1 - ; -a - : bar@l_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - } - - prodTests := []*specErrTest{ - { - caption: "a production `b` is unused", - specSrc: ` -#name test; - -a - : foo - ; -b - : foo - ; - -foo - : "foo"; -`, - errs: []error{semErrUnusedProduction}, - }, - { - caption: "a terminal symbol `bar` is unused", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrUnusedTerminal}, - }, - { - caption: "a production `b` and terminal symbol `bar` is unused", - specSrc: ` -#name test; - -a - : foo - ; -b - : bar - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{ - semErrUnusedProduction, - semErrUnusedTerminal, - }, - }, - { - caption: "a production cannot have production directives", - specSrc: ` -#name test; - -s #prec foo - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrInvalidProdDir}, - }, - { - caption: "a lexical production cannot have alternative directives", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo' #skip; -`, - errs: []error{semErrInvalidAltDir}, - }, - { - caption: "a production directive must not be duplicated", - specSrc: ` -#name test; - -s - : foo - ; - -foo #skip #skip - : 'foo'; -`, - errs: []error{semErrDuplicateDir}, - }, - { - caption: "an alternative directive must not be duplicated", - specSrc: ` -#name test; - -s - : foo bar #ast foo bar #ast foo bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateDir}, - }, - { - caption: "a production must not have a duplicate alternative (non-empty alternatives)", - specSrc: ` -#name test; - -s - : foo - | foo - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (non-empty and split alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : bar - ; -s - : foo - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (empty alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : - | - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (empty and split alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : - | foo - ; -a - : - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "foo"; -s - : "a"; -`, - errs: []error{semErrDuplicateName}, - }, - { - caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : bar - ; - -foo - : "foo"; -bar - : "bar"; -a - : "a"; -`, - errs: []error{semErrDuplicateName}, - }, - { - caption: "an invalid top-level directive", - specSrc: ` -#name test; - -#foo; - -s - : a - ; - -a - : 'a'; -`, - errs: []error{semErrDirInvalidName}, - }, - { - caption: "a label must be unique in an alternative", - specSrc: ` -#name test; - -s - : foo@x bar@x - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateLabel}, - }, - { - caption: "a label cannot be the same name as terminal symbols", - specSrc: ` -#name test; - -s - : foo bar@foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateLabel}, - }, - { - caption: "a label cannot be the same name as non-terminal symbols", - specSrc: ` -#name test; - -s - : foo@a - | a - ; -a - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{ - semErrInvalidLabel, - }, - }, - } - - nameDirTests := []*specErrTest{ - { - caption: "the `#name` directive is required", - specSrc: ` -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrNoGrammarName}, - }, - { - caption: "the `#name` directive needs an ID parameter", - specSrc: ` -#name; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive cannot take a pattern parameter", - specSrc: ` -#name "test"; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive cannot take a string parameter", - specSrc: ` -#name 'test'; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive takes just one parameter", - specSrc: ` -#name test1 test2; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - precDirTests := []*specErrTest{ - { - caption: "the `#prec` directive needs a directive group parameter", - specSrc: ` -#name test; - -#prec; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an ID parameter", - specSrc: ` -#name test; - -#prec foo; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec $x; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec "foo"; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec 'foo'; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive takes just one directive group parameter", - specSrc: ` -#name test; - -#prec () (); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - leftDirTests := []*specErrTest{ - { - caption: "the `#left` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #left -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #left error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #left x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #left "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #left 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a directive parameter", - specSrc: ` -#name test; - -#prec ( - #left () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #left $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #left foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #left $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #right foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #right $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - rightDirTests := []*specErrTest{ - { - caption: "the `#right` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #right -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #right error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #right x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #right s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #right "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #right 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -#prec ( - #right () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #right foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#right` directive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #right $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #right foo - #right foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #right $x - #right $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #left $x - #right $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - assignDirTests := []*specErrTest{ - { - caption: "the `#assign` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #assign -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #assign error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #assign x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #assign s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #assign "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #assign 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a directive parameter", - specSrc: ` -#name test; - -#prec ( - #assign () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #assign foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #assign $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #assign foo - #assign foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #assign $x - #assign $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #assign foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #assign $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - errorSymTests := []*specErrTest{ - { - caption: "cannot use the error symbol as a non-terminal symbol", - specSrc: ` -#name test; - -s - : error - ; -error - : foo - ; - -foo: 'foo'; -`, - errs: []error{ - semErrErrSymIsReserved, - semErrDuplicateName, - }, - }, - { - caption: "cannot use the error symbol as a terminal symbol", - specSrc: ` -#name test; - -s - : error - ; - -error: 'error'; -`, - errs: []error{semErrErrSymIsReserved}, - }, - { - caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -error #skip - : 'error'; -`, - errs: []error{semErrErrSymIsReserved}, - }, - } - - astDirTests := []*specErrTest{ - { - caption: "the `#ast` directive needs ID or label prameters", - specSrc: ` -#name test; - -s - : foo #ast - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo #ast $x - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #ast "foo" - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #ast 'foo' - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo #ast () - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative", - specSrc: ` -#name test; - -s - : foo bar #ast foo x - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive", - specSrc: ` -#name test; - -s - : foo #ast bar - | bar - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a label in a different alternative cannot be a parameter of the `#ast` directive", - specSrc: ` -#name test; - -s - : foo #ast b - | bar@b - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol can appear in the `#ast` directive only once", - specSrc: ` -#name test; - -s - : foo #ast foo foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "a label can appear in the `#ast` directive only once", - specSrc: ` -#name test; - -s - : foo@x #ast x x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label", - specSrc: ` -#name test; - -s - : foo@x #ast foo x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "symbol `foo` is ambiguous because it appears in an alternative twice", - specSrc: ` -#name test; - -s - : foo foo #ast foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrAmbiguousElem}, - }, - { - caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label", - specSrc: ` -#name test; - -s - : foo@x foo #ast foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrAmbiguousElem}, - }, - { - caption: "the expansion operator cannot be applied to a terminal symbol", - specSrc: ` -#name test; - -s - : foo #ast foo... - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - altPrecDirTests := []*specErrTest{ - { - caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter", - specSrc: ` -#name test; - -s - : foo #prec - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -s - : foo #prec error - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -s - : foo #prec x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -s - : a #prec b - | b - ; -a - : foo - ; -b - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an undefined ordered symbol parameter", - specSrc: ` -#name test; - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrUndefinedOrdSym}, - }, - { - caption: "the `#prec` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #prec "foo" - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #prec 'foo' - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a directive parameter", - specSrc: ` -#name test; - -s - : foo #prec () - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol the `#prec` directive takes must be given precedence explicitly", - specSrc: ` -#name test; - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrUndefinedPrec}, - }, - } - - recoverDirTests := []*specErrTest{ - { - caption: "the `#recover` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo #recover foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo #recover $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #recover "foo" - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #recover 'foo' - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo #recover () - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - fragmentTests := []*specErrTest{ - { - caption: "a production cannot contain a fragment", - specSrc: ` -#name test; - -s - : f - ; - -fragment f - : 'fragment'; -`, - errs: []error{semErrUndefinedSym}, - }, - { - caption: "fragments cannot be duplicated", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "\f{f}"; -fragment f - : 'fragment 1'; -fragment f - : 'fragment 2'; -`, - errs: []error{semErrDuplicateFragment}, - }, - } - - modeDirTests := []*specErrTest{ - { - caption: "the `#mode` directive needs an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo - : 'foo'; -bar #mode $x - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode "mode_1" - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode 'mode_1' - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode () - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - pushDirTests := []*specErrTest{ - { - caption: "the `#push` directive needs an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive takes just one ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 mode_2 - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo #push $x - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push "mode_1" - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push 'mode_1' - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push () - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - popDirTests := []*specErrTest{ - { - caption: "the `#pop` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop mode_1 - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop $x - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop "mode_1" - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop 'mode_1' - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a directive parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop () - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - skipDirTests := []*specErrTest{ - { - caption: "the `#skip` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip bar - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo #skip $x - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip "bar" - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip 'bar' - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip () - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a terminal symbol used in productions cannot have the skip directive", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrTermCannotBeSkipped}, - }, - } - - var tests []*specErrTest - tests = append(tests, spellingInconsistenciesTests...) - tests = append(tests, prodTests...) - tests = append(tests, nameDirTests...) - tests = append(tests, precDirTests...) - tests = append(tests, leftDirTests...) - tests = append(tests, rightDirTests...) - tests = append(tests, assignDirTests...) - tests = append(tests, errorSymTests...) - tests = append(tests, astDirTests...) - tests = append(tests, altPrecDirTests...) - tests = append(tests, recoverDirTests...) - tests = append(tests, fragmentTests...) - tests = append(tests, modeDirTests...) - tests = append(tests, pushDirTests...) - tests = append(tests, popDirTests...) - tests = append(tests, skipDirTests...) - for _, test := range tests { - t.Run(test.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(test.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - _, err = b.build() - if err == nil { - t.Fatal("an expected error didn't occur") - } - specErrs, ok := err.(verr.SpecErrors) - if !ok { - t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err) - } - if len(specErrs) != len(test.errs) { - t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs) - } - for _, expected := range test.errs { - for _, actual := range specErrs { - if actual.Cause == expected { - return - } - } - } - t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs) - }) - } -} diff --git a/tests/unit/grammar/lalr1_test.go b/tests/unit/grammar/lalr1_test.go deleted file mode 100644 index fd09333..0000000 --- a/tests/unit/grammar/lalr1_test.go +++ /dev/null @@ -1,187 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -func TestGenLALR1Automaton(t *testing.T) { - // This grammar belongs to LALR(1) class, not SLR(1). - src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var gram *Grammar - var automaton *lalr1Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - - firstSet, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatalf("failed to create a FIRST set: %v", err) - } - - automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) - if err != nil { - t.Fatalf("failed to create a LALR1 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLALR1Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("eq"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "r"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[9], - genSym("l"): expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "ref", "r"), - }, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "l", "eq", "r"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton.lr0Automaton) -} diff --git a/tests/unit/grammar/lexical/compiler_test.go b/tests/unit/grammar/lexical.go index b621cd2..b621cd2 100644 --- a/tests/unit/grammar/lexical/compiler_test.go +++ b/tests/unit/grammar/lexical.go diff --git a/tests/unit/grammar/lexical/dfa/tree_test.go b/tests/unit/grammar/lexical/dfa.go index de3ebbb..1a3e16a 100644 --- a/tests/unit/grammar/lexical/dfa/tree_test.go +++ b/tests/unit/grammar/lexical/dfa.go @@ -9,6 +9,191 @@ import ( spec "urubu/spec/grammar" ) +func TestGenDFA(t *testing.T) { + p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) + cpt, err := p.Parse() + if err != nil { + t.Fatal(err) + } + bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ + spec.LexModeKindIDMin: cpt, + }) + if err != nil { + t.Fatal(err) + } + dfa := GenDFA(bt, symTab) + if dfa == nil { + t.Fatalf("DFA is nil") + } + + symPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, false) + if err != nil { + panic(err) + } + return pos + } + + endPos := func(n uint16) symbolPosition { + pos, err := newSymbolPosition(n, true) + if err != nil { + panic(err) + } + return pos + } + + s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) + s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) + s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) + s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) + + rune2Int := func(char rune, index int) uint8 { + return uint8([]byte(string(char))[index]) + } + + tranS0 := [256]string{} + tranS0[rune2Int('a', 0)] = s1.hash() + tranS0[rune2Int('b', 0)] = s0.hash() + + tranS1 := [256]string{} + tranS1[rune2Int('a', 0)] = s1.hash() + tranS1[rune2Int('b', 0)] = s2.hash() + + tranS2 := [256]string{} + tranS2[rune2Int('a', 0)] = s1.hash() + tranS2[rune2Int('b', 0)] = s3.hash() + + tranS3 := [256]string{} + tranS3[rune2Int('a', 0)] = s1.hash() + tranS3[rune2Int('b', 0)] = s0.hash() + + expectedTranTab := map[string][256]string{ + s0.hash(): tranS0, + s1.hash(): tranS1, + s2.hash(): tranS2, + s3.hash(): tranS3, + } + if len(dfa.TransitionTable) != len(expectedTranTab) { + t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) + } + for h, eTranTab := range expectedTranTab { + tranTab, ok := dfa.TransitionTable[h] + if !ok { + t.Errorf("no entry; hash: %v", h) + continue + } + if len(tranTab) != len(eTranTab) { + t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) + } + for c, eNext := range eTranTab { + if eNext == "" { + continue + } + + next := tranTab[c] + if next == "" { + t.Errorf("no enatry: hash: %v, char: %v", h, c) + } + if next != eNext { + t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) + } + } + } + + if dfa.InitialState != s0.hash() { + t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) + } + + accTab := map[string]spec.LexModeKindID{ + s3.hash(): 1, + } + if len(dfa.AcceptingStatesTable) != len(accTab) { + t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) + } + for eState, eID := range accTab { + id, ok := dfa.AcceptingStatesTable[eState] + if !ok { + t.Errorf("accepting state is not found: state: %v", eState) + } + if id != eID { + t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) + } + } +} + +func TestNewSymbolPosition(t *testing.T) { + tests := []struct { + n uint16 + endMark bool + err bool + }{ + { + n: 0, + endMark: false, + err: true, + }, + { + n: 0, + endMark: true, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: true, + err: true, + }, + { + n: symbolPositionMin, + endMark: false, + }, + { + n: symbolPositionMin, + endMark: true, + }, + { + n: symbolPositionMax, + endMark: false, + }, + { + n: symbolPositionMax, + endMark: true, + }, + { + n: symbolPositionMax + 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMax + 1, + endMark: true, + err: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { + pos, err := newSymbolPosition(tt.n, tt.endMark) + if tt.err { + if err == nil { + t.Fatal("err is nil") + } + return + } + if err != nil { + t.Fatal(err) + } + n, endMark := pos.describe() + if n != tt.n || endMark != tt.endMark { + t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) + } + }) + } +} + func TestByteTree(t *testing.T) { tests := []struct { root byteTree diff --git a/tests/unit/grammar/lexical/dfa/dfa_test.go b/tests/unit/grammar/lexical/dfa/dfa_test.go deleted file mode 100644 index 38577cf..0000000 --- a/tests/unit/grammar/lexical/dfa/dfa_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package dfa - -import ( - "strings" - "testing" - - "urubu/grammar/lexical/parser" - spec "urubu/spec/grammar" -) - -func TestGenDFA(t *testing.T) { - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - dfa := GenDFA(bt, symTab) - if dfa == nil { - t.Fatalf("DFA is nil") - } - - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) - s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) - s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) - s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) - - rune2Int := func(char rune, index int) uint8 { - return uint8([]byte(string(char))[index]) - } - - tranS0 := [256]string{} - tranS0[rune2Int('a', 0)] = s1.hash() - tranS0[rune2Int('b', 0)] = s0.hash() - - tranS1 := [256]string{} - tranS1[rune2Int('a', 0)] = s1.hash() - tranS1[rune2Int('b', 0)] = s2.hash() - - tranS2 := [256]string{} - tranS2[rune2Int('a', 0)] = s1.hash() - tranS2[rune2Int('b', 0)] = s3.hash() - - tranS3 := [256]string{} - tranS3[rune2Int('a', 0)] = s1.hash() - tranS3[rune2Int('b', 0)] = s0.hash() - - expectedTranTab := map[string][256]string{ - s0.hash(): tranS0, - s1.hash(): tranS1, - s2.hash(): tranS2, - s3.hash(): tranS3, - } - if len(dfa.TransitionTable) != len(expectedTranTab) { - t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) - } - for h, eTranTab := range expectedTranTab { - tranTab, ok := dfa.TransitionTable[h] - if !ok { - t.Errorf("no entry; hash: %v", h) - continue - } - if len(tranTab) != len(eTranTab) { - t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) - } - for c, eNext := range eTranTab { - if eNext == "" { - continue - } - - next := tranTab[c] - if next == "" { - t.Errorf("no enatry: hash: %v, char: %v", h, c) - } - if next != eNext { - t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) - } - } - } - - if dfa.InitialState != s0.hash() { - t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) - } - - accTab := map[string]spec.LexModeKindID{ - s3.hash(): 1, - } - if len(dfa.AcceptingStatesTable) != len(accTab) { - t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) - } - for eState, eID := range accTab { - id, ok := dfa.AcceptingStatesTable[eState] - if !ok { - t.Errorf("accepting state is not found: state: %v", eState) - } - if id != eID { - t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) - } - } -} diff --git a/tests/unit/grammar/lexical/dfa/symbol_position_test.go b/tests/unit/grammar/lexical/dfa/symbol_position_test.go deleted file mode 100644 index c867f64..0000000 --- a/tests/unit/grammar/lexical/dfa/symbol_position_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package dfa - -import ( - "fmt" - "testing" -) - -func TestNewSymbolPosition(t *testing.T) { - tests := []struct { - n uint16 - endMark bool - err bool - }{ - { - n: 0, - endMark: false, - err: true, - }, - { - n: 0, - endMark: true, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: true, - err: true, - }, - { - n: symbolPositionMin, - endMark: false, - }, - { - n: symbolPositionMin, - endMark: true, - }, - { - n: symbolPositionMax, - endMark: false, - }, - { - n: symbolPositionMax, - endMark: true, - }, - { - n: symbolPositionMax + 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMax + 1, - endMark: true, - err: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { - pos, err := newSymbolPosition(tt.n, tt.endMark) - if tt.err { - if err == nil { - t.Fatal("err is nil") - } - return - } - if err != nil { - t.Fatal(err) - } - n, endMark := pos.describe() - if n != tt.n || endMark != tt.endMark { - t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) - } - }) - } -} diff --git a/tests/unit/grammar/lexical/parser/parser_test.go b/tests/unit/grammar/lexical/parser.go index 4c9557d..d5d7039 100644 --- a/tests/unit/grammar/lexical/parser/parser_test.go +++ b/tests/unit/grammar/lexical/parser.go @@ -10,6 +10,524 @@ import ( "urubu/ucd" ) +func TestLexer(t *testing.T) { + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "lexer can recognize ordinaly characters", + src: "123abcいろは", + tokens: []*token{ + newToken(tokenKindChar, '1'), + newToken(tokenKindChar, '2'), + newToken(tokenKindChar, '3'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, 'b'), + newToken(tokenKindChar, 'c'), + newToken(tokenKindChar, 'い'), + newToken(tokenKindChar, 'ろ'), + newToken(tokenKindChar, 'は'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in default mode", + src: ".*+?|()[\\u", + tokens: []*token{ + newToken(tokenKindAnyChar, nullChar), + newToken(tokenKindRepeat, nullChar), + newToken(tokenKindRepeatOneOrMore, nullChar), + newToken(tokenKindOption, nullChar), + newToken(tokenKindAlt, nullChar), + newToken(tokenKindGroupOpen, nullChar), + newToken(tokenKindGroupClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in default mode", + src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", + tokens: []*token{ + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "], {, and } are treated as an ordinary character in default mode", + src: "]{}", + tokens: []*token{ + newToken(tokenKindChar, ']'), + newToken(tokenKindChar, '{'), + newToken(tokenKindChar, '}'), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters in bracket expression mode", + src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09AF"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("09abcf"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the escape sequences in bracket expression mode", + src: "[\\^a\\-z]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "in a bracket expression, the special characters are also handled as normal characters", + src: "[\\\\.*+?|()[", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '\\'), + newToken(tokenKindChar, '.'), + newToken(tokenKindChar, '*'), + newToken(tokenKindChar, '+'), + newToken(tokenKindChar, '?'), + newToken(tokenKindChar, '|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // [...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character-range expression. + // b. *-, -*, or - are handled as ordinary characters. + src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", + // [^...^...][^] + // ~~ ~ ~~ + // ^ ^ ^^ + // | | |`-- Ordinary Character (c) + // | | `-- Bracket Expression + // | `-- Ordinary Character (b) + // `-- Inverse Bracket Expression (a) + // + // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. + // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. + // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. + src: "[^^][^]", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "\\@", + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "\\", + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "[\\@", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "[\\", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer can recognize the special characters and code points in code point expression mode", + src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a one digit hex string isn't a valid code point", + src: "\\u{0", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a two digits hex string isn't a valid code point", + src: "\\u{01", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a three digits hex string isn't a valid code point", + src: "\\u{012", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a four digits hex string is a valid code point", + src: "\\u{0123}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a five digits hex string isn't a valid code point", + src: "\\u{01234", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a six digits hex string is a valid code point", + src: "\\u{012345}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("012345"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a seven digits hex string isn't a valid code point", + src: "\\u{0123456", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{g", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{G", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "lexer can recognize the special characters and symbols in character property expression mode", + src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", + tokens: []*token{ + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer can recognize the special characters and symbols in fragment expression mode", + src: "\\f{integer}", + tokens: []*token{ + newToken(tokenKindFragmentLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newFragmentSymbolToken("integer"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a fragment expression is not supported in a bracket expression", + src: "[\\f", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "a fragment expression is not supported in an inverse bracket expression", + src: "[^\\f", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + lex := newLexer(strings.NewReader(tt.src)) + var err error + var tok *token + i := 0 + for { + tok, err = lex.next() + if err != nil { + break + } + if i >= len(tt.tokens) { + break + } + eTok := tt.tokens[i] + i++ + testToken(t, tok, eTok) + + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) + } + detail, cause := lex.error() + if cause != tt.err { + t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + } + if i < len(tt.tokens) { + t.Fatalf("expecte more tokens") + } + }) + } +} + +func testToken(t *testing.T, a, e *token) { + t.Helper() + if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { + t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) + } +} + func TestParse(t *testing.T) { tests := []struct { pattern string diff --git a/tests/unit/grammar/lexical/parser/lexer_test.go b/tests/unit/grammar/lexical/parser/lexer_test.go deleted file mode 100644 index 055466e..0000000 --- a/tests/unit/grammar/lexical/parser/lexer_test.go +++ /dev/null @@ -1,524 +0,0 @@ -package parser - -import ( - "strings" - "testing" -) - -func TestLexer(t *testing.T) { - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "lexer can recognize ordinaly characters", - src: "123abcいろは", - tokens: []*token{ - newToken(tokenKindChar, '1'), - newToken(tokenKindChar, '2'), - newToken(tokenKindChar, '3'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, 'b'), - newToken(tokenKindChar, 'c'), - newToken(tokenKindChar, 'い'), - newToken(tokenKindChar, 'ろ'), - newToken(tokenKindChar, 'は'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in default mode", - src: ".*+?|()[\\u", - tokens: []*token{ - newToken(tokenKindAnyChar, nullChar), - newToken(tokenKindRepeat, nullChar), - newToken(tokenKindRepeatOneOrMore, nullChar), - newToken(tokenKindOption, nullChar), - newToken(tokenKindAlt, nullChar), - newToken(tokenKindGroupOpen, nullChar), - newToken(tokenKindGroupClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in default mode", - src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", - tokens: []*token{ - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "], {, and } are treated as an ordinary character in default mode", - src: "]{}", - tokens: []*token{ - newToken(tokenKindChar, ']'), - newToken(tokenKindChar, '{'), - newToken(tokenKindChar, '}'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in bracket expression mode", - src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09AF"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09abcf"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in bracket expression mode", - src: "[\\^a\\-z]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "in a bracket expression, the special characters are also handled as normal characters", - src: "[\\\\.*+?|()[", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", - // [...-...][...-][-...][-] - // ~~~~~~~ ~ ~ ~ - // ^ ^ ^ ^ - // | | | `-- Ordinary Character (b) - // | | `-- Ordinary Character (b) - // | `-- Ordinary Character (b) - // `-- Character Range (a) - // - // a. *-* is handled as a character-range expression. - // b. *-, -*, or - are handled as ordinary characters. - src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", - // [^...^...][^] - // ~~ ~ ~~ - // ^ ^ ^^ - // | | |`-- Ordinary Character (c) - // | | `-- Bracket Expression - // | `-- Ordinary Character (b) - // `-- Inverse Bracket Expression (a) - // - // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. - // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. - // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. - src: "[^^][^]", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "\\@", - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "\\", - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "[\\@", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "[\\", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer can recognize the special characters and code points in code point expression mode", - src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a one digit hex string isn't a valid code point", - src: "\\u{0", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a two digits hex string isn't a valid code point", - src: "\\u{01", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a three digits hex string isn't a valid code point", - src: "\\u{012", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a four digits hex string is a valid code point", - src: "\\u{0123}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a five digits hex string isn't a valid code point", - src: "\\u{01234", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a six digits hex string is a valid code point", - src: "\\u{012345}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("012345"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a seven digits hex string isn't a valid code point", - src: "\\u{0123456", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{g", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{G", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "lexer can recognize the special characters and symbols in character property expression mode", - src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", - tokens: []*token{ - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters and symbols in fragment expression mode", - src: "\\f{integer}", - tokens: []*token{ - newToken(tokenKindFragmentLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newFragmentSymbolToken("integer"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a fragment expression is not supported in a bracket expression", - src: "[\\f", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "a fragment expression is not supported in an inverse bracket expression", - src: "[^\\f", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - lex := newLexer(strings.NewReader(tt.src)) - var err error - var tok *token - i := 0 - for { - tok, err = lex.next() - if err != nil { - break - } - if i >= len(tt.tokens) { - break - } - eTok := tt.tokens[i] - i++ - testToken(t, tok, eTok) - - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - detail, cause := lex.error() - if cause != tt.err { - t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - if i < len(tt.tokens) { - t.Fatalf("expecte more tokens") - } - }) - } -} - -func testToken(t *testing.T, a, e *token) { - t.Helper() - if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { - t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) - } -} diff --git a/tests/unit/grammar/lr0_test.go b/tests/unit/grammar/lr0_test.go deleted file mode 100644 index 0a9ec24..0000000 --- a/tests/unit/grammar/lr0_test.go +++ /dev/null @@ -1,448 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type expectedLRState struct { - kernelItems []*lrItem - nextStates map[symbol.Symbol][]*lrItem - reducibleProds []*production - emptyProdItems []*lrItem -} - -func TestGenLR0Automaton(t *testing.T) { - src := ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("expr'", 0, "expr"), - }, - 1: { - genLR0Item("expr'", 1, "expr"), - genLR0Item("expr", 1, "expr", "add", "term"), - }, - 2: { - genLR0Item("expr", 1, "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 3: { - genLR0Item("term", 1, "factor"), - }, - 4: { - genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), - }, - 5: { - genLR0Item("factor", 1, "id"), - }, - 6: { - genLR0Item("expr", 2, "expr", "add", "term"), - }, - 7: { - genLR0Item("term", 2, "term", "mul", "factor"), - }, - 8: { - genLR0Item("expr", 1, "expr", "add", "term"), - genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), - }, - 9: { - genLR0Item("expr", 3, "expr", "add", "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 10: { - genLR0Item("term", 3, "term", "mul", "factor"), - }, - 11: { - genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[1], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("expr'", "expr"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "term"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "factor"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[8], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("term"): expectedKernels[9], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("factor"): expectedKernels[10], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - genSym("r_paren"): expectedKernels[11], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "expr", "add", "term"), - }, - }, - { - kernelItems: expectedKernels[10], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "term", "mul", "factor"), - }, - }, - { - kernelItems: expectedKernels[11], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "l_paren", "expr", "r_paren"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { - src := ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar - : b - | - ; - -b: "bar"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("s'", 0, "s"), - }, - 1: { - genLR0Item("s'", 1, "s"), - }, - 2: { - genLR0Item("s", 1, "foo", "bar"), - }, - 3: { - genLR0Item("s", 2, "foo", "bar"), - }, - 4: { - genLR0Item("bar", 1, "b"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("foo"): expectedKernels[2], - }, - reducibleProds: []*production{ - genProd("foo"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("foo", 0), - }, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("bar"): expectedKernels[3], - genSym("b"): expectedKernels[4], - }, - reducibleProds: []*production{ - genProd("bar"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("bar", 0), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "foo", "bar"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("bar", "b"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) { - if len(automaton.states) != len(expected) { - t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states)) - } - - for i, eState := range expected { - t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("a kernel was not found: %v", k.id) - } - - // test look-ahead symbols - { - if len(state.kernel.items) != len(eState.kernelItems) { - t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) - } - for _, eKItem := range eState.kernelItems { - var kItem *lrItem - for _, it := range state.kernel.items { - if it.id != eKItem.id { - continue - } - kItem = it - break - } - if kItem == nil { - t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) - } - - if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { - t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) - } - - for eSym := range eKItem.lookAhead.symbols { - if _, ok := kItem.lookAhead.symbols[eSym]; !ok { - t.Errorf("look-ahead symbol not found: %v", eSym) - } - } - } - } - - // test next states - { - if len(state.next) != len(eState.nextStates) { - t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) - } - for eSym, eKItems := range eState.nextStates { - nextStateKernel, err := newKernel(eKItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - nextState, ok := state.next[eSym] - if !ok { - t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) - } - if nextState != nextStateKernel.id { - t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) - } - } - } - - // test reducible productions - { - if len(state.reducible) != len(eState.reducibleProds) { - t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) - } - for _, eProd := range eState.reducibleProds { - if _, ok := state.reducible[eProd.id]; !ok { - t.Errorf("reducible production was not found: %v", eProd.id) - } - } - - if len(state.emptyProdItems) != len(eState.emptyProdItems) { - t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) - } - for _, eItem := range eState.emptyProdItems { - found := false - for _, item := range state.emptyProdItems { - if item.id != eItem.id { - continue - } - found = true - break - } - if !found { - t.Errorf("empty production item not found: %v", eItem.id) - } - } - } - }) - } -} diff --git a/tests/unit/grammar/parsing_table_test.go b/tests/unit/grammar/parsing_table_test.go deleted file mode 100644 index 342e187..0000000 --- a/tests/unit/grammar/parsing_table_test.go +++ /dev/null @@ -1,387 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type expectedState struct { - kernelItems []*lrItem - acts map[symbol.Symbol]testActionEntry - goTos map[symbol.Symbol][]*lrItem -} - -func TestGenLALRParsingTable(t *testing.T) { - src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var ptab *ParsingTable - var automaton *lalr1Automaton - var gram *Grammar - var nonTermCount int - var termCount int - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - first, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatal(err) - } - automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) - if err != nil { - t.Fatal(err) - } - - nonTermTexts, err := gram.symbolTable.NonTerminalTexts() - if err != nil { - t.Fatal(err) - } - termTexts, err := gram.symbolTable.TerminalTexts() - if err != nil { - t.Fatal(err) - } - nonTermCount = len(nonTermTexts) - termCount = len(termTexts) - - lalr := &lrTableBuilder{ - automaton: automaton.lr0Automaton, - prods: gram.productionSet, - termCount: termCount, - nonTermCount: nonTermCount, - symTab: gram.symbolTable, - } - ptab, err = lalr.build() - if err != nil { - t.Fatalf("failed to create a LALR parsing table: %v", err) - } - if ptab == nil { - t.Fatal("genLALRParsingTable returns nil without any error") - } - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []expectedState{ - { - kernelItems: expectedKernels[0], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[1], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s'", "s"), - }, - }, - }, - { - kernelItems: expectedKernels[2], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeShift, - nextState: expectedKernels[6], - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[3], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[4], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - }, - }, - { - kernelItems: expectedKernels[5], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - }, - }, - { - kernelItems: expectedKernels[6], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("l"): expectedKernels[8], - genSym("r"): expectedKernels[9], - }, - }, - { - kernelItems: expectedKernels[7], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[8], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[9], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "l", "eq", "r"), - }, - }, - }, - } - - t.Run("initial state", func(t *testing.T) { - iniState := findStateByNum(automaton.states, ptab.InitialState) - if iniState == nil { - t.Fatalf("the initial state was not found: #%v", ptab.InitialState) - } - eIniState, err := newKernel(expectedKernels[0]) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - if iniState.id != eIniState.id { - t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) - } - }) - - for i, eState := range expectedStates { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("state was not found: #%v", 0) - } - - testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) - testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) - }) - } -} - -func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eAct := range expectedState.acts { - nonEmptyEntries[eSym.Num()] = struct{}{} - - ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num()) - if ty != eAct.ty { - t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) - } - switch eAct.ty { - case ActionTypeShift: - eNextState, err := newKernel(eAct.nextState) - if err != nil { - t.Fatal(err) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found; state: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - case ActionTypeReduce: - prod := findProductionByNum(gram.productionSet, prodNum) - if prod == nil { - t.Fatalf("production was not found: #%v", prodNum) - } - if prod.id != eAct.production.id { - t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) - } - } - } - for symNum := 0; symNum < termCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum)) - if ty != ActionTypeError { - t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) - } - } -} - -func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eGoTo := range expectedState.goTos { - nonEmptyEntries[eSym.Num()] = struct{}{} - - eNextState, err := newKernel(eGoTo) - if err != nil { - t.Fatal(err) - } - ty, stateNum := ptab.getGoTo(state.num, eSym.Num()) - if ty != GoToTypeRegistered { - t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - } - for symNum := 0; symNum < nonTermCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum)) - if ty != GoToTypeError { - t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) - } - } -} - -type testActionEntry struct { - ty ActionType - nextState []*lrItem - production *production -} - -func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { - for _, state := range states { - if state.num == num { - return state - } - } - return nil -} - -func findProductionByNum(prods *productionSet, num productionNum) *production { - for _, prod := range prods.getAllProductions() { - if prod.num == num { - return prod - } - } - return nil -} diff --git a/tests/unit/grammar/symbol/symbol_test.go b/tests/unit/grammar/symbol.go index 31c3edd..31c3edd 100644 --- a/tests/unit/grammar/symbol/symbol_test.go +++ b/tests/unit/grammar/symbol.go diff --git a/tests/unit/grammar/test_helper_test.go b/tests/unit/grammar/test_helper_test.go deleted file mode 100644 index 546d2c1..0000000 --- a/tests/unit/grammar/test_helper_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package grammar - -import ( - "testing" - - "urubu/grammar/symbol" -) - -type testSymbolGenerator func(text string) symbol.Symbol - -func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator { - return func(text string) symbol.Symbol { - t.Helper() - - sym, ok := symTab.ToSymbol(text) - if !ok { - t.Fatalf("symbol was not found: %v", text) - } - return sym - } -} - -type testProductionGenerator func(lhs string, rhs ...string) *production - -func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator { - return func(lhs string, rhs ...string) *production { - t.Helper() - - rhsSym := []symbol.Symbol{} - for _, text := range rhs { - rhsSym = append(rhsSym, genSym(text)) - } - prod, err := newProduction(genSym(lhs), rhsSym) - if err != nil { - t.Fatalf("failed to create a production: %v", err) - } - - return prod - } -} - -type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem - -func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { - return func(lhs string, dot int, rhs ...string) *lrItem { - t.Helper() - - prod := genProd(lhs, rhs...) - item, err := newLR0Item(prod, dot) - if err != nil { - t.Fatalf("failed to create a LR0 item: %v", err) - } - - return item - } -} - -func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem { - if item.lookAhead.symbols == nil { - item.lookAhead.symbols = map[symbol.Symbol]struct{}{} - } - - for _, a := range lookAhead { - item.lookAhead.symbols[a] = struct{}{} - } - - return item -} |