-rw-r--r-- | grammar/first.go            | 144
-rw-r--r-- | grammar/first_test.go       | 205
-rw-r--r-- | grammar/follow.go           | 197
-rw-r--r-- | grammar/follow_test.go      | 188
-rw-r--r-- | grammar/grammar.go          | 158
-rw-r--r-- | grammar/lr0_item.go         | 343
-rw-r--r-- | grammar/lr0_item_test.go    | 263
-rw-r--r-- | grammar/slr.go              | 170
-rw-r--r-- | grammar/slr_test.go         | 481
-rw-r--r-- | grammar/test_helper_test.go |  52
10 files changed, 2201 insertions, 0 deletions
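
The new files implement the standard SLR(1) construction: first.go and follow.go compute the FIRST and FOLLOW sets, grammar.go derives the lexical specification, symbol table, and production set from the parsed grammar, lr0_item.go builds the LR(0) item sets and the LR(0) automaton, and slr.go fills the ACTION and GOTO tables from the automaton and the FOLLOW sets. The sketch below shows how these pieces are chained, in the same order slr_test.go uses them; buildSLRTable is a hypothetical helper name, and the snippet has to live inside package grammar because most of the identifiers it calls are unexported.

    package grammar

    import (
        "strings"

        "github.com/nihei9/vartan/spec"
    )

    // buildSLRTable is a hypothetical helper that chains the new components:
    // parse the grammar source, build the Grammar, compute FIRST and FOLLOW,
    // generate the LR(0) automaton, and emit the SLR parsing table.
    func buildSLRTable(src string) (*ParsingTable, error) {
        ast, err := spec.Parse(strings.NewReader(src))
        if err != nil {
            return nil, err
        }
        gram, err := NewGrammar(ast)
        if err != nil {
            return nil, err
        }
        first, err := genFirstSet(gram.productionSet)
        if err != nil {
            return nil, err
        }
        follow, err := genFollowSet(gram.productionSet, first)
        if err != nil {
            return nil, err
        }
        automaton, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
        if err != nil {
            return nil, err
        }
        termTexts, err := gram.symbolTable.getTerminalTexts()
        if err != nil {
            return nil, err
        }
        nonTermTexts, err := gram.symbolTable.getNonTerminalTexts()
        if err != nil {
            return nil, err
        }
        return genSLRParsingTable(automaton, gram.productionSet, follow, len(termTexts), len(nonTermTexts))
    }
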
diff --git a/grammar/first.go b/grammar/first.go new file mode 100644 index 0000000..72de282 --- /dev/null +++ b/grammar/first.go @@ -0,0 +1,144 @@ +package grammar + +import "fmt" + +type firstEntry struct { + symbols map[symbol]struct{} + empty bool +} + +func newFirstEntry() *firstEntry { + return &firstEntry{ + symbols: map[symbol]struct{}{}, + empty: false, + } +} + +func (e *firstEntry) add(sym symbol) bool { + if _, ok := e.symbols[sym]; ok { + return false + } + e.symbols[sym] = struct{}{} + return true +} + +func (e *firstEntry) addEmpty() bool { + if !e.empty { + e.empty = true + return true + } + return false +} + +func (e *firstEntry) mergeExceptEmpty(target *firstEntry) bool { + if target == nil { + return false + } + changed := false + for sym := range target.symbols { + added := e.add(sym) + if added { + changed = true + } + } + return changed +} + +type firstSet struct { + set map[symbol]*firstEntry +} + +func newFirstSet(prods *productionSet) *firstSet { + fst := &firstSet{ + set: map[symbol]*firstEntry{}, + } + for _, prod := range prods.getAllProductions() { + if _, ok := fst.set[prod.lhs]; ok { + continue + } + fst.set[prod.lhs] = newFirstEntry() + } + + return fst +} + +func (fst *firstSet) find(prod *production, head int) (*firstEntry, error) { + entry := newFirstEntry() + if prod.rhsLen <= head { + entry.addEmpty() + return entry, nil + } + for _, sym := range prod.rhs[head:] { + if sym.isTerminal() { + entry.add(sym) + return entry, nil + } + + e := fst.findBySymbol(sym) + if e == nil { + return nil, fmt.Errorf("an entry of FIRST was not found; symbol: %s", sym) + } + for s := range e.symbols { + entry.add(s) + } + if !e.empty { + return entry, nil + } + } + entry.addEmpty() + return entry, nil +} + +func (fst *firstSet) findBySymbol(sym symbol) *firstEntry { + return fst.set[sym] +} + +type firstComContext struct { + first *firstSet +} + +func newFirstComContext(prods *productionSet) *firstComContext { + return &firstComContext{ + first: newFirstSet(prods), + } +} + +func genFirstSet(prods *productionSet) (*firstSet, error) { + cc := newFirstComContext(prods) + for { + more := false + for _, prod := range prods.getAllProductions() { + e := cc.first.findBySymbol(prod.lhs) + changed, err := genProdFirstEntry(cc, e, prod) + if err != nil { + return nil, err + } + if changed { + more = true + } + } + if !more { + break + } + } + return cc.first, nil +} + +func genProdFirstEntry(cc *firstComContext, acc *firstEntry, prod *production) (bool, error) { + if prod.isEmpty() { + return acc.addEmpty(), nil + } + + for _, sym := range prod.rhs { + if sym.isTerminal() { + return acc.add(sym), nil + } + + e := cc.first.findBySymbol(sym) + changed := acc.mergeExceptEmpty(e) + if !e.empty { + return changed, nil + } + } + return acc.addEmpty(), nil +} diff --git a/grammar/first_test.go b/grammar/first_test.go new file mode 100644 index 0000000..b65b0ea --- /dev/null +++ b/grammar/first_test.go @@ -0,0 +1,205 @@ +package grammar + +import ( + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +type first struct { + lhs string + num int + dot int + symbols []string + empty bool +} + +func TestGenFirst(t *testing.T) { + tests := []struct { + caption string + src string + first []first + }{ + { + caption: "productions contain only non-empty productions", + src: ` +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +`, + 
first: []first{ + {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, + {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, + {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, + {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, + {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, + {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, + }, + }, + { + caption: "productions contain the empty start production", + src: ` +s + : + ; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "productions contain an empty production", + src: ` +s + : foo bar + ; +foo + : + ; +bar: "bar"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, + {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "a start production contains a non-empty alternative and empty alternative", + src: ` +s + : foo + | + ; +foo: "foo"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, + {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "a production contains non-empty alternative and empty alternative", + src: ` +s + : foo + ; +foo + : bar + | + ; +bar: "bar"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, + {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, + {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + fst, gram := genActualFirst(t, tt.src) + + for _, ttFirst := range tt.first { + lhsSym, ok := gram.symbolTable.toSymbol(ttFirst.lhs) + if !ok { + t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs) + } + + prod, ok := gram.productionSet.findByLHS(lhsSym) + if !ok { + t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym) + } + + actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot) + if err != nil { + t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err) + } + + expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable) + + testFirst(t, actualFirst, expectedFirst) + } + }) + } +} + +func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + gram, err := NewGrammar(ast) + if err != nil { + t.Fatal(err) + } + fst, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + if fst == nil { + t.Fatal("genFiest returned nil without any error") + } + + return fst, gram +} + +func genExpectedFirstEntry(t *testing.T, symbols 
[]string, empty bool, symTab *symbolTable) *firstEntry { + t.Helper() + + entry := newFirstEntry() + if empty { + entry.addEmpty() + } + for _, sym := range symbols { + symSym, ok := symTab.toSymbol(sym) + if !ok { + t.Fatalf("a symbol was not found; symbol: %v", sym) + } + entry.add(symSym) + } + + return entry +} + +func testFirst(t *testing.T, actual, expected *firstEntry) { + if actual.empty != expected.empty { + t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty) + } + + if len(actual.symbols) != len(expected.symbols) { + t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) + } + + for eSym := range expected.symbols { + if _, ok := actual.symbols[eSym]; !ok { + t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) + } + } +} diff --git a/grammar/follow.go b/grammar/follow.go new file mode 100644 index 0000000..f835bba --- /dev/null +++ b/grammar/follow.go @@ -0,0 +1,197 @@ +package grammar + +import ( + "fmt" +) + +type followEntry struct { + symbols map[symbol]struct{} + eof bool +} + +func newFollowEntry() *followEntry { + return &followEntry{ + symbols: map[symbol]struct{}{}, + eof: false, + } +} + +func (e *followEntry) add(sym symbol) bool { + if _, ok := e.symbols[sym]; ok { + return false + } + e.symbols[sym] = struct{}{} + return true +} + +func (e *followEntry) addEOF() bool { + if !e.eof { + e.eof = true + return true + } + return false +} + +func (e *followEntry) merge(fst *firstEntry, flw *followEntry) bool { + changed := false + + if fst != nil { + for sym := range fst.symbols { + added := e.add(sym) + if added { + changed = true + } + } + } + + if flw != nil { + for sym := range flw.symbols { + added := e.add(sym) + if added { + changed = true + } + } + if flw.eof { + added := e.addEOF() + if added { + changed = true + } + } + } + + return changed +} + +type followSet struct { + set map[symbol]*followEntry +} + +func newFollow(prods *productionSet) *followSet { + flw := &followSet{ + set: map[symbol]*followEntry{}, + } + for _, prod := range prods.getAllProductions() { + if _, ok := flw.set[prod.lhs]; ok { + continue + } + flw.set[prod.lhs] = newFollowEntry() + } + return flw +} + +func (flw *followSet) find(sym symbol) (*followEntry, error) { + e, ok := flw.set[sym] + if !ok { + return nil, fmt.Errorf("an entry of FOLLOW was not found; symbol: %s", sym) + } + return e, nil +} + +type followComContext struct { + prods *productionSet + first *firstSet + follow *followSet +} + +func newFollowComContext(prods *productionSet, first *firstSet) *followComContext { + return &followComContext{ + prods: prods, + first: first, + follow: newFollow(prods), + } +} + +func genFollowSet(prods *productionSet, first *firstSet) (*followSet, error) { + ntsyms := map[symbol]struct{}{} + for _, prod := range prods.getAllProductions() { + if _, ok := ntsyms[prod.lhs]; ok { + continue + } + ntsyms[prod.lhs] = struct{}{} + } + + cc := newFollowComContext(prods, first) + for { + more := false + for ntsym := range ntsyms { + e, err := cc.follow.find(ntsym) + if err != nil { + return nil, err + } + if ntsym.isStart() { + changed := e.addEOF() + if changed { + more = true + } + } + for _, prod := range prods.getAllProductions() { + for i, sym := range prod.rhs { + if sym != ntsym { + continue + } + fst, err := first.find(prod, i+1) + if err != nil { + return nil, err + } + changed := e.merge(fst, nil) + if changed { + more = true + } + if fst.empty { + flw, err := cc.follow.find(prod.lhs) + if err != nil { + 
return nil, err + } + changed := e.merge(nil, flw) + if changed { + more = true + } + } + } + } + } + if !more { + break + } + } + + return cc.follow, nil +} + +func genFollowEntry(cc *followComContext, acc *followEntry, ntsym symbol) (bool, error) { + changed := false + + if ntsym.isStart() { + added := acc.addEOF() + if added { + changed = true + } + } + for _, prod := range cc.prods.getAllProductions() { + for i, sym := range prod.rhs { + if sym != ntsym { + continue + } + fst, err := cc.first.find(prod, i+1) + if err != nil { + return false, err + } + added := acc.merge(fst, nil) + if added { + changed = true + } + if fst.empty { + flw, err := cc.follow.find(prod.lhs) + if err != nil { + return false, err + } + added := acc.merge(nil, flw) + if added { + changed = true + } + } + } + } + + return changed, nil +} diff --git a/grammar/follow_test.go b/grammar/follow_test.go new file mode 100644 index 0000000..042bc88 --- /dev/null +++ b/grammar/follow_test.go @@ -0,0 +1,188 @@ +package grammar + +import ( + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +type follow struct { + nonTermText string + symbols []string + eof bool +} + +func TestFollowSet(t *testing.T) { + tests := []struct { + caption string + src string + follow []follow + }{ + { + caption: "productions contain only non-empty productions", + src: ` +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +`, + follow: []follow{ + {nonTermText: "expr'", symbols: []string{}, eof: true}, + {nonTermText: "expr", symbols: []string{"add", "r_paren"}, eof: true}, + {nonTermText: "term", symbols: []string{"add", "mul", "r_paren"}, eof: true}, + {nonTermText: "factor", symbols: []string{"add", "mul", "r_paren"}, eof: true}, + }, + }, + { + caption: "productions contain an empty start production", + src: ` +s + : + ; +`, + follow: []follow{ + {nonTermText: "s'", symbols: []string{}, eof: true}, + {nonTermText: "s", symbols: []string{}, eof: true}, + }, + }, + { + caption: "productions contain an empty production", + src: ` +s + : foo + ; +foo + : +; +`, + follow: []follow{ + {nonTermText: "s'", symbols: []string{}, eof: true}, + {nonTermText: "s", symbols: []string{}, eof: true}, + {nonTermText: "foo", symbols: []string{}, eof: true}, + }, + }, + { + caption: "a start production contains a non-empty alternative and empty alternative", + src: ` +s + : foo + | + ; +foo: "foo"; +`, + follow: []follow{ + {nonTermText: "s'", symbols: []string{}, eof: true}, + {nonTermText: "s", symbols: []string{}, eof: true}, + }, + }, + { + caption: "a production contains non-empty alternative and empty alternative", + src: ` +s + : foo + ; +foo + : bar + | + ; +bar: "bar"; +`, + follow: []follow{ + {nonTermText: "s'", symbols: []string{}, eof: true}, + {nonTermText: "s", symbols: []string{}, eof: true}, + {nonTermText: "foo", symbols: []string{}, eof: true}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + flw, gram := genActualFollow(t, tt.src) + + for _, ttFollow := range tt.follow { + sym, ok := gram.symbolTable.toSymbol(ttFollow.nonTermText) + if !ok { + t.Fatalf("a symbol '%v' was not found", ttFollow.nonTermText) + } + + actualFollow, err := flw.find(sym) + if err != nil { + t.Fatalf("failed to get a FOLLOW entry; non-terminal symbol: %v (%v), error: %v", ttFollow.nonTermText, sym, err) + } + + expectedFollow := genExpectedFollowEntry(t, 
ttFollow.symbols, ttFollow.eof, gram.symbolTable) + + testFollow(t, actualFollow, expectedFollow) + } + }) + } +} + +func genActualFollow(t *testing.T, src string) (*followSet, *Grammar) { + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + gram, err := NewGrammar(ast) + if err != nil { + t.Fatal(err) + } + fst, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + flw, err := genFollowSet(gram.productionSet, fst) + if flw == nil { + t.Fatal("genFollow returned nil without any error") + } + + return flw, gram +} + +func genExpectedFollowEntry(t *testing.T, symbols []string, eof bool, symTab *symbolTable) *followEntry { + t.Helper() + + entry := newFollowEntry() + if eof { + entry.addEOF() + } + for _, sym := range symbols { + symID, _ := symTab.toSymbol(sym) + if symID.isNil() { + t.Fatalf("a symbol '%v' was not found", sym) + } + + entry.add(symID) + } + + return entry +} + +func testFollow(t *testing.T, actual, expected *followEntry) { + if actual.eof != expected.eof { + t.Errorf("eof is mismatched; want: %v, got: %v", expected.eof, actual.eof) + } + + if len(actual.symbols) != len(expected.symbols) { + t.Fatalf("unexpected symbol count of a FOLLOW entry; want: %v, got: %v", expected.symbols, actual.symbols) + } + + for eSym := range expected.symbols { + if _, ok := actual.symbols[eSym]; !ok { + t.Fatalf("invalid FOLLOW entry; want: %v, got: %v", expected.symbols, actual.symbols) + } + } +} diff --git a/grammar/grammar.go b/grammar/grammar.go new file mode 100644 index 0000000..ce0ff19 --- /dev/null +++ b/grammar/grammar.go @@ -0,0 +1,158 @@ +package grammar + +import ( + "fmt" + + mlspec "github.com/nihei9/maleeni/spec" + "github.com/nihei9/vartan/spec" +) + +type Grammar struct { + lexSpec *mlspec.LexSpec + productionSet *productionSet + augmentedStartSymbol symbol + symbolTable *symbolTable +} + +func NewGrammar(root *spec.RootNode) (*Grammar, error) { + symTab := newSymbolTable() + anonPat2Sym := map[string]symbol{} + var lexSpec *mlspec.LexSpec + { + entries := []*mlspec.LexEntry{} + anonPats := []string{} + for _, prod := range root.Productions { + if isLexicalProduction(prod) { + _, err := symTab.registerTerminalSymbol(prod.LHS) + if err != nil { + return nil, err + } + + entries = append(entries, &mlspec.LexEntry{ + Kind: mlspec.LexKind(prod.LHS), + Pattern: mlspec.LexPattern(prod.RHS[0].Elements[0].Pattern), + }) + continue + } + + for _, alt := range prod.RHS { + for _, elem := range alt.Elements { + if elem.Pattern == "" { + continue + } + exist := false + for _, p := range anonPats { + if p == elem.Pattern { + exist = true + break + } + } + if exist { + continue + } + anonPats = append(anonPats, elem.Pattern) + } + } + } + for i, p := range anonPats { + kind := fmt.Sprintf("__%v__", i+1) + + sym, err := symTab.registerTerminalSymbol(kind) + if err != nil { + return nil, err + } + anonPat2Sym[p] = sym + + entries = append(entries, &mlspec.LexEntry{ + Kind: mlspec.LexKind(kind), + Pattern: mlspec.LexPattern(p), + }) + } + + lexSpec = &mlspec.LexSpec{ + Entries: entries, + } + } + + prods := newProductionSet() + var augStartSym symbol + { + startProd := root.Productions[0] + augStartText := fmt.Sprintf("%s'", startProd.LHS) + var err error + augStartSym, err = symTab.registerStartSymbol(augStartText) + if err != nil { + return nil, err + } + startSym, err := symTab.registerNonTerminalSymbol(startProd.LHS) + if err != nil { + return nil, err + } + p, err := newProduction(augStartSym, []symbol{ + startSym, + }) + if err 
!= nil { + return nil, err + } + prods.append(p) + + for _, prod := range root.Productions { + if isLexicalProduction(prod) { + continue + } + _, err := symTab.registerNonTerminalSymbol(prod.LHS) + if err != nil { + return nil, err + } + } + + for _, prod := range root.Productions { + if isLexicalProduction(prod) { + continue + } + lhsSym, ok := symTab.toSymbol(prod.LHS) + if !ok { + return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS) + } + for _, alt := range prod.RHS { + altSyms := make([]symbol, len(alt.Elements)) + for i, elem := range alt.Elements { + var sym symbol + if elem.Pattern != "" { + var ok bool + sym, ok = anonPat2Sym[elem.Pattern] + if !ok { + return nil, fmt.Errorf("pattern '%v' is undefined", elem.Pattern) + } + } else { + var ok bool + sym, ok = symTab.toSymbol(elem.ID) + if !ok { + return nil, fmt.Errorf("symbol '%v' is undefined", elem.ID) + } + } + altSyms[i] = sym + } + p, err := newProduction(lhsSym, altSyms) + if err != nil { + return nil, err + } + prods.append(p) + } + } + } + + return &Grammar{ + lexSpec: lexSpec, + productionSet: prods, + augmentedStartSymbol: augStartSym, + symbolTable: symTab, + }, nil +} + +func isLexicalProduction(prod *spec.ProductionNode) bool { + if len(prod.RHS) == 1 && len(prod.RHS[0].Elements) == 1 && prod.RHS[0].Elements[0].Pattern != "" { + return true + } + return false +} diff --git a/grammar/lr0_item.go b/grammar/lr0_item.go new file mode 100644 index 0000000..2934061 --- /dev/null +++ b/grammar/lr0_item.go @@ -0,0 +1,343 @@ +package grammar + +import ( + "crypto/sha256" + "encoding/binary" + "fmt" + "sort" + "strconv" +) + +type lr0ItemID [32]byte + +func (id lr0ItemID) String() string { + return fmt.Sprintf("%x", id.num()) +} + +func (id lr0ItemID) num() uint32 { + return binary.LittleEndian.Uint32(id[:]) +} + +type lr0Item struct { + id lr0ItemID + prod productionID + + // E → E + T + // + // Dot | Dotted Symbol | Item + // ----+---------------+------------ + // 0 | E | E →・E + T + // 1 | + | E → E・+ T + // 2 | T | E → E +・T + // 3 | Nil | E → E + T・ + dot int + dottedSymbol symbol + + // When initial is true, the LHS of the production is the augmented start symbol and dot is 0. + // It looks like S' →・S. + initial bool + + // When reducible is true, the item looks like E → E + T・. + reducible bool + + // When kernel is true, the item is kernel item. + kernel bool +} + +func newLR0Item(prod *production, dot int) (*lr0Item, error) { + if prod == nil { + return nil, fmt.Errorf("production must be non-nil") + } + + if dot < 0 || dot > prod.rhsLen { + return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen) + } + + var id lr0ItemID + { + b := []byte{} + b = append(b, prod.id[:]...) + bDot := make([]byte, 8) + binary.LittleEndian.PutUint64(bDot, uint64(dot)) + b = append(b, bDot...) 
+ id = sha256.Sum256(b) + } + + dottedSymbol := symbolNil + if dot < prod.rhsLen { + dottedSymbol = prod.rhs[dot] + } + + initial := false + if prod.lhs.isStart() && dot == 0 { + initial = true + } + + reducible := false + if dot == prod.rhsLen { + reducible = true + } + + kernel := false + if initial || dot > 0 { + kernel = true + } + + item := &lr0Item{ + id: id, + prod: prod.id, + dot: dot, + dottedSymbol: dottedSymbol, + initial: initial, + reducible: reducible, + kernel: kernel, + } + + return item, nil +} + +type kernelID [32]byte + +func (id kernelID) String() string { + return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:])) +} + +type kernel struct { + id kernelID + items []*lr0Item +} + +func newKernel(items []*lr0Item) (*kernel, error) { + if len(items) == 0 { + return nil, fmt.Errorf("a kernel need at least one item") + } + + // Remove duplicates from items. + var sortedItems []*lr0Item + { + m := map[lr0ItemID]*lr0Item{} + for _, item := range items { + if !item.kernel { + return nil, fmt.Errorf("not a kernel item: %v", item) + } + m[item.id] = item + } + sortedItems = []*lr0Item{} + for _, item := range m { + sortedItems = append(sortedItems, item) + } + sort.Slice(sortedItems, func(i, j int) bool { + return sortedItems[i].id.num() < sortedItems[j].id.num() + }) + } + + var id kernelID + { + b := []byte{} + for _, item := range sortedItems { + b = append(b, item.id[:]...) + } + id = sha256.Sum256(b) + } + + return &kernel{ + id: id, + items: sortedItems, + }, nil +} + +type stateNum int + +const stateNumInitial = stateNum(0) + +func (n stateNum) Int() int { + return int(n) +} + +func (n stateNum) String() string { + return strconv.Itoa(int(n)) +} + +func (n stateNum) next() stateNum { + return stateNum(n + 1) +} + +type lr0State struct { + *kernel + num stateNum + next map[symbol]kernelID + reducible map[productionID]struct{} +} + +type lr0Automaton struct { + initialState kernelID + states map[kernelID]*lr0State +} + +func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) { + if !startSym.isStart() { + return nil, fmt.Errorf("passed symbold is not a start symbol") + } + + automaton := &lr0Automaton{ + states: map[kernelID]*lr0State{}, + } + + currentState := stateNumInitial + knownKernels := map[kernelID]struct{}{} + uncheckedKernels := []*kernel{} + + // Generate an initial kernel. 
+ { + prods, _ := prods.findByLHS(startSym) + initialItem, err := newLR0Item(prods[0], 0) + if err != nil { + return nil, err + } + + k, err := newKernel([]*lr0Item{initialItem}) + if err != nil { + return nil, err + } + + automaton.initialState = k.id + knownKernels[k.id] = struct{}{} + uncheckedKernels = append(uncheckedKernels, k) + } + + for len(uncheckedKernels) > 0 { + nextUncheckedKernels := []*kernel{} + for _, k := range uncheckedKernels { + state, neighbours, err := genStateAndNeighbourKernels(k, prods) + if err != nil { + return nil, err + } + state.num = currentState + currentState = currentState.next() + + automaton.states[state.id] = state + + for _, k := range neighbours { + if _, known := knownKernels[k.id]; known { + continue + } + knownKernels[k.id] = struct{}{} + nextUncheckedKernels = append(nextUncheckedKernels, k) + } + } + uncheckedKernels = nextUncheckedKernels + } + + return automaton, nil +} + +func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lr0State, []*kernel, error) { + items, err := genClosure(k, prods) + if err != nil { + return nil, nil, err + } + neighbours, err := genNeighbourKernels(items, prods) + if err != nil { + return nil, nil, err + } + + next := map[symbol]kernelID{} + kernels := []*kernel{} + for _, n := range neighbours { + next[n.symbol] = n.kernel.id + kernels = append(kernels, n.kernel) + } + + reducible := map[productionID]struct{}{} + for _, item := range items { + if item.reducible { + reducible[item.prod] = struct{}{} + } + } + + return &lr0State{ + kernel: k, + next: next, + reducible: reducible, + }, kernels, nil +} + +func genClosure(k *kernel, prods *productionSet) ([]*lr0Item, error) { + items := []*lr0Item{} + knownItems := map[lr0ItemID]struct{}{} + uncheckedItems := []*lr0Item{} + for _, item := range k.items { + items = append(items, item) + uncheckedItems = append(uncheckedItems, item) + } + for len(uncheckedItems) > 0 { + nextUncheckedItems := []*lr0Item{} + for _, item := range uncheckedItems { + if item.dottedSymbol.isTerminal() { + continue + } + + ps, _ := prods.findByLHS(item.dottedSymbol) + for _, prod := range ps { + item, err := newLR0Item(prod, 0) + if err != nil { + return nil, err + } + if _, exist := knownItems[item.id]; exist { + continue + } + items = append(items, item) + knownItems[item.id] = struct{}{} + nextUncheckedItems = append(nextUncheckedItems, item) + } + } + uncheckedItems = nextUncheckedItems + } + + return items, nil +} + +type neighbourKernel struct { + symbol symbol + kernel *kernel +} + +func genNeighbourKernels(items []*lr0Item, prods *productionSet) ([]*neighbourKernel, error) { + kItemMap := map[symbol][]*lr0Item{} + for _, item := range items { + if item.dottedSymbol.isNil() { + continue + } + prod, ok := prods.findByID(item.prod) + if !ok { + return nil, fmt.Errorf("a production was not found: %v", item.prod) + } + kItem, err := newLR0Item(prod, item.dot+1) + if err != nil { + return nil, err + } + kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem) + } + + nextSyms := []symbol{} + for sym := range kItemMap { + nextSyms = append(nextSyms, sym) + } + sort.Slice(nextSyms, func(i, j int) bool { + return nextSyms[i] < nextSyms[j] + }) + + kernels := []*neighbourKernel{} + for _, sym := range nextSyms { + k, err := newKernel(kItemMap[sym]) + if err != nil { + return nil, err + } + kernels = append(kernels, &neighbourKernel{ + symbol: sym, + kernel: k, + }) + } + + return kernels, nil +} diff --git a/grammar/lr0_item_test.go b/grammar/lr0_item_test.go new 
file mode 100644 index 0000000..c04ce13 --- /dev/null +++ b/grammar/lr0_item_test.go @@ -0,0 +1,263 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +func TestGenLR0Automaton(t *testing.T) { + src := ` +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + gram, err := NewGrammar(ast) + if err != nil { + t.Fatal(err) + } + + automaton, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lr0Item{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + genLR0Item("expr'", 1, "expr"), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + genLR0Item("expr", 1, "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + genLR0Item("term", 1, "factor"), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + genLR0Item("factor", 1, "id"), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + genLR0Item("expr", 3, "expr", "add", "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 10: { + genLR0Item("term", 3, "term", "mul", "factor"), + }, + 11: { + genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), + }, + } + + expectedStates := []expectedLR0State{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol][]*lr0Item{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol][]*lr0Item{ + genSym("add"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("expr'", "expr"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol][]*lr0Item{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "term"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol][]*lr0Item{}, + reducibleProds: []*production{ + genProd("term", "factor"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol][]*lr0Item{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol][]*lr0Item{}, + reducibleProds: []*production{ + genProd("factor", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol][]*lr0Item{ + 
genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol][]*lr0Item{ + genSym("factor"): expectedKernels[10], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol][]*lr0Item{ + genSym("add"): expectedKernels[6], + genSym("r_paren"): expectedKernels[11], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol][]*lr0Item{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "expr", "add", "term"), + }, + }, + { + kernelItems: expectedKernels[10], + nextStates: map[symbol][]*lr0Item{}, + reducibleProds: []*production{ + genProd("term", "term", "mul", "factor"), + }, + }, + { + kernelItems: expectedKernels[11], + nextStates: map[symbol][]*lr0Item{}, + reducibleProds: []*production{ + genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + } + + if len(automaton.states) != len(expectedStates) { + t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states)) + } + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("a kernel was not found: %v", k.id) + } + + // test next states + { + if len(state.next) != len(eState.nextStates) { + t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) + } + for eSym, eKItems := range eState.nextStates { + nextStateKernel, err := newKernel(eKItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + nextState, ok := state.next[eSym] + if !ok { + t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) + } + if nextState != nextStateKernel.id { + t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) + } + } + } + + // test reducible productions + { + if len(state.reducible) != len(eState.reducibleProds) { + t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) + } + for _, eProd := range eState.reducibleProds { + if _, ok := state.reducible[eProd.id]; !ok { + t.Errorf("reducible production was not found: %v", eProd.id) + } + } + } + }) + } +} + +type expectedLR0State struct { + kernelItems []*lr0Item + nextStates map[symbol][]*lr0Item + reducibleProds []*production +} diff --git a/grammar/slr.go b/grammar/slr.go new file mode 100644 index 0000000..a3ce803 --- /dev/null +++ b/grammar/slr.go @@ -0,0 +1,170 @@ +package grammar + +import ( + "fmt" +) + +type ActionType string + +const ( + ActionTypeShift = ActionType("shift") + ActionTypeReduce = ActionType("reduce") + ActionTypeError = ActionType("error") +) + +type actionEntry int + +const actionEntryEmpty = actionEntry(0) + +func newShiftActionEntry(state stateNum) actionEntry { + return actionEntry(state * -1) +} + +func newReduceActionEntry(prod productionNum) actionEntry { + return actionEntry(prod) +} + +func (e actionEntry) isEmpty() bool { + return e == actionEntryEmpty +} + +func (e actionEntry) 
describe() (ActionType, stateNum, productionNum) { + if e == actionEntryEmpty { + return ActionTypeError, stateNumInitial, productionNumNil + } + if e < 0 { + return ActionTypeShift, stateNum(e * -1), productionNumNil + } + return ActionTypeReduce, stateNumInitial, productionNum(e) +} + +type GoToType string + +const ( + GoToTypeRegistered = GoToType("registered") + GoToTypeError = GoToType("error") +) + +type goToEntry uint + +const goToEntryEmpty = goToEntry(0) + +func newGoToEntry(state stateNum) goToEntry { + return goToEntry(state) +} + +func (e goToEntry) isEmpty() bool { + return e == goToEntryEmpty +} + +func (e goToEntry) describe() (GoToType, stateNum) { + if e == goToEntryEmpty { + return GoToTypeError, stateNumInitial + } + return GoToTypeRegistered, stateNum(e) +} + +type ParsingTable struct { + actionTable []actionEntry + goToTable []goToEntry + stateCount int + terminalCount int + nonTerminalCount int + + InitialState stateNum +} + +func (t *ParsingTable) getAction(state stateNum, sym symbolNum) (ActionType, stateNum, productionNum) { + pos := state.Int()*t.terminalCount + sym.Int() + return t.actionTable[pos].describe() +} + +func (t *ParsingTable) getGoTo(state stateNum, sym symbolNum) (GoToType, stateNum) { + pos := state.Int()*t.nonTerminalCount + sym.Int() + return t.goToTable[pos].describe() +} + +func (t *ParsingTable) writeShiftAction(state stateNum, sym symbol, nextState stateNum) error { + pos := state.Int()*t.terminalCount + sym.num().Int() + act := t.actionTable[pos] + if !act.isEmpty() { + ty, _, _ := act.describe() + if ty == ActionTypeReduce { + return fmt.Errorf("shift/reduce conflict") + } + } + t.actionTable[pos] = newShiftActionEntry(nextState) + + return nil +} + +func (t *ParsingTable) writeReduceAction(state stateNum, sym symbol, prod productionNum) error { + pos := state.Int()*t.terminalCount + sym.num().Int() + act := t.actionTable[pos] + if !act.isEmpty() { + ty, _, p := act.describe() + if ty == ActionTypeReduce && p != prod { + return fmt.Errorf("reduce/reduce conflict") + } + return fmt.Errorf("shift/reduce conflict") + } + t.actionTable[pos] = newReduceActionEntry(prod) + + return nil +} + +func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum) { + pos := state.Int()*t.nonTerminalCount + sym.num().Int() + t.goToTable[pos] = newGoToEntry(nextState) +} + +func genSLRParsingTable(automaton *lr0Automaton, prods *productionSet, follow *followSet, termCount, nonTermCount int) (*ParsingTable, error) { + var ptab *ParsingTable + { + initialState := automaton.states[automaton.initialState] + ptab = &ParsingTable{ + actionTable: make([]actionEntry, len(automaton.states)*termCount), + goToTable: make([]goToEntry, len(automaton.states)*nonTermCount), + stateCount: len(automaton.states), + terminalCount: termCount, + nonTerminalCount: nonTermCount, + InitialState: initialState.num, + } + } + + for _, state := range automaton.states { + for sym, kID := range state.next { + nextState := automaton.states[kID] + if sym.isTerminal() { + err := ptab.writeShiftAction(state.num, sym, nextState.num) + if err != nil { + return nil, err + } + } else { + ptab.writeGoTo(state.num, sym, nextState.num) + } + } + + for prodID := range state.reducible { + prod, _ := prods.findByID(prodID) + flw, err := follow.find(prod.lhs) + if err != nil { + return nil, err + } + for sym := range flw.symbols { + err := ptab.writeReduceAction(state.num, sym, prod.num) + if err != nil { + return nil, err + } + } + if flw.eof { + err := 
ptab.writeReduceAction(state.num, symbolEOF, prod.num) + if err != nil { + return nil, err + } + } + } + } + + return ptab, nil +} diff --git a/grammar/slr_test.go b/grammar/slr_test.go new file mode 100644 index 0000000..089b73b --- /dev/null +++ b/grammar/slr_test.go @@ -0,0 +1,481 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +func TestGenSLRParsingTable(t *testing.T) { + src := ` +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + var ptab *ParsingTable + var automaton *lr0Automaton + var gram *Grammar + var nonTermCount int + var termCount int + { + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + gram, err = NewGrammar(ast) + if err != nil { + t.Fatal(err) + } + first, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + follow, err := genFollowSet(gram.productionSet, first) + if err != nil { + t.Fatal(err) + } + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatal(err) + } + + nonTermTexts, err := gram.symbolTable.getNonTerminalTexts() + if err != nil { + t.Fatal(err) + } + termTexts, err := gram.symbolTable.getTerminalTexts() + if err != nil { + t.Fatal(err) + } + nonTermCount = len(nonTermTexts) + termCount = len(termTexts) + + ptab, err = genSLRParsingTable(automaton, gram.productionSet, follow, termCount, nonTermCount) + if err != nil { + t.Fatalf("failed to create a SLR parsing table: %v", err) + } + if ptab == nil { + t.Fatal("genSLRParsingTable returns nil without any error") + } + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lr0Item{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + genLR0Item("expr'", 1, "expr"), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + genLR0Item("expr", 1, "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + genLR0Item("term", 1, "factor"), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + genLR0Item("factor", 1, "id"), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + genLR0Item("expr", 3, "expr", "add", "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 10: { + genLR0Item("term", 3, "term", "mul", "factor"), + }, + 11: { + genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), + }, + } + + expectedStates := []struct { + kernelItems []*lr0Item + acts map[symbol]testActionEntry + goTos map[symbol][]*lr0Item + }{ + { + kernelItems: expectedKernels[0], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lr0Item{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[1], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + symbolEOF: { + ty: 
ActionTypeReduce, + production: genProd("expr'", "expr"), + }, + }, + }, + { + kernelItems: expectedKernels[2], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("expr", "term"), + }, + genSym("mul"): { + ty: ActionTypeShift, + nextState: expectedKernels[7], + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("expr", "term"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("expr", "term"), + }, + }, + }, + { + kernelItems: expectedKernels[3], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + }, + }, + { + kernelItems: expectedKernels[4], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lr0Item{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[5], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + }, + }, + { + kernelItems: expectedKernels[6], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lr0Item{ + genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[7], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lr0Item{ + genSym("factor"): expectedKernels[10], + }, + }, + { + kernelItems: expectedKernels[8], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + genSym("r_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[11], + }, + }, + }, + { + kernelItems: expectedKernels[9], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("expr", "expr", "add", "term"), + }, + genSym("mul"): { + ty: ActionTypeShift, + nextState: expectedKernels[7], + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("expr", "expr", "add", "term"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("expr", "expr", "add", "term"), + }, + }, + }, + { + kernelItems: expectedKernels[10], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("term", "term", 
"mul", "factor"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + }, + }, + { + kernelItems: expectedKernels[11], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + }, + } + + t.Run("initial state", func(t *testing.T) { + iniState := findStateByNum(automaton.states, ptab.InitialState) + if iniState == nil { + t.Fatalf("the initial state was not found: #%v", ptab.InitialState) + } + eIniState, err := newKernel(expectedKernels[0]) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + if iniState.id != eIniState.id { + t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) + } + }) + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("state was not found: #%v", 0) + } + + // ACTION + { + nonEmptyEntries := map[symbolNum]struct{}{} + for eSym, eAct := range eState.acts { + nonEmptyEntries[eSym.num()] = struct{}{} + + ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num()) + if ty != eAct.ty { + t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) + } + switch eAct.ty { + case ActionTypeShift: + eNextState, err := newKernel(eAct.nextState) + if err != nil { + t.Fatal(err) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found; state: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + case ActionTypeReduce: + prod := findProductionByNum(gram.productionSet, prodNum) + if prod == nil { + t.Fatalf("production was not found: #%v", prodNum) + } + if prod.id != eAct.production.id { + t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) + } + } + } + for symNum := 0; symNum < termCount; symNum++ { + if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { + continue + } + ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum)) + if ty != ActionTypeError { + t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) + } + } + } + + // GOTO + { + nonEmptyEntries := map[symbolNum]struct{}{} + for eSym, eGoTo := range eState.goTos { + nonEmptyEntries[eSym.num()] = struct{}{} + + eNextState, err := newKernel(eGoTo) + if err != nil { + t.Fatal(err) + } + ty, stateNum := ptab.getGoTo(state.num, eSym.num()) + if ty != GoToTypeRegistered { + t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, 
nextState.id) + } + } + for symNum := 0; symNum < nonTermCount; symNum++ { + if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { + continue + } + ty, _ := ptab.getGoTo(state.num, symbolNum(symNum)) + if ty != GoToTypeError { + t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) + } + } + } + }) + } +} + +type testActionEntry struct { + ty ActionType + nextState []*lr0Item + production *production +} + +func findStateByNum(states map[kernelID]*lr0State, num stateNum) *lr0State { + for _, state := range states { + if state.num == num { + return state + } + } + return nil +} + +func findProductionByNum(prods *productionSet, num productionNum) *production { + for _, prod := range prods.getAllProductions() { + if prod.num == num { + return prod + } + } + return nil +} diff --git a/grammar/test_helper_test.go b/grammar/test_helper_test.go new file mode 100644 index 0000000..8f5c372 --- /dev/null +++ b/grammar/test_helper_test.go @@ -0,0 +1,52 @@ +package grammar + +import "testing" + +type testSymbolGenerator func(text string) symbol + +func newTestSymbolGenerator(t *testing.T, symTab *symbolTable) testSymbolGenerator { + return func(text string) symbol { + t.Helper() + + sym, ok := symTab.toSymbol(text) + if !ok { + t.Fatalf("symbol was not found: %v", text) + } + return sym + } +} + +type testProductionGenerator func(lhs string, rhs ...string) *production + +func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator { + return func(lhs string, rhs ...string) *production { + t.Helper() + + rhsSym := []symbol{} + for _, text := range rhs { + rhsSym = append(rhsSym, genSym(text)) + } + prod, err := newProduction(genSym(lhs), rhsSym) + if err != nil { + t.Fatalf("failed to create a production: %v", err) + } + + return prod + } +} + +type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lr0Item + +func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { + return func(lhs string, dot int, rhs ...string) *lr0Item { + t.Helper() + + prod := genProd(lhs, rhs...) + item, err := newLR0Item(prod, dot) + if err != nil { + t.Fatalf("failed to create a LR0 item: %v", err) + } + + return item + } +} |
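
A note on the table encoding in slr.go: the ACTION table is a flat slice indexed by state*terminalCount + terminal, where a shift to state n is stored as -n, a reduction by production p is stored as +p, and 0 marks an empty (error) cell; GOTO uses the analogous state*nonTerminalCount + nonTerminal layout. A minimal sketch of decoding one cell, assuming a package-internal helper (printAction is a hypothetical name):

    package grammar

    import "fmt"

    // printAction decodes a single ACTION cell: negative entries are shifts,
    // positive entries are reductions, and zero is an empty (error) cell.
    func printAction(ptab *ParsingTable, state stateNum, sym symbolNum) {
        ty, next, prod := ptab.getAction(state, sym)
        switch ty {
        case ActionTypeShift:
            fmt.Printf("shift and go to state %v\n", next)
        case ActionTypeReduce:
            fmt.Printf("reduce by production %v\n", prod)
        case ActionTypeError:
            fmt.Println("no action (syntax error)")
        }
    }
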