author     Ryo Nihei <nihei.dev@gmail.com>  2021-06-18 22:56:00 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-06-18 22:56:00 +0900
commit     4f681885516651e1a85ba98527f80700c605f333 (patch)
tree       669a2af1fd112b6744a7f175d820a5943d84c8d5
parent     Add production set (diff)
download   cotia-4f681885516651e1a85ba98527f80700c605f333.tar.gz
           cotia-4f681885516651e1a85ba98527f80700c605f333.tar.xz
Add SLR parsing table generator
-rw-r--r--   grammar/first.go              144
-rw-r--r--   grammar/first_test.go         205
-rw-r--r--   grammar/follow.go             197
-rw-r--r--   grammar/follow_test.go        188
-rw-r--r--   grammar/grammar.go            158
-rw-r--r--   grammar/lr0_item.go           343
-rw-r--r--   grammar/lr0_item_test.go      263
-rw-r--r--   grammar/slr.go                170
-rw-r--r--   grammar/slr_test.go           481
-rw-r--r--   grammar/test_helper_test.go    52
10 files changed, 2201 insertions, 0 deletions
diff --git a/grammar/first.go b/grammar/first.go
new file mode 100644
index 0000000..72de282
--- /dev/null
+++ b/grammar/first.go
@@ -0,0 +1,144 @@
+package grammar
+
+import "fmt"
+
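+// firstEntry represents the FIRST set of a symbol: the terminal symbols that
+// can appear first in strings derived from it, plus an empty flag that is set
+// when the symbol can derive the empty string.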
+type firstEntry struct {
+ symbols map[symbol]struct{}
+ empty bool
+}
+
+func newFirstEntry() *firstEntry {
+ return &firstEntry{
+ symbols: map[symbol]struct{}{},
+ empty: false,
+ }
+}
+
+func (e *firstEntry) add(sym symbol) bool {
+ if _, ok := e.symbols[sym]; ok {
+ return false
+ }
+ e.symbols[sym] = struct{}{}
+ return true
+}
+
+func (e *firstEntry) addEmpty() bool {
+ if !e.empty {
+ e.empty = true
+ return true
+ }
+ return false
+}
+
+func (e *firstEntry) mergeExceptEmpty(target *firstEntry) bool {
+ if target == nil {
+ return false
+ }
+ changed := false
+ for sym := range target.symbols {
+ added := e.add(sym)
+ if added {
+ changed = true
+ }
+ }
+ return changed
+}
+
+type firstSet struct {
+ set map[symbol]*firstEntry
+}
+
+func newFirstSet(prods *productionSet) *firstSet {
+ fst := &firstSet{
+ set: map[symbol]*firstEntry{},
+ }
+ for _, prod := range prods.getAllProductions() {
+ if _, ok := fst.set[prod.lhs]; ok {
+ continue
+ }
+ fst.set[prod.lhs] = newFirstEntry()
+ }
+
+ return fst
+}
+
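+// find computes the FIRST set of the suffix of prod's RHS starting at
+// position head. A terminal stops the scan; a non-terminal contributes its
+// own FIRST entry, and the scan continues past it only while it can derive
+// the empty string.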
+func (fst *firstSet) find(prod *production, head int) (*firstEntry, error) {
+ entry := newFirstEntry()
+ if prod.rhsLen <= head {
+ entry.addEmpty()
+ return entry, nil
+ }
+ for _, sym := range prod.rhs[head:] {
+ if sym.isTerminal() {
+ entry.add(sym)
+ return entry, nil
+ }
+
+ e := fst.findBySymbol(sym)
+ if e == nil {
+ return nil, fmt.Errorf("an entry of FIRST was not found; symbol: %s", sym)
+ }
+ for s := range e.symbols {
+ entry.add(s)
+ }
+ if !e.empty {
+ return entry, nil
+ }
+ }
+ entry.addEmpty()
+ return entry, nil
+}
+
+func (fst *firstSet) findBySymbol(sym symbol) *firstEntry {
+ return fst.set[sym]
+}
+
+type firstComContext struct {
+ first *firstSet
+}
+
+func newFirstComContext(prods *productionSet) *firstComContext {
+ return &firstComContext{
+ first: newFirstSet(prods),
+ }
+}
+
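+// genFirstSet computes the FIRST set of every non-terminal symbol. It
+// repeatedly applies genProdFirstEntry to all productions until no entry
+// changes anymore (a fixed-point iteration).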
+func genFirstSet(prods *productionSet) (*firstSet, error) {
+ cc := newFirstComContext(prods)
+ for {
+ more := false
+ for _, prod := range prods.getAllProductions() {
+ e := cc.first.findBySymbol(prod.lhs)
+ changed, err := genProdFirstEntry(cc, e, prod)
+ if err != nil {
+ return nil, err
+ }
+ if changed {
+ more = true
+ }
+ }
+ if !more {
+ break
+ }
+ }
+ return cc.first, nil
+}
+
+func genProdFirstEntry(cc *firstComContext, acc *firstEntry, prod *production) (bool, error) {
+ if prod.isEmpty() {
+ return acc.addEmpty(), nil
+ }
+
+ for _, sym := range prod.rhs {
+ if sym.isTerminal() {
+ return acc.add(sym), nil
+ }
+
+ e := cc.first.findBySymbol(sym)
+ changed := acc.mergeExceptEmpty(e)
+ if !e.empty {
+ return changed, nil
+ }
+ }
+ return acc.addEmpty(), nil
+}
diff --git a/grammar/first_test.go b/grammar/first_test.go
new file mode 100644
index 0000000..b65b0ea
--- /dev/null
+++ b/grammar/first_test.go
@@ -0,0 +1,205 @@
+package grammar
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/spec"
+)
+
+type first struct {
+ lhs string
+ num int
+ dot int
+ symbols []string
+ empty bool
+}
+
+func TestGenFirst(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ first []first
+ }{
+ {
+ caption: "productions contain only non-empty productions",
+ src: `
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ first: []first{
+ {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}},
+ {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}},
+ {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}},
+ {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}},
+ {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}},
+ {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}},
+ },
+ },
+ {
+ caption: "productions contain the empty start production",
+ src: `
+s
+ :
+ ;
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "productions contain an empty production",
+ src: `
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar: "bar";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+ {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "a start production contains a non-empty alternative and empty alternative",
+ src: `
+s
+ : foo
+ |
+ ;
+foo: "foo";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}},
+ {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "a production contains non-empty alternative and empty alternative",
+ src: `
+s
+ : foo
+ ;
+foo
+ : bar
+ |
+ ;
+bar: "bar";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+ {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}},
+ {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ fst, gram := genActualFirst(t, tt.src)
+
+ for _, ttFirst := range tt.first {
+ lhsSym, ok := gram.symbolTable.toSymbol(ttFirst.lhs)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
+ }
+
+ prod, ok := gram.productionSet.findByLHS(lhsSym)
+ if !ok {
+ t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
+ }
+
+ actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
+ if err != nil {
+ t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
+ }
+
+ expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
+
+ testFirst(t, actualFirst, expectedFirst)
+ }
+ })
+ }
+}
+
+func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
+ ast, err := spec.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ gram, err := NewGrammar(ast)
+ if err != nil {
+ t.Fatal(err)
+ }
+ fst, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if fst == nil {
+ t.Fatal("genFiest returned nil without any error")
+ }
+
+ return fst, gram
+}
+
+func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbolTable) *firstEntry {
+ t.Helper()
+
+ entry := newFirstEntry()
+ if empty {
+ entry.addEmpty()
+ }
+ for _, sym := range symbols {
+ symSym, ok := symTab.toSymbol(sym)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", sym)
+ }
+ entry.add(symSym)
+ }
+
+ return entry
+}
+
+func testFirst(t *testing.T, actual, expected *firstEntry) {
+ if actual.empty != expected.empty {
+ t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
+ }
+
+ if len(actual.symbols) != len(expected.symbols) {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+
+ for eSym := range expected.symbols {
+ if _, ok := actual.symbols[eSym]; !ok {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+ }
+}
diff --git a/grammar/follow.go b/grammar/follow.go
new file mode 100644
index 0000000..f835bba
--- /dev/null
+++ b/grammar/follow.go
@@ -0,0 +1,197 @@
+package grammar
+
+import (
+ "fmt"
+)
+
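+// followEntry represents the FOLLOW set of a non-terminal symbol: the
+// terminals that can appear immediately after it in some derivation, plus an
+// eof flag for non-terminals that can be followed by the end of input.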
+type followEntry struct {
+ symbols map[symbol]struct{}
+ eof bool
+}
+
+func newFollowEntry() *followEntry {
+ return &followEntry{
+ symbols: map[symbol]struct{}{},
+ eof: false,
+ }
+}
+
+func (e *followEntry) add(sym symbol) bool {
+ if _, ok := e.symbols[sym]; ok {
+ return false
+ }
+ e.symbols[sym] = struct{}{}
+ return true
+}
+
+func (e *followEntry) addEOF() bool {
+ if !e.eof {
+ e.eof = true
+ return true
+ }
+ return false
+}
+
+func (e *followEntry) merge(fst *firstEntry, flw *followEntry) bool {
+ changed := false
+
+ if fst != nil {
+ for sym := range fst.symbols {
+ added := e.add(sym)
+ if added {
+ changed = true
+ }
+ }
+ }
+
+ if flw != nil {
+ for sym := range flw.symbols {
+ added := e.add(sym)
+ if added {
+ changed = true
+ }
+ }
+ if flw.eof {
+ added := e.addEOF()
+ if added {
+ changed = true
+ }
+ }
+ }
+
+ return changed
+}
+
+type followSet struct {
+ set map[symbol]*followEntry
+}
+
+func newFollow(prods *productionSet) *followSet {
+ flw := &followSet{
+ set: map[symbol]*followEntry{},
+ }
+ for _, prod := range prods.getAllProductions() {
+ if _, ok := flw.set[prod.lhs]; ok {
+ continue
+ }
+ flw.set[prod.lhs] = newFollowEntry()
+ }
+ return flw
+}
+
+func (flw *followSet) find(sym symbol) (*followEntry, error) {
+ e, ok := flw.set[sym]
+ if !ok {
+ return nil, fmt.Errorf("an entry of FOLLOW was not found; symbol: %s", sym)
+ }
+ return e, nil
+}
+
+type followComContext struct {
+ prods *productionSet
+ first *firstSet
+ follow *followSet
+}
+
+func newFollowComContext(prods *productionSet, first *firstSet) *followComContext {
+ return &followComContext{
+ prods: prods,
+ first: first,
+ follow: newFollow(prods),
+ }
+}
+
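+// genFollowSet computes the FOLLOW set of every non-terminal symbol using the
+// already computed FIRST sets. The augmented start symbol receives EOF, and
+// for every occurrence of a non-terminal B in a production A → αBβ, FOLLOW(B)
+// gains FIRST(β) and, when β can derive the empty string, FOLLOW(A) as well.
+// The loop runs until a fixed point is reached.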
+func genFollowSet(prods *productionSet, first *firstSet) (*followSet, error) {
+ ntsyms := map[symbol]struct{}{}
+ for _, prod := range prods.getAllProductions() {
+ if _, ok := ntsyms[prod.lhs]; ok {
+ continue
+ }
+ ntsyms[prod.lhs] = struct{}{}
+ }
+
+ cc := newFollowComContext(prods, first)
+ for {
+ more := false
+ for ntsym := range ntsyms {
+ e, err := cc.follow.find(ntsym)
+ if err != nil {
+ return nil, err
+ }
+ if ntsym.isStart() {
+ changed := e.addEOF()
+ if changed {
+ more = true
+ }
+ }
+ for _, prod := range prods.getAllProductions() {
+ for i, sym := range prod.rhs {
+ if sym != ntsym {
+ continue
+ }
+ fst, err := first.find(prod, i+1)
+ if err != nil {
+ return nil, err
+ }
+ changed := e.merge(fst, nil)
+ if changed {
+ more = true
+ }
+ if fst.empty {
+ flw, err := cc.follow.find(prod.lhs)
+ if err != nil {
+ return nil, err
+ }
+ changed := e.merge(nil, flw)
+ if changed {
+ more = true
+ }
+ }
+ }
+ }
+ }
+ if !more {
+ break
+ }
+ }
+
+ return cc.follow, nil
+}
+
+func genFollowEntry(cc *followComContext, acc *followEntry, ntsym symbol) (bool, error) {
+ changed := false
+
+ if ntsym.isStart() {
+ added := acc.addEOF()
+ if added {
+ changed = true
+ }
+ }
+ for _, prod := range cc.prods.getAllProductions() {
+ for i, sym := range prod.rhs {
+ if sym != ntsym {
+ continue
+ }
+ fst, err := cc.first.find(prod, i+1)
+ if err != nil {
+ return false, err
+ }
+ added := acc.merge(fst, nil)
+ if added {
+ changed = true
+ }
+ if fst.empty {
+ flw, err := cc.follow.find(prod.lhs)
+ if err != nil {
+ return false, err
+ }
+ added := acc.merge(nil, flw)
+ if added {
+ changed = true
+ }
+ }
+ }
+ }
+
+ return changed, nil
+}
diff --git a/grammar/follow_test.go b/grammar/follow_test.go
new file mode 100644
index 0000000..042bc88
--- /dev/null
+++ b/grammar/follow_test.go
@@ -0,0 +1,188 @@
+package grammar
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/spec"
+)
+
+type follow struct {
+ nonTermText string
+ symbols []string
+ eof bool
+}
+
+func TestFollowSet(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ follow []follow
+ }{
+ {
+ caption: "productions contain only non-empty productions",
+ src: `
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ follow: []follow{
+ {nonTermText: "expr'", symbols: []string{}, eof: true},
+ {nonTermText: "expr", symbols: []string{"add", "r_paren"}, eof: true},
+ {nonTermText: "term", symbols: []string{"add", "mul", "r_paren"}, eof: true},
+ {nonTermText: "factor", symbols: []string{"add", "mul", "r_paren"}, eof: true},
+ },
+ },
+ {
+ caption: "productions contain an empty start production",
+ src: `
+s
+ :
+ ;
+`,
+ follow: []follow{
+ {nonTermText: "s'", symbols: []string{}, eof: true},
+ {nonTermText: "s", symbols: []string{}, eof: true},
+ },
+ },
+ {
+ caption: "productions contain an empty production",
+ src: `
+s
+ : foo
+ ;
+foo
+ :
+;
+`,
+ follow: []follow{
+ {nonTermText: "s'", symbols: []string{}, eof: true},
+ {nonTermText: "s", symbols: []string{}, eof: true},
+ {nonTermText: "foo", symbols: []string{}, eof: true},
+ },
+ },
+ {
+ caption: "a start production contains a non-empty alternative and empty alternative",
+ src: `
+s
+ : foo
+ |
+ ;
+foo: "foo";
+`,
+ follow: []follow{
+ {nonTermText: "s'", symbols: []string{}, eof: true},
+ {nonTermText: "s", symbols: []string{}, eof: true},
+ },
+ },
+ {
+ caption: "a production contains non-empty alternative and empty alternative",
+ src: `
+s
+ : foo
+ ;
+foo
+ : bar
+ |
+ ;
+bar: "bar";
+`,
+ follow: []follow{
+ {nonTermText: "s'", symbols: []string{}, eof: true},
+ {nonTermText: "s", symbols: []string{}, eof: true},
+ {nonTermText: "foo", symbols: []string{}, eof: true},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ flw, gram := genActualFollow(t, tt.src)
+
+ for _, ttFollow := range tt.follow {
+ sym, ok := gram.symbolTable.toSymbol(ttFollow.nonTermText)
+ if !ok {
+ t.Fatalf("a symbol '%v' was not found", ttFollow.nonTermText)
+ }
+
+ actualFollow, err := flw.find(sym)
+ if err != nil {
+ t.Fatalf("failed to get a FOLLOW entry; non-terminal symbol: %v (%v), error: %v", ttFollow.nonTermText, sym, err)
+ }
+
+ expectedFollow := genExpectedFollowEntry(t, ttFollow.symbols, ttFollow.eof, gram.symbolTable)
+
+ testFollow(t, actualFollow, expectedFollow)
+ }
+ })
+ }
+}
+
+func genActualFollow(t *testing.T, src string) (*followSet, *Grammar) {
+ ast, err := spec.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ gram, err := NewGrammar(ast)
+ if err != nil {
+ t.Fatal(err)
+ }
+ fst, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ flw, err := genFollowSet(gram.productionSet, fst)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if flw == nil {
+ t.Fatal("genFollowSet returned nil without any error")
+ }
+
+ return flw, gram
+}
+
+func genExpectedFollowEntry(t *testing.T, symbols []string, eof bool, symTab *symbolTable) *followEntry {
+ t.Helper()
+
+ entry := newFollowEntry()
+ if eof {
+ entry.addEOF()
+ }
+ for _, sym := range symbols {
+ symID, _ := symTab.toSymbol(sym)
+ if symID.isNil() {
+ t.Fatalf("a symbol '%v' was not found", sym)
+ }
+
+ entry.add(symID)
+ }
+
+ return entry
+}
+
+func testFollow(t *testing.T, actual, expected *followEntry) {
+ if actual.eof != expected.eof {
+ t.Errorf("eof is mismatched; want: %v, got: %v", expected.eof, actual.eof)
+ }
+
+ if len(actual.symbols) != len(expected.symbols) {
+ t.Fatalf("unexpected symbol count of a FOLLOW entry; want: %v, got: %v", expected.symbols, actual.symbols)
+ }
+
+ for eSym := range expected.symbols {
+ if _, ok := actual.symbols[eSym]; !ok {
+ t.Fatalf("invalid FOLLOW entry; want: %v, got: %v", expected.symbols, actual.symbols)
+ }
+ }
+}
diff --git a/grammar/grammar.go b/grammar/grammar.go
new file mode 100644
index 0000000..ce0ff19
--- /dev/null
+++ b/grammar/grammar.go
@@ -0,0 +1,158 @@
+package grammar
+
+import (
+ "fmt"
+
+ mlspec "github.com/nihei9/maleeni/spec"
+ "github.com/nihei9/vartan/spec"
+)
+
+type Grammar struct {
+ lexSpec *mlspec.LexSpec
+ productionSet *productionSet
+ augmentedStartSymbol symbol
+ symbolTable *symbolTable
+}
+
+func NewGrammar(root *spec.RootNode) (*Grammar, error) {
+ symTab := newSymbolTable()
+ anonPat2Sym := map[string]symbol{}
+ var lexSpec *mlspec.LexSpec
+ {
+ entries := []*mlspec.LexEntry{}
+ anonPats := []string{}
+ for _, prod := range root.Productions {
+ if isLexicalProduction(prod) {
+ _, err := symTab.registerTerminalSymbol(prod.LHS)
+ if err != nil {
+ return nil, err
+ }
+
+ entries = append(entries, &mlspec.LexEntry{
+ Kind: mlspec.LexKind(prod.LHS),
+ Pattern: mlspec.LexPattern(prod.RHS[0].Elements[0].Pattern),
+ })
+ continue
+ }
+
+ for _, alt := range prod.RHS {
+ for _, elem := range alt.Elements {
+ if elem.Pattern == "" {
+ continue
+ }
+ exist := false
+ for _, p := range anonPats {
+ if p == elem.Pattern {
+ exist = true
+ break
+ }
+ }
+ if exist {
+ continue
+ }
+ anonPats = append(anonPats, elem.Pattern)
+ }
+ }
+ }
+ for i, p := range anonPats {
+ kind := fmt.Sprintf("__%v__", i+1)
+
+ sym, err := symTab.registerTerminalSymbol(kind)
+ if err != nil {
+ return nil, err
+ }
+ anonPat2Sym[p] = sym
+
+ entries = append(entries, &mlspec.LexEntry{
+ Kind: mlspec.LexKind(kind),
+ Pattern: mlspec.LexPattern(p),
+ })
+ }
+
+ lexSpec = &mlspec.LexSpec{
+ Entries: entries,
+ }
+ }
+
+ prods := newProductionSet()
+ var augStartSym symbol
+ {
+ startProd := root.Productions[0]
+ augStartText := fmt.Sprintf("%s'", startProd.LHS)
+ var err error
+ augStartSym, err = symTab.registerStartSymbol(augStartText)
+ if err != nil {
+ return nil, err
+ }
+ startSym, err := symTab.registerNonTerminalSymbol(startProd.LHS)
+ if err != nil {
+ return nil, err
+ }
+ p, err := newProduction(augStartSym, []symbol{
+ startSym,
+ })
+ if err != nil {
+ return nil, err
+ }
+ prods.append(p)
+
+ for _, prod := range root.Productions {
+ if isLexicalProduction(prod) {
+ continue
+ }
+ _, err := symTab.registerNonTerminalSymbol(prod.LHS)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ for _, prod := range root.Productions {
+ if isLexicalProduction(prod) {
+ continue
+ }
+ lhsSym, ok := symTab.toSymbol(prod.LHS)
+ if !ok {
+ return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS)
+ }
+ for _, alt := range prod.RHS {
+ altSyms := make([]symbol, len(alt.Elements))
+ for i, elem := range alt.Elements {
+ var sym symbol
+ if elem.Pattern != "" {
+ var ok bool
+ sym, ok = anonPat2Sym[elem.Pattern]
+ if !ok {
+ return nil, fmt.Errorf("pattern '%v' is undefined", elem.Pattern)
+ }
+ } else {
+ var ok bool
+ sym, ok = symTab.toSymbol(elem.ID)
+ if !ok {
+ return nil, fmt.Errorf("symbol '%v' is undefined", elem.ID)
+ }
+ }
+ altSyms[i] = sym
+ }
+ p, err := newProduction(lhsSym, altSyms)
+ if err != nil {
+ return nil, err
+ }
+ prods.append(p)
+ }
+ }
+ }
+
+ return &Grammar{
+ lexSpec: lexSpec,
+ productionSet: prods,
+ augmentedStartSymbol: augStartSym,
+ symbolTable: symTab,
+ }, nil
+}
+
+func isLexicalProduction(prod *spec.ProductionNode) bool {
+ if len(prod.RHS) == 1 && len(prod.RHS[0].Elements) == 1 && prod.RHS[0].Elements[0].Pattern != "" {
+ return true
+ }
+ return false
+}
diff --git a/grammar/lr0_item.go b/grammar/lr0_item.go
new file mode 100644
index 0000000..2934061
--- /dev/null
+++ b/grammar/lr0_item.go
@@ -0,0 +1,343 @@
+package grammar
+
+import (
+ "crypto/sha256"
+ "encoding/binary"
+ "fmt"
+ "sort"
+ "strconv"
+)
+
+type lr0ItemID [32]byte
+
+func (id lr0ItemID) String() string {
+ return fmt.Sprintf("%x", id.num())
+}
+
+func (id lr0ItemID) num() uint32 {
+ return binary.LittleEndian.Uint32(id[:])
+}
+
+type lr0Item struct {
+ id lr0ItemID
+ prod productionID
+
+ // E → E + T
+ //
+ // Dot | Dotted Symbol | Item
+ // ----+---------------+------------
+ // 0 | E | E →・E + T
+ // 1 | + | E → E・+ T
+ // 2 | T | E → E +・T
+ // 3 | Nil | E → E + T・
+ dot int
+ dottedSymbol symbol
+
+ // When initial is true, the LHS of the production is the augmented start symbol and dot is 0.
+ // It looks like S' →・S.
+ initial bool
+
+ // When reducible is true, the item looks like E → E + T・.
+ reducible bool
+
+ // When kernel is true, the item is kernel item.
+ kernel bool
+}
+
+func newLR0Item(prod *production, dot int) (*lr0Item, error) {
+ if prod == nil {
+ return nil, fmt.Errorf("production must be non-nil")
+ }
+
+ if dot < 0 || dot > prod.rhsLen {
+ return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen)
+ }
+
+ var id lr0ItemID
+ {
+ b := []byte{}
+ b = append(b, prod.id[:]...)
+ bDot := make([]byte, 8)
+ binary.LittleEndian.PutUint64(bDot, uint64(dot))
+ b = append(b, bDot...)
+ id = sha256.Sum256(b)
+ }
+
+ dottedSymbol := symbolNil
+ if dot < prod.rhsLen {
+ dottedSymbol = prod.rhs[dot]
+ }
+
+ initial := false
+ if prod.lhs.isStart() && dot == 0 {
+ initial = true
+ }
+
+ reducible := false
+ if dot == prod.rhsLen {
+ reducible = true
+ }
+
+ kernel := false
+ if initial || dot > 0 {
+ kernel = true
+ }
+
+ item := &lr0Item{
+ id: id,
+ prod: prod.id,
+ dot: dot,
+ dottedSymbol: dottedSymbol,
+ initial: initial,
+ reducible: reducible,
+ kernel: kernel,
+ }
+
+ return item, nil
+}
+
+type kernelID [32]byte
+
+func (id kernelID) String() string {
+ return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:]))
+}
+
+type kernel struct {
+ id kernelID
+ items []*lr0Item
+}
+
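+// newKernel builds a kernel from a set of kernel items. Duplicate items are
+// removed and the remaining items are sorted by ID so that kernels containing
+// the same items always produce the same kernel ID (the SHA-256 hash of the
+// concatenated item IDs).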
+func newKernel(items []*lr0Item) (*kernel, error) {
+ if len(items) == 0 {
+ return nil, fmt.Errorf("a kernel need at least one item")
+ }
+
+ // Remove duplicates from items.
+ var sortedItems []*lr0Item
+ {
+ m := map[lr0ItemID]*lr0Item{}
+ for _, item := range items {
+ if !item.kernel {
+ return nil, fmt.Errorf("not a kernel item: %v", item)
+ }
+ m[item.id] = item
+ }
+ sortedItems = []*lr0Item{}
+ for _, item := range m {
+ sortedItems = append(sortedItems, item)
+ }
+ sort.Slice(sortedItems, func(i, j int) bool {
+ return sortedItems[i].id.num() < sortedItems[j].id.num()
+ })
+ }
+
+ var id kernelID
+ {
+ b := []byte{}
+ for _, item := range sortedItems {
+ b = append(b, item.id[:]...)
+ }
+ id = sha256.Sum256(b)
+ }
+
+ return &kernel{
+ id: id,
+ items: sortedItems,
+ }, nil
+}
+
+type stateNum int
+
+const stateNumInitial = stateNum(0)
+
+func (n stateNum) Int() int {
+ return int(n)
+}
+
+func (n stateNum) String() string {
+ return strconv.Itoa(int(n))
+}
+
+func (n stateNum) next() stateNum {
+ return stateNum(n + 1)
+}
+
+type lr0State struct {
+ *kernel
+ num stateNum
+ next map[symbol]kernelID
+ reducible map[productionID]struct{}
+}
+
+type lr0Automaton struct {
+ initialState kernelID
+ states map[kernelID]*lr0State
+}
+
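+// genLR0Automaton constructs the canonical collection of LR(0) item sets.
+// Starting from the kernel {S' →・S}, it repeatedly generates each state's
+// closure and the kernels of its neighbouring states until no unknown kernel
+// remains.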
+func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) {
+ if !startSym.isStart() {
+ return nil, fmt.Errorf("passed symbold is not a start symbol")
+ }
+
+ automaton := &lr0Automaton{
+ states: map[kernelID]*lr0State{},
+ }
+
+ currentState := stateNumInitial
+ knownKernels := map[kernelID]struct{}{}
+ uncheckedKernels := []*kernel{}
+
+ // Generate an initial kernel.
+ {
+ prods, _ := prods.findByLHS(startSym)
+ initialItem, err := newLR0Item(prods[0], 0)
+ if err != nil {
+ return nil, err
+ }
+
+ k, err := newKernel([]*lr0Item{initialItem})
+ if err != nil {
+ return nil, err
+ }
+
+ automaton.initialState = k.id
+ knownKernels[k.id] = struct{}{}
+ uncheckedKernels = append(uncheckedKernels, k)
+ }
+
+ for len(uncheckedKernels) > 0 {
+ nextUncheckedKernels := []*kernel{}
+ for _, k := range uncheckedKernels {
+ state, neighbours, err := genStateAndNeighbourKernels(k, prods)
+ if err != nil {
+ return nil, err
+ }
+ state.num = currentState
+ currentState = currentState.next()
+
+ automaton.states[state.id] = state
+
+ for _, k := range neighbours {
+ if _, known := knownKernels[k.id]; known {
+ continue
+ }
+ knownKernels[k.id] = struct{}{}
+ nextUncheckedKernels = append(nextUncheckedKernels, k)
+ }
+ }
+ uncheckedKernels = nextUncheckedKernels
+ }
+
+ return automaton, nil
+}
+
+func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lr0State, []*kernel, error) {
+ items, err := genClosure(k, prods)
+ if err != nil {
+ return nil, nil, err
+ }
+ neighbours, err := genNeighbourKernels(items, prods)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ next := map[symbol]kernelID{}
+ kernels := []*kernel{}
+ for _, n := range neighbours {
+ next[n.symbol] = n.kernel.id
+ kernels = append(kernels, n.kernel)
+ }
+
+ reducible := map[productionID]struct{}{}
+ for _, item := range items {
+ if item.reducible {
+ reducible[item.prod] = struct{}{}
+ }
+ }
+
+ return &lr0State{
+ kernel: k,
+ next: next,
+ reducible: reducible,
+ }, kernels, nil
+}
+
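+// genClosure expands a kernel into its closure: for every item whose dotted
+// symbol is a non-terminal, the initial items B →・γ of all productions of
+// that non-terminal are added, repeatedly, until no new item appears.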
+func genClosure(k *kernel, prods *productionSet) ([]*lr0Item, error) {
+ items := []*lr0Item{}
+ knownItems := map[lr0ItemID]struct{}{}
+ uncheckedItems := []*lr0Item{}
+ for _, item := range k.items {
+ items = append(items, item)
+ uncheckedItems = append(uncheckedItems, item)
+ }
+ for len(uncheckedItems) > 0 {
+ nextUncheckedItems := []*lr0Item{}
+ for _, item := range uncheckedItems {
+ if item.dottedSymbol.isTerminal() {
+ continue
+ }
+
+ ps, _ := prods.findByLHS(item.dottedSymbol)
+ for _, prod := range ps {
+ item, err := newLR0Item(prod, 0)
+ if err != nil {
+ return nil, err
+ }
+ if _, exist := knownItems[item.id]; exist {
+ continue
+ }
+ items = append(items, item)
+ knownItems[item.id] = struct{}{}
+ nextUncheckedItems = append(nextUncheckedItems, item)
+ }
+ }
+ uncheckedItems = nextUncheckedItems
+ }
+
+ return items, nil
+}
+
+type neighbourKernel struct {
+ symbol symbol
+ kernel *kernel
+}
+
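+// genNeighbourKernels groups the closure items by their dotted symbol and,
+// for each symbol, advances the dot by one position to produce the kernel of
+// the state reached on that symbol (the GOTO targets of the state).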
+func genNeighbourKernels(items []*lr0Item, prods *productionSet) ([]*neighbourKernel, error) {
+ kItemMap := map[symbol][]*lr0Item{}
+ for _, item := range items {
+ if item.dottedSymbol.isNil() {
+ continue
+ }
+ prod, ok := prods.findByID(item.prod)
+ if !ok {
+ return nil, fmt.Errorf("a production was not found: %v", item.prod)
+ }
+ kItem, err := newLR0Item(prod, item.dot+1)
+ if err != nil {
+ return nil, err
+ }
+ kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem)
+ }
+
+ nextSyms := []symbol{}
+ for sym := range kItemMap {
+ nextSyms = append(nextSyms, sym)
+ }
+ sort.Slice(nextSyms, func(i, j int) bool {
+ return nextSyms[i] < nextSyms[j]
+ })
+
+ kernels := []*neighbourKernel{}
+ for _, sym := range nextSyms {
+ k, err := newKernel(kItemMap[sym])
+ if err != nil {
+ return nil, err
+ }
+ kernels = append(kernels, &neighbourKernel{
+ symbol: sym,
+ kernel: k,
+ })
+ }
+
+ return kernels, nil
+}
diff --git a/grammar/lr0_item_test.go b/grammar/lr0_item_test.go
new file mode 100644
index 0000000..c04ce13
--- /dev/null
+++ b/grammar/lr0_item_test.go
@@ -0,0 +1,263 @@
+package grammar
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/spec"
+)
+
+func TestGenLR0Automaton(t *testing.T) {
+ src := `
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`
+
+ ast, err := spec.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ gram, err := NewGrammar(ast)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ automaton, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genLR0Automaton returns nil without any error")
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+ t.Errorf("failed to get an initial status: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lr0Item{
+ 0: {
+ genLR0Item("expr'", 0, "expr"),
+ },
+ 1: {
+ genLR0Item("expr'", 1, "expr"),
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ },
+ 2: {
+ genLR0Item("expr", 1, "term"),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 3: {
+ genLR0Item("term", 1, "factor"),
+ },
+ 4: {
+ genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
+ },
+ 5: {
+ genLR0Item("factor", 1, "id"),
+ },
+ 6: {
+ genLR0Item("expr", 2, "expr", "add", "term"),
+ },
+ 7: {
+ genLR0Item("term", 2, "term", "mul", "factor"),
+ },
+ 8: {
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
+ },
+ 9: {
+ genLR0Item("expr", 3, "expr", "add", "term"),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 10: {
+ genLR0Item("term", 3, "term", "mul", "factor"),
+ },
+ 11: {
+ genLR0Item("factor", 3, "l_paren", "expr", "r_paren"),
+ },
+ }
+
+ expectedStates := []expectedLR0State{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("expr"): expectedKernels[1],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("add"): expectedKernels[6],
+ },
+ reducibleProds: []*production{
+ genProd("expr'", "expr"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("mul"): expectedKernels[7],
+ },
+ reducibleProds: []*production{
+ genProd("expr", "term"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol][]*lr0Item{},
+ reducibleProds: []*production{
+ genProd("term", "factor"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("expr"): expectedKernels[8],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[5],
+ nextStates: map[symbol][]*lr0Item{},
+ reducibleProds: []*production{
+ genProd("factor", "id"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("term"): expectedKernels[9],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[7],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("factor"): expectedKernels[10],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[8],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("add"): expectedKernels[6],
+ genSym("r_paren"): expectedKernels[11],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[9],
+ nextStates: map[symbol][]*lr0Item{
+ genSym("mul"): expectedKernels[7],
+ },
+ reducibleProds: []*production{
+ genProd("expr", "expr", "add", "term"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[10],
+ nextStates: map[symbol][]*lr0Item{},
+ reducibleProds: []*production{
+ genProd("term", "term", "mul", "factor"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[11],
+ nextStates: map[symbol][]*lr0Item{},
+ reducibleProds: []*production{
+ genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ },
+ }
+
+ if len(automaton.states) != len(expectedStates) {
+ t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states))
+ }
+
+ for i, eState := range expectedStates {
+ t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("a kernel was not found: %v", k.id)
+ }
+
+ // test next states
+ {
+ if len(state.next) != len(eState.nextStates) {
+ t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next))
+ }
+ for eSym, eKItems := range eState.nextStates {
+ nextStateKernel, err := newKernel(eKItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ nextState, ok := state.next[eSym]
+ if !ok {
+ t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
+ }
+ if nextState != nextStateKernel.id {
+ t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
+ }
+ }
+ }
+
+ // test reducible productions
+ {
+ if len(state.reducible) != len(eState.reducibleProds) {
+ t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
+ }
+ for _, eProd := range eState.reducibleProds {
+ if _, ok := state.reducible[eProd.id]; !ok {
+ t.Errorf("reducible production was not found: %v", eProd.id)
+ }
+ }
+ }
+ })
+ }
+}
+
+type expectedLR0State struct {
+ kernelItems []*lr0Item
+ nextStates map[symbol][]*lr0Item
+ reducibleProds []*production
+}
diff --git a/grammar/slr.go b/grammar/slr.go
new file mode 100644
index 0000000..a3ce803
--- /dev/null
+++ b/grammar/slr.go
@@ -0,0 +1,170 @@
+package grammar
+
+import (
+ "fmt"
+)
+
+type ActionType string
+
+const (
+ ActionTypeShift = ActionType("shift")
+ ActionTypeReduce = ActionType("reduce")
+ ActionTypeError = ActionType("error")
+)
+
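+// actionEntry packs an ACTION table cell into a single integer: 0 means an
+// empty (error) cell, a negative value means "shift and go to state -n", and
+// a positive value means "reduce by production number n".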
+type actionEntry int
+
+const actionEntryEmpty = actionEntry(0)
+
+func newShiftActionEntry(state stateNum) actionEntry {
+ return actionEntry(state * -1)
+}
+
+func newReduceActionEntry(prod productionNum) actionEntry {
+ return actionEntry(prod)
+}
+
+func (e actionEntry) isEmpty() bool {
+ return e == actionEntryEmpty
+}
+
+func (e actionEntry) describe() (ActionType, stateNum, productionNum) {
+ if e == actionEntryEmpty {
+ return ActionTypeError, stateNumInitial, productionNumNil
+ }
+ if e < 0 {
+ return ActionTypeShift, stateNum(e * -1), productionNumNil
+ }
+ return ActionTypeReduce, stateNumInitial, productionNum(e)
+}
+
+type GoToType string
+
+const (
+ GoToTypeRegistered = GoToType("registered")
+ GoToTypeError = GoToType("error")
+)
+
+type goToEntry uint
+
+const goToEntryEmpty = goToEntry(0)
+
+func newGoToEntry(state stateNum) goToEntry {
+ return goToEntry(state)
+}
+
+func (e goToEntry) isEmpty() bool {
+ return e == goToEntryEmpty
+}
+
+func (e goToEntry) describe() (GoToType, stateNum) {
+ if e == goToEntryEmpty {
+ return GoToTypeError, stateNumInitial
+ }
+ return GoToTypeRegistered, stateNum(e)
+}
+
+type ParsingTable struct {
+ actionTable []actionEntry
+ goToTable []goToEntry
+ stateCount int
+ terminalCount int
+ nonTerminalCount int
+
+ InitialState stateNum
+}
+
+func (t *ParsingTable) getAction(state stateNum, sym symbolNum) (ActionType, stateNum, productionNum) {
+ pos := state.Int()*t.terminalCount + sym.Int()
+ return t.actionTable[pos].describe()
+}
+
+func (t *ParsingTable) getGoTo(state stateNum, sym symbolNum) (GoToType, stateNum) {
+ pos := state.Int()*t.nonTerminalCount + sym.Int()
+ return t.goToTable[pos].describe()
+}
+
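+// writeShiftAction records a shift action and reports a shift/reduce conflict
+// if the cell already holds a reduce action; writeReduceAction below performs
+// the corresponding checks for reduce actions, including reduce/reduce
+// conflicts.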
+func (t *ParsingTable) writeShiftAction(state stateNum, sym symbol, nextState stateNum) error {
+ pos := state.Int()*t.terminalCount + sym.num().Int()
+ act := t.actionTable[pos]
+ if !act.isEmpty() {
+ ty, _, _ := act.describe()
+ if ty == ActionTypeReduce {
+ return fmt.Errorf("shift/reduce conflict")
+ }
+ }
+ t.actionTable[pos] = newShiftActionEntry(nextState)
+
+ return nil
+}
+
+func (t *ParsingTable) writeReduceAction(state stateNum, sym symbol, prod productionNum) error {
+ pos := state.Int()*t.terminalCount + sym.num().Int()
+ act := t.actionTable[pos]
+ if !act.isEmpty() {
+ ty, _, p := act.describe()
+ if ty == ActionTypeReduce && p != prod {
+ return fmt.Errorf("reduce/reduce conflict")
+ }
+ return fmt.Errorf("shift/reduce conflict")
+ }
+ t.actionTable[pos] = newReduceActionEntry(prod)
+
+ return nil
+}
+
+func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum) {
+ pos := state.Int()*t.nonTerminalCount + sym.num().Int()
+ t.goToTable[pos] = newGoToEntry(nextState)
+}
+
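+// genSLRParsingTable builds the ACTION and GOTO tables from the LR(0)
+// automaton. Transitions on terminals become shift actions, transitions on
+// non-terminals become GOTO entries, and every reducible production in a
+// state yields reduce actions on the symbols in FOLLOW(LHS), plus EOF when
+// EOF is in the FOLLOW set. This is the SLR(1) construction.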
+func genSLRParsingTable(automaton *lr0Automaton, prods *productionSet, follow *followSet, termCount, nonTermCount int) (*ParsingTable, error) {
+ var ptab *ParsingTable
+ {
+ initialState := automaton.states[automaton.initialState]
+ ptab = &ParsingTable{
+ actionTable: make([]actionEntry, len(automaton.states)*termCount),
+ goToTable: make([]goToEntry, len(automaton.states)*nonTermCount),
+ stateCount: len(automaton.states),
+ terminalCount: termCount,
+ nonTerminalCount: nonTermCount,
+ InitialState: initialState.num,
+ }
+ }
+
+ for _, state := range automaton.states {
+ for sym, kID := range state.next {
+ nextState := automaton.states[kID]
+ if sym.isTerminal() {
+ err := ptab.writeShiftAction(state.num, sym, nextState.num)
+ if err != nil {
+ return nil, err
+ }
+ } else {
+ ptab.writeGoTo(state.num, sym, nextState.num)
+ }
+ }
+
+ for prodID := range state.reducible {
+ prod, _ := prods.findByID(prodID)
+ flw, err := follow.find(prod.lhs)
+ if err != nil {
+ return nil, err
+ }
+ for sym := range flw.symbols {
+ err := ptab.writeReduceAction(state.num, sym, prod.num)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if flw.eof {
+ err := ptab.writeReduceAction(state.num, symbolEOF, prod.num)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+ }
+
+ return ptab, nil
+}
diff --git a/grammar/slr_test.go b/grammar/slr_test.go
new file mode 100644
index 0000000..089b73b
--- /dev/null
+++ b/grammar/slr_test.go
@@ -0,0 +1,481 @@
+package grammar
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/spec"
+)
+
+func TestGenSLRParsingTable(t *testing.T) {
+ src := `
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`
+
+ var ptab *ParsingTable
+ var automaton *lr0Automaton
+ var gram *Grammar
+ var nonTermCount int
+ var termCount int
+ {
+ ast, err := spec.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ gram, err = NewGrammar(ast)
+ if err != nil {
+ t.Fatal(err)
+ }
+ first, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ follow, err := genFollowSet(gram.productionSet, first)
+ if err != nil {
+ t.Fatal(err)
+ }
+ automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ nonTermTexts, err := gram.symbolTable.getNonTerminalTexts()
+ if err != nil {
+ t.Fatal(err)
+ }
+ termTexts, err := gram.symbolTable.getTerminalTexts()
+ if err != nil {
+ t.Fatal(err)
+ }
+ nonTermCount = len(nonTermTexts)
+ termCount = len(termTexts)
+
+ ptab, err = genSLRParsingTable(automaton, gram.productionSet, follow, termCount, nonTermCount)
+ if err != nil {
+ t.Fatalf("failed to create a SLR parsing table: %v", err)
+ }
+ if ptab == nil {
+ t.Fatal("genSLRParsingTable returns nil without any error")
+ }
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lr0Item{
+ 0: {
+ genLR0Item("expr'", 0, "expr"),
+ },
+ 1: {
+ genLR0Item("expr'", 1, "expr"),
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ },
+ 2: {
+ genLR0Item("expr", 1, "term"),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 3: {
+ genLR0Item("term", 1, "factor"),
+ },
+ 4: {
+ genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
+ },
+ 5: {
+ genLR0Item("factor", 1, "id"),
+ },
+ 6: {
+ genLR0Item("expr", 2, "expr", "add", "term"),
+ },
+ 7: {
+ genLR0Item("term", 2, "term", "mul", "factor"),
+ },
+ 8: {
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
+ },
+ 9: {
+ genLR0Item("expr", 3, "expr", "add", "term"),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 10: {
+ genLR0Item("term", 3, "term", "mul", "factor"),
+ },
+ 11: {
+ genLR0Item("factor", 3, "l_paren", "expr", "r_paren"),
+ },
+ }
+
+ expectedStates := []struct {
+ kernelItems []*lr0Item
+ acts map[symbol]testActionEntry
+ goTos map[symbol][]*lr0Item
+ }{
+ {
+ kernelItems: expectedKernels[0],
+ acts: map[symbol]testActionEntry{
+ genSym("l_paren"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol][]*lr0Item{
+ genSym("expr"): expectedKernels[1],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ },
+ },
+ {
+ kernelItems: expectedKernels[1],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[6],
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("expr'", "expr"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeReduce,
+ production: genProd("expr", "term"),
+ },
+ genSym("mul"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[7],
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeReduce,
+ production: genProd("expr", "term"),
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("expr", "term"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeReduce,
+ production: genProd("term", "factor"),
+ },
+ genSym("mul"): {
+ ty: ActionTypeReduce,
+ production: genProd("term", "factor"),
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeReduce,
+ production: genProd("term", "factor"),
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("term", "factor"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ acts: map[symbol]testActionEntry{
+ genSym("l_paren"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol][]*lr0Item{
+ genSym("expr"): expectedKernels[8],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ },
+ },
+ {
+ kernelItems: expectedKernels[5],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "id"),
+ },
+ genSym("mul"): {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "id"),
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "id"),
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "id"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ acts: map[symbol]testActionEntry{
+ genSym("l_paren"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol][]*lr0Item{
+ genSym("term"): expectedKernels[9],
+ genSym("factor"): expectedKernels[3],
+ },
+ },
+ {
+ kernelItems: expectedKernels[7],
+ acts: map[symbol]testActionEntry{
+ genSym("l_paren"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol][]*lr0Item{
+ genSym("factor"): expectedKernels[10],
+ },
+ },
+ {
+ kernelItems: expectedKernels[8],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[6],
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[11],
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[9],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeReduce,
+ production: genProd("expr", "expr", "add", "term"),
+ },
+ genSym("mul"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[7],
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeReduce,
+ production: genProd("expr", "expr", "add", "term"),
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("expr", "expr", "add", "term"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[10],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeReduce,
+ production: genProd("term", "term", "mul", "factor"),
+ },
+ genSym("mul"): {
+ ty: ActionTypeReduce,
+ production: genProd("term", "term", "mul", "factor"),
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeReduce,
+ production: genProd("term", "term", "mul", "factor"),
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("term", "term", "mul", "factor"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[11],
+ acts: map[symbol]testActionEntry{
+ genSym("add"): {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ genSym("mul"): {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ genSym("r_paren"): {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ symbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ },
+ },
+ }
+
+ t.Run("initial state", func(t *testing.T) {
+ iniState := findStateByNum(automaton.states, ptab.InitialState)
+ if iniState == nil {
+ t.Fatalf("the initial state was not found: #%v", ptab.InitialState)
+ }
+ eIniState, err := newKernel(expectedKernels[0])
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ if iniState.id != eIniState.id {
+ t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id)
+ }
+ })
+
+ for i, eState := range expectedStates {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("state was not found: #%v", 0)
+ }
+
+ // ACTION
+ {
+ nonEmptyEntries := map[symbolNum]struct{}{}
+ for eSym, eAct := range eState.acts {
+ nonEmptyEntries[eSym.num()] = struct{}{}
+
+ ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num())
+ if ty != eAct.ty {
+ t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
+ }
+ switch eAct.ty {
+ case ActionTypeShift:
+ eNextState, err := newKernel(eAct.nextState)
+ if err != nil {
+ t.Fatal(err)
+ }
+ nextState := findStateByNum(automaton.states, stateNum)
+ if nextState == nil {
+ t.Fatalf("state was not found; state: #%v", stateNum)
+ }
+ if nextState.id != eNextState.id {
+ t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+ }
+ case ActionTypeReduce:
+ prod := findProductionByNum(gram.productionSet, prodNum)
+ if prod == nil {
+ t.Fatalf("production was not found: #%v", prodNum)
+ }
+ if prod.id != eAct.production.id {
+ t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
+ }
+ }
+ }
+ for symNum := 0; symNum < termCount; symNum++ {
+ if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked {
+ continue
+ }
+ ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum))
+ if ty != ActionTypeError {
+ t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum)
+ }
+ }
+ }
+
+ // GOTO
+ {
+ nonEmptyEntries := map[symbolNum]struct{}{}
+ for eSym, eGoTo := range eState.goTos {
+ nonEmptyEntries[eSym.num()] = struct{}{}
+
+ eNextState, err := newKernel(eGoTo)
+ if err != nil {
+ t.Fatal(err)
+ }
+ ty, stateNum := ptab.getGoTo(state.num, eSym.num())
+ if ty != GoToTypeRegistered {
+ t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
+ }
+ nextState := findStateByNum(automaton.states, stateNum)
+ if nextState == nil {
+ t.Fatalf("state was not found: #%v", stateNum)
+ }
+ if nextState.id != eNextState.id {
+ t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+ }
+ }
+ for symNum := 0; symNum < nonTermCount; symNum++ {
+ if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked {
+ continue
+ }
+ ty, _ := ptab.getGoTo(state.num, symbolNum(symNum))
+ if ty != GoToTypeError {
+ t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
+ }
+ }
+ }
+ })
+ }
+}
+
+type testActionEntry struct {
+ ty ActionType
+ nextState []*lr0Item
+ production *production
+}
+
+func findStateByNum(states map[kernelID]*lr0State, num stateNum) *lr0State {
+ for _, state := range states {
+ if state.num == num {
+ return state
+ }
+ }
+ return nil
+}
+
+func findProductionByNum(prods *productionSet, num productionNum) *production {
+ for _, prod := range prods.getAllProductions() {
+ if prod.num == num {
+ return prod
+ }
+ }
+ return nil
+}
diff --git a/grammar/test_helper_test.go b/grammar/test_helper_test.go
new file mode 100644
index 0000000..8f5c372
--- /dev/null
+++ b/grammar/test_helper_test.go
@@ -0,0 +1,52 @@
+package grammar
+
+import "testing"
+
+type testSymbolGenerator func(text string) symbol
+
+func newTestSymbolGenerator(t *testing.T, symTab *symbolTable) testSymbolGenerator {
+ return func(text string) symbol {
+ t.Helper()
+
+ sym, ok := symTab.toSymbol(text)
+ if !ok {
+ t.Fatalf("symbol was not found: %v", text)
+ }
+ return sym
+ }
+}
+
+type testProductionGenerator func(lhs string, rhs ...string) *production
+
+func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
+ return func(lhs string, rhs ...string) *production {
+ t.Helper()
+
+ rhsSym := []symbol{}
+ for _, text := range rhs {
+ rhsSym = append(rhsSym, genSym(text))
+ }
+ prod, err := newProduction(genSym(lhs), rhsSym)
+ if err != nil {
+ t.Fatalf("failed to create a production: %v", err)
+ }
+
+ return prod
+ }
+}
+
+type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lr0Item
+
+func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator {
+ return func(lhs string, dot int, rhs ...string) *lr0Item {
+ t.Helper()
+
+ prod := genProd(lhs, rhs...)
+ item, err := newLR0Item(prod, dot)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 item: %v", err)
+ }
+
+ return item
+ }
+}