aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-08-18 00:03:59 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-08-18 00:03:59 +0900
commite110a6ff4073799d00c7d3400ac022f758a03e40 (patch)
treee194e63debc48a53e5f9ec77075bd8ad0dcfa25f
parentFix panic on a syntax error (diff)
downloadurubu-e110a6ff4073799d00c7d3400ac022f758a03e40.tar.gz
urubu-e110a6ff4073799d00c7d3400ac022f758a03e40.tar.xz
Set look-ahead symbols to items before generating a SLR(1) parsing table
-rw-r--r--grammar/grammar.go37
-rw-r--r--grammar/lalr1.go19
-rw-r--r--grammar/lalr1_test.go31
-rw-r--r--grammar/lr0.go22
-rw-r--r--grammar/lr0_test.go202
-rw-r--r--grammar/parsing_table_builder.go201
-rw-r--r--grammar/parsing_table_builder_test.go103
-rw-r--r--grammar/slr1.go61
-rw-r--r--grammar/slr1_test.go325
9 files changed, 713 insertions, 288 deletions
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 2b0f648..bca82e9 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -696,10 +696,14 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
return nil, err
}
- slr := &slrTableBuilder{
- automaton: lr0,
+ slr1, err := genSLR1Automaton(lr0, gram.productionSet, followSet)
+ if err != nil {
+ return nil, err
+ }
+
+ slr := &lrTableBuilder{
+ automaton: slr1.lr0Automaton,
prods: gram.productionSet,
- follow: followSet,
termCount: len(terms),
nonTermCount: len(nonTerms),
symTab: gram.symbolTable,
@@ -714,17 +718,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
}
defer f.Close()
- dw := &descriptionWriter{
- automaton: slr.automaton,
- prods: slr.prods,
- follow: slr.follow,
- termCount: slr.termCount,
- nonTermCount: slr.nonTermCount,
- symTab: slr.symTab,
- sym2AnonPat: slr.sym2AnonPat,
- conflicts: slr.conflicts,
- }
- dw.write(f)
+ slr.write(f)
}
if err != nil {
@@ -736,8 +730,8 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
return nil, err
}
- lalr := &lalrTableBuilder{
- automaton: lalr1,
+ lalr := &lrTableBuilder{
+ automaton: lalr1.lr0Automaton,
prods: gram.productionSet,
termCount: len(terms),
nonTermCount: len(nonTerms),
@@ -753,16 +747,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
}
defer f.Close()
- dw := &descriptionWriter{
- automaton: lalr.automaton.lr0Automaton,
- prods: lalr.prods,
- termCount: lalr.termCount,
- nonTermCount: lalr.nonTermCount,
- symTab: lalr.symTab,
- sym2AnonPat: lalr.sym2AnonPat,
- conflicts: lalr.conflicts,
- }
- dw.write(f)
+ lalr.write(f)
}
if err != nil {
diff --git a/grammar/lalr1.go b/grammar/lalr1.go
index fc15942..f1b8149 100644
--- a/grammar/lalr1.go
+++ b/grammar/lalr1.go
@@ -42,7 +42,24 @@ func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet)
}
if p.isEmpty() {
- state.emptyProdItems = append(state.emptyProdItems, item)
+ var reducibleItem *lrItem
+ for _, it := range state.emptyProdItems {
+ if it.id != item.id {
+ continue
+ }
+
+ reducibleItem = it
+ break
+ }
+ if reducibleItem == nil {
+ return nil, fmt.Errorf("reducible item not found: %v", item.id)
+ }
+ if reducibleItem.lookAhead.symbols == nil {
+ reducibleItem.lookAhead.symbols = map[symbol]struct{}{}
+ }
+ for a := range item.lookAhead.symbols {
+ reducibleItem.lookAhead.symbols[a] = struct{}{}
+ }
propDests = append(propDests, &stateAndLRItem{
kernelID: state.id,
diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go
index 5ac84cd..67ab7d0 100644
--- a/grammar/lalr1_test.go
+++ b/grammar/lalr1_test.go
@@ -8,12 +8,6 @@ import (
"github.com/nihei9/vartan/spec"
)
-type expectedLALR1State struct {
- kernelItems []*lrItem
- nextStates map[symbol][]*lrItem
- reducibleProds []*production
-}
-
func TestGenLALR1Automaton(t *testing.T) {
// This grammar belongs to LALR(1) class, not SLR(1).
src := `
@@ -54,7 +48,7 @@ id: "[A-Za-z0-9_]+";
if err != nil {
t.Fatalf("failed to create a LALR1 automaton: %v", err)
}
- if lr0 == nil {
+ if automaton == nil {
t.Fatalf("genLALR1Automaton returns nil without any error")
}
@@ -101,7 +95,7 @@ id: "[A-Za-z0-9_]+";
},
}
- expectedStates := []expectedLALR1State{
+ expectedStates := []expectedLRState{
{
kernelItems: expectedKernels[0],
nextStates: map[symbol][]*lrItem{
@@ -220,6 +214,10 @@ id: "[A-Za-z0-9_]+";
t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id)
}
+ if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
+ t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
+ }
+
for eSym := range eKItem.lookAhead.symbols {
if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
t.Errorf("look-ahead symbol not found: %v", eSym)
@@ -258,6 +256,23 @@ id: "[A-Za-z0-9_]+";
t.Errorf("reducible production was not found: %v", eProd.id)
}
}
+
+ if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+ t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+ }
+ for _, eItem := range eState.emptyProdItems {
+ found := false
+ for _, item := range state.emptyProdItems {
+ if item.id != eItem.id {
+ continue
+ }
+ found = true
+ break
+ }
+ if !found {
+ t.Errorf("empty production item not found: %v", eItem.id)
+ }
+ }
}
})
}
diff --git a/grammar/lr0.go b/grammar/lr0.go
index 9e3bb2e..20952c8 100644
--- a/grammar/lr0.go
+++ b/grammar/lr0.go
@@ -85,16 +85,28 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*
}
reducible := map[productionID]struct{}{}
+ var emptyProdItems []*lrItem
for _, item := range items {
- if item.reducible {
- reducible[item.prod] = struct{}{}
+ if !item.reducible {
+ continue
+ }
+
+ reducible[item.prod] = struct{}{}
+
+ prod, ok := prods.findByID(item.prod)
+ if !ok {
+ return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod)
+ }
+ if prod.isEmpty() {
+ emptyProdItems = append(emptyProdItems, item)
}
}
return &lrState{
- kernel: k,
- next: next,
- reducible: reducible,
+ kernel: k,
+ next: next,
+ reducible: reducible,
+ emptyProdItems: emptyProdItems,
}, kernels, nil
}
diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go
index 9000483..aea028c 100644
--- a/grammar/lr0_test.go
+++ b/grammar/lr0_test.go
@@ -8,10 +8,11 @@ import (
"github.com/nihei9/vartan/spec"
)
-type expectedLR0State struct {
+type expectedLRState struct {
kernelItems []*lrItem
nextStates map[symbol][]*lrItem
reducibleProds []*production
+ emptyProdItems []*lrItem
}
func TestGenLR0Automaton(t *testing.T) {
@@ -107,7 +108,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
}
- expectedStates := []expectedLR0State{
+ expectedStates := []expectedLRState{
{
kernelItems: expectedKernels[0],
nextStates: map[symbol][]*lrItem{
@@ -260,6 +261,203 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
t.Errorf("reducible production was not found: %v", eProd.id)
}
}
+
+ if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+ t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+ }
+ for _, eItem := range eState.emptyProdItems {
+ found := false
+ for _, item := range state.emptyProdItems {
+ if item.id != eItem.id {
+ continue
+ }
+ found = true
+ break
+ }
+ if !found {
+ t.Errorf("empty production item not found: %v", eItem.id)
+ }
+ }
+ }
+ })
+ }
+}
+
+func TestLR0AutomatonContainingEmptyProduction(t *testing.T) {
+ src := `
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar
+ : BAR
+ |
+ ;
+BAR: "bar";
+`
+
+ ast, err := spec.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ automaton, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genLR0Automaton returns nil without any error")
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+ t.Errorf("failed to get an initial status: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ genLR0Item("s'", 0, "s"),
+ },
+ 1: {
+ genLR0Item("s'", 1, "s"),
+ },
+ 2: {
+ genLR0Item("s", 1, "foo", "bar"),
+ },
+ 3: {
+ genLR0Item("s", 2, "foo", "bar"),
+ },
+ 4: {
+ genLR0Item("bar", 1, "BAR"),
+ },
+ }
+
+ expectedStates := []expectedLRState{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol][]*lrItem{
+ genSym("s"): expectedKernels[1],
+ genSym("foo"): expectedKernels[2],
+ },
+ reducibleProds: []*production{
+ genProd("foo"),
+ },
+ emptyProdItems: []*lrItem{
+ genLR0Item("foo", 0),
+ },
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s'", "s"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol][]*lrItem{
+ genSym("bar"): expectedKernels[3],
+ genSym("BAR"): expectedKernels[4],
+ },
+ reducibleProds: []*production{
+ genProd("bar"),
+ },
+ emptyProdItems: []*lrItem{
+ genLR0Item("bar", 0),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s", "foo", "bar"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("bar", "BAR"),
+ },
+ },
+ }
+
+ if len(automaton.states) != len(expectedStates) {
+ t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states))
+ }
+
+ for i, eState := range expectedStates {
+ t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("a kernel was not found: %v", k.id)
+ }
+
+ // test next states
+ {
+ if len(state.next) != len(eState.nextStates) {
+ t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next))
+ }
+ for eSym, eKItems := range eState.nextStates {
+ nextStateKernel, err := newKernel(eKItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ nextState, ok := state.next[eSym]
+ if !ok {
+ t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
+ }
+ if nextState != nextStateKernel.id {
+ t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
+ }
+ }
+ }
+
+ // test reducible productions
+ {
+ if len(state.reducible) != len(eState.reducibleProds) {
+ t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
+ }
+ for _, eProd := range eState.reducibleProds {
+ if _, ok := state.reducible[eProd.id]; !ok {
+ t.Errorf("reducible production was not found: %v", eProd.id)
+ }
+ }
+
+ if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+ t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+ }
+ for _, eItem := range eState.emptyProdItems {
+ found := false
+ for _, item := range state.emptyProdItems {
+ if item.id != eItem.id {
+ continue
+ }
+ found = true
+ break
+ }
+ if !found {
+ t.Errorf("empty production item not found: %v", eItem.id)
+ }
+ }
}
})
}
diff --git a/grammar/parsing_table_builder.go b/grammar/parsing_table_builder.go
index e209c94..82b0d34 100644
--- a/grammar/parsing_table_builder.go
+++ b/grammar/parsing_table_builder.go
@@ -165,8 +165,8 @@ func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum)
t.goToTable[pos] = newGoToEntry(nextState)
}
-type lalrTableBuilder struct {
- automaton *lalr1Automaton
+type lrTableBuilder struct {
+ automaton *lr0Automaton
prods *productionSet
termCount int
nonTermCount int
@@ -176,7 +176,7 @@ type lalrTableBuilder struct {
conflicts []conflict
}
-func (b *lalrTableBuilder) build() (*ParsingTable, error) {
+func (b *lrTableBuilder) build() (*ParsingTable, error) {
var ptab *ParsingTable
{
initialState := b.automaton.states[b.automaton.initialState]
@@ -262,104 +262,9 @@ func (b *lalrTableBuilder) build() (*ParsingTable, error) {
return ptab, nil
}
-type slrTableBuilder struct {
- automaton *lr0Automaton
- prods *productionSet
- follow *followSet
- termCount int
- nonTermCount int
- symTab *symbolTable
- sym2AnonPat map[symbol]string
-
- conflicts []conflict
-}
-
-func (b *slrTableBuilder) build() (*ParsingTable, error) {
- var ptab *ParsingTable
- {
- initialState := b.automaton.states[b.automaton.initialState]
- ptab = &ParsingTable{
- actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount),
- goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
- stateCount: len(b.automaton.states),
- terminalCount: b.termCount,
- nonTerminalCount: b.nonTermCount,
- expectedTerminals: make([][]int, len(b.automaton.states)),
- InitialState: initialState.num,
- }
- }
-
- var conflicts []conflict
- for _, state := range b.automaton.states {
- var eTerms []int
-
- for sym, kID := range state.next {
- nextState := b.automaton.states[kID]
- if sym.isTerminal() {
- eTerms = append(eTerms, sym.num().Int())
-
- c := ptab.writeShiftAction(state.num, sym, nextState.num)
- if c != nil {
- conflicts = append(conflicts, c)
- continue
- }
- } else {
- ptab.writeGoTo(state.num, sym, nextState.num)
- }
- }
-
- for prodID := range state.reducible {
- prod, _ := b.prods.findByID(prodID)
- flw, err := b.follow.find(prod.lhs)
- if err != nil {
- return nil, err
- }
- for sym := range flw.symbols {
- eTerms = append(eTerms, sym.num().Int())
-
- c := ptab.writeReduceAction(state.num, sym, prod.num)
- if c != nil {
- conflicts = append(conflicts, c)
- continue
- }
- }
- if flw.eof {
- eTerms = append(eTerms, symbolEOF.num().Int())
-
- c := ptab.writeReduceAction(state.num, symbolEOF, prod.num)
- if c != nil {
- conflicts = append(conflicts, c)
- continue
- }
- }
- }
-
- ptab.expectedTerminals[state.num] = eTerms
- }
-
- b.conflicts = conflicts
-
- if len(conflicts) > 0 {
- return nil, fmt.Errorf("%v conflicts", len(conflicts))
- }
-
- return ptab, nil
-}
-
-type descriptionWriter struct {
- automaton *lr0Automaton
- prods *productionSet
- follow *followSet
- termCount int
- nonTermCount int
- symTab *symbolTable
- sym2AnonPat map[symbol]string
- conflicts []conflict
-}
-
-func (dw *descriptionWriter) write(w io.Writer) {
+func (b *lrTableBuilder) write(w io.Writer) {
conflicts := map[stateNum][]conflict{}
- for _, con := range dw.conflicts {
+ for _, con := range b.conflicts {
switch c := con.(type) {
case *shiftReduceConflict:
conflicts[c.state] = append(conflicts[c.state], c)
@@ -370,15 +275,15 @@ func (dw *descriptionWriter) write(w io.Writer) {
fmt.Fprintf(w, "# Conflicts\n\n")
- if len(dw.conflicts) > 0 {
- fmt.Fprintf(w, "%v conflics:\n\n", len(dw.conflicts))
+ if len(b.conflicts) > 0 {
+ fmt.Fprintf(w, "%v conflics:\n\n", len(b.conflicts))
- for _, conflict := range dw.conflicts {
+ for _, conflict := range b.conflicts {
switch c := conflict.(type) {
case *shiftReduceConflict:
- fmt.Fprintf(w, "%v: shift/reduce conflict (shift %v, reduce %v) on %v\n", c.state, c.nextState, c.prodNum, dw.symbolToText(c.sym))
+ fmt.Fprintf(w, "%v: shift/reduce conflict (shift %v, reduce %v) on %v\n", c.state, c.nextState, c.prodNum, b.symbolToText(c.sym))
case *reduceReduceConflict:
- fmt.Fprintf(w, "%v: reduce/reduce conflict (reduce %v and %v) on %v\n", c.state, c.prodNum1, c.prodNum2, dw.symbolToText(c.sym))
+ fmt.Fprintf(w, "%v: reduce/reduce conflict (reduce %v and %v) on %v\n", c.state, c.prodNum1, c.prodNum2, b.symbolToText(c.sym))
}
}
fmt.Fprintf(w, "\n")
@@ -388,17 +293,17 @@ func (dw *descriptionWriter) write(w io.Writer) {
fmt.Fprintf(w, "# Terminals\n\n")
- termSyms := dw.symTab.terminalSymbols()
+ termSyms := b.symTab.terminalSymbols()
fmt.Fprintf(w, "%v symbols:\n\n", len(termSyms))
for _, sym := range termSyms {
- text, ok := dw.symTab.toText(sym)
+ text, ok := b.symTab.toText(sym)
if !ok {
text = fmt.Sprintf("<symbol not found: %v>", sym)
}
if strings.HasPrefix(text, "_") {
- fmt.Fprintf(w, "%4v %v: \"%v\"\n", sym.num(), text, dw.sym2AnonPat[sym])
+ fmt.Fprintf(w, "%4v %v: \"%v\"\n", sym.num(), text, b.sym2AnonPat[sym])
} else {
fmt.Fprintf(w, "%4v %v\n", sym.num(), text)
}
@@ -408,25 +313,25 @@ func (dw *descriptionWriter) write(w io.Writer) {
fmt.Fprintf(w, "# Productions\n\n")
- fmt.Fprintf(w, "%v productions:\n\n", len(dw.prods.getAllProductions()))
+ fmt.Fprintf(w, "%v productions:\n\n", len(b.prods.getAllProductions()))
- for _, prod := range dw.prods.getAllProductions() {
- fmt.Fprintf(w, "%4v %v\n", prod.num, dw.productionToString(prod, -1))
+ for _, prod := range b.prods.getAllProductions() {
+ fmt.Fprintf(w, "%4v %v\n", prod.num, b.productionToString(prod, -1))
}
fmt.Fprintf(w, "\n# States\n\n")
- fmt.Fprintf(w, "%v states:\n\n", len(dw.automaton.states))
+ fmt.Fprintf(w, "%v states:\n\n", len(b.automaton.states))
- for _, state := range dw.automaton.states {
+ for _, state := range b.automaton.states {
fmt.Fprintf(w, "state %v\n", state.num)
for _, item := range state.items {
- prod, ok := dw.prods.findByID(item.prod)
+ prod, ok := b.prods.findByID(item.prod)
if !ok {
fmt.Fprintf(w, "<production not found>\n")
continue
}
- fmt.Fprintf(w, " %v\n", dw.productionToString(prod, item.dot))
+ fmt.Fprintf(w, " %v\n", b.productionToString(prod, item.dot))
}
fmt.Fprintf(w, "\n")
@@ -437,16 +342,16 @@ func (dw *descriptionWriter) write(w io.Writer) {
var accRec string
{
for sym, kID := range state.next {
- nextState := dw.automaton.states[kID]
+ nextState := b.automaton.states[kID]
if sym.isTerminal() {
- shiftRecs = append(shiftRecs, fmt.Sprintf("shift %4v on %v", nextState.num, dw.symbolToText(sym)))
+ shiftRecs = append(shiftRecs, fmt.Sprintf("shift %4v on %v", nextState.num, b.symbolToText(sym)))
} else {
- gotoRecs = append(gotoRecs, fmt.Sprintf("goto %4v on %v", nextState.num, dw.symbolToText(sym)))
+ gotoRecs = append(gotoRecs, fmt.Sprintf("goto %4v on %v", nextState.num, b.symbolToText(sym)))
}
}
for prodID := range state.reducible {
- prod, ok := dw.prods.findByID(prodID)
+ prod, ok := b.prods.findByID(prodID)
if !ok {
reduceRecs = append(reduceRecs, "<production not found>")
continue
@@ -456,21 +361,17 @@ func (dw *descriptionWriter) write(w io.Writer) {
continue
}
- if dw.follow != nil {
- flw, err := dw.follow.find(prod.lhs)
- if err != nil {
- reduceRecs = append(reduceRecs, fmt.Sprintf("%v", err))
+ var reducibleItem *lrItem
+ for _, item := range state.items {
+ if item.prod != prodID {
continue
}
- for sym := range flw.symbols {
- reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, dw.symbolToText(sym)))
- }
- if flw.eof {
- reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on <EOF>", prod.num))
- }
- } else {
- var reducibleItem *lrItem
- for _, item := range state.items {
+
+ reducibleItem = item
+ break
+ }
+ if reducibleItem == nil {
+ for _, item := range state.emptyProdItems {
if item.prod != prodID {
continue
}
@@ -479,23 +380,13 @@ func (dw *descriptionWriter) write(w io.Writer) {
break
}
if reducibleItem == nil {
- for _, item := range state.emptyProdItems {
- if item.prod != prodID {
- continue
- }
-
- reducibleItem = item
- break
- }
- if reducibleItem == nil {
- reduceRecs = append(reduceRecs, "<item not found>")
- continue
- }
- }
- for a := range reducibleItem.lookAhead.symbols {
- reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, dw.symbolToText(a)))
+ reduceRecs = append(reduceRecs, "<item not found>")
+ continue
}
}
+ for a := range reducibleItem.lookAhead.symbols {
+ reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, b.symbolToText(a)))
+ }
}
}
@@ -523,9 +414,9 @@ func (dw *descriptionWriter) write(w io.Writer) {
for _, con := range cons {
switch c := con.(type) {
case *shiftReduceConflict:
- fmt.Fprintf(w, " shift/reduce conflict (shift %v, reduce %v) on %v\n", c.nextState, c.prodNum, dw.symbolToText(c.sym))
+ fmt.Fprintf(w, " shift/reduce conflict (shift %v, reduce %v) on %v\n", c.nextState, c.prodNum, b.symbolToText(c.sym))
case *reduceReduceConflict:
- fmt.Fprintf(w, " reduce/reduce conflict (reduce %v and %v) on %v\n", c.prodNum1, c.prodNum2, dw.symbolToText(c.sym))
+ fmt.Fprintf(w, " reduce/reduce conflict (reduce %v and %v) on %v\n", c.prodNum1, c.prodNum2, b.symbolToText(c.sym))
}
}
fmt.Fprintf(w, "\n")
@@ -533,14 +424,14 @@ func (dw *descriptionWriter) write(w io.Writer) {
}
}
-func (dw *descriptionWriter) productionToString(prod *production, dot int) string {
+func (b *lrTableBuilder) productionToString(prod *production, dot int) string {
var w strings.Builder
- fmt.Fprintf(&w, "%v →", dw.symbolToText(prod.lhs))
+ fmt.Fprintf(&w, "%v →", b.symbolToText(prod.lhs))
for n, rhs := range prod.rhs {
if n == dot {
fmt.Fprintf(&w, " ・")
}
- fmt.Fprintf(&w, " %v", dw.symbolToText(rhs))
+ fmt.Fprintf(&w, " %v", b.symbolToText(rhs))
}
if dot == len(prod.rhs) {
fmt.Fprintf(&w, " ・")
@@ -549,7 +440,7 @@ func (dw *descriptionWriter) productionToString(prod *production, dot int) strin
return w.String()
}
-func (dw *descriptionWriter) symbolToText(sym symbol) string {
+func (b *lrTableBuilder) symbolToText(sym symbol) string {
if sym.isNil() {
return "<NULL>"
}
@@ -557,13 +448,13 @@ func (dw *descriptionWriter) symbolToText(sym symbol) string {
return "<EOF>"
}
- text, ok := dw.symTab.toText(sym)
+ text, ok := b.symTab.toText(sym)
if !ok {
return fmt.Sprintf("<symbol not found: %v>", sym)
}
if strings.HasPrefix(text, "_") {
- pat, ok := dw.sym2AnonPat[sym]
+ pat, ok := b.sym2AnonPat[sym]
if !ok {
return fmt.Sprintf("<pattern not found: %v>", text)
}
diff --git a/grammar/parsing_table_builder_test.go b/grammar/parsing_table_builder_test.go
index 18add81..95bc543 100644
--- a/grammar/parsing_table_builder_test.go
+++ b/grammar/parsing_table_builder_test.go
@@ -65,8 +65,8 @@ id: "[A-Za-z0-9_]+";
nonTermCount = len(nonTermTexts)
termCount = len(termTexts)
- lalr := &lalrTableBuilder{
- automaton: automaton,
+ lalr := &lrTableBuilder{
+ automaton: automaton.lr0Automaton,
prods: gram.productionSet,
termCount: termCount,
nonTermCount: nonTermCount,
@@ -306,7 +306,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
`
var ptab *ParsingTable
- var automaton *lr0Automaton
+ var automaton *slr1Automaton
var gram *Grammar
var nonTermCount int
var termCount int
@@ -330,7 +330,11 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
if err != nil {
t.Fatal(err)
}
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ if err != nil {
+ t.Fatal(err)
+ }
+ automaton, err = genSLR1Automaton(lr0, gram.productionSet, follow)
if err != nil {
t.Fatal(err)
}
@@ -346,10 +350,9 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
nonTermCount = len(nonTermTexts)
termCount = len(termTexts)
- slr := &slrTableBuilder{
- automaton: automaton,
+ slr := &lrTableBuilder{
+ automaton: automaton.lr0Automaton,
prods: gram.productionSet,
- follow: follow,
termCount: termCount,
nonTermCount: nonTermCount,
symTab: gram.symbolTable,
@@ -410,11 +413,6 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
}
- // expectedStates := []struct {
- // kernelItems []*lrItem
- // acts map[symbol]testActionEntry
- // goTos map[symbol][]*lrItem
- // }{
expectedStates := []expectedState{
{
kernelItems: expectedKernels[0],
@@ -664,85 +662,8 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
t.Fatalf("state was not found: #%v", 0)
}
- testAction(t, &eState, state, ptab, automaton, gram, termCount)
- testGoTo(t, &eState, state, ptab, automaton, nonTermCount)
-
- // ACTION
- {
- // nonEmptyEntries := map[symbolNum]struct{}{}
- // for eSym, eAct := range eState.acts {
- // nonEmptyEntries[eSym.num()] = struct{}{}
-
- // ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num())
- // if ty != eAct.ty {
- // t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
- // }
- // switch eAct.ty {
- // case ActionTypeShift:
- // eNextState, err := newKernel(eAct.nextState)
- // if err != nil {
- // t.Fatal(err)
- // }
- // nextState := findStateByNum(automaton.states, stateNum)
- // if nextState == nil {
- // t.Fatalf("state was not found; state: #%v", stateNum)
- // }
- // if nextState.id != eNextState.id {
- // t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
- // }
- // case ActionTypeReduce:
- // prod := findProductionByNum(gram.productionSet, prodNum)
- // if prod == nil {
- // t.Fatalf("production was not found: #%v", prodNum)
- // }
- // if prod.id != eAct.production.id {
- // t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
- // }
- // }
- // }
- // for symNum := 0; symNum < termCount; symNum++ {
- // if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked {
- // continue
- // }
- // ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum))
- // if ty != ActionTypeError {
- // t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum)
- // }
- // }
- }
-
- // GOTO
- {
- // nonEmptyEntries := map[symbolNum]struct{}{}
- // for eSym, eGoTo := range eState.goTos {
- // nonEmptyEntries[eSym.num()] = struct{}{}
-
- // eNextState, err := newKernel(eGoTo)
- // if err != nil {
- // t.Fatal(err)
- // }
- // ty, stateNum := ptab.getGoTo(state.num, eSym.num())
- // if ty != GoToTypeRegistered {
- // t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
- // }
- // nextState := findStateByNum(automaton.states, stateNum)
- // if nextState == nil {
- // t.Fatalf("state was not found: #%v", stateNum)
- // }
- // if nextState.id != eNextState.id {
- // t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
- // }
- // }
- // for symNum := 0; symNum < nonTermCount; symNum++ {
- // if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked {
- // continue
- // }
- // ty, _ := ptab.getGoTo(state.num, symbolNum(symNum))
- // if ty != GoToTypeError {
- // t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
- // }
- // }
- }
+ testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount)
+ testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount)
})
}
}
diff --git a/grammar/slr1.go b/grammar/slr1.go
new file mode 100644
index 0000000..b11a66f
--- /dev/null
+++ b/grammar/slr1.go
@@ -0,0 +1,61 @@
+package grammar
+
+import "fmt"
+
+type slr1Automaton struct {
+ *lr0Automaton
+}
+
+func genSLR1Automaton(lr0 *lr0Automaton, prods *productionSet, follow *followSet) (*slr1Automaton, error) {
+ for _, state := range lr0.states {
+ for prodID := range state.reducible {
+ prod, ok := prods.findByID(prodID)
+ if !ok {
+ return nil, fmt.Errorf("reducible production not found: %v", prodID)
+ }
+
+ flw, err := follow.find(prod.lhs)
+ if err != nil {
+ return nil, err
+ }
+
+ var reducibleItem *lrItem
+ for _, item := range state.items {
+ if item.prod != prodID {
+ continue
+ }
+
+ reducibleItem = item
+ break
+ }
+ if reducibleItem == nil {
+ for _, item := range state.emptyProdItems {
+ if item.prod != prodID {
+ continue
+ }
+
+ reducibleItem = item
+ break
+ }
+ if reducibleItem == nil {
+ return nil, fmt.Errorf("reducible item not found; state: %v, production: %v", state.num, prodID)
+ }
+ }
+
+ if reducibleItem.lookAhead.symbols == nil {
+ reducibleItem.lookAhead.symbols = map[symbol]struct{}{}
+ }
+
+ for sym := range flw.symbols {
+ reducibleItem.lookAhead.symbols[sym] = struct{}{}
+ }
+ if flw.eof {
+ reducibleItem.lookAhead.symbols[symbolEOF] = struct{}{}
+ }
+ }
+ }
+
+ return &slr1Automaton{
+ lr0Automaton: lr0,
+ }, nil
+}
diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go
new file mode 100644
index 0000000..1296bd8
--- /dev/null
+++ b/grammar/slr1_test.go
@@ -0,0 +1,325 @@
+package grammar
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/spec"
+)
+
+func TestGenSLR1Automaton(t *testing.T) {
+ src := `
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`
+
+ ast, err := spec.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+ if lr0 == nil {
+ t.Fatalf("genLR0Automaton returns nil without any error")
+ }
+
+ firstSet, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatalf("failed to create a FIRST set: %v", err)
+ }
+
+ followSet, err := genFollowSet(gram.productionSet, firstSet)
+ if err != nil {
+ t.Fatalf("failed to create a FOLLOW set: %v", err)
+ }
+
+ automaton, err := genSLR1Automaton(lr0, gram.productionSet, followSet)
+ if err != nil {
+ t.Fatalf("failed to create a SLR1 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genSLR1Automaton returns nil without any error")
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+ t.Errorf("failed to get an initial status: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ genLR0Item("expr'", 0, "expr"),
+ },
+ 1: {
+ withLookAhead(genLR0Item("expr'", 1, "expr"), symbolEOF),
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ },
+ 2: {
+ withLookAhead(genLR0Item("expr", 1, "term"), genSym("add"), genSym("r_paren"), symbolEOF),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 3: {
+ withLookAhead(genLR0Item("term", 1, "factor"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF),
+ },
+ 4: {
+ genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
+ },
+ 5: {
+ withLookAhead(genLR0Item("factor", 1, "id"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF),
+ },
+ 6: {
+ genLR0Item("expr", 2, "expr", "add", "term"),
+ },
+ 7: {
+ genLR0Item("term", 2, "term", "mul", "factor"),
+ },
+ 8: {
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
+ },
+ 9: {
+ withLookAhead(genLR0Item("expr", 3, "expr", "add", "term"), genSym("add"), genSym("r_paren"), symbolEOF),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 10: {
+ withLookAhead(genLR0Item("term", 3, "term", "mul", "factor"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF),
+ },
+ 11: {
+ withLookAhead(genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF),
+ },
+ }
+
+ expectedStates := []expectedLRState{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol][]*lrItem{
+ genSym("expr"): expectedKernels[1],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol][]*lrItem{
+ genSym("add"): expectedKernels[6],
+ },
+ reducibleProds: []*production{
+ genProd("expr'", "expr"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol][]*lrItem{
+ genSym("mul"): expectedKernels[7],
+ },
+ reducibleProds: []*production{
+ genProd("expr", "term"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("term", "factor"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol][]*lrItem{
+ genSym("expr"): expectedKernels[8],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[5],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("factor", "id"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ nextStates: map[symbol][]*lrItem{
+ genSym("term"): expectedKernels[9],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[7],
+ nextStates: map[symbol][]*lrItem{
+ genSym("factor"): expectedKernels[10],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[8],
+ nextStates: map[symbol][]*lrItem{
+ genSym("add"): expectedKernels[6],
+ genSym("r_paren"): expectedKernels[11],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[9],
+ nextStates: map[symbol][]*lrItem{
+ genSym("mul"): expectedKernels[7],
+ },
+ reducibleProds: []*production{
+ genProd("expr", "expr", "add", "term"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[10],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("term", "term", "mul", "factor"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[11],
+ nextStates: map[symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ },
+ }
+
+ if len(automaton.states) != len(expectedStates) {
+ t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states))
+ }
+
+ for i, eState := range expectedStates {
+ t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("a kernel was not found: %v", k.id)
+ }
+
+ // test look-ahead symbols
+ {
+ if len(state.kernel.items) != len(eState.kernelItems) {
+ t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items))
+ }
+ for _, eKItem := range eState.kernelItems {
+ var kItem *lrItem
+ for _, it := range state.kernel.items {
+ if it.id != eKItem.id {
+ continue
+ }
+ kItem = it
+ break
+ }
+ if kItem == nil {
+ t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id)
+ }
+
+ if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
+ t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
+ }
+
+ for eSym := range eKItem.lookAhead.symbols {
+ if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
+ t.Errorf("look-ahead symbol not found: %v", eSym)
+ }
+ }
+ }
+ }
+
+ // test next states
+ {
+ if len(state.next) != len(eState.nextStates) {
+ t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next))
+ }
+ for eSym, eKItems := range eState.nextStates {
+ nextStateKernel, err := newKernel(eKItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ nextState, ok := state.next[eSym]
+ if !ok {
+ t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
+ }
+ if nextState != nextStateKernel.id {
+ t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
+ }
+ }
+ }
+
+ // test reducible productions
+ {
+ if len(state.reducible) != len(eState.reducibleProds) {
+ t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
+ }
+ for _, eProd := range eState.reducibleProds {
+ if _, ok := state.reducible[eProd.id]; !ok {
+ t.Errorf("reducible production was not found: %v", eProd.id)
+ }
+ }
+
+ if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+ t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+ }
+ for _, eItem := range eState.emptyProdItems {
+ found := false
+ for _, item := range state.emptyProdItems {
+ if item.id != eItem.id {
+ continue
+ }
+ found = true
+ break
+ }
+ if !found {
+ t.Errorf("empty production item not found: %v", eItem.id)
+ }
+ }
+ }
+ })
+ }
+}