diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-18 00:03:59 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-18 00:03:59 +0900 |
commit | e110a6ff4073799d00c7d3400ac022f758a03e40 (patch) | |
tree | e194e63debc48a53e5f9ec77075bd8ad0dcfa25f | |
parent | Fix panic on a syntax error (diff) | |
download | urubu-e110a6ff4073799d00c7d3400ac022f758a03e40.tar.gz urubu-e110a6ff4073799d00c7d3400ac022f758a03e40.tar.xz |
Set look-ahead symbols to items before generating a SLR(1) parsing table
-rw-r--r-- | grammar/grammar.go | 37 | ||||
-rw-r--r-- | grammar/lalr1.go | 19 | ||||
-rw-r--r-- | grammar/lalr1_test.go | 31 | ||||
-rw-r--r-- | grammar/lr0.go | 22 | ||||
-rw-r--r-- | grammar/lr0_test.go | 202 | ||||
-rw-r--r-- | grammar/parsing_table_builder.go | 201 | ||||
-rw-r--r-- | grammar/parsing_table_builder_test.go | 103 | ||||
-rw-r--r-- | grammar/slr1.go | 61 | ||||
-rw-r--r-- | grammar/slr1_test.go | 325 |
9 files changed, 713 insertions, 288 deletions
diff --git a/grammar/grammar.go b/grammar/grammar.go index 2b0f648..bca82e9 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -696,10 +696,14 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error return nil, err } - slr := &slrTableBuilder{ - automaton: lr0, + slr1, err := genSLR1Automaton(lr0, gram.productionSet, followSet) + if err != nil { + return nil, err + } + + slr := &lrTableBuilder{ + automaton: slr1.lr0Automaton, prods: gram.productionSet, - follow: followSet, termCount: len(terms), nonTermCount: len(nonTerms), symTab: gram.symbolTable, @@ -714,17 +718,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error } defer f.Close() - dw := &descriptionWriter{ - automaton: slr.automaton, - prods: slr.prods, - follow: slr.follow, - termCount: slr.termCount, - nonTermCount: slr.nonTermCount, - symTab: slr.symTab, - sym2AnonPat: slr.sym2AnonPat, - conflicts: slr.conflicts, - } - dw.write(f) + slr.write(f) } if err != nil { @@ -736,8 +730,8 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error return nil, err } - lalr := &lalrTableBuilder{ - automaton: lalr1, + lalr := &lrTableBuilder{ + automaton: lalr1.lr0Automaton, prods: gram.productionSet, termCount: len(terms), nonTermCount: len(nonTerms), @@ -753,16 +747,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error } defer f.Close() - dw := &descriptionWriter{ - automaton: lalr.automaton.lr0Automaton, - prods: lalr.prods, - termCount: lalr.termCount, - nonTermCount: lalr.nonTermCount, - symTab: lalr.symTab, - sym2AnonPat: lalr.sym2AnonPat, - conflicts: lalr.conflicts, - } - dw.write(f) + lalr.write(f) } if err != nil { diff --git a/grammar/lalr1.go b/grammar/lalr1.go index fc15942..f1b8149 100644 --- a/grammar/lalr1.go +++ b/grammar/lalr1.go @@ -42,7 +42,24 @@ func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet) } if p.isEmpty() { - state.emptyProdItems = append(state.emptyProdItems, item) + var reducibleItem *lrItem + for _, it := range state.emptyProdItems { + if it.id != item.id { + continue + } + + reducibleItem = it + break + } + if reducibleItem == nil { + return nil, fmt.Errorf("reducible item not found: %v", item.id) + } + if reducibleItem.lookAhead.symbols == nil { + reducibleItem.lookAhead.symbols = map[symbol]struct{}{} + } + for a := range item.lookAhead.symbols { + reducibleItem.lookAhead.symbols[a] = struct{}{} + } propDests = append(propDests, &stateAndLRItem{ kernelID: state.id, diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go index 5ac84cd..67ab7d0 100644 --- a/grammar/lalr1_test.go +++ b/grammar/lalr1_test.go @@ -8,12 +8,6 @@ import ( "github.com/nihei9/vartan/spec" ) -type expectedLALR1State struct { - kernelItems []*lrItem - nextStates map[symbol][]*lrItem - reducibleProds []*production -} - func TestGenLALR1Automaton(t *testing.T) { // This grammar belongs to LALR(1) class, not SLR(1). src := ` @@ -54,7 +48,7 @@ id: "[A-Za-z0-9_]+"; if err != nil { t.Fatalf("failed to create a LALR1 automaton: %v", err) } - if lr0 == nil { + if automaton == nil { t.Fatalf("genLALR1Automaton returns nil without any error") } @@ -101,7 +95,7 @@ id: "[A-Za-z0-9_]+"; }, } - expectedStates := []expectedLALR1State{ + expectedStates := []expectedLRState{ { kernelItems: expectedKernels[0], nextStates: map[symbol][]*lrItem{ @@ -220,6 +214,10 @@ id: "[A-Za-z0-9_]+"; t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) } + if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { + t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) + } + for eSym := range eKItem.lookAhead.symbols { if _, ok := kItem.lookAhead.symbols[eSym]; !ok { t.Errorf("look-ahead symbol not found: %v", eSym) @@ -258,6 +256,23 @@ id: "[A-Za-z0-9_]+"; t.Errorf("reducible production was not found: %v", eProd.id) } } + + if len(state.emptyProdItems) != len(eState.emptyProdItems) { + t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) + } + for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } } }) } diff --git a/grammar/lr0.go b/grammar/lr0.go index 9e3bb2e..20952c8 100644 --- a/grammar/lr0.go +++ b/grammar/lr0.go @@ -85,16 +85,28 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []* } reducible := map[productionID]struct{}{} + var emptyProdItems []*lrItem for _, item := range items { - if item.reducible { - reducible[item.prod] = struct{}{} + if !item.reducible { + continue + } + + reducible[item.prod] = struct{}{} + + prod, ok := prods.findByID(item.prod) + if !ok { + return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod) + } + if prod.isEmpty() { + emptyProdItems = append(emptyProdItems, item) } } return &lrState{ - kernel: k, - next: next, - reducible: reducible, + kernel: k, + next: next, + reducible: reducible, + emptyProdItems: emptyProdItems, }, kernels, nil } diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go index 9000483..aea028c 100644 --- a/grammar/lr0_test.go +++ b/grammar/lr0_test.go @@ -8,10 +8,11 @@ import ( "github.com/nihei9/vartan/spec" ) -type expectedLR0State struct { +type expectedLRState struct { kernelItems []*lrItem nextStates map[symbol][]*lrItem reducibleProds []*production + emptyProdItems []*lrItem } func TestGenLR0Automaton(t *testing.T) { @@ -107,7 +108,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, } - expectedStates := []expectedLR0State{ + expectedStates := []expectedLRState{ { kernelItems: expectedKernels[0], nextStates: map[symbol][]*lrItem{ @@ -260,6 +261,203 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; t.Errorf("reducible production was not found: %v", eProd.id) } } + + if len(state.emptyProdItems) != len(eState.emptyProdItems) { + t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) + } + for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } + } + }) + } +} + +func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { + src := ` +s + : foo bar + ; +foo + : + ; +bar + : BAR + | + ; +BAR: "bar"; +` + + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err := b.Build() + if err != nil { + t.Fatal(err) + } + + automaton, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("s'", 0, "s"), + }, + 1: { + genLR0Item("s'", 1, "s"), + }, + 2: { + genLR0Item("s", 1, "foo", "bar"), + }, + 3: { + genLR0Item("s", 2, "foo", "bar"), + }, + 4: { + genLR0Item("bar", 1, "BAR"), + }, + } + + expectedStates := []expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("foo"): expectedKernels[2], + }, + reducibleProds: []*production{ + genProd("foo"), + }, + emptyProdItems: []*lrItem{ + genLR0Item("foo", 0), + }, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s'", "s"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol][]*lrItem{ + genSym("bar"): expectedKernels[3], + genSym("BAR"): expectedKernels[4], + }, + reducibleProds: []*production{ + genProd("bar"), + }, + emptyProdItems: []*lrItem{ + genLR0Item("bar", 0), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "foo", "bar"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("bar", "BAR"), + }, + }, + } + + if len(automaton.states) != len(expectedStates) { + t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states)) + } + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("a kernel was not found: %v", k.id) + } + + // test next states + { + if len(state.next) != len(eState.nextStates) { + t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) + } + for eSym, eKItems := range eState.nextStates { + nextStateKernel, err := newKernel(eKItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + nextState, ok := state.next[eSym] + if !ok { + t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) + } + if nextState != nextStateKernel.id { + t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) + } + } + } + + // test reducible productions + { + if len(state.reducible) != len(eState.reducibleProds) { + t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) + } + for _, eProd := range eState.reducibleProds { + if _, ok := state.reducible[eProd.id]; !ok { + t.Errorf("reducible production was not found: %v", eProd.id) + } + } + + if len(state.emptyProdItems) != len(eState.emptyProdItems) { + t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) + } + for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } } }) } diff --git a/grammar/parsing_table_builder.go b/grammar/parsing_table_builder.go index e209c94..82b0d34 100644 --- a/grammar/parsing_table_builder.go +++ b/grammar/parsing_table_builder.go @@ -165,8 +165,8 @@ func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum) t.goToTable[pos] = newGoToEntry(nextState) } -type lalrTableBuilder struct { - automaton *lalr1Automaton +type lrTableBuilder struct { + automaton *lr0Automaton prods *productionSet termCount int nonTermCount int @@ -176,7 +176,7 @@ type lalrTableBuilder struct { conflicts []conflict } -func (b *lalrTableBuilder) build() (*ParsingTable, error) { +func (b *lrTableBuilder) build() (*ParsingTable, error) { var ptab *ParsingTable { initialState := b.automaton.states[b.automaton.initialState] @@ -262,104 +262,9 @@ func (b *lalrTableBuilder) build() (*ParsingTable, error) { return ptab, nil } -type slrTableBuilder struct { - automaton *lr0Automaton - prods *productionSet - follow *followSet - termCount int - nonTermCount int - symTab *symbolTable - sym2AnonPat map[symbol]string - - conflicts []conflict -} - -func (b *slrTableBuilder) build() (*ParsingTable, error) { - var ptab *ParsingTable - { - initialState := b.automaton.states[b.automaton.initialState] - ptab = &ParsingTable{ - actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), - goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), - stateCount: len(b.automaton.states), - terminalCount: b.termCount, - nonTerminalCount: b.nonTermCount, - expectedTerminals: make([][]int, len(b.automaton.states)), - InitialState: initialState.num, - } - } - - var conflicts []conflict - for _, state := range b.automaton.states { - var eTerms []int - - for sym, kID := range state.next { - nextState := b.automaton.states[kID] - if sym.isTerminal() { - eTerms = append(eTerms, sym.num().Int()) - - c := ptab.writeShiftAction(state.num, sym, nextState.num) - if c != nil { - conflicts = append(conflicts, c) - continue - } - } else { - ptab.writeGoTo(state.num, sym, nextState.num) - } - } - - for prodID := range state.reducible { - prod, _ := b.prods.findByID(prodID) - flw, err := b.follow.find(prod.lhs) - if err != nil { - return nil, err - } - for sym := range flw.symbols { - eTerms = append(eTerms, sym.num().Int()) - - c := ptab.writeReduceAction(state.num, sym, prod.num) - if c != nil { - conflicts = append(conflicts, c) - continue - } - } - if flw.eof { - eTerms = append(eTerms, symbolEOF.num().Int()) - - c := ptab.writeReduceAction(state.num, symbolEOF, prod.num) - if c != nil { - conflicts = append(conflicts, c) - continue - } - } - } - - ptab.expectedTerminals[state.num] = eTerms - } - - b.conflicts = conflicts - - if len(conflicts) > 0 { - return nil, fmt.Errorf("%v conflicts", len(conflicts)) - } - - return ptab, nil -} - -type descriptionWriter struct { - automaton *lr0Automaton - prods *productionSet - follow *followSet - termCount int - nonTermCount int - symTab *symbolTable - sym2AnonPat map[symbol]string - conflicts []conflict -} - -func (dw *descriptionWriter) write(w io.Writer) { +func (b *lrTableBuilder) write(w io.Writer) { conflicts := map[stateNum][]conflict{} - for _, con := range dw.conflicts { + for _, con := range b.conflicts { switch c := con.(type) { case *shiftReduceConflict: conflicts[c.state] = append(conflicts[c.state], c) @@ -370,15 +275,15 @@ func (dw *descriptionWriter) write(w io.Writer) { fmt.Fprintf(w, "# Conflicts\n\n") - if len(dw.conflicts) > 0 { - fmt.Fprintf(w, "%v conflics:\n\n", len(dw.conflicts)) + if len(b.conflicts) > 0 { + fmt.Fprintf(w, "%v conflics:\n\n", len(b.conflicts)) - for _, conflict := range dw.conflicts { + for _, conflict := range b.conflicts { switch c := conflict.(type) { case *shiftReduceConflict: - fmt.Fprintf(w, "%v: shift/reduce conflict (shift %v, reduce %v) on %v\n", c.state, c.nextState, c.prodNum, dw.symbolToText(c.sym)) + fmt.Fprintf(w, "%v: shift/reduce conflict (shift %v, reduce %v) on %v\n", c.state, c.nextState, c.prodNum, b.symbolToText(c.sym)) case *reduceReduceConflict: - fmt.Fprintf(w, "%v: reduce/reduce conflict (reduce %v and %v) on %v\n", c.state, c.prodNum1, c.prodNum2, dw.symbolToText(c.sym)) + fmt.Fprintf(w, "%v: reduce/reduce conflict (reduce %v and %v) on %v\n", c.state, c.prodNum1, c.prodNum2, b.symbolToText(c.sym)) } } fmt.Fprintf(w, "\n") @@ -388,17 +293,17 @@ func (dw *descriptionWriter) write(w io.Writer) { fmt.Fprintf(w, "# Terminals\n\n") - termSyms := dw.symTab.terminalSymbols() + termSyms := b.symTab.terminalSymbols() fmt.Fprintf(w, "%v symbols:\n\n", len(termSyms)) for _, sym := range termSyms { - text, ok := dw.symTab.toText(sym) + text, ok := b.symTab.toText(sym) if !ok { text = fmt.Sprintf("<symbol not found: %v>", sym) } if strings.HasPrefix(text, "_") { - fmt.Fprintf(w, "%4v %v: \"%v\"\n", sym.num(), text, dw.sym2AnonPat[sym]) + fmt.Fprintf(w, "%4v %v: \"%v\"\n", sym.num(), text, b.sym2AnonPat[sym]) } else { fmt.Fprintf(w, "%4v %v\n", sym.num(), text) } @@ -408,25 +313,25 @@ func (dw *descriptionWriter) write(w io.Writer) { fmt.Fprintf(w, "# Productions\n\n") - fmt.Fprintf(w, "%v productions:\n\n", len(dw.prods.getAllProductions())) + fmt.Fprintf(w, "%v productions:\n\n", len(b.prods.getAllProductions())) - for _, prod := range dw.prods.getAllProductions() { - fmt.Fprintf(w, "%4v %v\n", prod.num, dw.productionToString(prod, -1)) + for _, prod := range b.prods.getAllProductions() { + fmt.Fprintf(w, "%4v %v\n", prod.num, b.productionToString(prod, -1)) } fmt.Fprintf(w, "\n# States\n\n") - fmt.Fprintf(w, "%v states:\n\n", len(dw.automaton.states)) + fmt.Fprintf(w, "%v states:\n\n", len(b.automaton.states)) - for _, state := range dw.automaton.states { + for _, state := range b.automaton.states { fmt.Fprintf(w, "state %v\n", state.num) for _, item := range state.items { - prod, ok := dw.prods.findByID(item.prod) + prod, ok := b.prods.findByID(item.prod) if !ok { fmt.Fprintf(w, "<production not found>\n") continue } - fmt.Fprintf(w, " %v\n", dw.productionToString(prod, item.dot)) + fmt.Fprintf(w, " %v\n", b.productionToString(prod, item.dot)) } fmt.Fprintf(w, "\n") @@ -437,16 +342,16 @@ func (dw *descriptionWriter) write(w io.Writer) { var accRec string { for sym, kID := range state.next { - nextState := dw.automaton.states[kID] + nextState := b.automaton.states[kID] if sym.isTerminal() { - shiftRecs = append(shiftRecs, fmt.Sprintf("shift %4v on %v", nextState.num, dw.symbolToText(sym))) + shiftRecs = append(shiftRecs, fmt.Sprintf("shift %4v on %v", nextState.num, b.symbolToText(sym))) } else { - gotoRecs = append(gotoRecs, fmt.Sprintf("goto %4v on %v", nextState.num, dw.symbolToText(sym))) + gotoRecs = append(gotoRecs, fmt.Sprintf("goto %4v on %v", nextState.num, b.symbolToText(sym))) } } for prodID := range state.reducible { - prod, ok := dw.prods.findByID(prodID) + prod, ok := b.prods.findByID(prodID) if !ok { reduceRecs = append(reduceRecs, "<production not found>") continue @@ -456,21 +361,17 @@ func (dw *descriptionWriter) write(w io.Writer) { continue } - if dw.follow != nil { - flw, err := dw.follow.find(prod.lhs) - if err != nil { - reduceRecs = append(reduceRecs, fmt.Sprintf("%v", err)) + var reducibleItem *lrItem + for _, item := range state.items { + if item.prod != prodID { continue } - for sym := range flw.symbols { - reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, dw.symbolToText(sym))) - } - if flw.eof { - reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on <EOF>", prod.num)) - } - } else { - var reducibleItem *lrItem - for _, item := range state.items { + + reducibleItem = item + break + } + if reducibleItem == nil { + for _, item := range state.emptyProdItems { if item.prod != prodID { continue } @@ -479,23 +380,13 @@ func (dw *descriptionWriter) write(w io.Writer) { break } if reducibleItem == nil { - for _, item := range state.emptyProdItems { - if item.prod != prodID { - continue - } - - reducibleItem = item - break - } - if reducibleItem == nil { - reduceRecs = append(reduceRecs, "<item not found>") - continue - } - } - for a := range reducibleItem.lookAhead.symbols { - reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, dw.symbolToText(a))) + reduceRecs = append(reduceRecs, "<item not found>") + continue } } + for a := range reducibleItem.lookAhead.symbols { + reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, b.symbolToText(a))) + } } } @@ -523,9 +414,9 @@ func (dw *descriptionWriter) write(w io.Writer) { for _, con := range cons { switch c := con.(type) { case *shiftReduceConflict: - fmt.Fprintf(w, " shift/reduce conflict (shift %v, reduce %v) on %v\n", c.nextState, c.prodNum, dw.symbolToText(c.sym)) + fmt.Fprintf(w, " shift/reduce conflict (shift %v, reduce %v) on %v\n", c.nextState, c.prodNum, b.symbolToText(c.sym)) case *reduceReduceConflict: - fmt.Fprintf(w, " reduce/reduce conflict (reduce %v and %v) on %v\n", c.prodNum1, c.prodNum2, dw.symbolToText(c.sym)) + fmt.Fprintf(w, " reduce/reduce conflict (reduce %v and %v) on %v\n", c.prodNum1, c.prodNum2, b.symbolToText(c.sym)) } } fmt.Fprintf(w, "\n") @@ -533,14 +424,14 @@ func (dw *descriptionWriter) write(w io.Writer) { } } -func (dw *descriptionWriter) productionToString(prod *production, dot int) string { +func (b *lrTableBuilder) productionToString(prod *production, dot int) string { var w strings.Builder - fmt.Fprintf(&w, "%v →", dw.symbolToText(prod.lhs)) + fmt.Fprintf(&w, "%v →", b.symbolToText(prod.lhs)) for n, rhs := range prod.rhs { if n == dot { fmt.Fprintf(&w, " ・") } - fmt.Fprintf(&w, " %v", dw.symbolToText(rhs)) + fmt.Fprintf(&w, " %v", b.symbolToText(rhs)) } if dot == len(prod.rhs) { fmt.Fprintf(&w, " ・") @@ -549,7 +440,7 @@ func (dw *descriptionWriter) productionToString(prod *production, dot int) strin return w.String() } -func (dw *descriptionWriter) symbolToText(sym symbol) string { +func (b *lrTableBuilder) symbolToText(sym symbol) string { if sym.isNil() { return "<NULL>" } @@ -557,13 +448,13 @@ func (dw *descriptionWriter) symbolToText(sym symbol) string { return "<EOF>" } - text, ok := dw.symTab.toText(sym) + text, ok := b.symTab.toText(sym) if !ok { return fmt.Sprintf("<symbol not found: %v>", sym) } if strings.HasPrefix(text, "_") { - pat, ok := dw.sym2AnonPat[sym] + pat, ok := b.sym2AnonPat[sym] if !ok { return fmt.Sprintf("<pattern not found: %v>", text) } diff --git a/grammar/parsing_table_builder_test.go b/grammar/parsing_table_builder_test.go index 18add81..95bc543 100644 --- a/grammar/parsing_table_builder_test.go +++ b/grammar/parsing_table_builder_test.go @@ -65,8 +65,8 @@ id: "[A-Za-z0-9_]+"; nonTermCount = len(nonTermTexts) termCount = len(termTexts) - lalr := &lalrTableBuilder{ - automaton: automaton, + lalr := &lrTableBuilder{ + automaton: automaton.lr0Automaton, prods: gram.productionSet, termCount: termCount, nonTermCount: nonTermCount, @@ -306,7 +306,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; ` var ptab *ParsingTable - var automaton *lr0Automaton + var automaton *slr1Automaton var gram *Grammar var nonTermCount int var termCount int @@ -330,7 +330,11 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; if err != nil { t.Fatal(err) } - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatal(err) + } + automaton, err = genSLR1Automaton(lr0, gram.productionSet, follow) if err != nil { t.Fatal(err) } @@ -346,10 +350,9 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; nonTermCount = len(nonTermTexts) termCount = len(termTexts) - slr := &slrTableBuilder{ - automaton: automaton, + slr := &lrTableBuilder{ + automaton: automaton.lr0Automaton, prods: gram.productionSet, - follow: follow, termCount: termCount, nonTermCount: nonTermCount, symTab: gram.symbolTable, @@ -410,11 +413,6 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, } - // expectedStates := []struct { - // kernelItems []*lrItem - // acts map[symbol]testActionEntry - // goTos map[symbol][]*lrItem - // }{ expectedStates := []expectedState{ { kernelItems: expectedKernels[0], @@ -664,85 +662,8 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; t.Fatalf("state was not found: #%v", 0) } - testAction(t, &eState, state, ptab, automaton, gram, termCount) - testGoTo(t, &eState, state, ptab, automaton, nonTermCount) - - // ACTION - { - // nonEmptyEntries := map[symbolNum]struct{}{} - // for eSym, eAct := range eState.acts { - // nonEmptyEntries[eSym.num()] = struct{}{} - - // ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num()) - // if ty != eAct.ty { - // t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) - // } - // switch eAct.ty { - // case ActionTypeShift: - // eNextState, err := newKernel(eAct.nextState) - // if err != nil { - // t.Fatal(err) - // } - // nextState := findStateByNum(automaton.states, stateNum) - // if nextState == nil { - // t.Fatalf("state was not found; state: #%v", stateNum) - // } - // if nextState.id != eNextState.id { - // t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - // } - // case ActionTypeReduce: - // prod := findProductionByNum(gram.productionSet, prodNum) - // if prod == nil { - // t.Fatalf("production was not found: #%v", prodNum) - // } - // if prod.id != eAct.production.id { - // t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) - // } - // } - // } - // for symNum := 0; symNum < termCount; symNum++ { - // if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { - // continue - // } - // ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum)) - // if ty != ActionTypeError { - // t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) - // } - // } - } - - // GOTO - { - // nonEmptyEntries := map[symbolNum]struct{}{} - // for eSym, eGoTo := range eState.goTos { - // nonEmptyEntries[eSym.num()] = struct{}{} - - // eNextState, err := newKernel(eGoTo) - // if err != nil { - // t.Fatal(err) - // } - // ty, stateNum := ptab.getGoTo(state.num, eSym.num()) - // if ty != GoToTypeRegistered { - // t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) - // } - // nextState := findStateByNum(automaton.states, stateNum) - // if nextState == nil { - // t.Fatalf("state was not found: #%v", stateNum) - // } - // if nextState.id != eNextState.id { - // t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - // } - // } - // for symNum := 0; symNum < nonTermCount; symNum++ { - // if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { - // continue - // } - // ty, _ := ptab.getGoTo(state.num, symbolNum(symNum)) - // if ty != GoToTypeError { - // t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) - // } - // } - } + testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) + testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) }) } } diff --git a/grammar/slr1.go b/grammar/slr1.go new file mode 100644 index 0000000..b11a66f --- /dev/null +++ b/grammar/slr1.go @@ -0,0 +1,61 @@ +package grammar + +import "fmt" + +type slr1Automaton struct { + *lr0Automaton +} + +func genSLR1Automaton(lr0 *lr0Automaton, prods *productionSet, follow *followSet) (*slr1Automaton, error) { + for _, state := range lr0.states { + for prodID := range state.reducible { + prod, ok := prods.findByID(prodID) + if !ok { + return nil, fmt.Errorf("reducible production not found: %v", prodID) + } + + flw, err := follow.find(prod.lhs) + if err != nil { + return nil, err + } + + var reducibleItem *lrItem + for _, item := range state.items { + if item.prod != prodID { + continue + } + + reducibleItem = item + break + } + if reducibleItem == nil { + for _, item := range state.emptyProdItems { + if item.prod != prodID { + continue + } + + reducibleItem = item + break + } + if reducibleItem == nil { + return nil, fmt.Errorf("reducible item not found; state: %v, production: %v", state.num, prodID) + } + } + + if reducibleItem.lookAhead.symbols == nil { + reducibleItem.lookAhead.symbols = map[symbol]struct{}{} + } + + for sym := range flw.symbols { + reducibleItem.lookAhead.symbols[sym] = struct{}{} + } + if flw.eof { + reducibleItem.lookAhead.symbols[symbolEOF] = struct{}{} + } + } + } + + return &slr1Automaton{ + lr0Automaton: lr0, + }, nil +} diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go new file mode 100644 index 0000000..1296bd8 --- /dev/null +++ b/grammar/slr1_test.go @@ -0,0 +1,325 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +func TestGenSLR1Automaton(t *testing.T) { + src := ` +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err := b.Build() + if err != nil { + t.Fatal(err) + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if lr0 == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatalf("failed to create a FIRST set: %v", err) + } + + followSet, err := genFollowSet(gram.productionSet, firstSet) + if err != nil { + t.Fatalf("failed to create a FOLLOW set: %v", err) + } + + automaton, err := genSLR1Automaton(lr0, gram.productionSet, followSet) + if err != nil { + t.Fatalf("failed to create a SLR1 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genSLR1Automaton returns nil without any error") + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + withLookAhead(genLR0Item("expr'", 1, "expr"), symbolEOF), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + withLookAhead(genLR0Item("expr", 1, "term"), genSym("add"), genSym("r_paren"), symbolEOF), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + withLookAhead(genLR0Item("term", 1, "factor"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + withLookAhead(genLR0Item("factor", 1, "id"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + withLookAhead(genLR0Item("expr", 3, "expr", "add", "term"), genSym("add"), genSym("r_paren"), symbolEOF), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 10: { + withLookAhead(genLR0Item("term", 3, "term", "mul", "factor"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF), + }, + 11: { + withLookAhead(genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), genSym("add"), genSym("mul"), genSym("r_paren"), symbolEOF), + }, + } + + expectedStates := []expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol][]*lrItem{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("expr'", "expr"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "term"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "factor"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol][]*lrItem{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol][]*lrItem{ + genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol][]*lrItem{ + genSym("factor"): expectedKernels[10], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + genSym("r_paren"): expectedKernels[11], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "expr", "add", "term"), + }, + }, + { + kernelItems: expectedKernels[10], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "term", "mul", "factor"), + }, + }, + { + kernelItems: expectedKernels[11], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + } + + if len(automaton.states) != len(expectedStates) { + t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states)) + } + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("a kernel was not found: %v", k.id) + } + + // test look-ahead symbols + { + if len(state.kernel.items) != len(eState.kernelItems) { + t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) + } + for _, eKItem := range eState.kernelItems { + var kItem *lrItem + for _, it := range state.kernel.items { + if it.id != eKItem.id { + continue + } + kItem = it + break + } + if kItem == nil { + t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) + } + + if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { + t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) + } + + for eSym := range eKItem.lookAhead.symbols { + if _, ok := kItem.lookAhead.symbols[eSym]; !ok { + t.Errorf("look-ahead symbol not found: %v", eSym) + } + } + } + } + + // test next states + { + if len(state.next) != len(eState.nextStates) { + t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) + } + for eSym, eKItems := range eState.nextStates { + nextStateKernel, err := newKernel(eKItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + nextState, ok := state.next[eSym] + if !ok { + t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) + } + if nextState != nextStateKernel.id { + t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) + } + } + } + + // test reducible productions + { + if len(state.reducible) != len(eState.reducibleProds) { + t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) + } + for _, eProd := range eState.reducibleProds { + if _, ok := state.reducible[eProd.id]; !ok { + t.Errorf("reducible production was not found: %v", eProd.id) + } + } + + if len(state.emptyProdItems) != len(eState.emptyProdItems) { + t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) + } + for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } + } + }) + } +} |