diff options
-rw-r--r-- | cmd/vartan/compile.go | 16 | ||||
-rw-r--r-- | driver/parser_test.go | 87 | ||||
-rw-r--r-- | grammar/grammar.go | 119 | ||||
-rw-r--r-- | grammar/item.go | 198 | ||||
-rw-r--r-- | grammar/lalr1.go | 297 | ||||
-rw-r--r-- | grammar/lalr1_test.go | 264 | ||||
-rw-r--r-- | grammar/lr0.go | 179 | ||||
-rw-r--r-- | grammar/lr0_item.go | 343 | ||||
-rw-r--r-- | grammar/lr0_test.go (renamed from grammar/lr0_item_test.go) | 38 | ||||
-rw-r--r-- | grammar/parsing_table_builder.go (renamed from grammar/slr.go) | 216 | ||||
-rw-r--r-- | grammar/parsing_table_builder_test.go | 847 | ||||
-rw-r--r-- | grammar/slr_test.go | 492 | ||||
-rw-r--r-- | grammar/test_helper_test.go | 16 |
13 files changed, 2151 insertions, 961 deletions
diff --git a/cmd/vartan/compile.go b/cmd/vartan/compile.go index f29e972..6fdc6a9 100644 --- a/cmd/vartan/compile.go +++ b/cmd/vartan/compile.go @@ -18,6 +18,7 @@ import ( var compileFlags = struct { grammar *string output *string + class *string }{} func init() { @@ -29,6 +30,7 @@ func init() { } compileFlags.grammar = cmd.Flags().StringP("grammar", "g", "", "grammar file path (default stdin)") compileFlags.output = cmd.Flags().StringP("output", "o", "", "output file path (default stdout)") + compileFlags.class = cmd.Flags().StringP("class", "", "lalr", "LALR or SLR") rootCmd.AddCommand(cmd) } @@ -103,7 +105,19 @@ func runCompile(cmd *cobra.Command, args []string) (retErr error) { descFileName = fmt.Sprintf("%v.desc", strings.TrimSuffix(grmFileName, ".vr")) } - cgram, err := grammar.Compile(gram, grammar.EnableDescription(descFileName)) + opts := []grammar.CompileOption{ + grammar.EnableDescription(descFileName), + } + switch strings.ToLower(*compileFlags.class) { + case "slr": + opts = append(opts, grammar.SpecifyClass(grammar.ClassSLR)) + case "lalr": + opts = append(opts, grammar.SpecifyClass(grammar.ClassLALR)) + default: + return fmt.Errorf("invalid class: %v", *compileFlags.class) + } + + cgram, err := grammar.Compile(gram, opts...) if err != nil { return err } diff --git a/driver/parser_test.go b/driver/parser_test.go index a46de9b..0776232 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -396,57 +396,64 @@ b: "a"; specErr: true, }, } + + classes := []grammar.Class{ + grammar.ClassSLR, + grammar.ClassLALR, + } + for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - ast, err := spec.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } + for _, class := range classes { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ast, err := spec.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } - b := grammar.GrammarBuilder{ - AST: ast, - } - g, err := b.Build() - if tt.specErr { - if err == nil { - t.Fatal("an expected error didn't occur") + b := grammar.GrammarBuilder{ + AST: ast, + } + g, err := b.Build() + if tt.specErr { + if err == nil { + t.Fatal("an expected error didn't occur") + } + return + } else { + if err != nil { + t.Fatal(err) + } } - // fmt.Printf("error: %v\n", err) - return - } else { + + gram, err := grammar.Compile(g, grammar.SpecifyClass(class)) if err != nil { t.Fatal(err) } - } - - gram, err := grammar.Compile(g) - if err != nil { - t.Fatal(err) - } - p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST()) - if err != nil { - t.Fatal(err) - } + p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST()) + if err != nil { + t.Fatal(err) + } - err = p.Parse() - if err != nil { - t.Fatal(err) - } + err = p.Parse() + if err != nil { + t.Fatal(err) + } - if tt.cst != nil { - testTree(t, p.CST(), tt.cst) - } + if tt.cst != nil { + testTree(t, p.CST(), tt.cst) + } - if tt.ast != nil { - testTree(t, p.AST(), tt.ast) - } + if tt.ast != nil { + testTree(t, p.AST(), tt.ast) + } - fmt.Println("CST:") - PrintTree(os.Stdout, p.CST()) - fmt.Println("AST:") - PrintTree(os.Stdout, p.AST()) - }) + fmt.Println("CST:") + PrintTree(os.Stdout, p.CST()) + fmt.Println("AST:") + PrintTree(os.Stdout, p.AST()) + }) + } } } diff --git a/grammar/grammar.go b/grammar/grammar.go index a7f80ca..2b0f648 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -605,19 +605,33 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd }, nil } +type Class string + +const ( + ClassSLR Class = "SLR(1)" + ClassLALR Class = "LALR(1)" +) + type compileConfig struct { descriptionFileName string + class Class } -type compileOption func(config *compileConfig) +type CompileOption func(config *compileConfig) -func EnableDescription(fileName string) compileOption { +func EnableDescription(fileName string) CompileOption { return func(config *compileConfig) { config.descriptionFileName = fileName } } -func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error) { +func SpecifyClass(class Class) CompileOption { + return func(config *compileConfig) { + config.class = class + } +} + +func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error) { config := &compileConfig{} for _, opt := range opts { opt(config) @@ -669,36 +683,91 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error return nil, err } - followSet, err := genFollowSet(gram.productionSet, firstSet) - if err != nil { - return nil, err - } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) if err != nil { return nil, err } - slr := &slrTableBuilder{ - automaton: lr0, - prods: gram.productionSet, - follow: followSet, - termCount: len(terms), - nonTermCount: len(nonTerms), - symTab: gram.symbolTable, - sym2AnonPat: gram.sym2AnonPat, - } - tab, err := slr.build() - if config.descriptionFileName != "" { - f, err := os.OpenFile(config.descriptionFileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + var tab *ParsingTable + switch config.class { + case ClassSLR: + followSet, err := genFollowSet(gram.productionSet, firstSet) + if err != nil { + return nil, err + } + + slr := &slrTableBuilder{ + automaton: lr0, + prods: gram.productionSet, + follow: followSet, + termCount: len(terms), + nonTermCount: len(nonTerms), + symTab: gram.symbolTable, + sym2AnonPat: gram.sym2AnonPat, + } + tab, err = slr.build() + + if config.descriptionFileName != "" { + f, err := os.OpenFile(config.descriptionFileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, err + } + defer f.Close() + + dw := &descriptionWriter{ + automaton: slr.automaton, + prods: slr.prods, + follow: slr.follow, + termCount: slr.termCount, + nonTermCount: slr.nonTermCount, + symTab: slr.symTab, + sym2AnonPat: slr.sym2AnonPat, + conflicts: slr.conflicts, + } + dw.write(f) + } + + if err != nil { + return nil, err + } + case ClassLALR: + lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet) + if err != nil { + return nil, err + } + + lalr := &lalrTableBuilder{ + automaton: lalr1, + prods: gram.productionSet, + termCount: len(terms), + nonTermCount: len(nonTerms), + symTab: gram.symbolTable, + sym2AnonPat: gram.sym2AnonPat, + } + tab, err = lalr.build() + + if config.descriptionFileName != "" { + f, err := os.OpenFile(config.descriptionFileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return nil, err + } + defer f.Close() + + dw := &descriptionWriter{ + automaton: lalr.automaton.lr0Automaton, + prods: lalr.prods, + termCount: lalr.termCount, + nonTermCount: lalr.nonTermCount, + symTab: lalr.symTab, + sym2AnonPat: lalr.sym2AnonPat, + conflicts: lalr.conflicts, + } + dw.write(f) + } + if err != nil { return nil, err } - defer f.Close() - slr.writeDescription(f) - } - if err != nil { - return nil, err } action := make([]int, len(tab.actionTable)) diff --git a/grammar/item.go b/grammar/item.go new file mode 100644 index 0000000..153aea4 --- /dev/null +++ b/grammar/item.go @@ -0,0 +1,198 @@ +package grammar + +import ( + "crypto/sha256" + "encoding/binary" + "fmt" + "sort" + "strconv" +) + +type lrItemID [32]byte + +func (id lrItemID) String() string { + return fmt.Sprintf("%x", id.num()) +} + +func (id lrItemID) num() uint32 { + return binary.LittleEndian.Uint32(id[:]) +} + +type lookAhead struct { + symbols map[symbol]struct{} + + // When propagation is true, an item propagates look-ahead symbols to other items. + propagation bool +} + +type lrItem struct { + id lrItemID + prod productionID + + // E → E + T + // + // Dot | Dotted Symbol | Item + // ----+---------------+------------ + // 0 | E | E →・E + T + // 1 | + | E → E・+ T + // 2 | T | E → E +・T + // 3 | Nil | E → E + T・ + dot int + dottedSymbol symbol + + // When initial is true, the LHS of the production is the augmented start symbol and dot is 0. + // It looks like S' →・S. + initial bool + + // When reducible is true, the item looks like E → E + T・. + reducible bool + + // When kernel is true, the item is kernel item. + kernel bool + + // lookAhead stores look-ahead symbols, and they are terminal symbols. + // The item is reducible only when the look-ahead symbols appear as the next input symbol. + lookAhead lookAhead +} + +func newLR0Item(prod *production, dot int) (*lrItem, error) { + if prod == nil { + return nil, fmt.Errorf("production must be non-nil") + } + + if dot < 0 || dot > prod.rhsLen { + return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen) + } + + var id lrItemID + { + b := []byte{} + b = append(b, prod.id[:]...) + bDot := make([]byte, 8) + binary.LittleEndian.PutUint64(bDot, uint64(dot)) + b = append(b, bDot...) + id = sha256.Sum256(b) + } + + dottedSymbol := symbolNil + if dot < prod.rhsLen { + dottedSymbol = prod.rhs[dot] + } + + initial := false + if prod.lhs.isStart() && dot == 0 { + initial = true + } + + reducible := false + if dot == prod.rhsLen { + reducible = true + } + + kernel := false + if initial || dot > 0 { + kernel = true + } + + item := &lrItem{ + id: id, + prod: prod.id, + dot: dot, + dottedSymbol: dottedSymbol, + initial: initial, + reducible: reducible, + kernel: kernel, + } + + return item, nil +} + +type kernelID [32]byte + +func (id kernelID) String() string { + return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:])) +} + +type kernel struct { + id kernelID + items []*lrItem +} + +func newKernel(items []*lrItem) (*kernel, error) { + if len(items) == 0 { + return nil, fmt.Errorf("a kernel need at least one item") + } + + // Remove duplicates from items. + var sortedItems []*lrItem + { + m := map[lrItemID]*lrItem{} + for _, item := range items { + if !item.kernel { + return nil, fmt.Errorf("not a kernel item: %v", item) + } + m[item.id] = item + } + sortedItems = []*lrItem{} + for _, item := range m { + sortedItems = append(sortedItems, item) + } + sort.Slice(sortedItems, func(i, j int) bool { + return sortedItems[i].id.num() < sortedItems[j].id.num() + }) + } + + var id kernelID + { + b := []byte{} + for _, item := range sortedItems { + b = append(b, item.id[:]...) + } + id = sha256.Sum256(b) + } + + return &kernel{ + id: id, + items: sortedItems, + }, nil +} + +type stateNum int + +const stateNumInitial = stateNum(0) + +func (n stateNum) Int() int { + return int(n) +} + +func (n stateNum) String() string { + return strconv.Itoa(int(n)) +} + +func (n stateNum) next() stateNum { + return stateNum(n + 1) +} + +type lrState struct { + *kernel + num stateNum + next map[symbol]kernelID + reducible map[productionID]struct{} + + // emptyProdItems stores items that have an empty production like `p → ε` and is reducible. + // Thus the items emptyProdItems stores are like `p → ・ε`. emptyProdItems is needed to store + // look-ahead symbols because the kernel items don't include these items. + // + // For instance, we have the following productions, and A is a terminal symbol. + // + // s' → s + // s → A | ε + // + // CLOSURE({s' → ・s}) generates the following closure, but the kernel of this closure doesn't + // include `s → ・ε`. + // + // s' → ・s + // s → ・A + // s → ・ε + emptyProdItems []*lrItem +} diff --git a/grammar/lalr1.go b/grammar/lalr1.go new file mode 100644 index 0000000..fc15942 --- /dev/null +++ b/grammar/lalr1.go @@ -0,0 +1,297 @@ +package grammar + +import "fmt" + +type stateAndLRItem struct { + kernelID kernelID + itemID lrItemID +} + +type propagation struct { + src *stateAndLRItem + dest []*stateAndLRItem +} + +type lalr1Automaton struct { + *lr0Automaton +} + +func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet) (*lalr1Automaton, error) { + // Set the look-ahead symbol <EOF> to the initial item: [S' → ・S, $] + iniState := lr0.states[lr0.initialState] + iniState.items[0].lookAhead.symbols = map[symbol]struct{}{ + symbolEOF: {}, + } + + var props []*propagation + for _, state := range lr0.states { + for _, kItem := range state.items { + items, err := genLALR1Closure(kItem, prods, first) + if err != nil { + return nil, err + } + + kItem.lookAhead.propagation = true + + var propDests []*stateAndLRItem + for _, item := range items { + if item.reducible { + p, ok := prods.findByID(item.prod) + if !ok { + return nil, fmt.Errorf("production not found: %v", item.prod) + } + + if p.isEmpty() { + state.emptyProdItems = append(state.emptyProdItems, item) + + propDests = append(propDests, &stateAndLRItem{ + kernelID: state.id, + itemID: item.id, + }) + } + + continue + } + + nextKID := state.next[item.dottedSymbol] + var nextItemID lrItemID + { + p, ok := prods.findByID(item.prod) + if !ok { + return nil, fmt.Errorf("production not found: %v", item.prod) + } + it, err := newLR0Item(p, item.dot+1) + if err != nil { + return nil, fmt.Errorf("failed to generate an item ID: %v", err) + } + nextItemID = it.id + } + + if item.lookAhead.propagation { + propDests = append(propDests, &stateAndLRItem{ + kernelID: nextKID, + itemID: nextItemID, + }) + } else { + nextState := lr0.states[nextKID] + var nextItem *lrItem + for _, it := range nextState.items { + if it.id != nextItemID { + continue + } + nextItem = it + break + } + if nextItem == nil { + return nil, fmt.Errorf("item not found: %v", nextItemID) + } + + if nextItem.lookAhead.symbols == nil { + nextItem.lookAhead.symbols = map[symbol]struct{}{} + } + + for a := range item.lookAhead.symbols { + nextItem.lookAhead.symbols[a] = struct{}{} + } + } + } + if len(propDests) == 0 { + continue + } + + props = append(props, &propagation{ + src: &stateAndLRItem{ + kernelID: state.id, + itemID: kItem.id, + }, + dest: propDests, + }) + } + } + + err := propagateLookAhead(lr0, props) + if err != nil { + return nil, fmt.Errorf("failed to propagate look-ahead symbols: %v", err) + } + + return &lalr1Automaton{ + lr0Automaton: lr0, + }, nil +} + +func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]*lrItem, error) { + items := []*lrItem{} + knownItems := map[lrItemID]map[symbol]struct{}{} + knownItemsProp := map[lrItemID]struct{}{} + uncheckedItems := []*lrItem{} + items = append(items, srcItem) + uncheckedItems = append(uncheckedItems, srcItem) + for len(uncheckedItems) > 0 { + nextUncheckedItems := []*lrItem{} + for _, item := range uncheckedItems { + if item.dottedSymbol.isTerminal() { + continue + } + + p, ok := prods.findByID(item.prod) + if !ok { + return nil, fmt.Errorf("production not found: %v", item.prod) + } + + var fstSyms []symbol + var isFstNullable bool + { + fst, err := first.find(p, item.dot+1) + if err != nil { + return nil, err + } + + fstSyms = make([]symbol, len(fst.symbols)) + i := 0 + for s := range fst.symbols { + fstSyms[i] = s + i++ + } + if fst.empty { + isFstNullable = true + } + } + + ps, _ := prods.findByLHS(item.dottedSymbol) + for _, prod := range ps { + var lookAhead []symbol + { + var lookAheadCount int + if isFstNullable { + lookAheadCount = len(fstSyms) + len(item.lookAhead.symbols) + } else { + lookAheadCount = len(fstSyms) + } + + lookAhead = make([]symbol, lookAheadCount) + i := 0 + for _, s := range fstSyms { + lookAhead[i] = s + i++ + } + if isFstNullable { + for a := range item.lookAhead.symbols { + lookAhead[i] = a + i++ + } + } + } + + for _, a := range lookAhead { + newItem, err := newLR0Item(prod, 0) + if err != nil { + return nil, err + } + if items, exist := knownItems[newItem.id]; exist { + if _, exist := items[a]; exist { + continue + } + } + + newItem.lookAhead.symbols = map[symbol]struct{}{ + a: {}, + } + + items = append(items, newItem) + if knownItems[newItem.id] == nil { + knownItems[newItem.id] = map[symbol]struct{}{} + } + knownItems[newItem.id][a] = struct{}{} + nextUncheckedItems = append(nextUncheckedItems, newItem) + } + + if isFstNullable { + newItem, err := newLR0Item(prod, 0) + if err != nil { + return nil, err + } + if _, exist := knownItemsProp[newItem.id]; exist { + continue + } + + newItem.lookAhead.propagation = true + + items = append(items, newItem) + knownItemsProp[newItem.id] = struct{}{} + nextUncheckedItems = append(nextUncheckedItems, newItem) + } + } + } + uncheckedItems = nextUncheckedItems + } + + return items, nil +} + +func propagateLookAhead(lr0 *lr0Automaton, props []*propagation) error { + for { + changed := false + for _, prop := range props { + srcState, ok := lr0.states[prop.src.kernelID] + if !ok { + return fmt.Errorf("source state not found: %v", prop.src.kernelID) + } + var srcItem *lrItem + for _, item := range srcState.items { + if item.id != prop.src.itemID { + continue + } + srcItem = item + break + } + if srcItem == nil { + return fmt.Errorf("source item not found: %v", prop.src.itemID) + } + + for _, dest := range prop.dest { + destState, ok := lr0.states[dest.kernelID] + if !ok { + return fmt.Errorf("destination state not found: %v", dest.kernelID) + } + var destItem *lrItem + for _, item := range destState.items { + if item.id != dest.itemID { + continue + } + destItem = item + break + } + if destItem == nil { + for _, item := range destState.emptyProdItems { + if item.id != dest.itemID { + continue + } + destItem = item + break + } + if destItem == nil { + return fmt.Errorf("destination item not found: %v", dest.itemID) + } + } + + for a := range srcItem.lookAhead.symbols { + if _, ok := destItem.lookAhead.symbols[a]; ok { + continue + } + + if destItem.lookAhead.symbols == nil { + destItem.lookAhead.symbols = map[symbol]struct{}{} + } + + destItem.lookAhead.symbols[a] = struct{}{} + changed = true + } + } + } + if !changed { + break + } + } + + return nil +} diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go new file mode 100644 index 0000000..5ac84cd --- /dev/null +++ b/grammar/lalr1_test.go @@ -0,0 +1,264 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +type expectedLALR1State struct { + kernelItems []*lrItem + nextStates map[symbol][]*lrItem + reducibleProds []*production +} + +func TestGenLALR1Automaton(t *testing.T) { + // This grammar belongs to LALR(1) class, not SLR(1). + src := ` +S: L eq R | R; +L: ref R | id; +R: L; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err := b.Build() + if err != nil { + t.Fatal(err) + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if lr0 == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatalf("failed to create a FIRST set: %v", err) + } + + automaton, err := genLALR1Automaton(lr0, gram.productionSet, firstSet) + if err != nil { + t.Fatalf("failed to create a LALR1 automaton: %v", err) + } + if lr0 == nil { + t.Fatalf("genLALR1Automaton returns nil without any error") + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("S'", 0, "S"), symbolEOF), + }, + 1: { + withLookAhead(genLR0Item("S'", 1, "S"), symbolEOF), + }, + 2: { + withLookAhead(genLR0Item("S", 1, "L", "eq", "R"), symbolEOF), + withLookAhead(genLR0Item("R", 1, "L"), symbolEOF), + }, + 3: { + withLookAhead(genLR0Item("S", 1, "R"), symbolEOF), + }, + 4: { + withLookAhead(genLR0Item("L", 1, "ref", "R"), genSym("eq"), symbolEOF), + }, + 5: { + withLookAhead(genLR0Item("L", 1, "id"), genSym("eq"), symbolEOF), + }, + 6: { + withLookAhead(genLR0Item("S", 2, "L", "eq", "R"), symbolEOF), + }, + 7: { + withLookAhead(genLR0Item("L", 2, "ref", "R"), genSym("eq"), symbolEOF), + }, + 8: { + withLookAhead(genLR0Item("R", 1, "L"), genSym("eq"), symbolEOF), + }, + 9: { + withLookAhead(genLR0Item("S", 3, "L", "eq", "R"), symbolEOF), + }, + } + + expectedStates := []expectedLALR1State{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol][]*lrItem{ + genSym("S"): expectedKernels[1], + genSym("L"): expectedKernels[2], + genSym("R"): expectedKernels[3], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("S'", "S"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol][]*lrItem{ + genSym("eq"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("R", "L"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("S", "R"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol][]*lrItem{ + genSym("R"): expectedKernels[7], + genSym("L"): expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("L", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol][]*lrItem{ + genSym("R"): expectedKernels[9], + genSym("L"): expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("L", "ref", "R"), + }, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("R", "L"), + }, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("S", "L", "eq", "R"), + }, + }, + } + + if len(automaton.states) != len(expectedStates) { + t.Errorf("state count is mismatched; want: %v, got: %v", len(expectedStates), len(automaton.states)) + } + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("a kernel was not found: %v", k.id) + } + + // test look-ahead symbols + { + if len(state.kernel.items) != len(eState.kernelItems) { + t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) + } + for _, eKItem := range eState.kernelItems { + var kItem *lrItem + for _, it := range state.kernel.items { + if it.id != eKItem.id { + continue + } + kItem = it + break + } + if kItem == nil { + t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) + } + + for eSym := range eKItem.lookAhead.symbols { + if _, ok := kItem.lookAhead.symbols[eSym]; !ok { + t.Errorf("look-ahead symbol not found: %v", eSym) + } + } + } + } + + // test next states + { + if len(state.next) != len(eState.nextStates) { + t.Errorf("next state count is mismatched; want: %v, got: %v", len(eState.nextStates), len(state.next)) + } + for eSym, eKItems := range eState.nextStates { + nextStateKernel, err := newKernel(eKItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + nextState, ok := state.next[eSym] + if !ok { + t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) + } + if nextState != nextStateKernel.id { + t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) + } + } + } + + // test reducible productions + { + if len(state.reducible) != len(eState.reducibleProds) { + t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) + } + for _, eProd := range eState.reducibleProds { + if _, ok := state.reducible[eProd.id]; !ok { + t.Errorf("reducible production was not found: %v", eProd.id) + } + } + } + }) + } +} diff --git a/grammar/lr0.go b/grammar/lr0.go new file mode 100644 index 0000000..9e3bb2e --- /dev/null +++ b/grammar/lr0.go @@ -0,0 +1,179 @@ +package grammar + +import ( + "fmt" + "sort" +) + +type lr0Automaton struct { + initialState kernelID + states map[kernelID]*lrState +} + +func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) { + if !startSym.isStart() { + return nil, fmt.Errorf("passed symbold is not a start symbol") + } + + automaton := &lr0Automaton{ + states: map[kernelID]*lrState{}, + } + + currentState := stateNumInitial + knownKernels := map[kernelID]struct{}{} + uncheckedKernels := []*kernel{} + + // Generate an initial kernel. + { + prods, _ := prods.findByLHS(startSym) + initialItem, err := newLR0Item(prods[0], 0) + if err != nil { + return nil, err + } + + k, err := newKernel([]*lrItem{initialItem}) + if err != nil { + return nil, err + } + + automaton.initialState = k.id + knownKernels[k.id] = struct{}{} + uncheckedKernels = append(uncheckedKernels, k) + } + + for len(uncheckedKernels) > 0 { + nextUncheckedKernels := []*kernel{} + for _, k := range uncheckedKernels { + state, neighbours, err := genStateAndNeighbourKernels(k, prods) + if err != nil { + return nil, err + } + state.num = currentState + currentState = currentState.next() + + automaton.states[state.id] = state + + for _, k := range neighbours { + if _, known := knownKernels[k.id]; known { + continue + } + knownKernels[k.id] = struct{}{} + nextUncheckedKernels = append(nextUncheckedKernels, k) + } + } + uncheckedKernels = nextUncheckedKernels + } + + return automaton, nil +} + +func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*kernel, error) { + items, err := genLR0Closure(k, prods) + if err != nil { + return nil, nil, err + } + neighbours, err := genNeighbourKernels(items, prods) + if err != nil { + return nil, nil, err + } + + next := map[symbol]kernelID{} + kernels := []*kernel{} + for _, n := range neighbours { + next[n.symbol] = n.kernel.id + kernels = append(kernels, n.kernel) + } + + reducible := map[productionID]struct{}{} + for _, item := range items { + if item.reducible { + reducible[item.prod] = struct{}{} + } + } + + return &lrState{ + kernel: k, + next: next, + reducible: reducible, + }, kernels, nil +} + +func genLR0Closure(k *kernel, prods *productionSet) ([]*lrItem, error) { + items := []*lrItem{} + knownItems := map[lrItemID]struct{}{} + uncheckedItems := []*lrItem{} + for _, item := range k.items { + items = append(items, item) + uncheckedItems = append(uncheckedItems, item) + } + for len(uncheckedItems) > 0 { + nextUncheckedItems := []*lrItem{} + for _, item := range uncheckedItems { + if item.dottedSymbol.isTerminal() { + continue + } + + ps, _ := prods.findByLHS(item.dottedSymbol) + for _, prod := range ps { + item, err := newLR0Item(prod, 0) + if err != nil { + return nil, err + } + if _, exist := knownItems[item.id]; exist { + continue + } + items = append(items, item) + knownItems[item.id] = struct{}{} + nextUncheckedItems = append(nextUncheckedItems, item) + } + } + uncheckedItems = nextUncheckedItems + } + + return items, nil +} + +type neighbourKernel struct { + symbol symbol + kernel *kernel +} + +func genNeighbourKernels(items []*lrItem, prods *productionSet) ([]*neighbourKernel, error) { + kItemMap := map[symbol][]*lrItem{} + for _, item := range items { + if item.dottedSymbol.isNil() { + continue + } + prod, ok := prods.findByID(item.prod) + if !ok { + return nil, fmt.Errorf("a production was not found: %v", item.prod) + } + kItem, err := newLR0Item(prod, item.dot+1) + if err != nil { + return nil, err + } + kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem) + } + + nextSyms := []symbol{} + for sym := range kItemMap { + nextSyms = append(nextSyms, sym) + } + sort.Slice(nextSyms, func(i, j int) bool { + return nextSyms[i] < nextSyms[j] + }) + + kernels := []*neighbourKernel{} + for _, sym := range nextSyms { + k, err := newKernel(kItemMap[sym]) + if err != nil { + return nil, err + } + kernels = append(kernels, &neighbourKernel{ + symbol: sym, + kernel: k, + }) + } + + return kernels, nil +} diff --git a/grammar/lr0_item.go b/grammar/lr0_item.go deleted file mode 100644 index 2934061..0000000 --- a/grammar/lr0_item.go +++ /dev/null @@ -1,343 +0,0 @@ -package grammar - -import ( - "crypto/sha256" - "encoding/binary" - "fmt" - "sort" - "strconv" -) - -type lr0ItemID [32]byte - -func (id lr0ItemID) String() string { - return fmt.Sprintf("%x", id.num()) -} - -func (id lr0ItemID) num() uint32 { - return binary.LittleEndian.Uint32(id[:]) -} - -type lr0Item struct { - id lr0ItemID - prod productionID - - // E → E + T - // - // Dot | Dotted Symbol | Item - // ----+---------------+------------ - // 0 | E | E →・E + T - // 1 | + | E → E・+ T - // 2 | T | E → E +・T - // 3 | Nil | E → E + T・ - dot int - dottedSymbol symbol - - // When initial is true, the LHS of the production is the augmented start symbol and dot is 0. - // It looks like S' →・S. - initial bool - - // When reducible is true, the item looks like E → E + T・. - reducible bool - - // When kernel is true, the item is kernel item. - kernel bool -} - -func newLR0Item(prod *production, dot int) (*lr0Item, error) { - if prod == nil { - return nil, fmt.Errorf("production must be non-nil") - } - - if dot < 0 || dot > prod.rhsLen { - return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen) - } - - var id lr0ItemID - { - b := []byte{} - b = append(b, prod.id[:]...) - bDot := make([]byte, 8) - binary.LittleEndian.PutUint64(bDot, uint64(dot)) - b = append(b, bDot...) - id = sha256.Sum256(b) - } - - dottedSymbol := symbolNil - if dot < prod.rhsLen { - dottedSymbol = prod.rhs[dot] - } - - initial := false - if prod.lhs.isStart() && dot == 0 { - initial = true - } - - reducible := false - if dot == prod.rhsLen { - reducible = true - } - - kernel := false - if initial || dot > 0 { - kernel = true - } - - item := &lr0Item{ - id: id, - prod: prod.id, - dot: dot, - dottedSymbol: dottedSymbol, - initial: initial, - reducible: reducible, - kernel: kernel, - } - - return item, nil -} - -type kernelID [32]byte - -func (id kernelID) String() string { - return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:])) -} - -type kernel struct { - id kernelID - items []*lr0Item -} - -func newKernel(items []*lr0Item) (*kernel, error) { - if len(items) == 0 { - return nil, fmt.Errorf("a kernel need at least one item") - } - - // Remove duplicates from items. - var sortedItems []*lr0Item - { - m := map[lr0ItemID]*lr0Item{} - for _, item := range items { - if !item.kernel { - return nil, fmt.Errorf("not a kernel item: %v", item) - } - m[item.id] = item - } - sortedItems = []*lr0Item{} - for _, item := range m { - sortedItems = append(sortedItems, item) - } - sort.Slice(sortedItems, func(i, j int) bool { - return sortedItems[i].id.num() < sortedItems[j].id.num() - }) - } - - var id kernelID - { - b := []byte{} - for _, item := range sortedItems { - b = append(b, item.id[:]...) - } - id = sha256.Sum256(b) - } - - return &kernel{ - id: id, - items: sortedItems, - }, nil -} - -type stateNum int - -const stateNumInitial = stateNum(0) - -func (n stateNum) Int() int { - return int(n) -} - -func (n stateNum) String() string { - return strconv.Itoa(int(n)) -} - -func (n stateNum) next() stateNum { - return stateNum(n + 1) -} - -type lr0State struct { - *kernel - num stateNum - next map[symbol]kernelID - reducible map[productionID]struct{} -} - -type lr0Automaton struct { - initialState kernelID - states map[kernelID]*lr0State -} - -func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) { - if !startSym.isStart() { - return nil, fmt.Errorf("passed symbold is not a start symbol") - } - - automaton := &lr0Automaton{ - states: map[kernelID]*lr0State{}, - } - - currentState := stateNumInitial - knownKernels := map[kernelID]struct{}{} - uncheckedKernels := []*kernel{} - - // Generate an initial kernel. - { - prods, _ := prods.findByLHS(startSym) - initialItem, err := newLR0Item(prods[0], 0) - if err != nil { - return nil, err - } - - k, err := newKernel([]*lr0Item{initialItem}) - if err != nil { - return nil, err - } - - automaton.initialState = k.id - knownKernels[k.id] = struct{}{} - uncheckedKernels = append(uncheckedKernels, k) - } - - for len(uncheckedKernels) > 0 { - nextUncheckedKernels := []*kernel{} - for _, k := range uncheckedKernels { - state, neighbours, err := genStateAndNeighbourKernels(k, prods) - if err != nil { - return nil, err - } - state.num = currentState - currentState = currentState.next() - - automaton.states[state.id] = state - - for _, k := range neighbours { - if _, known := knownKernels[k.id]; known { - continue - } - knownKernels[k.id] = struct{}{} - nextUncheckedKernels = append(nextUncheckedKernels, k) - } - } - uncheckedKernels = nextUncheckedKernels - } - - return automaton, nil -} - -func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lr0State, []*kernel, error) { - items, err := genClosure(k, prods) - if err != nil { - return nil, nil, err - } - neighbours, err := genNeighbourKernels(items, prods) - if err != nil { - return nil, nil, err - } - - next := map[symbol]kernelID{} - kernels := []*kernel{} - for _, n := range neighbours { - next[n.symbol] = n.kernel.id - kernels = append(kernels, n.kernel) - } - - reducible := map[productionID]struct{}{} - for _, item := range items { - if item.reducible { - reducible[item.prod] = struct{}{} - } - } - - return &lr0State{ - kernel: k, - next: next, - reducible: reducible, - }, kernels, nil -} - -func genClosure(k *kernel, prods *productionSet) ([]*lr0Item, error) { - items := []*lr0Item{} - knownItems := map[lr0ItemID]struct{}{} - uncheckedItems := []*lr0Item{} - for _, item := range k.items { - items = append(items, item) - uncheckedItems = append(uncheckedItems, item) - } - for len(uncheckedItems) > 0 { - nextUncheckedItems := []*lr0Item{} - for _, item := range uncheckedItems { - if item.dottedSymbol.isTerminal() { - continue - } - - ps, _ := prods.findByLHS(item.dottedSymbol) - for _, prod := range ps { - item, err := newLR0Item(prod, 0) - if err != nil { - return nil, err - } - if _, exist := knownItems[item.id]; exist { - continue - } - items = append(items, item) - knownItems[item.id] = struct{}{} - nextUncheckedItems = append(nextUncheckedItems, item) - } - } - uncheckedItems = nextUncheckedItems - } - - return items, nil -} - -type neighbourKernel struct { - symbol symbol - kernel *kernel -} - -func genNeighbourKernels(items []*lr0Item, prods *productionSet) ([]*neighbourKernel, error) { - kItemMap := map[symbol][]*lr0Item{} - for _, item := range items { - if item.dottedSymbol.isNil() { - continue - } - prod, ok := prods.findByID(item.prod) - if !ok { - return nil, fmt.Errorf("a production was not found: %v", item.prod) - } - kItem, err := newLR0Item(prod, item.dot+1) - if err != nil { - return nil, err - } - kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem) - } - - nextSyms := []symbol{} - for sym := range kItemMap { - nextSyms = append(nextSyms, sym) - } - sort.Slice(nextSyms, func(i, j int) bool { - return nextSyms[i] < nextSyms[j] - }) - - kernels := []*neighbourKernel{} - for _, sym := range nextSyms { - k, err := newKernel(kItemMap[sym]) - if err != nil { - return nil, err - } - kernels = append(kernels, &neighbourKernel{ - symbol: sym, - kernel: k, - }) - } - - return kernels, nil -} diff --git a/grammar/lr0_item_test.go b/grammar/lr0_test.go index 78e9f3b..9000483 100644 --- a/grammar/lr0_item_test.go +++ b/grammar/lr0_test.go @@ -8,6 +8,12 @@ import ( "github.com/nihei9/vartan/spec" ) +type expectedLR0State struct { + kernelItems []*lrItem + nextStates map[symbol][]*lrItem + reducibleProds []*production +} + func TestGenLR0Automaton(t *testing.T) { src := ` expr @@ -58,7 +64,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; genProd := newTestProductionGenerator(t, genSym) genLR0Item := newTestLR0ItemGenerator(t, genProd) - expectedKernels := map[int][]*lr0Item{ + expectedKernels := map[int][]*lrItem{ 0: { genLR0Item("expr'", 0, "expr"), }, @@ -104,7 +110,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; expectedStates := []expectedLR0State{ { kernelItems: expectedKernels[0], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("expr"): expectedKernels[1], genSym("term"): expectedKernels[2], genSym("factor"): expectedKernels[3], @@ -115,7 +121,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[1], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("add"): expectedKernels[6], }, reducibleProds: []*production{ @@ -124,7 +130,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[2], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("mul"): expectedKernels[7], }, reducibleProds: []*production{ @@ -133,14 +139,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[3], - nextStates: map[symbol][]*lr0Item{}, + nextStates: map[symbol][]*lrItem{}, reducibleProds: []*production{ genProd("term", "factor"), }, }, { kernelItems: expectedKernels[4], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("expr"): expectedKernels[8], genSym("term"): expectedKernels[2], genSym("factor"): expectedKernels[3], @@ -151,14 +157,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[5], - nextStates: map[symbol][]*lr0Item{}, + nextStates: map[symbol][]*lrItem{}, reducibleProds: []*production{ genProd("factor", "id"), }, }, { kernelItems: expectedKernels[6], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("term"): expectedKernels[9], genSym("factor"): expectedKernels[3], genSym("l_paren"): expectedKernels[4], @@ -168,7 +174,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[7], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("factor"): expectedKernels[10], genSym("l_paren"): expectedKernels[4], genSym("id"): expectedKernels[5], @@ -177,7 +183,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[8], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("add"): expectedKernels[6], genSym("r_paren"): expectedKernels[11], }, @@ -185,7 +191,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[9], - nextStates: map[symbol][]*lr0Item{ + nextStates: map[symbol][]*lrItem{ genSym("mul"): expectedKernels[7], }, reducibleProds: []*production{ @@ -194,14 +200,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }, { kernelItems: expectedKernels[10], - nextStates: map[symbol][]*lr0Item{}, + nextStates: map[symbol][]*lrItem{}, reducibleProds: []*production{ genProd("term", "term", "mul", "factor"), }, }, { kernelItems: expectedKernels[11], - nextStates: map[symbol][]*lr0Item{}, + nextStates: map[symbol][]*lrItem{}, reducibleProds: []*production{ genProd("factor", "l_paren", "expr", "r_paren"), }, @@ -258,9 +264,3 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; }) } } - -type expectedLR0State struct { - kernelItems []*lr0Item - nextStates map[symbol][]*lr0Item - reducibleProds []*production -} diff --git a/grammar/slr.go b/grammar/parsing_table_builder.go index 1bca78f..e209c94 100644 --- a/grammar/slr.go +++ b/grammar/parsing_table_builder.go @@ -165,6 +165,103 @@ func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum) t.goToTable[pos] = newGoToEntry(nextState) } +type lalrTableBuilder struct { + automaton *lalr1Automaton + prods *productionSet + termCount int + nonTermCount int + symTab *symbolTable + sym2AnonPat map[symbol]string + + conflicts []conflict +} + +func (b *lalrTableBuilder) build() (*ParsingTable, error) { + var ptab *ParsingTable + { + initialState := b.automaton.states[b.automaton.initialState] + ptab = &ParsingTable{ + actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), + goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), + stateCount: len(b.automaton.states), + terminalCount: b.termCount, + nonTerminalCount: b.nonTermCount, + expectedTerminals: make([][]int, len(b.automaton.states)), + InitialState: initialState.num, + } + } + + var conflicts []conflict + for _, state := range b.automaton.states { + var eTerms []int + + for sym, kID := range state.next { + nextState := b.automaton.states[kID] + if sym.isTerminal() { + eTerms = append(eTerms, sym.num().Int()) + + c := ptab.writeShiftAction(state.num, sym, nextState.num) + if c != nil { + conflicts = append(conflicts, c) + continue + } + } else { + ptab.writeGoTo(state.num, sym, nextState.num) + } + } + + for prodID := range state.reducible { + reducibleProd, ok := b.prods.findByID(prodID) + if !ok { + return nil, fmt.Errorf("reducible production not found: %v", prodID) + } + + var reducibleItem *lrItem + for _, item := range state.items { + if item.prod != reducibleProd.id { + continue + } + + reducibleItem = item + break + } + if reducibleItem == nil { + for _, item := range state.emptyProdItems { + if item.prod != reducibleProd.id { + continue + } + + reducibleItem = item + break + } + if reducibleItem == nil { + return nil, fmt.Errorf("reducible item not found; state: %v, production: %v", state.num, reducibleProd.num) + } + } + + for a := range reducibleItem.lookAhead.symbols { + eTerms = append(eTerms, a.num().Int()) + + c := ptab.writeReduceAction(state.num, a, reducibleProd.num) + if c != nil { + conflicts = append(conflicts, c) + continue + } + } + } + + ptab.expectedTerminals[state.num] = eTerms + } + + b.conflicts = conflicts + + if len(conflicts) > 0 { + return nil, fmt.Errorf("%v conflicts", len(conflicts)) + } + + return ptab, nil +} + type slrTableBuilder struct { automaton *lr0Automaton prods *productionSet @@ -249,9 +346,20 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) { return ptab, nil } -func (b *slrTableBuilder) writeDescription(w io.Writer) { +type descriptionWriter struct { + automaton *lr0Automaton + prods *productionSet + follow *followSet + termCount int + nonTermCount int + symTab *symbolTable + sym2AnonPat map[symbol]string + conflicts []conflict +} + +func (dw *descriptionWriter) write(w io.Writer) { conflicts := map[stateNum][]conflict{} - for _, con := range b.conflicts { + for _, con := range dw.conflicts { switch c := con.(type) { case *shiftReduceConflict: conflicts[c.state] = append(conflicts[c.state], c) @@ -262,15 +370,15 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { fmt.Fprintf(w, "# Conflicts\n\n") - if len(b.conflicts) > 0 { - fmt.Fprintf(w, "%v conflics:\n\n", len(b.conflicts)) + if len(dw.conflicts) > 0 { + fmt.Fprintf(w, "%v conflics:\n\n", len(dw.conflicts)) - for _, conflict := range b.conflicts { + for _, conflict := range dw.conflicts { switch c := conflict.(type) { case *shiftReduceConflict: - fmt.Fprintf(w, "%v: shift/reduce conflict (shift %v, reduce %v) on %v\n", c.state, c.nextState, c.prodNum, b.symbolToText(c.sym)) + fmt.Fprintf(w, "%v: shift/reduce conflict (shift %v, reduce %v) on %v\n", c.state, c.nextState, c.prodNum, dw.symbolToText(c.sym)) case *reduceReduceConflict: - fmt.Fprintf(w, "%v: reduce/reduce conflict (reduce %v and %v) on %v\n", c.state, c.prodNum1, c.prodNum2, b.symbolToText(c.sym)) + fmt.Fprintf(w, "%v: reduce/reduce conflict (reduce %v and %v) on %v\n", c.state, c.prodNum1, c.prodNum2, dw.symbolToText(c.sym)) } } fmt.Fprintf(w, "\n") @@ -280,17 +388,17 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { fmt.Fprintf(w, "# Terminals\n\n") - termSyms := b.symTab.terminalSymbols() + termSyms := dw.symTab.terminalSymbols() fmt.Fprintf(w, "%v symbols:\n\n", len(termSyms)) for _, sym := range termSyms { - text, ok := b.symTab.toText(sym) + text, ok := dw.symTab.toText(sym) if !ok { text = fmt.Sprintf("<symbol not found: %v>", sym) } if strings.HasPrefix(text, "_") { - fmt.Fprintf(w, "%4v %v: \"%v\"\n", sym.num(), text, b.sym2AnonPat[sym]) + fmt.Fprintf(w, "%4v %v: \"%v\"\n", sym.num(), text, dw.sym2AnonPat[sym]) } else { fmt.Fprintf(w, "%4v %v\n", sym.num(), text) } @@ -300,25 +408,25 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { fmt.Fprintf(w, "# Productions\n\n") - fmt.Fprintf(w, "%v productions:\n\n", len(b.prods.getAllProductions())) + fmt.Fprintf(w, "%v productions:\n\n", len(dw.prods.getAllProductions())) - for _, prod := range b.prods.getAllProductions() { - fmt.Fprintf(w, "%4v %v\n", prod.num, b.productionToString(prod, -1)) + for _, prod := range dw.prods.getAllProductions() { + fmt.Fprintf(w, "%4v %v\n", prod.num, dw.productionToString(prod, -1)) } fmt.Fprintf(w, "\n# States\n\n") - fmt.Fprintf(w, "%v states:\n\n", len(b.automaton.states)) + fmt.Fprintf(w, "%v states:\n\n", len(dw.automaton.states)) - for _, state := range b.automaton.states { + for _, state := range dw.automaton.states { fmt.Fprintf(w, "state %v\n", state.num) for _, item := range state.items { - prod, ok := b.prods.findByID(item.prod) + prod, ok := dw.prods.findByID(item.prod) if !ok { fmt.Fprintf(w, "<production not found>\n") continue } - fmt.Fprintf(w, " %v\n", b.productionToString(prod, item.dot)) + fmt.Fprintf(w, " %v\n", dw.productionToString(prod, item.dot)) } fmt.Fprintf(w, "\n") @@ -329,16 +437,16 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { var accRec string { for sym, kID := range state.next { - nextState := b.automaton.states[kID] + nextState := dw.automaton.states[kID] if sym.isTerminal() { - shiftRecs = append(shiftRecs, fmt.Sprintf("shift %4v on %v", nextState.num, b.symbolToText(sym))) + shiftRecs = append(shiftRecs, fmt.Sprintf("shift %4v on %v", nextState.num, dw.symbolToText(sym))) } else { - gotoRecs = append(gotoRecs, fmt.Sprintf("goto %4v on %v", nextState.num, b.symbolToText(sym))) + gotoRecs = append(gotoRecs, fmt.Sprintf("goto %4v on %v", nextState.num, dw.symbolToText(sym))) } } for prodID := range state.reducible { - prod, ok := b.prods.findByID(prodID) + prod, ok := dw.prods.findByID(prodID) if !ok { reduceRecs = append(reduceRecs, "<production not found>") continue @@ -347,16 +455,46 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { accRec = "accept on <EOF>" continue } - flw, err := b.follow.find(prod.lhs) - if err != nil { - reduceRecs = append(reduceRecs, fmt.Sprintf("%v", err)) - continue - } - for sym := range flw.symbols { - reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, b.symbolToText(sym))) - } - if flw.eof { - reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on <EOF>", prod.num)) + + if dw.follow != nil { + flw, err := dw.follow.find(prod.lhs) + if err != nil { + reduceRecs = append(reduceRecs, fmt.Sprintf("%v", err)) + continue + } + for sym := range flw.symbols { + reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, dw.symbolToText(sym))) + } + if flw.eof { + reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on <EOF>", prod.num)) + } + } else { + var reducibleItem *lrItem + for _, item := range state.items { + if item.prod != prodID { + continue + } + + reducibleItem = item + break + } + if reducibleItem == nil { + for _, item := range state.emptyProdItems { + if item.prod != prodID { + continue + } + + reducibleItem = item + break + } + if reducibleItem == nil { + reduceRecs = append(reduceRecs, "<item not found>") + continue + } + } + for a := range reducibleItem.lookAhead.symbols { + reduceRecs = append(reduceRecs, fmt.Sprintf("reduce %4v on %v", prod.num, dw.symbolToText(a))) + } } } } @@ -385,9 +523,9 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { for _, con := range cons { switch c := con.(type) { case *shiftReduceConflict: - fmt.Fprintf(w, " shift/reduce conflict (shift %v, reduce %v) on %v\n", c.nextState, c.prodNum, b.symbolToText(c.sym)) + fmt.Fprintf(w, " shift/reduce conflict (shift %v, reduce %v) on %v\n", c.nextState, c.prodNum, dw.symbolToText(c.sym)) case *reduceReduceConflict: - fmt.Fprintf(w, " reduce/reduce conflict (reduce %v and %v) on %v\n", c.prodNum1, c.prodNum2, b.symbolToText(c.sym)) + fmt.Fprintf(w, " reduce/reduce conflict (reduce %v and %v) on %v\n", c.prodNum1, c.prodNum2, dw.symbolToText(c.sym)) } } fmt.Fprintf(w, "\n") @@ -395,14 +533,14 @@ func (b *slrTableBuilder) writeDescription(w io.Writer) { } } -func (b *slrTableBuilder) productionToString(prod *production, dot int) string { +func (dw *descriptionWriter) productionToString(prod *production, dot int) string { var w strings.Builder - fmt.Fprintf(&w, "%v →", b.symbolToText(prod.lhs)) + fmt.Fprintf(&w, "%v →", dw.symbolToText(prod.lhs)) for n, rhs := range prod.rhs { if n == dot { fmt.Fprintf(&w, " ・") } - fmt.Fprintf(&w, " %v", b.symbolToText(rhs)) + fmt.Fprintf(&w, " %v", dw.symbolToText(rhs)) } if dot == len(prod.rhs) { fmt.Fprintf(&w, " ・") @@ -411,7 +549,7 @@ func (b *slrTableBuilder) productionToString(prod *production, dot int) string { return w.String() } -func (b *slrTableBuilder) symbolToText(sym symbol) string { +func (dw *descriptionWriter) symbolToText(sym symbol) string { if sym.isNil() { return "<NULL>" } @@ -419,13 +557,13 @@ func (b *slrTableBuilder) symbolToText(sym symbol) string { return "<EOF>" } - text, ok := b.symTab.toText(sym) + text, ok := dw.symTab.toText(sym) if !ok { return fmt.Sprintf("<symbol not found: %v>", sym) } if strings.HasPrefix(text, "_") { - pat, ok := b.sym2AnonPat[sym] + pat, ok := dw.sym2AnonPat[sym] if !ok { return fmt.Sprintf("<pattern not found: %v>", text) } diff --git a/grammar/parsing_table_builder_test.go b/grammar/parsing_table_builder_test.go new file mode 100644 index 0000000..18add81 --- /dev/null +++ b/grammar/parsing_table_builder_test.go @@ -0,0 +1,847 @@ +package grammar + +import ( + "fmt" + "strings" + "testing" + + "github.com/nihei9/vartan/spec" +) + +type expectedState struct { + kernelItems []*lrItem + acts map[symbol]testActionEntry + goTos map[symbol][]*lrItem +} + +func TestGenLALRParsingTable(t *testing.T) { + src := ` +S: L eq R | R; +L: ref R | id; +R: L; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var ptab *ParsingTable + var automaton *lalr1Automaton + var gram *Grammar + var nonTermCount int + var termCount int + { + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.Build() + if err != nil { + t.Fatal(err) + } + first, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatal(err) + } + automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) + if err != nil { + t.Fatal(err) + } + + nonTermTexts, err := gram.symbolTable.nonTerminalTexts() + if err != nil { + t.Fatal(err) + } + termTexts, err := gram.symbolTable.terminalTexts() + if err != nil { + t.Fatal(err) + } + nonTermCount = len(nonTermTexts) + termCount = len(termTexts) + + lalr := &lalrTableBuilder{ + automaton: automaton, + prods: gram.productionSet, + termCount: termCount, + nonTermCount: nonTermCount, + symTab: gram.symbolTable, + } + ptab, err = lalr.build() + if err != nil { + t.Fatalf("failed to create a LALR parsing table: %v", err) + } + if ptab == nil { + t.Fatal("genLALRParsingTable returns nil without any error") + } + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("S'", 0, "S"), symbolEOF), + }, + 1: { + withLookAhead(genLR0Item("S'", 1, "S"), symbolEOF), + }, + 2: { + withLookAhead(genLR0Item("S", 1, "L", "eq", "R"), symbolEOF), + withLookAhead(genLR0Item("R", 1, "L"), symbolEOF), + }, + 3: { + withLookAhead(genLR0Item("S", 1, "R"), symbolEOF), + }, + 4: { + withLookAhead(genLR0Item("L", 1, "ref", "R"), genSym("eq"), symbolEOF), + }, + 5: { + withLookAhead(genLR0Item("L", 1, "id"), genSym("eq"), symbolEOF), + }, + 6: { + withLookAhead(genLR0Item("S", 2, "L", "eq", "R"), symbolEOF), + }, + 7: { + withLookAhead(genLR0Item("L", 2, "ref", "R"), genSym("eq"), symbolEOF), + }, + 8: { + withLookAhead(genLR0Item("R", 1, "L"), genSym("eq"), symbolEOF), + }, + 9: { + withLookAhead(genLR0Item("S", 3, "L", "eq", "R"), symbolEOF), + }, + } + + expectedStates := []expectedState{ + { + kernelItems: expectedKernels[0], + acts: map[symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("S"): expectedKernels[1], + genSym("L"): expectedKernels[2], + genSym("R"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[1], + acts: map[symbol]testActionEntry{ + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("S'", "S"), + }, + }, + }, + { + kernelItems: expectedKernels[2], + acts: map[symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("R", "L"), + }, + }, + }, + { + kernelItems: expectedKernels[3], + acts: map[symbol]testActionEntry{ + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("S", "R"), + }, + }, + }, + { + kernelItems: expectedKernels[4], + acts: map[symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("R"): expectedKernels[7], + genSym("L"): expectedKernels[8], + }, + }, + { + kernelItems: expectedKernels[5], + acts: map[symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("L", "id"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("L", "id"), + }, + }, + }, + { + kernelItems: expectedKernels[6], + acts: map[symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("L"): expectedKernels[8], + genSym("R"): expectedKernels[9], + }, + }, + { + kernelItems: expectedKernels[7], + acts: map[symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("L", "ref", "R"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("L", "ref", "R"), + }, + }, + }, + { + kernelItems: expectedKernels[8], + acts: map[symbol]testActionEntry{ + genSym("eq"): { + ty: ActionTypeReduce, + production: genProd("R", "L"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("R", "L"), + }, + }, + }, + { + kernelItems: expectedKernels[9], + acts: map[symbol]testActionEntry{ + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("S", "L", "eq", "R"), + }, + }, + }, + } + + t.Run("initial state", func(t *testing.T) { + iniState := findStateByNum(automaton.states, ptab.InitialState) + if iniState == nil { + t.Fatalf("the initial state was not found: #%v", ptab.InitialState) + } + eIniState, err := newKernel(expectedKernels[0]) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + if iniState.id != eIniState.id { + t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) + } + }) + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("state was not found: #%v", 0) + } + + testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) + testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) + }) + } +} + +func TestGenSLRParsingTable(t *testing.T) { + src := ` +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + var ptab *ParsingTable + var automaton *lr0Automaton + var gram *Grammar + var nonTermCount int + var termCount int + { + ast, err := spec.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.Build() + if err != nil { + t.Fatal(err) + } + first, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + follow, err := genFollowSet(gram.productionSet, first) + if err != nil { + t.Fatal(err) + } + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + t.Fatal(err) + } + + nonTermTexts, err := gram.symbolTable.nonTerminalTexts() + if err != nil { + t.Fatal(err) + } + termTexts, err := gram.symbolTable.terminalTexts() + if err != nil { + t.Fatal(err) + } + nonTermCount = len(nonTermTexts) + termCount = len(termTexts) + + slr := &slrTableBuilder{ + automaton: automaton, + prods: gram.productionSet, + follow: follow, + termCount: termCount, + nonTermCount: nonTermCount, + symTab: gram.symbolTable, + } + ptab, err = slr.build() + if err != nil { + t.Fatalf("failed to create a SLR parsing table: %v", err) + } + if ptab == nil { + t.Fatal("genSLRParsingTable returns nil without any error") + } + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + genLR0Item("expr'", 1, "expr"), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + genLR0Item("expr", 1, "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + genLR0Item("term", 1, "factor"), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + genLR0Item("factor", 1, "id"), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + genLR0Item("expr", 3, "expr", "add", "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 10: { + genLR0Item("term", 3, "term", "mul", "factor"), + }, + 11: { + genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), + }, + } + + // expectedStates := []struct { + // kernelItems []*lrItem + // acts map[symbol]testActionEntry + // goTos map[symbol][]*lrItem + // }{ + expectedStates := []expectedState{ + { + kernelItems: expectedKernels[0], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[1], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("expr'", "expr"), + }, + }, + }, + { + kernelItems: expectedKernels[2], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("expr", "term"), + }, + genSym("mul"): { + ty: ActionTypeShift, + nextState: expectedKernels[7], + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("expr", "term"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("expr", "term"), + }, + }, + }, + { + kernelItems: expectedKernels[3], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("term", "factor"), + }, + }, + }, + { + kernelItems: expectedKernels[4], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[5], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("factor", "id"), + }, + }, + }, + { + kernelItems: expectedKernels[6], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[7], + acts: map[symbol]testActionEntry{ + genSym("l_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol][]*lrItem{ + genSym("factor"): expectedKernels[10], + }, + }, + { + kernelItems: expectedKernels[8], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeShift, + nextState: expectedKernels[6], + }, + genSym("r_paren"): { + ty: ActionTypeShift, + nextState: expectedKernels[11], + }, + }, + }, + { + kernelItems: expectedKernels[9], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("expr", "expr", "add", "term"), + }, + genSym("mul"): { + ty: ActionTypeShift, + nextState: expectedKernels[7], + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("expr", "expr", "add", "term"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("expr", "expr", "add", "term"), + }, + }, + }, + { + kernelItems: expectedKernels[10], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("term", "term", "mul", "factor"), + }, + }, + }, + { + kernelItems: expectedKernels[11], + acts: map[symbol]testActionEntry{ + genSym("add"): { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + genSym("mul"): { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + genSym("r_paren"): { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + symbolEOF: { + ty: ActionTypeReduce, + production: genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + }, + } + + t.Run("initial state", func(t *testing.T) { + iniState := findStateByNum(automaton.states, ptab.InitialState) + if iniState == nil { + t.Fatalf("the initial state was not found: #%v", ptab.InitialState) + } + eIniState, err := newKernel(expectedKernels[0]) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + if iniState.id != eIniState.id { + t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) + } + }) + + for i, eState := range expectedStates { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + k, err := newKernel(eState.kernelItems) + if err != nil { + t.Fatalf("failed to create a kernel item: %v", err) + } + state, ok := automaton.states[k.id] + if !ok { + t.Fatalf("state was not found: #%v", 0) + } + + testAction(t, &eState, state, ptab, automaton, gram, termCount) + testGoTo(t, &eState, state, ptab, automaton, nonTermCount) + + // ACTION + { + // nonEmptyEntries := map[symbolNum]struct{}{} + // for eSym, eAct := range eState.acts { + // nonEmptyEntries[eSym.num()] = struct{}{} + + // ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num()) + // if ty != eAct.ty { + // t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) + // } + // switch eAct.ty { + // case ActionTypeShift: + // eNextState, err := newKernel(eAct.nextState) + // if err != nil { + // t.Fatal(err) + // } + // nextState := findStateByNum(automaton.states, stateNum) + // if nextState == nil { + // t.Fatalf("state was not found; state: #%v", stateNum) + // } + // if nextState.id != eNextState.id { + // t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + // } + // case ActionTypeReduce: + // prod := findProductionByNum(gram.productionSet, prodNum) + // if prod == nil { + // t.Fatalf("production was not found: #%v", prodNum) + // } + // if prod.id != eAct.production.id { + // t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) + // } + // } + // } + // for symNum := 0; symNum < termCount; symNum++ { + // if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { + // continue + // } + // ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum)) + // if ty != ActionTypeError { + // t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) + // } + // } + } + + // GOTO + { + // nonEmptyEntries := map[symbolNum]struct{}{} + // for eSym, eGoTo := range eState.goTos { + // nonEmptyEntries[eSym.num()] = struct{}{} + + // eNextState, err := newKernel(eGoTo) + // if err != nil { + // t.Fatal(err) + // } + // ty, stateNum := ptab.getGoTo(state.num, eSym.num()) + // if ty != GoToTypeRegistered { + // t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) + // } + // nextState := findStateByNum(automaton.states, stateNum) + // if nextState == nil { + // t.Fatalf("state was not found: #%v", stateNum) + // } + // if nextState.id != eNextState.id { + // t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + // } + // } + // for symNum := 0; symNum < nonTermCount; symNum++ { + // if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { + // continue + // } + // ty, _ := ptab.getGoTo(state.num, symbolNum(symNum)) + // if ty != GoToTypeError { + // t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) + // } + // } + } + }) + } +} + +func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { + nonEmptyEntries := map[symbolNum]struct{}{} + for eSym, eAct := range expectedState.acts { + nonEmptyEntries[eSym.num()] = struct{}{} + + ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num()) + if ty != eAct.ty { + t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) + } + switch eAct.ty { + case ActionTypeShift: + eNextState, err := newKernel(eAct.nextState) + if err != nil { + t.Fatal(err) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found; state: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + case ActionTypeReduce: + prod := findProductionByNum(gram.productionSet, prodNum) + if prod == nil { + t.Fatalf("production was not found: #%v", prodNum) + } + if prod.id != eAct.production.id { + t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) + } + } + } + for symNum := 0; symNum < termCount; symNum++ { + if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { + continue + } + ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum)) + if ty != ActionTypeError { + t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) + } + } +} + +func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { + nonEmptyEntries := map[symbolNum]struct{}{} + for eSym, eGoTo := range expectedState.goTos { + nonEmptyEntries[eSym.num()] = struct{}{} + + eNextState, err := newKernel(eGoTo) + if err != nil { + t.Fatal(err) + } + ty, stateNum := ptab.getGoTo(state.num, eSym.num()) + if ty != GoToTypeRegistered { + t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) + } + nextState := findStateByNum(automaton.states, stateNum) + if nextState == nil { + t.Fatalf("state was not found: #%v", stateNum) + } + if nextState.id != eNextState.id { + t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) + } + } + for symNum := 0; symNum < nonTermCount; symNum++ { + if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { + continue + } + ty, _ := ptab.getGoTo(state.num, symbolNum(symNum)) + if ty != GoToTypeError { + t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) + } + } +} + +type testActionEntry struct { + ty ActionType + nextState []*lrItem + production *production +} + +func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { + for _, state := range states { + if state.num == num { + return state + } + } + return nil +} + +func findProductionByNum(prods *productionSet, num productionNum) *production { + for _, prod := range prods.getAllProductions() { + if prod.num == num { + return prod + } + } + return nil +} diff --git a/grammar/slr_test.go b/grammar/slr_test.go deleted file mode 100644 index 41be03f..0000000 --- a/grammar/slr_test.go +++ /dev/null @@ -1,492 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "github.com/nihei9/vartan/spec" -) - -func TestGenSLRParsingTable(t *testing.T) { - src := ` -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -` - - var ptab *ParsingTable - var automaton *lr0Automaton - var gram *Grammar - var nonTermCount int - var termCount int - { - ast, err := spec.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.Build() - if err != nil { - t.Fatal(err) - } - first, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - follow, err := genFollowSet(gram.productionSet, first) - if err != nil { - t.Fatal(err) - } - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) - if err != nil { - t.Fatal(err) - } - - nonTermTexts, err := gram.symbolTable.nonTerminalTexts() - if err != nil { - t.Fatal(err) - } - termTexts, err := gram.symbolTable.terminalTexts() - if err != nil { - t.Fatal(err) - } - nonTermCount = len(nonTermTexts) - termCount = len(termTexts) - - slr := &slrTableBuilder{ - automaton: automaton, - prods: gram.productionSet, - follow: follow, - termCount: termCount, - nonTermCount: nonTermCount, - symTab: gram.symbolTable, - } - ptab, err = slr.build() - if err != nil { - t.Fatalf("failed to create a SLR parsing table: %v", err) - } - if ptab == nil { - t.Fatal("genSLRParsingTable returns nil without any error") - } - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lr0Item{ - 0: { - genLR0Item("expr'", 0, "expr"), - }, - 1: { - genLR0Item("expr'", 1, "expr"), - genLR0Item("expr", 1, "expr", "add", "term"), - }, - 2: { - genLR0Item("expr", 1, "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 3: { - genLR0Item("term", 1, "factor"), - }, - 4: { - genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), - }, - 5: { - genLR0Item("factor", 1, "id"), - }, - 6: { - genLR0Item("expr", 2, "expr", "add", "term"), - }, - 7: { - genLR0Item("term", 2, "term", "mul", "factor"), - }, - 8: { - genLR0Item("expr", 1, "expr", "add", "term"), - genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), - }, - 9: { - genLR0Item("expr", 3, "expr", "add", "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 10: { - genLR0Item("term", 3, "term", "mul", "factor"), - }, - 11: { - genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), - }, - } - - expectedStates := []struct { - kernelItems []*lr0Item - acts map[symbol]testActionEntry - goTos map[symbol][]*lr0Item - }{ - { - kernelItems: expectedKernels[0], - acts: map[symbol]testActionEntry{ - genSym("l_paren"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol][]*lr0Item{ - genSym("expr"): expectedKernels[1], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[1], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeShift, - nextState: expectedKernels[6], - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("expr'", "expr"), - }, - }, - }, - { - kernelItems: expectedKernels[2], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeReduce, - production: genProd("expr", "term"), - }, - genSym("mul"): { - ty: ActionTypeShift, - nextState: expectedKernels[7], - }, - genSym("r_paren"): { - ty: ActionTypeReduce, - production: genProd("expr", "term"), - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("expr", "term"), - }, - }, - }, - { - kernelItems: expectedKernels[3], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeReduce, - production: genProd("term", "factor"), - }, - genSym("mul"): { - ty: ActionTypeReduce, - production: genProd("term", "factor"), - }, - genSym("r_paren"): { - ty: ActionTypeReduce, - production: genProd("term", "factor"), - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("term", "factor"), - }, - }, - }, - { - kernelItems: expectedKernels[4], - acts: map[symbol]testActionEntry{ - genSym("l_paren"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol][]*lr0Item{ - genSym("expr"): expectedKernels[8], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[5], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeReduce, - production: genProd("factor", "id"), - }, - genSym("mul"): { - ty: ActionTypeReduce, - production: genProd("factor", "id"), - }, - genSym("r_paren"): { - ty: ActionTypeReduce, - production: genProd("factor", "id"), - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("factor", "id"), - }, - }, - }, - { - kernelItems: expectedKernels[6], - acts: map[symbol]testActionEntry{ - genSym("l_paren"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol][]*lr0Item{ - genSym("term"): expectedKernels[9], - genSym("factor"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[7], - acts: map[symbol]testActionEntry{ - genSym("l_paren"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol][]*lr0Item{ - genSym("factor"): expectedKernels[10], - }, - }, - { - kernelItems: expectedKernels[8], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeShift, - nextState: expectedKernels[6], - }, - genSym("r_paren"): { - ty: ActionTypeShift, - nextState: expectedKernels[11], - }, - }, - }, - { - kernelItems: expectedKernels[9], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeReduce, - production: genProd("expr", "expr", "add", "term"), - }, - genSym("mul"): { - ty: ActionTypeShift, - nextState: expectedKernels[7], - }, - genSym("r_paren"): { - ty: ActionTypeReduce, - production: genProd("expr", "expr", "add", "term"), - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("expr", "expr", "add", "term"), - }, - }, - }, - { - kernelItems: expectedKernels[10], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeReduce, - production: genProd("term", "term", "mul", "factor"), - }, - genSym("mul"): { - ty: ActionTypeReduce, - production: genProd("term", "term", "mul", "factor"), - }, - genSym("r_paren"): { - ty: ActionTypeReduce, - production: genProd("term", "term", "mul", "factor"), - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("term", "term", "mul", "factor"), - }, - }, - }, - { - kernelItems: expectedKernels[11], - acts: map[symbol]testActionEntry{ - genSym("add"): { - ty: ActionTypeReduce, - production: genProd("factor", "l_paren", "expr", "r_paren"), - }, - genSym("mul"): { - ty: ActionTypeReduce, - production: genProd("factor", "l_paren", "expr", "r_paren"), - }, - genSym("r_paren"): { - ty: ActionTypeReduce, - production: genProd("factor", "l_paren", "expr", "r_paren"), - }, - symbolEOF: { - ty: ActionTypeReduce, - production: genProd("factor", "l_paren", "expr", "r_paren"), - }, - }, - }, - } - - t.Run("initial state", func(t *testing.T) { - iniState := findStateByNum(automaton.states, ptab.InitialState) - if iniState == nil { - t.Fatalf("the initial state was not found: #%v", ptab.InitialState) - } - eIniState, err := newKernel(expectedKernels[0]) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - if iniState.id != eIniState.id { - t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) - } - }) - - for i, eState := range expectedStates { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("state was not found: #%v", 0) - } - - // ACTION - { - nonEmptyEntries := map[symbolNum]struct{}{} - for eSym, eAct := range eState.acts { - nonEmptyEntries[eSym.num()] = struct{}{} - - ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num()) - if ty != eAct.ty { - t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) - } - switch eAct.ty { - case ActionTypeShift: - eNextState, err := newKernel(eAct.nextState) - if err != nil { - t.Fatal(err) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found; state: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - case ActionTypeReduce: - prod := findProductionByNum(gram.productionSet, prodNum) - if prod == nil { - t.Fatalf("production was not found: #%v", prodNum) - } - if prod.id != eAct.production.id { - t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) - } - } - } - for symNum := 0; symNum < termCount; symNum++ { - if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { - continue - } - ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum)) - if ty != ActionTypeError { - t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) - } - } - } - - // GOTO - { - nonEmptyEntries := map[symbolNum]struct{}{} - for eSym, eGoTo := range eState.goTos { - nonEmptyEntries[eSym.num()] = struct{}{} - - eNextState, err := newKernel(eGoTo) - if err != nil { - t.Fatal(err) - } - ty, stateNum := ptab.getGoTo(state.num, eSym.num()) - if ty != GoToTypeRegistered { - t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - } - for symNum := 0; symNum < nonTermCount; symNum++ { - if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked { - continue - } - ty, _ := ptab.getGoTo(state.num, symbolNum(symNum)) - if ty != GoToTypeError { - t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) - } - } - } - }) - } -} - -type testActionEntry struct { - ty ActionType - nextState []*lr0Item - production *production -} - -func findStateByNum(states map[kernelID]*lr0State, num stateNum) *lr0State { - for _, state := range states { - if state.num == num { - return state - } - } - return nil -} - -func findProductionByNum(prods *productionSet, num productionNum) *production { - for _, prod := range prods.getAllProductions() { - if prod.num == num { - return prod - } - } - return nil -} diff --git a/grammar/test_helper_test.go b/grammar/test_helper_test.go index 8f5c372..cf9da2b 100644 --- a/grammar/test_helper_test.go +++ b/grammar/test_helper_test.go @@ -35,10 +35,10 @@ func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testPr } } -type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lr0Item +type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { - return func(lhs string, dot int, rhs ...string) *lr0Item { + return func(lhs string, dot int, rhs ...string) *lrItem { t.Helper() prod := genProd(lhs, rhs...) @@ -50,3 +50,15 @@ func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) test return item } } + +func withLookAhead(item *lrItem, lookAhead ...symbol) *lrItem { + if item.lookAhead.symbols == nil { + item.lookAhead.symbols = map[symbol]struct{}{} + } + + for _, a := range lookAhead { + item.lookAhead.symbols[a] = struct{}{} + } + + return item +} |