diff options
Diffstat (limited to 'grammar')
-rw-r--r-- | grammar/grammar.go | 107 | ||||
-rw-r--r-- | grammar/item.go | 6 | ||||
-rw-r--r-- | grammar/lalr1_test.go | 2 | ||||
-rw-r--r-- | grammar/lr0.go | 28 | ||||
-rw-r--r-- | grammar/lr0_test.go | 4 | ||||
-rw-r--r-- | grammar/parsing_table.go | 45 | ||||
-rw-r--r-- | grammar/parsing_table_test.go | 4 | ||||
-rw-r--r-- | grammar/semantic_error.go | 1 | ||||
-rw-r--r-- | grammar/slr1_test.go | 2 |
9 files changed, 156 insertions, 43 deletions
diff --git a/grammar/grammar.go b/grammar/grammar.go index 812a845..27fafc9 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -68,15 +68,21 @@ func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType { return assoc } +const reservedSymbolNameError = "error" + type Grammar struct { lexSpec *mlspec.LexSpec skipLexKinds []mlspec.LexKindName sym2AnonPat map[symbol]string productionSet *productionSet augmentedStartSymbol symbol + errorSymbol symbol symbolTable *symbolTable astActions map[productionID][]*astActionEntry precAndAssoc *precAndAssoc + + // recoverProductions is a set of productions having the recover directive. + recoverProductions map[productionID]struct{} } type GrammarBuilder struct { @@ -155,8 +161,10 @@ func (b *GrammarBuilder) Build() (*Grammar, error) { sym2AnonPat: symTabAndLexSpec.sym2AnonPat, productionSet: prodsAndActs.prods, augmentedStartSymbol: prodsAndActs.augStartSym, + errorSymbol: symTabAndLexSpec.errSym, symbolTable: symTabAndLexSpec.symTab, astActions: prodsAndActs.astActs, + recoverProductions: prodsAndActs.recoverProds, precAndAssoc: pa, }, nil } @@ -193,6 +201,9 @@ func findUsedAndUnusedSymbols(root *spec.RootNode) (*usedAndUnusedSymbols, error start := root.Productions[0] mark[start.LHS] = true markUsedSymbols(mark, map[string]bool{}, prods, start) + + // We don't have to check the error symbol because the error symbol doesn't have a production. + delete(mark, reservedSymbolNameError) } usedTerms := make(map[string]*spec.ProductionNode, len(lexProds)) @@ -255,6 +266,7 @@ type symbolTableAndLexSpec struct { anonPat2Sym map[string]symbol sym2AnonPat map[symbol]string lexSpec *mlspec.LexSpec + errSym symbol skip []mlspec.LexKindName skipSyms []string } @@ -265,6 +277,16 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT symTab := newSymbolTable() entries := []*mlspec.LexEntry{} + // We need to register the reserved symbol before registering others. + var errSym symbol + { + sym, err := symTab.registerTerminalSymbol(reservedSymbolNameError) + if err != nil { + return nil, err + } + errSym = sym + } + anonPat2Sym := map[string]symbol{} sym2AnonPat := map[symbol]string{} { @@ -311,13 +333,22 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT skipKinds := []mlspec.LexKindName{} skipSyms := []string{} for _, prod := range root.LexProductions { - if _, exist := symTab.toSymbol(prod.LHS); exist { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateTerminal, - Detail: prod.LHS, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) + if sym, exist := symTab.toSymbol(prod.LHS); exist { + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } else { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateTerminal, + Detail: prod.LHS, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + continue } @@ -368,6 +399,7 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT lexSpec: &mlspec.LexSpec{ Entries: entries, }, + errSym: errSym, skip: skipKinds, skipSyms: skipSyms, }, nil @@ -465,14 +497,16 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE } type productionsAndActions struct { - prods *productionSet - augStartSym symbol - astActs map[productionID][]*astActionEntry + prods *productionSet + augStartSym symbol + astActs map[productionID][]*astActionEntry + recoverProds map[productionID]struct{} } func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTableAndLexSpec) (*productionsAndActions, error) { symTab := symTabAndLexSpec.symTab anonPat2Sym := symTabAndLexSpec.anonPat2Sym + errSym := symTabAndLexSpec.errSym if len(root.Productions) == 0 { b.errs = append(b.errs, &verr.SpecError{ @@ -484,6 +518,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd prods := newProductionSet() var augStartSym symbol astActs := map[productionID][]*astActionEntry{} + recoverProds := map[productionID]struct{}{} startProd := root.Productions[0] augStartText := fmt.Sprintf("%s'", startProd.LHS) @@ -492,16 +527,33 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd if err != nil { return nil, err } + if augStartSym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: startProd.Pos.Row, + Col: startProd.Pos.Col, + }) + } + startSym, err := symTab.registerNonTerminalSymbol(startProd.LHS) if err != nil { return nil, err } + if startSym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: startProd.Pos.Row, + Col: startProd.Pos.Col, + }) + } + p, err := newProduction(augStartSym, []symbol{ startSym, }) if err != nil { return nil, err } + prods.append(p) for _, prod := range root.Productions { @@ -517,6 +569,13 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd Col: prod.Pos.Col, }) } + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } } for _, prod := range root.Productions { @@ -670,6 +729,17 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd } } astActs[p.id] = astAct + case "recover": + if len(dir.Parameters) > 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'recover' directive needs no parameter"), + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + recoverProds[p.id] = struct{}{} default: b.errs = append(b.errs, &verr.SpecError{ Cause: semErrDirInvalidName, @@ -684,9 +754,10 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd } return &productionsAndActions{ - prods: prods, - augStartSym: augStartSym, - astActs: astActs, + prods: prods, + augStartSym: augStartSym, + astActs: astActs, + recoverProds: recoverProds, }, nil } @@ -859,7 +930,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error return nil, err } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { return nil, err } @@ -929,11 +1000,16 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1) altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1) + recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1) astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1) for _, p := range gram.productionSet.getAllProductions() { lhsSyms[p.num] = p.lhs.num().Int() altSymCounts[p.num] = p.rhsLen + if _, ok := gram.recoverProductions[p.id]; ok { + recoverProds[p.num] = 1 + } + astAct, ok := gram.astActions[p.id] if !ok { continue @@ -972,6 +1048,9 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error NonTerminals: nonTerms, NonTerminalCount: tab.nonTerminalCount, EOFSymbol: symbolEOF.num().Int(), + ErrorSymbol: gram.errorSymbol.num().Int(), + ErrorTrapperStates: tab.errorTrapperStates, + RecoverProductions: recoverProds, ExpectedTerminals: tab.expectedTerminals, }, ASTAction: &spec.ASTAction{ diff --git a/grammar/item.go b/grammar/item.go index 153aea4..100d920 100644 --- a/grammar/item.go +++ b/grammar/item.go @@ -195,4 +195,10 @@ type lrState struct { // s → ・A // s → ・ε emptyProdItems []*lrItem + + // When isErrorTrapper is `true`, the item can shift the `error` symbol. The item has the following form. + // The `α` and `β` can be empty. + // + // A → α・error β + isErrorTrapper bool } diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go index 7d8e48d..1cf8762 100644 --- a/grammar/lalr1_test.go +++ b/grammar/lalr1_test.go @@ -34,7 +34,7 @@ id: "[A-Za-z0-9_]+"; t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } diff --git a/grammar/lr0.go b/grammar/lr0.go index 20952c8..dea5254 100644 --- a/grammar/lr0.go +++ b/grammar/lr0.go @@ -10,7 +10,7 @@ type lr0Automaton struct { states map[kernelID]*lrState } -func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) { +func genLR0Automaton(prods *productionSet, startSym symbol, errSym symbol) (*lr0Automaton, error) { if !startSym.isStart() { return nil, fmt.Errorf("passed symbold is not a start symbol") } @@ -44,7 +44,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro for len(uncheckedKernels) > 0 { nextUncheckedKernels := []*kernel{} for _, k := range uncheckedKernels { - state, neighbours, err := genStateAndNeighbourKernels(k, prods) + state, neighbours, err := genStateAndNeighbourKernels(k, prods, errSym) if err != nil { return nil, err } @@ -67,7 +67,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro return automaton, nil } -func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*kernel, error) { +func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol) (*lrState, []*kernel, error) { items, err := genLR0Closure(k, prods) if err != nil { return nil, nil, err @@ -86,19 +86,22 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []* reducible := map[productionID]struct{}{} var emptyProdItems []*lrItem + isErrorTrapper := false for _, item := range items { - if !item.reducible { - continue + if item.dottedSymbol == errSym { + isErrorTrapper = true } - reducible[item.prod] = struct{}{} + if item.reducible { + reducible[item.prod] = struct{}{} - prod, ok := prods.findByID(item.prod) - if !ok { - return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod) - } - if prod.isEmpty() { - emptyProdItems = append(emptyProdItems, item) + prod, ok := prods.findByID(item.prod) + if !ok { + return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod) + } + if prod.isEmpty() { + emptyProdItems = append(emptyProdItems, item) + } } } @@ -107,6 +110,7 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []* next: next, reducible: reducible, emptyProdItems: emptyProdItems, + isErrorTrapper: isErrorTrapper, }, kernels, nil } diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go index 36c0957..4a613dd 100644 --- a/grammar/lr0_test.go +++ b/grammar/lr0_test.go @@ -52,7 +52,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; t.Fatal(err) } - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } @@ -254,7 +254,7 @@ BAR: "bar"; t.Fatal(err) } - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go index aee5963..b6d9484 100644 --- a/grammar/parsing_table.go +++ b/grammar/parsing_table.go @@ -3,6 +3,7 @@ package grammar import ( "fmt" "io" + "sort" "strings" ) @@ -103,6 +104,12 @@ type ParsingTable struct { nonTerminalCount int expectedTerminals [][]int + // errorTrapperStates's index means a state number, and when `errorTrapperStates[stateNum]` is `1`, + // the state has an item having the following form. The `α` and `β` can be empty. + // + // A → α・error β + errorTrapperStates []int + InitialState stateNum } @@ -146,23 +153,28 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) { { initialState := b.automaton.states[b.automaton.initialState] ptab = &ParsingTable{ - actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), - goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), - stateCount: len(b.automaton.states), - terminalCount: b.termCount, - nonTerminalCount: b.nonTermCount, - expectedTerminals: make([][]int, len(b.automaton.states)), - InitialState: initialState.num, + actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), + goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), + stateCount: len(b.automaton.states), + terminalCount: b.termCount, + nonTerminalCount: b.nonTermCount, + expectedTerminals: make([][]int, len(b.automaton.states)), + errorTrapperStates: make([]int, len(b.automaton.states)), + InitialState: initialState.num, } } for _, state := range b.automaton.states { - var eTerms []int + if state.isErrorTrapper { + ptab.errorTrapperStates[state.num] = 1 + } + + eTerms := map[int]struct{}{} for sym, kID := range state.next { nextState := b.automaton.states[kID] if sym.isTerminal() { - eTerms = append(eTerms, sym.num().Int()) + eTerms[sym.num().Int()] = struct{}{} b.writeShiftAction(ptab, state.num, sym, nextState.num) } else { ptab.writeGoTo(state.num, sym, nextState.num) @@ -199,12 +211,23 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) { } for a := range reducibleItem.lookAhead.symbols { - eTerms = append(eTerms, a.num().Int()) + eTerms[a.num().Int()] = struct{}{} b.writeReduceAction(ptab, state.num, a, reducibleProd.num) } } - ptab.expectedTerminals[state.num] = eTerms + ts := make([]int, len(eTerms)) + { + i := 0 + for t := range eTerms { + ts[i] = t + i++ + } + sort.Slice(ts, func(i, j int) bool { + return ts[i] < ts[j] + }) + } + ptab.expectedTerminals[state.num] = ts } return ptab, nil diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go index 95bc543..feec74a 100644 --- a/grammar/parsing_table_test.go +++ b/grammar/parsing_table_test.go @@ -45,7 +45,7 @@ id: "[A-Za-z0-9_]+"; if err != nil { t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatal(err) } @@ -330,7 +330,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; if err != nil { t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatal(err) } diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go index 28154e2..c4ab9f6 100644 --- a/grammar/semantic_error.go +++ b/grammar/semantic_error.go @@ -25,6 +25,7 @@ var ( semErrDuplicateProduction = newSemanticError("duplicate production") semErrDuplicateTerminal = newSemanticError("duplicate terminal") semErrDuplicateName = newSemanticError("duplicate names are not allowed between terminals and non-terminals") + semErrErrSymIsReserved = newSemanticError("symbol 'error' is reserved as a terminal symbol") semErrDirInvalidName = newSemanticError("invalid directive name") semErrDirInvalidParam = newSemanticError("invalid parameter") ) diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go index 76ceee2..c773c6a 100644 --- a/grammar/slr1_test.go +++ b/grammar/slr1_test.go @@ -44,7 +44,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } |