diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-26 23:16:09 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-26 23:18:49 +0900 |
commit | 7271e46bbcb11acf860c91eddfe12dd7eed5ccad (patch) | |
tree | fafbf797ca806ff1e4cc68acaaaa6db66aec632d | |
parent | Update CHANGELOG (diff) | |
download | urubu-7271e46bbcb11acf860c91eddfe12dd7eed5ccad.tar.gz urubu-7271e46bbcb11acf860c91eddfe12dd7eed5ccad.tar.xz |
Add error symbol and #recover directive to recover from an error state
-rw-r--r-- | cmd/vartan/parse.go | 26 | ||||
-rw-r--r-- | driver/parser.go | 141 | ||||
-rw-r--r-- | driver/parser_test.go | 92 | ||||
-rw-r--r-- | driver/syntax_error_test.go | 130 | ||||
-rw-r--r-- | grammar/grammar.go | 107 | ||||
-rw-r--r-- | grammar/item.go | 6 | ||||
-rw-r--r-- | grammar/lalr1_test.go | 2 | ||||
-rw-r--r-- | grammar/lr0.go | 28 | ||||
-rw-r--r-- | grammar/lr0_test.go | 4 | ||||
-rw-r--r-- | grammar/parsing_table.go | 45 | ||||
-rw-r--r-- | grammar/parsing_table_test.go | 4 | ||||
-rw-r--r-- | grammar/semantic_error.go | 1 | ||||
-rw-r--r-- | grammar/slr1_test.go | 2 | ||||
-rw-r--r-- | spec/grammar.go | 3 |
14 files changed, 520 insertions, 71 deletions
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go index 89de6f0..36401e1 100644 --- a/cmd/vartan/parse.go +++ b/cmd/vartan/parse.go @@ -96,7 +96,31 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { return err } - if !*parseFlags.onlyParse { + synErrs := p.SyntaxErrors() + for _, synErr := range synErrs { + tok := synErr.Token + + var msg string + switch { + case tok.EOF: + msg = "<eof>" + case tok.Invalid: + msg = fmt.Sprintf("'%v' (<invalid>)", tok.Text()) + default: + msg = fmt.Sprintf("'%v' (%v)", tok.Text(), tok.KindName) + } + + fmt.Fprintf(os.Stderr, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg) + if len(synErrs) > 0 { + fmt.Fprintf(os.Stderr, "; expected: %v", synErr.ExpectedTerminals[0]) + for _, t := range synErr.ExpectedTerminals[1:] { + fmt.Fprintf(os.Stderr, ", %v", t) + } + } + fmt.Fprintf(os.Stderr, "\n") + } + + if len(synErrs) == 0 && !*parseFlags.onlyParse { var tree *driver.Node if *parseFlags.cst { tree = p.CST() diff --git a/driver/parser.go b/driver/parser.go index a82d724..08120da 100644 --- a/driver/parser.go +++ b/driver/parser.go @@ -3,7 +3,6 @@ package driver import ( "fmt" "io" - "strings" mldriver "github.com/nihei9/maleeni/driver" mlspec "github.com/nihei9/maleeni/spec" @@ -53,6 +52,14 @@ func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix s } } +type SyntaxError struct { + Row int + Col int + Message string + Token *mldriver.Token + ExpectedTerminals []string +} + type ParserOption func(p *Parser) error func MakeAST() ParserOption { @@ -84,6 +91,9 @@ type Parser struct { makeAST bool makeCST bool needSemAct bool + onError bool + shiftCount int + synErrs []*SyntaxError } func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) (*Parser, error) { @@ -117,6 +127,8 @@ func (p *Parser) Parse() error { if err != nil { return err } + +ACTION_LOOP: for { var tsym int if tok.EOF { @@ -130,11 +142,22 @@ func (p *Parser) Parse() error { tokText := tok.Text() tokRow := tok.Row tokCol := tok.Col - tok, err = p.shift(act * -1) + p.shift(act * -1) + tok, err = p.nextToken() if err != nil { return err } + if p.onError { + // When the parser performs shift three times, the parser recovers from the error state. + if p.shiftCount < 3 { + p.shiftCount++ + } else { + p.onError = false + p.shiftCount = 0 + } + } + // semantic action if p.needSemAct { var ast *Node @@ -173,9 +196,15 @@ func (p *Parser) Parse() error { return nil } + prodNum := act + + if p.onError && p.gram.ParsingTable.RecoverProductions[prodNum] != 0 { + p.onError = false + p.shiftCount = 0 + } + // semantic action if p.needSemAct { - prodNum := act lhs := p.gram.ParsingTable.LHSSymbols[prodNum] // When an alternative is empty, `n` will be 0, and `handle` will be empty slice. @@ -251,31 +280,80 @@ func (p *Parser) Parse() error { }) } default: - var tokText string - if tok.EOF { - tokText = "<EOF>" - } else { - tokText = fmt.Sprintf("%v:%v: %v (%v)", tok.Row+1, tok.Col+1, tok.KindName.String(), tok.Text()) - } + if p.onError { + tok, err = p.nextToken() + if err != nil { + return err + } + if tok.EOF { + return nil + } - eKinds, eof := p.expectedKinds(p.top()) + continue ACTION_LOOP + } - var b strings.Builder - if len(eKinds) > 0 { - fmt.Fprintf(&b, "%v", eKinds[0]) - for _, k := range eKinds[1:] { - fmt.Fprintf(&b, ", %v", k) + var eKindNames []string + { + eKinds, eof := p.expectedKinds(p.top()) + for _, k := range eKinds { + eKindNames = append(eKindNames, k.String()) + } + if eof { + eKindNames = append(eKindNames, "<EOF>") } } - if eof { - if len(eKinds) > 0 { - fmt.Fprintf(&b, ", <EOF>") + + p.synErrs = append(p.synErrs, &SyntaxError{ + Row: tok.Row, + Col: tok.Col, + Message: "unexpected token", + Token: tok, + ExpectedTerminals: eKindNames, + }) + + for { + if p.gram.ParsingTable.ErrorTrapperStates[p.top()] != 0 { + p.onError = true + p.shiftCount = 0 + + errSym := p.gram.ParsingTable.ErrorSymbol + act := p.gram.ParsingTable.Action[p.top()*termCount+errSym] + if act >= 0 { + return fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.top(), p.gram.ParsingTable.Terminals[errSym]) + } + p.shift(act * -1) + + // semantic action + if p.needSemAct { + var ast *Node + var cst *Node + if p.makeAST { + ast = &Node{ + KindName: p.gram.ParsingTable.Terminals[errSym], + } + } + if p.makeCST { + cst = &Node{ + KindName: p.gram.ParsingTable.Terminals[errSym], + } + } + + p.semStack = append(p.semStack, &semanticFrame{ + cst: cst, + ast: ast, + }) + } + + continue ACTION_LOOP + } + + if p.top() != p.gram.ParsingTable.InitialState { + p.pop(1) + p.semStack = p.semStack[:len(p.semStack)-1] } else { - fmt.Fprintf(&b, "<EOF>") + return nil } } - - return fmt.Errorf("unexpected token: %v, expected: %v", tokText, b.String()) } } } @@ -283,13 +361,13 @@ func (p *Parser) Parse() error { func (p *Parser) nextToken() (*mldriver.Token, error) { skip := p.gram.LexicalSpecification.Maleeni.Skip for { + // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0, + // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect + // a syntax error because the parser cannot find an entry corresponding to the invalid token. tok, err := p.lex.Next() if err != nil { return nil, err } - if tok.Invalid { - return nil, fmt.Errorf("invalid token: %v:%v: '%v'", tok.Row+1, tok.Col+1, tok.Text()) - } if skip[tok.KindID] > 0 { continue @@ -299,9 +377,8 @@ func (p *Parser) nextToken() (*mldriver.Token, error) { } } -func (p *Parser) shift(nextState int) (*mldriver.Token, error) { +func (p *Parser) shift(nextState int) { p.push(nextState) - return p.nextToken() } func (p *Parser) reduce(prodNum int) bool { @@ -337,16 +414,26 @@ func (p *Parser) AST() *Node { return p.ast } +func (p *Parser) SyntaxErrors() []*SyntaxError { + return p.synErrs +} + func (p *Parser) expectedKinds(state int) ([]mlspec.LexKindName, bool) { kinds := []mlspec.LexKindName{} eof := false terms := p.gram.ParsingTable.ExpectedTerminals[state] for _, tsym := range terms { - if tsym == 1 { + if tsym == p.gram.ParsingTable.EOFSymbol { eof = true continue } + // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol + // intentionally. + if tsym == p.gram.ParsingTable.ErrorSymbol { + continue + } + kindID := p.gram.LexicalSpecification.Maleeni.TerminalToKind[tsym] kindName := p.gram.LexicalSpecification.Maleeni.Spec.KindNames[kindID] kinds = append(kinds, kindName) diff --git a/driver/parser_test.go b/driver/parser_test.go index 9467d6c..224f12e 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -447,6 +447,94 @@ a: 'a'; `, specErr: true, }, + // The grammar can contain the 'error' symbol. + { + specSrc: ` +s + : id id id ';' + | error ';' + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +id: "[A-Za-z_]+"; +`, + src: `foo bar baz ;`, + }, + // The grammar can contain the 'recover' directive. + { + specSrc: ` +seq + : seq elem + | elem + ; +elem + : id id id ';' + | error ';' #recover + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +id: "[A-Za-z_]+"; +`, + src: `a b c ; d e f ;`, + }, + // The 'recover' directive cannot take a parameter. + { + specSrc: ` +seq + : seq elem + | elem + ; +elem + : id id id ';' + | error ';' #recover foo + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +id: "[A-Za-z_]+"; +`, + src: `a b c ; d e f ;`, + specErr: true, + }, + // You cannot use the error symbol as a non-terminal symbol. + { + specSrc: ` +s + : foo + ; +error + : bar + ; + +foo: 'foo'; +bar: 'bar'; +`, + specErr: true, + }, + // You cannot use the error symbol as a terminal symbol. + { + specSrc: ` +s + : foo + | error + ; + +foo: 'foo'; +error: 'error'; +`, + specErr: true, + }, + // You cannot use the error symbol as a terminal symbol, even if given the skip directive. + { + specSrc: ` +s + : foo + ; + +foo: 'foo'; +error: 'error' #skip; +`, + specErr: true, + }, } classes := []grammar.Class{ @@ -492,6 +580,10 @@ a: 'a'; t.Fatal(err) } + if len(p.SyntaxErrors()) > 0 { + t.Fatalf("unexpected syntax errors occurred: %+v", p.SyntaxErrors()) + } + if tt.cst != nil { testTree(t, p.CST(), tt.cst) } diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go new file mode 100644 index 0000000..427f0d1 --- /dev/null +++ b/driver/syntax_error_test.go @@ -0,0 +1,130 @@ +package driver + +import ( + "fmt" + "strings" + "testing" + + "github.com/nihei9/vartan/grammar" + "github.com/nihei9/vartan/spec" +) + +func TestParserWithSyntaxErrors(t *testing.T) { + tests := []struct { + caption string + specSrc string + src string + synErrCount int + }{ + { + caption: "the parser can report a syntax error", + specSrc: ` +s + : foo + ; + +foo: 'foo'; +`, + src: `bar`, + synErrCount: 1, + }, + { + caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state", + specSrc: ` +seq + : seq elem ';' + | elem ';' + | error ';' #recover + ; +elem + : a b c + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +a: 'a'; +b: 'b'; +c: 'c'; +`, + src: `!; a!; ab!;`, + synErrCount: 3, + }, + { + caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears", + specSrc: ` +seq + : seq elem ';' + | elem ';' + | error ';' #recover + ; +elem + : a b c + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +a: 'a'; +b: 'b'; +c: 'c'; +`, + // After the parser trasits to the error state reading the first invalid symbol ('!'), + // the second and third invalid symbols ('!') are ignored. + src: `! ! !; a!; ab!;`, + synErrCount: 3, + }, + { + caption: "when the parser performs shift three times, the parser recovers from the error state", + specSrc: ` +seq + : seq elem ';' + | elem ';' + | error '*' '*' ';' + ; +elem + : a b c + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +a: 'a'; +b: 'b'; +c: 'c'; +`, + src: `!**; a!**; ab!**; abc!`, + synErrCount: 4, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ast, err := spec.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + g, err := b.Build() + if err != nil { + t.Fatal(err) + } + + gram, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassLALR)) + if err != nil { + t.Fatal(err) + } + + p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST()) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + synErrs := p.SyntaxErrors() + if len(synErrs) != tt.synErrCount { + t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs)) + } + }) + } +} diff --git a/grammar/grammar.go b/grammar/grammar.go index 812a845..27fafc9 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -68,15 +68,21 @@ func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType { return assoc } +const reservedSymbolNameError = "error" + type Grammar struct { lexSpec *mlspec.LexSpec skipLexKinds []mlspec.LexKindName sym2AnonPat map[symbol]string productionSet *productionSet augmentedStartSymbol symbol + errorSymbol symbol symbolTable *symbolTable astActions map[productionID][]*astActionEntry precAndAssoc *precAndAssoc + + // recoverProductions is a set of productions having the recover directive. + recoverProductions map[productionID]struct{} } type GrammarBuilder struct { @@ -155,8 +161,10 @@ func (b *GrammarBuilder) Build() (*Grammar, error) { sym2AnonPat: symTabAndLexSpec.sym2AnonPat, productionSet: prodsAndActs.prods, augmentedStartSymbol: prodsAndActs.augStartSym, + errorSymbol: symTabAndLexSpec.errSym, symbolTable: symTabAndLexSpec.symTab, astActions: prodsAndActs.astActs, + recoverProductions: prodsAndActs.recoverProds, precAndAssoc: pa, }, nil } @@ -193,6 +201,9 @@ func findUsedAndUnusedSymbols(root *spec.RootNode) (*usedAndUnusedSymbols, error start := root.Productions[0] mark[start.LHS] = true markUsedSymbols(mark, map[string]bool{}, prods, start) + + // We don't have to check the error symbol because the error symbol doesn't have a production. + delete(mark, reservedSymbolNameError) } usedTerms := make(map[string]*spec.ProductionNode, len(lexProds)) @@ -255,6 +266,7 @@ type symbolTableAndLexSpec struct { anonPat2Sym map[string]symbol sym2AnonPat map[symbol]string lexSpec *mlspec.LexSpec + errSym symbol skip []mlspec.LexKindName skipSyms []string } @@ -265,6 +277,16 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT symTab := newSymbolTable() entries := []*mlspec.LexEntry{} + // We need to register the reserved symbol before registering others. + var errSym symbol + { + sym, err := symTab.registerTerminalSymbol(reservedSymbolNameError) + if err != nil { + return nil, err + } + errSym = sym + } + anonPat2Sym := map[string]symbol{} sym2AnonPat := map[symbol]string{} { @@ -311,13 +333,22 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT skipKinds := []mlspec.LexKindName{} skipSyms := []string{} for _, prod := range root.LexProductions { - if _, exist := symTab.toSymbol(prod.LHS); exist { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateTerminal, - Detail: prod.LHS, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) + if sym, exist := symTab.toSymbol(prod.LHS); exist { + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } else { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateTerminal, + Detail: prod.LHS, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + continue } @@ -368,6 +399,7 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT lexSpec: &mlspec.LexSpec{ Entries: entries, }, + errSym: errSym, skip: skipKinds, skipSyms: skipSyms, }, nil @@ -465,14 +497,16 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE } type productionsAndActions struct { - prods *productionSet - augStartSym symbol - astActs map[productionID][]*astActionEntry + prods *productionSet + augStartSym symbol + astActs map[productionID][]*astActionEntry + recoverProds map[productionID]struct{} } func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTableAndLexSpec) (*productionsAndActions, error) { symTab := symTabAndLexSpec.symTab anonPat2Sym := symTabAndLexSpec.anonPat2Sym + errSym := symTabAndLexSpec.errSym if len(root.Productions) == 0 { b.errs = append(b.errs, &verr.SpecError{ @@ -484,6 +518,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd prods := newProductionSet() var augStartSym symbol astActs := map[productionID][]*astActionEntry{} + recoverProds := map[productionID]struct{}{} startProd := root.Productions[0] augStartText := fmt.Sprintf("%s'", startProd.LHS) @@ -492,16 +527,33 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd if err != nil { return nil, err } + if augStartSym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: startProd.Pos.Row, + Col: startProd.Pos.Col, + }) + } + startSym, err := symTab.registerNonTerminalSymbol(startProd.LHS) if err != nil { return nil, err } + if startSym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: startProd.Pos.Row, + Col: startProd.Pos.Col, + }) + } + p, err := newProduction(augStartSym, []symbol{ startSym, }) if err != nil { return nil, err } + prods.append(p) for _, prod := range root.Productions { @@ -517,6 +569,13 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd Col: prod.Pos.Col, }) } + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } } for _, prod := range root.Productions { @@ -670,6 +729,17 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd } } astActs[p.id] = astAct + case "recover": + if len(dir.Parameters) > 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'recover' directive needs no parameter"), + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + recoverProds[p.id] = struct{}{} default: b.errs = append(b.errs, &verr.SpecError{ Cause: semErrDirInvalidName, @@ -684,9 +754,10 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd } return &productionsAndActions{ - prods: prods, - augStartSym: augStartSym, - astActs: astActs, + prods: prods, + augStartSym: augStartSym, + astActs: astActs, + recoverProds: recoverProds, }, nil } @@ -859,7 +930,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error return nil, err } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { return nil, err } @@ -929,11 +1000,16 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1) altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1) + recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1) astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1) for _, p := range gram.productionSet.getAllProductions() { lhsSyms[p.num] = p.lhs.num().Int() altSymCounts[p.num] = p.rhsLen + if _, ok := gram.recoverProductions[p.id]; ok { + recoverProds[p.num] = 1 + } + astAct, ok := gram.astActions[p.id] if !ok { continue @@ -972,6 +1048,9 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error NonTerminals: nonTerms, NonTerminalCount: tab.nonTerminalCount, EOFSymbol: symbolEOF.num().Int(), + ErrorSymbol: gram.errorSymbol.num().Int(), + ErrorTrapperStates: tab.errorTrapperStates, + RecoverProductions: recoverProds, ExpectedTerminals: tab.expectedTerminals, }, ASTAction: &spec.ASTAction{ diff --git a/grammar/item.go b/grammar/item.go index 153aea4..100d920 100644 --- a/grammar/item.go +++ b/grammar/item.go @@ -195,4 +195,10 @@ type lrState struct { // s → ・A // s → ・ε emptyProdItems []*lrItem + + // When isErrorTrapper is `true`, the item can shift the `error` symbol. The item has the following form. + // The `α` and `β` can be empty. + // + // A → α・error β + isErrorTrapper bool } diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go index 7d8e48d..1cf8762 100644 --- a/grammar/lalr1_test.go +++ b/grammar/lalr1_test.go @@ -34,7 +34,7 @@ id: "[A-Za-z0-9_]+"; t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } diff --git a/grammar/lr0.go b/grammar/lr0.go index 20952c8..dea5254 100644 --- a/grammar/lr0.go +++ b/grammar/lr0.go @@ -10,7 +10,7 @@ type lr0Automaton struct { states map[kernelID]*lrState } -func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) { +func genLR0Automaton(prods *productionSet, startSym symbol, errSym symbol) (*lr0Automaton, error) { if !startSym.isStart() { return nil, fmt.Errorf("passed symbold is not a start symbol") } @@ -44,7 +44,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro for len(uncheckedKernels) > 0 { nextUncheckedKernels := []*kernel{} for _, k := range uncheckedKernels { - state, neighbours, err := genStateAndNeighbourKernels(k, prods) + state, neighbours, err := genStateAndNeighbourKernels(k, prods, errSym) if err != nil { return nil, err } @@ -67,7 +67,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro return automaton, nil } -func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*kernel, error) { +func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol) (*lrState, []*kernel, error) { items, err := genLR0Closure(k, prods) if err != nil { return nil, nil, err @@ -86,19 +86,22 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []* reducible := map[productionID]struct{}{} var emptyProdItems []*lrItem + isErrorTrapper := false for _, item := range items { - if !item.reducible { - continue + if item.dottedSymbol == errSym { + isErrorTrapper = true } - reducible[item.prod] = struct{}{} + if item.reducible { + reducible[item.prod] = struct{}{} - prod, ok := prods.findByID(item.prod) - if !ok { - return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod) - } - if prod.isEmpty() { - emptyProdItems = append(emptyProdItems, item) + prod, ok := prods.findByID(item.prod) + if !ok { + return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod) + } + if prod.isEmpty() { + emptyProdItems = append(emptyProdItems, item) + } } } @@ -107,6 +110,7 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []* next: next, reducible: reducible, emptyProdItems: emptyProdItems, + isErrorTrapper: isErrorTrapper, }, kernels, nil } diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go index 36c0957..4a613dd 100644 --- a/grammar/lr0_test.go +++ b/grammar/lr0_test.go @@ -52,7 +52,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; t.Fatal(err) } - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } @@ -254,7 +254,7 @@ BAR: "bar"; t.Fatal(err) } - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go index aee5963..b6d9484 100644 --- a/grammar/parsing_table.go +++ b/grammar/parsing_table.go @@ -3,6 +3,7 @@ package grammar import ( "fmt" "io" + "sort" "strings" ) @@ -103,6 +104,12 @@ type ParsingTable struct { nonTerminalCount int expectedTerminals [][]int + // errorTrapperStates's index means a state number, and when `errorTrapperStates[stateNum]` is `1`, + // the state has an item having the following form. The `α` and `β` can be empty. + // + // A → α・error β + errorTrapperStates []int + InitialState stateNum } @@ -146,23 +153,28 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) { { initialState := b.automaton.states[b.automaton.initialState] ptab = &ParsingTable{ - actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), - goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), - stateCount: len(b.automaton.states), - terminalCount: b.termCount, - nonTerminalCount: b.nonTermCount, - expectedTerminals: make([][]int, len(b.automaton.states)), - InitialState: initialState.num, + actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), + goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), + stateCount: len(b.automaton.states), + terminalCount: b.termCount, + nonTerminalCount: b.nonTermCount, + expectedTerminals: make([][]int, len(b.automaton.states)), + errorTrapperStates: make([]int, len(b.automaton.states)), + InitialState: initialState.num, } } for _, state := range b.automaton.states { - var eTerms []int + if state.isErrorTrapper { + ptab.errorTrapperStates[state.num] = 1 + } + + eTerms := map[int]struct{}{} for sym, kID := range state.next { nextState := b.automaton.states[kID] if sym.isTerminal() { - eTerms = append(eTerms, sym.num().Int()) + eTerms[sym.num().Int()] = struct{}{} b.writeShiftAction(ptab, state.num, sym, nextState.num) } else { ptab.writeGoTo(state.num, sym, nextState.num) @@ -199,12 +211,23 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) { } for a := range reducibleItem.lookAhead.symbols { - eTerms = append(eTerms, a.num().Int()) + eTerms[a.num().Int()] = struct{}{} b.writeReduceAction(ptab, state.num, a, reducibleProd.num) } } - ptab.expectedTerminals[state.num] = eTerms + ts := make([]int, len(eTerms)) + { + i := 0 + for t := range eTerms { + ts[i] = t + i++ + } + sort.Slice(ts, func(i, j int) bool { + return ts[i] < ts[j] + }) + } + ptab.expectedTerminals[state.num] = ts } return ptab, nil diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go index 95bc543..feec74a 100644 --- a/grammar/parsing_table_test.go +++ b/grammar/parsing_table_test.go @@ -45,7 +45,7 @@ id: "[A-Za-z0-9_]+"; if err != nil { t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatal(err) } @@ -330,7 +330,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; if err != nil { t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatal(err) } diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go index 28154e2..c4ab9f6 100644 --- a/grammar/semantic_error.go +++ b/grammar/semantic_error.go @@ -25,6 +25,7 @@ var ( semErrDuplicateProduction = newSemanticError("duplicate production") semErrDuplicateTerminal = newSemanticError("duplicate terminal") semErrDuplicateName = newSemanticError("duplicate names are not allowed between terminals and non-terminals") + semErrErrSymIsReserved = newSemanticError("symbol 'error' is reserved as a terminal symbol") semErrDirInvalidName = newSemanticError("invalid directive name") semErrDirInvalidParam = newSemanticError("invalid parameter") ) diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go index 76ceee2..c773c6a 100644 --- a/grammar/slr1_test.go +++ b/grammar/slr1_test.go @@ -44,7 +44,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; t.Fatal(err) } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) if err != nil { t.Fatalf("failed to create a LR0 automaton: %v", err) } diff --git a/spec/grammar.go b/spec/grammar.go index 002fac9..7901957 100644 --- a/spec/grammar.go +++ b/spec/grammar.go @@ -33,6 +33,9 @@ type ParsingTable struct { NonTerminals []string `json:"non_terminals"` NonTerminalCount int `json:"non_terminal_count"` EOFSymbol int `json:"eof_symbol"` + ErrorSymbol int `json:"error_symbol"` + ErrorTrapperStates []int `json:"error_trapper_states"` + RecoverProductions []int `json:"recover_productions"` ExpectedTerminals [][]int `json:"expected_terminals"` } |