Add error symbol and #recover directive to recover from an error state

author: Ryo Nihei <nihei.dev@gmail.com> 2021-08-26 23:16:09 +0900
committer: Ryo Nihei <nihei.dev@gmail.com> 2021-08-26 23:18:49 +0900
commit: 7271e46bbcb11acf860c91eddfe12dd7eed5ccad (patch)
tree: fafbf797ca806ff1e4cc68acaaaa6db66aec632d
parent: Update CHANGELOG (diff)
download: urubu-7271e46bbcb11acf860c91eddfe12dd7eed5ccad.tar.gz
urubu-7271e46bbcb11acf860c91eddfe12dd7eed5ccad.tar.xz
14 files changed, 520 insertions, 71 deletions
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index 89de6f0..36401e1 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -96,7 +96,31 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
 		return err
 	}
 
-	if !*parseFlags.onlyParse {
+	synErrs := p.SyntaxErrors()
+	for _, synErr := range synErrs {
+		tok := synErr.Token
+
+		var msg string
+		switch {
+		case tok.EOF:
+			msg = "<eof>"
+		case tok.Invalid:
+			msg = fmt.Sprintf("'%v' (<invalid>)", tok.Text())
+		default:
+			msg = fmt.Sprintf("'%v' (%v)", tok.Text(), tok.KindName)
+		}
+
+		fmt.Fprintf(os.Stderr, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg)
+		if len(synErrs) > 0 {
+			fmt.Fprintf(os.Stderr, "; expected: %v", synErr.ExpectedTerminals[0])
+			for _, t := range synErr.ExpectedTerminals[1:] {
+				fmt.Fprintf(os.Stderr, ", %v", t)
+			}
+		}
+		fmt.Fprintf(os.Stderr, "\n")
+	}
+
+	if len(synErrs) == 0 && !*parseFlags.onlyParse {
 		var tree *driver.Node
 		if *parseFlags.cst {
 			tree = p.CST()
diff --git a/driver/parser.go b/driver/parser.go
index a82d724..08120da 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -3,7 +3,6 @@ package driver
 import (
 	"fmt"
 	"io"
-	"strings"
 
 	mldriver "github.com/nihei9/maleeni/driver"
 	mlspec "github.com/nihei9/maleeni/spec"
@@ -53,6 +52,14 @@ func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix s
 	}
 }
 
+type SyntaxError struct {
+	Row               int
+	Col               int
+	Message           string
+	Token             *mldriver.Token
+	ExpectedTerminals []string
+}
+
 type ParserOption func(p *Parser) error
 
 func MakeAST() ParserOption {
@@ -84,6 +91,9 @@ type Parser struct {
 	makeAST    bool
 	makeCST    bool
 	needSemAct bool
+	onError    bool
+	shiftCount int
+	synErrs    []*SyntaxError
 }
 
 func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) (*Parser, error) {
@@ -117,6 +127,8 @@ func (p *Parser) Parse() error {
 	if err != nil {
 		return err
 	}
+
+ACTION_LOOP:
 	for {
 		var tsym int
 		if tok.EOF {
@@ -130,11 +142,22 @@ func (p *Parser) Parse() error {
 			tokText := tok.Text()
 			tokRow := tok.Row
 			tokCol := tok.Col
-			tok, err = p.shift(act * -1)
+			p.shift(act * -1)
+			tok, err = p.nextToken()
 			if err != nil {
 				return err
 			}
 
+			if p.onError {
+				// When the parser performs shift three times, the parser recovers from the error state.
+				if p.shiftCount < 3 {
+					p.shiftCount++
+				} else {
+					p.onError = false
+					p.shiftCount = 0
+				}
+			}
+
 			// semantic action
 			if p.needSemAct {
 				var ast *Node
@@ -173,9 +196,15 @@ func (p *Parser) Parse() error {
 				return nil
 			}
 
+			prodNum := act
+
+			if p.onError && p.gram.ParsingTable.RecoverProductions[prodNum] != 0 {
+				p.onError = false
+				p.shiftCount = 0
+			}
+
 			// semantic action
 			if p.needSemAct {
-				prodNum := act
 				lhs := p.gram.ParsingTable.LHSSymbols[prodNum]
 
 				// When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
@@ -251,31 +280,80 @@ func (p *Parser) Parse() error {
 				})
 			}
 		default:
-			var tokText string
-			if tok.EOF {
-				tokText = "<EOF>"
-			} else {
-				tokText = fmt.Sprintf("%v:%v: %v (%v)", tok.Row+1, tok.Col+1, tok.KindName.String(), tok.Text())
-			}
+			if p.onError {
+				tok, err = p.nextToken()
+				if err != nil {
+					return err
+				}
+				if tok.EOF {
+					return nil
+				}
 
-			eKinds, eof := p.expectedKinds(p.top())
+				continue ACTION_LOOP
+			}
 
-			var b strings.Builder
-			if len(eKinds) > 0 {
-				fmt.Fprintf(&b, "%v", eKinds[0])
-				for _, k := range eKinds[1:] {
-					fmt.Fprintf(&b, ", %v", k)
+			var eKindNames []string
+			{
+				eKinds, eof := p.expectedKinds(p.top())
+				for _, k := range eKinds {
+					eKindNames = append(eKindNames, k.String())
+				}
+				if eof {
+					eKindNames = append(eKindNames, "<EOF>")
 				}
 			}
-			if eof {
-				if len(eKinds) > 0 {
-					fmt.Fprintf(&b, ", <EOF>")
+
+			p.synErrs = append(p.synErrs, &SyntaxError{
+				Row:               tok.Row,
+				Col:               tok.Col,
+				Message:           "unexpected token",
+				Token:             tok,
+				ExpectedTerminals: eKindNames,
+			})
+
+			for {
+				if p.gram.ParsingTable.ErrorTrapperStates[p.top()] != 0 {
+					p.onError = true
+					p.shiftCount = 0
+
+					errSym := p.gram.ParsingTable.ErrorSymbol
+					act := p.gram.ParsingTable.Action[p.top()*termCount+errSym]
+					if act >= 0 {
+						return fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.top(), p.gram.ParsingTable.Terminals[errSym])
+					}
+					p.shift(act * -1)
+
+					// semantic action
+					if p.needSemAct {
+						var ast *Node
+						var cst *Node
+						if p.makeAST {
+							ast = &Node{
+								KindName: p.gram.ParsingTable.Terminals[errSym],
+							}
+						}
+						if p.makeCST {
+							cst = &Node{
+								KindName: p.gram.ParsingTable.Terminals[errSym],
+							}
+						}
+
+						p.semStack = append(p.semStack, &semanticFrame{
+							cst: cst,
+							ast: ast,
+						})
+					}
+
+					continue ACTION_LOOP
+				}
+
+				if p.top() != p.gram.ParsingTable.InitialState {
+					p.pop(1)
+					p.semStack = p.semStack[:len(p.semStack)-1]
 				} else {
-					fmt.Fprintf(&b, "<EOF>")
+					return nil
 				}
 			}
-
-			return fmt.Errorf("unexpected token: %v, expected: %v", tokText, b.String())
 		}
 	}
 }
@@ -283,13 +361,13 @@ func (p *Parser) Parse() error {
 func (p *Parser) nextToken() (*mldriver.Token, error) {
 	skip := p.gram.LexicalSpecification.Maleeni.Skip
 	for {
+		// We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
+		// and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
+		// a syntax error because the parser cannot find an entry corresponding to the invalid token.
 		tok, err := p.lex.Next()
 		if err != nil {
 			return nil, err
 		}
-		if tok.Invalid {
-			return nil, fmt.Errorf("invalid token: %v:%v: '%v'", tok.Row+1, tok.Col+1, tok.Text())
-		}
 
 		if skip[tok.KindID] > 0 {
 			continue
@@ -299,9 +377,8 @@ func (p *Parser) nextToken() (*mldriver.Token, error) {
 	}
 }
 
-func (p *Parser) shift(nextState int) (*mldriver.Token, error) {
+func (p *Parser) shift(nextState int) {
 	p.push(nextState)
-	return p.nextToken()
 }
 
 func (p *Parser) reduce(prodNum int) bool {
@@ -337,16 +414,26 @@ func (p *Parser) AST() *Node {
 	return p.ast
 }
 
+func (p *Parser) SyntaxErrors() []*SyntaxError {
+	return p.synErrs
+}
+
 func (p *Parser) expectedKinds(state int) ([]mlspec.LexKindName, bool) {
 	kinds := []mlspec.LexKindName{}
 	eof := false
 	terms := p.gram.ParsingTable.ExpectedTerminals[state]
 	for _, tsym := range terms {
-		if tsym == 1 {
+		if tsym == p.gram.ParsingTable.EOFSymbol {
 			eof = true
 			continue
 		}
 
+		// We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol
+		// intentionally.
+		if tsym == p.gram.ParsingTable.ErrorSymbol {
+			continue
+		}
+
 		kindID := p.gram.LexicalSpecification.Maleeni.TerminalToKind[tsym]
 		kindName := p.gram.LexicalSpecification.Maleeni.Spec.KindNames[kindID]
 		kinds = append(kinds, kindName)
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 9467d6c..224f12e 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -447,6 +447,94 @@ a: 'a';
 `,
 			specErr: true,
 		},
+		// The grammar can contain the 'error' symbol.
+		{
+			specSrc: `
+s
+    : id id id ';'
+    | error ';'
+    ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+id: "[A-Za-z_]+";
+`,
+			src: `foo bar baz ;`,
+		},
+		// The grammar can contain the 'recover' directive.
+		{
+			specSrc: `
+seq
+    : seq elem
+    | elem
+    ;
+elem
+    : id id id ';'
+    | error ';' #recover
+    ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+id: "[A-Za-z_]+";
+`,
+			src: `a b c ; d e f ;`,
+		},
+		// The 'recover' directive cannot take a parameter.
+		{
+			specSrc: `
+seq
+    : seq elem
+    | elem
+    ;
+elem
+    : id id id ';'
+    | error ';' #recover foo
+    ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+id: "[A-Za-z_]+";
+`,
+			src:     `a b c ; d e f ;`,
+			specErr: true,
+		},
+		// You cannot use the error symbol as a non-terminal symbol.
+		{
+			specSrc: `
+s
+    : foo
+    ;
+error
+    : bar
+    ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+			specErr: true,
+		},
+		// You cannot use the error symbol as a terminal symbol.
+		{
+			specSrc: `
+s
+    : foo
+    | error
+    ;
+
+foo: 'foo';
+error: 'error';
+`,
+			specErr: true,
+		},
+		// You cannot use the error symbol as a terminal symbol, even if given the skip directive.
+		{
+			specSrc: `
+s
+    : foo
+    ;
+
+foo: 'foo';
+error: 'error' #skip;
+`,
+			specErr: true,
+		},
 	}
 
 	classes := []grammar.Class{
@@ -492,6 +580,10 @@ a: 'a';
 					t.Fatal(err)
 				}
 
+				if len(p.SyntaxErrors()) > 0 {
+					t.Fatalf("unexpected syntax errors occurred: %+v", p.SyntaxErrors())
+				}
+
 				if tt.cst != nil {
 					testTree(t, p.CST(), tt.cst)
 				}
diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go
new file mode 100644
index 0000000..427f0d1
--- /dev/null
+++ b/driver/syntax_error_test.go
@@ -0,0 +1,130 @@
+package driver
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/nihei9/vartan/grammar"
+	"github.com/nihei9/vartan/spec"
+)
+
+func TestParserWithSyntaxErrors(t *testing.T) {
+	tests := []struct {
+		caption     string
+		specSrc     string
+		src         string
+		synErrCount int
+	}{
+		{
+			caption: "the parser can report a syntax error",
+			specSrc: `
+s
+    : foo
+    ;
+
+foo: 'foo';
+`,
+			src:         `bar`,
+			synErrCount: 1,
+		},
+		{
+			caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state",
+			specSrc: `
+seq
+    : seq elem ';'
+	| elem ';'
+	| error ';' #recover
+	;
+elem
+    : a b c
+	;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+a: 'a';
+b: 'b';
+c: 'c';
+`,
+			src:         `!; a!; ab!;`,
+			synErrCount: 3,
+		},
+		{
+			caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears",
+			specSrc: `
+seq
+    : seq elem ';'
+	| elem ';'
+	| error ';' #recover
+	;
+elem
+    : a b c
+	;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+a: 'a';
+b: 'b';
+c: 'c';
+`,
+			// After the parser trasits to the error state reading the first invalid symbol ('!'),
+			// the second and third invalid symbols ('!') are ignored.
+			src:         `! ! !; a!; ab!;`,
+			synErrCount: 3,
+		},
+		{
+			caption: "when the parser performs shift three times, the parser recovers from the error state",
+			specSrc: `
+seq
+    : seq elem ';'
+	| elem ';'
+	| error '*' '*' ';'
+	;
+elem
+    : a b c
+	;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+a: 'a';
+b: 'b';
+c: 'c';
+`,
+			src:         `!**; a!**; ab!**; abc!`,
+			synErrCount: 4,
+		},
+	}
+	for i, tt := range tests {
+		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+			ast, err := spec.Parse(strings.NewReader(tt.specSrc))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			b := grammar.GrammarBuilder{
+				AST: ast,
+			}
+			g, err := b.Build()
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			gram, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassLALR))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST())
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			err = p.Parse()
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			synErrs := p.SyntaxErrors()
+			if len(synErrs) != tt.synErrCount {
+				t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
+			}
+		})
+	}
+}
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 812a845..27fafc9 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -68,15 +68,21 @@ func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType {
 	return assoc
 }
 
+const reservedSymbolNameError = "error"
+
 type Grammar struct {
 	lexSpec              *mlspec.LexSpec
 	skipLexKinds         []mlspec.LexKindName
 	sym2AnonPat          map[symbol]string
 	productionSet        *productionSet
 	augmentedStartSymbol symbol
+	errorSymbol          symbol
 	symbolTable          *symbolTable
 	astActions           map[productionID][]*astActionEntry
 	precAndAssoc         *precAndAssoc
+
+	// recoverProductions is a set of productions having the recover directive.
+	recoverProductions map[productionID]struct{}
 }
 
 type GrammarBuilder struct {
@@ -155,8 +161,10 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
 		sym2AnonPat:          symTabAndLexSpec.sym2AnonPat,
 		productionSet:        prodsAndActs.prods,
 		augmentedStartSymbol: prodsAndActs.augStartSym,
+		errorSymbol:          symTabAndLexSpec.errSym,
 		symbolTable:          symTabAndLexSpec.symTab,
 		astActions:           prodsAndActs.astActs,
+		recoverProductions:   prodsAndActs.recoverProds,
 		precAndAssoc:         pa,
 	}, nil
 }
@@ -193,6 +201,9 @@ func findUsedAndUnusedSymbols(root *spec.RootNode) (*usedAndUnusedSymbols, error
 		start := root.Productions[0]
 		mark[start.LHS] = true
 		markUsedSymbols(mark, map[string]bool{}, prods, start)
+
+		// We don't have to check the error symbol because the error symbol doesn't have a production.
+		delete(mark, reservedSymbolNameError)
 	}
 
 	usedTerms := make(map[string]*spec.ProductionNode, len(lexProds))
@@ -255,6 +266,7 @@ type symbolTableAndLexSpec struct {
 	anonPat2Sym map[string]symbol
 	sym2AnonPat map[symbol]string
 	lexSpec     *mlspec.LexSpec
+	errSym      symbol
 	skip        []mlspec.LexKindName
 	skipSyms    []string
 }
@@ -265,6 +277,16 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT
 	symTab := newSymbolTable()
 	entries := []*mlspec.LexEntry{}
 
+	// We need to register the reserved symbol before registering others.
+	var errSym symbol
+	{
+		sym, err := symTab.registerTerminalSymbol(reservedSymbolNameError)
+		if err != nil {
+			return nil, err
+		}
+		errSym = sym
+	}
+
 	anonPat2Sym := map[string]symbol{}
 	sym2AnonPat := map[symbol]string{}
 	{
@@ -311,13 +333,22 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT
 	skipKinds := []mlspec.LexKindName{}
 	skipSyms := []string{}
 	for _, prod := range root.LexProductions {
-		if _, exist := symTab.toSymbol(prod.LHS); exist {
-			b.errs = append(b.errs, &verr.SpecError{
-				Cause:  semErrDuplicateTerminal,
-				Detail: prod.LHS,
-				Row:    prod.Pos.Row,
-				Col:    prod.Pos.Col,
-			})
+		if sym, exist := symTab.toSymbol(prod.LHS); exist {
+			if sym == errSym {
+				b.errs = append(b.errs, &verr.SpecError{
+					Cause: semErrErrSymIsReserved,
+					Row:   prod.Pos.Row,
+					Col:   prod.Pos.Col,
+				})
+			} else {
+				b.errs = append(b.errs, &verr.SpecError{
+					Cause:  semErrDuplicateTerminal,
+					Detail: prod.LHS,
+					Row:    prod.Pos.Row,
+					Col:    prod.Pos.Col,
+				})
+			}
+
 			continue
 		}
 
@@ -368,6 +399,7 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT
 		lexSpec: &mlspec.LexSpec{
 			Entries: entries,
 		},
+		errSym:   errSym,
 		skip:     skipKinds,
 		skipSyms: skipSyms,
 	}, nil
@@ -465,14 +497,16 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE
 }
 
 type productionsAndActions struct {
-	prods       *productionSet
-	augStartSym symbol
-	astActs     map[productionID][]*astActionEntry
+	prods        *productionSet
+	augStartSym  symbol
+	astActs      map[productionID][]*astActionEntry
+	recoverProds map[productionID]struct{}
 }
 
 func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTableAndLexSpec) (*productionsAndActions, error) {
 	symTab := symTabAndLexSpec.symTab
 	anonPat2Sym := symTabAndLexSpec.anonPat2Sym
+	errSym := symTabAndLexSpec.errSym
 
 	if len(root.Productions) == 0 {
 		b.errs = append(b.errs, &verr.SpecError{
@@ -484,6 +518,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
 	prods := newProductionSet()
 	var augStartSym symbol
 	astActs := map[productionID][]*astActionEntry{}
+	recoverProds := map[productionID]struct{}{}
 
 	startProd := root.Productions[0]
 	augStartText := fmt.Sprintf("%s'", startProd.LHS)
@@ -492,16 +527,33 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
 	if err != nil {
 		return nil, err
 	}
+	if augStartSym == errSym {
+		b.errs = append(b.errs, &verr.SpecError{
+			Cause: semErrErrSymIsReserved,
+			Row:   startProd.Pos.Row,
+			Col:   startProd.Pos.Col,
+		})
+	}
+
 	startSym, err := symTab.registerNonTerminalSymbol(startProd.LHS)
 	if err != nil {
 		return nil, err
 	}
+	if startSym == errSym {
+		b.errs = append(b.errs, &verr.SpecError{
+			Cause: semErrErrSymIsReserved,
+			Row:   startProd.Pos.Row,
+			Col:   startProd.Pos.Col,
+		})
+	}
+
 	p, err := newProduction(augStartSym, []symbol{
 		startSym,
 	})
 	if err != nil {
 		return nil, err
 	}
+
 	prods.append(p)
 
 	for _, prod := range root.Productions {
@@ -517,6 +569,13 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
 				Col:    prod.Pos.Col,
 			})
 		}
+		if sym == errSym {
+			b.errs = append(b.errs, &verr.SpecError{
+				Cause: semErrErrSymIsReserved,
+				Row:   prod.Pos.Row,
+				Col:   prod.Pos.Col,
+			})
+		}
 	}
 
 	for _, prod := range root.Productions {
@@ -670,6 +729,17 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
 						}
 					}
 					astActs[p.id] = astAct
+				case "recover":
+					if len(dir.Parameters) > 0 {
+						b.errs = append(b.errs, &verr.SpecError{
+							Cause:  semErrDirInvalidParam,
+							Detail: fmt.Sprintf("'recover' directive needs no parameter"),
+							Row:    dir.Pos.Row,
+							Col:    dir.Pos.Col,
+						})
+						continue LOOP_RHS
+					}
+					recoverProds[p.id] = struct{}{}
 				default:
 					b.errs = append(b.errs, &verr.SpecError{
 						Cause:  semErrDirInvalidName,
@@ -684,9 +754,10 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
 	}
 
 	return &productionsAndActions{
-		prods:       prods,
-		augStartSym: augStartSym,
-		astActs:     astActs,
+		prods:        prods,
+		augStartSym:  augStartSym,
+		astActs:      astActs,
+		recoverProds: recoverProds,
 	}, nil
 }
 
@@ -859,7 +930,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
 		return nil, err
 	}
 
-	lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+	lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 	if err != nil {
 		return nil, err
 	}
@@ -929,11 +1000,16 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
 
 	lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1)
 	altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1)
+	recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1)
 	astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1)
 	for _, p := range gram.productionSet.getAllProductions() {
 		lhsSyms[p.num] = p.lhs.num().Int()
 		altSymCounts[p.num] = p.rhsLen
 
+		if _, ok := gram.recoverProductions[p.id]; ok {
+			recoverProds[p.num] = 1
+		}
+
 		astAct, ok := gram.astActions[p.id]
 		if !ok {
 			continue
@@ -972,6 +1048,9 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
 			NonTerminals:            nonTerms,
 			NonTerminalCount:        tab.nonTerminalCount,
 			EOFSymbol:               symbolEOF.num().Int(),
+			ErrorSymbol:             gram.errorSymbol.num().Int(),
+			ErrorTrapperStates:      tab.errorTrapperStates,
+			RecoverProductions:      recoverProds,
 			ExpectedTerminals:       tab.expectedTerminals,
 		},
 		ASTAction: &spec.ASTAction{
diff --git a/grammar/item.go b/grammar/item.go
index 153aea4..100d920 100644
--- a/grammar/item.go
+++ b/grammar/item.go
@@ -195,4 +195,10 @@ type lrState struct {
 	// s → ・A
 	// s → ・ε
 	emptyProdItems []*lrItem
+
+	// When isErrorTrapper is `true`, the item can shift the `error` symbol. The item has the following form.
+	// The `α` and `β` can be empty.
+	//
+	// A → α・error β
+	isErrorTrapper bool
 }
diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go
index 7d8e48d..1cf8762 100644
--- a/grammar/lalr1_test.go
+++ b/grammar/lalr1_test.go
@@ -34,7 +34,7 @@ id: "[A-Za-z0-9_]+";
 			t.Fatal(err)
 		}
 
-		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 		if err != nil {
 			t.Fatalf("failed to create a LR0 automaton: %v", err)
 		}
diff --git a/grammar/lr0.go b/grammar/lr0.go
index 20952c8..dea5254 100644
--- a/grammar/lr0.go
+++ b/grammar/lr0.go
@@ -10,7 +10,7 @@ type lr0Automaton struct {
 	states       map[kernelID]*lrState
 }
 
-func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) {
+func genLR0Automaton(prods *productionSet, startSym symbol, errSym symbol) (*lr0Automaton, error) {
 	if !startSym.isStart() {
 		return nil, fmt.Errorf("passed symbold is not a start symbol")
 	}
@@ -44,7 +44,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro
 	for len(uncheckedKernels) > 0 {
 		nextUncheckedKernels := []*kernel{}
 		for _, k := range uncheckedKernels {
-			state, neighbours, err := genStateAndNeighbourKernels(k, prods)
+			state, neighbours, err := genStateAndNeighbourKernels(k, prods, errSym)
 			if err != nil {
 				return nil, err
 			}
@@ -67,7 +67,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro
 	return automaton, nil
 }
 
-func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*kernel, error) {
+func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol) (*lrState, []*kernel, error) {
 	items, err := genLR0Closure(k, prods)
 	if err != nil {
 		return nil, nil, err
@@ -86,19 +86,22 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*
 
 	reducible := map[productionID]struct{}{}
 	var emptyProdItems []*lrItem
+	isErrorTrapper := false
 	for _, item := range items {
-		if !item.reducible {
-			continue
+		if item.dottedSymbol == errSym {
+			isErrorTrapper = true
 		}
 
-		reducible[item.prod] = struct{}{}
+		if item.reducible {
+			reducible[item.prod] = struct{}{}
 
-		prod, ok := prods.findByID(item.prod)
-		if !ok {
-			return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod)
-		}
-		if prod.isEmpty() {
-			emptyProdItems = append(emptyProdItems, item)
+			prod, ok := prods.findByID(item.prod)
+			if !ok {
+				return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod)
+			}
+			if prod.isEmpty() {
+				emptyProdItems = append(emptyProdItems, item)
+			}
 		}
 	}
 
@@ -107,6 +110,7 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*
 		next:           next,
 		reducible:      reducible,
 		emptyProdItems: emptyProdItems,
+		isErrorTrapper: isErrorTrapper,
 	}, kernels, nil
 }
 
diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go
index 36c0957..4a613dd 100644
--- a/grammar/lr0_test.go
+++ b/grammar/lr0_test.go
@@ -52,7 +52,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
 			t.Fatal(err)
 		}
 
-		automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+		automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 		if err != nil {
 			t.Fatalf("failed to create a LR0 automaton: %v", err)
 		}
@@ -254,7 +254,7 @@ BAR: "bar";
 			t.Fatal(err)
 		}
 
-		automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+		automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 		if err != nil {
 			t.Fatalf("failed to create a LR0 automaton: %v", err)
 		}
diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go
index aee5963..b6d9484 100644
--- a/grammar/parsing_table.go
+++ b/grammar/parsing_table.go
@@ -3,6 +3,7 @@ package grammar
 import (
 	"fmt"
 	"io"
+	"sort"
 	"strings"
 )
 
@@ -103,6 +104,12 @@ type ParsingTable struct {
 	nonTerminalCount  int
 	expectedTerminals [][]int
 
+	// errorTrapperStates's index means a state number, and when `errorTrapperStates[stateNum]` is `1`,
+	// the state has an item having the following form. The `α` and `β` can be empty.
+	//
+	// A → α・error β
+	errorTrapperStates []int
+
 	InitialState stateNum
 }
 
@@ -146,23 +153,28 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) {
 	{
 		initialState := b.automaton.states[b.automaton.initialState]
 		ptab = &ParsingTable{
-			actionTable:       make([]actionEntry, len(b.automaton.states)*b.termCount),
-			goToTable:         make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
-			stateCount:        len(b.automaton.states),
-			terminalCount:     b.termCount,
-			nonTerminalCount:  b.nonTermCount,
-			expectedTerminals: make([][]int, len(b.automaton.states)),
-			InitialState:      initialState.num,
+			actionTable:        make([]actionEntry, len(b.automaton.states)*b.termCount),
+			goToTable:          make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
+			stateCount:         len(b.automaton.states),
+			terminalCount:      b.termCount,
+			nonTerminalCount:   b.nonTermCount,
+			expectedTerminals:  make([][]int, len(b.automaton.states)),
+			errorTrapperStates: make([]int, len(b.automaton.states)),
+			InitialState:       initialState.num,
 		}
 	}
 
 	for _, state := range b.automaton.states {
-		var eTerms []int
+		if state.isErrorTrapper {
+			ptab.errorTrapperStates[state.num] = 1
+		}
+
+		eTerms := map[int]struct{}{}
 
 		for sym, kID := range state.next {
 			nextState := b.automaton.states[kID]
 			if sym.isTerminal() {
-				eTerms = append(eTerms, sym.num().Int())
+				eTerms[sym.num().Int()] = struct{}{}
 				b.writeShiftAction(ptab, state.num, sym, nextState.num)
 			} else {
 				ptab.writeGoTo(state.num, sym, nextState.num)
@@ -199,12 +211,23 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) {
 			}
 
 			for a := range reducibleItem.lookAhead.symbols {
-				eTerms = append(eTerms, a.num().Int())
+				eTerms[a.num().Int()] = struct{}{}
 				b.writeReduceAction(ptab, state.num, a, reducibleProd.num)
 			}
 		}
 
-		ptab.expectedTerminals[state.num] = eTerms
+		ts := make([]int, len(eTerms))
+		{
+			i := 0
+			for t := range eTerms {
+				ts[i] = t
+				i++
+			}
+			sort.Slice(ts, func(i, j int) bool {
+				return ts[i] < ts[j]
+			})
+		}
+		ptab.expectedTerminals[state.num] = ts
 	}
 
 	return ptab, nil
diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go
index 95bc543..feec74a 100644
--- a/grammar/parsing_table_test.go
+++ b/grammar/parsing_table_test.go
@@ -45,7 +45,7 @@ id: "[A-Za-z0-9_]+";
 		if err != nil {
 			t.Fatal(err)
 		}
-		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 		if err != nil {
 			t.Fatal(err)
 		}
@@ -330,7 +330,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
 		if err != nil {
 			t.Fatal(err)
 		}
-		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 		if err != nil {
 			t.Fatal(err)
 		}
diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go
index 28154e2..c4ab9f6 100644
--- a/grammar/semantic_error.go
+++ b/grammar/semantic_error.go
@@ -25,6 +25,7 @@ var (
 	semErrDuplicateProduction = newSemanticError("duplicate production")
 	semErrDuplicateTerminal   = newSemanticError("duplicate terminal")
 	semErrDuplicateName       = newSemanticError("duplicate names are not allowed between terminals and non-terminals")
+	semErrErrSymIsReserved    = newSemanticError("symbol 'error' is reserved as a terminal symbol")
 	semErrDirInvalidName      = newSemanticError("invalid directive name")
 	semErrDirInvalidParam     = newSemanticError("invalid parameter")
 )
diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go
index 76ceee2..c773c6a 100644
--- a/grammar/slr1_test.go
+++ b/grammar/slr1_test.go
@@ -44,7 +44,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
 			t.Fatal(err)
 		}
 
-		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+		lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
 		if err != nil {
 			t.Fatalf("failed to create a LR0 automaton: %v", err)
 		}
diff --git a/spec/grammar.go b/spec/grammar.go
index 002fac9..7901957 100644
--- a/spec/grammar.go
+++ b/spec/grammar.go
@@ -33,6 +33,9 @@ type ParsingTable struct {
 	NonTerminals            []string `json:"non_terminals"`
 	NonTerminalCount        int      `json:"non_terminal_count"`
 	EOFSymbol               int      `json:"eof_symbol"`
+	ErrorSymbol             int      `json:"error_symbol"`
+	ErrorTrapperStates      []int    `json:"error_trapper_states"`
+	RecoverProductions      []int    `json:"recover_productions"`
 	ExpectedTerminals       [][]int  `json:"expected_terminals"`
 }
author	Ryo Nihei <nihei.dev@gmail.com>	2021-08-26 23:16:09 +0900
committer	Ryo Nihei <nihei.dev@gmail.com>	2021-08-26 23:18:49 +0900
commit	7271e46bbcb11acf860c91eddfe12dd7eed5ccad (patch)
tree	fafbf797ca806ff1e4cc68acaaaa6db66aec632d
parent	Update CHANGELOG (diff)
download	urubu-7271e46bbcb11acf860c91eddfe12dd7eed5ccad.tar.gz urubu-7271e46bbcb11acf860c91eddfe12dd7eed5ccad.tar.xz