author     Ryo Nihei <nihei.dev@gmail.com>   2021-08-26 23:16:09 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>   2021-08-26 23:18:49 +0900
commit     7271e46bbcb11acf860c91eddfe12dd7eed5ccad (patch)
tree       fafbf797ca806ff1e4cc68acaaaa6db66aec632d
parent     Update CHANGELOG (diff)
Add error symbol and #recover directive to recover from an error state
-rw-r--r--   cmd/vartan/parse.go             26
-rw-r--r--   driver/parser.go               141
-rw-r--r--   driver/parser_test.go           92
-rw-r--r--   driver/syntax_error_test.go    130
-rw-r--r--   grammar/grammar.go             107
-rw-r--r--   grammar/item.go                  6
-rw-r--r--   grammar/lalr1_test.go            2
-rw-r--r--   grammar/lr0.go                  28
-rw-r--r--   grammar/lr0_test.go              4
-rw-r--r--   grammar/parsing_table.go        45
-rw-r--r--   grammar/parsing_table_test.go    4
-rw-r--r--   grammar/semantic_error.go        1
-rw-r--r--   grammar/slr1_test.go             2
-rw-r--r--   spec/grammar.go                  3
14 files changed, 520 insertions, 71 deletions
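
As a rough end-to-end sketch of how the feature introduced by this commit is meant to be used: the program below builds a grammar that contains the reserved 'error' symbol and an alternative marked with #recover, parses an input containing a syntax error, and then inspects the errors recorded by the new SyntaxErrors method instead of having Parse fail. The API calls mirror the ones exercised in the diffs below; the grammar, the input, and the driver import path are illustrative assumptions, not part of this commit.

package main

import (
	"fmt"
	"strings"

	"github.com/nihei9/vartan/driver" // assumed import path for the driver package
	"github.com/nihei9/vartan/grammar"
	"github.com/nihei9/vartan/spec"
)

func main() {
	// An illustrative grammar: a sequence of "a b c ;" elements. A malformed element
	// is skipped up to the next ';' and parsing resumes (#recover).
	grammarSrc := `
seq
    : seq elem ';'
    | elem ';'
    | error ';' #recover
    ;
elem
    : a b c
    ;

ws: "[\u{0009}\u{0020}]+" #skip;
a: 'a';
b: 'b';
c: 'c';
`
	ast, err := spec.Parse(strings.NewReader(grammarSrc))
	if err != nil {
		panic(err)
	}
	b := grammar.GrammarBuilder{AST: ast}
	g, err := b.Build()
	if err != nil {
		panic(err)
	}
	gram, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassLALR))
	if err != nil {
		panic(err)
	}

	// The second element is malformed ('!'), but Parse no longer returns an error for it.
	p, err := driver.NewParser(gram, strings.NewReader(`a b c ; a ! ; a b c ;`))
	if err != nil {
		panic(err)
	}
	if err := p.Parse(); err != nil {
		panic(err)
	}
	for _, synErr := range p.SyntaxErrors() {
		fmt.Printf("%v:%v: %v\n", synErr.Row+1, synErr.Col+1, synErr.Message)
	}
}
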
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index 89de6f0..36401e1 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -96,7 +96,31 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
return err
}
- if !*parseFlags.onlyParse {
+ synErrs := p.SyntaxErrors()
+ for _, synErr := range synErrs {
+ tok := synErr.Token
+
+ var msg string
+ switch {
+ case tok.EOF:
+ msg = "<eof>"
+ case tok.Invalid:
+ msg = fmt.Sprintf("'%v' (<invalid>)", tok.Text())
+ default:
+ msg = fmt.Sprintf("'%v' (%v)", tok.Text(), tok.KindName)
+ }
+
+ fmt.Fprintf(os.Stderr, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg)
+ if len(synErr.ExpectedTerminals) > 0 {
+ fmt.Fprintf(os.Stderr, "; expected: %v", synErr.ExpectedTerminals[0])
+ for _, t := range synErr.ExpectedTerminals[1:] {
+ fmt.Fprintf(os.Stderr, ", %v", t)
+ }
+ }
+ fmt.Fprintf(os.Stderr, "\n")
+ }
+
+ if len(synErrs) == 0 && !*parseFlags.onlyParse {
var tree *driver.Node
if *parseFlags.cst {
tree = p.CST()
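
With this change a syntax error no longer aborts the command; each error is written to stderr in the format assembled above. For an invalid token '!' the output would look roughly like the following line (the positions and expected terminals are illustrative):

1:5: unexpected token: '!' (<invalid>); expected: a, ;
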
diff --git a/driver/parser.go b/driver/parser.go
index a82d724..08120da 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -3,7 +3,6 @@ package driver
import (
"fmt"
"io"
- "strings"
mldriver "github.com/nihei9/maleeni/driver"
mlspec "github.com/nihei9/maleeni/spec"
@@ -53,6 +52,14 @@ func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix s
}
}
+type SyntaxError struct {
+ Row int
+ Col int
+ Message string
+ Token *mldriver.Token
+ ExpectedTerminals []string
+}
+
type ParserOption func(p *Parser) error
func MakeAST() ParserOption {
@@ -84,6 +91,9 @@ type Parser struct {
makeAST bool
makeCST bool
needSemAct bool
+ onError bool
+ shiftCount int
+ synErrs []*SyntaxError
}
func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) (*Parser, error) {
@@ -117,6 +127,8 @@ func (p *Parser) Parse() error {
if err != nil {
return err
}
+
+ACTION_LOOP:
for {
var tsym int
if tok.EOF {
@@ -130,11 +142,22 @@ func (p *Parser) Parse() error {
tokText := tok.Text()
tokRow := tok.Row
tokCol := tok.Col
- tok, err = p.shift(act * -1)
+ p.shift(act * -1)
+ tok, err = p.nextToken()
if err != nil {
return err
}
+ if p.onError {
+ // When the parser has performed a shift three times after entering the error state, it recovers from the error state.
+ if p.shiftCount < 3 {
+ p.shiftCount++
+ } else {
+ p.onError = false
+ p.shiftCount = 0
+ }
+ }
+
// semantic action
if p.needSemAct {
var ast *Node
@@ -173,9 +196,15 @@ func (p *Parser) Parse() error {
return nil
}
+ prodNum := act
+
+ if p.onError && p.gram.ParsingTable.RecoverProductions[prodNum] != 0 {
+ p.onError = false
+ p.shiftCount = 0
+ }
+
// semantic action
if p.needSemAct {
- prodNum := act
lhs := p.gram.ParsingTable.LHSSymbols[prodNum]
// When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
@@ -251,31 +280,80 @@ func (p *Parser) Parse() error {
})
}
default:
- var tokText string
- if tok.EOF {
- tokText = "<EOF>"
- } else {
- tokText = fmt.Sprintf("%v:%v: %v (%v)", tok.Row+1, tok.Col+1, tok.KindName.String(), tok.Text())
- }
+ if p.onError {
+ tok, err = p.nextToken()
+ if err != nil {
+ return err
+ }
+ if tok.EOF {
+ return nil
+ }
- eKinds, eof := p.expectedKinds(p.top())
+ continue ACTION_LOOP
+ }
- var b strings.Builder
- if len(eKinds) > 0 {
- fmt.Fprintf(&b, "%v", eKinds[0])
- for _, k := range eKinds[1:] {
- fmt.Fprintf(&b, ", %v", k)
+ var eKindNames []string
+ {
+ eKinds, eof := p.expectedKinds(p.top())
+ for _, k := range eKinds {
+ eKindNames = append(eKindNames, k.String())
+ }
+ if eof {
+ eKindNames = append(eKindNames, "<EOF>")
}
}
- if eof {
- if len(eKinds) > 0 {
- fmt.Fprintf(&b, ", <EOF>")
+
+ p.synErrs = append(p.synErrs, &SyntaxError{
+ Row: tok.Row,
+ Col: tok.Col,
+ Message: "unexpected token",
+ Token: tok,
+ ExpectedTerminals: eKindNames,
+ })
+
+ for {
+ if p.gram.ParsingTable.ErrorTrapperStates[p.top()] != 0 {
+ p.onError = true
+ p.shiftCount = 0
+
+ errSym := p.gram.ParsingTable.ErrorSymbol
+ act := p.gram.ParsingTable.Action[p.top()*termCount+errSym]
+ if act >= 0 {
+ return fmt.Errorf("the entry must be a shift action on the error symbol; entry: %v, state: %v, symbol: %v", act, p.top(), p.gram.ParsingTable.Terminals[errSym])
+ }
+ p.shift(act * -1)
+
+ // semantic action
+ if p.needSemAct {
+ var ast *Node
+ var cst *Node
+ if p.makeAST {
+ ast = &Node{
+ KindName: p.gram.ParsingTable.Terminals[errSym],
+ }
+ }
+ if p.makeCST {
+ cst = &Node{
+ KindName: p.gram.ParsingTable.Terminals[errSym],
+ }
+ }
+
+ p.semStack = append(p.semStack, &semanticFrame{
+ cst: cst,
+ ast: ast,
+ })
+ }
+
+ continue ACTION_LOOP
+ }
+
+ if p.top() != p.gram.ParsingTable.InitialState {
+ p.pop(1)
+ p.semStack = p.semStack[:len(p.semStack)-1]
} else {
- fmt.Fprintf(&b, "<EOF>")
+ return nil
}
}
-
- return fmt.Errorf("unexpected token: %v, expected: %v", tokText, b.String())
}
}
}
@@ -283,13 +361,13 @@ func (p *Parser) Parse() error {
func (p *Parser) nextToken() (*mldriver.Token, error) {
skip := p.gram.LexicalSpecification.Maleeni.Skip
for {
+ // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
+ // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
+ // a syntax error because the parser cannot find an entry corresponding to the invalid token.
tok, err := p.lex.Next()
if err != nil {
return nil, err
}
- if tok.Invalid {
- return nil, fmt.Errorf("invalid token: %v:%v: '%v'", tok.Row+1, tok.Col+1, tok.Text())
- }
if skip[tok.KindID] > 0 {
continue
@@ -299,9 +377,8 @@ func (p *Parser) nextToken() (*mldriver.Token, error) {
}
}
-func (p *Parser) shift(nextState int) (*mldriver.Token, error) {
+func (p *Parser) shift(nextState int) {
p.push(nextState)
- return p.nextToken()
}
func (p *Parser) reduce(prodNum int) bool {
@@ -337,16 +414,26 @@ func (p *Parser) AST() *Node {
return p.ast
}
+func (p *Parser) SyntaxErrors() []*SyntaxError {
+ return p.synErrs
+}
+
func (p *Parser) expectedKinds(state int) ([]mlspec.LexKindName, bool) {
kinds := []mlspec.LexKindName{}
eof := false
terms := p.gram.ParsingTable.ExpectedTerminals[state]
for _, tsym := range terms {
- if tsym == 1 {
+ if tsym == p.gram.ParsingTable.EOFSymbol {
eof = true
continue
}
+ // We don't add the error symbol to the expected symbols because no input token corresponds to
+ // the error symbol; users cannot write it intentionally.
+ if tsym == p.gram.ParsingTable.ErrorSymbol {
+ continue
+ }
+
kindID := p.gram.LexicalSpecification.Maleeni.TerminalToKind[tsym]
kindName := p.gram.LexicalSpecification.Maleeni.Spec.KindNames[kindID]
kinds = append(kinds, kindName)
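
Taken together, the new default branch implements a panic-mode style recovery: when no action exists for the look-ahead token, the parser records a SyntaxError, pops states until the state on top can shift the reserved error symbol (an "error trapper" state), shifts the error symbol, and then discards look-ahead tokens until an action applies again. It leaves the error state after three ordinary shifts or when it reduces a production marked with #recover. The pop-until-trapper step in isolation looks roughly like the following standalone sketch (hypothetical state numbers, not the parser's actual stack handling):

package main

import "fmt"

// findErrorTrapper mirrors the recovery loop added to Parser.Parse above: states are
// popped off the stack until one that can shift the error symbol (an error trapper
// state) is on top, or the initial state is reached and the parser gives up.
func findErrorTrapper(stack []int, errorTrapperStates []int, initialState int) ([]int, bool) {
	for {
		top := stack[len(stack)-1]
		if errorTrapperStates[top] != 0 {
			return stack, true
		}
		if top == initialState {
			return stack, false
		}
		stack = stack[:len(stack)-1]
	}
}

func main() {
	// Hypothetical automaton: state 1 is the only error trapper.
	trappers := []int{0, 1, 0, 0}
	stack, ok := findErrorTrapper([]int{0, 1, 3, 2}, trappers, 0)
	fmt.Println(stack, ok) // [0 1] true
}
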
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 9467d6c..224f12e 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -447,6 +447,94 @@ a: 'a';
`,
specErr: true,
},
+ // The grammar can contain the 'error' symbol.
+ {
+ specSrc: `
+s
+ : id id id ';'
+ | error ';'
+ ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+id: "[A-Za-z_]+";
+`,
+ src: `foo bar baz ;`,
+ },
+ // The grammar can contain the 'recover' directive.
+ {
+ specSrc: `
+seq
+ : seq elem
+ | elem
+ ;
+elem
+ : id id id ';'
+ | error ';' #recover
+ ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+id: "[A-Za-z_]+";
+`,
+ src: `a b c ; d e f ;`,
+ },
+ // The 'recover' directive cannot take a parameter.
+ {
+ specSrc: `
+seq
+ : seq elem
+ | elem
+ ;
+elem
+ : id id id ';'
+ | error ';' #recover foo
+ ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+id: "[A-Za-z_]+";
+`,
+ src: `a b c ; d e f ;`,
+ specErr: true,
+ },
+ // You cannot use the error symbol as a non-terminal symbol.
+ {
+ specSrc: `
+s
+ : foo
+ ;
+error
+ : bar
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ specErr: true,
+ },
+ // You cannot use the error symbol as a terminal symbol.
+ {
+ specSrc: `
+s
+ : foo
+ | error
+ ;
+
+foo: 'foo';
+error: 'error';
+`,
+ specErr: true,
+ },
+ // You cannot use the error symbol as a terminal symbol, even if given the skip directive.
+ {
+ specSrc: `
+s
+ : foo
+ ;
+
+foo: 'foo';
+error: 'error' #skip;
+`,
+ specErr: true,
+ },
}
classes := []grammar.Class{
@@ -492,6 +580,10 @@ a: 'a';
t.Fatal(err)
}
+ if len(p.SyntaxErrors()) > 0 {
+ t.Fatalf("unexpected syntax errors occurred: %+v", p.SyntaxErrors())
+ }
+
if tt.cst != nil {
testTree(t, p.CST(), tt.cst)
}
diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go
new file mode 100644
index 0000000..427f0d1
--- /dev/null
+++ b/driver/syntax_error_test.go
@@ -0,0 +1,130 @@
+package driver
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/grammar"
+ "github.com/nihei9/vartan/spec"
+)
+
+func TestParserWithSyntaxErrors(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ synErrCount int
+ }{
+ {
+ caption: "the parser can report a syntax error",
+ specSrc: `
+s
+ : foo
+ ;
+
+foo: 'foo';
+`,
+ src: `bar`,
+ synErrCount: 1,
+ },
+ {
+ caption: "when the parser reduces a production having the recover directive, it recovers from the error state",
+ specSrc: `
+seq
+ : seq elem ';'
+ | elem ';'
+ | error ';' #recover
+ ;
+elem
+ : a b c
+ ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+a: 'a';
+b: 'b';
+c: 'c';
+`,
+ src: `!; a!; ab!;`,
+ synErrCount: 3,
+ },
+ {
+ caption: "after the parser shifts the error symbol, tokens are ignored until a token that the parser can shift appears",
+ specSrc: `
+seq
+ : seq elem ';'
+ | elem ';'
+ | error ';' #recover
+ ;
+elem
+ : a b c
+ ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+a: 'a';
+b: 'b';
+c: 'c';
+`,
+ // After the parser transitions to the error state on reading the first invalid symbol ('!'),
+ // the second and third invalid symbols ('!') are ignored.
+ src: `! ! !; a!; ab!;`,
+ synErrCount: 3,
+ },
+ {
+ caption: "when the parser performs a shift three times after entering the error state, it recovers from the error state",
+ specSrc: `
+seq
+ : seq elem ';'
+ | elem ';'
+ | error '*' '*' ';'
+ ;
+elem
+ : a b c
+ ;
+
+ws: "[\u{0009}\u{0020}]+" #skip;
+a: 'a';
+b: 'b';
+c: 'c';
+`,
+ src: `!**; a!**; ab!**; abc!`,
+ synErrCount: 4,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := spec.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ g, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassLALR))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST())
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ synErrs := p.SyntaxErrors()
+ if len(synErrs) != tt.synErrCount {
+ t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
+ }
+ })
+ }
+}
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 812a845..27fafc9 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -68,15 +68,21 @@ func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType {
return assoc
}
+const reservedSymbolNameError = "error"
+
type Grammar struct {
lexSpec *mlspec.LexSpec
skipLexKinds []mlspec.LexKindName
sym2AnonPat map[symbol]string
productionSet *productionSet
augmentedStartSymbol symbol
+ errorSymbol symbol
symbolTable *symbolTable
astActions map[productionID][]*astActionEntry
precAndAssoc *precAndAssoc
+
+ // recoverProductions is a set of productions having the recover directive.
+ recoverProductions map[productionID]struct{}
}
type GrammarBuilder struct {
@@ -155,8 +161,10 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
sym2AnonPat: symTabAndLexSpec.sym2AnonPat,
productionSet: prodsAndActs.prods,
augmentedStartSymbol: prodsAndActs.augStartSym,
+ errorSymbol: symTabAndLexSpec.errSym,
symbolTable: symTabAndLexSpec.symTab,
astActions: prodsAndActs.astActs,
+ recoverProductions: prodsAndActs.recoverProds,
precAndAssoc: pa,
}, nil
}
@@ -193,6 +201,9 @@ func findUsedAndUnusedSymbols(root *spec.RootNode) (*usedAndUnusedSymbols, error
start := root.Productions[0]
mark[start.LHS] = true
markUsedSymbols(mark, map[string]bool{}, prods, start)
+
+ // We don't have to check the error symbol because the error symbol doesn't have a production.
+ delete(mark, reservedSymbolNameError)
}
usedTerms := make(map[string]*spec.ProductionNode, len(lexProds))
@@ -255,6 +266,7 @@ type symbolTableAndLexSpec struct {
anonPat2Sym map[string]symbol
sym2AnonPat map[symbol]string
lexSpec *mlspec.LexSpec
+ errSym symbol
skip []mlspec.LexKindName
skipSyms []string
}
@@ -265,6 +277,16 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT
symTab := newSymbolTable()
entries := []*mlspec.LexEntry{}
+ // We need to register the reserved symbol before registering others.
+ var errSym symbol
+ {
+ sym, err := symTab.registerTerminalSymbol(reservedSymbolNameError)
+ if err != nil {
+ return nil, err
+ }
+ errSym = sym
+ }
+
anonPat2Sym := map[string]symbol{}
sym2AnonPat := map[symbol]string{}
{
@@ -311,13 +333,22 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT
skipKinds := []mlspec.LexKindName{}
skipSyms := []string{}
for _, prod := range root.LexProductions {
- if _, exist := symTab.toSymbol(prod.LHS); exist {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateTerminal,
- Detail: prod.LHS,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
+ if sym, exist := symTab.toSymbol(prod.LHS); exist {
+ if sym == errSym {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrErrSymIsReserved,
+ Row: prod.Pos.Row,
+ Col: prod.Pos.Col,
+ })
+ } else {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrDuplicateTerminal,
+ Detail: prod.LHS,
+ Row: prod.Pos.Row,
+ Col: prod.Pos.Col,
+ })
+ }
+
continue
}
@@ -368,6 +399,7 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT
lexSpec: &mlspec.LexSpec{
Entries: entries,
},
+ errSym: errSym,
skip: skipKinds,
skipSyms: skipSyms,
}, nil
@@ -465,14 +497,16 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE
}
type productionsAndActions struct {
- prods *productionSet
- augStartSym symbol
- astActs map[productionID][]*astActionEntry
+ prods *productionSet
+ augStartSym symbol
+ astActs map[productionID][]*astActionEntry
+ recoverProds map[productionID]struct{}
}
func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTableAndLexSpec) (*productionsAndActions, error) {
symTab := symTabAndLexSpec.symTab
anonPat2Sym := symTabAndLexSpec.anonPat2Sym
+ errSym := symTabAndLexSpec.errSym
if len(root.Productions) == 0 {
b.errs = append(b.errs, &verr.SpecError{
@@ -484,6 +518,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
prods := newProductionSet()
var augStartSym symbol
astActs := map[productionID][]*astActionEntry{}
+ recoverProds := map[productionID]struct{}{}
startProd := root.Productions[0]
augStartText := fmt.Sprintf("%s'", startProd.LHS)
@@ -492,16 +527,33 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
if err != nil {
return nil, err
}
+ if augStartSym == errSym {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrErrSymIsReserved,
+ Row: startProd.Pos.Row,
+ Col: startProd.Pos.Col,
+ })
+ }
+
startSym, err := symTab.registerNonTerminalSymbol(startProd.LHS)
if err != nil {
return nil, err
}
+ if startSym == errSym {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrErrSymIsReserved,
+ Row: startProd.Pos.Row,
+ Col: startProd.Pos.Col,
+ })
+ }
+
p, err := newProduction(augStartSym, []symbol{
startSym,
})
if err != nil {
return nil, err
}
+
prods.append(p)
for _, prod := range root.Productions {
@@ -517,6 +569,13 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
Col: prod.Pos.Col,
})
}
+ if sym == errSym {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrErrSymIsReserved,
+ Row: prod.Pos.Row,
+ Col: prod.Pos.Col,
+ })
+ }
}
for _, prod := range root.Productions {
@@ -670,6 +729,17 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
}
}
astActs[p.id] = astAct
+ case "recover":
+ if len(dir.Parameters) > 0 {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrDirInvalidParam,
+ Detail: "the 'recover' directive takes no parameters",
+ Row: dir.Pos.Row,
+ Col: dir.Pos.Col,
+ })
+ continue LOOP_RHS
+ }
+ recoverProds[p.id] = struct{}{}
default:
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDirInvalidName,
@@ -684,9 +754,10 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
}
return &productionsAndActions{
- prods: prods,
- augStartSym: augStartSym,
- astActs: astActs,
+ prods: prods,
+ augStartSym: augStartSym,
+ astActs: astActs,
+ recoverProds: recoverProds,
}, nil
}
@@ -859,7 +930,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
return nil, err
}
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
return nil, err
}
@@ -929,11 +1000,16 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1)
altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1)
+ recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1)
astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1)
for _, p := range gram.productionSet.getAllProductions() {
lhsSyms[p.num] = p.lhs.num().Int()
altSymCounts[p.num] = p.rhsLen
+ if _, ok := gram.recoverProductions[p.id]; ok {
+ recoverProds[p.num] = 1
+ }
+
astAct, ok := gram.astActions[p.id]
if !ok {
continue
@@ -972,6 +1048,9 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
NonTerminals: nonTerms,
NonTerminalCount: tab.nonTerminalCount,
EOFSymbol: symbolEOF.num().Int(),
+ ErrorSymbol: gram.errorSymbol.num().Int(),
+ ErrorTrapperStates: tab.errorTrapperStates,
+ RecoverProductions: recoverProds,
ExpectedTerminals: tab.expectedTerminals,
},
ASTAction: &spec.ASTAction{
diff --git a/grammar/item.go b/grammar/item.go
index 153aea4..100d920 100644
--- a/grammar/item.go
+++ b/grammar/item.go
@@ -195,4 +195,10 @@ type lrState struct {
// s → ・A
// s → ・ε
emptyProdItems []*lrItem
+
+ // When isErrorTrapper is `true`, the item can shift the `error` symbol; that is, the item has the
+ // following form, where `α` and `β` may be empty.
+ //
+ // A → α・error β
+ isErrorTrapper bool
}
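
For example, given the alternative `elem: error ';' #recover` from the tests above, the LR items derived from it are (illustrative; the actual marking happens in genStateAndNeighbourKernels in the lr0.go diff below):

elem → ・error ';'   (the dotted symbol is `error`, so every state containing this item becomes an error trapper)
elem → error ・';'
elem → error ';'・
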
diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go
index 7d8e48d..1cf8762 100644
--- a/grammar/lalr1_test.go
+++ b/grammar/lalr1_test.go
@@ -34,7 +34,7 @@ id: "[A-Za-z0-9_]+";
t.Fatal(err)
}
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
t.Fatalf("failed to create a LR0 automaton: %v", err)
}
diff --git a/grammar/lr0.go b/grammar/lr0.go
index 20952c8..dea5254 100644
--- a/grammar/lr0.go
+++ b/grammar/lr0.go
@@ -10,7 +10,7 @@ type lr0Automaton struct {
states map[kernelID]*lrState
}
-func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) {
+func genLR0Automaton(prods *productionSet, startSym symbol, errSym symbol) (*lr0Automaton, error) {
if !startSym.isStart() {
return nil, fmt.Errorf("passed symbol is not a start symbol")
}
@@ -44,7 +44,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro
for len(uncheckedKernels) > 0 {
nextUncheckedKernels := []*kernel{}
for _, k := range uncheckedKernels {
- state, neighbours, err := genStateAndNeighbourKernels(k, prods)
+ state, neighbours, err := genStateAndNeighbourKernels(k, prods, errSym)
if err != nil {
return nil, err
}
@@ -67,7 +67,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, erro
return automaton, nil
}
-func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*kernel, error) {
+func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol) (*lrState, []*kernel, error) {
items, err := genLR0Closure(k, prods)
if err != nil {
return nil, nil, err
@@ -86,19 +86,22 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*
reducible := map[productionID]struct{}{}
var emptyProdItems []*lrItem
+ isErrorTrapper := false
for _, item := range items {
- if !item.reducible {
- continue
+ if item.dottedSymbol == errSym {
+ isErrorTrapper = true
}
- reducible[item.prod] = struct{}{}
+ if item.reducible {
+ reducible[item.prod] = struct{}{}
- prod, ok := prods.findByID(item.prod)
- if !ok {
- return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod)
- }
- if prod.isEmpty() {
- emptyProdItems = append(emptyProdItems, item)
+ prod, ok := prods.findByID(item.prod)
+ if !ok {
+ return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod)
+ }
+ if prod.isEmpty() {
+ emptyProdItems = append(emptyProdItems, item)
+ }
}
}
@@ -107,6 +110,7 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lrState, []*
next: next,
reducible: reducible,
emptyProdItems: emptyProdItems,
+ isErrorTrapper: isErrorTrapper,
}, kernels, nil
}
diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go
index 36c0957..4a613dd 100644
--- a/grammar/lr0_test.go
+++ b/grammar/lr0_test.go
@@ -52,7 +52,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
t.Fatal(err)
}
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
t.Fatalf("failed to create a LR0 automaton: %v", err)
}
@@ -254,7 +254,7 @@ BAR: "bar";
t.Fatal(err)
}
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
t.Fatalf("failed to create a LR0 automaton: %v", err)
}
diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go
index aee5963..b6d9484 100644
--- a/grammar/parsing_table.go
+++ b/grammar/parsing_table.go
@@ -3,6 +3,7 @@ package grammar
import (
"fmt"
"io"
+ "sort"
"strings"
)
@@ -103,6 +104,12 @@ type ParsingTable struct {
nonTerminalCount int
expectedTerminals [][]int
+ // errorTrapperStates is indexed by state number; when `errorTrapperStates[stateNum]` is `1`,
+ // the state contains an item of the following form, where `α` and `β` may be empty.
+ //
+ // A → α・error β
+ errorTrapperStates []int
+
InitialState stateNum
}
@@ -146,23 +153,28 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) {
{
initialState := b.automaton.states[b.automaton.initialState]
ptab = &ParsingTable{
- actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount),
- goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
- stateCount: len(b.automaton.states),
- terminalCount: b.termCount,
- nonTerminalCount: b.nonTermCount,
- expectedTerminals: make([][]int, len(b.automaton.states)),
- InitialState: initialState.num,
+ actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount),
+ goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
+ stateCount: len(b.automaton.states),
+ terminalCount: b.termCount,
+ nonTerminalCount: b.nonTermCount,
+ expectedTerminals: make([][]int, len(b.automaton.states)),
+ errorTrapperStates: make([]int, len(b.automaton.states)),
+ InitialState: initialState.num,
}
}
for _, state := range b.automaton.states {
- var eTerms []int
+ if state.isErrorTrapper {
+ ptab.errorTrapperStates[state.num] = 1
+ }
+
+ eTerms := map[int]struct{}{}
for sym, kID := range state.next {
nextState := b.automaton.states[kID]
if sym.isTerminal() {
- eTerms = append(eTerms, sym.num().Int())
+ eTerms[sym.num().Int()] = struct{}{}
b.writeShiftAction(ptab, state.num, sym, nextState.num)
} else {
ptab.writeGoTo(state.num, sym, nextState.num)
@@ -199,12 +211,23 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) {
}
for a := range reducibleItem.lookAhead.symbols {
- eTerms = append(eTerms, a.num().Int())
+ eTerms[a.num().Int()] = struct{}{}
b.writeReduceAction(ptab, state.num, a, reducibleProd.num)
}
}
- ptab.expectedTerminals[state.num] = eTerms
+ ts := make([]int, len(eTerms))
+ {
+ i := 0
+ for t := range eTerms {
+ ts[i] = t
+ i++
+ }
+ sort.Slice(ts, func(i, j int) bool {
+ return ts[i] < ts[j]
+ })
+ }
+ ptab.expectedTerminals[state.num] = ts
}
return ptab, nil
diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go
index 95bc543..feec74a 100644
--- a/grammar/parsing_table_test.go
+++ b/grammar/parsing_table_test.go
@@ -45,7 +45,7 @@ id: "[A-Za-z0-9_]+";
if err != nil {
t.Fatal(err)
}
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
t.Fatal(err)
}
@@ -330,7 +330,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
if err != nil {
t.Fatal(err)
}
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
t.Fatal(err)
}
diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go
index 28154e2..c4ab9f6 100644
--- a/grammar/semantic_error.go
+++ b/grammar/semantic_error.go
@@ -25,6 +25,7 @@ var (
semErrDuplicateProduction = newSemanticError("duplicate production")
semErrDuplicateTerminal = newSemanticError("duplicate terminal")
semErrDuplicateName = newSemanticError("duplicate names are not allowed between terminals and non-terminals")
+ semErrErrSymIsReserved = newSemanticError("symbol 'error' is reserved as a terminal symbol")
semErrDirInvalidName = newSemanticError("invalid directive name")
semErrDirInvalidParam = newSemanticError("invalid parameter")
)
diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go
index 76ceee2..c773c6a 100644
--- a/grammar/slr1_test.go
+++ b/grammar/slr1_test.go
@@ -44,7 +44,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
t.Fatal(err)
}
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
if err != nil {
t.Fatalf("failed to create a LR0 automaton: %v", err)
}
diff --git a/spec/grammar.go b/spec/grammar.go
index 002fac9..7901957 100644
--- a/spec/grammar.go
+++ b/spec/grammar.go
@@ -33,6 +33,9 @@ type ParsingTable struct {
NonTerminals []string `json:"non_terminals"`
NonTerminalCount int `json:"non_terminal_count"`
EOFSymbol int `json:"eof_symbol"`
+ ErrorSymbol int `json:"error_symbol"`
+ ErrorTrapperStates []int `json:"error_trapper_states"`
+ RecoverProductions []int `json:"recover_productions"`
ExpectedTerminals [][]int `json:"expected_terminals"`
}
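
For reference, the three new fields sit next to the existing ones in the compiled grammar JSON roughly as follows; the field names come from the struct tags above, while the numeric values are purely illustrative:

"eof_symbol": 1,
"error_symbol": 2,
"error_trapper_states": [0, 0, 1, 0, 1],
"recover_productions": [0, 0, 0, 1],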