diff options
| field | value | date |
|---|---|---|
| author | Ryo Nihei <nihei.dev@gmail.com> | 2021-07-18 22:15:35 +0900 |
| committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-07-18 22:15:35 +0900 |
| commit | e4a5958fc8d9e131ab083215a51a1b60acf91410 (patch) | |
| tree | 6ca224a42915135e8f5427d62fc52455b1a1bc59 | |
| parent | Add token positions to an AST (diff) | |
| download | urubu-e4a5958fc8d9e131ab083215a51a1b60acf91410.tar.gz urubu-e4a5958fc8d9e131ab083215a51a1b60acf91410.tar.xz | |
Detect multiple semantic errors in a single parse
-rw-r--r-- | cmd/vartan/compile.go | 3 | ||||
-rw-r--r-- | driver/parser_test.go | 3 | ||||
-rw-r--r-- | error/error.go | 4 | ||||
-rw-r--r-- | grammar/first_test.go | 3 | ||||
-rw-r--r-- | grammar/follow_test.go | 3 | ||||
-rw-r--r-- | grammar/grammar.go | 132 | ||||
-rw-r--r-- | grammar/lr0_item_test.go | 3 | ||||
-rw-r--r-- | grammar/semantic_error.go | 22 | ||||
-rw-r--r-- | grammar/slr_test.go | 3 | ||||
-rw-r--r-- | spec/lexer.go | 4 | ||||
-rw-r--r-- | spec/parser.go | 30 | ||||
-rw-r--r-- | spec/parser_test.go | 2 |
12 files changed, 165 insertions, 47 deletions
diff --git a/cmd/vartan/compile.go b/cmd/vartan/compile.go index 804bd4f..354637b 100644 --- a/cmd/vartan/compile.go +++ b/cmd/vartan/compile.go @@ -119,7 +119,8 @@ func readGrammar(path string) (grm *grammar.Grammar, retErr error) { return nil, err } - return grammar.NewGrammar(ast) + var b grammar.GrammarBuilder + return b.Build(ast) } func writeCompiledGrammar(cgram *spec.CompiledGrammar, path string) error { diff --git a/driver/parser_test.go b/driver/parser_test.go index 3dec508..c352e91 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -281,7 +281,8 @@ foo: "foo"; t.Fatal(err) } - g, err := grammar.NewGrammar(ast) + var b grammar.GrammarBuilder + g, err := b.Build(ast) if tt.specErr { if err == nil { t.Fatal("an expected error didn't occur") diff --git a/error/error.go b/error/error.go index 1e5df7a..f953e3b 100644 --- a/error/error.go +++ b/error/error.go @@ -25,6 +25,7 @@ func (e SpecErrors) Error() string { type SpecError struct { Cause error + Detail string FilePath string SourceName string Row int @@ -39,6 +40,9 @@ func (e *SpecError) Error() string { fmt.Fprintf(&b, "%v: ", e.Row) } fmt.Fprintf(&b, "error: %v", e.Cause) + if e.Detail != "" { + fmt.Fprintf(&b, ": %v", e.Detail) + } line := readLine(e.FilePath, e.Row) if line != "" { diff --git a/grammar/first_test.go b/grammar/first_test.go index b65b0ea..b2118c0 100644 --- a/grammar/first_test.go +++ b/grammar/first_test.go @@ -155,7 +155,8 @@ func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { if err != nil { t.Fatal(err) } - gram, err := NewGrammar(ast) + var b GrammarBuilder + gram, err := b.Build(ast) if err != nil { t.Fatal(err) } diff --git a/grammar/follow_test.go b/grammar/follow_test.go index 042bc88..6ae8a9e 100644 --- a/grammar/follow_test.go +++ b/grammar/follow_test.go @@ -136,7 +136,8 @@ func genActualFollow(t *testing.T, src string) (*followSet, *Grammar) { if err != nil { t.Fatal(err) } - gram, err := NewGrammar(ast) + var b GrammarBuilder + gram, err := 
b.Build(ast) if err != nil { t.Fatal(err) } diff --git a/grammar/grammar.go b/grammar/grammar.go index 3a8acbe..0cd055d 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -5,6 +5,7 @@ import ( mlcompiler "github.com/nihei9/maleeni/compiler" mlspec "github.com/nihei9/maleeni/spec" + verr "github.com/nihei9/vartan/error" "github.com/nihei9/vartan/spec" ) @@ -22,17 +23,27 @@ type Grammar struct { astActions map[productionID][]*astActionEntry } -func NewGrammar(root *spec.RootNode) (*Grammar, error) { - symTabAndLexSpec, err := genSymbolTableAndLexSpec(root) +type GrammarBuilder struct { + errs verr.SpecErrors +} + +func (b *GrammarBuilder) Build(root *spec.RootNode) (*Grammar, error) { + b.errs = nil + + symTabAndLexSpec, err := b.genSymbolTableAndLexSpec(root) if err != nil { return nil, err } - prodsAndActs, err := genProductionsAndActions(root, symTabAndLexSpec) + prodsAndActs, err := b.genProductionsAndActions(root, symTabAndLexSpec) if err != nil { return nil, err } + if len(b.errs) > 0 { + return nil, b.errs + } + return &Grammar{ lexSpec: symTabAndLexSpec.lexSpec, skipLexKinds: symTabAndLexSpec.skip, @@ -50,7 +61,7 @@ type symbolTableAndLexSpec struct { skip []mlspec.LexKind } -func genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolTableAndLexSpec, error) { +func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolTableAndLexSpec, error) { symTab := newSymbolTable() skipKinds := []mlspec.LexKind{} entries := []*mlspec.LexEntry{} @@ -60,10 +71,14 @@ func genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolTableAndLexSpec, erro return nil, err } - entry, skip, err := genLexEntry(prod) + entry, skip, specErr, err := genLexEntry(prod) if err != nil { return nil, err } + if specErr != nil { + b.errs = append(b.errs, specErr) + continue + } if skip { skipKinds = append(skipKinds, mlspec.LexKind(prod.LHS)) } @@ -132,23 +147,35 @@ func genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolTableAndLexSpec, erro }, nil } -func 
genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, error) { +func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecError, error) { var modes []mlspec.LexModeName if prod.Directive != nil { dir := prod.Directive switch dir.Name { case "mode": if len(dir.Parameters) == 0 { - return nil, false, fmt.Errorf("'mode' directive needs an ID parameter") + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'mode' directive needs an ID parameter"), + Row: dir.Pos.Row, + }, nil } for _, param := range dir.Parameters { if param.ID == "" { - return nil, false, fmt.Errorf("'mode' directive needs an ID parameter") + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'mode' directive needs an ID parameter"), + Row: param.Pos.Row, + }, nil } modes = append(modes, mlspec.LexModeName(param.ID)) } default: - return nil, false, fmt.Errorf("invalid directive name '%v'", dir.Name) + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: dir.Name, + Row: dir.Pos.Row, + }, nil } } @@ -161,21 +188,37 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, error) { switch dir.Name { case "skip": if len(dir.Parameters) > 0 { - return nil, false, fmt.Errorf("'skip' directive needs no parameter") + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'skip' directive needs no parameter"), + Row: dir.Pos.Row, + }, nil } skip = true case "push": if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { - return nil, false, fmt.Errorf("'push' directive needs an ID parameter") + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'push' directive needs an ID parameter"), + Row: dir.Pos.Row, + }, nil } push = mlspec.LexModeName(dir.Parameters[0].ID) case "pop": if len(dir.Parameters) > 0 { - return nil, false, fmt.Errorf("'pop' directive needs no parameter") + return 
nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'pop' directive needs no parameter"), + Row: dir.Pos.Row, + }, nil } pop = true default: - return nil, false, fmt.Errorf("invalid directive name '%v'", dir.Name) + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: dir.Name, + Row: dir.Pos.Row, + }, nil } } @@ -185,7 +228,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, error) { Pattern: mlspec.LexPattern(alt.Elements[0].Pattern), Push: push, Pop: pop, - }, skip, nil + }, skip, nil, nil } type productionsAndActions struct { @@ -194,12 +237,15 @@ type productionsAndActions struct { astActs map[productionID][]*astActionEntry } -func genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTableAndLexSpec) (*productionsAndActions, error) { +func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTableAndLexSpec) (*productionsAndActions, error) { symTab := symTabAndLexSpec.symTab anonPat2Sym := symTabAndLexSpec.anonPat2Sym if len(root.Productions) == 0 { - return nil, fmt.Errorf("a grammar must have at least one production") + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrNoProduction, + }) + return nil, nil } prods := newProductionSet() @@ -235,8 +281,11 @@ func genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTable for _, prod := range root.Productions { lhsSym, ok := symTab.toSymbol(prod.LHS) if !ok { + // All symbols are assumed to be pre-detected, so it's a bug if we cannot find them here. 
return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS) } + + LOOP_RHS: for _, alt := range prod.RHS { altSyms := make([]symbol, len(alt.Elements)) for i, elem := range alt.Elements { @@ -245,17 +294,24 @@ func genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTable var ok bool sym, ok = anonPat2Sym[elem.Pattern] if !ok { + // All patterns are assumed to be pre-detected, so it's a bug if we cannot find them here. return nil, fmt.Errorf("pattern '%v' is undefined", elem.Pattern) } } else { var ok bool sym, ok = symTab.toSymbol(elem.ID) if !ok { - return nil, fmt.Errorf("symbol '%v' is undefined", elem.ID) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrUndefinedSym, + Detail: elem.ID, + Row: elem.Pos.Row, + }) + continue LOOP_RHS } } altSyms[i] = sym } + p, err := newProduction(lhsSym, altSyms) if err != nil { return nil, err @@ -267,24 +323,44 @@ func genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTable switch dir.Name { case "ast": if len(dir.Parameters) != 1 || dir.Parameters[0].Tree == nil { - return nil, fmt.Errorf("'ast' directive needs a tree parameter") + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'ast' directive needs a tree parameter"), + Row: dir.Pos.Row, + }) + continue LOOP_RHS } param := dir.Parameters[0] lhsText, ok := symTab.toText(p.lhs) if !ok || param.Tree.Name != lhsText { - return nil, fmt.Errorf("a name of a tree structure must be the same ID as an LHS of a production; LHS: %v", lhsText) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("a name of a tree structure must be the same ID as an LHS of a production; LHS: %v", lhsText), + Row: param.Pos.Row, + }) + continue LOOP_RHS } astAct := make([]*astActionEntry, len(param.Tree.Children)) for i, c := range param.Tree.Children { if c.Position > len(alt.Elements) { - return nil, fmt.Errorf("a position must be less than or equal to the length 
of an alternative; alternative length: %v", len(alt.Elements)) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("a position must be less than or equal to the length of an alternativ (%v)", len(alt.Elements)), + Row: c.Pos.Row, + }) + continue LOOP_RHS } if c.Expansion { offset := c.Position - 1 elem := alt.Elements[offset] if elem.Pattern != "" { - return nil, fmt.Errorf("the expansion symbol cannot be applied to a pattern ($%v: %v)", c.Position, elem.Pattern) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("the expansion symbol cannot be applied to a pattern ($%v: %v)", c.Position, elem.Pattern), + Row: c.Pos.Row, + }) + continue LOOP_RHS } elemSym, ok := symTab.toSymbol(elem.ID) if !ok { @@ -292,7 +368,12 @@ func genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTable return nil, fmt.Errorf("a symbol corresponding to a position ($%v: %v) was not found", c.Position, elem.ID) } if elemSym.isTerminal() { - return nil, fmt.Errorf("the expansion symbol cannot be applied to a terminal symbol ($%v: %v)", c.Position, elem.ID) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("the expansion symbol cannot be applied to a terminal symbol ($%v: %v)", c.Position, elem.ID), + Row: c.Pos.Row, + }) + continue LOOP_RHS } } @@ -303,7 +384,12 @@ func genProductionsAndActions(root *spec.RootNode, symTabAndLexSpec *symbolTable } astActs[p.id] = astAct default: - return nil, fmt.Errorf("invalid directive name '%v'", dir.Name) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: fmt.Sprintf("invalid directive name '%v'", dir.Name), + Row: dir.Pos.Row, + }) + continue LOOP_RHS } } } diff --git a/grammar/lr0_item_test.go b/grammar/lr0_item_test.go index c04ce13..9a0628a 100644 --- a/grammar/lr0_item_test.go +++ b/grammar/lr0_item_test.go @@ -33,7 +33,8 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; if err != 
nil { t.Fatal(err) } - gram, err := NewGrammar(ast) + var b GrammarBuilder + gram, err := b.Build(ast) if err != nil { t.Fatal(err) } diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go new file mode 100644 index 0000000..01446ed --- /dev/null +++ b/grammar/semantic_error.go @@ -0,0 +1,22 @@ +package grammar + +type SemanticError struct { + message string +} + +func newSemanticError(message string) *SemanticError { + return &SemanticError{ + message: message, + } +} + +func (e *SemanticError) Error() string { + return e.message +} + +var ( + semErrNoProduction = newSemanticError("a grammar needs at least one production") + semErrUndefinedSym = newSemanticError("undefined symbol") + semErrDirInvalidName = newSemanticError("invalid directive name") + semErrDirInvalidParam = newSemanticError("invalid parameter") +) diff --git a/grammar/slr_test.go b/grammar/slr_test.go index 089b73b..1ebb02d 100644 --- a/grammar/slr_test.go +++ b/grammar/slr_test.go @@ -39,7 +39,8 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; if err != nil { t.Fatal(err) } - gram, err = NewGrammar(ast) + var b GrammarBuilder + gram, err = b.Build(ast) if err != nil { t.Fatal(err) } diff --git a/spec/lexer.go b/spec/lexer.go index 83b1a93..1f8805a 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -35,12 +35,12 @@ const ( ) type Position struct { - row int + Row int } func newPosition(row int) Position { return Position{ - row: row, + Row: row, } } diff --git a/spec/parser.go b/spec/parser.go index 7f37598..4773da6 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -186,7 +186,7 @@ func (p *parser) parseFragment() *FragmentNode { p.consume(tokenKindNewline) if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.row, synErrNoProductionName) + raiseSyntaxError(p.pos.Row, synErrNoProductionName) } lhs := p.lastTok.text lhsPos := p.lastTok.pos @@ -194,23 +194,23 @@ func (p *parser) parseFragment() *FragmentNode { p.consume(tokenKindNewline) if !p.consume(tokenKindColon) { - raiseSyntaxError(p.pos.row, 
synErrNoColon) + raiseSyntaxError(p.pos.Row, synErrNoColon) } if !p.consume(tokenKindTerminalPattern) { - raiseSyntaxError(p.pos.row, synErrFragmentNoPattern) + raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern) } rhs := p.lastTok.text p.consume(tokenKindNewline) if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(p.pos.row, synErrNoSemicolon) + raiseSyntaxError(p.pos.Row, synErrNoSemicolon) } if !p.consume(tokenKindNewline) { if !p.consume(tokenKindEOF) { - raiseSyntaxError(p.pos.row, synErrSemicolonNoNewline) + raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline) } } @@ -248,12 +248,12 @@ func (p *parser) parseProduction() *ProductionNode { dir := p.parseDirective() if dir != nil { if !p.consume(tokenKindNewline) { - raiseSyntaxError(p.pos.row, synErrProdDirNoNewline) + raiseSyntaxError(p.pos.Row, synErrProdDirNoNewline) } } if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.row, synErrNoProductionName) + raiseSyntaxError(p.pos.Row, synErrNoProductionName) } lhs := p.lastTok.text lhsPos := p.lastTok.pos @@ -261,7 +261,7 @@ func (p *parser) parseProduction() *ProductionNode { p.consume(tokenKindNewline) if !p.consume(tokenKindColon) { - raiseSyntaxError(p.pos.row, synErrNoColon) + raiseSyntaxError(p.pos.Row, synErrNoColon) } alt := p.parseAlternative() @@ -279,12 +279,12 @@ func (p *parser) parseProduction() *ProductionNode { p.consume(tokenKindNewline) if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(p.pos.row, synErrNoSemicolon) + raiseSyntaxError(p.pos.Row, synErrNoSemicolon) } if !p.consume(tokenKindNewline) { if !p.consume(tokenKindEOF) { - raiseSyntaxError(p.pos.row, synErrSemicolonNoNewline) + raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline) } } @@ -344,7 +344,7 @@ func (p *parser) parseDirective() *DirectiveNode { dirPos := p.lastTok.pos if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.row, synErrNoDirectiveName) + raiseSyntaxError(p.pos.Row, synErrNoDirectiveName) } name := p.lastTok.text @@ -373,7 +373,7 @@ func (p *parser) 
parseParameter() *ParameterNode { } case p.consume(tokenKindTreeNodeOpen): if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.row, synErrTreeInvalidFirstElem) + raiseSyntaxError(p.pos.Row, synErrTreeInvalidFirstElem) } name := p.lastTok.text namePos := p.lastTok.pos @@ -396,7 +396,7 @@ func (p *parser) parseParameter() *ParameterNode { } if !p.consume(tokenKindTreeNodeClose) { - raiseSyntaxError(p.pos.row, synErrTreeUnclosed) + raiseSyntaxError(p.pos.Row, synErrTreeUnclosed) } return &ParameterNode{ @@ -426,7 +426,7 @@ func (p *parser) consume(expected tokenKind) bool { } p.pos = tok.pos if tok.kind == tokenKindInvalid { - raiseSyntaxError(p.pos.row, synErrInvalidToken) + raiseSyntaxError(p.pos.Row, synErrInvalidToken) } if tok.kind == expected { p.lastTok = tok @@ -449,7 +449,7 @@ func (p *parser) skip() { if err != nil { p.errs = append(p.errs, &verr.SpecError{ Cause: err, - Row: p.pos.row, + Row: p.pos.Row, }) continue } diff --git a/spec/parser_test.go b/spec/parser_test.go index 5a54b83..d7ca30f 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -654,7 +654,7 @@ func testParameter(t *testing.T, param, expected *ParameterNode, checkPosition b func testPosition(t *testing.T, pos, expected Position) { t.Helper() - if pos.row != expected.row { + if pos.Row != expected.Row { t.Fatalf("unexpected position want: %+v, got: %+v", expected, pos) } } |