diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-02 01:42:50 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-08-02 01:42:50 +0900 |
commit | 74b51fb8cc2945af10da841a2ff901f4d3046977 (patch) | |
tree | c4f3d5e2f3fb4969cd7393107210e22649240f06 | |
parent | Use maleeni v0.3.0 (diff) | |
download | urubu-74b51fb8cc2945af10da841a2ff901f4d3046977.tar.gz urubu-74b51fb8cc2945af10da841a2ff901f4d3046977.tar.xz |
Print expected terminals on a parse error
-rw-r--r-- | driver/parser.go | 46 | ||||
-rw-r--r-- | grammar/grammar.go | 7 | ||||
-rw-r--r-- | grammar/slr.go | 34 | ||||
-rw-r--r-- | grammar/symbol.go | 9 | ||||
-rw-r--r-- | spec/grammar.go | 2 |
5 files changed, 81 insertions, 17 deletions
diff --git a/driver/parser.go b/driver/parser.go index 3af6a54..10c7162 100644 --- a/driver/parser.go +++ b/driver/parser.go @@ -3,8 +3,10 @@ package driver import ( "fmt" "io" + "strings" mldriver "github.com/nihei9/maleeni/driver" + mlspec "github.com/nihei9/maleeni/spec" "github.com/nihei9/vartan/spec" ) @@ -176,7 +178,29 @@ func (p *Parser) Parse() error { ast: ast, }) default: - return fmt.Errorf("unexpected token: %v", tok) + var tokText string + if tok.EOF { + tokText = "<EOF>" + } else { + tokText = fmt.Sprintf("%v (%v)", tok.KindName.String(), tok.Text()) + } + + eKinds, eof := p.expectedKinds(p.top()) + + var b strings.Builder + fmt.Fprintf(&b, "%v", eKinds[0]) + for _, k := range eKinds[1:] { + fmt.Fprintf(&b, ", %v", k) + } + if eof { + if len(eKinds) > 0 { + fmt.Fprintf(&b, ", <EOF>") + } else { + fmt.Fprintf(&b, "<EOF>") + } + } + + return fmt.Errorf("unexpected token: %v, expected: %v", tokText, b.String()) } } } @@ -189,7 +213,7 @@ func (p *Parser) nextToken() (*mldriver.Token, error) { return nil, err } if tok.Invalid { - return nil, fmt.Errorf("invalid token: %+v", tok) + return nil, fmt.Errorf("invalid token: '%v'", tok.Text()) } if skip[tok.KindID] > 0 { @@ -237,3 +261,21 @@ func (p *Parser) CST() *Node { func (p *Parser) AST() *Node { return p.ast } + +func (p *Parser) expectedKinds(state int) ([]mlspec.LexKindName, bool) { + kinds := []mlspec.LexKindName{} + eof := false + terms := p.gram.ParsingTable.ExpectedTerminals[state] + for _, tsym := range terms { + if tsym == 1 { + eof = true + continue + } + + kindID := p.gram.LexicalSpecification.Maleeni.TerminalToKind[tsym] + kindName := p.gram.LexicalSpecification.Maleeni.Spec.KindNames[kindID] + kinds = append(kinds, kindName) + } + + return kinds, eof +} diff --git a/grammar/grammar.go b/grammar/grammar.go index 4cde798..a7f80ca 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -629,10 +629,12 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error } kind2Term := make([]int, len(lexSpec.KindNames)) + term2Kind := make([]int, gram.symbolTable.termNum.Int()) skip := make([]int, len(lexSpec.KindNames)) for i, k := range lexSpec.KindNames { if k == mlspec.LexKindNameNil { - kind2Term[0] = symbolNil.num().Int() + kind2Term[mlspec.LexKindIDNil] = symbolNil.num().Int() + term2Kind[symbolNil.num()] = mlspec.LexKindIDNil.Int() continue } @@ -641,6 +643,7 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error return nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k) } kind2Term[i] = sym.num().Int() + term2Kind[sym.num()] = i for _, sk := range gram.skipLexKinds { if k != sk { @@ -735,6 +738,7 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error Maleeni: &spec.Maleeni{ Spec: lexSpec, KindToTerminal: kind2Term, + TerminalToKind: term2Kind, Skip: skip, }, }, @@ -751,6 +755,7 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error NonTerminals: nonTerms, NonTerminalCount: tab.nonTerminalCount, EOFSymbol: symbolEOF.num().Int(), + ExpectedTerminals: tab.expectedTerminals, }, ASTAction: &spec.ASTAction{ Entries: astActEnties, diff --git a/grammar/slr.go b/grammar/slr.go index eb74622..2f4c786 100644 --- a/grammar/slr.go +++ b/grammar/slr.go @@ -96,11 +96,12 @@ var ( ) type ParsingTable struct { - actionTable []actionEntry - goToTable []goToEntry - stateCount int - terminalCount int - nonTerminalCount int + actionTable []actionEntry + goToTable []goToEntry + stateCount int + terminalCount int + nonTerminalCount int + expectedTerminals [][]int InitialState stateNum } @@ -181,20 +182,25 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) { { initialState := b.automaton.states[b.automaton.initialState] ptab = &ParsingTable{ - actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), - goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), - stateCount: len(b.automaton.states), - terminalCount: b.termCount, - nonTerminalCount: b.nonTermCount, - InitialState: initialState.num, + actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), + goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), + stateCount: len(b.automaton.states), + terminalCount: b.termCount, + nonTerminalCount: b.nonTermCount, + expectedTerminals: make([][]int, len(b.automaton.states)), + InitialState: initialState.num, } } var conflicts []conflict for _, state := range b.automaton.states { + var eTerms []int + for sym, kID := range state.next { nextState := b.automaton.states[kID] if sym.isTerminal() { + eTerms = append(eTerms, sym.num().Int()) + c := ptab.writeShiftAction(state.num, sym, nextState.num) if c != nil { conflicts = append(conflicts, c) @@ -212,6 +218,8 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) { return nil, err } for sym := range flw.symbols { + eTerms = append(eTerms, sym.num().Int()) + c := ptab.writeReduceAction(state.num, sym, prod.num) if c != nil { conflicts = append(conflicts, c) @@ -219,6 +227,8 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) { } } if flw.eof { + eTerms = append(eTerms, symbolEOF.num().Int()) + c := ptab.writeReduceAction(state.num, symbolEOF, prod.num) if c != nil { conflicts = append(conflicts, c) @@ -226,6 +236,8 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) { } } } + + ptab.expectedTerminals[state.num] = eTerms } b.conflicts = conflicts diff --git a/grammar/symbol.go b/grammar/symbol.go index 9c0e9bd..136e909 100644 --- a/grammar/symbol.go +++ b/grammar/symbol.go @@ -53,9 +53,12 @@ const ( maskNumberPart = uint16(0x3fff) // 0011 1111 1111 1111 - symbolNil = symbol(0) // 0000 0000 0000 0000 - symbolStart = symbol(0x4001) // 0100 0000 0000 0001 - symbolEOF = symbol(0xc001) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol. + symbolNumStart = uint16(0x0001) // 0000 0000 0000 0001 + symbolNumEOF = uint16(0x0001) // 0000 0000 0000 0001 + + symbolNil = symbol(0) // 0000 0000 0000 0000 + symbolStart = symbol(maskNonTerminal | maskStartOrEOF | symbolNumStart) // 0100 0000 0000 0001 + symbolEOF = symbol(maskTerminal | maskStartOrEOF | symbolNumEOF) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol. nonTerminalNumMin = symbolNum(2) // The number 1 is used by a start symbol. terminalNumMin = symbolNum(2) // The number 1 is used by the EOF symbol. diff --git a/spec/grammar.go b/spec/grammar.go index eeeb372..002fac9 100644 --- a/spec/grammar.go +++ b/spec/grammar.go @@ -16,6 +16,7 @@ type LexicalSpecification struct { type Maleeni struct { Spec *mlspec.CompiledLexSpec `json:"spec"` KindToTerminal []int `json:"kind_to_terminal"` + TerminalToKind []int `json:"terminal_to_kind"` Skip []int `json:"skip"` } @@ -32,6 +33,7 @@ type ParsingTable struct { NonTerminals []string `json:"non_terminals"` NonTerminalCount int `json:"non_terminal_count"` EOFSymbol int `json:"eof_symbol"` + ExpectedTerminals [][]int `json:"expected_terminals"` } type ASTAction struct { |