aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ryo Nihei <nihei.dev@gmail.com> 2021-08-02 01:42:50 +0900
committer Ryo Nihei <nihei.dev@gmail.com> 2021-08-02 01:42:50 +0900
commit 74b51fb8cc2945af10da841a2ff901f4d3046977 (patch)
tree c4f3d5e2f3fb4969cd7393107210e22649240f06
parent Use maleeni v0.3.0 (diff)
download urubu-74b51fb8cc2945af10da841a2ff901f4d3046977.tar.gz
urubu-74b51fb8cc2945af10da841a2ff901f4d3046977.tar.xz
Print expected terminals on a parse error
-rw-r--r-- driver/parser.go 46
-rw-r--r-- grammar/grammar.go 7
-rw-r--r-- grammar/slr.go 34
-rw-r--r-- grammar/symbol.go 9
-rw-r--r-- spec/grammar.go 2
5 files changed, 81 insertions, 17 deletions
diff --git a/driver/parser.go b/driver/parser.go
index 3af6a54..10c7162 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -3,8 +3,10 @@ package driver
import (
"fmt"
"io"
+ "strings"
mldriver "github.com/nihei9/maleeni/driver"
+ mlspec "github.com/nihei9/maleeni/spec"
"github.com/nihei9/vartan/spec"
)
@@ -176,7 +178,29 @@ func (p *Parser) Parse() error {
ast: ast,
})
default:
- return fmt.Errorf("unexpected token: %v", tok)
+			// Describe the offending token; an EOF token is shown as "<EOF>".
+			var tokText string
+			if tok.EOF {
+				tokText = "<EOF>"
+			} else {
+				tokText = fmt.Sprintf("%v (%v)", tok.KindName.String(), tok.Text())
+			}
+
+			eKinds, eof := p.expectedKinds(p.top())
+
+			// Build the comma-separated list of expected terminals. eKinds is
+			// empty when EOF is the only expected terminal, so it must never be
+			// indexed unconditionally (eKinds[0] would panic in that case).
+			var b strings.Builder
+			for i, k := range eKinds {
+				if i > 0 {
+					b.WriteString(", ")
+				}
+				fmt.Fprintf(&b, "%v", k)
+			}
+			if eof {
+				if len(eKinds) > 0 {
+					b.WriteString(", ")
+				}
+				b.WriteString("<EOF>")
+			}
+
+			return fmt.Errorf("unexpected token: %v, expected: %v", tokText, b.String())
}
}
}
@@ -189,7 +213,7 @@ func (p *Parser) nextToken() (*mldriver.Token, error) {
return nil, err
}
if tok.Invalid {
- return nil, fmt.Errorf("invalid token: %+v", tok)
+ return nil, fmt.Errorf("invalid token: '%v'", tok.Text())
}
if skip[tok.KindID] > 0 {
@@ -237,3 +261,21 @@ func (p *Parser) CST() *Node {
func (p *Parser) AST() *Node {
return p.ast
}
+
+// expectedKinds returns the lexical kind names of the terminal symbols listed
+// in the parsing table's ExpectedTerminals entry for state. The EOF symbol is
+// not included in the returned slice; its presence is reported through the
+// boolean result instead.
+func (p *Parser) expectedKinds(state int) ([]mlspec.LexKindName, bool) {
+	terms := p.gram.ParsingTable.ExpectedTerminals[state]
+	// Compare against the EOF symbol number stored in the compiled grammar
+	// rather than a hard-coded 1, so the check follows the table's own data.
+	eofSym := p.gram.ParsingTable.EOFSymbol
+	maleeni := p.gram.LexicalSpecification.Maleeni
+
+	kinds := make([]mlspec.LexKindName, 0, len(terms))
+	eof := false
+	for _, tsym := range terms {
+		if tsym == eofSym {
+			eof = true
+			continue
+		}
+
+		// Map the terminal symbol number to its lexical kind ID, then to the
+		// kind's name.
+		kindID := maleeni.TerminalToKind[tsym]
+		kinds = append(kinds, maleeni.Spec.KindNames[kindID])
+	}
+
+	return kinds, eof
+}
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 4cde798..a7f80ca 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -629,10 +629,12 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error
}
kind2Term := make([]int, len(lexSpec.KindNames))
+ term2Kind := make([]int, gram.symbolTable.termNum.Int())
skip := make([]int, len(lexSpec.KindNames))
for i, k := range lexSpec.KindNames {
if k == mlspec.LexKindNameNil {
- kind2Term[0] = symbolNil.num().Int()
+ kind2Term[mlspec.LexKindIDNil] = symbolNil.num().Int()
+ term2Kind[symbolNil.num()] = mlspec.LexKindIDNil.Int()
continue
}
@@ -641,6 +643,7 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error
return nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k)
}
kind2Term[i] = sym.num().Int()
+ term2Kind[sym.num()] = i
for _, sk := range gram.skipLexKinds {
if k != sk {
@@ -735,6 +738,7 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error
Maleeni: &spec.Maleeni{
Spec: lexSpec,
KindToTerminal: kind2Term,
+ TerminalToKind: term2Kind,
Skip: skip,
},
},
@@ -751,6 +755,7 @@ func Compile(gram *Grammar, opts ...compileOption) (*spec.CompiledGrammar, error
NonTerminals: nonTerms,
NonTerminalCount: tab.nonTerminalCount,
EOFSymbol: symbolEOF.num().Int(),
+ ExpectedTerminals: tab.expectedTerminals,
},
ASTAction: &spec.ASTAction{
Entries: astActEnties,
diff --git a/grammar/slr.go b/grammar/slr.go
index eb74622..2f4c786 100644
--- a/grammar/slr.go
+++ b/grammar/slr.go
@@ -96,11 +96,12 @@ var (
)
type ParsingTable struct {
- actionTable []actionEntry
- goToTable []goToEntry
- stateCount int
- terminalCount int
- nonTerminalCount int
+ actionTable []actionEntry
+ goToTable []goToEntry
+ stateCount int
+ terminalCount int
+ nonTerminalCount int
+ // expectedTerminals is indexed by state number; each entry holds the numbers
+ // of the terminal symbols for which a shift or reduce action was written in
+ // that state (including the EOF symbol when a reduce on EOF exists).
+ expectedTerminals [][]int
InitialState stateNum
}
@@ -181,20 +182,25 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) {
{
initialState := b.automaton.states[b.automaton.initialState]
ptab = &ParsingTable{
- actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount),
- goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
- stateCount: len(b.automaton.states),
- terminalCount: b.termCount,
- nonTerminalCount: b.nonTermCount,
- InitialState: initialState.num,
+ actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount),
+ goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
+ stateCount: len(b.automaton.states),
+ terminalCount: b.termCount,
+ nonTerminalCount: b.nonTermCount,
+ expectedTerminals: make([][]int, len(b.automaton.states)),
+ InitialState: initialState.num,
}
}
var conflicts []conflict
for _, state := range b.automaton.states {
+ var eTerms []int
+
for sym, kID := range state.next {
nextState := b.automaton.states[kID]
if sym.isTerminal() {
+ eTerms = append(eTerms, sym.num().Int())
+
c := ptab.writeShiftAction(state.num, sym, nextState.num)
if c != nil {
conflicts = append(conflicts, c)
@@ -212,6 +218,8 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) {
return nil, err
}
for sym := range flw.symbols {
+ eTerms = append(eTerms, sym.num().Int())
+
c := ptab.writeReduceAction(state.num, sym, prod.num)
if c != nil {
conflicts = append(conflicts, c)
@@ -219,6 +227,8 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) {
}
}
if flw.eof {
+ eTerms = append(eTerms, symbolEOF.num().Int())
+
c := ptab.writeReduceAction(state.num, symbolEOF, prod.num)
if c != nil {
conflicts = append(conflicts, c)
@@ -226,6 +236,8 @@ func (b *slrTableBuilder) build() (*ParsingTable, error) {
}
}
}
+
+ ptab.expectedTerminals[state.num] = eTerms
}
b.conflicts = conflicts
diff --git a/grammar/symbol.go b/grammar/symbol.go
index 9c0e9bd..136e909 100644
--- a/grammar/symbol.go
+++ b/grammar/symbol.go
@@ -53,9 +53,12 @@ const (
maskNumberPart = uint16(0x3fff) // 0011 1111 1111 1111
- symbolNil = symbol(0) // 0000 0000 0000 0000
- symbolStart = symbol(0x4001) // 0100 0000 0000 0001
- symbolEOF = symbol(0xc001) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol.
+ symbolNumStart = uint16(0x0001) // 0000 0000 0000 0001
+ symbolNumEOF = uint16(0x0001) // 0000 0000 0000 0001
+
+ symbolNil = symbol(0) // 0000 0000 0000 0000
+ symbolStart = symbol(maskNonTerminal | maskStartOrEOF | symbolNumStart) // 0100 0000 0000 0001
+ symbolEOF = symbol(maskTerminal | maskStartOrEOF | symbolNumEOF) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol.
nonTerminalNumMin = symbolNum(2) // The number 1 is used by a start symbol.
terminalNumMin = symbolNum(2) // The number 1 is used by the EOF symbol.
diff --git a/spec/grammar.go b/spec/grammar.go
index eeeb372..002fac9 100644
--- a/spec/grammar.go
+++ b/spec/grammar.go
@@ -16,6 +16,7 @@ type LexicalSpecification struct {
type Maleeni struct {
Spec *mlspec.CompiledLexSpec `json:"spec"`
KindToTerminal []int `json:"kind_to_terminal"`
+ TerminalToKind []int `json:"terminal_to_kind"`
Skip []int `json:"skip"`
}
@@ -32,6 +33,7 @@ type ParsingTable struct {
NonTerminals []string `json:"non_terminals"`
NonTerminalCount int `json:"non_terminal_count"`
EOFSymbol int `json:"eof_symbol"`
+ ExpectedTerminals [][]int `json:"expected_terminals"`
}
type ASTAction struct {