diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-23 02:04:56 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-23 21:37:12 +0900 |
commit | ba524fa0d49f597a4ace4bec72802334a0972c7a (patch) | |
tree | 94c00f562324bb722aebe80cb07a162f61f8f7b9 | |
parent | Add name directive to specify a grammar name (diff) | |
download | urubu-ba524fa0d49f597a4ace4bec72802334a0972c7a.tar.gz urubu-ba524fa0d49f597a4ace4bec72802334a0972c7a.tar.xz |
Use grammar via an interface
-rw-r--r-- | cmd/vartan/parse.go | 2 | ||||
-rw-r--r-- | driver/conflict_test.go | 2 | ||||
-rw-r--r-- | driver/lac_test.go | 4 | ||||
-rw-r--r-- | driver/parser.go | 124 | ||||
-rw-r--r-- | driver/parser_test.go | 2 | ||||
-rw-r--r-- | driver/semantic_action_test.go | 2 | ||||
-rw-r--r-- | driver/spec.go | 84 | ||||
-rw-r--r-- | driver/syntax_error_test.go | 2 |
8 files changed, 173 insertions, 49 deletions
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go index 7cf3c81..0e95405 100644 --- a/cmd/vartan/parse.go +++ b/cmd/vartan/parse.go @@ -98,7 +98,7 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { } } - p, err = driver.NewParser(cgram, src, opts...) + p, err = driver.NewParser(driver.NewGrammar(cgram), src, opts...) if err != nil { return err } diff --git a/driver/conflict_test.go b/driver/conflict_test.go index 87b7b17..4517b33 100644 --- a/driver/conflict_test.go +++ b/driver/conflict_test.go @@ -350,7 +350,7 @@ assign: '='; } treeAct := NewSyntaxTreeActionSet(gram, false, true) - p, err := NewParser(gram, strings.NewReader(tt.src), SemanticAction(treeAct)) + p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(treeAct)) if err != nil { t.Fatal(err) } diff --git a/driver/lac_test.go b/driver/lac_test.go index aa32d05..9b7bf40 100644 --- a/driver/lac_test.go +++ b/driver/lac_test.go @@ -66,7 +66,7 @@ d: 'd'; gram: gram, } - p, err := NewParser(gram, strings.NewReader(src), SemanticAction(semAct)) + p, err := NewParser(NewGrammar(gram), strings.NewReader(src), SemanticAction(semAct)) if err != nil { t.Fatal(err) } @@ -92,7 +92,7 @@ d: 'd'; gram: gram, } - p, err := NewParser(gram, strings.NewReader(src), SemanticAction(semAct), DisableLAC()) + p, err := NewParser(NewGrammar(gram), strings.NewReader(src), SemanticAction(semAct), DisableLAC()) if err != nil { t.Fatal(err) } diff --git a/driver/parser.go b/driver/parser.go index 6a62554..08a4c4a 100644 --- a/driver/parser.go +++ b/driver/parser.go @@ -5,9 +5,61 @@ import ( "io" mldriver "github.com/nihei9/maleeni/driver" - "github.com/nihei9/vartan/spec" ) +type Grammar interface { + // Class returns a class of grammar. + Class() string + + // InitialState returns the initial state of a parser. + InitialState() int + + // StartProduction returns the start production of grammar. + StartProduction() int + + // Action returns an ACTION entry corresponding to a (state, terminal symbol) pair. + Action(state int, terminal int) int + + // GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair. + GoTo(state int, lhs int) int + + // ErrorTrapperState returns true when a state can shift the error symbol. + ErrorTrapperState(state int) bool + + // LHS returns a LHS symbol of a production. + LHS(prod int) int + + // AlternativeSymbolCount returns a symbol count of p production. + AlternativeSymbolCount(prod int) int + + // RecoverProduction returns true when a production has the recover directive. + RecoverProduction(prod int) bool + + // LexicalSpecification returns a lexical specification. + LexicalSpecification() mldriver.LexSpec + + // TerminalCount returns a terminal symbol count of grammar. + TerminalCount() int + + // EOF returns the EOF symbol. + EOF() int + + // Error returns the error symbol. + Error() int + + // Terminal retuns a string representaion of a terminal symbol. + Terminal(terminal int) string + + // TerminalAlias returns an alias for a terminal. + TerminalAlias(terminal int) string + + // Skip returns true when a terminal symbol must be skipped. + Skip(kind mldriver.KindID) bool + + // LexicalKindToTerminal maps a lexical kind to a terminal symbol. + LexicalKindToTerminal(kind mldriver.KindID) int +} + type SyntaxError struct { Row int Col int @@ -34,7 +86,7 @@ func SemanticAction(semAct SemanticActionSet) ParserOption { } type Parser struct { - gram *spec.CompiledGrammar + gram Grammar lex *mldriver.Lexer stateStack *stateStack semAct SemanticActionSet @@ -44,8 +96,8 @@ type Parser struct { synErrs []*SyntaxError } -func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) (*Parser, error) { - lex, err := mldriver.NewLexer(mldriver.NewLexSpec(gram.LexicalSpecification.Maleeni.Spec), src) +func NewParser(gram Grammar, src io.Reader, opts ...ParserOption) (*Parser, error) { + lex, err := mldriver.NewLexer(gram.LexicalSpecification(), src) if err != nil { return nil, err } @@ -56,7 +108,7 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) stateStack: &stateStack{}, } - if p.gram.ParsingTable.Class != "lalr" { + if p.gram.Class() != "lalr" { p.disableLAC = true } @@ -71,7 +123,7 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) } func (p *Parser) Parse() error { - p.stateStack.push(p.gram.ParsingTable.InitialState) + p.stateStack.push(p.gram.InitialState()) tok, err := p.nextToken() if err != nil { return err @@ -111,7 +163,7 @@ ACTION_LOOP: prodNum := act recovered := false - if p.onError && p.gram.ParsingTable.RecoverProductions[prodNum] != 0 { + if p.onError && p.gram.RecoverProduction(prodNum) { p.onError = false p.shiftCount = 0 recovered = true @@ -186,10 +238,8 @@ func (p *Parser) validateLookahead(term int) bool { p.stateStack.enableExploratoryMode() defer p.stateStack.disableExploratoryMode() - tab := p.gram.ParsingTable - for { - act := tab.Action[p.stateStack.topExploratorily()*tab.TerminalCount+term] + act := p.gram.Action(p.stateStack.topExploratorily(), term) switch { case act < 0: // Shift @@ -197,13 +247,13 @@ func (p *Parser) validateLookahead(term int) bool { case act > 0: // Reduce prodNum := act - lhs := tab.LHSSymbols[prodNum] - if lhs == tab.LHSSymbols[tab.StartProduction] { + lhs := p.gram.LHS(prodNum) + if lhs == p.gram.LHS(p.gram.StartProduction()) { return true } - n := tab.AlternativeSymbolCounts[prodNum] + n := p.gram.AlternativeSymbolCount(prodNum) p.stateStack.popExploratorily(n) - state := tab.GoTo[p.stateStack.topExploratorily()*tab.NonTerminalCount+lhs] + state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs) p.stateStack.pushExploratorily(state) default: // Error return false @@ -212,7 +262,6 @@ func (p *Parser) validateLookahead(term int) bool { } func (p *Parser) nextToken() (*mldriver.Token, error) { - skip := p.gram.LexicalSpecification.Maleeni.Skip for { // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0, // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect @@ -222,7 +271,7 @@ func (p *Parser) nextToken() (*mldriver.Token, error) { return nil, err } - if skip[tok.KindID] > 0 { + if p.gram.Skip(tok.KindID) { continue } @@ -232,10 +281,10 @@ func (p *Parser) nextToken() (*mldriver.Token, error) { func (p *Parser) tokenToTerminal(tok *mldriver.Token) int { if tok.EOF { - return p.gram.ParsingTable.EOFSymbol + return p.gram.EOF() } - return p.gram.LexicalSpecification.Maleeni.KindToTerminal[tok.KindID] + return p.gram.LexicalKindToTerminal(tok.KindID) } func (p *Parser) lookupAction(tok *mldriver.Token) int { @@ -246,17 +295,13 @@ func (p *Parser) lookupAction(tok *mldriver.Token) int { } } - termCount := p.gram.ParsingTable.TerminalCount - term := p.tokenToTerminal(tok) - return p.gram.ParsingTable.Action[p.stateStack.top()*termCount+term] + return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok)) } func (p *Parser) lookupActionOnError() (int, error) { - termCount := p.gram.ParsingTable.TerminalCount - errSym := p.gram.ParsingTable.ErrorSymbol - act := p.gram.ParsingTable.Action[p.stateStack.top()*termCount+errSym] + act := p.gram.Action(p.stateStack.top(), p.gram.Error()) if act >= 0 { - return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.ParsingTable.Terminals[errSym]) + return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error())) } return act, nil @@ -267,14 +312,13 @@ func (p *Parser) shift(nextState int) { } func (p *Parser) reduce(prodNum int) bool { - tab := p.gram.ParsingTable - lhs := tab.LHSSymbols[prodNum] - if lhs == tab.LHSSymbols[tab.StartProduction] { + lhs := p.gram.LHS(prodNum) + if lhs == p.gram.LHS(p.gram.StartProduction()) { return true } - n := tab.AlternativeSymbolCounts[prodNum] + n := p.gram.AlternativeSymbolCount(prodNum) p.stateStack.pop(n) - nextState := tab.GoTo[p.stateStack.top()*tab.NonTerminalCount+lhs] + nextState := p.gram.GoTo(p.stateStack.top(), lhs) p.stateStack.push(nextState) return false } @@ -282,11 +326,11 @@ func (p *Parser) reduce(prodNum int) bool { func (p *Parser) trapError() (int, bool) { count := 0 for { - if p.gram.ParsingTable.ErrorTrapperStates[p.stateStack.top()] != 0 { + if p.gram.ErrorTrapperState(p.stateStack.top()) { return count, true } - if p.stateStack.top() != p.gram.ParsingTable.InitialState { + if p.stateStack.top() != p.gram.InitialState() { p.stateStack.pop(1) count++ } else { @@ -301,14 +345,10 @@ func (p *Parser) SyntaxErrors() []*SyntaxError { func (p *Parser) searchLookahead(state int) []string { kinds := []string{} - term2Kind := p.gram.LexicalSpecification.Maleeni.TerminalToKind - kindNames := p.gram.LexicalSpecification.Maleeni.Spec.KindNames - aliases := p.gram.LexicalSpecification.Maleeni.KindAliases - termCount := p.gram.ParsingTable.TerminalCount - base := p.stateStack.top() * termCount + termCount := p.gram.TerminalCount() for term := 0; term < termCount; term++ { if p.disableLAC { - if p.gram.ParsingTable.Action[base+term] == 0 { + if p.gram.Action(p.stateStack.top(), term) == 0 { continue } } else { @@ -319,19 +359,19 @@ func (p *Parser) searchLookahead(state int) []string { // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol // intentionally. - if term == p.gram.ParsingTable.ErrorSymbol { + if term == p.gram.Error() { continue } - if term == p.gram.ParsingTable.EOFSymbol { + if term == p.gram.EOF() { kinds = append(kinds, "<eof>") continue } - if alias := aliases[term]; alias != "" { + if alias := p.gram.TerminalAlias(term); alias != "" { kinds = append(kinds, alias) } else { - kinds = append(kinds, kindNames[term2Kind[term]].String()) + kinds = append(kinds, p.gram.Terminal(term)) } } diff --git a/driver/parser_test.go b/driver/parser_test.go index 4d31ec1..69780bc 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -768,7 +768,7 @@ error: 'error' #skip; } treeAct := NewSyntaxTreeActionSet(gram, true, true) - p, err := NewParser(gram, strings.NewReader(tt.src), SemanticAction(treeAct)) + p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(treeAct)) if err != nil { t.Fatal(err) } diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go index e36784d..d3f15a8 100644 --- a/driver/semantic_action_test.go +++ b/driver/semantic_action_test.go @@ -196,7 +196,7 @@ char: "[a-z]"; semAct := &testSemAct{ gram: gram, } - p, err := NewParser(gram, strings.NewReader(tt.src), SemanticAction(semAct)) + p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(semAct)) if err != nil { t.Fatal(err) } diff --git a/driver/spec.go b/driver/spec.go new file mode 100644 index 0000000..71935b4 --- /dev/null +++ b/driver/spec.go @@ -0,0 +1,84 @@ +package driver + +import ( + mldriver "github.com/nihei9/maleeni/driver" + "github.com/nihei9/vartan/spec" +) + +type gram struct { + g *spec.CompiledGrammar +} + +func NewGrammar(g *spec.CompiledGrammar) *gram { + return &gram{ + g: g, + } +} + +func (g *gram) LexicalSpecification() mldriver.LexSpec { + return mldriver.NewLexSpec(g.g.LexicalSpecification.Maleeni.Spec) +} + +func (g *gram) Class() string { + return g.g.ParsingTable.Class +} + +func (g *gram) InitialState() int { + return g.g.ParsingTable.InitialState +} + +func (g *gram) StartProduction() int { + return g.g.ParsingTable.StartProduction +} + +func (g *gram) RecoverProduction(prod int) bool { + return g.g.ParsingTable.RecoverProductions[prod] != 0 +} + +func (g *gram) Action(state int, terminal int) int { + return g.g.ParsingTable.Action[state*g.g.ParsingTable.TerminalCount+terminal] +} + +func (g *gram) GoTo(state int, lhs int) int { + return g.g.ParsingTable.GoTo[state*g.g.ParsingTable.NonTerminalCount+lhs] +} + +func (g *gram) AlternativeSymbolCount(prod int) int { + return g.g.ParsingTable.AlternativeSymbolCounts[prod] +} + +func (g *gram) TerminalCount() int { + return g.g.ParsingTable.TerminalCount +} + +func (g *gram) ErrorTrapperState(state int) bool { + return g.g.ParsingTable.ErrorTrapperStates[state] != 0 +} + +func (g *gram) LHS(prod int) int { + return g.g.ParsingTable.LHSSymbols[prod] +} + +func (g *gram) EOF() int { + return g.g.ParsingTable.EOFSymbol +} + +func (g *gram) Error() int { + return g.g.ParsingTable.ErrorSymbol +} + +func (g *gram) Terminal(terminal int) string { + return g.g.ParsingTable.Terminals[terminal] +} + +func (g *gram) TerminalAlias(terminal int) string { + return g.g.LexicalSpecification.Maleeni.KindAliases[terminal] +} + +func (g *gram) Skip(kind mldriver.KindID) bool { + return g.g.LexicalSpecification.Maleeni.Skip[kind] > 0 +} + +func (g *gram) LexicalKindToTerminal(kind mldriver.KindID) int { + return g.g.LexicalSpecification.Maleeni.KindToTerminal[kind] +} diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go index 579ead1..8feec91 100644 --- a/driver/syntax_error_test.go +++ b/driver/syntax_error_test.go @@ -119,7 +119,7 @@ c: 'c'; t.Fatal(err) } - p, err := NewParser(gram, strings.NewReader(tt.src)) + p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src)) if err != nil { t.Fatal(err) } |