diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-01 23:57:02 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-02 00:01:35 +0900 |
commit | 8832b64b4227245e45f9a24d543c1b80168c489d (patch) | |
tree | 20ed014caa923254d8e7870241225d8c20f7d2b4 | |
parent | Remove the expected terminals field from the parsing table (diff) | |
download | urubu-8832b64b4227245e45f9a24d543c1b80168c489d.tar.gz urubu-8832b64b4227245e45f9a24d543c1b80168c489d.tar.xz |
Support LAC (lookahead correction)
-rw-r--r-- | cmd/vartan/describe.go | 6 | ||||
-rw-r--r-- | cmd/vartan/parse.go | 11 | ||||
-rw-r--r-- | driver/parser.go | 144 | ||||
-rw-r--r-- | grammar/grammar.go | 7 | ||||
-rw-r--r-- | grammar/parsing_table.go | 2 | ||||
-rw-r--r-- | spec/description.go | 1 | ||||
-rw-r--r-- | spec/grammar.go | 1 |
7 files changed, 139 insertions, 33 deletions
diff --git a/cmd/vartan/describe.go b/cmd/vartan/describe.go index c2cde93..2dabf9f 100644 --- a/cmd/vartan/describe.go +++ b/cmd/vartan/describe.go @@ -87,7 +87,11 @@ func readDescription(path string) (*spec.Description, error) { return desc, nil } -const descTemplate = `# Conflicts +const descTemplate = `# Class + +{{ .Class }} + +# Conflicts {{ printConflictSummary . }} diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go index 36401e1..5cf8b5b 100644 --- a/cmd/vartan/parse.go +++ b/cmd/vartan/parse.go @@ -13,9 +13,10 @@ import ( ) var parseFlags = struct { - source *string - onlyParse *bool - cst *bool + source *string + onlyParse *bool + cst *bool + disableLAC *bool }{} func init() { @@ -29,6 +30,7 @@ func init() { parseFlags.source = cmd.Flags().StringP("source", "s", "", "source file path (default stdin)") parseFlags.onlyParse = cmd.Flags().Bool("only-parse", false, "when this option is enabled, the parser performs only parse and doesn't semantic actions") parseFlags.cst = cmd.Flags().Bool("cst", false, "when this option is enabled, the parser generates a CST") + parseFlags.disableLAC = cmd.Flags().Bool("disable-lac", false, "disable LAC (lookahead correction)") rootCmd.AddCommand(cmd) } @@ -85,6 +87,9 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { case !*parseFlags.onlyParse: opts = append(opts, driver.MakeAST()) } + if *parseFlags.disableLAC { + opts = append(opts, driver.DisableLAC()) + } p, err = driver.NewParser(cgram, src, opts...) if err != nil { return err diff --git a/driver/parser.go b/driver/parser.go index 91f364e..59fb56a 100644 --- a/driver/parser.go +++ b/driver/parser.go @@ -75,6 +75,14 @@ func MakeCST() ParserOption { } } +// DisableLAC disables LAC (lookahead correction). When the grammar has the LALR class, LAC is enabled by default. +func DisableLAC() ParserOption { + return func(p *Parser) error { + p.disableLAC = true + return nil + } +} + type semanticFrame struct { cst *Node ast *Node @@ -83,13 +91,14 @@ type semanticFrame struct { type Parser struct { gram *spec.CompiledGrammar lex *mldriver.Lexer - stateStack []int + stateStack *stateStack semStack []*semanticFrame cst *Node ast *Node makeAST bool makeCST bool needSemAct bool + disableLAC bool onError bool shiftCount int synErrs []*SyntaxError @@ -104,7 +113,11 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) p := &Parser{ gram: gram, lex: lex, - stateStack: []int{}, + stateStack: &stateStack{}, + } + + if p.gram.ParsingTable.Class != "lalr" { + p.disableLAC = true } for _, opt := range opts { @@ -120,7 +133,7 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) } func (p *Parser) Parse() error { - p.push(p.gram.ParsingTable.InitialState) + p.stateStack.push(p.gram.ParsingTable.InitialState) tok, err := p.nextToken() if err != nil { return err @@ -129,6 +142,7 @@ func (p *Parser) Parse() error { ACTION_LOOP: for { act := p.lookupAction(tok) + switch { case act < 0: // Shift nextState := act * -1 @@ -185,7 +199,7 @@ ACTION_LOOP: Col: tok.Col, Message: "unexpected token", Token: tok, - ExpectedTerminals: p.searchLookahead(p.top()), + ExpectedTerminals: p.searchLookahead(p.stateStack.top()), }) ok := p.trapError() @@ -208,6 +222,37 @@ ACTION_LOOP: } } +// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid, +// this method returns `true`. +func (p *Parser) validateLookahead(term int) bool { + p.stateStack.enableExploratoryMode() + defer p.stateStack.disableExploratoryMode() + + tab := p.gram.ParsingTable + + for { + act := tab.Action[p.stateStack.topExploratorily()*tab.TerminalCount+term] + + switch { + case act < 0: // Shift + return true + case act > 0: // Reduce + prodNum := act + + lhs := tab.LHSSymbols[prodNum] + if lhs == tab.LHSSymbols[tab.StartProduction] { + return true + } + n := tab.AlternativeSymbolCounts[prodNum] + p.stateStack.popExploratorily(n) + state := tab.GoTo[p.stateStack.topExploratorily()*tab.NonTerminalCount+lhs] + p.stateStack.pushExploratorily(state) + default: // Error + return false + } + } +} + func (p *Parser) nextToken() (*mldriver.Token, error) { skip := p.gram.LexicalSpecification.Maleeni.Skip for { @@ -236,24 +281,31 @@ func (p *Parser) tokenToTerminal(tok *mldriver.Token) int { } func (p *Parser) lookupAction(tok *mldriver.Token) int { + if !p.disableLAC { + term := p.tokenToTerminal(tok) + if !p.validateLookahead(term) { + return 0 + } + } + termCount := p.gram.ParsingTable.TerminalCount term := p.tokenToTerminal(tok) - return p.gram.ParsingTable.Action[p.top()*termCount+term] + return p.gram.ParsingTable.Action[p.stateStack.top()*termCount+term] } func (p *Parser) lookupActionOnError() (int, error) { termCount := p.gram.ParsingTable.TerminalCount errSym := p.gram.ParsingTable.ErrorSymbol - act := p.gram.ParsingTable.Action[p.top()*termCount+errSym] + act := p.gram.ParsingTable.Action[p.stateStack.top()*termCount+errSym] if act >= 0 { - return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.top(), p.gram.ParsingTable.Terminals[errSym]) + return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.ParsingTable.Terminals[errSym]) } return act, nil } func (p *Parser) shift(nextState int) { - p.push(nextState) + p.stateStack.push(nextState) } func (p *Parser) reduce(prodNum int) bool { @@ -263,20 +315,20 @@ func (p *Parser) reduce(prodNum int) bool { return true } n := tab.AlternativeSymbolCounts[prodNum] - p.pop(n) - nextState := tab.GoTo[p.top()*tab.NonTerminalCount+lhs] - p.push(nextState) + p.stateStack.pop(n) + nextState := tab.GoTo[p.stateStack.top()*tab.NonTerminalCount+lhs] + p.stateStack.push(nextState) return false } func (p *Parser) trapError() bool { for { - if p.gram.ParsingTable.ErrorTrapperStates[p.top()] != 0 { + if p.gram.ParsingTable.ErrorTrapperStates[p.stateStack.top()] != 0 { return true } - if p.top() != p.gram.ParsingTable.InitialState { - p.pop(1) + if p.stateStack.top() != p.gram.ParsingTable.InitialState { + p.stateStack.pop(1) p.semStack = p.semStack[:len(p.semStack)-1] } else { return false @@ -432,18 +484,6 @@ func (p *Parser) actOnError() { }) } -func (p *Parser) top() int { - return p.stateStack[len(p.stateStack)-1] -} - -func (p *Parser) push(state int) { - p.stateStack = append(p.stateStack, state) -} - -func (p *Parser) pop(n int) { - p.stateStack = p.stateStack[:len(p.stateStack)-n] -} - func (p *Parser) CST() *Node { return p.cst } @@ -462,10 +502,16 @@ func (p *Parser) searchLookahead(state int) []string { kindNames := p.gram.LexicalSpecification.Maleeni.Spec.KindNames aliases := p.gram.LexicalSpecification.Maleeni.KindAliases termCount := p.gram.ParsingTable.TerminalCount - base := p.top() * termCount + base := p.stateStack.top() * termCount for term := 0; term < termCount; term++ { - if p.gram.ParsingTable.Action[base+term] == 0 { - continue + if p.disableLAC { + if p.gram.ParsingTable.Action[base+term] == 0 { + continue + } + } else { + if !p.validateLookahead(term) { + continue + } } // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol @@ -488,3 +534,43 @@ func (p *Parser) searchLookahead(state int) []string { return kinds } + +type stateStack struct { + items []int + itemsExp []int +} + +func (s *stateStack) enableExploratoryMode() { + s.itemsExp = make([]int, len(s.items)) + for i, v := range s.items { + s.itemsExp[i] = v + } +} + +func (s *stateStack) disableExploratoryMode() { + s.itemsExp = nil +} + +func (s *stateStack) top() int { + return s.items[len(s.items)-1] +} + +func (s *stateStack) topExploratorily() int { + return s.itemsExp[len(s.itemsExp)-1] +} + +func (s *stateStack) push(state int) { + s.items = append(s.items, state) +} + +func (s *stateStack) pushExploratorily(state int) { + s.itemsExp = append(s.itemsExp, state) +} + +func (s *stateStack) pop(n int) { + s.items = s.items[:len(s.items)-n] +} + +func (s *stateStack) popExploratorily(n int) { + s.itemsExp = s.itemsExp[:len(s.itemsExp)-n] +} diff --git a/grammar/grammar.go b/grammar/grammar.go index efa0f35..7e86ea8 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -1038,9 +1038,12 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error return nil, err } + var class string var automaton *lr0Automaton switch config.class { case ClassSLR: + class = "slr" + followSet, err := genFollowSet(gram.productionSet, firstSet) if err != nil { return nil, err @@ -1053,6 +1056,8 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error automaton = slr1.lr0Automaton case ClassLALR: + class = "lalr" + lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet) if err != nil { return nil, err @@ -1064,6 +1069,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error var tab *ParsingTable { b := &lrTableBuilder{ + class: config.class, automaton: automaton, prods: gram.productionSet, termCount: len(terms), @@ -1150,6 +1156,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error }, }, ParsingTable: &spec.ParsingTable{ + Class: class, Action: action, GoTo: goTo, StateCount: tab.stateCount, diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go index aa88ad5..28f6392 100644 --- a/grammar/parsing_table.go +++ b/grammar/parsing_table.go @@ -136,6 +136,7 @@ func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum) } type lrTableBuilder struct { + class Class automaton *lr0Automaton prods *productionSet termCount int @@ -552,6 +553,7 @@ func (b *lrTableBuilder) genDescription(tab *ParsingTable, gram *Grammar) (*spec } return &spec.Description{ + Class: string(b.class), Terminals: terms, NonTerminals: nonTerms, Productions: prods, diff --git a/spec/description.go b/spec/description.go index 08b037e..ae56814 100644 --- a/spec/description.go +++ b/spec/description.go @@ -64,6 +64,7 @@ type State struct { } type Description struct { + Class string `json:"class"` Terminals []*Terminal `json:"terminals"` NonTerminals []*NonTerminal `json:"non_terminals"` Productions []*Production `json:"productions"` diff --git a/spec/grammar.go b/spec/grammar.go index 8403308..825fc2c 100644 --- a/spec/grammar.go +++ b/spec/grammar.go @@ -22,6 +22,7 @@ type Maleeni struct { } type ParsingTable struct { + Class string `json:"class"` Action []int `json:"action"` GoTo []int `json:"goto"` StateCount int `json:"state_count"` |