aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-09-01 23:57:02 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-09-02 00:01:35 +0900
commit8832b64b4227245e45f9a24d543c1b80168c489d (patch)
tree20ed014caa923254d8e7870241225d8c20f7d2b4
parentRemove the expected terminals field from the parsing table (diff)
downloadurubu-8832b64b4227245e45f9a24d543c1b80168c489d.tar.gz
urubu-8832b64b4227245e45f9a24d543c1b80168c489d.tar.xz
Support LAC (lookahead correction)
-rw-r--r--cmd/vartan/describe.go6
-rw-r--r--cmd/vartan/parse.go11
-rw-r--r--driver/parser.go144
-rw-r--r--grammar/grammar.go7
-rw-r--r--grammar/parsing_table.go2
-rw-r--r--spec/description.go1
-rw-r--r--spec/grammar.go1
7 files changed, 139 insertions, 33 deletions
diff --git a/cmd/vartan/describe.go b/cmd/vartan/describe.go
index c2cde93..2dabf9f 100644
--- a/cmd/vartan/describe.go
+++ b/cmd/vartan/describe.go
@@ -87,7 +87,11 @@ func readDescription(path string) (*spec.Description, error) {
return desc, nil
}
-const descTemplate = `# Conflicts
+const descTemplate = `# Class
+
+{{ .Class }}
+
+# Conflicts
{{ printConflictSummary . }}
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index 36401e1..5cf8b5b 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -13,9 +13,10 @@ import (
)
var parseFlags = struct {
- source *string
- onlyParse *bool
- cst *bool
+ source *string
+ onlyParse *bool
+ cst *bool
+ disableLAC *bool
}{}
func init() {
@@ -29,6 +30,7 @@ func init() {
parseFlags.source = cmd.Flags().StringP("source", "s", "", "source file path (default stdin)")
parseFlags.onlyParse = cmd.Flags().Bool("only-parse", false, "when this option is enabled, the parser performs only parse and doesn't semantic actions")
parseFlags.cst = cmd.Flags().Bool("cst", false, "when this option is enabled, the parser generates a CST")
+ parseFlags.disableLAC = cmd.Flags().Bool("disable-lac", false, "disable LAC (lookahead correction)")
rootCmd.AddCommand(cmd)
}
@@ -85,6 +87,9 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
case !*parseFlags.onlyParse:
opts = append(opts, driver.MakeAST())
}
+ if *parseFlags.disableLAC {
+ opts = append(opts, driver.DisableLAC())
+ }
p, err = driver.NewParser(cgram, src, opts...)
if err != nil {
return err
diff --git a/driver/parser.go b/driver/parser.go
index 91f364e..59fb56a 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -75,6 +75,14 @@ func MakeCST() ParserOption {
}
}
+// DisableLAC disables LAC (lookahead correction). When the grammar has the LALR class, LAC is enabled by default.
+func DisableLAC() ParserOption {
+ return func(p *Parser) error {
+ p.disableLAC = true
+ return nil
+ }
+}
+
type semanticFrame struct {
cst *Node
ast *Node
@@ -83,13 +91,14 @@ type semanticFrame struct {
type Parser struct {
gram *spec.CompiledGrammar
lex *mldriver.Lexer
- stateStack []int
+ stateStack *stateStack
semStack []*semanticFrame
cst *Node
ast *Node
makeAST bool
makeCST bool
needSemAct bool
+ disableLAC bool
onError bool
shiftCount int
synErrs []*SyntaxError
@@ -104,7 +113,11 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption)
p := &Parser{
gram: gram,
lex: lex,
- stateStack: []int{},
+ stateStack: &stateStack{},
+ }
+
+ if p.gram.ParsingTable.Class != "lalr" {
+ p.disableLAC = true
}
for _, opt := range opts {
@@ -120,7 +133,7 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption)
}
func (p *Parser) Parse() error {
- p.push(p.gram.ParsingTable.InitialState)
+ p.stateStack.push(p.gram.ParsingTable.InitialState)
tok, err := p.nextToken()
if err != nil {
return err
@@ -129,6 +142,7 @@ func (p *Parser) Parse() error {
ACTION_LOOP:
for {
act := p.lookupAction(tok)
+
switch {
case act < 0: // Shift
nextState := act * -1
@@ -185,7 +199,7 @@ ACTION_LOOP:
Col: tok.Col,
Message: "unexpected token",
Token: tok,
- ExpectedTerminals: p.searchLookahead(p.top()),
+ ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
})
ok := p.trapError()
@@ -208,6 +222,37 @@ ACTION_LOOP:
}
}
+// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid,
+// this method returns `true`.
+func (p *Parser) validateLookahead(term int) bool {
+ p.stateStack.enableExploratoryMode()
+ defer p.stateStack.disableExploratoryMode()
+
+ tab := p.gram.ParsingTable
+
+ for {
+ act := tab.Action[p.stateStack.topExploratorily()*tab.TerminalCount+term]
+
+ switch {
+ case act < 0: // Shift
+ return true
+ case act > 0: // Reduce
+ prodNum := act
+
+ lhs := tab.LHSSymbols[prodNum]
+ if lhs == tab.LHSSymbols[tab.StartProduction] {
+ return true
+ }
+ n := tab.AlternativeSymbolCounts[prodNum]
+ p.stateStack.popExploratorily(n)
+ state := tab.GoTo[p.stateStack.topExploratorily()*tab.NonTerminalCount+lhs]
+ p.stateStack.pushExploratorily(state)
+ default: // Error
+ return false
+ }
+ }
+}
+
func (p *Parser) nextToken() (*mldriver.Token, error) {
skip := p.gram.LexicalSpecification.Maleeni.Skip
for {
@@ -236,24 +281,31 @@ func (p *Parser) tokenToTerminal(tok *mldriver.Token) int {
}
func (p *Parser) lookupAction(tok *mldriver.Token) int {
+ if !p.disableLAC {
+ term := p.tokenToTerminal(tok)
+ if !p.validateLookahead(term) {
+ return 0
+ }
+ }
+
termCount := p.gram.ParsingTable.TerminalCount
term := p.tokenToTerminal(tok)
- return p.gram.ParsingTable.Action[p.top()*termCount+term]
+ return p.gram.ParsingTable.Action[p.stateStack.top()*termCount+term]
}
func (p *Parser) lookupActionOnError() (int, error) {
termCount := p.gram.ParsingTable.TerminalCount
errSym := p.gram.ParsingTable.ErrorSymbol
- act := p.gram.ParsingTable.Action[p.top()*termCount+errSym]
+ act := p.gram.ParsingTable.Action[p.stateStack.top()*termCount+errSym]
if act >= 0 {
- return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.top(), p.gram.ParsingTable.Terminals[errSym])
+ return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.ParsingTable.Terminals[errSym])
}
return act, nil
}
func (p *Parser) shift(nextState int) {
- p.push(nextState)
+ p.stateStack.push(nextState)
}
func (p *Parser) reduce(prodNum int) bool {
@@ -263,20 +315,20 @@ func (p *Parser) reduce(prodNum int) bool {
return true
}
n := tab.AlternativeSymbolCounts[prodNum]
- p.pop(n)
- nextState := tab.GoTo[p.top()*tab.NonTerminalCount+lhs]
- p.push(nextState)
+ p.stateStack.pop(n)
+ nextState := tab.GoTo[p.stateStack.top()*tab.NonTerminalCount+lhs]
+ p.stateStack.push(nextState)
return false
}
func (p *Parser) trapError() bool {
for {
- if p.gram.ParsingTable.ErrorTrapperStates[p.top()] != 0 {
+ if p.gram.ParsingTable.ErrorTrapperStates[p.stateStack.top()] != 0 {
return true
}
- if p.top() != p.gram.ParsingTable.InitialState {
- p.pop(1)
+ if p.stateStack.top() != p.gram.ParsingTable.InitialState {
+ p.stateStack.pop(1)
p.semStack = p.semStack[:len(p.semStack)-1]
} else {
return false
@@ -432,18 +484,6 @@ func (p *Parser) actOnError() {
})
}
-func (p *Parser) top() int {
- return p.stateStack[len(p.stateStack)-1]
-}
-
-func (p *Parser) push(state int) {
- p.stateStack = append(p.stateStack, state)
-}
-
-func (p *Parser) pop(n int) {
- p.stateStack = p.stateStack[:len(p.stateStack)-n]
-}
-
func (p *Parser) CST() *Node {
return p.cst
}
@@ -462,10 +502,16 @@ func (p *Parser) searchLookahead(state int) []string {
kindNames := p.gram.LexicalSpecification.Maleeni.Spec.KindNames
aliases := p.gram.LexicalSpecification.Maleeni.KindAliases
termCount := p.gram.ParsingTable.TerminalCount
- base := p.top() * termCount
+ base := p.stateStack.top() * termCount
for term := 0; term < termCount; term++ {
- if p.gram.ParsingTable.Action[base+term] == 0 {
- continue
+ if p.disableLAC {
+ if p.gram.ParsingTable.Action[base+term] == 0 {
+ continue
+ }
+ } else {
+ if !p.validateLookahead(term) {
+ continue
+ }
}
// We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol
@@ -488,3 +534,43 @@ func (p *Parser) searchLookahead(state int) []string {
return kinds
}
+
+type stateStack struct {
+ items []int
+ itemsExp []int
+}
+
+func (s *stateStack) enableExploratoryMode() {
+ s.itemsExp = make([]int, len(s.items))
+ for i, v := range s.items {
+ s.itemsExp[i] = v
+ }
+}
+
+func (s *stateStack) disableExploratoryMode() {
+ s.itemsExp = nil
+}
+
+func (s *stateStack) top() int {
+ return s.items[len(s.items)-1]
+}
+
+func (s *stateStack) topExploratorily() int {
+ return s.itemsExp[len(s.itemsExp)-1]
+}
+
+func (s *stateStack) push(state int) {
+ s.items = append(s.items, state)
+}
+
+func (s *stateStack) pushExploratorily(state int) {
+ s.itemsExp = append(s.itemsExp, state)
+}
+
+func (s *stateStack) pop(n int) {
+ s.items = s.items[:len(s.items)-n]
+}
+
+func (s *stateStack) popExploratorily(n int) {
+ s.itemsExp = s.itemsExp[:len(s.itemsExp)-n]
+}
diff --git a/grammar/grammar.go b/grammar/grammar.go
index efa0f35..7e86ea8 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -1038,9 +1038,12 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
return nil, err
}
+ var class string
var automaton *lr0Automaton
switch config.class {
case ClassSLR:
+ class = "slr"
+
followSet, err := genFollowSet(gram.productionSet, firstSet)
if err != nil {
return nil, err
@@ -1053,6 +1056,8 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
automaton = slr1.lr0Automaton
case ClassLALR:
+ class = "lalr"
+
lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet)
if err != nil {
return nil, err
@@ -1064,6 +1069,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
var tab *ParsingTable
{
b := &lrTableBuilder{
+ class: config.class,
automaton: automaton,
prods: gram.productionSet,
termCount: len(terms),
@@ -1150,6 +1156,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error
},
},
ParsingTable: &spec.ParsingTable{
+ Class: class,
Action: action,
GoTo: goTo,
StateCount: tab.stateCount,
diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go
index aa88ad5..28f6392 100644
--- a/grammar/parsing_table.go
+++ b/grammar/parsing_table.go
@@ -136,6 +136,7 @@ func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum)
}
type lrTableBuilder struct {
+ class Class
automaton *lr0Automaton
prods *productionSet
termCount int
@@ -552,6 +553,7 @@ func (b *lrTableBuilder) genDescription(tab *ParsingTable, gram *Grammar) (*spec
}
return &spec.Description{
+ Class: string(b.class),
Terminals: terms,
NonTerminals: nonTerms,
Productions: prods,
diff --git a/spec/description.go b/spec/description.go
index 08b037e..ae56814 100644
--- a/spec/description.go
+++ b/spec/description.go
@@ -64,6 +64,7 @@ type State struct {
}
type Description struct {
+ Class string `json:"class"`
Terminals []*Terminal `json:"terminals"`
NonTerminals []*NonTerminal `json:"non_terminals"`
Productions []*Production `json:"productions"`
diff --git a/spec/grammar.go b/spec/grammar.go
index 8403308..825fc2c 100644
--- a/spec/grammar.go
+++ b/spec/grammar.go
@@ -22,6 +22,7 @@ type Maleeni struct {
}
type ParsingTable struct {
+ Class string `json:"class"`
Action []int `json:"action"`
GoTo []int `json:"goto"`
StateCount int `json:"state_count"`