author    | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-03 14:49:37 +0900
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-03 14:55:13 +0900
commit    | f4e3fef07e8e38e37e63989254718e6c4cb543a9 (patch)
tree      | 0eeb7b043800282d921d81159e8d5d9ec87c1211
parent    | Rename describe command to show command (diff)
Make semantic actions user-configurable
-rw-r--r-- | cmd/vartan/parse.go            |  28
-rw-r--r-- | driver/conflict_test.go        |   7
-rw-r--r-- | driver/parser.go               | 264
-rw-r--r-- | driver/parser_test.go          |  11
-rw-r--r-- | driver/semantic_action.go      | 276
-rw-r--r-- | driver/semantic_action_test.go | 203
-rw-r--r-- | driver/syntax_error_test.go    |   2
7 files changed, 539 insertions, 252 deletions
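Before the per-file hunks, a caller-side summary: tree construction is no longer a parser flag but a `SyntaxTreeActionSet` handed to the parser through the new `SemanticAction` option. A minimal sketch, assuming the `github.com/nihei9/vartan/{driver,spec}` import paths and that `cgram` and `src` come from the usual compile/lex setup in cmd/vartan/parse.go; `printAST` itself is illustrative:

```go
package example

import (
	"io"
	"os"

	"github.com/nihei9/vartan/driver"
	"github.com/nihei9/vartan/spec"
)

// printAST parses src and prints the AST using the API this commit introduces.
// NewSyntaxTreeActionSet(gram, makeAST, makeCST) replaces the old
// MakeAST()/MakeCST() parser options.
func printAST(cgram *spec.CompiledGrammar, src io.Reader) error {
	treeAct := driver.NewSyntaxTreeActionSet(cgram, true, false) // build an AST, skip the CST
	p, err := driver.NewParser(cgram, src, driver.SemanticAction(treeAct))
	if err != nil {
		return err
	}
	if err := p.Parse(); err != nil {
		return err
	}
	driver.PrintTree(os.Stdout, treeAct.AST())
	return nil
}
```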
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go index 5cf8b5b..c370c15 100644 --- a/cmd/vartan/parse.go +++ b/cmd/vartan/parse.go @@ -69,6 +69,7 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { } var p *driver.Parser + var treeAct *driver.SyntaxTreeActionSet { src := os.Stdin if *parseFlags.source != "" { @@ -81,15 +82,22 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { } var opts []driver.ParserOption - switch { - case *parseFlags.cst: - opts = append(opts, driver.MakeCST()) - case !*parseFlags.onlyParse: - opts = append(opts, driver.MakeAST()) - } - if *parseFlags.disableLAC { - opts = append(opts, driver.DisableLAC()) + { + switch { + case *parseFlags.cst: + treeAct = driver.NewSyntaxTreeActionSet(cgram, false, true) + case !*parseFlags.onlyParse: + treeAct = driver.NewSyntaxTreeActionSet(cgram, true, false) + } + if treeAct != nil { + opts = append(opts, driver.SemanticAction(treeAct)) + } + + if *parseFlags.disableLAC { + opts = append(opts, driver.DisableLAC()) + } } + p, err = driver.NewParser(cgram, src, opts...) if err != nil { return err @@ -128,9 +136,9 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { if len(synErrs) == 0 && !*parseFlags.onlyParse { var tree *driver.Node if *parseFlags.cst { - tree = p.CST() + tree = treeAct.CST() } else { - tree = p.AST() + tree = treeAct.AST() } driver.PrintTree(os.Stdout, tree) } diff --git a/driver/conflict_test.go b/driver/conflict_test.go index 46eeaba..0fa00f6 100644 --- a/driver/conflict_test.go +++ b/driver/conflict_test.go @@ -335,7 +335,8 @@ assign: '='; t.Fatal(err) } - p, err := NewParser(gram, strings.NewReader(tt.src), MakeCST()) + treeAct := NewSyntaxTreeActionSet(gram, false, true) + p, err := NewParser(gram, strings.NewReader(tt.src), SemanticAction(treeAct)) if err != nil { t.Fatal(err) } @@ -346,9 +347,9 @@ assign: '='; } fmt.Printf("CST:\n") - PrintTree(os.Stdout, p.CST()) + PrintTree(os.Stdout, treeAct.CST()) - testTree(t, p.CST(), tt.cst) + testTree(t, treeAct.CST(), tt.cst) }) } } diff --git a/driver/parser.go b/driver/parser.go index 59fb56a..1e7af29 100644 --- a/driver/parser.go +++ b/driver/parser.go @@ -8,49 +8,6 @@ import ( "github.com/nihei9/vartan/spec" ) -type Node struct { - KindName string - Text string - Row int - Col int - Children []*Node -} - -func PrintTree(w io.Writer, node *Node) { - printTree(w, node, "", "") -} - -func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { - if node == nil { - return - } - - if node.Text != "" { - fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text) - } else { - fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) - } - - num := len(node.Children) - for i, child := range node.Children { - var line string - if num > 1 && i < num-1 { - line = "├─ " - } else { - line = "└─ " - } - - var prefix string - if i >= num-1 { - prefix = " " - } else { - prefix = "│ " - } - - printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) - } -} - type SyntaxError struct { Row int Col int @@ -61,20 +18,6 @@ type SyntaxError struct { type ParserOption func(p *Parser) error -func MakeAST() ParserOption { - return func(p *Parser) error { - p.makeAST = true - return nil - } -} - -func MakeCST() ParserOption { - return func(p *Parser) error { - p.makeCST = true - return nil - } -} - // DisableLAC disables LAC (lookahead correction). When the grammar has the LALR class, LAC is enabled by default. 
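Since `MakeAST()` and `MakeCST()` are removed in the hunk above, existing callers migrate the way driver/conflict_test.go does. A sketch of the before/after; `buildTrees` is an illustrative helper and the import paths are assumed:

```go
package example

import (
	"io"

	"github.com/nihei9/vartan/driver"
	"github.com/nihei9/vartan/spec"
)

// buildTrees shows the migration for callers of the removed options:
//
//	before: p, _ := driver.NewParser(gram, src, driver.MakeAST(), driver.MakeCST())
//	        ast, cst := p.AST(), p.CST()
//
// After this commit both trees are owned by the SyntaxTreeActionSet rather
// than by the parser.
func buildTrees(gram *spec.CompiledGrammar, src io.Reader) (*driver.Node, *driver.Node, error) {
	treeAct := driver.NewSyntaxTreeActionSet(gram, true, true) // build the AST and the CST
	p, err := driver.NewParser(gram, src, driver.SemanticAction(treeAct))
	if err != nil {
		return nil, nil, err
	}
	if err := p.Parse(); err != nil {
		return nil, nil, err
	}
	return treeAct.AST(), treeAct.CST(), nil
}
```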
func DisableLAC() ParserOption { return func(p *Parser) error { @@ -83,21 +26,18 @@ func DisableLAC() ParserOption { } } -type semanticFrame struct { - cst *Node - ast *Node +func SemanticAction(semAct SemanticActionSet) ParserOption { + return func(p *Parser) error { + p.semAct = semAct + return nil + } } type Parser struct { gram *spec.CompiledGrammar lex *mldriver.Lexer stateStack *stateStack - semStack []*semanticFrame - cst *Node - ast *Node - makeAST bool - makeCST bool - needSemAct bool + semAct SemanticActionSet disableLAC bool onError bool shiftCount int @@ -127,8 +67,6 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader, opts ...ParserOption) } } - p.needSemAct = p.makeAST || p.makeCST - return p, nil } @@ -159,7 +97,9 @@ ACTION_LOOP: p.shift(nextState) - p.actOnShift(tok) + if p.semAct != nil { + p.semAct.Shift(tok) + } tok, err = p.nextToken() if err != nil { @@ -175,12 +115,16 @@ ACTION_LOOP: accepted := p.reduce(prodNum) if accepted { - p.actOnAccepting() + if p.semAct != nil { + p.semAct.Accept() + } return nil } - p.actOnReduction(prodNum) + if p.semAct != nil { + p.semAct.Reduce(prodNum) + } default: // Error if p.onError { tok, err = p.nextToken() @@ -202,10 +146,17 @@ ACTION_LOOP: ExpectedTerminals: p.searchLookahead(p.stateStack.top()), }) - ok := p.trapError() + count, ok := p.trapError() if !ok { + if p.semAct != nil { + p.semAct.MissError() + } + return nil } + if p.semAct != nil { + p.semAct.TrapError(count) + } p.onError = true p.shiftCount = 0 @@ -217,7 +168,9 @@ ACTION_LOOP: p.shift(act * -1) - p.actOnError() + if p.semAct != nil { + p.semAct.ShiftError() + } } } } @@ -321,177 +274,22 @@ func (p *Parser) reduce(prodNum int) bool { return false } -func (p *Parser) trapError() bool { +func (p *Parser) trapError() (int, bool) { + count := 0 for { if p.gram.ParsingTable.ErrorTrapperStates[p.stateStack.top()] != 0 { - return true + return count, true } if p.stateStack.top() != p.gram.ParsingTable.InitialState { p.stateStack.pop(1) - p.semStack = p.semStack[:len(p.semStack)-1] + count++ } else { - return false + return 0, false } } } -func (p *Parser) actOnShift(tok *mldriver.Token) { - if !p.needSemAct { - return - } - - term := p.tokenToTerminal(tok) - - var ast *Node - var cst *Node - if p.makeAST { - ast = &Node{ - KindName: p.gram.ParsingTable.Terminals[term], - Text: tok.Text(), - Row: tok.Row, - Col: tok.Col, - } - } - if p.makeCST { - cst = &Node{ - KindName: p.gram.ParsingTable.Terminals[term], - Text: tok.Text(), - Row: tok.Row, - Col: tok.Col, - } - } - - p.semStack = append(p.semStack, &semanticFrame{ - cst: cst, - ast: ast, - }) -} - -func (p *Parser) actOnReduction(prodNum int) { - if !p.needSemAct { - return - } - - lhs := p.gram.ParsingTable.LHSSymbols[prodNum] - - // When an alternative is empty, `n` will be 0, and `handle` will be empty slice. - n := p.gram.ParsingTable.AlternativeSymbolCounts[prodNum] - handle := p.semStack[len(p.semStack)-n:] - - var ast *Node - var cst *Node - if p.makeAST { - act := p.gram.ASTAction.Entries[prodNum] - var children []*Node - if act != nil { - // Count the number of children in advance to avoid frequent growth in a slice for children. 
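A consequence of the `if p.semAct != nil` guards in the Parse loop above: semantic actions are strictly optional, so a parse-only run (the `--only-parse` path, or driver/syntax_error_test.go at the end of this patch) needs no option at all. A minimal sketch; `checkSyntax` is illustrative and the import paths are assumed:

```go
package example

import (
	"fmt"
	"io"

	"github.com/nihei9/vartan/driver"
	"github.com/nihei9/vartan/spec"
)

// checkSyntax runs the parser without any semantic action set; every
// p.semAct hook in Parse is skipped because the field stays nil.
func checkSyntax(gram *spec.CompiledGrammar, src io.Reader) error {
	p, err := driver.NewParser(gram, src)
	if err != nil {
		return err
	}
	if err := p.Parse(); err != nil {
		return err
	}
	for _, synErr := range p.SyntaxErrors() {
		fmt.Printf("syntax error at %v:%v\n", synErr.Row, synErr.Col)
	}
	return nil
}
```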
- { - l := 0 - for _, e := range act { - if e > 0 { - l++ - } else { - offset := e*-1 - 1 - l += len(handle[offset].ast.Children) - } - } - - children = make([]*Node, l) - } - - p := 0 - for _, e := range act { - if e > 0 { - offset := e - 1 - children[p] = handle[offset].ast - p++ - } else { - offset := e*-1 - 1 - for _, c := range handle[offset].ast.Children { - children[p] = c - p++ - } - } - } - } else { - // If an alternative has no AST action, a driver generates - // a node with the same structure as a CST. - - children = make([]*Node, len(handle)) - for i, f := range handle { - children[i] = f.ast - } - } - - ast = &Node{ - KindName: p.gram.ParsingTable.NonTerminals[lhs], - Children: children, - } - } - if p.makeCST { - children := make([]*Node, len(handle)) - for i, f := range handle { - children[i] = f.cst - } - - cst = &Node{ - KindName: p.gram.ParsingTable.NonTerminals[lhs], - Children: children, - } - } - - p.semStack = p.semStack[:len(p.semStack)-n] - p.semStack = append(p.semStack, &semanticFrame{ - cst: cst, - ast: ast, - }) -} - -func (p *Parser) actOnAccepting() { - if !p.needSemAct { - return - } - - top := p.semStack[len(p.semStack)-1] - p.cst = top.cst - p.ast = top.ast -} - -func (p *Parser) actOnError() { - if !p.needSemAct { - return - } - - errSym := p.gram.ParsingTable.ErrorSymbol - - var ast *Node - var cst *Node - if p.makeAST { - ast = &Node{ - KindName: p.gram.ParsingTable.Terminals[errSym], - } - } - if p.makeCST { - cst = &Node{ - KindName: p.gram.ParsingTable.Terminals[errSym], - } - } - - p.semStack = append(p.semStack, &semanticFrame{ - cst: cst, - ast: ast, - }) -} - -func (p *Parser) CST() *Node { - return p.cst -} - -func (p *Parser) AST() *Node { - return p.ast -} - func (p *Parser) SyntaxErrors() []*SyntaxError { return p.synErrs } diff --git a/driver/parser_test.go b/driver/parser_test.go index 4779a00..0fad19f 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -662,7 +662,8 @@ error: 'error' #skip; t.Fatal(err) } - p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST()) + treeAct := NewSyntaxTreeActionSet(gram, true, true) + p, err := NewParser(gram, strings.NewReader(tt.src), SemanticAction(treeAct)) if err != nil { t.Fatal(err) } @@ -677,17 +678,17 @@ error: 'error' #skip; } if tt.cst != nil { - testTree(t, p.CST(), tt.cst) + testTree(t, treeAct.CST(), tt.cst) } if tt.ast != nil { - testTree(t, p.AST(), tt.ast) + testTree(t, treeAct.AST(), tt.ast) } fmt.Println("CST:") - PrintTree(os.Stdout, p.CST()) + PrintTree(os.Stdout, treeAct.CST()) fmt.Println("AST:") - PrintTree(os.Stdout, p.AST()) + PrintTree(os.Stdout, treeAct.AST()) }) } } diff --git a/driver/semantic_action.go b/driver/semantic_action.go new file mode 100644 index 0000000..b31c35b --- /dev/null +++ b/driver/semantic_action.go @@ -0,0 +1,276 @@ +package driver + +import ( + "fmt" + "io" + + mldriver "github.com/nihei9/maleeni/driver" + "github.com/nihei9/vartan/spec" +) + +type SemanticActionSet interface { + // Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to + // the symbol. + Shift(tok *mldriver.Token) + + // Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to + // the symbol. This function doesn't take a token as an argument because a token corresponding to + // the error symbol doesn't exist. + ShiftError() + + // Reduce runs when the driver reduces an RHS of a production to its LHS. `prodNum` is a number of + // the production. 
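The `SemanticActionSet` interface that driver/semantic_action.go introduces is the extension point named in the commit message: any type with these six callbacks can be passed to `SemanticAction`. A sketch of the stack discipline the callbacks imply; `frameCounter` is illustrative, while the `AlternativeSymbolCounts` lookup is the one `Reduce` uses below:

```go
package example

import (
	mldriver "github.com/nihei9/maleeni/driver"
	"github.com/nihei9/vartan/driver"
	"github.com/nihei9/vartan/spec"
)

// frameCounter only tracks how many semantic frames its stack would hold.
// Shift and ShiftError push one frame, Reduce replaces the RHS frames with a
// single LHS frame, and TrapError drops the frames the driver discarded while
// recovering. SyntaxTreeActionSet follows the same pattern with its
// semanticStack.
type frameCounter struct {
	gram   *spec.CompiledGrammar
	frames int
}

var _ driver.SemanticActionSet = &frameCounter{}

func (c *frameCounter) Shift(tok *mldriver.Token) { c.frames++ }

func (c *frameCounter) ShiftError() { c.frames++ }

func (c *frameCounter) Reduce(prodNum int) {
	// The parsing table records how many symbols each production's RHS has.
	n := c.gram.ParsingTable.AlternativeSymbolCounts[prodNum]
	c.frames = c.frames - n + 1
}

func (c *frameCounter) Accept() {}

func (c *frameCounter) TrapError(n int) { c.frames -= n }

func (c *frameCounter) MissError() {}
```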
+ Reduce(prodNum int) + + // Accept runs when the driver accepts an input. + Accept() + + // TrapError runs when the driver traps a syntax error. `n` is the number of frames that the driver discards + // from the state stack. + TrapError(n int) + + // MissError runs when the driver fails to trap a syntax error. + MissError() +} + +var _ SemanticActionSet = &SyntaxTreeActionSet{} + +type Node struct { + KindName string + Text string + Row int + Col int + Children []*Node +} + +func PrintTree(w io.Writer, node *Node) { + printTree(w, node, "", "") +} + +func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { + if node == nil { + return + } + + if node.Text != "" { + fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text) + } else { + fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) + } + + num := len(node.Children) + for i, child := range node.Children { + var line string + if num > 1 && i < num-1 { + line = "├─ " + } else { + line = "└─ " + } + + var prefix string + if i >= num-1 { + prefix = " " + } else { + prefix = "│ " + } + + printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) + } +} + +type SyntaxTreeActionSet struct { + gram *spec.CompiledGrammar + makeAST bool + makeCST bool + ast *Node + cst *Node + semStack *semanticStack +} + +func NewSyntaxTreeActionSet(gram *spec.CompiledGrammar, makeAST bool, makeCST bool) *SyntaxTreeActionSet { + return &SyntaxTreeActionSet{ + gram: gram, + makeAST: makeAST, + makeCST: makeCST, + semStack: newSemanticStack(), + } +} + +func (a *SyntaxTreeActionSet) Shift(tok *mldriver.Token) { + term := a.tokenToTerminal(tok) + + var ast *Node + var cst *Node + if a.makeAST { + ast = &Node{ + KindName: a.gram.ParsingTable.Terminals[term], + Text: tok.Text(), + Row: tok.Row, + Col: tok.Col, + } + } + if a.makeCST { + cst = &Node{ + KindName: a.gram.ParsingTable.Terminals[term], + Text: tok.Text(), + Row: tok.Row, + Col: tok.Col, + } + } + + a.semStack.push(&semanticFrame{ + cst: cst, + ast: ast, + }) +} + +func (a *SyntaxTreeActionSet) ShiftError() { + errSym := a.gram.ParsingTable.ErrorSymbol + + var ast *Node + var cst *Node + if a.makeAST { + ast = &Node{ + KindName: a.gram.ParsingTable.Terminals[errSym], + } + } + if a.makeCST { + cst = &Node{ + KindName: a.gram.ParsingTable.Terminals[errSym], + } + } + + a.semStack.push(&semanticFrame{ + cst: cst, + ast: ast, + }) +} + +func (a *SyntaxTreeActionSet) Reduce(prodNum int) { + lhs := a.gram.ParsingTable.LHSSymbols[prodNum] + + // When an alternative is empty, `n` will be 0, and `handle` will be empty slice. + n := a.gram.ParsingTable.AlternativeSymbolCounts[prodNum] + handle := a.semStack.pop(n) + + var ast *Node + var cst *Node + if a.makeAST { + act := a.gram.ASTAction.Entries[prodNum] + var children []*Node + if act != nil { + // Count the number of children in advance to avoid frequent growth in a slice for children. + { + l := 0 + for _, e := range act { + if e > 0 { + l++ + } else { + offset := e*-1 - 1 + l += len(handle[offset].ast.Children) + } + } + + children = make([]*Node, l) + } + + p := 0 + for _, e := range act { + if e > 0 { + offset := e - 1 + children[p] = handle[offset].ast + p++ + } else { + offset := e*-1 - 1 + for _, c := range handle[offset].ast.Children { + children[p] = c + p++ + } + } + } + } else { + // If an alternative has no AST action, a driver generates + // a node with the same structure as a CST. 
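With `Node` and `PrintTree` now living in driver/semantic_action.go as the public result type of `SyntaxTreeActionSet`, user code walks the tree directly. A small sketch that collects the text of every leaf; `leafTexts` is illustrative:

```go
package example

import "github.com/nihei9/vartan/driver"

// leafTexts returns the Text of every leaf node in depth-first order, e.g.
// the token spellings of an AST produced by a SyntaxTreeActionSet.
func leafTexts(node *driver.Node) []string {
	if node == nil {
		return nil
	}
	if len(node.Children) == 0 {
		if node.Text == "" {
			return nil
		}
		return []string{node.Text}
	}
	var texts []string
	for _, child := range node.Children {
		texts = append(texts, leafTexts(child)...)
	}
	return texts
}
```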
+ + children = make([]*Node, len(handle)) + for i, f := range handle { + children[i] = f.ast + } + } + + ast = &Node{ + KindName: a.gram.ParsingTable.NonTerminals[lhs], + Children: children, + } + } + if a.makeCST { + children := make([]*Node, len(handle)) + for i, f := range handle { + children[i] = f.cst + } + + cst = &Node{ + KindName: a.gram.ParsingTable.NonTerminals[lhs], + Children: children, + } + } + + a.semStack.push(&semanticFrame{ + cst: cst, + ast: ast, + }) + +} + +func (a *SyntaxTreeActionSet) Accept() { + top := a.semStack.pop(1) + a.cst = top[0].cst + a.ast = top[0].ast +} + +func (a *SyntaxTreeActionSet) TrapError(n int) { + a.semStack.pop(n) +} + +func (a *SyntaxTreeActionSet) MissError() { +} + +func (a *SyntaxTreeActionSet) CST() *Node { + return a.cst +} + +func (a *SyntaxTreeActionSet) AST() *Node { + return a.ast +} + +func (a *SyntaxTreeActionSet) tokenToTerminal(tok *mldriver.Token) int { + if tok.EOF { + return a.gram.ParsingTable.EOFSymbol + } + + return a.gram.LexicalSpecification.Maleeni.KindToTerminal[tok.KindID] +} + +type semanticFrame struct { + cst *Node + ast *Node +} + +type semanticStack struct { + frames []*semanticFrame +} + +func newSemanticStack() *semanticStack { + return &semanticStack{} +} + +func (s *semanticStack) push(f *semanticFrame) { + s.frames = append(s.frames, f) +} + +func (s *semanticStack) pop(n int) []*semanticFrame { + fs := s.frames[len(s.frames)-n:] + s.frames = s.frames[:len(s.frames)-n] + + return fs +} diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go new file mode 100644 index 0000000..a2c2bb0 --- /dev/null +++ b/driver/semantic_action_test.go @@ -0,0 +1,203 @@ +package driver + +import ( + "fmt" + "strings" + "testing" + + mldriver "github.com/nihei9/maleeni/driver" + "github.com/nihei9/vartan/grammar" + "github.com/nihei9/vartan/spec" +) + +type testSemAct struct { + gram *spec.CompiledGrammar + actLog []string +} + +func (a *testSemAct) Shift(tok *mldriver.Token) { + a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", tok.KindName)) +} + +func (a *testSemAct) ShiftError() { + a.actLog = append(a.actLog, "shift/error") +} + +func (a *testSemAct) Reduce(prodNum int) { + lhsSym := a.gram.ParsingTable.LHSSymbols[prodNum] + lhsText := a.gram.ParsingTable.NonTerminals[lhsSym] + a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText)) +} + +func (a *testSemAct) Accept() { + a.actLog = append(a.actLog, "accept") +} + +func (a *testSemAct) TrapError(n int) { + a.actLog = append(a.actLog, fmt.Sprintf("trap/%v", n)) +} + +func (a *testSemAct) MissError() { + a.actLog = append(a.actLog, "miss") +} + +func TestParserWithSemanticAction(t *testing.T) { + specSrcWithErrorProd := ` +seq + : seq elem semicolon + | elem semicolon + | error semicolon #recover + ; +elem + : char char char + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +semicolon: ';'; +char: "[a-z]"; +` + + specSrcWithoutErrorProd := ` +seq + : seq elem semicolon + | elem semicolon + ; +elem + : char char char + ; + +ws: "[\u{0009}\u{0020}]+" #skip; +semicolon: ';'; +char: "[a-z]"; +` + + tests := []struct { + caption string + specSrc string + src string + actLog []string + }{ + { + caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.", + specSrc: specSrcWithErrorProd, + src: `a b c; d e f;`, + actLog: []string{ + "shift/char", + "shift/char", + "shift/char", + "reduce/elem", + "shift/semicolon", + "reduce/seq", + + "shift/char", + "shift/char", + "shift/char", + "reduce/elem", + "shift/semicolon", + 
"reduce/seq", + + "accept", + }, + }, + { + caption: "when a grammar has `error` symbol, the driver calls `TrapError` and `ShiftError`.", + specSrc: specSrcWithErrorProd, + src: `a; b !; c d !; e f g;`, + actLog: []string{ + "shift/char", + "trap/1", + "shift/error", + "shift/semicolon", + "reduce/seq", + + "shift/char", + "trap/2", + "shift/error", + "shift/semicolon", + "reduce/seq", + + "shift/char", + "shift/char", + "trap/3", + "shift/error", + "shift/semicolon", + "reduce/seq", + + "shift/char", + "shift/char", + "shift/char", + "reduce/elem", + "shift/semicolon", + "reduce/seq", + + // Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted + // according to the error production. + "accept", + }, + }, + { + caption: "the driver doesn't call `Accept` when a syntax error is trapped, but the input doesn't meet the error production", + specSrc: specSrcWithErrorProd, + src: `a !`, + actLog: []string{ + "shift/char", + "trap/1", + "shift/error", + }, + }, + { + caption: "when a syntax error isn't trapped, the driver calls `MissError`.", + specSrc: specSrcWithoutErrorProd, + src: `a !`, + actLog: []string{ + "shift/char", + + "miss", + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + ast, err := spec.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + g, err := b.Build() + if err != nil { + t.Fatal(err) + } + + gram, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassLALR)) + if err != nil { + t.Fatal(err) + } + + semAct := &testSemAct{ + gram: gram, + } + p, err := NewParser(gram, strings.NewReader(tt.src), SemanticAction(semAct)) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + if len(semAct.actLog) != len(tt.actLog) { + t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog) + } + + for i, e := range tt.actLog { + if semAct.actLog[i] != e { + t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog) + } + } + }) + } +} diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go index 427f0d1..a019b61 100644 --- a/driver/syntax_error_test.go +++ b/driver/syntax_error_test.go @@ -111,7 +111,7 @@ c: 'c'; t.Fatal(err) } - p, err := NewParser(gram, strings.NewReader(tt.src), MakeAST(), MakeCST()) + p, err := NewParser(gram, strings.NewReader(tt.src)) if err != nil { t.Fatal(err) } |