diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2022-04-03 00:23:18 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2022-04-03 01:26:39 +0900 |
commit | 14b2d7e2728ab0314db56fc6e493d06fa285d006 (patch) | |
tree | 94b92e6307a570edeaae1a43a104e200981d18cc | |
parent | Fix help documents (diff) | |
download | urubu-14b2d7e2728ab0314db56fc6e493d06fa285d006.tar.gz urubu-14b2d7e2728ab0314db56fc6e493d06fa285d006.tar.xz |
Allow arbitrary user-defined types for nodes in a syntax tree
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | cmd/vartan/parse.go | 11 | ||||
-rw-r--r-- | driver/conflict_test.go | 13 | ||||
-rw-r--r-- | driver/parser_test.go | 28 | ||||
-rw-r--r-- | driver/semantic_action.go | 374 |
5 files changed, 229 insertions, 203 deletions
@@ -226,8 +226,8 @@ func main() { os.Exit(1) } gram := NewGrammar() - treeAct := NewSyntaxTreeActionSet(gram, true, false) - p, err := NewParser(toks, gram, SemanticAction(treeAct)) + tb := NewDefaultSyntaxTreeBuilder() + p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb))) if err != nil { fmt.Println(err) os.Exit(1) @@ -245,7 +245,7 @@ func main() { os.Exit(1) } fmt.Println("accepted") - PrintTree(os.Stdout, treeAct.AST()) + PrintTree(os.Stdout, tb.Tree()) } func printSyntaxError(w io.Writer, synErr *SyntaxError, gram Grammar) { diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go index 14412bf..b65f72f 100644 --- a/cmd/vartan/parse.go +++ b/cmd/vartan/parse.go @@ -70,6 +70,7 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { var p *driver.Parser var treeAct *driver.SyntaxTreeActionSet + var tb *driver.DefaulSyntaxTreeBuilder { src := os.Stdin if *parseFlags.source != "" { @@ -87,9 +88,11 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { { switch { case *parseFlags.cst: - treeAct = driver.NewSyntaxTreeActionSet(gram, false, true) + tb = driver.NewDefaultSyntaxTreeBuilder() + treeAct = driver.NewCSTActionSet(gram, tb) case !*parseFlags.onlyParse: - treeAct = driver.NewSyntaxTreeActionSet(gram, true, false) + tb = driver.NewDefaultSyntaxTreeBuilder() + treeAct = driver.NewASTActionSet(gram, tb) } if treeAct != nil { opts = append(opts, driver.SemanticAction(treeAct)) @@ -147,9 +150,9 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) { var tree *driver.Node if *parseFlags.cst { - tree = treeAct.CST() + tree = tb.Tree() } else { - tree = treeAct.AST() + tree = tb.Tree() } if tree != nil { if len(synErrs) > 0 { diff --git a/driver/conflict_test.go b/driver/conflict_test.go index 1f8914f..7672c00 100644 --- a/driver/conflict_test.go +++ b/driver/conflict_test.go @@ -1,8 +1,6 @@ package driver import ( - "fmt" - "os" "strings" "testing" @@ -363,8 +361,8 @@ assign: '='; } gram := NewGrammar(cg) - treeAct := NewSyntaxTreeActionSet(gram, false, true) - p, err := NewParser(toks, gram, SemanticAction(treeAct)) + tb := NewDefaultSyntaxTreeBuilder() + p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb))) if err != nil { t.Fatal(err) } @@ -374,10 +372,9 @@ assign: '='; t.Fatal(err) } - fmt.Printf("CST:\n") - PrintTree(os.Stdout, treeAct.CST()) - - testTree(t, treeAct.CST(), tt.cst) + if tt.cst != nil { + testTree(t, tb.Tree(), tt.cst) + } }) } } diff --git a/driver/parser_test.go b/driver/parser_test.go index d964cbc..04c42c5 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -2,7 +2,6 @@ package driver import ( "fmt" - "os" "strings" "testing" @@ -1072,8 +1071,15 @@ bar: 'bar'; } gram := NewGrammar(cg) - treeAct := NewSyntaxTreeActionSet(gram, true, true) - p, err := NewParser(toks, gram, SemanticAction(treeAct)) + tb := NewDefaultSyntaxTreeBuilder() + var opt []ParserOption + switch { + case tt.ast != nil: + opt = append(opt, SemanticAction(NewASTActionSet(gram, tb))) + case tt.cst != nil: + opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb))) + } + p, err := NewParser(toks, gram, opt...) if err != nil { t.Fatal(err) } @@ -1087,18 +1093,12 @@ bar: 'bar'; t.Fatalf("unexpected syntax errors occurred: %+v", p.SyntaxErrors()) } - if tt.cst != nil { - testTree(t, treeAct.CST(), tt.cst) - } - - if tt.ast != nil { - testTree(t, treeAct.AST(), tt.ast) + switch { + case tt.ast != nil: + testTree(t, tb.Tree(), tt.ast) + case tt.cst != nil: + testTree(t, tb.Tree(), tt.cst) } - - fmt.Println("CST:") - PrintTree(os.Stdout, treeAct.CST()) - fmt.Println("AST:") - PrintTree(os.Stdout, treeAct.AST()) }) } } diff --git a/driver/semantic_action.go b/driver/semantic_action.go index 61b00f0..7c52e24 100644 --- a/driver/semantic_action.go +++ b/driver/semantic_action.go @@ -5,127 +5,143 @@ import ( "io" ) +// SemanticActionSet is a set of semantic actions a parser calls. type SemanticActionSet interface { - // Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to - // the symbol. When the driver recovered from an error state by shifting the token, `recovered` is true. + // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol. + // When the parser recovered from an error state by shifting the token, `recovered` is true. Shift(tok VToken, recovered bool) - // Reduce runs when the driver reduces an RHS of a production to its LHS. `prodNum` is a number of - // the production. When the driver recovered from an error state by reducing the production, - // `recovered` is true. + // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production. + // When the parser recovered from an error state by reducing the production, `recovered` is true. Reduce(prodNum int, recovered bool) - // Accept runs when the driver accepts an input. + // Accept runs when the parser accepts an input. Accept() - // TrapAndShiftError runs when the driver traps a syntax error and shifts a error symbol onto the state stack. - // `cause` is a token that caused a syntax error. `popped` is the number of frames that the driver discards + // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack. + // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards // from the state stack. // Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token // corresponding to the error symbol doesn't exist. TrapAndShiftError(cause VToken, popped int) - // MissError runs when the driver fails to trap a syntax error. `cause` is a token that caused a syntax error. + // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error. MissError(cause VToken) } var _ SemanticActionSet = &SyntaxTreeActionSet{} -type Node struct { - KindName string - Text string - Row int - Col int - Children []*Node - Error bool +// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface. +type SyntaxTreeNode interface { + // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast` + // directive with `...` operator. + ChildCount() int + + // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast` + // directive with `...` operator. + ExpandChildren() []SyntaxTreeNode } -func PrintTree(w io.Writer, node *Node) { - printTree(w, node, "", "") +var _ SyntaxTreeNode = &Node{} + +// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types. +// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface. +type SyntaxTreeBuilder interface { + Shift(kindName string, text string, row, col int) SyntaxTreeNode + ShiftError(kindName string) SyntaxTreeNode + Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode + Accept(f SyntaxTreeNode) } -func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { - if node == nil { - return +var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{} + +// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder. +type DefaulSyntaxTreeBuilder struct { + tree *Node +} + +// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder. +func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder { + return &DefaulSyntaxTreeBuilder{} +} + +// Shift is a implementation of SyntaxTreeBuilder.Shift. +func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode { + return &Node{ + KindName: kindName, + Text: text, + Row: row, + Col: col, } +} - switch { - case node.Error: - fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName) - case node.Text != "": - fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text) - default: - fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) +// ShiftError is a implementation of SyntaxTreeBuilder.ShiftError. +func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode { + return &Node{ + KindName: kindName, + Error: true, } +} - num := len(node.Children) - for i, child := range node.Children { - var line string - if num > 1 && i < num-1 { - line = "├─ " - } else { - line = "└─ " - } +// Reduce is a implementation of SyntaxTreeBuilder.Reduce. +func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode { + cNodes := make([]*Node, len(children)) + for i, c := range children { + cNodes[i] = c.(*Node) + } + return &Node{ + KindName: kindName, + Children: cNodes, + } +} - var prefix string - if i >= num-1 { - prefix = " " - } else { - prefix = "│ " - } +// Accept is a implementation of SyntaxTreeBuilder.Accept. +func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) { + b.tree = f.(*Node) +} - printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) - } +// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil. +func (b *DefaulSyntaxTreeBuilder) Tree() *Node { + return b.tree } +// SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree. type SyntaxTreeActionSet struct { - gram Grammar - makeAST bool - makeCST bool - ast *Node - cst *Node - semStack *semanticStack + gram Grammar + builder SyntaxTreeBuilder + semStack *semanticStack + disableASTAction bool } -func NewSyntaxTreeActionSet(gram Grammar, makeAST bool, makeCST bool) *SyntaxTreeActionSet { +// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree). +// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them. +func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet { return &SyntaxTreeActionSet{ gram: gram, - makeAST: makeAST, - makeCST: makeCST, + builder: builder, semStack: newSemanticStack(), } } -func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) { - term := a.tokenToTerminal(tok) - - var ast *Node - var cst *Node - if a.makeAST { - row, col := tok.Position() - ast = &Node{ - KindName: a.gram.Terminal(term), - Text: string(tok.Lexeme()), - Row: row, - Col: col, - } - } - if a.makeCST { - row, col := tok.Position() - cst = &Node{ - KindName: a.gram.Terminal(term), - Text: string(tok.Lexeme()), - Row: row, - Col: col, - } +// NewCSTTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree). +// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them. +func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet { + return &SyntaxTreeActionSet{ + gram: gram, + builder: builder, + semStack: newSemanticStack(), + disableASTAction: true, } +} - a.semStack.push(&semanticFrame{ - cst: cst, - ast: ast, - }) +// Shift is a implementation of SemanticActionSet.Shift method. +func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) { + term := a.tokenToTerminal(tok) + row, col := tok.Position() + a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col)) } +// Reduce is a implementation of SemanticActionSet.Reduce method. func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) { lhs := a.gram.LHS(prodNum) @@ -133,116 +149,66 @@ func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) { n := a.gram.AlternativeSymbolCount(prodNum) handle := a.semStack.pop(n) - var ast *Node - var cst *Node - if a.makeAST { - act := a.gram.ASTAction(prodNum) - var children []*Node - if act != nil { - // Count the number of children in advance to avoid frequent growth in a slice for children. - { - l := 0 - for _, e := range act { - if e > 0 { - l++ - } else { - offset := e*-1 - 1 - l += len(handle[offset].ast.Children) - } - } - - children = make([]*Node, l) - } - - p := 0 - for _, e := range act { + var astAct []int + if !a.disableASTAction { + astAct = a.gram.ASTAction(prodNum) + } + var children []SyntaxTreeNode + if astAct != nil { + // Count the number of children in advance to avoid frequent growth in a slice for children. + { + l := 0 + for _, e := range astAct { if e > 0 { - offset := e - 1 - children[p] = handle[offset].ast - p++ + l++ } else { offset := e*-1 - 1 - for _, c := range handle[offset].ast.Children { - children[p] = c - p++ - } + l += handle[offset].ChildCount() } } - } else { - // If an alternative has no AST action, a driver generates - // a node with the same structure as a CST. - - children = make([]*Node, len(handle)) - for i, f := range handle { - children[i] = f.ast - } - } - ast = &Node{ - KindName: a.gram.NonTerminal(lhs), - Children: children, - } - } - if a.makeCST { - children := make([]*Node, len(handle)) - for i, f := range handle { - children[i] = f.cst + children = make([]SyntaxTreeNode, l) } - cst = &Node{ - KindName: a.gram.NonTerminal(lhs), - Children: children, + p := 0 + for _, e := range astAct { + if e > 0 { + offset := e - 1 + children[p] = handle[offset] + p++ + } else { + offset := e*-1 - 1 + for _, c := range handle[offset].ExpandChildren() { + children[p] = c + p++ + } + } } + } else { + // If an alternative has no AST action, a driver generates + // a node with the same structure as a CST. + children = handle } - a.semStack.push(&semanticFrame{ - cst: cst, - ast: ast, - }) - + a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children)) } +// Accept is a implementation of SemanticActionSet.Accept method. func (a *SyntaxTreeActionSet) Accept() { top := a.semStack.pop(1) - a.cst = top[0].cst - a.ast = top[0].ast + a.builder.Accept(top[0]) } +// TrapAndShiftError is a implementation of SemanticActionSet.TrapAndShiftError method. func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) { a.semStack.pop(popped) - - var ast *Node - var cst *Node - if a.makeAST { - ast = &Node{ - KindName: a.gram.Terminal(a.gram.Error()), - Error: true, - } - } - if a.makeCST { - cst = &Node{ - KindName: a.gram.Terminal(a.gram.Error()), - Error: true, - } - } - - a.semStack.push(&semanticFrame{ - cst: cst, - ast: ast, - }) + a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error()))) } +// MissError is a implementation of SemanticActionSet.MissError method. func (a *SyntaxTreeActionSet) MissError(cause VToken) { } -func (a *SyntaxTreeActionSet) CST() *Node { - return a.cst -} - -func (a *SyntaxTreeActionSet) AST() *Node { - return a.ast -} - func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int { if tok.EOF() { return a.gram.EOF() @@ -251,26 +217,86 @@ func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int { return tok.TerminalID() } -type semanticFrame struct { - cst *Node - ast *Node -} - type semanticStack struct { - frames []*semanticFrame + frames []SyntaxTreeNode } func newSemanticStack() *semanticStack { - return &semanticStack{} + return &semanticStack{ + frames: make([]SyntaxTreeNode, 0, 100), + } } -func (s *semanticStack) push(f *semanticFrame) { +func (s *semanticStack) push(f SyntaxTreeNode) { s.frames = append(s.frames, f) } -func (s *semanticStack) pop(n int) []*semanticFrame { +func (s *semanticStack) pop(n int) []SyntaxTreeNode { fs := s.frames[len(s.frames)-n:] s.frames = s.frames[:len(s.frames)-n] return fs } + +// Node is a implementation of SyntaxTreeNode interface. +type Node struct { + KindName string + Text string + Row int + Col int + Children []*Node + Error bool +} + +// ChildCount is a implementation of SyntaxTreeNode.ChildCount. +func (n *Node) ChildCount() int { + return len(n.Children) +} + +// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren. +func (n *Node) ExpandChildren() []SyntaxTreeNode { + fs := make([]SyntaxTreeNode, len(n.Children)) + for i, n := range n.Children { + fs[i] = n + } + return fs +} + +// PrintTree prints a syntax tree whose root is `node`. +func PrintTree(w io.Writer, node *Node) { + printTree(w, node, "", "") +} + +func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { + if node == nil { + return + } + + switch { + case node.Error: + fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName) + case node.Text != "": + fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text) + default: + fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) + } + + num := len(node.Children) + for i, child := range node.Children { + var line string + if num > 1 && i < num-1 { + line = "├─ " + } else { + line = "└─ " + } + + var prefix string + if i >= num-1 { + prefix = " " + } else { + prefix = "│ " + } + + printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) + } +} |