aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2022-04-03 00:23:18 +0900
committerRyo Nihei <nihei.dev@gmail.com>2022-04-03 01:26:39 +0900
commit14b2d7e2728ab0314db56fc6e493d06fa285d006 (patch)
tree94b92e6307a570edeaae1a43a104e200981d18cc
parentFix help documents (diff)
downloadurubu-14b2d7e2728ab0314db56fc6e493d06fa285d006.tar.gz
urubu-14b2d7e2728ab0314db56fc6e493d06fa285d006.tar.xz
Allow arbitrary user-defined types for nodes in a syntax tree
-rw-r--r--README.md6
-rw-r--r--cmd/vartan/parse.go11
-rw-r--r--driver/conflict_test.go13
-rw-r--r--driver/parser_test.go28
-rw-r--r--driver/semantic_action.go374
5 files changed, 229 insertions, 203 deletions
diff --git a/README.md b/README.md
index 0ce4b94..7716184 100644
--- a/README.md
+++ b/README.md
@@ -226,8 +226,8 @@ func main() {
os.Exit(1)
}
gram := NewGrammar()
- treeAct := NewSyntaxTreeActionSet(gram, true, false)
- p, err := NewParser(toks, gram, SemanticAction(treeAct))
+ tb := NewDefaultSyntaxTreeBuilder()
+ p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb)))
if err != nil {
fmt.Println(err)
os.Exit(1)
@@ -245,7 +245,7 @@ func main() {
os.Exit(1)
}
fmt.Println("accepted")
- PrintTree(os.Stdout, treeAct.AST())
+ PrintTree(os.Stdout, tb.Tree())
}
func printSyntaxError(w io.Writer, synErr *SyntaxError, gram Grammar) {
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index 14412bf..b65f72f 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -70,6 +70,7 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
var p *driver.Parser
var treeAct *driver.SyntaxTreeActionSet
+ var tb *driver.DefaulSyntaxTreeBuilder
{
src := os.Stdin
if *parseFlags.source != "" {
@@ -87,9 +88,11 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
{
switch {
case *parseFlags.cst:
- treeAct = driver.NewSyntaxTreeActionSet(gram, false, true)
+ tb = driver.NewDefaultSyntaxTreeBuilder()
+ treeAct = driver.NewCSTActionSet(gram, tb)
case !*parseFlags.onlyParse:
- treeAct = driver.NewSyntaxTreeActionSet(gram, true, false)
+ tb = driver.NewDefaultSyntaxTreeBuilder()
+ treeAct = driver.NewASTActionSet(gram, tb)
}
if treeAct != nil {
opts = append(opts, driver.SemanticAction(treeAct))
@@ -147,9 +150,9 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
var tree *driver.Node
if *parseFlags.cst {
- tree = treeAct.CST()
+ tree = tb.Tree()
} else {
- tree = treeAct.AST()
+ tree = tb.Tree()
}
if tree != nil {
if len(synErrs) > 0 {
diff --git a/driver/conflict_test.go b/driver/conflict_test.go
index 1f8914f..7672c00 100644
--- a/driver/conflict_test.go
+++ b/driver/conflict_test.go
@@ -1,8 +1,6 @@
package driver
import (
- "fmt"
- "os"
"strings"
"testing"
@@ -363,8 +361,8 @@ assign: '=';
}
gram := NewGrammar(cg)
- treeAct := NewSyntaxTreeActionSet(gram, false, true)
- p, err := NewParser(toks, gram, SemanticAction(treeAct))
+ tb := NewDefaultSyntaxTreeBuilder()
+ p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
if err != nil {
t.Fatal(err)
}
@@ -374,10 +372,9 @@ assign: '=';
t.Fatal(err)
}
- fmt.Printf("CST:\n")
- PrintTree(os.Stdout, treeAct.CST())
-
- testTree(t, treeAct.CST(), tt.cst)
+ if tt.cst != nil {
+ testTree(t, tb.Tree(), tt.cst)
+ }
})
}
}
diff --git a/driver/parser_test.go b/driver/parser_test.go
index d964cbc..04c42c5 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -2,7 +2,6 @@ package driver
import (
"fmt"
- "os"
"strings"
"testing"
@@ -1072,8 +1071,15 @@ bar: 'bar';
}
gram := NewGrammar(cg)
- treeAct := NewSyntaxTreeActionSet(gram, true, true)
- p, err := NewParser(toks, gram, SemanticAction(treeAct))
+ tb := NewDefaultSyntaxTreeBuilder()
+ var opt []ParserOption
+ switch {
+ case tt.ast != nil:
+ opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
+ case tt.cst != nil:
+ opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
+ }
+ p, err := NewParser(toks, gram, opt...)
if err != nil {
t.Fatal(err)
}
@@ -1087,18 +1093,12 @@ bar: 'bar';
t.Fatalf("unexpected syntax errors occurred: %+v", p.SyntaxErrors())
}
- if tt.cst != nil {
- testTree(t, treeAct.CST(), tt.cst)
- }
-
- if tt.ast != nil {
- testTree(t, treeAct.AST(), tt.ast)
+ switch {
+ case tt.ast != nil:
+ testTree(t, tb.Tree(), tt.ast)
+ case tt.cst != nil:
+ testTree(t, tb.Tree(), tt.cst)
}
-
- fmt.Println("CST:")
- PrintTree(os.Stdout, treeAct.CST())
- fmt.Println("AST:")
- PrintTree(os.Stdout, treeAct.AST())
})
}
}
diff --git a/driver/semantic_action.go b/driver/semantic_action.go
index 61b00f0..7c52e24 100644
--- a/driver/semantic_action.go
+++ b/driver/semantic_action.go
@@ -5,127 +5,143 @@ import (
"io"
)
+// SemanticActionSet is a set of semantic actions a parser calls.
type SemanticActionSet interface {
- // Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to
- // the symbol. When the driver recovered from an error state by shifting the token, `recovered` is true.
+ // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol.
+ // When the parser recovered from an error state by shifting the token, `recovered` is true.
Shift(tok VToken, recovered bool)
- // Reduce runs when the driver reduces an RHS of a production to its LHS. `prodNum` is a number of
- // the production. When the driver recovered from an error state by reducing the production,
- // `recovered` is true.
+ // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production.
+ // When the parser recovered from an error state by reducing the production, `recovered` is true.
Reduce(prodNum int, recovered bool)
- // Accept runs when the driver accepts an input.
+ // Accept runs when the parser accepts an input.
Accept()
- // TrapAndShiftError runs when the driver traps a syntax error and shifts a error symbol onto the state stack.
- // `cause` is a token that caused a syntax error. `popped` is the number of frames that the driver discards
+ // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack.
+ // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards
// from the state stack.
// Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
// corresponding to the error symbol doesn't exist.
TrapAndShiftError(cause VToken, popped int)
- // MissError runs when the driver fails to trap a syntax error. `cause` is a token that caused a syntax error.
+ // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error.
MissError(cause VToken)
}
var _ SemanticActionSet = &SyntaxTreeActionSet{}
-type Node struct {
- KindName string
- Text string
- Row int
- Col int
- Children []*Node
- Error bool
+// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface.
+type SyntaxTreeNode interface {
+ // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast`
+ // directive with `...` operator.
+ ChildCount() int
+
+ // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast`
+ // directive with `...` operator.
+ ExpandChildren() []SyntaxTreeNode
}
-func PrintTree(w io.Writer, node *Node) {
- printTree(w, node, "", "")
+var _ SyntaxTreeNode = &Node{}
+
+// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types.
+// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface.
+type SyntaxTreeBuilder interface {
+ Shift(kindName string, text string, row, col int) SyntaxTreeNode
+ ShiftError(kindName string) SyntaxTreeNode
+ Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode
+ Accept(f SyntaxTreeNode)
}
-func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) {
- if node == nil {
- return
+var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{}
+
+// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder.
+type DefaulSyntaxTreeBuilder struct {
+ tree *Node
+}
+
+// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder.
+func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder {
+ return &DefaulSyntaxTreeBuilder{}
+}
+
+// Shift is a implementation of SyntaxTreeBuilder.Shift.
+func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode {
+ return &Node{
+ KindName: kindName,
+ Text: text,
+ Row: row,
+ Col: col,
}
+}
- switch {
- case node.Error:
- fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName)
- case node.Text != "":
- fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text)
- default:
- fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
+// ShiftError is a implementation of SyntaxTreeBuilder.ShiftError.
+func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode {
+ return &Node{
+ KindName: kindName,
+ Error: true,
}
+}
- num := len(node.Children)
- for i, child := range node.Children {
- var line string
- if num > 1 && i < num-1 {
- line = "├─ "
- } else {
- line = "└─ "
- }
+// Reduce is a implementation of SyntaxTreeBuilder.Reduce.
+func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode {
+ cNodes := make([]*Node, len(children))
+ for i, c := range children {
+ cNodes[i] = c.(*Node)
+ }
+ return &Node{
+ KindName: kindName,
+ Children: cNodes,
+ }
+}
- var prefix string
- if i >= num-1 {
- prefix = " "
- } else {
- prefix = "│ "
- }
+// Accept is a implementation of SyntaxTreeBuilder.Accept.
+func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) {
+ b.tree = f.(*Node)
+}
- printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
- }
+// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil.
+func (b *DefaulSyntaxTreeBuilder) Tree() *Node {
+ return b.tree
}
+// SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree.
type SyntaxTreeActionSet struct {
- gram Grammar
- makeAST bool
- makeCST bool
- ast *Node
- cst *Node
- semStack *semanticStack
+ gram Grammar
+ builder SyntaxTreeBuilder
+ semStack *semanticStack
+ disableASTAction bool
}
-func NewSyntaxTreeActionSet(gram Grammar, makeAST bool, makeCST bool) *SyntaxTreeActionSet {
+// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree).
+// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them.
+func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
return &SyntaxTreeActionSet{
gram: gram,
- makeAST: makeAST,
- makeCST: makeCST,
+ builder: builder,
semStack: newSemanticStack(),
}
}
-func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
- term := a.tokenToTerminal(tok)
-
- var ast *Node
- var cst *Node
- if a.makeAST {
- row, col := tok.Position()
- ast = &Node{
- KindName: a.gram.Terminal(term),
- Text: string(tok.Lexeme()),
- Row: row,
- Col: col,
- }
- }
- if a.makeCST {
- row, col := tok.Position()
- cst = &Node{
- KindName: a.gram.Terminal(term),
- Text: string(tok.Lexeme()),
- Row: row,
- Col: col,
- }
+// NewCSTTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree).
+// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them.
+func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
+ return &SyntaxTreeActionSet{
+ gram: gram,
+ builder: builder,
+ semStack: newSemanticStack(),
+ disableASTAction: true,
}
+}
- a.semStack.push(&semanticFrame{
- cst: cst,
- ast: ast,
- })
+// Shift is a implementation of SemanticActionSet.Shift method.
+func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
+ term := a.tokenToTerminal(tok)
+ row, col := tok.Position()
+ a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col))
}
+// Reduce is a implementation of SemanticActionSet.Reduce method.
func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
lhs := a.gram.LHS(prodNum)
@@ -133,116 +149,66 @@ func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
n := a.gram.AlternativeSymbolCount(prodNum)
handle := a.semStack.pop(n)
- var ast *Node
- var cst *Node
- if a.makeAST {
- act := a.gram.ASTAction(prodNum)
- var children []*Node
- if act != nil {
- // Count the number of children in advance to avoid frequent growth in a slice for children.
- {
- l := 0
- for _, e := range act {
- if e > 0 {
- l++
- } else {
- offset := e*-1 - 1
- l += len(handle[offset].ast.Children)
- }
- }
-
- children = make([]*Node, l)
- }
-
- p := 0
- for _, e := range act {
+ var astAct []int
+ if !a.disableASTAction {
+ astAct = a.gram.ASTAction(prodNum)
+ }
+ var children []SyntaxTreeNode
+ if astAct != nil {
+ // Count the number of children in advance to avoid frequent growth in a slice for children.
+ {
+ l := 0
+ for _, e := range astAct {
if e > 0 {
- offset := e - 1
- children[p] = handle[offset].ast
- p++
+ l++
} else {
offset := e*-1 - 1
- for _, c := range handle[offset].ast.Children {
- children[p] = c
- p++
- }
+ l += handle[offset].ChildCount()
}
}
- } else {
- // If an alternative has no AST action, a driver generates
- // a node with the same structure as a CST.
-
- children = make([]*Node, len(handle))
- for i, f := range handle {
- children[i] = f.ast
- }
- }
- ast = &Node{
- KindName: a.gram.NonTerminal(lhs),
- Children: children,
- }
- }
- if a.makeCST {
- children := make([]*Node, len(handle))
- for i, f := range handle {
- children[i] = f.cst
+ children = make([]SyntaxTreeNode, l)
}
- cst = &Node{
- KindName: a.gram.NonTerminal(lhs),
- Children: children,
+ p := 0
+ for _, e := range astAct {
+ if e > 0 {
+ offset := e - 1
+ children[p] = handle[offset]
+ p++
+ } else {
+ offset := e*-1 - 1
+ for _, c := range handle[offset].ExpandChildren() {
+ children[p] = c
+ p++
+ }
+ }
}
+ } else {
+ // If an alternative has no AST action, a driver generates
+ // a node with the same structure as a CST.
+ children = handle
}
- a.semStack.push(&semanticFrame{
- cst: cst,
- ast: ast,
- })
-
+ a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children))
}
+// Accept is a implementation of SemanticActionSet.Accept method.
func (a *SyntaxTreeActionSet) Accept() {
top := a.semStack.pop(1)
- a.cst = top[0].cst
- a.ast = top[0].ast
+ a.builder.Accept(top[0])
}
+// TrapAndShiftError is a implementation of SemanticActionSet.TrapAndShiftError method.
func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) {
a.semStack.pop(popped)
-
- var ast *Node
- var cst *Node
- if a.makeAST {
- ast = &Node{
- KindName: a.gram.Terminal(a.gram.Error()),
- Error: true,
- }
- }
- if a.makeCST {
- cst = &Node{
- KindName: a.gram.Terminal(a.gram.Error()),
- Error: true,
- }
- }
-
- a.semStack.push(&semanticFrame{
- cst: cst,
- ast: ast,
- })
+ a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error())))
}
+// MissError is a implementation of SemanticActionSet.MissError method.
func (a *SyntaxTreeActionSet) MissError(cause VToken) {
}
-func (a *SyntaxTreeActionSet) CST() *Node {
- return a.cst
-}
-
-func (a *SyntaxTreeActionSet) AST() *Node {
- return a.ast
-}
-
func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
if tok.EOF() {
return a.gram.EOF()
@@ -251,26 +217,86 @@ func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
return tok.TerminalID()
}
-type semanticFrame struct {
- cst *Node
- ast *Node
-}
-
type semanticStack struct {
- frames []*semanticFrame
+ frames []SyntaxTreeNode
}
func newSemanticStack() *semanticStack {
- return &semanticStack{}
+ return &semanticStack{
+ frames: make([]SyntaxTreeNode, 0, 100),
+ }
}
-func (s *semanticStack) push(f *semanticFrame) {
+func (s *semanticStack) push(f SyntaxTreeNode) {
s.frames = append(s.frames, f)
}
-func (s *semanticStack) pop(n int) []*semanticFrame {
+func (s *semanticStack) pop(n int) []SyntaxTreeNode {
fs := s.frames[len(s.frames)-n:]
s.frames = s.frames[:len(s.frames)-n]
return fs
}
+
+// Node is a implementation of SyntaxTreeNode interface.
+type Node struct {
+ KindName string
+ Text string
+ Row int
+ Col int
+ Children []*Node
+ Error bool
+}
+
+// ChildCount is a implementation of SyntaxTreeNode.ChildCount.
+func (n *Node) ChildCount() int {
+ return len(n.Children)
+}
+
+// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren.
+func (n *Node) ExpandChildren() []SyntaxTreeNode {
+ fs := make([]SyntaxTreeNode, len(n.Children))
+ for i, n := range n.Children {
+ fs[i] = n
+ }
+ return fs
+}
+
+// PrintTree prints a syntax tree whose root is `node`.
+func PrintTree(w io.Writer, node *Node) {
+ printTree(w, node, "", "")
+}
+
+func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) {
+ if node == nil {
+ return
+ }
+
+ switch {
+ case node.Error:
+ fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName)
+ case node.Text != "":
+ fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text)
+ default:
+ fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
+ }
+
+ num := len(node.Children)
+ for i, child := range node.Children {
+ var line string
+ if num > 1 && i < num-1 {
+ line = "├─ "
+ } else {
+ line = "└─ "
+ }
+
+ var prefix string
+ if i >= num-1 {
+ prefix = " "
+ } else {
+ prefix = "│ "
+ }
+
+ printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
+ }
+}