diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-05-04 17:45:24 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-05-04 17:45:24 +0900 |
commit | a0fd0d5a9077947d09777cf7631ca721ffabf9ec (patch) | |
tree | af060dd455d1895676f1ac5c030839560ec3eb6e /compiler/ast.go | |
parent | Add lex mode (diff) | |
download | tre-a0fd0d5a9077947d09777cf7631ca721ffabf9ec.tar.gz tre-a0fd0d5a9077947d09777cf7631ca721ffabf9ec.tar.xz |
Improve performance of the symbolPositionSet
When using a map to represent a set, performance degrades due to
the increased number of calls of runtime.mapassign.
Especially when the number of symbols is large, as in compiling a pattern that
contains character properties like \p{Letter}, adding elements to the set
alone may take several tens of seconds of CPU time.
Therefore, this commit solves this problem by changing the representation of
the set from map to array.
Diffstat (limited to 'compiler/ast.go')
-rw-r--r-- | compiler/ast.go | 66 |
1 files changed, 37 insertions, 29 deletions
diff --git a/compiler/ast.go b/compiler/ast.go index 79c759c..a419f98 100644 --- a/compiler/ast.go +++ b/compiler/ast.go @@ -9,8 +9,8 @@ type astNode interface { fmt.Stringer children() (astNode, astNode) nullable() bool - first() symbolPositionSet - last() symbolPositionSet + first() *symbolPositionSet + last() *symbolPositionSet } var ( @@ -24,8 +24,8 @@ var ( type symbolNode struct { byteRange pos symbolPosition - firstMemo symbolPositionSet - lastMemo symbolPositionSet + firstMemo *symbolPositionSet + lastMemo *symbolPositionSet } func newSymbolNode(value byte) *symbolNode { @@ -60,7 +60,7 @@ func (n *symbolNode) nullable() bool { return false } -func (n *symbolNode) first() symbolPositionSet { +func (n *symbolNode) first() *symbolPositionSet { if n.firstMemo == nil { n.firstMemo = newSymbolPositionSet() n.firstMemo.add(n.pos) @@ -68,7 +68,7 @@ func (n *symbolNode) first() symbolPositionSet { return n.firstMemo } -func (n *symbolNode) last() symbolPositionSet { +func (n *symbolNode) last() *symbolPositionSet { if n.lastMemo == nil { n.lastMemo = newSymbolPositionSet() n.lastMemo.add(n.pos) @@ -79,8 +79,8 @@ func (n *symbolNode) last() symbolPositionSet { type endMarkerNode struct { id int pos symbolPosition - firstMemo symbolPositionSet - lastMemo symbolPositionSet + firstMemo *symbolPositionSet + lastMemo *symbolPositionSet } func newEndMarkerNode(id int) *endMarkerNode { @@ -102,7 +102,7 @@ func (n *endMarkerNode) nullable() bool { return false } -func (n *endMarkerNode) first() symbolPositionSet { +func (n *endMarkerNode) first() *symbolPositionSet { if n.firstMemo == nil { n.firstMemo = newSymbolPositionSet() n.firstMemo.add(n.pos) @@ -110,7 +110,7 @@ func (n *endMarkerNode) first() symbolPositionSet { return n.firstMemo } -func (n *endMarkerNode) last() symbolPositionSet { +func (n *endMarkerNode) last() *symbolPositionSet { if n.lastMemo == nil { n.lastMemo = newSymbolPositionSet() n.lastMemo.add(n.pos) @@ -121,8 +121,8 @@ func (n *endMarkerNode) last() symbolPositionSet { type concatNode struct { left astNode right astNode - firstMemo symbolPositionSet - lastMemo symbolPositionSet + firstMemo *symbolPositionSet + lastMemo *symbolPositionSet } func newConcatNode(left, right astNode) *concatNode { @@ -144,24 +144,26 @@ func (n *concatNode) nullable() bool { return n.left.nullable() && n.right.nullable() } -func (n *concatNode) first() symbolPositionSet { +func (n *concatNode) first() *symbolPositionSet { if n.firstMemo == nil { n.firstMemo = newSymbolPositionSet() n.firstMemo.merge(n.left.first()) if n.left.nullable() { n.firstMemo.merge(n.right.first()) } + n.firstMemo.sortAndRemoveDuplicates() } return n.firstMemo } -func (n *concatNode) last() symbolPositionSet { +func (n *concatNode) last() *symbolPositionSet { if n.lastMemo == nil { n.lastMemo = newSymbolPositionSet() n.lastMemo.merge(n.right.last()) if n.right.nullable() { n.lastMemo.merge(n.left.last()) } + n.lastMemo.sortAndRemoveDuplicates() } return n.lastMemo } @@ -169,8 +171,8 @@ func (n *concatNode) last() symbolPositionSet { type altNode struct { left astNode right astNode - firstMemo symbolPositionSet - lastMemo symbolPositionSet + firstMemo *symbolPositionSet + lastMemo *symbolPositionSet } func newAltNode(left, right astNode) *altNode { @@ -192,28 +194,30 @@ func (n *altNode) nullable() bool { return n.left.nullable() || n.right.nullable() } -func (n *altNode) first() symbolPositionSet { +func (n *altNode) first() *symbolPositionSet { if n.firstMemo == nil { n.firstMemo = newSymbolPositionSet() n.firstMemo.merge(n.left.first()) n.firstMemo.merge(n.right.first()) + n.firstMemo.sortAndRemoveDuplicates() } return n.firstMemo } -func (n *altNode) last() symbolPositionSet { +func (n *altNode) last() *symbolPositionSet { if n.lastMemo == nil { n.lastMemo = newSymbolPositionSet() n.lastMemo.merge(n.left.last()) n.lastMemo.merge(n.right.last()) + n.lastMemo.sortAndRemoveDuplicates() } return n.lastMemo } type repeatNode struct { left astNode - firstMemo symbolPositionSet - lastMemo symbolPositionSet + firstMemo *symbolPositionSet + lastMemo *symbolPositionSet } func newRepeatNode(left astNode) *repeatNode { @@ -234,18 +238,20 @@ func (n *repeatNode) nullable() bool { return true } -func (n *repeatNode) first() symbolPositionSet { +func (n *repeatNode) first() *symbolPositionSet { if n.firstMemo == nil { n.firstMemo = newSymbolPositionSet() n.firstMemo.merge(n.left.first()) + n.firstMemo.sortAndRemoveDuplicates() } return n.firstMemo } -func (n *repeatNode) last() symbolPositionSet { +func (n *repeatNode) last() *symbolPositionSet { if n.lastMemo == nil { n.lastMemo = newSymbolPositionSet() n.lastMemo.merge(n.left.last()) + n.lastMemo.sortAndRemoveDuplicates() } return n.lastMemo } @@ -260,8 +266,8 @@ func newRepeatOneOrMoreNode(left astNode) *concatNode { type optionNode struct { left astNode - firstMemo symbolPositionSet - lastMemo symbolPositionSet + firstMemo *symbolPositionSet + lastMemo *symbolPositionSet } func newOptionNode(left astNode) *optionNode { @@ -282,18 +288,20 @@ func (n *optionNode) nullable() bool { return true } -func (n *optionNode) first() symbolPositionSet { +func (n *optionNode) first() *symbolPositionSet { if n.firstMemo == nil { n.firstMemo = newSymbolPositionSet() n.firstMemo.merge(n.left.first()) + n.firstMemo.sortAndRemoveDuplicates() } return n.firstMemo } -func (n *optionNode) last() symbolPositionSet { +func (n *optionNode) last() *symbolPositionSet { if n.lastMemo == nil { n.lastMemo = newSymbolPositionSet() n.lastMemo.merge(n.left.last()) + n.lastMemo.sortAndRemoveDuplicates() } return n.lastMemo } @@ -316,7 +324,7 @@ func copyAST(src astNode) astNode { panic(fmt.Errorf("copyAST cannot handle %T type; AST: %v", src, src)) } -type followTable map[symbolPosition]symbolPositionSet +type followTable map[symbolPosition]*symbolPositionSet func genFollowTable(root astNode) followTable { follow := followTable{} @@ -334,14 +342,14 @@ func calcFollow(follow followTable, ast astNode) { switch n := ast.(type) { case *concatNode: l, r := n.children() - for _, p := range l.last().sort() { + for _, p := range l.last().set() { if _, ok := follow[p]; !ok { follow[p] = newSymbolPositionSet() } follow[p].merge(r.first()) } case *repeatNode: - for _, p := range n.last().sort() { + for _, p := range n.last().set() { if _, ok := follow[p]; !ok { follow[p] = newSymbolPositionSet() } |