aboutsummaryrefslogtreecommitdiff
path: root/compiler/symbol_position.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-05-04 17:45:24 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-05-04 17:45:24 +0900
commita0fd0d5a9077947d09777cf7631ca721ffabf9ec (patch)
treeaf060dd455d1895676f1ac5c030839560ec3eb6e /compiler/symbol_position.go
parentAdd lex mode (diff)
downloadtre-a0fd0d5a9077947d09777cf7631ca721ffabf9ec.tar.gz
tre-a0fd0d5a9077947d09777cf7631ca721ffabf9ec.tar.xz
Improve performance of the symbolPositionSet
When using a map to represent a set, performance degrades due to the increased number of calls of runtime.mapassign. Especially when the number of symbols is large, as in compiling a pattern that contains character properties like \p{Letter}, adding elements to the set alone may take several tens of seconds of CPU time. Therefore, this commit solves this problem by changing the representation of the set from map to array.
Diffstat (limited to 'compiler/symbol_position.go')
-rw-r--r--compiler/symbol_position.go79
1 files changed, 53 insertions, 26 deletions
diff --git a/compiler/symbol_position.go b/compiler/symbol_position.go
index 1b400bd..d35dfde 100644
--- a/compiler/symbol_position.go
+++ b/compiler/symbol_position.go
@@ -52,17 +52,26 @@ func (p symbolPosition) describe() (uint16, bool) {
return v, false
}
-type symbolPositionSet map[symbolPosition]struct{}
+type symbolPositionSet struct {
+ // `s` represents a set of symbol positions.
+ // However, immediately after adding a symbol position, the elements may be duplicated.
+ // When you need an aligned set with no duplicates, you can get such value via the set function.
+ s []symbolPosition
+ sorted bool
+}
-func newSymbolPositionSet() symbolPositionSet {
- return map[symbolPosition]struct{}{}
+func newSymbolPositionSet() *symbolPositionSet {
+ return &symbolPositionSet{
+ s: []symbolPosition{},
+ sorted: false,
+ }
}
-func (s symbolPositionSet) String() string {
- if len(s) <= 0 {
+func (s *symbolPositionSet) String() string {
+ if len(s.s) <= 0 {
return "{}"
}
- ps := s.sort()
+ ps := s.sortAndRemoveDuplicates()
var b strings.Builder
fmt.Fprintf(&b, "{")
for i, p := range ps {
@@ -76,22 +85,27 @@ func (s symbolPositionSet) String() string {
return b.String()
}
-func (s symbolPositionSet) add(pos symbolPosition) symbolPositionSet {
- s[pos] = struct{}{}
+func (s *symbolPositionSet) set() []symbolPosition {
+ s.sortAndRemoveDuplicates()
+ return s.s
+}
+
+func (s *symbolPositionSet) add(pos symbolPosition) *symbolPositionSet {
+ s.s = append(s.s, pos)
+ s.sorted = false
return s
}
-func (s symbolPositionSet) merge(t symbolPositionSet) symbolPositionSet {
- for p := range t {
- s.add(p)
- }
+func (s *symbolPositionSet) merge(t *symbolPositionSet) *symbolPositionSet {
+ s.s = append(s.s, t.s...)
+ s.sorted = false
return s
}
-func (s symbolPositionSet) intersect(set symbolPositionSet) symbolPositionSet {
+func (s *symbolPositionSet) intersect(set *symbolPositionSet) *symbolPositionSet {
in := newSymbolPositionSet()
- for p1 := range s {
- for p2 := range set {
+ for _, p1 := range s.s {
+ for _, p2 := range set.s {
if p1 != p2 {
continue
}
@@ -101,11 +115,11 @@ func (s symbolPositionSet) intersect(set symbolPositionSet) symbolPositionSet {
return in
}
-func (s symbolPositionSet) hash() string {
- if len(s) <= 0 {
+func (s *symbolPositionSet) hash() string {
+ if len(s.s) <= 0 {
return ""
}
- sorted := s.sort()
+ sorted := s.sortAndRemoveDuplicates()
var buf []byte
for _, p := range sorted {
b := make([]byte, 8)
@@ -118,15 +132,28 @@ func (s symbolPositionSet) hash() string {
return string(buf)
}
-func (s symbolPositionSet) sort() []symbolPosition {
- sorted := make([]symbolPosition, len(s))
- i := 0
- for p := range s {
- sorted[i] = p
- i++
+func (s *symbolPositionSet) sortAndRemoveDuplicates() []symbolPosition {
+ if s.sorted {
+ return s.s
}
- sortSymbolPositions(sorted, 0, len(sorted)-1)
- return sorted
+
+ sortSymbolPositions(s.s, 0, len(s.s)-1)
+
+ // Remove duplicates.
+ lastV := s.s[0]
+ nextIdx := 1
+ for _, v := range s.s[1:] {
+ if v == lastV {
+ continue
+ }
+ s.s[nextIdx] = v
+ nextIdx++
+ lastV = v
+ }
+ s.s = s.s[:nextIdx]
+ s.sorted = true
+
+ return s.s
}
// sortSymbolPositions sorts a slice of symbol positions as it uses quick sort.