diff options
-rw-r--r-- | README.md | 21 | ||||
-rw-r--r-- | immutable.go | 222 | ||||
-rw-r--r-- | immutable_test.go | 152 |
3 files changed, 342 insertions, 53 deletions
@@ -107,6 +107,27 @@ By default iterators start from index zero, however, the `Seek()` method can be used to jump to a given index. +### Efficiently building lists + +If you are building large lists, it is significantly more efficient to use the +`ListBuilder`. It uses nearly the same API as `List` except that it updates +a list in-place until you are ready to use it. This can improve bulk list +building by 10x or more. + +```go +b := immutable.NewListBuilder(immutable.NewList()) +b.Append("foo") +b.Append("bar") +b.Set(2, "baz") + +l := b.List() +fmt.Println(l.Get(0)) // "foo" +fmt.Println(l.Get(1)) // "baz" +``` + +Lists are safe to use even after you continue to use the builder. It is also +safe to build on top of existing lists. + ## Map diff --git a/immutable.go b/immutable.go index 2a8acb1..ac430ea 100644 --- a/immutable.go +++ b/immutable.go @@ -66,6 +66,12 @@ func NewList() *List { } } +// clone returns a copy of the list. +func (l *List) clone() *List { + other := *l + return &other +} + // Len returns the number of elements in the list. func (l *List) Len() int { return l.size @@ -89,18 +95,33 @@ func (l *List) Get(index int) interface{} { // method will panic if index is below zero or if the index is greater than // or equal to the list size. func (l *List) Set(index int, value interface{}) *List { + return l.set(index, value, false) +} + +func (l *List) set(index int, value interface{}, mutable bool) *List { if index < 0 || index >= l.size { panic(fmt.Sprintf("immutable.List.Set: index %d out of bounds", index)) } - other := *l - other.root = other.root.set(l.origin+index, value) - return &other + other := l + if !mutable { + other = l.clone() + } + other.root = other.root.set(l.origin+index, value, mutable) + return other } // Append returns a new list with value added to the end of the list. func (l *List) Append(value interface{}) *List { + return l.append(value, false) +} + +func (l *List) append(value interface{}, mutable bool) *List { + other := l + if !mutable { + other = l.clone() + } + // Expand list to the right if no slots remain. - other := *l if other.size+other.origin >= l.cap() { newRoot := &listBranchNode{d: other.root.depth() + 1} newRoot.children[0] = other.root @@ -109,14 +130,22 @@ func (l *List) Append(value interface{}) *List { // Increase size and set the last element to the new value. other.size++ - other.root = other.root.set(other.origin+other.size-1, value) - return &other + other.root = other.root.set(other.origin+other.size-1, value, mutable) + return other } // Prepend returns a new list with value added to the beginning of the list. func (l *List) Prepend(value interface{}) *List { + return l.prepend(value, false) +} + +func (l *List) prepend(value interface{}, mutable bool) *List { + other := l + if !mutable { + other = l.clone() + } + // Expand list to the left if no slots remain. - other := *l if other.origin == 0 { newRoot := &listBranchNode{d: other.root.depth() + 1} newRoot.children[listNodeSize-1] = other.root @@ -127,8 +156,8 @@ func (l *List) Prepend(value interface{}) *List { // Increase size and move origin back. Update first element to value. other.size++ other.origin-- - other.root = other.root.set(other.origin, value) - return &other + other.root = other.root.set(other.origin, value, mutable) + return other } // Slice returns a new list of elements between start index and end index. @@ -139,6 +168,10 @@ func (l *List) Prepend(value interface{}) *List { // Unlike Go slices, references to inaccessible elements will be automatically // removed so they can be garbage collected. func (l *List) Slice(start, end int) *List { + return l.slice(start, end, false) +} + +func (l *List) slice(start, end int, mutable bool) *List { // Panics similar to Go slices. if start < 0 || start > l.size { panic(fmt.Sprintf("immutable.List.Slice: start index %d out of bounds", start)) @@ -153,8 +186,13 @@ func (l *List) Slice(start, end int) *List { return l } - // Create copy with new origin/size. - other := *l + // Create copy, if immutable. + other := l + if !mutable { + other = l.clone() + } + + // Update origin/size. other.origin = l.origin + start other.size = end - start @@ -172,10 +210,10 @@ func (l *List) Slice(start, end int) *List { } // Ensure all references are removed before start & after end. - other.root = other.root.deleteBefore(other.origin) - other.root = other.root.deleteAfter(other.origin + other.size - 1) + other.root = other.root.deleteBefore(other.origin, mutable) + other.root = other.root.deleteAfter(other.origin+other.size-1, mutable) - return &other + return other } // Iterator returns a new iterator for this list positioned at the first index. @@ -185,6 +223,62 @@ func (l *List) Iterator() *ListIterator { return itr } +// ListBuilder represents an efficient builder for creating Lists. +// +// Lists returned from the builder are safe to use even after you continue to +// use the builder. However, for efficiency, you should only retrieve your list +// after you have completed building it. +type ListBuilder struct { + list *List // current state + mutable bool // if true, next mutation will operate in-place. +} + +// NewListBuilder returns a new instance of ListBuilder to build on a base list. +func NewListBuilder(list *List) *ListBuilder { + return &ListBuilder{list: list} +} + +// List returns the current copy of the list. +// The returned list is safe to use even if after the builder continues to be used. +func (b *ListBuilder) List() *List { + list := b.list + b.mutable = false + return list +} + +// Get returns the value at the given index. Similar to slices, this method will +// panic if index is below zero or is greater than or equal to the list size. +func (b *ListBuilder) Get(index int) interface{} { + return b.list.Get(index) +} + +// Set updates the value at the given index. Similar to slices, this method will +// panic if index is below zero or if the index is greater than or equal to the +// list size. +func (b *ListBuilder) Set(index int, value interface{}) { + b.list = b.list.set(index, value, b.mutable) + b.mutable = true +} + +// Append adds value to the end of the list. +func (b *ListBuilder) Append(value interface{}) { + b.list = b.list.append(value, b.mutable) + b.mutable = true +} + +// Prepend adds value to the beginning of the list. +func (b *ListBuilder) Prepend(value interface{}) { + b.list = b.list.prepend(value, b.mutable) + b.mutable = true +} + +// Slice updates the list with a sublist of elements between start and end index. +// See List.Slice() for more details. +func (b *ListBuilder) Slice(start, end int) { + b.list = b.list.slice(start, end, b.mutable) + b.mutable = true +} + // Constants for bit shifts used for levels in the List trie. const ( listNodeBits = 5 @@ -196,13 +290,13 @@ const ( type listNode interface { depth() uint get(index int) interface{} - set(index int, v interface{}) listNode + set(index int, v interface{}, mutable bool) listNode containsBefore(index int) bool containsAfter(index int) bool - deleteBefore(index int) listNode - deleteAfter(index int) listNode + deleteBefore(index int, mutable bool) listNode + deleteAfter(index int, mutable bool) listNode } // newListNode returns a leaf node for depth zero, otherwise returns a branch node. @@ -229,7 +323,7 @@ func (n *listBranchNode) get(index int) interface{} { } // set recursively updates the value at index for each lower depth from the node. -func (n *listBranchNode) set(index int, v interface{}) listNode { +func (n *listBranchNode) set(index int, v interface{}, mutable bool) listNode { idx := (index >> (n.d * listNodeBits)) & listNodeMask // Find child for the given value in the branch. Create new if it doesn't exist. @@ -239,9 +333,15 @@ func (n *listBranchNode) set(index int, v interface{}) listNode { } // Return a copy of this branch with the new child. - other := *n - other.children[idx] = child.set(index, v) - return &other + var other *listBranchNode + if mutable { + other = n + } else { + tmp := *n + other = &tmp + } + other.children[idx] = child.set(index, v, mutable) + return other } // containsBefore returns true if non-nil values exists between [0,index). @@ -281,7 +381,7 @@ func (n *listBranchNode) containsAfter(index int) bool { } // deleteBefore returns a new node with all elements before index removed. -func (n *listBranchNode) deleteBefore(index int) listNode { +func (n *listBranchNode) deleteBefore(index int, mutable bool) listNode { // Ignore if no nodes exist before the given index. if !n.containsBefore(index) { return n @@ -289,16 +389,26 @@ func (n *listBranchNode) deleteBefore(index int) listNode { // Return a copy with any nodes prior to the index removed. idx := (index >> (n.d * listNodeBits)) & listNodeMask - other := &listBranchNode{d: n.d} - copy(other.children[idx:][:], n.children[idx:][:]) + + var other *listBranchNode + if mutable { + other = n + for i := 0; i < idx; i++ { + n.children[i] = nil + } + } else { + other = &listBranchNode{d: n.d} + copy(other.children[idx:][:], n.children[idx:][:]) + } + if other.children[idx] != nil { - other.children[idx] = other.children[idx].deleteBefore(index) + other.children[idx] = other.children[idx].deleteBefore(index, mutable) } return other } // deleteBefore returns a new node with all elements before index removed. -func (n *listBranchNode) deleteAfter(index int) listNode { +func (n *listBranchNode) deleteAfter(index int, mutable bool) listNode { // Ignore if no nodes exist after the given index. if !n.containsAfter(index) { return n @@ -306,10 +416,20 @@ func (n *listBranchNode) deleteAfter(index int) listNode { // Return a copy with any nodes after the index removed. idx := (index >> (n.d * listNodeBits)) & listNodeMask - other := &listBranchNode{d: n.d} - copy(other.children[:idx+1], n.children[:idx+1]) + + var other *listBranchNode + if mutable { + other = n + for i := idx + 1; i < len(n.children); i++ { + n.children[i] = nil + } + } else { + other = &listBranchNode{d: n.d} + copy(other.children[:idx+1], n.children[:idx+1]) + } + if other.children[idx] != nil { - other.children[idx] = other.children[idx].deleteAfter(index) + other.children[idx] = other.children[idx].deleteAfter(index, mutable) } return other } @@ -328,11 +448,17 @@ func (n *listLeafNode) get(index int) interface{} { } // set returns a copy of the node with the value at the index updated to v. -func (n *listLeafNode) set(index int, v interface{}) listNode { +func (n *listLeafNode) set(index int, v interface{}, mutable bool) listNode { idx := index & listNodeMask - other := *n + var other *listLeafNode + if mutable { + other = n + } else { + tmp := *n + other = &tmp + } other.children[idx] = v - return &other + return other } // containsBefore returns true if non-nil values exists between [0,index). @@ -358,27 +484,43 @@ func (n *listLeafNode) containsAfter(index int) bool { } // deleteBefore returns a new node with all elements before index removed. -func (n *listLeafNode) deleteBefore(index int) listNode { +func (n *listLeafNode) deleteBefore(index int, mutable bool) listNode { if !n.containsBefore(index) { return n } idx := index & listNodeMask - var other listLeafNode - copy(other.children[idx:][:], n.children[idx:][:]) - return &other + var other *listLeafNode + if mutable { + other = n + for i := 0; i < idx; i++ { + other.children[i] = nil + } + } else { + other = &listLeafNode{} + copy(other.children[idx:][:], n.children[idx:][:]) + } + return other } // deleteBefore returns a new node with all elements before index removed. -func (n *listLeafNode) deleteAfter(index int) listNode { +func (n *listLeafNode) deleteAfter(index int, mutable bool) listNode { if !n.containsAfter(index) { return n } idx := index & listNodeMask - var other listLeafNode - copy(other.children[:idx+1][:], n.children[:idx+1][:]) - return &other + var other *listLeafNode + if mutable { + other = n + for i := idx + 1; i < len(n.children); i++ { + other.children[i] = nil + } + } else { + other = &listLeafNode{} + copy(other.children[:idx+1][:], n.children[:idx+1][:]) + } + return other } // ListIterator represents an ordered iterator over a list. diff --git a/immutable_test.go b/immutable_test.go index 7418250..c2c91d9 100644 --- a/immutable_test.go +++ b/immutable_test.go @@ -207,13 +207,15 @@ func TestList(t *testing.T) { // TList represents a list that operates on a standard Go slice & immutable list. type TList struct { im, prev *List + builder *ListBuilder std []int } // NewTList returns a new instance of TList. func NewTList() *TList { return &TList{ - im: NewList(), + im: NewList(), + builder: NewListBuilder(NewList()), } } @@ -244,6 +246,7 @@ func (l *TList) ChooseSliceIndices(rand *rand.Rand) (start, end int) { func (l *TList) Append(v int) { l.prev = l.im l.im = l.im.Append(v) + l.builder.Append(v) l.std = append(l.std, v) } @@ -251,6 +254,7 @@ func (l *TList) Append(v int) { func (l *TList) Prepend(v int) { l.prev = l.im l.im = l.im.Prepend(v) + l.builder.Prepend(v) l.std = append([]int{v}, l.std...) } @@ -258,6 +262,7 @@ func (l *TList) Prepend(v int) { func (l *TList) Set(i, v int) { l.prev = l.im l.im = l.im.Set(i, v) + l.builder.Set(i, v) l.std[i] = v } @@ -265,6 +270,7 @@ func (l *TList) Set(i, v int) { func (l *TList) Slice(start, end int) { l.prev = l.im l.im = l.im.Slice(start, end) + l.builder.Slice(start, end) l.std = l.std[start:end] } @@ -274,53 +280,64 @@ func (l *TList) Validate() error { return fmt.Errorf("Len()=%v, expected %d", got, exp) } + bl := l.builder.List() for i := range l.std { if got, exp := l.im.Get(i), l.std[i]; got != exp { return fmt.Errorf("Get(%d)=%v, expected %v", i, got, exp) + } else if got, exp := l.builder.Get(i), l.std[i]; got != exp { + return fmt.Errorf("Builder.Get(%d)=%v, expected %v", i, got, exp) + } else if got, exp := bl.Get(i), l.std[i]; got != exp { + return fmt.Errorf("Builder.List/Get(%d)=%v, expected %v", i, got, exp) } } - if err := l.validateForwardIterator(); err != nil { + if err := l.validateForwardIterator("basic", l.im); err != nil { return err - } else if err := l.validateBackwardIterator(); err != nil { + } else if err := l.validateBackwardIterator("basic", l.im); err != nil { + return err + } + + if err := l.validateForwardIterator("builder", bl); err != nil { + return err + } else if err := l.validateBackwardIterator("builder", bl); err != nil { return err } return nil } -func (l *TList) validateForwardIterator() error { - itr := l.im.Iterator() +func (l *TList) validateForwardIterator(typ string, list *List) error { + itr := list.Iterator() for i := range l.std { if j, v := itr.Next(); i != j || l.std[i] != v { - return fmt.Errorf("ListIterator.Next()=<%v,%v>, expected <%v,%v>", j, v, i, l.std[i]) + return fmt.Errorf("ListIterator.Next()=<%v,%v>, expected <%v,%v> [%s]", j, v, i, l.std[i], typ) } done := i == len(l.std)-1 if v := itr.Done(); v != done { - return fmt.Errorf("ListIterator.Done()=%v, expected %v", v, done) + return fmt.Errorf("ListIterator.Done()=%v, expected %v [%s]", v, done, typ) } } if i, v := itr.Next(); i != -1 || v != nil { - return fmt.Errorf("ListIterator.Next()=<%v,%v>, expected DONE", i, v) + return fmt.Errorf("ListIterator.Next()=<%v,%v>, expected DONE [%s]", i, v, typ) } return nil } -func (l *TList) validateBackwardIterator() error { - itr := l.im.Iterator() +func (l *TList) validateBackwardIterator(typ string, list *List) error { + itr := list.Iterator() itr.Last() for i := len(l.std) - 1; i >= 0; i-- { if j, v := itr.Prev(); i != j || l.std[i] != v { - return fmt.Errorf("ListIterator.Prev()=<%v,%v>, expected <%v,%v>", j, v, i, l.std[i]) + return fmt.Errorf("ListIterator.Prev()=<%v,%v>, expected <%v,%v> [%s]", j, v, i, l.std[i], typ) } done := i == 0 if v := itr.Done(); v != done { - return fmt.Errorf("ListIterator.Done()=%v, expected %v", v, done) + return fmt.Errorf("ListIterator.Done()=%v, expected %v [%s]", v, done, typ) } } if i, v := itr.Prev(); i != -1 || v != nil { - return fmt.Errorf("ListIterator.Prev()=<%v,%v>, expected DONE", i, v) + return fmt.Errorf("ListIterator.Prev()=<%v,%v>, expected DONE [%s]", i, v, typ) } return nil } @@ -386,6 +403,54 @@ func BenchmarkList_Iterator(b *testing.B) { }) } +func BenchmarkBuiltinSlice_Append(b *testing.B) { + b.Run("Int", func(b *testing.B) { + b.ReportAllocs() + var a []int + for i := 0; i < b.N; i++ { + a = append(a, i) + } + }) + b.Run("Interface", func(b *testing.B) { + b.ReportAllocs() + var a []interface{} + for i := 0; i < b.N; i++ { + a = append(a, i) + } + }) +} + +func BenchmarkListBuilder_Append(b *testing.B) { + b.ReportAllocs() + builder := NewListBuilder(NewList()) + for i := 0; i < b.N; i++ { + builder.Append(i) + } +} + +func BenchmarkListBuilder_Prepend(b *testing.B) { + b.ReportAllocs() + builder := NewListBuilder(NewList()) + for i := 0; i < b.N; i++ { + builder.Prepend(i) + } +} + +func BenchmarkListBuilder_Set(b *testing.B) { + const n = 10000 + + builder := NewListBuilder(NewList()) + for i := 0; i < 10000; i++ { + builder.Append(i) + } + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + builder.Set(i%n, i*10) + } +} + func ExampleList_Append() { l := NewList() l = l.Append("foo") @@ -478,6 +543,67 @@ func ExampleList_Iterator_reverse() { // 0 foo } +func ExampleListBuilder_Append() { + b := NewListBuilder(NewList()) + b.Append("foo") + b.Append("bar") + b.Append("baz") + + l := b.List() + fmt.Println(l.Get(0)) + fmt.Println(l.Get(1)) + fmt.Println(l.Get(2)) + // Output: + // foo + // bar + // baz +} + +func ExampleListBuilder_Prepend() { + b := NewListBuilder(NewList()) + b.Prepend("foo") + b.Prepend("bar") + b.Prepend("baz") + + l := b.List() + fmt.Println(l.Get(0)) + fmt.Println(l.Get(1)) + fmt.Println(l.Get(2)) + // Output: + // baz + // bar + // foo +} + +func ExampleListBuilder_Set() { + b := NewListBuilder(NewList()) + b.Append("foo") + b.Append("bar") + b.Set(1, "baz") + + l := b.List() + fmt.Println(l.Get(0)) + fmt.Println(l.Get(1)) + // Output: + // foo + // baz +} + +func ExampleListBuilder_Slice() { + b := NewListBuilder(NewList()) + b.Append("foo") + b.Append("bar") + b.Append("baz") + b.Slice(1, 3) + + l := b.List() + fmt.Println(l.Get(0)) + fmt.Println(l.Get(1)) + // Output: + // bar + // baz +} + // Ensure node can support overwrites as it expands. func TestInternal_mapNode_Overwrite(t *testing.T) { const n = 1000 |