about summary refs log tree commit diff
diff options
context:
space:
mode:
authorBen Johnson <benbjohnson@yahoo.com>2014-02-07 22:55:25 -0700
committerBen Johnson <benbjohnson@yahoo.com>2014-02-08 23:13:54 -0700
commit3da04c52b991337e74c72f2c76bd1aa9df77eab2 (patch)
treed31bed34f7fe34eb8c5a21f311a1103ed4b58b2a
parentRefactor node lookup. (diff)
downloaddedo-3da04c52b991337e74c72f2c76bd1aa9df77eab2.tar.gz
dedo-3da04c52b991337e74c72f2c76bd1aa9df77eab2.tar.xz
Rebalance after deletion.
-rw-r--r--assert.go4
-rw-r--r--cursor.go10
-rw-r--r--node.go163
-rw-r--r--rwtransaction.go12
-rw-r--r--rwtransaction_test.go41
5 files changed, 218 insertions, 12 deletions
diff --git a/assert.go b/assert.go
index 912e484..8101f8a 100644
--- a/assert.go
+++ b/assert.go
@@ -4,8 +4,8 @@ import "fmt"
// TODO(benbjohnson): Remove assertions before release.
-// __assert__ will panic with a given formatted message if the given condition is false.
-func __assert__(condition bool, msg string, v ...interface{}) {
+// _assert will panic with a given formatted message if the given condition is false.
+func _assert(condition bool, msg string, v ...interface{}) {
if !condition {
panic(fmt.Sprintf("assertion failed: " + msg, v...))
}
diff --git a/cursor.go b/cursor.go
index 2f65911..c76aab1 100644
--- a/cursor.go
+++ b/cursor.go
@@ -115,12 +115,12 @@ func (c *Cursor) node(t *RWTransaction) *node {
// Start from root and traverse down the hierarchy.
n := t.node(c.stack[0].page.id, nil)
for _, ref := range c.stack[:len(c.stack)-1] {
- __assert__(!n.isLeaf, "expected branch node")
- __assert__(ref.page.id == n.pgid, "node/page mismatch a: %d != %d", ref.page.id, n.childAt(ref.index).pgid)
- n = n.childAt(ref.index)
+ _assert(!n.isLeaf, "expected branch node")
+ _assert(ref.page.id == n.pgid, "node/page mismatch a: %d != %d", ref.page.id, n.childAt(int(ref.index)).pgid)
+ n = n.childAt(int(ref.index))
}
- __assert__(n.isLeaf, "expected leaf node")
- __assert__(n.pgid == c.stack[len(c.stack)-1].page.id, "node/page mismatch b: %d != %d", n.pgid, c.stack[len(c.stack)-1].page.id)
+ _assert(n.isLeaf, "expected leaf node")
+ _assert(n.pgid == c.stack[len(c.stack)-1].page.id, "node/page mismatch b: %d != %d", n.pgid, c.stack[len(c.stack)-1].page.id)
return n
}
diff --git a/node.go b/node.go
index 111f12c..15f5763 100644
--- a/node.go
+++ b/node.go
@@ -10,6 +10,7 @@ import (
type node struct {
transaction *RWTransaction
isLeaf bool
+ unbalanced bool
key []byte
depth int
pgid pgid
@@ -17,6 +18,14 @@ type node struct {
inodes inodes
}
+// minKeys returns the minimum number of inodes this node should have.
+func (n *node) minKeys() int {
+ if n.isLeaf {
+ return 1
+ }
+ return 2
+}
+
// size returns the size of the node after serialization.
func (n *node) size() int {
var elementSize int = n.pageElementSize()
@@ -45,11 +54,46 @@ func (n *node) root() *node {
}
// childAt returns the child node at a given index.
-func (n *node) childAt(index uint16) *node {
- __assert__(!n.isLeaf, "invalid childAt(%d) on a leaf node", index)
+func (n *node) childAt(index int) *node {
+ _assert(!n.isLeaf, "invalid childAt(%d) on a leaf node", index)
return n.transaction.node(n.inodes[index].pgid, n)
}
+// childIndex returns the index of a given child node.
+func (n *node) childIndex(child *node) int {
+ index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
+ return index
+}
+
+// numChildren returns the number of children.
+func (n *node) numChildren() int {
+ return len(n.inodes)
+}
+
+// nextSibling returns the next node with the same parent.
+func (n *node) nextSibling() *node {
+ if n.parent == nil {
+ return nil
+ }
+ index := n.parent.childIndex(n)
+ if index >= n.parent.numChildren() - 1 {
+ return nil
+ }
+ return n.parent.childAt(index + 1)
+}
+
+// prevSibling returns the previous node with the same parent.
+func (n *node) prevSibling() *node {
+ if n.parent == nil {
+ return nil
+ }
+ index := n.parent.childIndex(n)
+ if index == 0 {
+ return nil
+ }
+ return n.parent.childAt(index - 1)
+}
+
// put inserts a key/value.
func (n *node) put(oldKey, newKey, value []byte, pgid pgid) {
// Find insertion index.
@@ -80,6 +124,9 @@ func (n *node) del(key []byte) {
// Delete inode from the node.
n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
+
+ // Mark the node as needing rebalancing.
+ n.unbalanced = true
}
// read initializes the node from a page.
@@ -178,6 +225,118 @@ func (n *node) split(pageSize int) []*node {
return nodes
}
+// rebalance attempts to combine the node with sibling nodes if the node fill
+// size is below a threshold or if there are not enough keys.
+func (n *node) rebalance() {
+ if !n.unbalanced {
+ return
+ }
+ n.unbalanced = false
+
+ // Ignore if node is above threshold (25%) and has enough keys.
+ var threshold = n.transaction.db.pageSize / 4
+ if n.size() > threshold && len(n.inodes) > n.minKeys() {
+ return
+ }
+
+ // Root node has special handling.
+ if n.parent == nil {
+ // If root node is a branch and only has one node then collapse it.
+ if !n.isLeaf && len(n.inodes) == 1 {
+ // Move child's children up.
+ child := n.transaction.nodes[n.inodes[0].pgid]
+ n.isLeaf = child.isLeaf
+ n.inodes = child.inodes[:]
+
+ // Reparent all child nodes being moved.
+ for _, inode := range n.inodes {
+ if child, ok := n.transaction.nodes[inode.pgid]; ok {
+ child.parent = n
+ }
+ }
+
+ // Remove old child.
+ child.parent = nil
+ delete(n.transaction.nodes, child.pgid)
+ }
+
+ return
+ }
+
+ _assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
+
+ // Destination node is right sibling if idx == 0, otherwise left sibling.
+ var target *node
+ var useNextSibling = (n.parent.childIndex(n) == 0)
+ if useNextSibling {
+ target = n.nextSibling()
+ } else {
+ target = n.prevSibling()
+ }
+
+ // If target node has extra nodes then just move one over.
+ if target.numChildren() > target.minKeys() {
+ if useNextSibling {
+ // Reparent and move node.
+ if child, ok := n.transaction.nodes[target.inodes[0].pgid]; ok {
+ child.parent = n
+ }
+ n.inodes = append(n.inodes, target.inodes[0])
+ target.inodes = target.inodes[1:]
+
+ // Update target key on parent.
+ target.parent.put(target.key, target.inodes[0].key, nil, target.pgid)
+ target.key = target.inodes[0].key
+ } else {
+ // Reparent and move node.
+ if child, ok := n.transaction.nodes[target.inodes[len(target.inodes)-1].pgid]; ok {
+ child.parent = n
+ }
+ n.inodes = append(n.inodes, inode{})
+ copy(n.inodes[1:], n.inodes)
+ n.inodes[0] = target.inodes[len(target.inodes)-1]
+ target.inodes = target.inodes[:len(target.inodes)-1]
+ }
+
+ // Update parent key for node.
+ n.parent.put(n.key, n.inodes[0].key, nil, n.pgid)
+ n.key = n.inodes[0].key
+
+ return
+ }
+
+ // If both this node and the target node are too small then merge them.
+ if useNextSibling {
+ // Reparent all child nodes being moved.
+ for _, inode := range target.inodes {
+ if child, ok := n.transaction.nodes[inode.pgid]; ok {
+ child.parent = n
+ }
+ }
+
+ // Copy over inodes from target and remove target.
+ n.inodes = append(n.inodes, target.inodes...)
+ n.parent.del(target.key)
+ delete(n.transaction.nodes, target.pgid)
+ } else {
+ // Reparent all child nodes being moved.
+ for _, inode := range n.inodes {
+ if child, ok := n.transaction.nodes[inode.pgid]; ok {
+ child.parent = target
+ }
+ }
+
+ // Copy over inodes to target and remove node.
+ target.inodes = append(target.inodes, n.inodes...)
+ n.parent.del(n.key)
+ n.parent.put(target.key, target.inodes[0].key, nil, target.pgid)
+ delete(n.transaction.nodes, n.pgid)
+ }
+
+ // Either this node or the target node was deleted from the parent so rebalance it.
+ n.parent.rebalance()
+}
+
// nodesByDepth sorts a list of branches by deepest first.
type nodesByDepth []*node
diff --git a/rwtransaction.go b/rwtransaction.go
index ed44e3d..2e1685b 100644
--- a/rwtransaction.go
+++ b/rwtransaction.go
@@ -99,9 +99,8 @@ func (t *RWTransaction) Delete(name string, key []byte) error {
func (t *RWTransaction) Commit() error {
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
- // TODO: Rebalance.
-
- // Spill data onto dirty pages.
+ // Rebalance and spill data onto dirty pages.
+ t.rebalance()
t.spill()
// Spill buckets page.
@@ -154,6 +153,13 @@ func (t *RWTransaction) allocate(count int) *page {
return p
}
+// rebalance attempts to balance all nodes.
+func (t *RWTransaction) rebalance() {
+ for _, n := range t.nodes {
+ n.rebalance()
+ }
+}
+
// spill writes all the nodes to dirty pages.
func (t *RWTransaction) spill() {
// Keep track of the current root nodes.
diff --git a/rwtransaction_test.go b/rwtransaction_test.go
index f027e8b..5bf3f9f 100644
--- a/rwtransaction_test.go
+++ b/rwtransaction_test.go
@@ -136,3 +136,44 @@ func TestRWTransactionPutMultiple(t *testing.T) {
}
fmt.Fprint(os.Stderr, "\n")
}
+
+// Ensure that a transaction can delete all key/value pairs and return to a single leaf page.
+func TestRWTransactionDelete(t *testing.T) {
+ f := func(items testdata) bool {
+ withOpenDB(func(db *DB, path string) {
+ // Bulk insert all values.
+ db.CreateBucket("widgets")
+ rwtxn, _ := db.RWTransaction()
+ for _, item := range items {
+ assert.NoError(t, rwtxn.Put("widgets", item.Key, item.Value))
+ }
+ assert.NoError(t, rwtxn.Commit())
+
+ // Remove items one at a time and check consistency.
+ for i, item := range items {
+ assert.NoError(t, db.Delete("widgets", item.Key))
+
+ // Anything at or before our deletion index should be nil.
+ txn, _ := db.Transaction()
+ for j, exp := range items {
+ if j > i {
+ if !assert.Equal(t, exp.Value, txn.Get("widgets", exp.Key)) {
+ t.FailNow()
+ }
+ } else {
+ if !assert.Nil(t, txn.Get("widgets", exp.Key)) {
+ t.FailNow()
+ }
+ }
+ }
+ txn.Close()
+ }
+ })
+ fmt.Fprint(os.Stderr, ".")
+ return true
+ }
+ if err := quick.Check(f, qconfig()); err != nil {
+ t.Error(err)
+ }
+ fmt.Fprint(os.Stderr, "\n")
+}