| author    | Ben Johnson <benbjohnson@yahoo.com> | 2014-02-07 22:55:25 -0700 |
|-----------|-------------------------------------|---------------------------|
| committer | Ben Johnson <benbjohnson@yahoo.com> | 2014-02-08 23:13:54 -0700 |
| commit    | 3da04c52b991337e74c72f2c76bd1aa9df77eab2 (patch) | |
| tree      | d31bed34f7fe34eb8c5a21f311a1103ed4b58b2a | |
| parent    | Refactor node lookup. (diff) | |
| download  | dedo-3da04c52b991337e74c72f2c76bd1aa9df77eab2.tar.gz, dedo-3da04c52b991337e74c72f2c76bd1aa9df77eab2.tar.xz | |

Rebalance after deletion.
| mode       | file                  | lines |
|------------|-----------------------|-------|
| -rw-r--r-- | assert.go             |   4   |
| -rw-r--r-- | cursor.go             |  10   |
| -rw-r--r-- | node.go               | 163   |
| -rw-r--r-- | rwtransaction.go      |  12   |
| -rw-r--r-- | rwtransaction_test.go |  41   |

5 files changed, 218 insertions, 12 deletions
diff --git a/assert.go b/assert.go
--- a/assert.go
+++ b/assert.go
@@ -4,8 +4,8 @@ import "fmt"
 
 // TODO(benbjohnson): Remove assertions before release.
 
-// __assert__ will panic with a given formatted message if the given condition is false.
-func __assert__(condition bool, msg string, v ...interface{}) {
+// _assert will panic with a given formatted message if the given condition is false.
+func _assert(condition bool, msg string, v ...interface{}) {
 	if !condition {
 		panic(fmt.Sprintf("assertion failed: " + msg, v...))
 	}
diff --git a/cursor.go b/cursor.go
--- a/cursor.go
+++ b/cursor.go
@@ -115,12 +115,12 @@ func (c *Cursor) node(t *RWTransaction) *node {
 
 	// Start from root and traverse down the hierarchy.
 	n := t.node(c.stack[0].page.id, nil)
 	for _, ref := range c.stack[:len(c.stack)-1] {
-		__assert__(!n.isLeaf, "expected branch node")
-		__assert__(ref.page.id == n.pgid, "node/page mismatch a: %d != %d", ref.page.id, n.childAt(ref.index).pgid)
-		n = n.childAt(ref.index)
+		_assert(!n.isLeaf, "expected branch node")
+		_assert(ref.page.id == n.pgid, "node/page mismatch a: %d != %d", ref.page.id, n.childAt(int(ref.index)).pgid)
+		n = n.childAt(int(ref.index))
 	}
-	__assert__(n.isLeaf, "expected leaf node")
-	__assert__(n.pgid == c.stack[len(c.stack)-1].page.id, "node/page mismatch b: %d != %d", n.pgid, c.stack[len(c.stack)-1].page.id)
+	_assert(n.isLeaf, "expected leaf node")
+	_assert(n.pgid == c.stack[len(c.stack)-1].page.id, "node/page mismatch b: %d != %d", n.pgid, c.stack[len(c.stack)-1].page.id)
 	return n
 }
diff --git a/node.go b/node.go
--- a/node.go
+++ b/node.go
@@ -10,6 +10,7 @@ import (
 type node struct {
 	transaction *RWTransaction
 	isLeaf      bool
+	unbalanced  bool
 	key         []byte
 	depth       int
 	pgid        pgid
@@ -17,6 +18,14 @@ type node struct {
 	inodes      inodes
 }
 
+// minKeys returns the minimum number of inodes this node should have.
+func (n *node) minKeys() int {
+	if n.isLeaf {
+		return 1
+	}
+	return 2
+}
+
 // size returns the size of the node after serialization.
 func (n *node) size() int {
 	var elementSize int = n.pageElementSize()
@@ -45,11 +54,46 @@ func (n *node) root() *node {
 }
 
 // childAt returns the child node at a given index.
-func (n *node) childAt(index uint16) *node {
-	__assert__(!n.isLeaf, "invalid childAt(%d) on a leaf node", index)
+func (n *node) childAt(index int) *node {
+	_assert(!n.isLeaf, "invalid childAt(%d) on a leaf node", index)
 	return n.transaction.node(n.inodes[index].pgid, n)
 }
 
+// childIndex returns the index of a given child node.
+func (n *node) childIndex(child *node) int {
+	index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 })
+	return index
+}
+
+// numChildren returns the number of children.
+func (n *node) numChildren() int {
+	return len(n.inodes)
+}
+
+// nextSibling returns the next node with the same parent.
+func (n *node) nextSibling() *node {
+	if n.parent == nil {
+		return nil
+	}
+	index := n.parent.childIndex(n)
+	if index >= n.parent.numChildren() - 1 {
+		return nil
+	}
+	return n.parent.childAt(index + 1)
+}
+
+// prevSibling returns the previous node with the same parent.
+func (n *node) prevSibling() *node {
+	if n.parent == nil {
+		return nil
+	}
+	index := n.parent.childIndex(n)
+	if index == 0 {
+		return nil
+	}
+	return n.parent.childAt(index - 1)
+}
+
 // put inserts a key/value.
 func (n *node) put(oldKey, newKey, value []byte, pgid pgid) {
 	// Find insertion index.
@@ -80,6 +124,9 @@ func (n *node) del(key []byte) {
 
 	// Delete inode from the node.
 	n.inodes = append(n.inodes[:index], n.inodes[index+1:]...)
+
+	// Mark the node as needing rebalancing.
+	n.unbalanced = true
 }
 
 // read initializes the node from a page.
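Aside, before the rest of the node.go diff: the new `childIndex` helper uses `sort.Search` with a `bytes.Compare(...) != -1` predicate, i.e. "first inode whose key is >= the child's key", which works because inode keys are kept sorted. A standalone sketch of that idiom (not code from this commit; `indexOf` and the sample keys are made up):

```go
package main

import (
	"bytes"
	"fmt"
	"sort"
)

// keys stands in for a branch node's sorted inode keys (illustrative only).
var keys = [][]byte{[]byte("apple"), []byte("mango"), []byte("zebra")}

// indexOf mirrors the childIndex pattern: return the first index whose key
// is greater than or equal to the target.
func indexOf(target []byte) int {
	return sort.Search(len(keys), func(i int) bool {
		return bytes.Compare(keys[i], target) != -1
	})
}

func main() {
	fmt.Println(indexOf([]byte("mango"))) // 1
	fmt.Println(indexOf([]byte("pear")))  // 2 (first key >= "pear")
}
```

`nextSibling` and `prevSibling` build on the same lookup: find the node's position under its parent, then step one index right or left.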
@@ -178,6 +225,118 @@ func (n *node) split(pageSize int) []*node {
 
 	return nodes
 }
 
+// rebalance attempts to combine the node with sibling nodes if the node fill
+// size is below a threshold or if there are not enough keys.
+func (n *node) rebalance() {
+	if !n.unbalanced {
+		return
+	}
+	n.unbalanced = false
+
+	// Ignore if node is above threshold (25%) and has enough keys.
+	var threshold = n.transaction.db.pageSize / 4
+	if n.size() > threshold && len(n.inodes) > n.minKeys() {
+		return
+	}
+
+	// Root node has special handling.
+	if n.parent == nil {
+		// If root node is a branch and only has one node then collapse it.
+		if !n.isLeaf && len(n.inodes) == 1 {
+			// Move child's children up.
+			child := n.transaction.nodes[n.inodes[0].pgid]
+			n.isLeaf = child.isLeaf
+			n.inodes = child.inodes[:]
+
+			// Reparent all child nodes being moved.
+			for _, inode := range n.inodes {
+				if child, ok := n.transaction.nodes[inode.pgid]; ok {
+					child.parent = n
+				}
+			}
+
+			// Remove old child.
+			child.parent = nil
+			delete(n.transaction.nodes, child.pgid)
+		}
+
+		return
+	}
+
+	_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
+
+	// Destination node is right sibling if idx == 0, otherwise left sibling.
+	var target *node
+	var useNextSibling = (n.parent.childIndex(n) == 0)
+	if useNextSibling {
+		target = n.nextSibling()
+	} else {
+		target = n.prevSibling()
+	}
+
+	// If target node has extra nodes then just move one over.
+	if target.numChildren() > target.minKeys() {
+		if useNextSibling {
+			// Reparent and move node.
+			if child, ok := n.transaction.nodes[target.inodes[0].pgid]; ok {
+				child.parent = n
+			}
+			n.inodes = append(n.inodes, target.inodes[0])
+			target.inodes = target.inodes[1:]
+
+			// Update target key on parent.
+			target.parent.put(target.key, target.inodes[0].key, nil, target.pgid)
+			target.key = target.inodes[0].key
+		} else {
+			// Reparent and move node.
+			if child, ok := n.transaction.nodes[target.inodes[len(target.inodes)-1].pgid]; ok {
+				child.parent = n
+			}
+			n.inodes = append(n.inodes, inode{})
+			copy(n.inodes[1:], n.inodes)
+			n.inodes[0] = target.inodes[len(target.inodes)-1]
+			target.inodes = target.inodes[:len(target.inodes)-1]
+		}
+
+		// Update parent key for node.
+		n.parent.put(n.key, n.inodes[0].key, nil, n.pgid)
+		n.key = n.inodes[0].key
+
+		return
+	}
+
+	// If both this node and the target node are too small then merge them.
+	if useNextSibling {
+		// Reparent all child nodes being moved.
+		for _, inode := range target.inodes {
+			if child, ok := n.transaction.nodes[inode.pgid]; ok {
+				child.parent = n
+			}
+		}
+
+		// Copy over inodes from target and remove target.
+		n.inodes = append(n.inodes, target.inodes...)
+		n.parent.del(target.key)
+		delete(n.transaction.nodes, target.pgid)
+	} else {
+		// Reparent all child nodes being moved.
+		for _, inode := range n.inodes {
+			if child, ok := n.transaction.nodes[inode.pgid]; ok {
+				child.parent = target
+			}
+		}
+
+		// Copy over inodes to target and remove node.
+		target.inodes = append(target.inodes, n.inodes...)
+		n.parent.del(n.key)
+		n.parent.put(target.key, target.inodes[0].key, nil, target.pgid)
+		delete(n.transaction.nodes, n.pgid)
+	}
+
+	// Either this node or the target node was deleted from the parent so rebalance it.
+	n.parent.rebalance()
+}
+
 // nodesByDepth sorts a list of branches by deepest first.
 type nodesByDepth []*node
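Taken together, the rebalance hunk applies a standard B+tree fill check: a node larger than 25% of a page that still holds more than its minimum key count (1 for leaves, 2 for branches) is left alone; otherwise it either borrows one inode from the chosen sibling or merges with it and recursively rebalances the parent, with the root only collapsed when it is a branch holding a single child. A minimal sketch of just the non-root decision rule, with hypothetical names (`fillAction` does not exist in the codebase):

```go
package main

import "fmt"

// action models the three outcomes of node.rebalance for a non-root node.
type action string

// fillAction sketches the decision rule only; sizes and counts are passed in
// explicitly instead of being read off a real node.
func fillAction(size, pageSize, numKeys, minKeys, siblingKeys int) action {
	// Above 25% of the page and holding enough keys: leave the node alone.
	if size > pageSize/4 && numKeys > minKeys {
		return "keep"
	}
	// Sibling has spare keys: rotate one inode over instead of merging.
	if siblingKeys > minKeys {
		return "borrow"
	}
	// Otherwise both nodes are small: merge them and rebalance the parent.
	return "merge"
}

func main() {
	fmt.Println(fillAction(4096, 4096, 10, 1, 5)) // keep
	fmt.Println(fillAction(100, 4096, 1, 1, 5))   // borrow
	fmt.Println(fillAction(100, 4096, 1, 1, 1))   // merge
}
```

In the real code the "keep" branch is the early return guarded by `n.size() > threshold && len(n.inodes) > n.minKeys()`, and the sibling is the right-hand one only when the node is its parent's first child.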
diff --git a/rwtransaction.go b/rwtransaction.go
index ed44e3d..2e1685b 100644
--- a/rwtransaction.go
+++ b/rwtransaction.go
@@ -99,9 +99,8 @@ func (t *RWTransaction) Delete(name string, key []byte) error {
 func (t *RWTransaction) Commit() error {
 	// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
 
-	// TODO: Rebalance.
-
-	// Spill data onto dirty pages.
+	// Rebalance and spill data onto dirty pages.
+	t.rebalance()
 	t.spill()
 
 	// Spill buckets page.
@@ -154,6 +153,13 @@ func (t *RWTransaction) allocate(count int) *page {
 	return p
 }
 
+// rebalance attempts to balance all nodes.
+func (t *RWTransaction) rebalance() {
+	for _, n := range t.nodes {
+		n.rebalance()
+	}
+}
+
 // spill writes all the nodes to dirty pages.
 func (t *RWTransaction) spill() {
 	// Keep track of the current root nodes.
diff --git a/rwtransaction_test.go b/rwtransaction_test.go
index f027e8b..5bf3f9f 100644
--- a/rwtransaction_test.go
+++ b/rwtransaction_test.go
@@ -136,3 +136,44 @@ func TestRWTransactionPutMultiple(t *testing.T) {
 	}
 	fmt.Fprint(os.Stderr, "\n")
 }
+
+// Ensure that a transaction can delete all key/value pairs and return to a single leaf page.
+func TestRWTransactionDelete(t *testing.T) {
+	f := func(items testdata) bool {
+		withOpenDB(func(db *DB, path string) {
+			// Bulk insert all values.
+			db.CreateBucket("widgets")
+			rwtxn, _ := db.RWTransaction()
+			for _, item := range items {
+				assert.NoError(t, rwtxn.Put("widgets", item.Key, item.Value))
+			}
+			assert.NoError(t, rwtxn.Commit())
+
+			// Remove items one at a time and check consistency.
+			for i, item := range items {
+				assert.NoError(t, db.Delete("widgets", item.Key))
+
+				// Anything before our deletion index should be nil.
+				txn, _ := db.Transaction()
+				for j, exp := range items {
+					if j > i {
+						if !assert.Equal(t, exp.Value, txn.Get("widgets", exp.Key)) {
+							t.FailNow()
+						}
+					} else {
+						if !assert.Nil(t, txn.Get("widgets", exp.Key)) {
+							t.FailNow()
+						}
+					}
+				}
+				txn.Close()
+			}
+		})
+		fmt.Fprint(os.Stderr, ".")
+		return true
+	}
+	if err := quick.Check(f, qconfig()); err != nil {
+		t.Error(err)
+	}
+	fmt.Fprint(os.Stderr, "\n")
+}
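The new quick-check test drives the whole path through the public API exercised above. For a simpler, deterministic variant, a hand-written sketch along the same lines might look like the following. This is illustrative only: `TestDeleteShrinksTree` is not part of the commit, and it assumes the same test-package context (the `withOpenDB` helper and testify's `assert`) used in rwtransaction_test.go.

```go
// TestDeleteShrinksTree is a hypothetical, deterministic companion to
// TestRWTransactionDelete: insert a batch of keys, delete them all, and
// confirm reads come back nil.
func TestDeleteShrinksTree(t *testing.T) {
	withOpenDB(func(db *DB, path string) {
		db.CreateBucket("widgets")

		// Insert a batch of keys so the tree grows beyond a single leaf.
		rwtxn, _ := db.RWTransaction()
		for i := 0; i < 1000; i++ {
			key := []byte(fmt.Sprintf("key-%04d", i))
			assert.NoError(t, rwtxn.Put("widgets", key, []byte("value")))
		}
		assert.NoError(t, rwtxn.Commit())

		// Delete everything; rebalance() now runs as part of each commit,
		// merging underfilled pages back together.
		for i := 0; i < 1000; i++ {
			key := []byte(fmt.Sprintf("key-%04d", i))
			assert.NoError(t, db.Delete("widgets", key))
		}

		// All keys should now be gone.
		txn, _ := db.Transaction()
		assert.Nil(t, txn.Get("widgets", []byte("key-0000")))
		txn.Close()
	})
}
```

Because Commit() now calls t.rebalance() before spilling, every committed deletion gets a chance to borrow from or merge with sibling pages, which is what lets the tree shrink back toward a single leaf as the commit message describes.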