Allow split nodes to be merged with the next node.

This commit changes the node.split() functionality to check if the next node has available space and, if so, it will merge the newly split keys into the next node. Previously, keys could be continually put into the left side of a split causing that first half to split off small right side nodes. This was especially problematic with databases with a high fill percent.
author: Ben Johnson <benbjohnson@yahoo.com> 2014-06-02 15:26:58 -0600
committer: Ben Johnson <benbjohnson@yahoo.com> 2014-06-02 15:26:58 -0600
commit: a96185e8b69725985e48926b7b28747ddbbfe9b6 (patch)
tree: 538e1f759c900a5a368d04a57a09af07f8d32099 /node.go
parent: Add ipxed to README. (diff)
download: dedo-a96185e8b69725985e48926b7b28747ddbbfe9b6.tar.gz
dedo-a96185e8b69725985e48926b7b28747ddbbfe9b6.tar.xz
1 files changed, 51 insertions, 36 deletions
diff --git a/node.go b/node.go
index 1502be0..57a50e9 100644
--- a/node.go
+++ b/node.go
@@ -205,15 +205,14 @@ func (n *node) write(p *page) {
 	// DEBUG ONLY: n.dump()
 }
 
-// split breaks up a node into smaller nodes, if appropriate.
+// split breaks up a node into two smaller nodes, if appropriate.
 // This should only be called from the spill() function.
 func (n *node) split(pageSize int) []*node {
-	var nodes = []*node{n}
-
 	// Ignore the split if the page doesn't have at least enough nodes for
-	// multiple pages or if the data can fit on a single page.
-	if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize {
-		return nodes
+	// two pages or if the data can fit on a single page.
+	sz := n.size()
+	if len(n.inodes) <= (minKeysPerPage*2) || sz < pageSize {
+		return []*node{n}
 	}
 
 	// Determine the threshold before starting a new node.
@@ -225,43 +224,59 @@ func (n *node) split(pageSize int) []*node {
 	}
 	threshold := int(float64(pageSize) * fillPercent)
 
-	// Group into smaller pages and target a given fill size.
-	size := pageHeaderSize
-	internalNodes := n.inodes
-	current := n
-	current.inodes = nil
-
-	// Loop over every inode and split once we reach our threshold.
-	for i, inode := range internalNodes {
-		elemSize := n.pageElementSize() + len(inode.key) + len(inode.value)
-
-		// Split once we reach our threshold split size. However, this should
-		// only be done if we have enough keys for this node and we will have
-		// enough keys for the next node.
-		if len(current.inodes) >= minKeysPerPage && i < len(internalNodes)-minKeysPerPage && size+elemSize > threshold {
-			// If there's no parent then we need to create one.
-			if n.parent == nil {
-				n.parent = &node{bucket: n.bucket, children: []*node{n}}
-			}
+	// Determine split position and sizes of the two pages.
+	splitIndex, sz0 := n.splitIndex(threshold)
+	sz1 := pageHeaderSize + (sz - sz0)
+
+	// If we can fit our extra keys on the next page then merge into it.
+	if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
+		next.inodes = append(n.inodes[splitIndex:], next.inodes...)
+		return []*node{n}
+	}
+
+	// Otherwise split node into two separate nodes. If there's no parent then
+	// we'll need to create one.
+	if n.parent == nil {
+		n.parent = &node{bucket: n.bucket, children: []*node{n}}
+	}
+
+	// Create a new node and add it to the parent.
+	next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
+	n.parent.children = append(n.parent.children, next)
 
-			// Create a new node and add it to the parent.
-			current = &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
-			n.parent.children = append(n.parent.children, current)
-			nodes = append(nodes, current)
+	// Split inodes across two nodes.
+	next.inodes = n.inodes[splitIndex:]
+	n.inodes = n.inodes[:splitIndex]
 
-			// Reset our running total back to zero (plus header size).
-			size = pageHeaderSize
+	// Update the statistics.
+	n.bucket.tx.stats.Split++
+
+	return []*node{n, next}
+}
 
-			// Update the statistics.
-			n.bucket.tx.stats.Split++
+// splitIndex finds the position where a page will fill a given threshold.
+// It returns the index as well as the size of the first page.
+// This is only be called from split().
+func (n *node) splitIndex(threshold int) (index, sz int) {
+	sz = pageHeaderSize
+
+	// Loop until we only have the minimum number of keys required for the second page.
+	for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
+		index = i
+		inode := n.inodes[i]
+		elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+		// If we have at least the minimum number of keys and adding another
+		// node would put us over the threshold then exit and return.
+		if i >= minKeysPerPage && sz+elsize > threshold {
+			break
 		}
 
-		// Increase our running total of the size and append the inode.
-		size += elemSize
-		current.inodes = append(current.inodes, inode)
+		// Add the element size to the total size.
+		sz += elsize
 	}
 
-	return nodes
+	return
 }
 
 // spill writes the nodes to dirty pages and splits nodes as it goes.
author	Ben Johnson <benbjohnson@yahoo.com>	2014-06-02 15:26:58 -0600
committer	Ben Johnson <benbjohnson@yahoo.com>	2014-06-02 15:26:58 -0600
commit	a96185e8b69725985e48926b7b28747ddbbfe9b6 (patch)
tree	538e1f759c900a5a368d04a57a09af07f8d32099 /node.go
parent	Add ipxed to README. (diff)
download	dedo-a96185e8b69725985e48926b7b28747ddbbfe9b6.tar.gz dedo-a96185e8b69725985e48926b7b28747ddbbfe9b6.tar.xz