diff options
author | Ben Johnson <benbjohnson@yahoo.com> | 2014-06-03 16:44:58 -0600 |
---|---|---|
committer | Ben Johnson <benbjohnson@yahoo.com> | 2014-06-03 16:44:58 -0600 |
commit | 510143d852e394d2fb5fb773bab41f1d9e35d602 (patch) | |
tree | c4d06f342b5e53d5eea7a34d3d5138db74f26b15 /node.go | |
parent | Add ipxed to README. (diff) | |
parent | Fix merge-split spill issues. (diff) | |
download | dedo-510143d852e394d2fb5fb773bab41f1d9e35d602.tar.gz dedo-510143d852e394d2fb5fb773bab41f1d9e35d602.tar.xz |
Merge pull request #181 from benbjohnson/split-merge
Allow split nodes to be merged with the next node.
Diffstat (limited to 'node.go')
-rw-r--r-- | node.go | 121 |
1 file changed, 75 insertions, 46 deletions
@@ -14,7 +14,7 @@ type node struct { key []byte pgid pgid parent *node - children []*node + children nodes inodes inodes } @@ -205,15 +205,14 @@ func (n *node) write(p *page) { // DEBUG ONLY: n.dump() } -// split breaks up a node into smaller nodes, if appropriate. +// split breaks up a node into two smaller nodes, if appropriate. // This should only be called from the spill() function. func (n *node) split(pageSize int) []*node { - var nodes = []*node{n} - // Ignore the split if the page doesn't have at least enough nodes for - // multiple pages or if the data can fit on a single page. - if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize { - return nodes + // two pages or if the data can fit on a single page. + sz := n.size() + if len(n.inodes) <= (minKeysPerPage*2) || sz < pageSize { + return []*node{n} } // Determine the threshold before starting a new node. @@ -225,43 +224,60 @@ func (n *node) split(pageSize int) []*node { } threshold := int(float64(pageSize) * fillPercent) - // Group into smaller pages and target a given fill size. - size := pageHeaderSize - internalNodes := n.inodes - current := n - current.inodes = nil - - // Loop over every inode and split once we reach our threshold. - for i, inode := range internalNodes { - elemSize := n.pageElementSize() + len(inode.key) + len(inode.value) - - // Split once we reach our threshold split size. However, this should - // only be done if we have enough keys for this node and we will have - // enough keys for the next node. - if len(current.inodes) >= minKeysPerPage && i < len(internalNodes)-minKeysPerPage && size+elemSize > threshold { - // If there's no parent then we need to create one. - if n.parent == nil { - n.parent = &node{bucket: n.bucket, children: []*node{n}} - } + // Determine split position and sizes of the two pages. + splitIndex, sz0 := n.splitIndex(threshold) + sz1 := pageHeaderSize + (sz - sz0) - // Create a new node and add it to the parent. 
- current = &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent} - n.parent.children = append(n.parent.children, current) - nodes = append(nodes, current) + // If we can fit our extra keys on the next page then merge into it. + if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold { + next.inodes = append(n.inodes[splitIndex:], next.inodes...) + n.inodes = n.inodes[:splitIndex] + return []*node{n} + } - // Reset our running total back to zero (plus header size). - size = pageHeaderSize + // Otherwise split node into two separate nodes. If there's no parent then + // we'll need to create one. + if n.parent == nil { + n.parent = &node{bucket: n.bucket, children: []*node{n}} + } - // Update the statistics. - n.bucket.tx.stats.Split++ + // Create a new node and add it to the parent. + next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent} + n.parent.children = append(n.parent.children, next) + + // Split inodes across two nodes. + next.inodes = n.inodes[splitIndex:] + n.inodes = n.inodes[:splitIndex] + + // Update the statistics. + n.bucket.tx.stats.Split++ + + return []*node{n, next} +} + +// splitIndex finds the position where a page will fill a given threshold. +// It returns the index as well as the size of the first page. +// This is only be called from split(). +func (n *node) splitIndex(threshold int) (index, sz int) { + sz = pageHeaderSize + + // Loop until we only have the minimum number of keys required for the second page. + for i := 0; i < len(n.inodes)-minKeysPerPage; i++ { + index = i + inode := n.inodes[i] + elsize := n.pageElementSize() + len(inode.key) + len(inode.value) + + // If we have at least the minimum number of keys and adding another + // node would put us over the threshold then exit and return. + if i >= minKeysPerPage && sz+elsize > threshold { + break } - // Increase our running total of the size and append the inode. 
- size += elemSize - current.inodes = append(current.inodes, inode) + // Add the element size to the total size. + sz += elsize } - return nodes + return } // spill writes the nodes to dirty pages and splits nodes as it goes. @@ -269,22 +285,29 @@ func (n *node) split(pageSize int) []*node { func (n *node) spill() error { var tx = n.bucket.tx - // Spill child nodes first. - for _, child := range n.children { - if err := child.spill(); err != nil { + // Spill child nodes first. Child nodes can materialize sibling nodes in + // the case of split-merge so we cannot use a range loop. We have to check + // the children size on every loop iteration. + sort.Sort(n.children) + for i := 0; i < len(n.children); i++ { + if err := n.children[i].spill(); err != nil { return err } } - // Add node's page to the freelist if it's not new. - if n.pgid > 0 { - tx.db.freelist.free(tx.id(), tx.page(n.pgid)) - n.pgid = 0 - } + // We no longer need the child list because it's only used for spill tracking. + n.children = nil - // Spill nodes by deepest first. + // Spill nodes by deepest first. The first node returned from split() will + // always be "n". var nodes = n.split(tx.db.pageSize) for _, node := range nodes { + // Add node's page to the freelist if it's not new. + if node.pgid > 0 { + tx.db.freelist.free(tx.id(), tx.page(node.pgid)) + node.pgid = 0 + } + // Allocate contiguous space for the node. p, err := tx.allocate((node.size() / tx.db.pageSize) + 1) if err != nil { @@ -550,6 +573,12 @@ func (n *node) dump() { } */ +type nodes []*node + +func (s nodes) Len() int { return len(s) } +func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 } + // inode represents an internal node inside of a node. // It can be used to point to elements in a page or point // to an element which hasn't been added to a page yet. |