aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bucket.go1
-rw-r--r--bucket_test.go93
-rw-r--r--db_test.go34
-rw-r--r--node.go121
4 files changed, 181 insertions, 68 deletions
diff --git a/bucket.go b/bucket.go
index 2338a8b..e66d41c 100644
--- a/bucket.go
+++ b/bucket.go
@@ -613,6 +613,7 @@ func (b *Bucket) rebalance() {
// node creates a node from a page and associates it with a given parent.
func (b *Bucket) node(pgid pgid, parent *node) *node {
_assert(b.nodes != nil, "nodes map expected")
+
// Retrieve node if it's already been created.
if n := b.nodes[pgid]; n != nil {
return n
diff --git a/bucket_test.go b/bucket_test.go
index 9ed63ee..bae3941 100644
--- a/bucket_test.go
+++ b/bucket_test.go
@@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"fmt"
+ "math/rand"
"os"
"strconv"
"strings"
@@ -560,35 +561,36 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
// Ensure a bucket can calculate stats.
func TestBucket_Stats(t *testing.T) {
withOpenDB(func(db *DB, path string) {
+ // Add bucket with fewer keys but one big value.
big_key := []byte("really-big-value")
+ for i := 0; i < 500; i++ {
+ db.Update(func(tx *Tx) error {
+ b, _ := tx.CreateBucketIfNotExists([]byte("woojits"))
+ return b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i)))
+ })
+ }
db.Update(func(tx *Tx) error {
- // Add bucket with fewer keys but one big value.
- b, err := tx.CreateBucket([]byte("woojits"))
- assert.NoError(t, err)
- for i := 0; i < 500; i++ {
- b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i)))
- }
- b.Put(big_key, []byte(strings.Repeat("*", 10000)))
-
- return nil
+ b, _ := tx.CreateBucketIfNotExists([]byte("woojits"))
+ return b.Put(big_key, []byte(strings.Repeat("*", 10000)))
})
+
mustCheck(db)
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("woojits"))
stats := b.Stats()
assert.Equal(t, 1, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
- assert.Equal(t, 6, stats.LeafPageN, "LeafPageN")
+ assert.Equal(t, 7, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 2, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 501, stats.KeyN, "KeyN")
assert.Equal(t, 2, stats.Depth, "Depth")
branchInuse := pageHeaderSize // branch page header
- branchInuse += 6 * branchPageElementSize // branch elements
- branchInuse += 6 * 3 // branch keys (6 3-byte keys)
+ branchInuse += 7 * branchPageElementSize // branch elements
+ branchInuse += 7 * 3 // branch keys (6 3-byte keys)
assert.Equal(t, branchInuse, stats.BranchInuse, "BranchInuse")
- leafInuse := 6 * pageHeaderSize // leaf page header
+ leafInuse := 7 * pageHeaderSize // leaf page header
leafInuse += 501 * leafPageElementSize // leaf elements
leafInuse += 500*3 + len(big_key) // leaf keys
leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values
@@ -597,7 +599,7 @@ func TestBucket_Stats(t *testing.T) {
if os.Getpagesize() == 4096 {
// Incompatible page size
assert.Equal(t, 4096, stats.BranchAlloc, "BranchAlloc")
- assert.Equal(t, 32768, stats.LeafAlloc, "LeafAlloc")
+ assert.Equal(t, 36864, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
@@ -608,6 +610,53 @@ func TestBucket_Stats(t *testing.T) {
})
}
+// Ensure a bucket with random insertion utilizes fill percentage correctly.
+func TestBucket_Stats_RandomFill(t *testing.T) {
+ if testing.Short() {
+ t.Skip("skipping test in short mode.")
+ }
+ if os.Getpagesize() != 4096 {
+ t.Skip("invalid page size for test")
+ }
+
+ withOpenDB(func(db *DB, path string) {
+ db.FillPercent = 0.9
+
+ // Add a set of values in random order. It will be the same random
+ // order so we can maintain consistency between test runs.
+ var count int
+ r := rand.New(rand.NewSource(42))
+ for _, i := range r.Perm(1000) {
+ db.Update(func(tx *Tx) error {
+ b, _ := tx.CreateBucketIfNotExists([]byte("woojits"))
+ for _, j := range r.Perm(100) {
+ index := (j * 10000) + i
+ b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000"))
+ count++
+ }
+ return nil
+ })
+ }
+ mustCheck(db)
+
+ db.View(func(tx *Tx) error {
+ s := tx.Bucket([]byte("woojits")).Stats()
+ assert.Equal(t, 100000, s.KeyN, "KeyN")
+
+ assert.Equal(t, 22, s.BranchPageN, "BranchPageN")
+ assert.Equal(t, 0, s.BranchOverflowN, "BranchOverflowN")
+ assert.Equal(t, 61708, s.BranchInuse, "BranchInuse")
+ assert.Equal(t, 90112, s.BranchAlloc, "BranchAlloc")
+
+ assert.Equal(t, 1643, s.LeafPageN, "LeafPageN")
+ assert.Equal(t, 0, s.LeafOverflowN, "LeafOverflowN")
+ assert.Equal(t, 4714178, s.LeafInuse, "LeafInuse")
+ assert.Equal(t, 6729728, s.LeafAlloc, "LeafAlloc")
+ return nil
+ })
+ })
+}
+
// Ensure a bucket can calculate stats.
func TestBucket_Stats_Small(t *testing.T) {
@@ -750,11 +799,11 @@ func TestBucket_Stats_Large(t *testing.T) {
withOpenDB(func(db *DB, path string) {
var index int
- for i := 0; i < 1000; i++ {
+ for i := 0; i < 10000; i++ {
db.Update(func(tx *Tx) error {
// Add bucket with lots of keys.
b, _ := tx.CreateBucketIfNotExists([]byte("widgets"))
- for i := 0; i < 100; i++ {
+ for i := 0; i < 10; i++ {
b.Put([]byte(strconv.Itoa(index)), []byte(strconv.Itoa(index)))
index++
}
@@ -766,18 +815,18 @@ func TestBucket_Stats_Large(t *testing.T) {
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("widgets"))
stats := b.Stats()
- assert.Equal(t, 19, stats.BranchPageN, "BranchPageN")
+ assert.Equal(t, 13, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
- assert.Equal(t, 1291, stats.LeafPageN, "LeafPageN")
+ assert.Equal(t, 1195, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 0, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 100000, stats.KeyN, "KeyN")
assert.Equal(t, 3, stats.Depth, "Depth")
- assert.Equal(t, 27007, stats.BranchInuse, "BranchInuse")
- assert.Equal(t, 2598436, stats.LeafInuse, "LeafInuse")
+ assert.Equal(t, 25208, stats.BranchInuse, "BranchInuse")
+ assert.Equal(t, 2596900, stats.LeafInuse, "LeafInuse")
if os.Getpagesize() == 4096 {
// Incompatible page size
- assert.Equal(t, 77824, stats.BranchAlloc, "BranchAlloc")
- assert.Equal(t, 5287936, stats.LeafAlloc, "LeafAlloc")
+ assert.Equal(t, 53248, stats.BranchAlloc, "BranchAlloc")
+ assert.Equal(t, 4894720, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
assert.Equal(t, 0, stats.InlineBucketN, "InlineBucketN")
diff --git a/db_test.go b/db_test.go
index 691c8fb..9fd4662 100644
--- a/db_test.go
+++ b/db_test.go
@@ -7,6 +7,8 @@ import (
"io/ioutil"
"os"
"regexp"
+ "sort"
+ "strings"
"testing"
"time"
"unsafe"
@@ -520,6 +522,38 @@ func mustCheck(db *DB) {
}
}
+// mustContainKeys checks that a bucket contains a given set of keys.
+func mustContainKeys(b *Bucket, m map[string]string) {
+ found := make(map[string]string)
+ b.ForEach(func(k, _ []byte) error {
+ found[string(k)] = ""
+ return nil
+ })
+
+ // Check for keys found in bucket that shouldn't be there.
+ var keys []string
+ for k, _ := range found {
+ if _, ok := m[string(k)]; !ok {
+ keys = append(keys, k)
+ }
+ }
+ if len(keys) > 0 {
+ sort.Strings(keys)
+ panic(fmt.Sprintf("keys found(%d): %s", len(keys), strings.Join(keys, ",")))
+ }
+
+ // Check for keys not found in bucket that should be there.
+ for k, _ := range m {
+ if _, ok := found[string(k)]; !ok {
+ keys = append(keys, k)
+ }
+ }
+ if len(keys) > 0 {
+ sort.Strings(keys)
+ panic(fmt.Sprintf("keys not found(%d): %s", len(keys), strings.Join(keys, ",")))
+ }
+}
+
func trunc(b []byte, length int) []byte {
if length < len(b) {
return b[:length]
diff --git a/node.go b/node.go
index 1502be0..f0978ca 100644
--- a/node.go
+++ b/node.go
@@ -14,7 +14,7 @@ type node struct {
key []byte
pgid pgid
parent *node
- children []*node
+ children nodes
inodes inodes
}
@@ -205,15 +205,14 @@ func (n *node) write(p *page) {
// DEBUG ONLY: n.dump()
}
-// split breaks up a node into smaller nodes, if appropriate.
+// split breaks up a node into two smaller nodes, if appropriate.
// This should only be called from the spill() function.
func (n *node) split(pageSize int) []*node {
- var nodes = []*node{n}
-
// Ignore the split if the page doesn't have at least enough nodes for
- // multiple pages or if the data can fit on a single page.
- if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize {
- return nodes
+ // two pages or if the data can fit on a single page.
+ sz := n.size()
+ if len(n.inodes) <= (minKeysPerPage*2) || sz < pageSize {
+ return []*node{n}
}
// Determine the threshold before starting a new node.
@@ -225,43 +224,60 @@ func (n *node) split(pageSize int) []*node {
}
threshold := int(float64(pageSize) * fillPercent)
- // Group into smaller pages and target a given fill size.
- size := pageHeaderSize
- internalNodes := n.inodes
- current := n
- current.inodes = nil
-
- // Loop over every inode and split once we reach our threshold.
- for i, inode := range internalNodes {
- elemSize := n.pageElementSize() + len(inode.key) + len(inode.value)
-
- // Split once we reach our threshold split size. However, this should
- // only be done if we have enough keys for this node and we will have
- // enough keys for the next node.
- if len(current.inodes) >= minKeysPerPage && i < len(internalNodes)-minKeysPerPage && size+elemSize > threshold {
- // If there's no parent then we need to create one.
- if n.parent == nil {
- n.parent = &node{bucket: n.bucket, children: []*node{n}}
- }
+ // Determine split position and sizes of the two pages.
+ splitIndex, sz0 := n.splitIndex(threshold)
+ sz1 := pageHeaderSize + (sz - sz0)
- // Create a new node and add it to the parent.
- current = &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
- n.parent.children = append(n.parent.children, current)
- nodes = append(nodes, current)
+ // If we can fit our extra keys on the next page then merge into it.
+ if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
+ next.inodes = append(n.inodes[splitIndex:], next.inodes...)
+ n.inodes = n.inodes[:splitIndex]
+ return []*node{n}
+ }
- // Reset our running total back to zero (plus header size).
- size = pageHeaderSize
+ // Otherwise split node into two separate nodes. If there's no parent then
+ // we'll need to create one.
+ if n.parent == nil {
+ n.parent = &node{bucket: n.bucket, children: []*node{n}}
+ }
- // Update the statistics.
- n.bucket.tx.stats.Split++
+ // Create a new node and add it to the parent.
+ next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
+ n.parent.children = append(n.parent.children, next)
+
+ // Split inodes across two nodes.
+ next.inodes = n.inodes[splitIndex:]
+ n.inodes = n.inodes[:splitIndex]
+
+ // Update the statistics.
+ n.bucket.tx.stats.Split++
+
+ return []*node{n, next}
+}
+
+// splitIndex finds the position where a page will fill a given threshold.
+// It returns the index as well as the size of the first page.
+// This is only be called from split().
+func (n *node) splitIndex(threshold int) (index, sz int) {
+ sz = pageHeaderSize
+
+ // Loop until we only have the minimum number of keys required for the second page.
+ for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
+ index = i
+ inode := n.inodes[i]
+ elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+ // If we have at least the minimum number of keys and adding another
+ // node would put us over the threshold then exit and return.
+ if i >= minKeysPerPage && sz+elsize > threshold {
+ break
}
- // Increase our running total of the size and append the inode.
- size += elemSize
- current.inodes = append(current.inodes, inode)
+ // Add the element size to the total size.
+ sz += elsize
}
- return nodes
+ return
}
// spill writes the nodes to dirty pages and splits nodes as it goes.
@@ -269,22 +285,29 @@ func (n *node) split(pageSize int) []*node {
func (n *node) spill() error {
var tx = n.bucket.tx
- // Spill child nodes first.
- for _, child := range n.children {
- if err := child.spill(); err != nil {
+ // Spill child nodes first. Child nodes can materialize sibling nodes in
+ // the case of split-merge so we cannot use a range loop. We have to check
+ // the children size on every loop iteration.
+ sort.Sort(n.children)
+ for i := 0; i < len(n.children); i++ {
+ if err := n.children[i].spill(); err != nil {
return err
}
}
- // Add node's page to the freelist if it's not new.
- if n.pgid > 0 {
- tx.db.freelist.free(tx.id(), tx.page(n.pgid))
- n.pgid = 0
- }
+ // We no longer need the child list because it's only used for spill tracking.
+ n.children = nil
- // Spill nodes by deepest first.
+ // Spill nodes by deepest first. The first node returned from split() will
+ // always be "n".
var nodes = n.split(tx.db.pageSize)
for _, node := range nodes {
+ // Add node's page to the freelist if it's not new.
+ if node.pgid > 0 {
+ tx.db.freelist.free(tx.id(), tx.page(node.pgid))
+ node.pgid = 0
+ }
+
// Allocate contiguous space for the node.
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
if err != nil {
@@ -550,6 +573,12 @@ func (n *node) dump() {
}
*/
+type nodes []*node
+
+func (s nodes) Len() int { return len(s) }
+func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
+
// inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.