-rw-r--r--	bucket_test.go	78
-rw-r--r--	freelist.go	5
-rw-r--r--	node.go	87
3 files changed, 117 insertions(+), 53 deletions(-)
diff --git a/bucket_test.go b/bucket_test.go
index 9ed63ee..803e043 100644
--- a/bucket_test.go
+++ b/bucket_test.go
@@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"fmt"
+ "math/rand"
"os"
"strconv"
"strings"
@@ -560,18 +561,19 @@ func TestBucket_Put_KeyTooLarge(t *testing.T) {
// Ensure a bucket can calculate stats.
func TestBucket_Stats(t *testing.T) {
withOpenDB(func(db *DB, path string) {
+ // Add bucket with fewer keys but one big value.
big_key := []byte("really-big-value")
+ for i := 0; i < 500; i++ {
+ db.Update(func(tx *Tx) error {
+ b, _ := tx.CreateBucketIfNotExists([]byte("woojits"))
+ return b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i)))
+ })
+ }
db.Update(func(tx *Tx) error {
- // Add bucket with fewer keys but one big value.
- b, err := tx.CreateBucket([]byte("woojits"))
- assert.NoError(t, err)
- for i := 0; i < 500; i++ {
- b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i)))
- }
- b.Put(big_key, []byte(strings.Repeat("*", 10000)))
-
- return nil
+ b, _ := tx.CreateBucketIfNotExists([]byte("woojits"))
+ return b.Put(big_key, []byte(strings.Repeat("*", 10000)))
})
+
mustCheck(db)
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("woojits"))
@@ -608,6 +610,48 @@ func TestBucket_Stats(t *testing.T) {
})
}
+// Ensure that a bucket filled in random order uses the fill percentage correctly.
+func TestBucket_Stats_RandomFill(t *testing.T) {
+ if os.Getpagesize() != 4096 {
+		t.Skip("test requires a 4096-byte page size")
+ }
+
+ withOpenDB(func(db *DB, path string) {
+ db.FillPercent = 0.9
+
+ // Add a set of values in random order. It will be the same random
+ // order so we can maintain consistency between test runs.
+ r := rand.New(rand.NewSource(42))
+ for _, i := range r.Perm(1000) {
+ db.Update(func(tx *Tx) error {
+ b, _ := tx.CreateBucketIfNotExists([]byte("woojits"))
+ for _, j := range r.Perm(100) {
+ index := (j * 10000) + i
+ b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000"))
+ }
+ return nil
+ })
+ mustCheck(db)
+ }
+
+ db.View(func(tx *Tx) error {
+ s := tx.Bucket([]byte("woojits")).Stats()
+ assert.Equal(t, 100000, s.KeyN, "KeyN")
+
+ assert.Equal(t, 22, s.BranchPageN, "BranchPageN")
+ assert.Equal(t, 0, s.BranchOverflowN, "BranchOverflowN")
+ assert.Equal(t, 62963, s.BranchInuse, "BranchInuse")
+ assert.Equal(t, 90112, s.BranchAlloc, "BranchAlloc")
+
+ assert.Equal(t, 1677, s.LeafPageN, "LeafPageN")
+ assert.Equal(t, 0, s.LeafOverflowN, "LeafOverflowN")
+ assert.Equal(t, 4714722, s.LeafInuse, "LeafInuse")
+ assert.Equal(t, 6868992, s.LeafAlloc, "LeafAlloc")
+ return nil
+ })
+ })
+}
+
// Ensure a small bucket can calculate stats.
func TestBucket_Stats_Small(t *testing.T) {
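
A note on the seeded generator used in TestBucket_Stats_RandomFill: math/rand
is deterministic for a fixed seed, so rand.New(rand.NewSource(42)) replays the
same permutations on every run, which is what makes the byte-exact stats
assertions stable. A minimal standalone sketch of that property (plain Go,
independent of this package):

package main

import (
	"fmt"
	"math/rand"
)

func main() {
	// Two generators with the same seed emit identical permutations.
	a := rand.New(rand.NewSource(42))
	b := rand.New(rand.NewSource(42))
	fmt.Println(a.Perm(10))
	fmt.Println(b.Perm(10)) // same permutation as the line above
}
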
@@ -750,11 +794,11 @@ func TestBucket_Stats_Large(t *testing.T) {
withOpenDB(func(db *DB, path string) {
var index int
- for i := 0; i < 1000; i++ {
+ for i := 0; i < 10000; i++ {
db.Update(func(tx *Tx) error {
// Add bucket with lots of keys.
b, _ := tx.CreateBucketIfNotExists([]byte("widgets"))
- for i := 0; i < 100; i++ {
+ for i := 0; i < 10; i++ {
b.Put([]byte(strconv.Itoa(index)), []byte(strconv.Itoa(index)))
index++
}
@@ -766,18 +810,18 @@ func TestBucket_Stats_Large(t *testing.T) {
db.View(func(tx *Tx) error {
b := tx.Bucket([]byte("widgets"))
stats := b.Stats()
- assert.Equal(t, 19, stats.BranchPageN, "BranchPageN")
+ assert.Equal(t, 13, stats.BranchPageN, "BranchPageN")
assert.Equal(t, 0, stats.BranchOverflowN, "BranchOverflowN")
- assert.Equal(t, 1291, stats.LeafPageN, "LeafPageN")
+ assert.Equal(t, 1195, stats.LeafPageN, "LeafPageN")
assert.Equal(t, 0, stats.LeafOverflowN, "LeafOverflowN")
assert.Equal(t, 100000, stats.KeyN, "KeyN")
assert.Equal(t, 3, stats.Depth, "Depth")
- assert.Equal(t, 27007, stats.BranchInuse, "BranchInuse")
- assert.Equal(t, 2598436, stats.LeafInuse, "LeafInuse")
+ assert.Equal(t, 25208, stats.BranchInuse, "BranchInuse")
+ assert.Equal(t, 2596900, stats.LeafInuse, "LeafInuse")
if os.Getpagesize() == 4096 {
		// These allocation sizes depend on the 4KB page size.
- assert.Equal(t, 77824, stats.BranchAlloc, "BranchAlloc")
- assert.Equal(t, 5287936, stats.LeafAlloc, "LeafAlloc")
+ assert.Equal(t, 53248, stats.BranchAlloc, "BranchAlloc")
+ assert.Equal(t, 4894720, stats.LeafAlloc, "LeafAlloc")
}
assert.Equal(t, 1, stats.BucketN, "BucketN")
assert.Equal(t, 0, stats.InlineBucketN, "InlineBucketN")
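
The reworked stats tests above insert each batch in its own Update transaction
so that splits and rebalances happen between batches rather than once at the
end. A rough sketch of the same pattern as an in-package helper; this is only
an illustration reusing the DB/Tx/Stats API visible in the tests (the helper
and bucket names are made up), and it assumes the "fmt" and "math/rand"
imports already present in this file:

// fillRandom inserts n fixed-width keys in a seeded random order, one key
// per Update transaction, then prints the bucket's stats.
func fillRandom(db *DB, n int) error {
	r := rand.New(rand.NewSource(1))
	for _, i := range r.Perm(n) {
		err := db.Update(func(tx *Tx) error {
			b, err := tx.CreateBucketIfNotExists([]byte("demo"))
			if err != nil {
				return err
			}
			return b.Put([]byte(fmt.Sprintf("%016d", i)), []byte("v"))
		})
		if err != nil {
			return err
		}
	}
	return db.View(func(tx *Tx) error {
		s := tx.Bucket([]byte("demo")).Stats()
		fmt.Printf("keys=%d leaf=%d branch=%d depth=%d\n", s.KeyN, s.LeafPageN, s.BranchPageN, s.Depth)
		return nil
	})
}
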
diff --git a/freelist.go b/freelist.go
index a236079..2fa4a94 100644
--- a/freelist.go
+++ b/freelist.go
@@ -70,12 +70,14 @@ func (f *freelist) allocate(n int) pgid {
// free releases a page and its overflow for a given transaction id.
// If the page is already free then a panic will occur.
func (f *freelist) free(txid txid, p *page) {
+ warn("free:", txid, p.id, p.overflow)
_assert(p.id > 1, "cannot free page 0 or 1: %d", p.id)
// Verify that page is not already free.
minid, maxid := p.id, p.id+pgid(p.overflow)
for _, id := range f.ids {
if id >= minid && id <= maxid {
+ warn(" ‡", id, "|", minid, maxid)
panic(fmt.Sprintf("page %d already freed in tx", id))
}
}
@@ -90,6 +92,9 @@ func (f *freelist) free(txid txid, p *page) {
// Free page and all its overflow pages.
var ids = f.pending[txid]
for i := 0; i < int(p.overflow+1); i++ {
+ if p.id+pgid(i) == 55 {
+ warn(" •", txid, p.id+pgid(i))
+ }
ids = append(ids, p.id+pgid(i))
}
f.pending[txid] = ids
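
For readers following the freelist change: free does not return a page to the
reusable id list immediately. It parks the page and its overflow pages under
the freeing transaction's id in f.pending, and panics if any of those pages is
already in f.ids. A standalone model of that two-stage bookkeeping, with
simplified stand-in types rather than Bolt's actual freelist:

package main

import "fmt"

type pgid uint64
type txid uint64

// freelist models two-stage page freeing: ids holds pages free for reuse
// now; pending holds pages freed by transactions that may still be visible
// to readers.
type freelist struct {
	ids     []pgid
	pending map[txid][]pgid
}

// free parks page p plus its overflow pages under tx.
func (f *freelist) free(tx txid, p pgid, overflow int) {
	for _, id := range f.ids {
		if id >= p && id <= p+pgid(overflow) {
			panic(fmt.Sprintf("page %d already freed", id))
		}
	}
	for i := 0; i <= overflow; i++ {
		f.pending[tx] = append(f.pending[tx], p+pgid(i))
	}
}

// release moves a finished transaction's pending pages into the free list.
func (f *freelist) release(tx txid) {
	f.ids = append(f.ids, f.pending[tx]...)
	delete(f.pending, tx)
}

func main() {
	f := &freelist{pending: make(map[txid][]pgid)}
	f.free(5, 12, 1) // parks pages 12 and 13 under txid 5
	f.release(5)
	fmt.Println(f.ids) // [12 13]
}
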
diff --git a/node.go b/node.go
index 1502be0..57a50e9 100644
--- a/node.go
+++ b/node.go
@@ -205,15 +205,14 @@ func (n *node) write(p *page) {
// DEBUG ONLY: n.dump()
}
-// split breaks up a node into smaller nodes, if appropriate.
+// split breaks up a node into two smaller nodes, if appropriate.
// This should only be called from the spill() function.
func (n *node) split(pageSize int) []*node {
- var nodes = []*node{n}
-
// Ignore the split if the page doesn't have at least enough nodes for
- // multiple pages or if the data can fit on a single page.
- if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize {
- return nodes
+ // two pages or if the data can fit on a single page.
+ sz := n.size()
+ if len(n.inodes) <= (minKeysPerPage*2) || sz < pageSize {
+ return []*node{n}
}
// Determine the threshold before starting a new node.
@@ -225,43 +224,59 @@ func (n *node) split(pageSize int) []*node {
}
threshold := int(float64(pageSize) * fillPercent)
- // Group into smaller pages and target a given fill size.
- size := pageHeaderSize
- internalNodes := n.inodes
- current := n
- current.inodes = nil
-
- // Loop over every inode and split once we reach our threshold.
- for i, inode := range internalNodes {
- elemSize := n.pageElementSize() + len(inode.key) + len(inode.value)
-
- // Split once we reach our threshold split size. However, this should
- // only be done if we have enough keys for this node and we will have
- // enough keys for the next node.
- if len(current.inodes) >= minKeysPerPage && i < len(internalNodes)-minKeysPerPage && size+elemSize > threshold {
- // If there's no parent then we need to create one.
- if n.parent == nil {
- n.parent = &node{bucket: n.bucket, children: []*node{n}}
- }
+ // Determine split position and sizes of the two pages.
+ splitIndex, sz0 := n.splitIndex(threshold)
+ sz1 := pageHeaderSize + (sz - sz0)
+
+ // If we can fit our extra keys on the next page then merge into it.
+ if next := n.nextSibling(); next != nil && next.size()+sz1 < threshold {
+		next.inodes = append(n.inodes[splitIndex:], next.inodes...)
+		n.inodes = n.inodes[:splitIndex] // truncate so the moved keys live only in the sibling
+		return []*node{n}
+ }
+
+ // Otherwise split node into two separate nodes. If there's no parent then
+ // we'll need to create one.
+ if n.parent == nil {
+ n.parent = &node{bucket: n.bucket, children: []*node{n}}
+ }
+
+ // Create a new node and add it to the parent.
+ next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
+ n.parent.children = append(n.parent.children, next)
- // Create a new node and add it to the parent.
- current = &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
- n.parent.children = append(n.parent.children, current)
- nodes = append(nodes, current)
+ // Split inodes across two nodes.
+ next.inodes = n.inodes[splitIndex:]
+ n.inodes = n.inodes[:splitIndex]
- // Reset our running total back to zero (plus header size).
- size = pageHeaderSize
+ // Update the statistics.
+ n.bucket.tx.stats.Split++
+
+ return []*node{n, next}
+}
- // Update the statistics.
- n.bucket.tx.stats.Split++
+// splitIndex finds the position where a page will fill a given threshold.
+// It returns the index as well as the size of the first page.
+// This should only be called from split().
+func (n *node) splitIndex(threshold int) (index, sz int) {
+ sz = pageHeaderSize
+
+ // Loop until we only have the minimum number of keys required for the second page.
+ for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
+ index = i
+ inode := n.inodes[i]
+ elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+ // If we have at least the minimum number of keys and adding another
+ // node would put us over the threshold then exit and return.
+ if i >= minKeysPerPage && sz+elsize > threshold {
+ break
}
- // Increase our running total of the size and append the inode.
- size += elemSize
- current.inodes = append(current.inodes, inode)
+ // Add the element size to the total size.
+ sz += elsize
}
- return nodes
+ return
}
// spill writes the nodes to dirty pages and splits nodes as it goes.
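
To see the threshold arithmetic in splitIndex concretely: with a 4KB page and
the 0.9 fill percent used by the random-fill test, threshold = int(4096*0.9) =
3686. Below is a standalone sketch of the same scan over synthetic fixed-size
elements; pageHeaderSize and minKeysPerPage are stand-in values here (the real
constants live elsewhere in this package):

package main

import "fmt"

const (
	pageHeaderSize = 16 // stand-in value for illustration
	minKeysPerPage = 2  // stand-in value for illustration
)

// splitIndex mirrors node.splitIndex: accumulate element sizes and stop once
// the minimum key count is met and the next element would cross the
// threshold. It returns the index where the second page starts and the byte
// size of the first page.
func splitIndex(elsizes []int, threshold int) (index, sz int) {
	sz = pageHeaderSize
	for i := 0; i < len(elsizes)-minKeysPerPage; i++ {
		index = i
		if i >= minKeysPerPage && sz+elsizes[i] > threshold {
			break
		}
		sz += elsizes[i]
	}
	return
}

func main() {
	// 100 elements of 150 bytes each: the first page takes 24 elements,
	// since 16 + 24*150 = 3616 <= 3686 but adding a 25th would exceed it.
	elsizes := make([]int, 100)
	for i := range elsizes {
		elsizes[i] = 150
	}
	index, sz := splitIndex(elsizes, int(4096*0.9))
	fmt.Println(index, sz) // 24 3616
}
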