From 55e71b090259eb775c1bb74a2c3ec23bdfba8db5 Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Mon, 5 May 2014 10:27:02 -0600 Subject: Add inline bucket support. This commit adds support for writing small buckets directly inline to their value in their parent's leaf node. Previously, subbuckets would simply have a bucket header stored in their parent bucket which pointed to the root page. This required that every bucket use at least a single page. This has a high overhead for buckets with only one or two small items. Inline buckets checks subbuckets to see if they only have a small amount of data (about 1kb) and no subbuckets. If these conditions are met then the bucket's root node is written to a fake page which is simply a pointer to the end of the bucket's header. Fixes #124. --- node.go | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'node.go') diff --git a/node.go b/node.go index fd737ea..72145c4 100644 --- a/node.go +++ b/node.go @@ -2,6 +2,8 @@ package bolt import ( "bytes" + "fmt" + "io" "sort" "unsafe" ) @@ -191,6 +193,8 @@ func (n *node) write(p *page) { copy(b[0:], item.value) b = b[len(item.value):] } + + // DEBUG ONLY: n.dump(os.Stderr) } // split breaks up a node into smaller nodes, if appropriate. @@ -261,6 +265,7 @@ func (n *node) spill() error { // Add node's page to the freelist if it's not new. if n.pgid > 0 { tx.db.freelist.free(tx.id(), tx.page(n.pgid)) + n.pgid = 0 } // Spill nodes by deepest first. @@ -273,8 +278,8 @@ func (n *node) spill() error { } // Write the node. - node.write(p) node.pgid = p.id + node.write(p) // Insert into parent inodes. if node.parent != nil { @@ -302,8 +307,8 @@ func (n *node) spill() error { } // Write the new root. - parent.write(p) parent.pgid = p.id + parent.write(p) } return nil @@ -477,6 +482,31 @@ func (n *node) free() { } } +// dump writes the contents of the node for debugging purposes. +func (n *node) dump(w io.Writer) { + // Write node header. + var typ = "branch" + if n.isLeaf { + typ = "leaf" + } + fmt.Fprintf(w, "[NODE %d {type=%s count=%d}]\n", n.pgid, typ, len(n.inodes)) + + // Write out abbreviated version of each item. + for _, item := range n.inodes { + if n.isLeaf { + if item.flags&bucketLeafFlag != 0 { + bucket := (*bucket)(unsafe.Pointer(&item.value[0])) + fmt.Fprintf(w, "[L \"%s\" -> (bucket root=%d)]\n", item.key, bucket.root) + } else { + fmt.Fprintf(w, "[L \"%s\" -> \"%s\"]\n", item.key, item.value) + } + } else { + fmt.Fprintf(w, "[B \"%s\" -> pgid=%d]\n", item.key, item.pgid) + } + } + fmt.Fprint(w, "\n") +} + // inode represents an internal node inside of a node. // It can be used to point to elements in a page or point // to an element which hasn't been added to a page yet. -- cgit v1.2.3 From 0966dde0d44d5b42b132e3f2b8e8c6e3dc2c93fe Mon Sep 17 00:00:00 2001 From: Ben Johnson Date: Wed, 7 May 2014 10:37:50 -0600 Subject: Fix bucket free. --- bucket.go | 42 ++++++++++++++++++++++++++++++++++++++++-- bucket_test.go | 27 +++++++++++++++++++++++++++ cursor.go | 2 +- freelist.go | 5 +++-- node.go | 12 +++++++++++- 5 files changed, 82 insertions(+), 6 deletions(-) (limited to 'node.go') diff --git a/bucket.go b/bucket.go index 6672356..4ad62fe 100644 --- a/bucket.go +++ b/bucket.go @@ -392,6 +392,40 @@ func (b *Bucket) forEachPage(fn func(*page, int)) { b.tx.forEachPage(b.root, 0, fn) } +// forEachPageNode iterates over every page (or node) in a bucket. +// This also includes inline pages. +func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { + // If we have an inline page or root node then just use that. + if b.page != nil { + fn(b.page, nil, 0) + return + } + b._forEachPageNode(b.root, 0, fn) +} + +func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) { + var p, n = b.pageNode(pgid) + + // Execute function. + fn(p, n, depth) + + // Recursively loop over children. + if p != nil { + if (p.flags & branchPageFlag) != 0 { + for i := 0; i < int(p.count); i++ { + elem := p.branchPageElement(uint16(i)) + b._forEachPageNode(elem.pgid, depth+1, fn) + } + } + } else { + if !n.isLeaf { + for _, inode := range n.inodes { + b._forEachPageNode(inode.pgid, depth+1, fn) + } + } + } +} + // spill writes all the nodes for this bucket to dirty pages. func (b *Bucket) spill() error { // Spill all child buckets first. @@ -541,8 +575,12 @@ func (b *Bucket) free() { } var tx = b.tx - tx.forEachPage(b.root, 0, func(p *page, _ int) { - tx.db.freelist.free(tx.id(), p) + b.forEachPageNode(func(p *page, n *node, _ int) { + if p != nil { + tx.db.freelist.free(tx.id(), p) + } else { + n.free() + } }) b.root = 0 } diff --git a/bucket_test.go b/bucket_test.go index 8fcf84c..b87031b 100644 --- a/bucket_test.go +++ b/bucket_test.go @@ -161,6 +161,33 @@ func TestBucket_Delete(t *testing.T) { }) } +// Ensure that deleting a large set of keys will work correctly. +func TestBucket_Delete_Large(t *testing.T) { + withOpenDB(func(db *DB, path string) { + db.Update(func(tx *Tx) error { + var b, _ = tx.CreateBucket([]byte("widgets")) + for i := 0; i < 100; i++ { + assert.NoError(t, b.Put([]byte(strconv.Itoa(i)), []byte(strings.Repeat("*", 1024)))) + } + return nil + }) + db.Update(func(tx *Tx) error { + var b = tx.Bucket([]byte("widgets")) + for i := 0; i < 100; i++ { + assert.NoError(t, b.Delete([]byte(strconv.Itoa(i)))) + } + return nil + }) + db.View(func(tx *Tx) error { + var b = tx.Bucket([]byte("widgets")) + for i := 0; i < 100; i++ { + assert.Nil(t, b.Get([]byte(strconv.Itoa(i)))) + } + return nil + }) + }) +} + // Ensure that accessing and updating nested buckets is ok across transactions. func TestBucket_Nested(t *testing.T) { withOpenDB(func(db *DB, path string) { diff --git a/cursor.go b/cursor.go index 56948fc..31a3d5f 100644 --- a/cursor.go +++ b/cursor.go @@ -148,7 +148,7 @@ func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { func (c *Cursor) first() { for { // Exit when we hit a leaf page. - ref := &c.stack[len(c.stack)-1] + var ref = &c.stack[len(c.stack)-1] if ref.isLeaf() { break } diff --git a/freelist.go b/freelist.go index 6c527fe..ebe2810 100644 --- a/freelist.go +++ b/freelist.go @@ -1,7 +1,6 @@ package bolt import ( - "fmt" "sort" "unsafe" ) @@ -64,7 +63,7 @@ func (f *freelist) free(txid txid, p *page) { } f.pending[txid] = ids - f.check() // DEBUG ONLY: + // DEBUG ONLY: f.check() } // release moves all page ids for a transaction id (or older) to the freelist. @@ -115,6 +114,7 @@ func (f *freelist) write(p *page) { // check verifies there are no double free pages. // This is slow so it should only be used while debugging. // If errors are found then a panic invoked. +/* func (f *freelist) check() { var lookup = make(map[pgid]txid) for _, id := range f.ids { @@ -132,6 +132,7 @@ func (f *freelist) check() { } } } +*/ type reverseSortedPgids []pgid diff --git a/node.go b/node.go index 72145c4..1bc889d 100644 --- a/node.go +++ b/node.go @@ -336,7 +336,7 @@ func (n *node) rebalance() { // If root node is a branch and only has one node then collapse it. if !n.isLeaf && len(n.inodes) == 1 { // Move root's child up. - child := n.bucket.nodes[n.inodes[0].pgid] + child := n.bucket.node(n.inodes[0].pgid, n) n.isLeaf = child.isLeaf n.inodes = child.inodes[:] n.children = child.children @@ -357,6 +357,16 @@ func (n *node) rebalance() { return } + // If node has no keys then just remove it. + if n.numChildren() == 0 { + n.parent.del(n.key) + n.parent.removeChild(n) + delete(n.bucket.nodes, n.pgid) + n.free() + n.parent.rebalance() + return + } + _assert(n.parent.numChildren() > 1, "parent must have at least 2 children") // Destination node is right sibling if idx == 0, otherwise left sibling. -- cgit v1.2.3