aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Johnson <benbjohnson@yahoo.com>2014-01-31 13:18:51 -0500
committerBen Johnson <benbjohnson@yahoo.com>2014-02-01 12:30:37 -0500
commit1a17a2cf1ee8b509dd00b7f29a01c13108acb2cc (patch)
treed6aa197560053444d76dce1bd198a584c051c7b9
parentMerge pull request #4 from benbjohnson/api (diff)
downloaddedo-1a17a2cf1ee8b509dd00b7f29a01c13108acb2cc.tar.gz
dedo-1a17a2cf1ee8b509dd00b7f29a01c13108acb2cc.tar.xz
Add RWTransaction.Put().
-rw-r--r--NOTES17
-rw-r--r--TODO2
-rw-r--r--bnode.go20
-rw-r--r--branch.go129
-rw-r--r--branch_test.go161
-rw-r--r--bucket.go9
-rw-r--r--cursor.go67
-rw-r--r--db.go46
-rw-r--r--db_test.go74
-rw-r--r--leaf.go120
-rw-r--r--leaf_test.go124
-rw-r--r--lnode.go27
-rw-r--r--meta.go5
-rw-r--r--node.go178
-rw-r--r--node_test.go129
-rw-r--r--page.go85
-rw-r--r--quick_test.go65
-rw-r--r--rwtransaction.go159
-rw-r--r--rwtransaction_test.go33
-rw-r--r--sys.go17
-rw-r--r--transaction.go2
-rw-r--r--warn.go2
22 files changed, 735 insertions, 736 deletions
diff --git a/NOTES b/NOTES
new file mode 100644
index 0000000..967d3aa
--- /dev/null
+++ b/NOTES
@@ -0,0 +1,17 @@
+
+
+ ===0===
+ | d|g |
+ |1|2|3|
+ =======
+ | | |
+ ------ | -------
+ | | |
+===1=== ===2=== ===3===
+|a|b|c| |d|e|f| |g|h|i|
+|-|-|-| |-|-|-| |-|-|-|
+|*|*|*| |*|*|*| |*|*|*|
+|*|*|*| |*|*|*| |*|*|*|
+|*|*|*| |*|*|*| |*|*|*|
+
+
diff --git a/TODO b/TODO
index efd1a81..9c8a1a0 100644
--- a/TODO
+++ b/TODO
@@ -2,7 +2,7 @@ TODO
====
X Open DB.
X Initialize transaction.
-- Cursor First, Goto(key), Next
+- Cursor First, Get(key), Next
- RWTransaction.insert()
- rebalance
- adjust cursors
diff --git a/bnode.go b/bnode.go
deleted file mode 100644
index 3ad4f58..0000000
--- a/bnode.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package bolt
-
-import (
- "unsafe"
-)
-
-const bnodeSize = int(unsafe.Sizeof(lnode{}))
-
-// bnode represents a node on a branch page.
-type bnode struct {
- pos uint32
- ksize uint32
- pgid pgid
-}
-
-// key returns a byte slice of the node key.
-func (n *bnode) key() []byte {
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
- return buf[n.pos : n.pos+n.ksize]
-}
diff --git a/branch.go b/branch.go
deleted file mode 100644
index c4f0b1f..0000000
--- a/branch.go
+++ /dev/null
@@ -1,129 +0,0 @@
-package bolt
-
-import (
- "bytes"
- "unsafe"
-)
-
-// branch represents a temporary in-memory branch page.
-type branch struct {
- pgid pgid
- depth int
- parent *branch
- items branchItems
-}
-
-// size returns the size of the branch after serialization.
-func (b *branch) size() int {
- var size int = pageHeaderSize
- for _, item := range b.items {
- size += bnodeSize + len(item.key)
- }
- return size
-}
-
-// put adds a new node or replaces an existing node.
-func (b *branch) put(id pgid, newid pgid, key []byte, replace bool) {
- var index int
- for ; index < len(b.items); index++ {
- if b.items[index].pgid == id {
- break
- }
- }
-
- if !replace {
- index++
- b.items = append(b.items, branchItem{})
- if index < len(b.items) {
- copy(b.items[index+1:], b.items[index:])
- }
- }
-
- b.items[index].pgid = newid
- b.items[index].key = key
-}
-
-// read initializes the item data from an on-disk page.
-func (b *branch) read(p *page) {
- b.pgid = p.id
- b.items = make(branchItems, int(p.count))
- bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr))
- for i := 0; i < int(p.count); i++ {
- bnode := &bnodes[i]
- item := &b.items[i]
- item.pgid = bnode.pgid
- item.key = bnode.key()
- }
-}
-
-// write writes the items onto a branch page.
-func (b *branch) write(p *page) {
- // Initialize page.
- p.flags |= p_branch
- p.count = uint16(len(b.items))
-
- // Loop over each item and write it to the page.
- bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr))
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(b.items):]
- for index, item := range b.items {
- // Write node.
- bnode := &bnodes[index]
- bnode.pgid = item.pgid
- bnode.pos = uint32(uintptr(unsafe.Pointer(&buf[0])) - uintptr(unsafe.Pointer(bnode)))
- bnode.ksize = uint32(len(item.key))
-
- // Write key to the end of the page.
- copy(buf[0:], item.key)
- buf = buf[len(item.key):]
- }
-}
-
-// split divides up the noes in the branch into appropriately sized groups.
-func (b *branch) split(pageSize int) []*branch {
- // Ignore the split if the page doesn't have at least enough nodes for
- // multiple pages or if the data can fit on a single page.
- if len(b.items) <= (minKeysPerPage*2) || b.size() < pageSize {
- return []*branch{b}
- }
-
- // Set fill threshold to 50%.
- threshold := pageSize / 2
-
- // Otherwise group into smaller pages and target a given fill size.
- size := 0
- current := &branch{}
- branches := make([]*branch, 0)
-
- for index, item := range b.items {
- nodeSize := bnodeSize + len(item.key)
-
- if len(current.items) >= minKeysPerPage && index < len(b.items)-minKeysPerPage && size+nodeSize > threshold {
- size = pageHeaderSize
- branches = append(branches, current)
- current = &branch{}
- }
-
- size += nodeSize
- current.items = append(current.items, item)
- }
- branches = append(branches, current)
-
- return branches
-}
-
-type branches []*branch
-
-func (s branches) Len() int { return len(s) }
-func (s branches) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
-func (s branches) Less(i, j int) bool { return s[i].depth < s[j].depth }
-
-type branchItems []branchItem
-
-type branchItem struct {
- pgid pgid
- key []byte
-}
-
-func (s branchItems) Len() int { return len(s) }
-func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
-func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
diff --git a/branch_test.go b/branch_test.go
deleted file mode 100644
index 4a810c8..0000000
--- a/branch_test.go
+++ /dev/null
@@ -1,161 +0,0 @@
-package bolt
-
-import (
- "testing"
- "unsafe"
-
- "github.com/stretchr/testify/assert"
-)
-
-// Ensure that a branch can replace a key.
-func TestBranchPutReplace(t *testing.T) {
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("bar")},
- branchItem{pgid: 2, key: []byte("baz")},
- branchItem{pgid: 3, key: []byte("foo")},
- },
- }
- b.put(1, 4, []byte("bar"), true)
- b.put(2, 5, []byte("boo"), true)
- assert.Equal(t, len(b.items), 3)
- assert.Equal(t, b.items[0].pgid, pgid(4))
- assert.Equal(t, string(b.items[0].key), "bar")
- assert.Equal(t, b.items[1].pgid, pgid(5))
- assert.Equal(t, string(b.items[1].key), "boo")
- assert.Equal(t, b.items[2].pgid, pgid(3))
- assert.Equal(t, string(b.items[2].key), "foo")
-}
-
-// Ensure that a branch can insert a key.
-func TestBranchPutInsert(t *testing.T) {
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("bar")},
- branchItem{pgid: 2, key: []byte("foo")},
- },
- }
- b.put(1, 4, []byte("baz"), false)
- b.put(2, 5, []byte("zzz"), false)
- assert.Equal(t, len(b.items), 4)
- assert.Equal(t, b.items[0].pgid, pgid(1))
- assert.Equal(t, string(b.items[0].key), "bar")
- assert.Equal(t, b.items[1].pgid, pgid(4))
- assert.Equal(t, string(b.items[1].key), "baz")
- assert.Equal(t, b.items[2].pgid, pgid(2))
- assert.Equal(t, string(b.items[2].key), "foo")
- assert.Equal(t, b.items[3].pgid, pgid(5))
- assert.Equal(t, string(b.items[3].key), "zzz")
-}
-
-// Ensure that a branch can deserialize from a page.
-func TestBranchRead(t *testing.T) {
- // Create a page.
- var buf [4096]byte
- page := (*page)(unsafe.Pointer(&buf[0]))
- page.count = 2
-
- // Insert 2 items at the beginning. sizeof(bnode) == 16
- nodes := (*[3]bnode)(unsafe.Pointer(&page.ptr))
- nodes[0] = bnode{pos: 32, ksize: 3, pgid: 100} // pos = sizeof(bnode) * 2
- nodes[1] = bnode{pos: 19, ksize: 10, pgid: 101} // pos = sizeof(bnode) + 3
-
- // Write data for the nodes at the end.
- data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
- copy(data[:], []byte("bar"))
- copy(data[3:], []byte("helloworld"))
-
- // Deserialize page into a branch.
- b := &branch{}
- b.read(page)
-
- // Check that there are two items with correct data.
- assert.Equal(t, len(b.items), 2)
- assert.Equal(t, b.items[0].key, []byte("bar"))
- assert.Equal(t, b.items[1].key, []byte("helloworld"))
-}
-
-// Ensure that a branch can serialize itself.
-func TestBranchWrite(t *testing.T) {
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("susy")},
- branchItem{pgid: 2, key: []byte("ricki")},
- branchItem{pgid: 3, key: []byte("john")},
- },
- }
-
- // Write it to a page.
- var buf [4096]byte
- p := (*page)(unsafe.Pointer(&buf[0]))
- b.write(p)
-
- // Read the page back in.
- b2 := &branch{}
- b2.read(p)
-
- // Check that the two pages are the same.
- assert.Equal(t, len(b2.items), 3)
- assert.Equal(t, b2.items[0].pgid, pgid(1))
- assert.Equal(t, b2.items[0].key, []byte("susy"))
- assert.Equal(t, b2.items[1].pgid, pgid(2))
- assert.Equal(t, b2.items[1].key, []byte("ricki"))
- assert.Equal(t, b2.items[2].pgid, pgid(3))
- assert.Equal(t, b2.items[2].key, []byte("john"))
-}
-
-// Ensure that a branch can split into appropriate subgroups.
-func TestBranchSplit(t *testing.T) {
- // Create a branch.
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("00000001")},
- branchItem{pgid: 2, key: []byte("00000002")},
- branchItem{pgid: 3, key: []byte("00000003")},
- branchItem{pgid: 4, key: []byte("00000004")},
- branchItem{pgid: 5, key: []byte("00000005")},
- },
- }
-
- // Split between 3 & 4.
- branches := b.split(100)
-
- assert.Equal(t, len(branches), 2)
- assert.Equal(t, len(branches[0].items), 2)
- assert.Equal(t, len(branches[1].items), 3)
-}
-
-// Ensure that a branch with the minimum number of items just returns a single branch.
-func TestBranchSplitWithMinKeys(t *testing.T) {
- // Create a branch.
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("00000001")},
- branchItem{pgid: 2, key: []byte("00000002")},
- },
- }
-
- // Split.
- branches := b.split(20)
- assert.Equal(t, len(branches), 1)
- assert.Equal(t, len(branches[0].items), 2)
-}
-
-// Ensure that a branch that has keys that all fit on a page just returns one branch.
-func TestBranchSplitFitsInPage(t *testing.T) {
- // Create a branch.
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("00000001")},
- branchItem{pgid: 2, key: []byte("00000002")},
- branchItem{pgid: 3, key: []byte("00000003")},
- branchItem{pgid: 4, key: []byte("00000004")},
- branchItem{pgid: 5, key: []byte("00000005")},
- },
- }
-
- // Split.
- branches := b.split(4096)
- assert.Equal(t, len(branches), 1)
- assert.Equal(t, len(branches[0].items), 5)
-}
diff --git a/bucket.go b/bucket.go
index 6cfe1ca..b1c1cc0 100644
--- a/bucket.go
+++ b/bucket.go
@@ -15,13 +15,8 @@ func (b *Bucket) Name() string {
return b.name
}
-// Get retrieves the value for a key in the bucket.
-func (b *Bucket) Get(key []byte) []byte {
- return b.Cursor().Get(key)
-}
-
-// Cursor creates a new cursor for this bucket.
-func (b *Bucket) Cursor() *Cursor {
+// cursor creates a new cursor for this bucket.
+func (b *Bucket) cursor() *Cursor {
return &Cursor{
transaction: b.transaction,
root: b.root,
diff --git a/cursor.go b/cursor.go
index 7b8c9bb..d56119b 100644
--- a/cursor.go
+++ b/cursor.go
@@ -14,7 +14,7 @@ type Cursor struct {
// elem represents a node on a page that's on the cursor's stack.
type elem struct {
page *page
- index int
+ index uint16
}
// First moves the cursor to the first item in the bucket and returns its key and data.
@@ -30,25 +30,29 @@ func (c *Cursor) Next() ([]byte, []byte) {
// Get positions the cursor at a specific key and returns its value.
func (c *Cursor) Get(key []byte) []byte {
- if c.Goto(key) {
- return c.node().value()
- }
- return nil
-}
-
-// Goto positions the cursor at a specific key.
-// Returns true if an exact match or false if positioned after the closest match.
-func (c *Cursor) Goto(key []byte) bool {
- // TODO(benbjohnson): Optimize for specific use cases.
-
// Start from root page and traverse to correct page.
c.stack = c.stack[:0]
c.search(key, c.transaction.page(c.root))
+ p, index := c.top()
+
+ // If the cursor is pointing to the end of page then return nil.
+ if index == p.count {
+ return nil
+ }
- return false
+ // If our target node's key isn't the same as the key passed in then return nil.
+ // c.page().hexdump(512)
+ if !bytes.Equal(key, c.node().key()) {
+ return nil
+ }
+
+ return c.node().value()
}
func (c *Cursor) search(key []byte, p *page) {
+ if (p.flags & (p_branch | p_leaf)) == 0 {
+ panic("invalid page type: " + p.typ())
+ }
e := elem{page: p}
c.stack = append(c.stack, e)
@@ -58,12 +62,26 @@ func (c *Cursor) search(key []byte, p *page) {
return
}
- // Binary search for the correct branch node.
- nodes := p.bnodes()
- e.index = sort.Search(int(p.count)-1, func(i int) bool { return bytes.Compare(nodes[i+1].key(), key) != -1 })
+ // Binary search for the correct range.
+ inodes := p.branchPageElements()
+
+ var exact bool
+ index := sort.Search(int(p.count), func(i int) bool {
+ // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
+ // sort.Search() finds the lowest index where f() != -1 but we need the highest index.
+ ret := bytes.Compare(inodes[i].key(), key)
+ if ret == 0 {
+ exact = true
+ }
+ return ret != -1
+ })
+ if !exact && index > 0 {
+ index--
+ }
+ e.index = uint16(index)
// Recursively search to the next page.
- c.search(key, c.transaction.page(nodes[e.index].pgid))
+ c.search(key, c.transaction.page(inodes[e.index].pgid))
}
// nsearch searches a leaf node for the index of the node that matches key.
@@ -71,16 +89,17 @@ func (c *Cursor) nsearch(key []byte, p *page) {
e := &c.stack[len(c.stack)-1]
// Binary search for the correct leaf node index.
- nodes := p.lnodes()
- e.index = sort.Search(int(p.count), func(i int) bool {
- return bytes.Compare(nodes[i].key(), key) != -1
+ inodes := p.leafPageElements()
+ index := sort.Search(int(p.count), func(i int) bool {
+ return bytes.Compare(inodes[i].key(), key) != -1
})
+ e.index = uint16(index)
}
// top returns the page and leaf node that the cursor is currently pointing at.
-func (c *Cursor) top() (*page, *lnode) {
+func (c *Cursor) top() (*page, uint16) {
elem := c.stack[len(c.stack)-1]
- return elem.page, elem.page.lnode(elem.index)
+ return elem.page, elem.index
}
// page returns the page that the cursor is currently pointing at.
@@ -89,7 +108,7 @@ func (c *Cursor) page() *page {
}
// node returns the leaf node that the cursor is currently positioned on.
-func (c *Cursor) node() *lnode {
+func (c *Cursor) node() *leafPageElement {
elem := c.stack[len(c.stack)-1]
- return elem.page.lnode(elem.index)
+ return elem.page.leafPageElement(elem.index)
}
diff --git a/db.go b/db.go
index 646c31b..41fbc64 100644
--- a/db.go
+++ b/db.go
@@ -1,6 +1,7 @@
package bolt
import (
+ "io"
"os"
"sync"
"syscall"
@@ -114,7 +115,7 @@ func (db *DB) mmap() error {
}
// TEMP(benbjohnson): Set max size to 1MB.
- size := 2 << 20
+ size := 2 << 30
// Memory-map the data file as a byte slice.
if db.data, err = db.syscall.Mmap(int(db.file.Fd()), 0, size, syscall.PROT_READ, syscall.MAP_SHARED); err != nil {
@@ -224,10 +225,7 @@ func (db *DB) RWTransaction() (*RWTransaction, error) {
}
// Create a transaction associated with the database.
- t := &RWTransaction{
- branches: make(map[pgid]*branch),
- leafs: make(map[pgid]*leaf),
- }
+ t := &RWTransaction{nodes: make(map[pgid]*node)}
t.init(db)
return t, nil
@@ -319,6 +317,44 @@ func (db *DB) Delete(name string, key []byte) error {
return t.Commit()
}
+// Copy writes the entire database to a writer.
+func (db *DB) Copy(w io.Writer) error {
+ if !db.opened {
+ return DatabaseNotOpenError
+ }
+
+ // Maintain a reader transaction so pages don't get reclaimed.
+ t, err := db.Transaction()
+ if err != nil {
+ return err
+ }
+ defer t.Close()
+
+ // Open reader on the database.
+ f, err := os.Open(db.path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ // Copy everything.
+ if _, err := io.Copy(w, f); err != nil {
+ return err
+ }
+ return nil
+}
+
+// CopyFile copies the entire database to file at the given path.
+func (db *DB) CopyFile(path string) error {
+ f, err := os.Create(path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ return db.Copy(f)
+}
+
// page retrieves a page reference from the mmap based on the current page size.
func (db *DB) page(id pgid) *page {
return (*page)(unsafe.Pointer(&db.data[id*pgid(db.pageSize)]))
diff --git a/db_test.go b/db_test.go
index 53988b7..50ec3c3 100644
--- a/db_test.go
+++ b/db_test.go
@@ -1,11 +1,14 @@
package bolt
import (
+ "bytes"
+ "fmt"
"io"
"io/ioutil"
"os"
"syscall"
"testing"
+ "testing/quick"
"time"
"unsafe"
@@ -99,25 +102,6 @@ func TestDBMmapStatError(t *testing.T) {
})
}
-// Ensure that mmap errors get returned.
-/*
-func TestDBMmapError(t *testing.T) {
- withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
- exp := errors.New("")
- file, metafile := &mockfile{}, &mockfile{}
- mockos.On("OpenFile", path, os.O_RDWR|os.O_CREATE, os.FileMode(0666)).Return(file, nil)
- mockos.On("OpenFile", path, os.O_RDWR|os.O_SYNC, os.FileMode(0666)).Return(metafile, nil)
- mockos.On("Getpagesize").Return(0x1000)
- file.On("ReadAt", mock.Anything, int64(0)).Return(0, nil)
- file.On("Stat").Return(&mockfileinfo{"", 0x2000, 0666, time.Now(), false, nil}, nil)
- metafile.On("WriteAt", mock.Anything, int64(0)).Return(0, nil)
- mocksyscall.On("Mmap", 0, int64(0), 0x2000, syscall.PROT_READ, syscall.MAP_SHARED).Return(([]byte)(nil), exp)
- err := db.Open(path, 0666)
- assert.Equal(t, err, exp)
- })
-}
-*/
-
// Ensure that corrupt meta0 page errors get returned.
func TestDBCorruptMeta0(t *testing.T) {
withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
@@ -150,10 +134,6 @@ func TestDBCorruptMeta0(t *testing.T) {
})
}
-//--------------------------------------
-// Transaction()
-//--------------------------------------
-
// Ensure that a database cannot open a transaction when it's not open.
func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
withDB(func(db *DB, path string) {
@@ -163,6 +143,54 @@ func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
})
}
+// Ensure that a bucket can write a key/value.
+func TestDBPut(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ db.CreateBucket("widgets")
+ err := db.Put("widgets", []byte("foo"), []byte("bar"))
+ assert.NoError(t, err)
+ value, err := db.Get("widgets", []byte("foo"))
+ if assert.NoError(t, err) {
+ assert.Equal(t, value, []byte("bar"))
+ }
+ })
+}
+
+// Ensure that a bucket can write random keys and values across multiple txns.
+func TestDBPutRandom(t *testing.T) {
+ f := func(items testKeyValuePairs) bool {
+ withOpenDB(func(db *DB, path string) {
+ db.CreateBucket("widgets")
+ for _, item := range items {
+ if len(item.Key) == 0 {
+ continue
+ }
+ if err := db.Put("widgets", item.Key, item.Value); err != nil {
+ panic("put error: " + err.Error())
+ }
+ }
+ for _, item := range items {
+ if len(item.Key) == 0 {
+ continue
+ }
+ value, err := db.Get("widgets", item.Key)
+ if err != nil {
+ panic("get error: " + err.Error())
+ }
+ if !bytes.Equal(value, []byte(item.Value)) {
+ // db.CopyFile("/tmp/bolt.random.db")
+ t.Fatalf("value mismatch:\n%x\n%x", item.Value, value)
+ }
+ }
+ fmt.Fprint(os.Stderr, ".")
+ })
+ return true
+ }
+ if err := quick.Check(f, qc()); err != nil {
+ t.Error(err)
+ }
+}
+
// withDB executes a function with a database reference.
func withDB(fn func(*DB, string)) {
f, _ := ioutil.TempFile("", "bolt-")
diff --git a/leaf.go b/leaf.go
deleted file mode 100644
index c86e041..0000000
--- a/leaf.go
+++ /dev/null
@@ -1,120 +0,0 @@
-package bolt
-
-import (
- "bytes"
- "sort"
- "unsafe"
-)
-
-// leaf represents an in-memory, deserialized leaf page.
-type leaf struct {
- pgid pgid
- parent *branch
- items leafItems
-}
-
-// size returns the size of the leaf after serialization.
-func (l *leaf) size() int {
- var size int = pageHeaderSize
- for _, item := range l.items {
- size += lnodeSize + len(item.key) + len(item.value)
- }
- return size
-}
-
-// put inserts or replaces a key on a leaf page.
-func (l *leaf) put(key []byte, value []byte) {
- // Find insertion index.
- index := sort.Search(len(l.items), func(i int) bool { return bytes.Compare(l.items[i].key, key) != -1 })
-
- // If there is no existing key then add a new item.
- if index == len(l.items) {
- l.items = append(l.items, leafItem{})
- } else if len(l.items) == 0 || !bytes.Equal(l.items[index].key, key) {
- l.items = append(l.items, leafItem{})
- copy(l.items[index+1:], l.items[index:])
- }
- l.items[index].key = key
- l.items[index].value = value
-}
-
-// read initializes the item data from an on-disk page.
-func (l *leaf) read(p *page) {
- l.pgid = p.id
- l.items = make(leafItems, int(p.count))
- lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
- for i := 0; i < int(p.count); i++ {
- lnode := &lnodes[i]
- item := &l.items[i]
- item.key = lnode.key()
- item.value = lnode.value()
- }
-}
-
-// write writes the items onto one or more leaf pages.
-func (l *leaf) write(p *page) {
- // Initialize page.
- p.flags |= p_leaf
- p.count = uint16(len(l.items))
-
- // Loop over each item and write it to the page.
- lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
- b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):]
- for index, item := range l.items {
- // Write node.
- lnode := &lnodes[index]
- lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
- lnode.ksize = uint32(len(item.key))
- lnode.vsize = uint32(len(item.value))
-
- // Write data to the end of the page.
- copy(b[0:], item.key)
- b = b[len(item.key):]
- copy(b[0:], item.value)
- b = b[len(item.value):]
- }
-}
-
-// split divides up the noes in the page into appropriately sized groups.
-func (l *leaf) split(pageSize int) []*leaf {
- // Ignore the split if the page doesn't have at least enough nodes for
- // multiple pages or if the data can fit on a single page.
- if len(l.items) <= (minKeysPerPage*2) || l.size() < pageSize {
- return []*leaf{l}
- }
-
- // Set fill threshold to 50%.
- threshold := pageSize / 2
-
- // Otherwise group into smaller pages and target a given fill size.
- size := 0
- current := &leaf{}
- leafs := make([]*leaf, 0)
-
- for index, item := range l.items {
- nodeSize := lnodeSize + len(item.key) + len(item.value)
-
- if len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold {
- size = pageHeaderSize
- leafs = append(leafs, current)
- current = &leaf{}
- }
-
- size += nodeSize
- current.items = append(current.items, item)
- }
- leafs = append(leafs, current)
-
- return leafs
-}
-
-type leafItems []leafItem
-
-type leafItem struct {
- key []byte
- value []byte
-}
-
-func (s leafItems) Len() int { return len(s) }
-func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
-func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
diff --git a/leaf_test.go b/leaf_test.go
deleted file mode 100644
index ce98e37..0000000
--- a/leaf_test.go
+++ /dev/null
@@ -1,124 +0,0 @@
-package bolt
-
-import (
- "testing"
- "unsafe"
-
- "github.com/stretchr/testify/assert"
-)
-
-// Ensure that a leaf can insert a key/value.
-func TestLeafPut(t *testing.T) {
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("baz"), []byte("2"))
- l.put([]byte("foo"), []byte("0"))
- l.put([]byte("bar"), []byte("1"))
- l.put([]byte("foo"), []byte("3"))
- assert.Equal(t, len(l.items), 3)
- assert.Equal(t, l.items[0], leafItem{[]byte("bar"), []byte("1")})
- assert.Equal(t, l.items[1], leafItem{[]byte("baz"), []byte("2")})
- assert.Equal(t, l.items[2], leafItem{[]byte("foo"), []byte("3")})
-}
-
-// Ensure that a leaf can deserialize from a page.
-func TestLeafRead(t *testing.T) {
- // Create a page.
- var buf [4096]byte
- page := (*page)(unsafe.Pointer(&buf[0]))
- page.count = 2
-
- // Insert 2 leaf items at the beginning. sizeof(lnode) == 16
- nodes := (*[3]lnode)(unsafe.Pointer(&page.ptr))
- nodes[0] = lnode{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(lnode) * 2
- nodes[1] = lnode{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(lnode) + 3 + 4
-
- // Write data for the nodes at the end.
- data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
- copy(data[:], []byte("barfooz"))
- copy(data[7:], []byte("helloworldbye"))
-
- // Deserialize page into a leaf.
- l := &leaf{}
- l.read(page)
-
- // Check that there are two items with correct data.
- assert.Equal(t, len(l.items), 2)
- assert.Equal(t, l.items[0].key, []byte("bar"))
- assert.Equal(t, l.items[0].value, []byte("fooz"))
- assert.Equal(t, l.items[1].key, []byte("helloworld"))
- assert.Equal(t, l.items[1].value, []byte("bye"))
-}
-
-// Ensure that a leaf can serialize itself.
-func TestLeafWrite(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("susy"), []byte("que"))
- l.put([]byte("ricki"), []byte("lake"))
- l.put([]byte("john"), []byte("johnson"))
-
- // Write it to a page.
- var buf [4096]byte
- p := (*page)(unsafe.Pointer(&buf[0]))
- l.write(p)
-
- // Read the page back in.
- l2 := &leaf{}
- l2.read(p)
-
- // Check that the two pages are the same.
- assert.Equal(t, len(l2.items), 3)
- assert.Equal(t, l2.items[0].key, []byte("john"))
- assert.Equal(t, l2.items[0].value, []byte("johnson"))
- assert.Equal(t, l2.items[1].key, []byte("ricki"))
- assert.Equal(t, l2.items[1].value, []byte("lake"))
- assert.Equal(t, l2.items[2].key, []byte("susy"))
- assert.Equal(t, l2.items[2].value, []byte("que"))
-}
-
-// Ensure that a leaf can split into appropriate subgroups.
-func TestLeafSplit(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("00000001"), []byte("0123456701234567"))
- l.put([]byte("00000002"), []byte("0123456701234567"))
- l.put([]byte("00000003"), []byte("0123456701234567"))
- l.put([]byte("00000004"), []byte("0123456701234567"))
- l.put([]byte("00000005"), []byte("0123456701234567"))
-
- // Split between 3 & 4.
- leafs := l.split(100)
-
- assert.Equal(t, len(leafs), 2)
- assert.Equal(t, len(leafs[0].items), 2)
- assert.Equal(t, len(leafs[1].items), 3)
-}
-
-// Ensure that a leaf with the minimum number of items just returns a single leaf.
-func TestLeafSplitWithMinKeys(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("00000001"), []byte("0123456701234567"))
- l.put([]byte("00000002"), []byte("0123456701234567"))
-
- // Split.
- leafs := l.split(20)
- assert.Equal(t, len(leafs), 1)
- assert.Equal(t, len(leafs[0].items), 2)
-}
-
-// Ensure that a leaf that has keys that all fit on a page just returns one leaf.
-func TestLeafSplitFitsInPage(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("00000001"), []byte("0123456701234567"))
- l.put([]byte("00000002"), []byte("0123456701234567"))
- l.put([]byte("00000003"), []byte("0123456701234567"))
- l.put([]byte("00000004"), []byte("0123456701234567"))
- l.put([]byte("00000005"), []byte("0123456701234567"))
-
- // Split.
- leafs := l.split(4096)
- assert.Equal(t, len(leafs), 1)
- assert.Equal(t, len(leafs[0].items), 5)
-}
diff --git a/lnode.go b/lnode.go
deleted file mode 100644
index dd6f494..0000000
--- a/lnode.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package bolt
-
-import (
- "unsafe"
-)
-
-const lnodeSize = int(unsafe.Sizeof(lnode{}))
-
-// lnode represents a node on a leaf page.
-type lnode struct {
- flags uint32
- pos uint32
- ksize uint32
- vsize uint32
-}
-
-// key returns a byte slice of the node key.
-func (n *lnode) key() []byte {
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
- return buf[n.pos : n.pos+n.ksize]
-}
-
-// value returns a byte slice of the node value.
-func (n *lnode) value() []byte {
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
- return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
-}
diff --git a/meta.go b/meta.go
index 1ef9ffc..871d092 100644
--- a/meta.go
+++ b/meta.go
@@ -6,9 +6,10 @@ type meta struct {
magic uint32
version uint32
pageSize uint32
- pgid pgid
- free pgid
+ flags uint32
sys pgid
+ free pgid
+ pgid pgid
txnid txnid
}
diff --git a/node.go b/node.go
new file mode 100644
index 0000000..8d03681
--- /dev/null
+++ b/node.go
@@ -0,0 +1,178 @@
+package bolt
+
+import (
+ "bytes"
+ "sort"
+ "unsafe"
+)
+
+// node represents an in-memory, deserialized page.
+type node struct {
+ isLeaf bool
+ key []byte
+ depth int
+ pgid pgid
+ parent *node
+ inodes inodes
+}
+
+// size returns the size of the node after serialization.
+func (n *node) size() int {
+ var elementSize int = n.pageElementSize()
+
+ var size int = pageHeaderSize
+ for _, item := range n.inodes {
+ size += elementSize + len(item.key) + len(item.value)
+ }
+ return size
+}
+
+// pageElementSize returns the size of each page element based on the type of node.
+func (n *node) pageElementSize() int {
+ if n.isLeaf {
+ return leafPageElementSize
+ }
+ return branchPageElementSize
+}
+
+// root returns the root node in the tree.
+func (n *node) root() *node {
+ if n.parent == nil {
+ return n
+ }
+ return n.parent
+}
+
+// put inserts a key/value.
+func (n *node) put(oldKey, newKey, value []byte, pgid pgid) {
+ // Find insertion index.
+ index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
+
+ // Add capacity and shift nodes if we don't have an exact match and need to insert.
+ exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
+ if !exact {
+ n.inodes = append(n.inodes, inode{})
+ copy(n.inodes[index+1:], n.inodes[index:])
+ }
+
+ inode := &n.inodes[index]
+ inode.key = newKey
+ inode.value = value
+ inode.pgid = pgid
+}
+
+// read initializes the node from a page.
+func (n *node) read(p *page) {
+ n.pgid = p.id
+ n.isLeaf = ((p.flags & p_leaf) != 0)
+ n.inodes = make(inodes, int(p.count))
+
+ for i := 0; i < int(p.count); i++ {
+ inode := &n.inodes[i]
+ if n.isLeaf {
+ elem := p.leafPageElement(uint16(i))
+ inode.key = elem.key()
+ inode.value = elem.value()
+ } else {
+ elem := p.branchPageElement(uint16(i))
+ inode.pgid = elem.pgid
+ inode.key = elem.key()
+ }
+ }
+
+ // Save first key so we can find the node in the parent when we spill.
+ if len(n.inodes) > 0 {
+ n.key = n.inodes[0].key
+ } else {
+ n.key = nil
+ }
+}
+
+// write writes the items onto one or more pages.
+func (n *node) write(p *page) {
+ // Initialize page.
+ if n.isLeaf {
+ p.flags |= p_leaf
+ } else {
+ p.flags |= p_branch
+ }
+ p.count = uint16(len(n.inodes))
+
+ // Loop over each item and write it to the page.
+ b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
+ for i, item := range n.inodes {
+ // Write the page element.
+ if n.isLeaf {
+ elem := p.leafPageElement(uint16(i))
+ elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
+ elem.ksize = uint32(len(item.key))
+ elem.vsize = uint32(len(item.value))
+ } else {
+ elem := p.branchPageElement(uint16(i))
+ elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
+ elem.ksize = uint32(len(item.key))
+ elem.pgid = item.pgid
+ }
+
+ // Write data for the element to the end of the page.
+ copy(b[0:], item.key)
+ b = b[len(item.key):]
+ copy(b[0:], item.value)
+ b = b[len(item.value):]
+ }
+}
+
+// split divides up the node into appropriately sized nodes.
+func (n *node) split(pageSize int) []*node {
+ // Ignore the split if the page doesn't have at least enough nodes for
+ // multiple pages or if the data can fit on a single page.
+ if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize {
+ return []*node{n}
+ }
+
+ // Set fill threshold to 50%.
+ threshold := pageSize / 2
+
+ // Group into smaller pages and target a given fill size.
+ size := 0
+ current := &node{isLeaf: n.isLeaf}
+ nodes := make([]*node, 0)
+
+ for i, inode := range n.inodes {
+ elemSize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+ if len(current.inodes) >= minKeysPerPage && i < len(n.inodes)-minKeysPerPage && size+elemSize > threshold {
+ size = pageHeaderSize
+ nodes = append(nodes, current)
+ current = &node{isLeaf: n.isLeaf}
+ }
+
+ size += elemSize
+ current.inodes = append(current.inodes, inode)
+ }
+ nodes = append(nodes, current)
+
+ return nodes
+}
+
+// nodesByDepth sorts a list of nodes by deepest first.
+type nodesByDepth []*node
+
+func (s nodesByDepth) Len() int { return len(s) }
+func (s nodesByDepth) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s nodesByDepth) Less(i, j int) bool { return s[i].depth > s[j].depth }
+
+// inode represents an internal node inside of a node.
+// It can be used to point to elements in a page or point
+// to an element which hasn't been added to a page yet.
+type inode struct {
+ pgid pgid
+ key []byte
+ value []byte
+}
+
+type inodes []inode
+
+func (s inodes) Len() int { return len(s) }
+func (s inodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s inodes) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
diff --git a/node_test.go b/node_test.go
new file mode 100644
index 0000000..15d6498
--- /dev/null
+++ b/node_test.go
@@ -0,0 +1,129 @@
+package bolt
+
+import (
+ "testing"
+ "unsafe"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// Ensure that a node can insert a key/value.
+func TestNodePut(t *testing.T) {
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("baz"), []byte("baz"), []byte("2"), 0)
+ n.put([]byte("foo"), []byte("foo"), []byte("0"), 0)
+ n.put([]byte("bar"), []byte("bar"), []byte("1"), 0)
+ n.put([]byte("foo"), []byte("foo"), []byte("3"), 0)
+ assert.Equal(t, len(n.inodes), 3)
+ assert.Equal(t, n.inodes[0].key, []byte("bar"))
+ assert.Equal(t, n.inodes[0].value, []byte("1"))
+ assert.Equal(t, n.inodes[1].key, []byte("baz"))
+ assert.Equal(t, n.inodes[1].value, []byte("2"))
+ assert.Equal(t, n.inodes[2].key, []byte("foo"))
+ assert.Equal(t, n.inodes[2].value, []byte("3"))
+}
+
+// Ensure that a node can deserialize from a leaf page.
+func TestNodeReadLeafPage(t *testing.T) {
+ // Create a page.
+ var buf [4096]byte
+ page := (*page)(unsafe.Pointer(&buf[0]))
+ page.flags = p_leaf
+ page.count = 2
+
+ // Insert 2 elements at the beginning. sizeof(leafPageElement) == 16
+ nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr))
+ nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2
+ nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
+
+ // Write data for the nodes at the end.
+ data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
+ copy(data[:], []byte("barfooz"))
+ copy(data[7:], []byte("helloworldbye"))
+
+ // Deserialize page into a leaf.
+ n := &node{}
+ n.read(page)
+
+ // Check that there are two inodes with correct data.
+ assert.True(t, n.isLeaf)
+ assert.Equal(t, len(n.inodes), 2)
+ assert.Equal(t, n.inodes[0].key, []byte("bar"))
+ assert.Equal(t, n.inodes[0].value, []byte("fooz"))
+ assert.Equal(t, n.inodes[1].key, []byte("helloworld"))
+ assert.Equal(t, n.inodes[1].value, []byte("bye"))
+}
+
+// Ensure that a node can serialize into a leaf page.
+func TestNodeWriteLeafPage(t *testing.T) {
+ // Create a node.
+ n := &node{isLeaf: true, inodes: make(inodes, 0)}
+ n.put([]byte("susy"), []byte("susy"), []byte("que"), 0)
+ n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0)
+ n.put([]byte("john"), []byte("john"), []byte("johnson"), 0)
+
+ // Write it to a page.
+ var buf [4096]byte
+ p := (*page)(unsafe.Pointer(&buf[0]))
+ n.write(p)
+
+ // Read the page back in.
+ n2 := &node{}
+ n2.read(p)
+
+ // Check that the two pages are the same.
+ assert.Equal(t, len(n2.inodes), 3)
+ assert.Equal(t, n2.inodes[0].key, []byte("john"))
+ assert.Equal(t, n2.inodes[0].value, []byte("johnson"))
+ assert.Equal(t, n2.inodes[1].key, []byte("ricki"))
+ assert.Equal(t, n2.inodes[1].value, []byte("lake"))
+ assert.Equal(t, n2.inodes[2].key, []byte("susy"))
+ assert.Equal(t, n2.inodes[2].value, []byte("que"))
+}
+
+// Ensure that a node can split into appropriate subgroups.
+func TestNodeSplit(t *testing.T) {
+ // Create a node.
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0)
+
+ // Split between 3 & 4.
+ nodes := n.split(100)
+
+ assert.Equal(t, len(nodes), 2)
+ assert.Equal(t, len(nodes[0].inodes), 2)
+ assert.Equal(t, len(nodes[1].inodes), 3)
+}
+
+// Ensure that a page with the minimum number of inodes just returns a single node.
+func TestNodeSplitWithMinKeys(t *testing.T) {
+ // Create a node.
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
+
+ // Split.
+ nodes := n.split(20)
+ assert.Equal(t, len(nodes), 1)
+ assert.Equal(t, len(nodes[0].inodes), 2)
+}
+
+// Ensure that a node that has keys that all fit on a page just returns one leaf.
+func TestNodeSplitFitsInPage(t *testing.T) {
+ // Create a node.
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0)
+
+ // Split.
+ nodes := n.split(4096)
+ assert.Equal(t, len(nodes), 1)
+ assert.Equal(t, len(nodes[0].inodes), 5)
+}
diff --git a/page.go b/page.go
index c18c6b4..d05dce6 100644
--- a/page.go
+++ b/page.go
@@ -1,6 +1,8 @@
package bolt
import (
+ "fmt"
+ "os"
"unsafe"
)
@@ -10,6 +12,9 @@ const maxAllocSize = 0xFFFFFFF
const minKeysPerPage = 2
const maxNodesPerPage = 65535
+const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
+const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
+
const (
p_branch = 0x01
p_leaf = 0x02
@@ -28,29 +33,46 @@ type page struct {
ptr uintptr
}
+// typ returns a human readable page type string used for debugging.
+func (p *page) typ() string {
+ if (p.flags & p_branch) != 0 {
+ return "branch"
+ } else if (p.flags & p_leaf) != 0 {
+ return "leaf"
+ } else if (p.flags & p_meta) != 0 {
+ return "meta"
+ } else if (p.flags & p_sys) != 0 {
+ return "system"
+ } else if (p.flags & p_freelist) != 0 {
+ return "freelist"
+ }
+ return fmt.Sprintf("unknown<%02x>", p.flags)
+}
+
// meta returns a pointer to the metadata section of the page.
func (p *page) meta() *meta {
return (*meta)(unsafe.Pointer(&p.ptr))
}
-// lnode retrieves the leaf node by index
-func (p *page) lnode(index int) *lnode {
- return &((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[index]
+// leafPageElement retrieves the leaf node by index
+func (p *page) leafPageElement(index uint16) *leafPageElement {
+ n := &((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
+ return n
}
-// lnodes retrieves a list of leaf nodes.
-func (p *page) lnodes() []lnode {
- return ((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[:]
+// leafPageElements retrieves a list of leaf nodes.
+func (p *page) leafPageElements() []leafPageElement {
+ return ((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
-// bnode retrieves the branch node by index
-func (p *page) bnode(index int) *bnode {
- return &((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[index]
+// branchPageElement retrieves the branch node by index
+func (p *page) branchPageElement(index uint16) *branchPageElement {
+ return &((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
}
-// bnodes retrieves a list of branch nodes.
-func (p *page) bnodes() []bnode {
- return ((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[:]
+// branchPageElements retrieves a list of branch nodes.
+func (p *page) branchPageElements() []branchPageElement {
+ return ((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// freelist retrieves a list of page ids from a freelist page.
@@ -58,8 +80,47 @@ func (p *page) freelist() []pgid {
return ((*[maxNodesPerPage]pgid)(unsafe.Pointer(&p.ptr)))[0:p.count]
}
+// dump writes n bytes of the page to STDERR as hex output.
+func (p *page) hexdump(n int) {
+ buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
+ fmt.Fprintf(os.Stderr, "%x\n", buf)
+}
+
// pages implements sort.Interface over a slice of pages, ordered by page id.
type pages []*page

// Len reports the number of pages.
func (s pages) Len() int { return len(s) }

// Swap exchanges the pages at indexes i and j.
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether the page at i has a lower id than the page at j.
func (s pages) Less(i, j int) bool { return s[j].id > s[i].id }
+
// branchPageElement represents a node on a branch page.
// pos is the byte offset from this element header to the key data,
// ksize is the key length, and pgid is presumably the id of the child
// page this element points to — confirm against the branch write path.
type branchPageElement struct {
	pos uint32
	ksize uint32
	pgid pgid
}

// key returns a byte slice of the node key.
// The slice aliases the page buffer: pos is relative to the element
// header itself, so the element's own address is used as the base.
func (n *branchPageElement) key() []byte {
	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
	return buf[n.pos : n.pos+n.ksize]
}
+
// leafPageElement represents a node on a leaf page.
// pos is the byte offset from this element header to the start of the
// key data (the value follows the key immediately); ksize and vsize are
// the key and value lengths. flags' semantics are not visible here —
// NOTE(review): confirm against the leaf read/write path.
type leafPageElement struct {
	flags uint32
	pos uint32
	ksize uint32
	vsize uint32
}

// key returns a byte slice of the node key.
// The slice aliases the page buffer: pos is relative to the element
// header itself, so the element's own address is used as the base.
func (n *leafPageElement) key() []byte {
	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
	return buf[n.pos : n.pos+n.ksize]
}

// value returns a byte slice of the node value.
// The value is stored directly after the key, so it starts at
// pos+ksize and runs for vsize bytes. The slice aliases the page buffer.
func (n *leafPageElement) value() []byte {
	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
	return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
}
diff --git a/quick_test.go b/quick_test.go
new file mode 100644
index 0000000..d85249d
--- /dev/null
+++ b/quick_test.go
@@ -0,0 +1,65 @@
+package bolt
+
+import (
+ "flag"
+ "math/rand"
+ "reflect"
+ "testing/quick"
+ "time"
+)
+
+// testing/quick defaults to 100 iterations and a random seed.
+// You can override these settings from the command line:
+//
+// -quickchecks The number of iterations to perform.
+// -quick.seed The seed to use for randomizing.
+// -quick.maxitems The maximum number of items to insert into a DB.
+// -quick.maxksize The maximum size of a key.
+// -quick.maxvsize The maximum size of a value.
+//
+
+var seed, testMaxItemCount, testMaxKeySize, testMaxValueSize int
+
// init registers the quick-test tuning flags and logs the chosen seed so
// failing runs can be reproduced with -quick.seed.
// NOTE(review): warn() runs at init time, before flag.Parse, so the value
// printed is always the time-based default even when -quick.seed is
// passed on the command line — confirm whether that is intended.
func init() {
	flag.IntVar(&seed, "quick.seed", int(time.Now().UnixNano())%100000, "")
	flag.IntVar(&testMaxItemCount, "quick.maxitems", 1024, "")
	flag.IntVar(&testMaxKeySize, "quick.maxksize", 1024, "")
	flag.IntVar(&testMaxValueSize, "quick.maxvsize", 1024, "")
	warn("seed:", seed)
}
+
+// qc creates a testing/quick configuration.
+func qc() *quick.Config {
+ return &quick.Config{Rand: rand.New(rand.NewSource(int64(seed)))}
+}
+
+type testKeyValuePairs []testKeyValuePair
+
+func (t testKeyValuePairs) Generate(rand *rand.Rand, size int) reflect.Value {
+ n := rand.Intn(testMaxItemCount-1) + 1
+ items := make(testKeyValuePairs, n)
+ for i := 0; i < n; i++ {
+ items[i].Generate(rand, size)
+ }
+ return reflect.ValueOf(items)
+}
+
+type testKeyValuePair struct {
+ Key []byte
+ Value []byte
+}
+
+func (t testKeyValuePair) Generate(rand *rand.Rand, size int) reflect.Value {
+ t.Key = randByteSlice(rand, 1, testMaxKeySize)
+ t.Value = randByteSlice(rand, 0, testMaxValueSize)
+ return reflect.ValueOf(t)
+}
+
+func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte {
+ n := rand.Intn(maxSize - minSize) + minSize
+ b := make([]byte, n)
+ for i := 0; i < n; i++ {
+ b[i] = byte(rand.Intn(255))
+ }
+ return b
+}
diff --git a/rwtransaction.go b/rwtransaction.go
index 145e0cf..2485c62 100644
--- a/rwtransaction.go
+++ b/rwtransaction.go
@@ -9,8 +9,7 @@ import (
// Only one read/write transaction can be active for a DB at a time.
type RWTransaction struct {
Transaction
- branches map[pgid]*branch
- leafs map[pgid]*leaf
+ nodes map[pgid]*node
}
// init initializes the transaction.
@@ -21,7 +20,7 @@ func (t *RWTransaction) init(db *DB) {
// Copy the meta and increase the transaction id.
t.meta = &meta{}
db.meta().copy(t.meta)
- t.meta.txnid += txnid(2)
+ t.meta.txnid += txnid(1)
}
// CreateBucket creates a new bucket.
@@ -32,7 +31,7 @@ func (t *RWTransaction) CreateBucket(name string) error {
} else if len(name) == 0 {
return &Error{"bucket name cannot be blank", nil}
} else if len(name) > MaxBucketNameSize {
- return &Error{"bucket name too large", nil}
+ return &Error{"bucket name too long", nil}
}
// Create a blank root leaf page.
@@ -72,9 +71,9 @@ func (t *RWTransaction) Put(name string, key []byte, value []byte) error {
}
// Insert a new node.
- c := b.Cursor()
- c.Goto(key)
- t.leaf(c).put(key, value)
+ c := b.cursor()
+ c.Get(key)
+ t.node(c.stack).put(key, key, value, 0)
return nil
}
@@ -121,8 +120,8 @@ func (t *RWTransaction) Rollback() {
}
func (t *RWTransaction) close() {
- // Clear temporary pages.
- t.leafs = nil
+ // Clear nodes.
+ t.nodes = nil
// TODO: Release writer lock.
}
@@ -146,55 +145,81 @@ func (t *RWTransaction) allocate(count int) *page {
return p
}
-// spill writes all the leafs and branches to dirty pages.
+// spill writes all the nodes to dirty pages.
func (t *RWTransaction) spill() {
- // Spill leafs first.
- for _, l := range t.leafs {
- t.spillLeaf(l)
+ // Keep track of the current root nodes.
+ // We will update this at the end once all nodes are created.
+ type root struct {
+ node *node
+ pgid pgid
}
+ var roots []root
- // Sort branches by highest depth first.
- branches := make(branches, 0, len(t.branches))
- for _, b := range t.branches {
- branches = append(branches, b)
+ // Sort nodes by highest depth first.
+ nodes := make(nodesByDepth, 0, len(t.nodes))
+ for _, n := range t.nodes {
+ nodes = append(nodes, n)
}
- sort.Sort(branches)
+ sort.Sort(nodes)
- // Spill branches by deepest first.
- for _, b := range branches {
- t.spillBranch(b)
- }
-}
-
-// spillLeaf writes a leaf to one or more dirty pages.
-func (t *RWTransaction) spillLeaf(l *leaf) {
- parent := l.parent
-
- // Split leaf, if necessary.
- leafs := l.split(t.db.pageSize)
-
- // TODO: If this is a root leaf and we split then add a parent branch.
+ // Spill nodes by deepest first.
+ for i := 0; i < len(nodes); i++ {
+ n := nodes[i]
- // Process each resulting leaf.
- previd := leafs[0].pgid
- for index, l := range leafs {
- // Allocate contiguous space for the leaf.
- p := t.allocate((l.size() / t.db.pageSize) + 1)
+ // Save existing root buckets for later.
+ if n.parent == nil && n.pgid != 0 {
+ roots = append(roots, root{n, n.pgid})
+ }
- // Write the leaf to the page.
- l.write(p)
+ // Split nodes and write them.
+ newNodes := n.split(t.db.pageSize)
+
+ // If this is a root node that split then create a parent node.
+ if n.parent == nil && len(newNodes) > 1 {
+ n.parent = &node{
+ isLeaf: false,
+ key: newNodes[0].inodes[0].key,
+ depth: n.depth - 1,
+ inodes: make(inodes, 0),
+ }
+ nodes = append(nodes, n.parent)
+ sort.Sort(nodes)
+ }
- // Insert or replace the node in the parent branch with the new pgid.
- if parent != nil {
- parent.put(previd, p.id, l.items[0].key, (index == 0))
- previd = l.pgid
+ // Write nodes to dirty pages.
+ for i, newNode := range newNodes {
+ // Allocate contiguous space for the node.
+ p := t.allocate((newNode.size() / t.db.pageSize) + 1)
+
+ // Write the node to the page.
+ newNode.write(p)
+ newNode.pgid = p.id
+ newNode.parent = n.parent
+
+ // The first node should use the existing entry, other nodes are inserts.
+ var oldKey []byte
+ if i == 0 {
+ oldKey = n.key
+ } else {
+ oldKey = newNode.inodes[0].key
+ }
+
+ // Update the parent entry.
+ if newNode.parent != nil {
+ newNode.parent.put(oldKey, newNode.inodes[0].key, nil, newNode.pgid)
+ }
}
}
-}
-// spillBranch writes a branch to one or more dirty pages.
-func (t *RWTransaction) spillBranch(l *branch) {
- warn("[pending] RWTransaction.spillBranch()") // TODO
+ // Update roots with new roots.
+ for _, root := range roots {
+ for _, b := range t.sys.buckets {
+ if b.root == root.pgid {
+ b.root = root.node.root().pgid
+ break
+ }
+ }
+ }
}
// write writes any dirty pages to disk.
@@ -231,28 +256,8 @@ func (t *RWTransaction) writeMeta() error {
return nil
}
-// leaf retrieves a leaf object based on the current position of a cursor.
-func (t *RWTransaction) leaf(c *Cursor) *leaf {
- e := c.stack[len(c.stack)-1]
- id := e.page.id
-
- // Retrieve leaf if it has already been fetched.
- if l := t.leafs[id]; l != nil {
- return l
- }
-
- // Otherwise create a leaf and cache it.
- l := &leaf{}
- l.read(t.page(id))
- l.parent = t.branch(c.stack[:len(c.stack)-1])
- t.leafs[id] = l
-
- return l
-}
-
-// branch retrieves a branch object based a cursor stack.
-// This should only be called from leaf().
-func (t *RWTransaction) branch(stack []elem) *branch {
+// node retrieves a node based a cursor stack.
+func (t *RWTransaction) node(stack []elem) *node {
if len(stack) == 0 {
return nil
}
@@ -260,16 +265,16 @@ func (t *RWTransaction) branch(stack []elem) *branch {
// Retrieve branch if it has already been fetched.
e := &stack[len(stack)-1]
id := e.page.id
- if b := t.branches[id]; b != nil {
- return b
+ if n := t.nodes[id]; n != nil {
+ return n
}
// Otherwise create a branch and cache it.
- b := &branch{}
- b.read(t.page(id))
- b.depth = len(stack) - 1
- b.parent = t.branch(stack[:len(stack)-1])
- t.branches[id] = b
+ n := &node{}
+ n.read(t.page(id))
+ n.depth = len(stack) - 1
+ n.parent = t.node(stack[:len(stack)-1])
+ t.nodes[id] = n
- return b
+ return n
}
diff --git a/rwtransaction_test.go b/rwtransaction_test.go
index b6e971e..4d256cc 100644
--- a/rwtransaction_test.go
+++ b/rwtransaction_test.go
@@ -1,6 +1,7 @@
package bolt
import (
+ "strings"
"testing"
"github.com/stretchr/testify/assert"
@@ -28,3 +29,35 @@ func TestTransactionCreateBucket(t *testing.T) {
assert.NoError(t, err)
})
}
+
+// Ensure that a bucket cannot be created twice.
+func TestTransactionRecreateBucket(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ // Create a bucket.
+ err := db.CreateBucket("widgets")
+ assert.NoError(t, err)
+
+ // Create the same bucket again.
+ err = db.CreateBucket("widgets")
+ assert.Equal(t, err, &Error{"bucket already exists", nil})
+ })
+}
+
+// Ensure that a bucket is created with a non-blank name.
+func TestTransactionCreateBucketWithoutName(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ err := db.CreateBucket("")
+ assert.Equal(t, err, &Error{"bucket name cannot be blank", nil})
+ })
+}
+
+// Ensure that a bucket name is not too long.
+func TestTransactionCreateBucketWithLongName(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ err := db.CreateBucket(strings.Repeat("X", 255))
+ assert.NoError(t, err)
+
+ err = db.CreateBucket(strings.Repeat("X", 256))
+ assert.Equal(t, err, &Error{"bucket name too long", nil})
+ })
+}
diff --git a/sys.go b/sys.go
index 657c973..cf15413 100644
--- a/sys.go
+++ b/sys.go
@@ -25,6 +25,16 @@ func (s *sys) get(key string) *bucket {
return s.buckets[key]
}
+// getByRoot retrieves a bucket by root page id.
+func (s *sys) getByRoot(pgid pgid) *bucket {
+ for _, b := range s.buckets {
+ if b.root == pgid {
+ return b
+ }
+ }
+ panic("root not found")
+}
+
// put sets a new value for a bucket.
func (s *sys) put(key string, b *bucket) {
s.buckets[key] = b
@@ -32,7 +42,9 @@ func (s *sys) put(key string, b *bucket) {
// del deletes a bucket by name.
func (s *sys) del(key string) {
- delete(s.buckets, key)
+ if b := s.buckets[key]; b != nil {
+ delete(s.buckets, key)
+ }
}
// read initializes the data from an on-disk page.
@@ -61,7 +73,8 @@ func (s *sys) read(p *page) {
// Associate keys and buckets.
for index, key := range keys {
- s.buckets[key] = buckets[index]
+ b := &bucket{buckets[index].root}
+ s.buckets[key] = b
}
}
diff --git a/transaction.go b/transaction.go
index e479dfb..29337c6 100644
--- a/transaction.go
+++ b/transaction.go
@@ -62,7 +62,7 @@ func (t *Transaction) Cursor(name string) *Cursor {
if b == nil {
return nil
}
- return b.Cursor()
+ return b.cursor()
}
// Get retrieves the value for a key in a named bucket.
diff --git a/warn.go b/warn.go
index 7e9a2a7..eadf664 100644
--- a/warn.go
+++ b/warn.go
@@ -10,5 +10,5 @@ func warn(v ...interface{}) {
}
func warnf(msg string, v ...interface{}) {
- fmt.Fprintf(os.Stderr, msg, v...)
+ fmt.Fprintf(os.Stderr, msg+"\n", v...)
}