aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Johnson <benbjohnson@yahoo.com>2014-01-31 13:18:51 -0500
committerBen Johnson <benbjohnson@yahoo.com>2014-02-01 12:30:37 -0500
commit1a17a2cf1ee8b509dd00b7f29a01c13108acb2cc (patch)
treed6aa197560053444d76dce1bd198a584c051c7b9
parentMerge pull request #4 from benbjohnson/api (diff)
downloaddedo-1a17a2cf1ee8b509dd00b7f29a01c13108acb2cc.tar.gz
dedo-1a17a2cf1ee8b509dd00b7f29a01c13108acb2cc.tar.xz
Add RWTransaction.Put().
-rw-r--r--NOTES17
-rw-r--r--TODO2
-rw-r--r--bnode.go20
-rw-r--r--branch.go129
-rw-r--r--branch_test.go161
-rw-r--r--bucket.go9
-rw-r--r--cursor.go67
-rw-r--r--db.go46
-rw-r--r--db_test.go74
-rw-r--r--leaf.go120
-rw-r--r--leaf_test.go124
-rw-r--r--lnode.go27
-rw-r--r--meta.go5
-rw-r--r--node.go178
-rw-r--r--node_test.go129
-rw-r--r--page.go85
-rw-r--r--quick_test.go65
-rw-r--r--rwtransaction.go159
-rw-r--r--rwtransaction_test.go33
-rw-r--r--sys.go17
-rw-r--r--transaction.go2
-rw-r--r--warn.go2
22 files changed, 735 insertions, 736 deletions
diff --git a/NOTES b/NOTES
new file mode 100644
index 0000000..967d3aa
--- /dev/null
+++ b/NOTES
@@ -0,0 +1,17 @@
+
+
+ ===0===
+ | d|g |
+ |1|2|3|
+ =======
+ | | |
+ ------ | -------
+ | | |
+===1=== ===2=== ===3===
+|a|b|c| |d|e|f| |g|h|i|
+|-|-|-| |-|-|-| |-|-|-|
+|*|*|*| |*|*|*| |*|*|*|
+|*|*|*| |*|*|*| |*|*|*|
+|*|*|*| |*|*|*| |*|*|*|
+
+
diff --git a/TODO b/TODO
index efd1a81..9c8a1a0 100644
--- a/TODO
+++ b/TODO
@@ -2,7 +2,7 @@ TODO
====
X Open DB.
X Initialize transaction.
-- Cursor First, Goto(key), Next
+- Cursor First, Get(key), Next
- RWTransaction.insert()
- rebalance
- adjust cursors
diff --git a/bnode.go b/bnode.go
deleted file mode 100644
index 3ad4f58..0000000
--- a/bnode.go
+++ /dev/null
@@ -1,20 +0,0 @@
-package bolt
-
-import (
- "unsafe"
-)
-
-const bnodeSize = int(unsafe.Sizeof(lnode{}))
-
-// bnode represents a node on a branch page.
-type bnode struct {
- pos uint32
- ksize uint32
- pgid pgid
-}
-
-// key returns a byte slice of the node key.
-func (n *bnode) key() []byte {
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
- return buf[n.pos : n.pos+n.ksize]
-}
diff --git a/branch.go b/branch.go
deleted file mode 100644
index c4f0b1f..0000000
--- a/branch.go
+++ /dev/null
@@ -1,129 +0,0 @@
-package bolt
-
-import (
- "bytes"
- "unsafe"
-)
-
-// branch represents a temporary in-memory branch page.
-type branch struct {
- pgid pgid
- depth int
- parent *branch
- items branchItems
-}
-
-// size returns the size of the branch after serialization.
-func (b *branch) size() int {
- var size int = pageHeaderSize
- for _, item := range b.items {
- size += bnodeSize + len(item.key)
- }
- return size
-}
-
-// put adds a new node or replaces an existing node.
-func (b *branch) put(id pgid, newid pgid, key []byte, replace bool) {
- var index int
- for ; index < len(b.items); index++ {
- if b.items[index].pgid == id {
- break
- }
- }
-
- if !replace {
- index++
- b.items = append(b.items, branchItem{})
- if index < len(b.items) {
- copy(b.items[index+1:], b.items[index:])
- }
- }
-
- b.items[index].pgid = newid
- b.items[index].key = key
-}
-
-// read initializes the item data from an on-disk page.
-func (b *branch) read(p *page) {
- b.pgid = p.id
- b.items = make(branchItems, int(p.count))
- bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr))
- for i := 0; i < int(p.count); i++ {
- bnode := &bnodes[i]
- item := &b.items[i]
- item.pgid = bnode.pgid
- item.key = bnode.key()
- }
-}
-
-// write writes the items onto a branch page.
-func (b *branch) write(p *page) {
- // Initialize page.
- p.flags |= p_branch
- p.count = uint16(len(b.items))
-
- // Loop over each item and write it to the page.
- bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr))
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(b.items):]
- for index, item := range b.items {
- // Write node.
- bnode := &bnodes[index]
- bnode.pgid = item.pgid
- bnode.pos = uint32(uintptr(unsafe.Pointer(&buf[0])) - uintptr(unsafe.Pointer(bnode)))
- bnode.ksize = uint32(len(item.key))
-
- // Write key to the end of the page.
- copy(buf[0:], item.key)
- buf = buf[len(item.key):]
- }
-}
-
-// split divides up the noes in the branch into appropriately sized groups.
-func (b *branch) split(pageSize int) []*branch {
- // Ignore the split if the page doesn't have at least enough nodes for
- // multiple pages or if the data can fit on a single page.
- if len(b.items) <= (minKeysPerPage*2) || b.size() < pageSize {
- return []*branch{b}
- }
-
- // Set fill threshold to 50%.
- threshold := pageSize / 2
-
- // Otherwise group into smaller pages and target a given fill size.
- size := 0
- current := &branch{}
- branches := make([]*branch, 0)
-
- for index, item := range b.items {
- nodeSize := bnodeSize + len(item.key)
-
- if len(current.items) >= minKeysPerPage && index < len(b.items)-minKeysPerPage && size+nodeSize > threshold {
- size = pageHeaderSize
- branches = append(branches, current)
- current = &branch{}
- }
-
- size += nodeSize
- current.items = append(current.items, item)
- }
- branches = append(branches, current)
-
- return branches
-}
-
-type branches []*branch
-
-func (s branches) Len() int { return len(s) }
-func (s branches) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
-func (s branches) Less(i, j int) bool { return s[i].depth < s[j].depth }
-
-type branchItems []branchItem
-
-type branchItem struct {
- pgid pgid
- key []byte
-}
-
-func (s branchItems) Len() int { return len(s) }
-func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
-func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
diff --git a/branch_test.go b/branch_test.go
deleted file mode 100644
index 4a810c8..0000000
--- a/branch_test.go
+++ /dev/null
@@ -1,161 +0,0 @@
-package bolt
-
-import (
- "testing"
- "unsafe"
-
- "github.com/stretchr/testify/assert"
-)
-
-// Ensure that a branch can replace a key.
-func TestBranchPutReplace(t *testing.T) {
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("bar")},
- branchItem{pgid: 2, key: []byte("baz")},
- branchItem{pgid: 3, key: []byte("foo")},
- },
- }
- b.put(1, 4, []byte("bar"), true)
- b.put(2, 5, []byte("boo"), true)
- assert.Equal(t, len(b.items), 3)
- assert.Equal(t, b.items[0].pgid, pgid(4))
- assert.Equal(t, string(b.items[0].key), "bar")
- assert.Equal(t, b.items[1].pgid, pgid(5))
- assert.Equal(t, string(b.items[1].key), "boo")
- assert.Equal(t, b.items[2].pgid, pgid(3))
- assert.Equal(t, string(b.items[2].key), "foo")
-}
-
-// Ensure that a branch can insert a key.
-func TestBranchPutInsert(t *testing.T) {
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("bar")},
- branchItem{pgid: 2, key: []byte("foo")},
- },
- }
- b.put(1, 4, []byte("baz"), false)
- b.put(2, 5, []byte("zzz"), false)
- assert.Equal(t, len(b.items), 4)
- assert.Equal(t, b.items[0].pgid, pgid(1))
- assert.Equal(t, string(b.items[0].key), "bar")
- assert.Equal(t, b.items[1].pgid, pgid(4))
- assert.Equal(t, string(b.items[1].key), "baz")
- assert.Equal(t, b.items[2].pgid, pgid(2))
- assert.Equal(t, string(b.items[2].key), "foo")
- assert.Equal(t, b.items[3].pgid, pgid(5))
- assert.Equal(t, string(b.items[3].key), "zzz")
-}
-
-// Ensure that a branch can deserialize from a page.
-func TestBranchRead(t *testing.T) {
- // Create a page.
- var buf [4096]byte
- page := (*page)(unsafe.Pointer(&buf[0]))
- page.count = 2
-
- // Insert 2 items at the beginning. sizeof(bnode) == 16
- nodes := (*[3]bnode)(unsafe.Pointer(&page.ptr))
- nodes[0] = bnode{pos: 32, ksize: 3, pgid: 100} // pos = sizeof(bnode) * 2
- nodes[1] = bnode{pos: 19, ksize: 10, pgid: 101} // pos = sizeof(bnode) + 3
-
- // Write data for the nodes at the end.
- data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
- copy(data[:], []byte("bar"))
- copy(data[3:], []byte("helloworld"))
-
- // Deserialize page into a branch.
- b := &branch{}
- b.read(page)
-
- // Check that there are two items with correct data.
- assert.Equal(t, len(b.items), 2)
- assert.Equal(t, b.items[0].key, []byte("bar"))
- assert.Equal(t, b.items[1].key, []byte("helloworld"))
-}
-
-// Ensure that a branch can serialize itself.
-func TestBranchWrite(t *testing.T) {
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("susy")},
- branchItem{pgid: 2, key: []byte("ricki")},
- branchItem{pgid: 3, key: []byte("john")},
- },
- }
-
- // Write it to a page.
- var buf [4096]byte
- p := (*page)(unsafe.Pointer(&buf[0]))
- b.write(p)
-
- // Read the page back in.
- b2 := &branch{}
- b2.read(p)
-
- // Check that the two pages are the same.
- assert.Equal(t, len(b2.items), 3)
- assert.Equal(t, b2.items[0].pgid, pgid(1))
- assert.Equal(t, b2.items[0].key, []byte("susy"))
- assert.Equal(t, b2.items[1].pgid, pgid(2))
- assert.Equal(t, b2.items[1].key, []byte("ricki"))
- assert.Equal(t, b2.items[2].pgid, pgid(3))
- assert.Equal(t, b2.items[2].key, []byte("john"))
-}
-
-// Ensure that a branch can split into appropriate subgroups.
-func TestBranchSplit(t *testing.T) {
- // Create a branch.
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("00000001")},
- branchItem{pgid: 2, key: []byte("00000002")},
- branchItem{pgid: 3, key: []byte("00000003")},
- branchItem{pgid: 4, key: []byte("00000004")},
- branchItem{pgid: 5, key: []byte("00000005")},
- },
- }
-
- // Split between 3 & 4.
- branches := b.split(100)
-
- assert.Equal(t, len(branches), 2)
- assert.Equal(t, len(branches[0].items), 2)
- assert.Equal(t, len(branches[1].items), 3)
-}
-
-// Ensure that a branch with the minimum number of items just returns a single branch.
-func TestBranchSplitWithMinKeys(t *testing.T) {
- // Create a branch.
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("00000001")},
- branchItem{pgid: 2, key: []byte("00000002")},
- },
- }
-
- // Split.
- branches := b.split(20)
- assert.Equal(t, len(branches), 1)
- assert.Equal(t, len(branches[0].items), 2)
-}
-
-// Ensure that a branch that has keys that all fit on a page just returns one branch.
-func TestBranchSplitFitsInPage(t *testing.T) {
- // Create a branch.
- b := &branch{
- items: branchItems{
- branchItem{pgid: 1, key: []byte("00000001")},
- branchItem{pgid: 2, key: []byte("00000002")},
- branchItem{pgid: 3, key: []byte("00000003")},
- branchItem{pgid: 4, key: []byte("00000004")},
- branchItem{pgid: 5, key: []byte("00000005")},
- },
- }
-
- // Split.
- branches := b.split(4096)
- assert.Equal(t, len(branches), 1)
- assert.Equal(t, len(branches[0].items), 5)
-}
diff --git a/bucket.go b/bucket.go
index 6cfe1ca..b1c1cc0 100644
--- a/bucket.go
+++ b/bucket.go
@@ -15,13 +15,8 @@ func (b *Bucket) Name() string {
return b.name
}
-// Get retrieves the value for a key in the bucket.
-func (b *Bucket) Get(key []byte) []byte {
- return b.Cursor().Get(key)
-}
-
-// Cursor creates a new cursor for this bucket.
-func (b *Bucket) Cursor() *Cursor {
+// cursor creates a new cursor for this bucket.
+func (b *Bucket) cursor() *Cursor {
return &Cursor{
transaction: b.transaction,
root: b.root,
diff --git a/cursor.go b/cursor.go
index 7b8c9bb..d56119b 100644
--- a/cursor.go
+++ b/cursor.go
@@ -14,7 +14,7 @@ type Cursor struct {
// elem represents a node on a page that's on the cursor's stack.
type elem struct {
page *page
- index int
+ index uint16
}
// First moves the cursor to the first item in the bucket and returns its key and data.
@@ -30,25 +30,29 @@ func (c *Cursor) Next() ([]byte, []byte) {
// Get positions the cursor at a specific key and returns its value.
func (c *Cursor) Get(key []byte) []byte {
- if c.Goto(key) {
- return c.node().value()
- }
- return nil
-}
-
-// Goto positions the cursor at a specific key.
-// Returns true if an exact match or false if positioned after the closest match.
-func (c *Cursor) Goto(key []byte) bool {
- // TODO(benbjohnson): Optimize for specific use cases.
-
// Start from root page and traverse to correct page.
c.stack = c.stack[:0]
c.search(key, c.transaction.page(c.root))
+ p, index := c.top()
+
+ // If the cursor is pointing to the end of page then return nil.
+ if index == p.count {
+ return nil
+ }
- return false
+ // If our target node's key isn't the same as the key passed in then return nil.
+ // c.page().hexdump(512)
+ if !bytes.Equal(key, c.node().key()) {
+ return nil
+ }
+
+ return c.node().value()
}
func (c *Cursor) search(key []byte, p *page) {
+ if (p.flags & (p_branch | p_leaf)) == 0 {
+ panic("invalid page type: " + p.typ())
+ }
e := elem{page: p}
c.stack = append(c.stack, e)
@@ -58,12 +62,26 @@ func (c *Cursor) search(key []byte, p *page) {
return
}
- // Binary search for the correct branch node.
- nodes := p.bnodes()
- e.index = sort.Search(int(p.count)-1, func(i int) bool { return bytes.Compare(nodes[i+1].key(), key) != -1 })
+ // Binary search for the correct range.
+ inodes := p.branchPageElements()
+
+ var exact bool
+ index := sort.Search(int(p.count), func(i int) bool {
+ // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now.
+ // sort.Search() finds the lowest index where f() != -1 but we need the highest index.
+ ret := bytes.Compare(inodes[i].key(), key)
+ if ret == 0 {
+ exact = true
+ }
+ return ret != -1
+ })
+ if !exact && index > 0 {
+ index--
+ }
+ e.index = uint16(index)
// Recursively search to the next page.
- c.search(key, c.transaction.page(nodes[e.index].pgid))
+ c.search(key, c.transaction.page(inodes[e.index].pgid))
}
// nsearch searches a leaf node for the index of the node that matches key.
@@ -71,16 +89,17 @@ func (c *Cursor) nsearch(key []byte, p *page) {
e := &c.stack[len(c.stack)-1]
// Binary search for the correct leaf node index.
- nodes := p.lnodes()
- e.index = sort.Search(int(p.count), func(i int) bool {
- return bytes.Compare(nodes[i].key(), key) != -1
+ inodes := p.leafPageElements()
+ index := sort.Search(int(p.count), func(i int) bool {
+ return bytes.Compare(inodes[i].key(), key) != -1
})
+ e.index = uint16(index)
}
// top returns the page and leaf node that the cursor is currently pointing at.
-func (c *Cursor) top() (*page, *lnode) {
+func (c *Cursor) top() (*page, uint16) {
elem := c.stack[len(c.stack)-1]
- return elem.page, elem.page.lnode(elem.index)
+ return elem.page, elem.index
}
// page returns the page that the cursor is currently pointing at.
@@ -89,7 +108,7 @@ func (c *Cursor) page() *page {
}
// node returns the leaf node that the cursor is currently positioned on.
-func (c *Cursor) node() *lnode {
+func (c *Cursor) node() *leafPageElement {
elem := c.stack[len(c.stack)-1]
- return elem.page.lnode(elem.index)
+ return elem.page.leafPageElement(elem.index)
}
diff --git a/db.go b/db.go
index 646c31b..41fbc64 100644
--- a/db.go
+++ b/db.go
@@ -1,6 +1,7 @@
package bolt
import (
+ "io"
"os"
"sync"
"syscall"
@@ -114,7 +115,7 @@ func (db *DB) mmap() error {
}
// TEMP(benbjohnson): Set max size to 1MB.
- size := 2 << 20
+ size := 2 << 30
// Memory-map the data file as a byte slice.
if db.data, err = db.syscall.Mmap(int(db.file.Fd()), 0, size, syscall.PROT_READ, syscall.MAP_SHARED); err != nil {
@@ -224,10 +225,7 @@ func (db *DB) RWTransaction() (*RWTransaction, error) {
}
// Create a transaction associated with the database.
- t := &RWTransaction{
- branches: make(map[pgid]*branch),
- leafs: make(map[pgid]*leaf),
- }
+ t := &RWTransaction{nodes: make(map[pgid]*node)}
t.init(db)
return t, nil
@@ -319,6 +317,44 @@ func (db *DB) Delete(name string, key []byte) error {
return t.Commit()
}
+// Copy writes the entire database to a writer.
+func (db *DB) Copy(w io.Writer) error {
+ if !db.opened {
+ return DatabaseNotOpenError
+ }
+
+ // Maintain a reader transaction so pages don't get reclaimed.
+ t, err := db.Transaction()
+ if err != nil {
+ return err
+ }
+ defer t.Close()
+
+ // Open reader on the database.
+ f, err := os.Open(db.path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ // Copy everything.
+ if _, err := io.Copy(w, f); err != nil {
+ return err
+ }
+ return nil
+}
+
+// CopyFile copies the entire database to file at the given path.
+func (db *DB) CopyFile(path string) error {
+ f, err := os.Create(path)
+ if err != nil {
+ return err
+ }
+ defer f.Close()
+
+ return db.Copy(f)
+}
+
// page retrieves a page reference from the mmap based on the current page size.
func (db *DB) page(id pgid) *page {
return (*page)(unsafe.Pointer(&db.data[id*pgid(db.pageSize)]))
diff --git a/db_test.go b/db_test.go
index 53988b7..50ec3c3 100644
--- a/db_test.go
+++ b/db_test.go
@@ -1,11 +1,14 @@
package bolt
import (
+ "bytes"
+ "fmt"
"io"
"io/ioutil"
"os"
"syscall"
"testing"
+ "testing/quick"
"time"
"unsafe"
@@ -99,25 +102,6 @@ func TestDBMmapStatError(t *testing.T) {
})
}
-// Ensure that mmap errors get returned.
-/*
-func TestDBMmapError(t *testing.T) {
- withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
- exp := errors.New("")
- file, metafile := &mockfile{}, &mockfile{}
- mockos.On("OpenFile", path, os.O_RDWR|os.O_CREATE, os.FileMode(0666)).Return(file, nil)
- mockos.On("OpenFile", path, os.O_RDWR|os.O_SYNC, os.FileMode(0666)).Return(metafile, nil)
- mockos.On("Getpagesize").Return(0x1000)
- file.On("ReadAt", mock.Anything, int64(0)).Return(0, nil)
- file.On("Stat").Return(&mockfileinfo{"", 0x2000, 0666, time.Now(), false, nil}, nil)
- metafile.On("WriteAt", mock.Anything, int64(0)).Return(0, nil)
- mocksyscall.On("Mmap", 0, int64(0), 0x2000, syscall.PROT_READ, syscall.MAP_SHARED).Return(([]byte)(nil), exp)
- err := db.Open(path, 0666)
- assert.Equal(t, err, exp)
- })
-}
-*/
-
// Ensure that corrupt meta0 page errors get returned.
func TestDBCorruptMeta0(t *testing.T) {
withMockDB(func(db *DB, mockos *mockos, mocksyscall *mocksyscall, path string) {
@@ -150,10 +134,6 @@ func TestDBCorruptMeta0(t *testing.T) {
})
}
-//--------------------------------------
-// Transaction()
-//--------------------------------------
-
// Ensure that a database cannot open a transaction when it's not open.
func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
withDB(func(db *DB, path string) {
@@ -163,6 +143,54 @@ func TestDBTransactionDatabaseNotOpenError(t *testing.T) {
})
}
+// Ensure that a bucket can write a key/value.
+func TestDBPut(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ db.CreateBucket("widgets")
+ err := db.Put("widgets", []byte("foo"), []byte("bar"))
+ assert.NoError(t, err)
+ value, err := db.Get("widgets", []byte("foo"))
+ if assert.NoError(t, err) {
+ assert.Equal(t, value, []byte("bar"))
+ }
+ })
+}
+
+// Ensure that a bucket can write random keys and values across multiple txns.
+func TestDBPutRandom(t *testing.T) {
+ f := func(items testKeyValuePairs) bool {
+ withOpenDB(func(db *DB, path string) {
+ db.CreateBucket("widgets")
+ for _, item := range items {
+ if len(item.Key) == 0 {
+ continue
+ }
+ if err := db.Put("widgets", item.Key, item.Value); err != nil {
+ panic("put error: " + err.Error())
+ }
+ }
+ for _, item := range items {
+ if len(item.Key) == 0 {
+ continue
+ }
+ value, err := db.Get("widgets", item.Key)
+ if err != nil {
+ panic("get error: " + err.Error())
+ }
+ if !bytes.Equal(value, []byte(item.Value)) {
+ // db.CopyFile("/tmp/bolt.random.db")
+ t.Fatalf("value mismatch:\n%x\n%x", item.Value, value)
+ }
+ }
+ fmt.Fprint(os.Stderr, ".")
+ })
+ return true
+ }
+ if err := quick.Check(f, qc()); err != nil {
+ t.Error(err)
+ }
+}
+
// withDB executes a function with a database reference.
func withDB(fn func(*DB, string)) {
f, _ := ioutil.TempFile("", "bolt-")
diff --git a/leaf.go b/leaf.go
deleted file mode 100644
index c86e041..0000000
--- a/leaf.go
+++ /dev/null
@@ -1,120 +0,0 @@
-package bolt
-
-import (
- "bytes"
- "sort"
- "unsafe"
-)
-
-// leaf represents an in-memory, deserialized leaf page.
-type leaf struct {
- pgid pgid
- parent *branch
- items leafItems
-}
-
-// size returns the size of the leaf after serialization.
-func (l *leaf) size() int {
- var size int = pageHeaderSize
- for _, item := range l.items {
- size += lnodeSize + len(item.key) + len(item.value)
- }
- return size
-}
-
-// put inserts or replaces a key on a leaf page.
-func (l *leaf) put(key []byte, value []byte) {
- // Find insertion index.
- index := sort.Search(len(l.items), func(i int) bool { return bytes.Compare(l.items[i].key, key) != -1 })
-
- // If there is no existing key then add a new item.
- if index == len(l.items) {
- l.items = append(l.items, leafItem{})
- } else if len(l.items) == 0 || !bytes.Equal(l.items[index].key, key) {
- l.items = append(l.items, leafItem{})
- copy(l.items[index+1:], l.items[index:])
- }
- l.items[index].key = key
- l.items[index].value = value
-}
-
-// read initializes the item data from an on-disk page.
-func (l *leaf) read(p *page) {
- l.pgid = p.id
- l.items = make(leafItems, int(p.count))
- lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
- for i := 0; i < int(p.count); i++ {
- lnode := &lnodes[i]
- item := &l.items[i]
- item.key = lnode.key()
- item.value = lnode.value()
- }
-}
-
-// write writes the items onto one or more leaf pages.
-func (l *leaf) write(p *page) {
- // Initialize page.
- p.flags |= p_leaf
- p.count = uint16(len(l.items))
-
- // Loop over each item and write it to the page.
- lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
- b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):]
- for index, item := range l.items {
- // Write node.
- lnode := &lnodes[index]
- lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
- lnode.ksize = uint32(len(item.key))
- lnode.vsize = uint32(len(item.value))
-
- // Write data to the end of the page.
- copy(b[0:], item.key)
- b = b[len(item.key):]
- copy(b[0:], item.value)
- b = b[len(item.value):]
- }
-}
-
-// split divides up the noes in the page into appropriately sized groups.
-func (l *leaf) split(pageSize int) []*leaf {
- // Ignore the split if the page doesn't have at least enough nodes for
- // multiple pages or if the data can fit on a single page.
- if len(l.items) <= (minKeysPerPage*2) || l.size() < pageSize {
- return []*leaf{l}
- }
-
- // Set fill threshold to 50%.
- threshold := pageSize / 2
-
- // Otherwise group into smaller pages and target a given fill size.
- size := 0
- current := &leaf{}
- leafs := make([]*leaf, 0)
-
- for index, item := range l.items {
- nodeSize := lnodeSize + len(item.key) + len(item.value)
-
- if len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold {
- size = pageHeaderSize
- leafs = append(leafs, current)
- current = &leaf{}
- }
-
- size += nodeSize
- current.items = append(current.items, item)
- }
- leafs = append(leafs, current)
-
- return leafs
-}
-
-type leafItems []leafItem
-
-type leafItem struct {
- key []byte
- value []byte
-}
-
-func (s leafItems) Len() int { return len(s) }
-func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
-func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
diff --git a/leaf_test.go b/leaf_test.go
deleted file mode 100644
index ce98e37..0000000
--- a/leaf_test.go
+++ /dev/null
@@ -1,124 +0,0 @@
-package bolt
-
-import (
- "testing"
- "unsafe"
-
- "github.com/stretchr/testify/assert"
-)
-
-// Ensure that a leaf can insert a key/value.
-func TestLeafPut(t *testing.T) {
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("baz"), []byte("2"))
- l.put([]byte("foo"), []byte("0"))
- l.put([]byte("bar"), []byte("1"))
- l.put([]byte("foo"), []byte("3"))
- assert.Equal(t, len(l.items), 3)
- assert.Equal(t, l.items[0], leafItem{[]byte("bar"), []byte("1")})
- assert.Equal(t, l.items[1], leafItem{[]byte("baz"), []byte("2")})
- assert.Equal(t, l.items[2], leafItem{[]byte("foo"), []byte("3")})
-}
-
-// Ensure that a leaf can deserialize from a page.
-func TestLeafRead(t *testing.T) {
- // Create a page.
- var buf [4096]byte
- page := (*page)(unsafe.Pointer(&buf[0]))
- page.count = 2
-
- // Insert 2 leaf items at the beginning. sizeof(lnode) == 16
- nodes := (*[3]lnode)(unsafe.Pointer(&page.ptr))
- nodes[0] = lnode{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(lnode) * 2
- nodes[1] = lnode{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(lnode) + 3 + 4
-
- // Write data for the nodes at the end.
- data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
- copy(data[:], []byte("barfooz"))
- copy(data[7:], []byte("helloworldbye"))
-
- // Deserialize page into a leaf.
- l := &leaf{}
- l.read(page)
-
- // Check that there are two items with correct data.
- assert.Equal(t, len(l.items), 2)
- assert.Equal(t, l.items[0].key, []byte("bar"))
- assert.Equal(t, l.items[0].value, []byte("fooz"))
- assert.Equal(t, l.items[1].key, []byte("helloworld"))
- assert.Equal(t, l.items[1].value, []byte("bye"))
-}
-
-// Ensure that a leaf can serialize itself.
-func TestLeafWrite(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("susy"), []byte("que"))
- l.put([]byte("ricki"), []byte("lake"))
- l.put([]byte("john"), []byte("johnson"))
-
- // Write it to a page.
- var buf [4096]byte
- p := (*page)(unsafe.Pointer(&buf[0]))
- l.write(p)
-
- // Read the page back in.
- l2 := &leaf{}
- l2.read(p)
-
- // Check that the two pages are the same.
- assert.Equal(t, len(l2.items), 3)
- assert.Equal(t, l2.items[0].key, []byte("john"))
- assert.Equal(t, l2.items[0].value, []byte("johnson"))
- assert.Equal(t, l2.items[1].key, []byte("ricki"))
- assert.Equal(t, l2.items[1].value, []byte("lake"))
- assert.Equal(t, l2.items[2].key, []byte("susy"))
- assert.Equal(t, l2.items[2].value, []byte("que"))
-}
-
-// Ensure that a leaf can split into appropriate subgroups.
-func TestLeafSplit(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("00000001"), []byte("0123456701234567"))
- l.put([]byte("00000002"), []byte("0123456701234567"))
- l.put([]byte("00000003"), []byte("0123456701234567"))
- l.put([]byte("00000004"), []byte("0123456701234567"))
- l.put([]byte("00000005"), []byte("0123456701234567"))
-
- // Split between 3 & 4.
- leafs := l.split(100)
-
- assert.Equal(t, len(leafs), 2)
- assert.Equal(t, len(leafs[0].items), 2)
- assert.Equal(t, len(leafs[1].items), 3)
-}
-
-// Ensure that a leaf with the minimum number of items just returns a single leaf.
-func TestLeafSplitWithMinKeys(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("00000001"), []byte("0123456701234567"))
- l.put([]byte("00000002"), []byte("0123456701234567"))
-
- // Split.
- leafs := l.split(20)
- assert.Equal(t, len(leafs), 1)
- assert.Equal(t, len(leafs[0].items), 2)
-}
-
-// Ensure that a leaf that has keys that all fit on a page just returns one leaf.
-func TestLeafSplitFitsInPage(t *testing.T) {
- // Create a leaf.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("00000001"), []byte("0123456701234567"))
- l.put([]byte("00000002"), []byte("0123456701234567"))
- l.put([]byte("00000003"), []byte("0123456701234567"))
- l.put([]byte("00000004"), []byte("0123456701234567"))
- l.put([]byte("00000005"), []byte("0123456701234567"))
-
- // Split.
- leafs := l.split(4096)
- assert.Equal(t, len(leafs), 1)
- assert.Equal(t, len(leafs[0].items), 5)
-}
diff --git a/lnode.go b/lnode.go
deleted file mode 100644
index dd6f494..0000000
--- a/lnode.go
+++ /dev/null
@@ -1,27 +0,0 @@
-package bolt
-
-import (
- "unsafe"
-)
-
-const lnodeSize = int(unsafe.Sizeof(lnode{}))
-
-// lnode represents a node on a leaf page.
-type lnode struct {
- flags uint32
- pos uint32
- ksize uint32
- vsize uint32
-}
-
-// key returns a byte slice of the node key.
-func (n *lnode) key() []byte {
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
- return buf[n.pos : n.pos+n.ksize]
-}
-
-// value returns a byte slice of the node value.
-func (n *lnode) value() []byte {
- buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
- return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
-}
diff --git a/meta.go b/meta.go
index 1ef9ffc..871d092 100644
--- a/meta.go
+++ b/meta.go
@@ -6,9 +6,10 @@ type meta struct {
magic uint32
version uint32
pageSize uint32
- pgid pgid
- free pgid
+ flags uint32
sys pgid
+ free pgid
+ pgid pgid
txnid txnid
}
diff --git a/node.go b/node.go
new file mode 100644
index 0000000..8d03681
--- /dev/null
+++ b/node.go
@@ -0,0 +1,178 @@
+package bolt
+
+import (
+ "bytes"
+ "sort"
+ "unsafe"
+)
+
+// node represents an in-memory, deserialized page.
+type node struct {
+ isLeaf bool
+ key []byte
+ depth int
+ pgid pgid
+ parent *node
+ inodes inodes
+}
+
+// size returns the size of the node after serialization.
+func (n *node) size() int {
+ var elementSize int = n.pageElementSize()
+
+ var size int = pageHeaderSize
+ for _, item := range n.inodes {
+ size += elementSize + len(item.key) + len(item.value)
+ }
+ return size
+}
+
+// pageElementSize returns the size of each page element based on the type of node.
+func (n *node) pageElementSize() int {
+ if n.isLeaf {
+ return leafPageElementSize
+ }
+ return branchPageElementSize
+}
+
+// root returns the root node in the tree.
+func (n *node) root() *node {
+ if n.parent == nil {
+ return n
+ }
+ return n.parent
+}
+
+// put inserts a key/value.
+func (n *node) put(oldKey, newKey, value []byte, pgid pgid) {
+ // Find insertion index.
+ index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 })
+
+ // Add capacity and shift nodes if we don't have an exact match and need to insert.
+ exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey))
+ if !exact {
+ n.inodes = append(n.inodes, inode{})
+ copy(n.inodes[index+1:], n.inodes[index:])
+ }
+
+ inode := &n.inodes[index]
+ inode.key = newKey
+ inode.value = value
+ inode.pgid = pgid
+}
+
+// read initializes the node from a page.
+func (n *node) read(p *page) {
+ n.pgid = p.id
+ n.isLeaf = ((p.flags & p_leaf) != 0)
+ n.inodes = make(inodes, int(p.count))
+
+ for i := 0; i < int(p.count); i++ {
+ inode := &n.inodes[i]
+ if n.isLeaf {
+ elem := p.leafPageElement(uint16(i))
+ inode.key = elem.key()
+ inode.value = elem.value()
+ } else {
+ elem := p.branchPageElement(uint16(i))
+ inode.pgid = elem.pgid
+ inode.key = elem.key()
+ }
+ }
+
+ // Save first key so we can find the node in the parent when we spill.
+ if len(n.inodes) > 0 {
+ n.key = n.inodes[0].key
+ } else {
+ n.key = nil
+ }
+}
+
+// write writes the items onto one or more pages.
+func (n *node) write(p *page) {
+ // Initialize page.
+ if n.isLeaf {
+ p.flags |= p_leaf
+ } else {
+ p.flags |= p_branch
+ }
+ p.count = uint16(len(n.inodes))
+
+ // Loop over each item and write it to the page.
+ b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
+ for i, item := range n.inodes {
+ // Write the page element.
+ if n.isLeaf {
+ elem := p.leafPageElement(uint16(i))
+ elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
+ elem.ksize = uint32(len(item.key))
+ elem.vsize = uint32(len(item.value))
+ } else {
+ elem := p.branchPageElement(uint16(i))
+ elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
+ elem.ksize = uint32(len(item.key))
+ elem.pgid = item.pgid
+ }
+
+ // Write data for the element to the end of the page.
+ copy(b[0:], item.key)
+ b = b[len(item.key):]
+ copy(b[0:], item.value)
+ b = b[len(item.value):]
+ }
+}
+
+// split divides up the node into appropriately sized nodes.
+func (n *node) split(pageSize int) []*node {
+ // Ignore the split if the page doesn't have at least enough nodes for
+ // multiple pages or if the data can fit on a single page.
+ if len(n.inodes) <= (minKeysPerPage*2) || n.size() < pageSize {
+ return []*node{n}
+ }
+
+ // Set fill threshold to 50%.
+ threshold := pageSize / 2
+
+ // Group into smaller pages and target a given fill size.
+ size := 0
+ current := &node{isLeaf: n.isLeaf}
+ nodes := make([]*node, 0)
+
+ for i, inode := range n.inodes {
+ elemSize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+ if len(current.inodes) >= minKeysPerPage && i < len(n.inodes)-minKeysPerPage && size+elemSize > threshold {
+ size = pageHeaderSize
+ nodes = append(nodes, current)
+ current = &node{isLeaf: n.isLeaf}
+ }
+
+ size += elemSize
+ current.inodes = append(current.inodes, inode)
+ }
+ nodes = append(nodes, current)
+
+ return nodes
+}
+
+// nodesByDepth sorts a list of nodes by deepest first.
+type nodesByDepth []*node
+
+func (s nodesByDepth) Len() int { return len(s) }
+func (s nodesByDepth) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s nodesByDepth) Less(i, j int) bool { return s[i].depth > s[j].depth }
+
+// inode represents an internal node inside of a node.
+// It can be used to point to elements in a page or point
+// to an element which hasn't been added to a page yet.
+type inode struct {
+ pgid pgid
+ key []byte
+ value []byte
+}
+
+type inodes []inode
+
+func (s inodes) Len() int { return len(s) }
+func (s inodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s inodes) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
diff --git a/node_test.go b/node_test.go
new file mode 100644
index 0000000..15d6498
--- /dev/null
+++ b/node_test.go
@@ -0,0 +1,129 @@
+package bolt
+
+import (
+ "testing"
+ "unsafe"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// Ensure that a node can insert a key/value.
+func TestNodePut(t *testing.T) {
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("baz"), []byte("baz"), []byte("2"), 0)
+ n.put([]byte("foo"), []byte("foo"), []byte("0"), 0)
+ n.put([]byte("bar"), []byte("bar"), []byte("1"), 0)
+ n.put([]byte("foo"), []byte("foo"), []byte("3"), 0)
+ assert.Equal(t, len(n.inodes), 3)
+ assert.Equal(t, n.inodes[0].key, []byte("bar"))
+ assert.Equal(t, n.inodes[0].value, []byte("1"))
+ assert.Equal(t, n.inodes[1].key, []byte("baz"))
+ assert.Equal(t, n.inodes[1].value, []byte("2"))
+ assert.Equal(t, n.inodes[2].key, []byte("foo"))
+ assert.Equal(t, n.inodes[2].value, []byte("3"))
+}
+
+// Ensure that a node can deserialize from a leaf page.
+func TestNodeReadLeafPage(t *testing.T) {
+ // Create a page.
+ var buf [4096]byte
+ page := (*page)(unsafe.Pointer(&buf[0]))
+ page.flags = p_leaf
+ page.count = 2
+
+ // Insert 2 elements at the beginning. sizeof(leafPageElement) == 16
+ nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr))
+ nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2
+ nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4
+
+ // Write data for the nodes at the end.
+ data := (*[4096]byte)(unsafe.Pointer(&nodes[2]))
+ copy(data[:], []byte("barfooz"))
+ copy(data[7:], []byte("helloworldbye"))
+
+ // Deserialize page into a leaf.
+ n := &node{}
+ n.read(page)
+
+ // Check that there are two inodes with correct data.
+ assert.True(t, n.isLeaf)
+ assert.Equal(t, len(n.inodes), 2)
+ assert.Equal(t, n.inodes[0].key, []byte("bar"))
+ assert.Equal(t, n.inodes[0].value, []byte("fooz"))
+ assert.Equal(t, n.inodes[1].key, []byte("helloworld"))
+ assert.Equal(t, n.inodes[1].value, []byte("bye"))
+}
+
+// Ensure that a node can serialize into a leaf page.
+func TestNodeWriteLeafPage(t *testing.T) {
+ // Create a node.
+ n := &node{isLeaf: true, inodes: make(inodes, 0)}
+ n.put([]byte("susy"), []byte("susy"), []byte("que"), 0)
+ n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0)
+ n.put([]byte("john"), []byte("john"), []byte("johnson"), 0)
+
+ // Write it to a page.
+ var buf [4096]byte
+ p := (*page)(unsafe.Pointer(&buf[0]))
+ n.write(p)
+
+ // Read the page back in.
+ n2 := &node{}
+ n2.read(p)
+
+ // Check that the two pages are the same.
+ assert.Equal(t, len(n2.inodes), 3)
+ assert.Equal(t, n2.inodes[0].key, []byte("john"))
+ assert.Equal(t, n2.inodes[0].value, []byte("johnson"))
+ assert.Equal(t, n2.inodes[1].key, []byte("ricki"))
+ assert.Equal(t, n2.inodes[1].value, []byte("lake"))
+ assert.Equal(t, n2.inodes[2].key, []byte("susy"))
+ assert.Equal(t, n2.inodes[2].value, []byte("que"))
+}
+
+// Ensure that a node can split into appropriate subgroups.
+func TestNodeSplit(t *testing.T) {
+ // Create a node.
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0)
+
+ // Split between 3 & 4.
+ nodes := n.split(100)
+
+ assert.Equal(t, len(nodes), 2)
+ assert.Equal(t, len(nodes[0].inodes), 2)
+ assert.Equal(t, len(nodes[1].inodes), 3)
+}
+
+// Ensure that a page with the minimum number of inodes just returns a single node.
+func TestNodeSplitWithMinKeys(t *testing.T) {
+ // Create a node.
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
+
+ // Split.
+ nodes := n.split(20)
+ assert.Equal(t, len(nodes), 1)
+ assert.Equal(t, len(nodes[0].inodes), 2)
+}
+
+// Ensure that a node that has keys that all fit on a page just returns one leaf.
+func TestNodeSplitFitsInPage(t *testing.T) {
+ // Create a node.
+ n := &node{inodes: make(inodes, 0)}
+ n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0)
+ n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0)
+
+ // Split.
+ nodes := n.split(4096)
+ assert.Equal(t, len(nodes), 1)
+ assert.Equal(t, len(nodes[0].inodes), 5)
+}
diff --git a/page.go b/page.go
index c18c6b4..d05dce6 100644
--- a/page.go
+++ b/page.go
@@ -1,6 +1,8 @@
package bolt
import (
+ "fmt"
+ "os"
"unsafe"
)
@@ -10,6 +12,9 @@ const maxAllocSize = 0xFFFFFFF
const minKeysPerPage = 2
const maxNodesPerPage = 65535
+const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
+const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
+
const (
p_branch = 0x01
p_leaf = 0x02
@@ -28,29 +33,46 @@ type page struct {
ptr uintptr
}
+// typ returns a human readable page type string used for debugging.
+func (p *page) typ() string {
+ if (p.flags & p_branch) != 0 {
+ return "branch"
+ } else if (p.flags & p_leaf) != 0 {
+ return "leaf"
+ } else if (p.flags & p_meta) != 0 {
+ return "meta"
+ } else if (p.flags & p_sys) != 0 {
+ return "system"
+ } else if (p.flags & p_freelist) != 0 {
+ return "freelist"
+ }
+ return fmt.Sprintf("unknown<%02x>", p.flags)
+}
+
// meta returns a pointer to the metadata section of the page.
func (p *page) meta() *meta {
return (*meta)(unsafe.Pointer(&p.ptr))
}
-// lnode retrieves the leaf node by index
-func (p *page) lnode(index int) *lnode {
- return &((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[index]
+// leafPageElement retrieves the leaf node by index
+func (p *page) leafPageElement(index uint16) *leafPageElement {
+ n := &((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[index]
+ return n
}
-// lnodes retrieves a list of leaf nodes.
-func (p *page) lnodes() []lnode {
- return ((*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr)))[:]
+// leafPageElements retrieves a list of leaf nodes.
+func (p *page) leafPageElements() []leafPageElement {
+ return ((*[maxNodesPerPage]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
-// bnode retrieves the branch node by index
-func (p *page) bnode(index int) *bnode {
- return &((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[index]
+// branchPageElement retrieves the branch node by index
+func (p *page) branchPageElement(index uint16) *branchPageElement {
+ return &((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
}
-// bnodes retrieves a list of branch nodes.
-func (p *page) bnodes() []bnode {
- return ((*[maxNodesPerPage]bnode)(unsafe.Pointer(&p.ptr)))[:]
+// branchPageElements retrieves a list of branch nodes.
+func (p *page) branchPageElements() []branchPageElement {
+ return ((*[maxNodesPerPage]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// freelist retrieves a list of page ids from a freelist page.
@@ -58,8 +80,47 @@ func (p *page) freelist() []pgid {
return ((*[maxNodesPerPage]pgid)(unsafe.Pointer(&p.ptr)))[0:p.count]
}
+// dump writes n bytes of the page to STDERR as hex output.
+func (p *page) hexdump(n int) {
+ buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n]
+ fmt.Fprintf(os.Stderr, "%x\n", buf)
+}
+
// pages implements sort.Interface over a slice of pages, ordered by page id.
type pages []*page

// Len reports the number of pages.
func (s pages) Len() int { return len(s) }

// Swap exchanges the pages at indexes i and j.
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether the page at i has a lower id than the page at j.
func (s pages) Less(i, j int) bool { return s[j].id > s[i].id }
+
// branchPageElement represents a node on a branch page.
// pos is the byte offset from this element header to the key data,
// ksize is the key length, and pgid is presumably the id of the child
// page this element points to — confirm against the branch write path.
type branchPageElement struct {
	pos uint32
	ksize uint32
	pgid pgid
}

// key returns a byte slice of the node key.
// The slice aliases the page buffer: pos is relative to the element
// header itself, so the element's own address is used as the base.
func (n *branchPageElement) key() []byte {
	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
	return buf[n.pos : n.pos+n.ksize]
}
+
// leafPageElement represents a node on a leaf page.
// pos is the byte offset from this element header to the start of the
// key data (the value follows the key immediately); ksize and vsize are
// the key and value lengths. flags' semantics are not visible here —
// NOTE(review): confirm against the leaf read/write path.
type leafPageElement struct {
	flags uint32
	pos uint32
	ksize uint32
	vsize uint32
}

// key returns a byte slice of the node key.
// The slice aliases the page buffer: pos is relative to the element
// header itself, so the element's own address is used as the base.
func (n *leafPageElement) key() []byte {
	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
	return buf[n.pos : n.pos+n.ksize]
}

// value returns a byte slice of the node value.
// The value is stored directly after the key, so it starts at
// pos+ksize and runs for vsize bytes. The slice aliases the page buffer.
func (n *leafPageElement) value() []byte {
	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
	return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
}
diff --git a/quick_test.go b/quick_test.go
new file mode 100644
index 0000000..d85249d
--- /dev/null
+++ b/quick_test.go
@@ -0,0 +1,65 @@
+package bolt
+
+import (
+ "flag"
+ "math/rand"
+ "reflect"
+ "testing/quick"
+ "time"
+)
+
+// testing/quick defaults to 100 iterations and a random seed.
+// You can override these settings from the command line:
+//
+// -quickchecks The number of iterations to perform.
+// -quick.seed The seed to use for randomizing.
+// -quick.maxitems The maximum number of items to insert into a DB.
+// -quick.maxksize The maximum size of a key.
+// -quick.maxvsize The maximum size of a value.
+//
+
+var seed, testMaxItemCount, testMaxKeySize, testMaxValueSize int
+
// init registers the quick-test tuning flags and logs the chosen seed so
// failing runs can be reproduced with -quick.seed.
// NOTE(review): warn() runs at init time, before flag.Parse, so the value
// printed is always the time-based default even when -quick.seed is
// passed on the command line — confirm whether that is intended.
func init() {
	flag.IntVar(&seed, "quick.seed", int(time.Now().UnixNano())%100000, "")
	flag.IntVar(&testMaxItemCount, "quick.maxitems", 1024, "")
	flag.IntVar(&testMaxKeySize, "quick.maxksize", 1024, "")
	flag.IntVar(&testMaxValueSize, "quick.maxvsize", 1024, "")
	warn("seed:", seed)
}
+
+// qc creates a testing/quick configuration.
+func qc() *quick.Config {
+ return &quick.Config{Rand: rand.New(rand.NewSource(int64(seed)))}
+}
+
+type testKeyValuePairs []testKeyValuePair
+
+func (t testKeyValuePairs) Generate(rand *rand.Rand, size int) reflect.Value {
+ n := rand.Intn(testMaxItemCount-1) + 1
+ items := make(testKeyValuePairs, n)
+ for i := 0; i < n; i++ {
+ items[i].Generate(rand, size)
+ }
+ return reflect.ValueOf(items)
+}
+
+type testKeyValuePair struct {
+ Key []byte
+ Value []byte
+}
+
+func (t testKeyValuePair) Generate(rand *rand.Rand, size int) reflect.Value {
+ t.Key = randByteSlice(rand, 1, testMaxKeySize)
+ t.Value = randByteSlice(rand, 0, testMaxValueSize)
+ return reflect.ValueOf(t)
+}
+
+func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte {
+ n := rand.Intn(maxSize - minSize) + minSize
+ b := make([]byte, n)
+ for i := 0; i < n; i++ {
+ b[i] = byte(rand.Intn(255))
+ }
+ return b
+}
diff --git a/rwtransaction.go b/rwtransaction.go
index 145e0cf..2485c62 100644
--- a/rwtransaction.go
+++ b/rwtransaction.go
@@ -9,8 +9,7 @@ import (
// Only one read/write transaction can be active for a DB at a time.
type RWTransaction struct {
Transaction
- branches map[pgid]*branch
- leafs map[pgid]*leaf
+ nodes map[pgid]*node
}
// init initializes the transaction.
@@ -21,7 +20,7 @@ func (t *RWTransaction) init(db *DB) {
// Copy the meta and increase the transaction id.
t.meta = &meta{}
db.meta().copy(t.meta)
- t.meta.txnid += txnid(2)
+ t.meta.txnid += txnid(1)
}
// CreateBucket creates a new bucket.
@@ -32,7 +31,7 @@ func (t *RWTransaction) CreateBucket(name string) error {
} else if len(name) == 0 {
return &Error{"bucket name cannot be blank", nil}
} else if len(name) > MaxBucketNameSize {
- return &Error{"bucket name too large", nil}
+ return &Error{"bucket name too long", nil}
}
// Create a blank root leaf page.
@@ -72,9 +71,9 @@ func (t *RWTransaction) Put(name string, key []byte, value []byte) error {
}
// Insert a new node.
- c := b.Cursor()
- c.Goto(key)
- t.leaf(c).put(key, value)
+ c := b.cursor()
+ c.Get(key)
+ t.node(c.stack).put(key, key, value, 0)
return nil
}
@@ -121,8 +120,8 @@ func (t *RWTransaction) Rollback() {
}
func (t *RWTransaction) close() {
- // Clear temporary pages.
- t.leafs = nil
+ // Clear nodes.
+ t.nodes = nil
// TODO: Release writer lock.
}
@@ -146,55 +145,81 @@ func (t *RWTransaction) allocate(count int) *page {
return p
}
-// spill writes all the leafs and branches to dirty pages.
+// spill writes all the nodes to dirty pages.
func (t *RWTransaction) spill() {
- // Spill leafs first.
- for _, l := range t.leafs {
- t.spillLeaf(l)
+ // Keep track of the current root nodes.
+ // We will update this at the end once all nodes are created.
+ type root struct {
+ node *node
+ pgid pgid
}
+ var roots []root
- // Sort branches by highest depth first.
- branches := make(branches, 0, len(t.branches))
- for _, b := range t.branches {
- branches = append(branches, b)
+ // Sort nodes by highest depth first.
+ nodes := make(nodesByDepth, 0, len(t.nodes))
+ for _, n := range t.nodes {
+ nodes = append(nodes, n)
}
- sort.Sort(branches)
+ sort.Sort(nodes)
- // Spill branches by deepest first.
- for _, b := range branches {
- t.spillBranch(b)
- }
-}
-
-// spillLeaf writes a leaf to one or more dirty pages.
-func (t *RWTransaction) spillLeaf(l *leaf) {
- parent := l.parent
-
- // Split leaf, if necessary.
- leafs := l.split(t.db.pageSize)
-
- // TODO: If this is a root leaf and we split then add a parent branch.
+ // Spill nodes by deepest first.
+ for i := 0; i < len(nodes); i++ {
+ n := nodes[i]
- // Process each resulting leaf.
- previd := leafs[0].pgid
- for index, l := range leafs {
- // Allocate contiguous space for the leaf.
- p := t.allocate((l.size() / t.db.pageSize) + 1)
+ // Save existing root buckets for later.
+ if n.parent == nil && n.pgid != 0 {
+ roots = append(roots, root{n, n.pgid})
+ }
- // Write the leaf to the page.
- l.write(p)
+ // Split nodes and write them.
+ newNodes := n.split(t.db.pageSize)
+
+ // If this is a root node that split then create a parent node.
+ if n.parent == nil && len(newNodes) > 1 {
+ n.parent = &node{
+ isLeaf: false,
+ key: newNodes[0].inodes[0].key,
+ depth: n.depth - 1,
+ inodes: make(inodes, 0),
+ }
+ nodes = append(nodes, n.parent)
+ sort.Sort(nodes)
+ }
- // Insert or replace the node in the parent branch with the new pgid.
- if parent != nil {
- parent.put(previd, p.id, l.items[0].key, (index == 0))
- previd = l.pgid
+ // Write nodes to dirty pages.
+ for i, newNode := range newNodes {
+ // Allocate contiguous space for the node.
+ p := t.allocate((newNode.size() / t.db.pageSize) + 1)
+
+ // Write the node to the page.
+ newNode.write(p)
+ newNode.pgid = p.id
+ newNode.parent = n.parent
+
+ // The first node should use the existing entry, other nodes are inserts.
+ var oldKey []byte
+ if i == 0 {
+ oldKey = n.key
+ } else {
+ oldKey = newNode.inodes[0].key
+ }
+
+ // Update the parent entry.
+ if newNode.parent != nil {
+ newNode.parent.put(oldKey, newNode.inodes[0].key, nil, newNode.pgid)
+ }
}
}
-}
-// spillBranch writes a branch to one or more dirty pages.
-func (t *RWTransaction) spillBranch(l *branch) {
- warn("[pending] RWTransaction.spillBranch()") // TODO
+ // Update roots with new roots.
+ for _, root := range roots {
+ for _, b := range t.sys.buckets {
+ if b.root == root.pgid {
+ b.root = root.node.root().pgid
+ break
+ }
+ }
+ }
}
// write writes any dirty pages to disk.
@@ -231,28 +256,8 @@ func (t *RWTransaction) writeMeta() error {
return nil
}
-// leaf retrieves a leaf object based on the current position of a cursor.
-func (t *RWTransaction) leaf(c *Cursor) *leaf {
- e := c.stack[len(c.stack)-1]
- id := e.page.id
-
- // Retrieve leaf if it has already been fetched.
- if l := t.leafs[id]; l != nil {
- return l
- }
-
- // Otherwise create a leaf and cache it.
- l := &leaf{}
- l.read(t.page(id))
- l.parent = t.branch(c.stack[:len(c.stack)-1])
- t.leafs[id] = l
-
- return l
-}
-
-// branch retrieves a branch object based a cursor stack.
-// This should only be called from leaf().
-func (t *RWTransaction) branch(stack []elem) *branch {
+// node retrieves a node based a cursor stack.
+func (t *RWTransaction) node(stack []elem) *node {
if len(stack) == 0 {
return nil
}
@@ -260,16 +265,16 @@ func (t *RWTransaction) branch(stack []elem) *branch {
// Retrieve branch if it has already been fetched.
e := &stack[len(stack)-1]
id := e.page.id
- if b := t.branches[id]; b != nil {
- return b
+ if n := t.nodes[id]; n != nil {
+ return n
}
// Otherwise create a branch and cache it.
- b := &branch{}
- b.read(t.page(id))
- b.depth = len(stack) - 1
- b.parent = t.branch(stack[:len(stack)-1])
- t.branches[id] = b
+ n := &node{}
+ n.read(t.page(id))
+ n.depth = len(stack) - 1
+ n.parent = t.node(stack[:len(stack)-1])
+ t.nodes[id] = n
- return b
+ return n
}
diff --git a/rwtransaction_test.go b/rwtransaction_test.go
index b6e971e..4d256cc 100644
--- a/rwtransaction_test.go
+++ b/rwtransaction_test.go
@@ -1,6 +1,7 @@
package bolt
import (
+ "strings"
"testing"
"github.com/stretchr/testify/assert"
@@ -28,3 +29,35 @@ func TestTransactionCreateBucket(t *testing.T) {
assert.NoError(t, err)
})
}
+
+// Ensure that a bucket cannot be created twice.
+func TestTransactionRecreateBucket(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ // Create a bucket.
+ err := db.CreateBucket("widgets")
+ assert.NoError(t, err)
+
+ // Create the same bucket again.
+ err = db.CreateBucket("widgets")
+ assert.Equal(t, err, &Error{"bucket already exists", nil})
+ })
+}
+
+// Ensure that a bucket is created with a non-blank name.
+func TestTransactionCreateBucketWithoutName(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ err := db.CreateBucket("")
+ assert.Equal(t, err, &Error{"bucket name cannot be blank", nil})
+ })
+}
+
+// Ensure that a bucket name is not too long.
+func TestTransactionCreateBucketWithLongName(t *testing.T) {
+ withOpenDB(func(db *DB, path string) {
+ err := db.CreateBucket(strings.Repeat("X", 255))
+ assert.NoError(t, err)
+
+ err = db.CreateBucket(strings.Repeat("X", 256))
+ assert.Equal(t, err, &Error{"bucket name too long", nil})
+ })
+}
diff --git a/sys.go b/sys.go
index 657c973..cf15413 100644
--- a/sys.go
+++ b/sys.go
@@ -25,6 +25,16 @@ func (s *sys) get(key string) *bucket {
return s.buckets[key]
}
+// getByRoot retrieves a bucket by root page id.
+func (s *sys) getByRoot(pgid pgid) *bucket {
+ for _, b := range s.buckets {
+ if b.root == pgid {
+ return b
+ }
+ }
+ panic("root not found")
+}
+
// put sets a new value for a bucket.
func (s *sys) put(key string, b *bucket) {
s.buckets[key] = b
@@ -32,7 +42,9 @@ func (s *sys) put(key string, b *bucket) {
// del deletes a bucket by name.
func (s *sys) del(key string) {
- delete(s.buckets, key)
+ if b := s.buckets[key]; b != nil {
+ delete(s.buckets, key)
+ }
}
// read initializes the data from an on-disk page.
@@ -61,7 +73,8 @@ func (s *sys) read(p *page) {
// Associate keys and buckets.
for index, key := range keys {
- s.buckets[key] = buckets[index]
+ b := &bucket{buckets[index].root}
+ s.buckets[key] = b
}
}
diff --git a/transaction.go b/transaction.go
index e479dfb..29337c6 100644
--- a/transaction.go
+++ b/transaction.go
@@ -62,7 +62,7 @@ func (t *Transaction) Cursor(name string) *Cursor {
if b == nil {
return nil
}
- return b.Cursor()
+ return b.cursor()
}
// Get retrieves the value for a key in a named bucket.
diff --git a/warn.go b/warn.go
index 7e9a2a7..eadf664 100644
--- a/warn.go
+++ b/warn.go
@@ -10,5 +10,5 @@ func warn(v ...interface{}) {
}
func warnf(msg string, v ...interface{}) {
- fmt.Fprintf(os.Stderr, msg, v...)
+ fmt.Fprintf(os.Stderr, msg+"\n", v...)
}