aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--branch.go60
-rw-r--r--leaf.go135
-rw-r--r--leaf_test.go45
-rw-r--r--rwtransaction.go1
4 files changed, 135 insertions, 106 deletions
diff --git a/branch.go b/branch.go
new file mode 100644
index 0000000..77cc1ca
--- /dev/null
+++ b/branch.go
@@ -0,0 +1,60 @@
+package bolt
+
+import (
+ "bytes"
+ "sort"
+ "unsafe"
+)
+
+// branch represents a temporary in-memory branch page.
+type branch struct {
+ parent *branch
+ items branchItems
+}
+
+// insert inserts a new item after a given pgid.
+func (b *branch) insert(key []byte, previd pgid, id pgid) {
+ // Find previous insertion index.
+ index := sort.Search(len(b.items), func(i int) bool { return b.items[i].pgid >= previd })
+
+ // If there is no existing key then add a new item.
+ b.items = append(b.items, branchItem{})
+ if index < len(b.items) {
+ copy(b.items[index+1:], b.items[index:])
+ }
+
+ b.items[index].pgid = id
+ b.items[index].key = key
+}
+
+// replace swaps out an existing node id for a new one.
+func (b *branch) replace(oldid pgid, newid pgid, key []byte) {
+ index := sort.Search(len(b.items), func(i int) bool { return b.items[i].pgid >= oldid })
+ b.items[index].pgid = newid
+ b.items[index].key = key
+}
+
+// read initializes the item data from an on-disk page.
+func (b *branch) read(page *page) {
+ ncount := int(page.count)
+ b.items = make(branchItems, ncount)
+ bnodes := (*[maxNodesPerPage]bnode)(unsafe.Pointer(&page.ptr))
+ for i := 0; i < ncount; i++ {
+ bnode := &bnodes[i]
+ item := &b.items[i]
+ item.key = bnode.key()
+ }
+}
+
+
+type branchItems []branchItem
+
+type branchItem struct {
+ pgid pgid
+ key []byte
+}
+
+func (s branchItems) Len() int { return len(s) }
+func (s branchItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s branchItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
+
diff --git a/leaf.go b/leaf.go
index 23b35eb..dcd343f 100644
--- a/leaf.go
+++ b/leaf.go
@@ -9,16 +9,10 @@ import (
// leaf represents a temporary in-memory leaf page.
 // It is deserialized from a memory-mapped page and is not restricted by page size.
type leaf struct {
+ parent *branch
items leafItems
}
-type leafItems []leafItem
-
-type leafItem struct {
- key []byte
- value []byte
-}
-
// put inserts or replaces a key on a leaf page.
func (l *leaf) put(key []byte, value []byte) {
// Find insertion index.
@@ -35,11 +29,20 @@ func (l *leaf) put(key []byte, value []byte) {
l.items[index].value = value
}
+// size returns the size of the leaf after serialization.
+func (l *leaf) size() int {
+ var size int = pageHeaderSize
+ for _, item := range l.items {
+ size += lnodeSize + len(item.key) + len(item.value)
+ }
+ return size
+}
+
// read initializes the item data from an on-disk page.
-func (l *leaf) read(page *page) {
- ncount := int(page.count)
+func (l *leaf) read(p *page) {
+ ncount := int(p.count)
l.items = make(leafItems, ncount)
- lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&page.ptr))
+ lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
for i := 0; i < ncount; i++ {
lnode := &lnodes[i]
item := &l.items[i]
@@ -49,86 +52,70 @@ func (l *leaf) read(page *page) {
}
// write writes the items onto one or more leaf pages.
-func (l *leaf) write(pageSize int, allocate func(size int) (*page, error)) ([]*page, error) {
- var pages []*page
-
- for _, items := range l.split(pageSize) {
- // Determine the total page size.
- var size int = pageHeaderSize
- for _, item := range l.items {
- size += lnodeSize + len(item.key) + len(item.value)
- }
-
- // Allocate pages.
- page, err := allocate(size)
- if err != nil {
- return nil, err
- }
- page.flags |= p_leaf
- page.count = uint16(len(items))
-
- // Loop over each item and write it to the page.
- lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&page.ptr))
- b := (*[maxAllocSize]byte)(unsafe.Pointer(&page.ptr))[lnodeSize*len(items):]
- for index, item := range items {
- // Write item.
- lnode := &lnodes[index]
- lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
- lnode.ksize = uint32(len(item.key))
- lnode.vsize = uint32(len(item.value))
-
- // Write data to the end of the page.
- copy(b[0:], item.key)
- b = b[len(item.key):]
- copy(b[0:], item.value)
- b = b[len(item.value):]
- }
-
- pages = append(pages, page)
+func (l *leaf) write(p *page) {
+ // Initialize page.
+ p.flags |= p_leaf
+ p.count = uint16(len(l.items))
+
+ // Loop over each item and write it to the page.
+ lnodes := (*[maxNodesPerPage]lnode)(unsafe.Pointer(&p.ptr))
+ b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[lnodeSize*len(l.items):]
+ for index, item := range l.items {
+ // Write node.
+ lnode := &lnodes[index]
+ lnode.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(lnode)))
+ lnode.ksize = uint32(len(item.key))
+ lnode.vsize = uint32(len(item.value))
+
+ // Write data to the end of the page.
+ copy(b[0:], item.key)
+ b = b[len(item.key):]
+ copy(b[0:], item.value)
+ b = b[len(item.value):]
}
-
- return pages, nil
}
 // split divides up the nodes in the page into appropriately sized groups.
-func (l *leaf) split(pageSize int) []leafItems {
- // If we don't have enough items for multiple pages then just return the items.
- if len(l.items) <= (minKeysPerPage * 2) {
- return []leafItems{l.items}
+func (l *leaf) split(pageSize int) []*leaf {
+ // Ignore the split if the page doesn't have at least enough nodes for
+ // multiple pages or if the data can fit on a single page.
+ if len(l.items) <= (minKeysPerPage * 2) || l.size() < pageSize {
+ return []*leaf{l}
}
- // If we're not larger than one page then just return the items.
- var totalSize int = pageHeaderSize
- for _, item := range l.items {
- totalSize += lnodeSize + len(item.key) + len(item.value)
- }
- if totalSize < pageSize {
- return []leafItems{l.items}
- }
-
- // Otherwise group into smaller pages and target a given fill size.
- var size int
- var group leafItems
- var groups []leafItems
-
 	// Set fill threshold to 1/16th (~6%) of the page size (pageSize >> 4).
 	// NOTE(review): comment previously said 25%, which would be pageSize >> 2 — confirm intended fill factor.
threshold := pageSize >> 4
+ // Otherwise group into smaller pages and target a given fill size.
+ size := 0
+ current := &leaf{}
+ leafs := make([]*leaf, 0)
+
for index, item := range l.items {
nodeSize := lnodeSize + len(item.key) + len(item.value)
- if group == nil || (len(group) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold) {
+ if (len(current.items) >= minKeysPerPage && index < len(l.items)-minKeysPerPage && size+nodeSize > threshold) {
size = pageHeaderSize
- if group != nil {
- groups = append(groups, group)
- }
- group = make(leafItems, 0)
+ leafs = append(leafs, current)
+ current = &leaf{}
}
size += nodeSize
- group = append(group, item)
+ current.items = append(current.items, item)
}
- groups = append(groups, group)
+ leafs = append(leafs, current)
- return groups
+ return leafs
}
+
+type leafItems []leafItem
+
+type leafItem struct {
+ key []byte
+ value []byte
+}
+
+func (s leafItems) Len() int { return len(s) }
+func (s leafItems) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
+func (s leafItems) Less(i, j int) bool { return bytes.Compare(s[i].key, s[j].key) == -1 }
+
diff --git a/leaf_test.go b/leaf_test.go
index ad23074..3b2235f 100644
--- a/leaf_test.go
+++ b/leaf_test.go
@@ -59,15 +59,12 @@ func TestLeafWrite(t *testing.T) {
// Write it to a page.
var buf [4096]byte
- allocate := func(size int) (*page, error) {
- return (*page)(unsafe.Pointer(&buf[0])), nil
- }
- pages, err := l.write(4096, allocate)
- assert.NoError(t, err)
+ p := (*page)(unsafe.Pointer(&buf[0]))
+ l.write(p)
// Read the page back in.
l2 := &leaf{}
- l2.read(pages[0])
+ l2.read(p)
// Check that the two pages are the same.
assert.Equal(t, len(l2.items), 3)
@@ -79,22 +76,6 @@ func TestLeafWrite(t *testing.T) {
assert.Equal(t, l2.items[2].value, []byte("que"))
}
-// Ensure that an error that an allocation error during writing is returned.
-func TestLeafWriteError(t *testing.T) {
- // Create a temp page.
- l := &leaf{items: make(leafItems, 0)}
- l.put([]byte("susy"), []byte("que"))
-
- // Write it to a page.
- exp := &Error{}
- allocate := func(size int) (*page, error) {
- return nil, exp
- }
- pages, err := l.write(4096, allocate)
- assert.Nil(t, pages)
- assert.Equal(t, err, exp)
-}
-
// Ensure that a temporary page can split into appropriate subgroups.
func TestLeafSplit(t *testing.T) {
// Create a temp page.
@@ -106,11 +87,11 @@ func TestLeafSplit(t *testing.T) {
l.put([]byte("00000005"), []byte("0123456701234567"))
// Split between 3 & 4.
- groups := l.split(100)
+ leafs := l.split(100)
- assert.Equal(t, len(groups), 2)
- assert.Equal(t, len(groups[0]), 2)
- assert.Equal(t, len(groups[1]), 3)
+ assert.Equal(t, len(leafs), 2)
+ assert.Equal(t, len(leafs[0].items), 2)
+ assert.Equal(t, len(leafs[1].items), 3)
}
// Ensure that a temporary page with the minimum number of items just returns a single split group.
@@ -121,9 +102,9 @@ func TestLeafSplitWithMinKeys(t *testing.T) {
l.put([]byte("00000002"), []byte("0123456701234567"))
// Split.
- groups := l.split(20)
- assert.Equal(t, len(groups), 1)
- assert.Equal(t, len(groups[0]), 2)
+ leafs := l.split(20)
+ assert.Equal(t, len(leafs), 1)
+ assert.Equal(t, len(leafs[0].items), 2)
}
// Ensure that a temporary page that has keys that all fit on a page just returns one split group.
@@ -137,7 +118,7 @@ func TestLeafSplitFitsInPage(t *testing.T) {
l.put([]byte("00000005"), []byte("0123456701234567"))
// Split.
- groups := l.split(4096)
- assert.Equal(t, len(groups), 1)
- assert.Equal(t, len(groups[0]), 5)
+ leafs := l.split(4096)
+ assert.Equal(t, len(leafs), 1)
+ assert.Equal(t, len(leafs[0].items), 5)
}
diff --git a/rwtransaction.go b/rwtransaction.go
index 4e576cd..5ec0d6e 100644
--- a/rwtransaction.go
+++ b/rwtransaction.go
@@ -8,6 +8,7 @@ import (
// Only one read/write transaction can be active for a DB at a time.
type RWTransaction struct {
Transaction
+ branches map[pgid]*branch
leafs map[pgid]*leaf
}