diff options
Diffstat (limited to 'src/dedo.go')
-rw-r--r-- | src/dedo.go | 1723 |
1 files changed, 1007 insertions, 716 deletions
diff --git a/src/dedo.go b/src/dedo.go index 3850348..2dde5eb 100644 --- a/src/dedo.go +++ b/src/dedo.go @@ -1,3 +1,41 @@ +/// Package dedo implements a low-level key/value store in pure Go. It supports +/// fully serializable transactions, ACID semantics, and lock-free MVCC with +/// multiple readers and a single writer. Dedo can be used for projects that +/// want a simple data store without the need to add large dependencies such as +/// Postgres or MySQL. +/// +/// Dedo is a single-level, zero-copy, B+tree data store. This means that Dedo +/// is optimized for fast read access and does not require recovery in the event +/// of a system crash. Transactions which have not finished committing will +/// simply be rolled back in the event of a crash. +/// +/// The design of Dedo is based on Howard Chu's LMDB database project. +/// +/// +/// == Basics +/// +/// There are only a few types in Dedo: DB, Bucket, Tx, and Cursor. The DB is +/// a collection of buckets and is represented by a single file on disk. A +/// bucket is a collection of unique keys that are associated with values. +/// +/// Transactions provide either read-only or read-write access to the database. +/// Read-only transactions can retrieve key/value pairs and can use Cursors to +/// iterate over the dataset sequentially. Read-write transactions can create +/// and delete buckets and can insert and remove keys. Only one read-write +/// transaction is allowed at a time. +/// +/// +/// == Caveats +/// +/// The database uses a read-only, memory-mapped data file to ensure that +/// applications cannot corrupt the database, however, this means that keys and +/// values returned from Dedo cannot be changed. Writing to a read-only byte +/// slice will cause Go to panic. +/// +/// Keys and values retrieved from the database are only valid for the life of +/// the transaction. When used outside the transaction, these byte slices can +/// point to different data or can point to invalid memory which will cause a +/// panic. package dedo import ( @@ -24,84 +62,87 @@ import ( type pgid uint64 -// bucket represents the on-file representation of a bucket. -// This is stored as the "value" of a bucket key. If the bucket is small enough, -// then its root page can be stored inline in the "value", after the bucket -// header. In the case of inline buckets, the "root" will be 0. +/// bucket represents the on-file representation of a bucket. This is stored as +/// the "value" of a bucket key. If the bucket is small enough, then its root +/// page can be stored inline in the "value", after the bucket header. In the +/// case of inline buckets, the "root" will be 0. type bucket struct { - root pgid // page id of the bucket's root-level page - sequence uint64 // monotonically incrementing, used by NextSequence() + root pgid /// page id of the bucket's root-level page + sequence uint64 /// monotonically incrementing, used by NextSequence() } -// Bucket represents a distinct collection of key/value pairs inside the -// database. Keys aren't unique across different buckets. +/// Bucket represents a distinct collection of key/value pairs inside the +/// database. Keys aren't unique across different buckets. type Bucket struct { ref *bucket - tx *Tx // the associated transaction - buckets map[string]*Bucket // subbucket cache - page *page // inline page reference - rootNode *node // materialized node for the root page. - nodes map[pgid]*node // node cache -} - -// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order. -// Cursors see nested buckets with value == nil. -// Cursors can be obtained from a transaction and are valid as long as the transaction is open. -// -// Keys and values returned from the cursor are only valid for the life of the transaction. -// -// Changing data while traversing with a cursor may cause it to be invalidated -// and return unexpected keys and/or values. You must reposition your cursor -// after mutating data. + tx *Tx /// the associated transaction + buckets map[string]*Bucket /// subbucket cache + page *page /// inline page reference + rootNode *node /// materialized node for the root page + nodes map[pgid]*node /// node cache +} + +/// Cursor represents an iterator that can traverse over all key/value pairs in +/// a bucket in sorted order. Cursors see nested buckets with value == nil. +/// Cursors can be obtained from a transaction and are valid as long as the +/// transaction is open. +/// +/// Keys and values returned from the cursor are only valid for the life of the +/// transaction. +/// +/// Changing data while traversing with a cursor may cause it to be invalidated +/// and return unexpected keys and/or values. You must reposition your cursor +/// after mutating data. type Cursor struct { bucket *Bucket stack []elemRef } -// elemRef represents a reference to an element on a given page/node. +/// elemRef represents a reference to an element on a given page/node. type elemRef struct { page *page node *node index int } -// DB represents a collection of buckets persisted to a file on disk. -// All data access is performed through transactions which can be obtained through the DB. -// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. +/// DB represents a collection of buckets persisted to a file on disk. All data +/// access is performed through transactions which can be obtained through the +/// DB. All the functions on DB will return a ErrDatabaseNotOpen if accessed +/// before Open() is called. type DB struct { - // When enabled, the database will perform a Check() after every commit. - // A panic is issued if the database is in an inconsistent state. This - // flag has a large performance impact so it should only be used for - // debugging purposes. + /// When enabled, the database will perform a Check() after every + /// commit. A panic is issued if the database is in an inconsistent + /// state. This flag has a large performance impact so it should only + /// be used for debugging purposes. StrictMode bool - // MaxBatchSize is the maximum size of a batch. Default value is - // copied from DefaultMaxBatchSize in Open. - // - // If <=0, disables batching. - // - // Do not change concurrently with calls to Batch. + /// MaxBatchSize is the maximum size of a batch. Default value is + /// copied from DefaultMaxBatchSize in Open. + /// + /// If <=0, disables batching. + /// + /// Do not change concurrently with calls to Batch. MaxBatchSize int - // MaxBatchDelay is the maximum delay before a batch starts. - // Default value is copied from DefaultMaxBatchDelay in Open. - // - // If <=0, effectively disables batching. - // - // Do not change concurrently with calls to Batch. + /// MaxBatchDelay is the maximum delay before a batch starts. Default + /// value is copied from DefaultMaxBatchDelay in Open. + /// + /// If <=0, effectively disables batching. + /// + /// Do not change concurrently with calls to Batch. MaxBatchDelay time.Duration - // AllocSize is the amount of space allocated when the database - // needs to create new pages. This is done to amortize the cost - // of truncate() and fsync() when growing the data file. + /// AllocSize is the amount of space allocated when the database needs + /// to create new pages. This is done to amortize the cost of + /// truncate() and fsync() when growing the data file. AllocSize int path string file *os.File - dataref []byte // mmap'ed read-only via PROT_READ, write throws SEGV + dataref []byte /// mmap'ed read-only via PROT_READ, write throws SEGV data *[maxMapSize]byte datasz int - filesz int // current on disk file size + filesz int /// current on disk file size meta0 *meta meta1 *meta pageSize int @@ -115,9 +156,9 @@ type DB struct { batchMu sync.Mutex batch *batch - rwlock sync.Mutex // Allows only one writer at a time. - metalock sync.Mutex // Protects meta page access. - mmaplock sync.RWMutex // Protects mmap access during remapping. + rwlock sync.Mutex /// Allows only one writer at a time. + metalock sync.Mutex /// Protects meta page access. + mmaplock sync.RWMutex /// Protects mmap access during remapping. } type call struct { @@ -148,15 +189,16 @@ type meta struct { checksum uint64 } -// freelist represents a list of all pages that are available for allocation. -// It also tracks pages that have been freed but are still in use by open transactions. +/// freelist represents a list of all pages that are available for allocation. +/// It also tracks pages that have been freed but are still in use by open +/// transactions. type freelist struct { - ids []pgid // all free and available free page ids. - pending map[txid][]pgid // mapping of soon-to-be free page ids by tx. - cache map[pgid]bool // fast lookup of all free and pending page ids. + ids []pgid /// all free and available free page ids + pending map[txid][]pgid /// mapping of soon-to-be free page ids by tx + cache map[pgid]bool /// fast lookup of all free and pending page ids } -// node represents an in-memory, deserialized page. +/// node represents an in-memory, deserialized page. type node struct { bucket *Bucket isLeaf bool @@ -173,9 +215,9 @@ type nodes []*node type pages []*page -// inode represents an internal node inside of a node. -// It can be used to point to elements in a page or point -// to an element which hasn't been added to a page yet. +/// inode represents an internal node inside of a node. +/// It can be used to point to elements in a page or point +/// to an element which hasn't been added to a page yet. type inode struct { flags uint32 pgid pgid @@ -193,14 +235,14 @@ type page struct { ptr uintptr } -// branchPageElement represents a node on a branch page. +/// branchPageElement represents a node on a branch page. type branchPageElement struct { pos uint32 ksize uint32 pgid pgid } -// leafPageElement represents a node on a leaf page. +/// leafPageElement represents a node on a leaf page. type leafPageElement struct { flags uint32 pos uint32 @@ -208,7 +250,7 @@ type leafPageElement struct { vsize uint32 } -// PageInfo represents human readable information about a page. +/// PageInfo represents human readable information about a page. type PageInfo struct { ID int Type string @@ -218,17 +260,18 @@ type PageInfo struct { type pgids []pgid -// txid represents the internal transaction identifier. +/// txid represents the internal transaction identifier. type txid uint64 -// Tx represents a read-only or read/write transaction on the database. -// Read-only transactions can be used for retrieving values for keys and creating cursors. -// Read/write transactions can create and remove buckets and create and remove keys. -// -// IMPORTANT: You must commit or rollback transactions when you are done with -// them. Pages can not be reclaimed by the writer until no more transactions -// are using them. A long running read transaction can cause the database to -// quickly grow. +/// Tx represents a read-only or read/write transaction on the database. +/// Read-only transactions can be used for retrieving values for keys and +/// creating cursors. Read/write transactions can create and remove buckets +/// and create and remove keys. +/// +/// IMPORTANT: You must commit or rollback transactions when you are done with +/// them. Pages can not be reclaimed by the writer until no more transactions +/// are using them. A long running read transaction can cause the database to +/// quickly grow. type Tx struct { writable bool managed bool @@ -239,9 +282,9 @@ type Tx struct { commitHandlers []func() } -// walkFunc is the type of the function called for keys (buckets and "normal" -// values) discovered by Walk. keys is the list of keys to descend to the bucket -// owning the discovered key/value pair k/v. +/// walkFunc is the type of the function called for keys (buckets and "normal" +/// values) discovered by Walk. Keys is the list of keys to descend to the +/// bucket owning the discovered key/value pair k/v. type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error type argsT struct{ @@ -263,32 +306,32 @@ type commandT struct{ const ( - // maxMapSize represents the largest mmap size supported by Bolt. - maxMapSize = 0xFFFFFFFFFFFF // 256TB + /// maxMapSize represents the largest mmap size supported by Dedo. + maxMapSize = 0xFFFFFFFFFFFF /// 256TB - // maxAllocSize is the size used when creating array pointers. + /// maxAllocSize is the size used when creating array pointers. maxAllocSize = 0x7FFFFFFF // FIXME: why? - // MaxKeySize is the maximum length of a key, in bytes. + /// MaxKeySize is the maximum length of a key, in bytes. MaxKeySize = 32768 // FIXME: why? - // MaxValueSize is the maximum length of a value, in bytes. + /// MaxValueSize is the maximum length of a value, in bytes. MaxValueSize = (1 << 31) - 2 bucketHeaderSize = int(unsafe.Sizeof(bucket{})) - // The largest step that can be taken when remapping the mmap. - maxMmapStep = 1 << 30 // 1GB + /// The largest step that can be taken when remapping the mmap. + maxMmapStep = 1 << 30 /// 1GB - // The data file format version. + /// The data file format version. version = 2 - // Represents a marker value to indicate that a file is a Bolt DB. + /// Represents a marker value to indicate that a file is a Dedo DB. magic uint32 = 0xFACADAAB - // Default values if not set in a DB instance. + /// Default values if not set in a DB instance. DefaultMaxBatchSize int = 1000 DefaultMaxBatchDelay = 10 * time.Millisecond DefaultAllocSize = 16 * 1024 * 1024 @@ -298,7 +341,7 @@ const ( minKeysPerPage = 2 branchPageElementSize = int(unsafe.Sizeof(branchPageElement{})) - leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) + leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) branchPageFlag = 0x01 leafPageFlag = 0x02 @@ -307,116 +350,125 @@ const ( bucketLeafFlag = 0x01 - // PageHeaderSize represents the size of the bolt.page header. + /// PageHeaderSize represents the size of the dedo.page header. PageHeaderSize = 16 ) var ( - // trySolo is a special sentinel error value used for signaling that a - // transaction function should be re-run. It should never be seen by - // callers. - trySolo = errors.New("batch function returned an error and should be re-run solo") + /// trySolo is a special sentinel error value used for signaling that a + /// transaction function should be re-run. It should never be seen by + /// callers. + trySolo = errors.New( + "batch function returned an error and should be re-run solo", + ) - // - // These errors can be returned when opening or calling methods on a DB. - // + /// + /// These errors can be returned when opening or calling methods on a + /// DB. + /// - // ErrDatabaseNotOpen is returned when a DB instance is accessed before it - // is opened or after it is closed. + /// ErrDatabaseNotOpen is returned when a DB instance is accessed before + /// it is opened or after it is closed. ErrDatabaseNotOpen = errors.New("database not open") - // ErrInvalid is returned when both meta pages on a database are invalid. - // This typically occurs when a file is not a bolt database. + /// ErrInvalid is returned when both meta pages on a database are + /// invalid. This typically occurs when a file is not a dedo database. ErrInvalid = errors.New("invalid database") - // ErrVersionMismatch is returned when the data file was created with a - // different version of Bolt. + /// ErrVersionMismatch is returned when the data file was created with a + /// different version of Dedo. ErrVersionMismatch = errors.New("version mismatch") - // ErrChecksum is returned when either meta page checksum does not match. + /// ErrChecksum is returned when either meta page checksum does not + /// match. ErrChecksum = errors.New("checksum error") - // - // These errors can occur when beginning or committing a Tx. - // + /// + /// These errors can occur when beginning or committing a Tx. + /// - // ErrTxNotWritable is returned when performing a write operation on a - // read-only transaction. + /// ErrTxNotWritable is returned when performing a write operation on a + /// read-only transaction. ErrTxNotWritable = errors.New("tx not writable") - // ErrTxClosed is returned when committing or rolling back a transaction - // that has already been committed or rolled back. + /// ErrTxClosed is returned when committing or rolling back a + /// transaction that has already been committed or rolled back. ErrTxClosed = errors.New("tx closed") - // - // These errors can occur when putting or deleting a value or a bucket. - // + /// + /// These errors can occur when putting or deleting a value or a bucket. + /// - // ErrBucketNotFound is returned when trying to access a bucket that has - // not been created yet. + /// ErrBucketNotFound is returned when trying to access a bucket that + /// has not been created yet. ErrBucketNotFound = errors.New("bucket not found") - // ErrBucketExists is returned when creating a bucket that already exists. + /// ErrBucketExists is returned when creating a bucket that already + /// exists. ErrBucketExists = errors.New("bucket already exists") - // ErrBucketNameRequired is returned when creating a bucket with a blank name. + /// ErrBucketNameRequired is returned when creating a bucket with a + /// blank name. ErrBucketNameRequired = errors.New("bucket name required") - // ErrKeyRequired is returned when inserting a zero-length key. + /// ErrKeyRequired is returned when inserting a zero-length key. ErrKeyRequired = errors.New("key required") - // ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize. + /// ErrKeyTooLarge is returned when inserting a key that is larger than + /// MaxKeySize. ErrKeyTooLarge = errors.New("key too large") - // ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize. + /// ErrValueTooLarge is returned when inserting a value that is larger + /// than MaxValueSize. ErrValueTooLarge = errors.New("value too large") - // ErrIncompatibleValue is returned when trying create or delete a bucket - // on an existing non-bucket key or when trying to create or delete a - // non-bucket key on an existing bucket key. + /// ErrIncompatibleValue is returned when trying create or delete a + /// bucket on an existing non-bucket key or when trying to create or + /// delete a non-bucket key on an existing bucket key. ErrIncompatibleValue = errors.New("incompatible value") - - // - // - // - + /// ErrMissingKey is returned when the CLI tries to get a key that it + /// can't find in the specified bucket. ErrMissingKey = errors.New("missing key") - // ErrUsage is returned when a usage message was printed and the process - // should simply exit with an error. + /// ErrUsage is returned when a usage message was printed and the + /// process should simply exit with an error. ErrUsage = errors.New("usage") - // ErrUnknownCommand is returned when a CLI command is not specified. + /// ErrUnknownCommand is returned when a CLI command is not specified. ErrUnknownCommand = errors.New("unknown command") - // ErrPathRequired is returned when the path to a Bolt database is not specified. + /// ErrPathRequired is returned when the path to a Dedo database is not + /// specified. ErrPathRequired = errors.New("path required") - // ErrFileNotFound is returned when a Bolt database does not exist. + /// ErrFileNotFound is returned when a Dedo database does not exist. ErrFileNotFound = errors.New("file not found") - // ErrCorrupt is returned when a checking a data file finds errors. + /// ErrCorrupt is returned when a checking a data file finds errors. ErrCorrupt = errors.New("invalid value") - // ErrNonDivisibleBatchSize is returned when the batch size can't be evenly - // divided by the iteration count. - ErrNonDivisibleBatchSize = errors.New("number of iterations must be divisible by the batch size") + /// ErrNonDivisibleBatchSize is returned when the batch size can't be + /// evenly divided by the iteration count. + ErrNonDivisibleBatchSize = errors.New( + "number of iterations must be divisible by the batch size", + ) - // ErrPageIDRequired is returned when a required page id is not specified. + /// ErrPageIDRequired is returned when a required page id is not + /// specified. ErrPageIDRequired = errors.New("page id required") ) -// fdatasync flushes written data to a file descriptor. +/// fdatasync() flushes written data to a file descriptor. func fdatasync(db *DB) error { return db.file.Sync() } -// flock acquires an advisory lock on a file descriptor. +/// flock() acquires an advisory lock on a file descriptor. func flock(db *DB) error { const lockFlags = syscall.LOCK_EX | syscall.LOCK_NB @@ -433,22 +485,21 @@ func flock(db *DB) error { } } -// funlock releases an advisory lock on a file descriptor. +/// funlock() releases an advisory lock on a file descriptor. func funlock(db *DB) error { return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) } -// mmap memory maps a DB's data file. +/// mmap memory() maps a DB's data file. func mmap(db *DB, sz int) error { - // Map the data file to memory. fd := int(db.file.Fd()) b, err := syscall.Mmap(fd, 0, sz, syscall.PROT_READ, syscall.MAP_SHARED) if err != nil { return err } - // Advise the kernel that the mmap is accessed randomly. - if err := madvise(b, syscall.MADV_RANDOM); err != nil { + err = madvise(b, syscall.MADV_RANDOM) + if err != nil { return fmt.Errorf("madvise: %s", err) } @@ -459,14 +510,12 @@ func mmap(db *DB, sz int) error { return nil } -// munmap unmaps a DB's data file from memory. +/// munmap() unmaps a DB's data file from memory. func munmap(db *DB) error { - // Ignore the unmap if we have no mapped data. if db.dataref == nil { return nil } - // Unmap using the original byte slice. err := syscall.Munmap(db.dataref) db.dataref = nil db.data = nil @@ -474,18 +523,22 @@ func munmap(db *DB) error { return err } -// NOTE: This function is copied from stdlib because it is not available on darwin. func madvise(b []byte, advice int) (err error) { - _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice)) + _, _, e1 := syscall.Syscall( + syscall.SYS_MADVISE, + uintptr(unsafe.Pointer(&b[0])), + uintptr(len(b)), + uintptr(advice), + ) if e1 != 0 { err = e1 } return } -// newBucket returns a new bucket associated with a transaction. +/// newBucket() returns a new bucket associated with a transaction. func newBucket(tx *Tx) Bucket { - var b = Bucket{tx: tx} + b := Bucket{tx: tx} if tx.writable { b.buckets = make(map[string]*Bucket) b.nodes = make(map[pgid]*node) @@ -493,28 +546,28 @@ func newBucket(tx *Tx) Bucket { return b } -// Writable returns whether the bucket is writable. +/// Bucket.Writable() returns whether the bucket is writable. func (b *Bucket) Writable() bool { return b.tx.writable } -// Cursor creates a cursor associated with the bucket. -// The cursor is only valid as long as the transaction is open. -// Do not use a cursor after the transaction is closed. +/// Bucket.Cursor() creates a cursor associated with the bucket. The cursor is +/// only valid as long as the transaction is open. Do not use a cursor after +/// the transaction is closed. func (b *Bucket) Cursor() *Cursor { - // Allocate and return a cursor. return &Cursor{ bucket: b, stack: make([]elemRef, 0), } } -// Bucket retrieves a nested bucket by name. -// Returns nil if the bucket does not exist. -// The bucket instance is only valid for the lifetime of the transaction. +/// Bucket.Bucket() retrieves a nested bucket by name. Returns nil if the +/// bucket does not exist. The bucket instance is only valid for the lifetime +/// of the transaction. func (b *Bucket) Bucket(name []byte) *Bucket { if b.buckets != nil { - if child := b.buckets[string(name)]; child != nil { + child := b.buckets[string(name)] + if child != nil { return child } } @@ -529,7 +582,7 @@ func (b *Bucket) Bucket(name []byte) *Bucket { } // Otherwise create a bucket and cache it. - var child = b.openBucket(v) + child := b.openBucket(v) if b.buckets != nil { b.buckets[string(name)] = child } @@ -537,13 +590,13 @@ func (b *Bucket) Bucket(name []byte) *Bucket { return child } -// Helper method that re-interprets a sub-bucket value -// from a parent into a Bucket +/// Bucket.Helper() method that re-interprets a sub-bucket value from a parent +/// into a Bucket func (b *Bucket) openBucket(value []byte) *Bucket { - var child = newBucket(b.tx) + child := newBucket(b.tx) - // If this is a writable transaction then we need to copy the bucket entry. - // Read-only transactions can point directly at the mmap entry. + // If this is a writable transaction then we need to copy the bucket + // entry. Read-only transactions can point directly at the mmap entry. if b.tx.writable { child.ref = &bucket{} *child.ref = *(*bucket)(unsafe.Pointer(&value[0])) @@ -559,9 +612,10 @@ func (b *Bucket) openBucket(value []byte) *Bucket { return &child } -// CreateBucket creates a new bucket at the given key and returns the new bucket. -// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. +/// Bucket.CreateBucket() creates a new bucket at the given key and returns the +/// new bucket. Returns an error if the key already exists, if the bucket name +/// is blank, or if the bucket name is too long. The bucket instance is only +/// valid for the lifetime of the transaction. func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { if b.tx.db == nil { return nil, ErrTxClosed @@ -584,27 +638,29 @@ func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { } // Create empty, inline bucket. - var bucket = Bucket{ + bucket := Bucket{ ref: &bucket{}, rootNode: &node{isLeaf: true}, } - var value = bucket.write() + value := bucket.write() // Insert into node. key = cloneBytes(key) c.node().put(key, key, value, 0, bucketLeafFlag) // Since subbuckets are not allowed on inline buckets, we need to - // dereference the inline page, if it exists. This will cause the bucket - // to be treated as a regular, non-inline bucket for the rest of the tx. + // dereference the inline page, if it exists. This will cause the + // bucket to be treated as a regular, non-inline bucket for the rest of + // the tx. b.page = nil return b.Bucket(key), nil } -// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it. -// Returns an error if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. +/// Bucket.CreateBucketIfNotExists() creates a new bucket if it doesn't already +/// exist and returns a reference to it. Returns an error if the bucket name is +/// blank, or if the bucket name is too long. The bucket instance is only valid +/// for the lifetime of the transaction. func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { child, err := b.CreateBucket(key) if err == ErrBucketExists { @@ -615,8 +671,8 @@ func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { return child, nil } -// DeleteBucket deletes a bucket at the given key. -// Returns an error if the bucket does not exists, or if the key represents a non-bucket value. +/// Bucket.DeleteBucket() deletes a bucket at the given key. Returns an error +/// if the bucket does not exists, or if the key represents a non-bucket value. func (b *Bucket) DeleteBucket(key []byte) error { if b.tx.db == nil { return ErrTxClosed @@ -639,7 +695,8 @@ func (b *Bucket) DeleteBucket(key []byte) error { child := b.Bucket(key) err := child.ForEach(func(k, v []byte) error { if v == nil { - if err := child.DeleteBucket(k); err != nil { + err := child.DeleteBucket(k) + if err != nil { return fmt.Errorf("delete bucket: %s", err) } } @@ -663,9 +720,9 @@ func (b *Bucket) DeleteBucket(key []byte) error { return nil } -// Get retrieves the value for a key in the bucket. -// Returns a nil value if the key does not exist or if the key is a nested bucket. -// The returned value is only valid for the life of the transaction. +/// Bucket.Get() retrieves the value for a key in the bucket. Returns a nil +/// value if the key does not exist or if the key is a nested bucket. The +/// returned value is only valid for the life of the transaction. func (b *Bucket) Get(key []byte) []byte { k, v, flags := b.Cursor().seek(key) @@ -674,17 +731,19 @@ func (b *Bucket) Get(key []byte) []byte { return nil } - // If our target node isn't the same key as what's passed in then return nil. + // If our target node isn't the same key as what's passed in then return + // nil. if !bytes.Equal(key, k) { return nil } return v } -// Put sets the value for a key in the bucket. -// If the key exist then its previous value will be overwritten. -// Supplied value must remain valid for the life of the transaction. -// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large. +/// Bucket.Put() sets the value for a key in the bucket. If the key exist then +/// its previous value will be overwritten. Supplied value must remain valid +/// for the life of the transaction. Returns an error if the bucket was created +/// from a read-only transaction, if the key is blank, if the key is too large, +/// or if the value is too large. func (b *Bucket) Put(key []byte, value []byte) error { if b.tx.db == nil { return ErrTxClosed @@ -714,9 +773,9 @@ func (b *Bucket) Put(key []byte, value []byte) error { return nil } -// Delete removes a key from the bucket. -// If the key does not exist then nothing is done and a nil error is returned. -// Returns an error if the bucket was created from a read-only transaction. +/// Bucket.Delete() removes a key from the bucket. If the key does not exist +/// then nothing is done and a nil error is returned. Returns an error if the +/// bucket was created from a read-only transaction. func (b *Bucket) Delete(key []byte) error { if b.tx.db == nil { return ErrTxClosed @@ -739,12 +798,13 @@ func (b *Bucket) Delete(key []byte) error { return nil } -// Sequence returns the current integer for the bucket without incrementing it. +/// Bucket.Sequence() returns the current integer for the bucket without +/// incrementing it. func (b *Bucket) Sequence() uint64 { return b.ref.sequence } -// SetSequence updates the sequence number for the bucket. +/// Bucket.SetSequence() updates the sequence number for the bucket. func (b *Bucket) SetSequence(v uint64) error { if b.tx.db == nil { return ErrTxClosed @@ -763,7 +823,7 @@ func (b *Bucket) SetSequence(v uint64) error { return nil } -// NextSequence returns an autoincrementing integer for the bucket. +/// Bucket.NextSequence() returns an autoincrementing integer for the bucket. func (b *Bucket) NextSequence() (uint64, error) { if b.tx.db == nil { return 0, ErrTxClosed @@ -782,24 +842,26 @@ func (b *Bucket) NextSequence() (uint64, error) { return b.ref.sequence, nil } -// ForEach executes a function for each key/value pair in a bucket. -// If the provided function returns an error then the iteration is stopped and -// the error is returned to the caller. The provided function must not modify -// the bucket; this will result in undefined behavior. +/// Bucket.ForEach() executes a function for each key/value pair in a bucket. +/// If the provided function returns an error then the iteration is stopped and +/// the error is returned to the caller. The provided function must not modify +/// the bucket; this will result in undefined behavior. func (b *Bucket) ForEach(fn func(k, v []byte) error) error { if b.tx.db == nil { return ErrTxClosed } c := b.Cursor() for k, v := c.First(); k != nil; k, v = c.Next() { - if err := fn(k, v); err != nil { + err := fn(k, v) + if err != nil { return err } } return nil } -// forEachPage iterates over every page in a bucket, including inline pages. +/// Bucket.forEachPage() iterates over every page in a bucket, including inline +/// pages. func (b *Bucket) forEachPage(fn func(*page, int)) { // If we have an inline page then just use that. if b.page != nil { @@ -811,8 +873,8 @@ func (b *Bucket) forEachPage(fn func(*page, int)) { b.tx.forEachPage(b.ref.root, 0, fn) } -// forEachPageNode iterates over every page (or node) in a bucket. -// This also includes inline pages. +/// Bucket.forEachPageNode() iterates over every page (or node) in a bucket. +/// This also includes inline pages. func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { // If we have an inline page or root node then just use that. if b.page != nil { @@ -822,8 +884,12 @@ func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { b._forEachPageNode(b.ref.root, 0, fn) } -func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) { - var p, n = b.pageNode(pgid) +func (b *Bucket) _forEachPageNode( + pgid pgid, + depth int, + fn func(*page, *node, int), +) { + p, n := b.pageNode(pgid) // Execute function. fn(p, n, depth) @@ -845,25 +911,27 @@ func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, in } } -// spill writes all the nodes for this bucket to dirty pages. +/// Bucket.spill() writes all the nodes for this bucket to dirty pages. func (b *Bucket) spill() error { // Spill all child buckets first. for name, child := range b.buckets { - // If the child bucket is small enough and it has no child buckets then - // write it inline into the parent bucket's page. Otherwise spill it - // like a normal bucket and make the parent value a pointer to the page. + // If the child bucket is small enough and it has no child + // buckets then write it inline into the parent bucket's page. + // Otherwise spill it like a normal bucket and make the parent + // value a pointer to the page. var value []byte if child.inlineable() { child.free() value = child.write() } else { - if err := child.spill(); err != nil { + err := child.spill() + if err != nil { return err } // Update the child bucket header in this bucket. value = make([]byte, unsafe.Sizeof(bucket{})) - var bucket = (*bucket)(unsafe.Pointer(&value[0])) + bucket := (*bucket)(unsafe.Pointer(&value[0])) *bucket = *child.ref } @@ -873,15 +941,28 @@ func (b *Bucket) spill() error { } // Update parent node. - var c = b.Cursor() + c := b.Cursor() k, _, flags := c.seek([]byte(name)) if !bytes.Equal([]byte(name), k) { - panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k)) + panic(fmt.Sprintf( + "misplaced bucket header: %x -> %x", + []byte(name), + k, + )) } if flags&bucketLeafFlag == 0 { - panic(fmt.Sprintf("unexpected bucket header flag: %x", flags)) + panic(fmt.Sprintf( + "unexpected bucket header flag: %x", + flags, + )) } - c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag) + c.node().put( + []byte(name), + []byte(name), + value, + 0, + bucketLeafFlag, + ) } // Ignore if there's not a materialized root node. @@ -890,33 +971,38 @@ func (b *Bucket) spill() error { } // Spill nodes. - if err := b.rootNode.spill(); err != nil { + err := b.rootNode.spill() + if err != nil { return err } b.rootNode = b.rootNode.root() // Update the root node for this bucket. if b.rootNode.pgid >= b.tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid)) + panic(fmt.Sprintf( + "pgid (%d) above high water mark (%d)", + b.rootNode.pgid, + b.tx.meta.pgid, + )) } b.ref.root = b.rootNode.pgid return nil } -// inlineable returns true if a bucket is small enough to be written inline -// and if it contains no subbuckets. Otherwise returns false. +/// Bucket.inlineable() returns true if a bucket is small enough to be written +/// inline and if it contains no subbuckets. Otherwise returns false. func (b *Bucket) inlineable() bool { - var n = b.rootNode + n := b.rootNode // Bucket must only contain a single leaf node. if n == nil || !n.isLeaf { return false } - // Bucket is not inlineable if it contains subbuckets or if it goes beyond - // our threshold for inline bucket size. - var size = pageHeaderSize + // Bucket is not inlineable if it contains subbuckets or if it goes + // beyond our threshold for inline bucket size. + size := pageHeaderSize for _, inode := range n.inodes { size += leafPageElementSize + len(inode.key) + len(inode.value) @@ -930,29 +1016,30 @@ func (b *Bucket) inlineable() bool { return true } -// Returns the maximum total size of a bucket to make it a candidate for inlining. +/// Bucket.maInlineBucketSize() returns the maximum total size of a bucket to +/// make it a candidate for inlining. func (b *Bucket) maxInlineBucketSize() int { return b.tx.db.pageSize / 4 } -// write allocates and writes a bucket to a byte slice. +/// Bucket.write() allocates and writes a bucket to a byte slice. func (b *Bucket) write() []byte { // Allocate the appropriate size. - var n = b.rootNode - var value = make([]byte, bucketHeaderSize+n.size()) + n := b.rootNode + value := make([]byte, bucketHeaderSize+n.size()) // Write a bucket header. - var bucket = (*bucket)(unsafe.Pointer(&value[0])) + bucket := (*bucket)(unsafe.Pointer(&value[0])) *bucket = *b.ref // Convert byte slice to a fake page and write the root node. - var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) + p := (*page)(unsafe.Pointer(&value[bucketHeaderSize])) n.write(p) return value } -// rebalance attempts to balance all nodes. +/// Bucket.rebalance() attempts to balance all nodes. func (b *Bucket) rebalance() { for _, n := range b.nodes { n.rebalance() @@ -962,17 +1049,19 @@ func (b *Bucket) rebalance() { } } -// node creates a node from a page and associates it with a given parent. +/// Bucket.node() creates a node from a page and associates it with a given +/// parent. func (b *Bucket) node(pgid pgid, parent *node) *node { - _assert(b.nodes != nil, "nodes map expected") + g.Assert(b.nodes != nil, "nodes map expected") // Retrieve node if it's already been created. - if n := b.nodes[pgid]; n != nil { + n := b.nodes[pgid] + if n != nil { return n } // Otherwise create a node and cache it. - n := &node{bucket: b, parent: parent} + n = &node{bucket: b, parent: parent} if parent == nil { b.rootNode = n } else { @@ -980,7 +1069,7 @@ func (b *Bucket) node(pgid pgid, parent *node) *node { } // Use the inline page if this is an inline bucket. - var p = b.page + p := b.page if p == nil { p = b.tx.page(pgid) } @@ -992,13 +1081,13 @@ func (b *Bucket) node(pgid pgid, parent *node) *node { return n } -// free recursively frees all pages in the bucket. +/// Bucket.free() recursively frees all pages in the bucket. func (b *Bucket) free() { if b.ref.root == 0 { return } - var tx = b.tx + tx := b.tx b.forEachPageNode(func(p *page, n *node, _ int) { if p != nil { tx.db.freelist.free(tx.meta.txid, p) @@ -1009,7 +1098,7 @@ func (b *Bucket) free() { b.ref.root = 0 } -// dereference removes all references to the old mmap. +/// Bucket.dereference() removes all references to the old mmap. func (b *Bucket) dereference() { if b.rootNode != nil { b.rootNode.root().dereference() @@ -1020,14 +1109,19 @@ func (b *Bucket) dereference() { } } -// pageNode returns the in-memory node, if it exists. -// Otherwise returns the underlying page. +/// Bucket.pageNode() returns the in-memory node, if it exists. Otherwise +/// returns the underlying page. func (b *Bucket) pageNode(id pgid) (*page, *node) { // Inline buckets have a fake page embedded in their value so treat them - // differently. We'll return the rootNode (if available) or the fake page. + // differently. We'll return the rootNode (if available) or the fake + // page. if b.ref.root == 0 { if id != 0 { - panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id)) + panic(fmt.Sprintf( + "inline bucket non-zero page access(2): " + + "%d != 0", + id, + )) } if b.rootNode != nil { return nil, b.rootNode @@ -1037,32 +1131,35 @@ func (b *Bucket) pageNode(id pgid) (*page, *node) { // Check the node cache for non-inline buckets. if b.nodes != nil { - if n := b.nodes[id]; n != nil { + n := b.nodes[id] + if n != nil { return nil, n } } - // Finally lookup the page from the transaction if no node is materialized. + // Finally lookup the page from the transaction if no node is + // materialized. return b.tx.page(id), nil } -// cloneBytes returns a copy of a given slice. +/// cloneBytes() returns a copy of a given slice. func cloneBytes(v []byte) []byte { - var clone = make([]byte, len(v)) + clone := make([]byte, len(v)) copy(clone, v) return clone } -// Bucket returns the bucket that this cursor was created from. +/// Cursor.Bucket() returns the bucket that this cursor was created from. func (c *Cursor) Bucket() *Bucket { return c.bucket } -// First moves the cursor to the first item in the bucket and returns its key and value. -// If the bucket is empty then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. +/// Cursor.First() moves the cursor to the first item in the bucket and returns +/// its key and value. If the bucket is empty then a nil key and value are +/// returned. The returned key and value are only valid for the life of the +/// transaction. func (c *Cursor) First() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + g.Assert(c.bucket.tx.db != nil, "tx closed") c.stack = c.stack[:0] p, n := c.bucket.pageNode(c.bucket.ref.root) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) @@ -1082,11 +1179,12 @@ func (c *Cursor) First() (key []byte, value []byte) { } -// Last moves the cursor to the last item in the bucket and returns its key and value. -// If the bucket is empty then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. +/// Cursor.Last() moves the cursor to the last item in the bucket and returns +/// its key and value. If the bucket is empty then a nil key and value are +/// returned. The returned key and value are only valid for the life of the +/// transaction. func (c *Cursor) Last() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + g.Assert(c.bucket.tx.db != nil, "tx closed") c.stack = c.stack[:0] p, n := c.bucket.pageNode(c.bucket.ref.root) ref := elemRef{page: p, node: n} @@ -1100,11 +1198,12 @@ func (c *Cursor) Last() (key []byte, value []byte) { return k, v } -// Next moves the cursor to the next item in the bucket and returns its key and value. -// If the cursor is at the end of the bucket then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. +/// Cursor.Next() moves the cursor to the next item in the bucket and returns +/// its key and value. If the cursor is at the end of the bucket then a nil key +/// and value are returned. The returned key and value are only valid for the +/// life of the transaction. func (c *Cursor) Next() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + g.Assert(c.bucket.tx.db != nil, "tx closed") k, v, flags := c.next() if (flags & uint32(bucketLeafFlag)) != 0 { return k, nil @@ -1112,11 +1211,12 @@ func (c *Cursor) Next() (key []byte, value []byte) { return k, v } -// Prev moves the cursor to the previous item in the bucket and returns its key and value. -// If the cursor is at the beginning of the bucket then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. +/// Cursor.Prev() moves the cursor to the previous item in the bucket and +/// returns its key and value. If the cursor is at the beginning of the bucket +/// then a nil key and value are returned. The returned key and value are only +/// valid for the life of the transaction. func (c *Cursor) Prev() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") + g.Assert(c.bucket.tx.db != nil, "tx closed") // Attempt to move back one element until we're successful. // Move up the stack as we hit the beginning of each page in our stack. @@ -1134,7 +1234,8 @@ func (c *Cursor) Prev() (key []byte, value []byte) { return nil, nil } - // Move down the stack to find the last element of the last leaf under this branch. + // Move down the stack to find the last element of the last leaf under + // this branch. c.last() k, v, flags := c.keyValue() if (flags & uint32(bucketLeafFlag)) != 0 { @@ -1143,15 +1244,17 @@ func (c *Cursor) Prev() (key []byte, value []byte) { return k, v } -// Seek moves the cursor to a given key and returns it. -// If the key does not exist then the next key is used. If no keys -// follow, a nil key is returned. -// The returned key and value are only valid for the life of the transaction. +/// Cursor.Seek() moves the cursor to a given key and returns it. If the key +/// does not exist then the next key is used. If no keys follow, a nil key is +/// returned. The returned key and value are only valid for the life of the +/// transaction. func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { k, v, flags := c.seek(seek) - // If we ended up after the last element of a page then move to the next one. - if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() { + // If we ended up after the last element of a page then move to the next + // one. + ref := &c.stack[len(c.stack)-1] + if ref.index >= ref.count() { k, v, flags = c.next() } @@ -1159,12 +1262,14 @@ func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { return nil, nil } else if (flags & uint32(bucketLeafFlag)) != 0 { return k, nil + } else { + return k, v } - return k, v } -// Delete removes the current key/value under the cursor from the bucket. -// Delete fails if current key/value is a bucket or if the transaction is not writable. +/// Cursor.Delete() removes the current key/value under the cursor from the +/// bucket. Cursor.Delete() fails if current key/value is a bucket or if the +/// transaction is not writable. func (c *Cursor) Delete() error { if c.bucket.tx.db == nil { return ErrTxClosed @@ -1182,10 +1287,10 @@ func (c *Cursor) Delete() error { return nil } -// seek moves the cursor to a given key and returns it. -// If the key does not exist then the next key is used. +/// Cursor.seek() moves the cursor to a given key and returns it. If the key +/// does not exist then the next key is used. func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { - _assert(c.bucket.tx.db != nil, "tx closed") + g.Assert(c.bucket.tx.db != nil, "tx closed") // Start from root page/node and traverse to correct page. c.stack = c.stack[:0] @@ -1201,11 +1306,12 @@ func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { return c.keyValue() } -// first moves the cursor to the first leaf element under the last page in the stack. +/// Cursor.first() moves the cursor to the first leaf element under the last +/// page in the stack. func (c *Cursor) first() { for { // Exit when we hit a leaf page. - var ref = &c.stack[len(c.stack)-1] + ref := &c.stack[len(c.stack)-1] if ref.isLeaf() { break } @@ -1215,14 +1321,17 @@ func (c *Cursor) first() { if ref.node != nil { pgid = ref.node.inodes[ref.index].pgid } else { - pgid = ref.page.branchPageElement(uint16(ref.index)).pgid + pgid = ref.page.branchPageElement( + uint16(ref.index), + ).pgid } p, n := c.bucket.pageNode(pgid) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) } } -// last moves the cursor to the last leaf element under the last page in the stack. +/// Cursor.last() moves the cursor to the last leaf element under the last page +/// in the stack. func (c *Cursor) last() { for { // Exit when we hit a leaf page. @@ -1236,22 +1345,26 @@ func (c *Cursor) last() { if ref.node != nil { pgid = ref.node.inodes[ref.index].pgid } else { - pgid = ref.page.branchPageElement(uint16(ref.index)).pgid + pgid = ref.page.branchPageElement( + uint16(ref.index), + ).pgid } p, n := c.bucket.pageNode(pgid) - var nextRef = elemRef{page: p, node: n} + nextRef := elemRef{page: p, node: n} nextRef.index = nextRef.count() - 1 c.stack = append(c.stack, nextRef) } } -// next moves to the next leaf element and returns the key and value. -// If the cursor is at the last leaf element then it stays there and returns nil. +/// Cursor.next() moves to the next leaf element and returns the key and value. +/// If the cursor is at the last leaf element then it stays there and returns +/// nil. func (c *Cursor) next() (key []byte, value []byte, flags uint32) { for { // Attempt to move over one element until we're successful. - // Move up the stack as we hit the end of each page in our stack. + // Move up the stack as we hit the end of each page in our + // stack. var i int for i = len(c.stack) - 1; i >= 0; i-- { elem := &c.stack[i] @@ -1261,18 +1374,19 @@ func (c *Cursor) next() (key []byte, value []byte, flags uint32) { } } - // If we've hit the root page then stop and return. This will leave the - // cursor on the last element of the last page. + // If we've hit the root page then stop and return. This will + // leave the cursor on the last element of the last page. if i == -1 { return nil, nil, 0 } - // Otherwise start from where we left off in the stack and find the - // first element of the first leaf page. + // Otherwise start from where we left off in the stack and find + // the first element of the first leaf page. c.stack = c.stack[:i+1] c.first() - // If this is an empty page then restart and move back up the stack. + // If this is an empty page then restart and move back up the + // stack. // https://github.com/boltdb/bolt/issues/450 if c.stack[len(c.stack)-1].count() == 0 { continue @@ -1282,7 +1396,8 @@ func (c *Cursor) next() (key []byte, value []byte, flags uint32) { } } -// search recursively performs a binary search against a given page/node until it finds a given key. +/// Cursor.search() recursively performs a binary search against a given +/// page/node until it finds a given key. func (c *Cursor) search(key []byte, pgid pgid) { p, n := c.bucket.pageNode(pgid) if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 { @@ -1307,8 +1422,9 @@ func (c *Cursor) search(key []byte, pgid pgid) { func (c *Cursor) searchNode(key []byte, n *node) { var exact bool index := sort.Search(len(n.inodes), func(i int) bool { - // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. - // sort.Search() finds the lowest index where f() != -1 but we need the highest index. + // TODO(benbjohnson): Optimize this range search. It's a bit + // hacky right now. sort.Search() finds the lowest index where + // f() != -1 but we need the highest index. ret := bytes.Compare(n.inodes[i].key, key) if ret == 0 { exact = true @@ -1330,8 +1446,9 @@ func (c *Cursor) searchPage(key []byte, p *page) { var exact bool index := sort.Search(int(p.count), func(i int) bool { - // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. - // sort.Search() finds the lowest index where f() != -1 but we need the highest index. + // TODO(benbjohnson): Optimize this range search. It's a bit + // hacky right now. sort.Search() finds the lowest index where + // f() != -1 but we need the highest index. ret := bytes.Compare(inodes[i].key(), key) if ret == 0 { exact = true @@ -1347,7 +1464,7 @@ func (c *Cursor) searchPage(key []byte, p *page) { c.search(key, inodes[index].pgid) } -// nsearch searches the leaf node on the top of the stack for a key. +/// Cursor.nsearch() searches the leaf node on the top of the stack for a key. func (c *Cursor) nsearch(key []byte) { e := &c.stack[len(c.stack)-1] p, n := e.page, e.node @@ -1369,7 +1486,7 @@ func (c *Cursor) nsearch(key []byte) { e.index = index } -// keyValue returns the key and value of the current leaf element. +/// Cursor.keyValue() returns the key and value of the current leaf element. func (c *Cursor) keyValue() ([]byte, []byte, uint32) { ref := &c.stack[len(c.stack)-1] if ref.count() == 0 || ref.index >= ref.count() { @@ -1387,29 +1504,33 @@ func (c *Cursor) keyValue() ([]byte, []byte, uint32) { return elem.key(), elem.value(), elem.flags } -// node returns the node that the cursor is currently positioned on. +/// Cursor.node() returns the node that the cursor is currently positioned on. func (c *Cursor) node() *node { - _assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack") + g.Assert( + len(c.stack) > 0, + "accessing a node with a zero-length cursor stack", + ) // If the top of the stack is a leaf node then just return it. - if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() { + ref := &c.stack[len(c.stack)-1] + if ref.node != nil && ref.isLeaf() { return ref.node } // Start from root and traverse down the hierarchy. - var n = c.stack[0].node + n := c.stack[0].node if n == nil { n = c.bucket.node(c.stack[0].page.id, nil) } for _, ref := range c.stack[:len(c.stack)-1] { - _assert(!n.isLeaf, "expected branch node") + g.Assert(!n.isLeaf, "expected branch node") n = n.childAt(int(ref.index)) } - _assert(n.isLeaf, "expected leaf node") + g.Assert(n.isLeaf, "expected leaf node") return n } -// isLeaf returns whether the ref is pointing at a leaf page/node. +/// elemRef.isLeaf() returns whether the ref is pointing at a leaf page/node. func (r *elemRef) isLeaf() bool { if r.node != nil { return r.node.isLeaf @@ -1417,7 +1538,7 @@ func (r *elemRef) isLeaf() bool { return (r.page.flags & leafPageFlag) != 0 } -// count returns the number of inodes or page elements. +/// elemRef.count() returns the number of inodes or page elements. func (r *elemRef) count() int { if r.node != nil { return len(r.node.inodes) @@ -1425,7 +1546,7 @@ func (r *elemRef) count() int { return int(r.page.count) } -// Path returns the path to currently open database file. +/// DB.Path() returns the path to currently open database file. func (db *DB) Path() string { return db.path } @@ -1478,7 +1599,7 @@ func readPageSize(db *DB) (int, error) { return int(m.pageSize), nil } -// init creates a new database file and initializes its meta pages. +/// initDB() creates a new database file and initializes its meta pages. func initDB(db *DB, size int64) error { if size != 0 { pageSize, err := readPageSize(db) @@ -1524,18 +1645,21 @@ func initDB(db *DB, size int64) error { p.count = 0 // Write the buffer to our data file. - if _, err := db.file.WriteAt(buf, 0); err != nil { + _, err := db.file.WriteAt(buf, 0) + if err != nil { return err } - if err := fdatasync(db); err != nil { + + err = fdatasync(db) + if err != nil { return err } return nil } -// Open creates and opens a database at the given path. -// If the file does not exist then it will be created automatically. +/// Open creates and opens a database at the given path. If the file does not +/// exist then it will be created automatically. func Open(path string) (*DB, error) { file, err := openFile(path) if err != nil { @@ -1583,8 +1707,8 @@ func Open(path string) (*DB, error) { return db, nil } -// mmap opens the underlying memory-mapped file and initializes the meta references. -// minsz is the minimum size that the new mmap can be. +/// DB.mmap() opens the underlying memory-mapped file and initializes the meta +/// references. minsz is the minimum size that the new mmap can be. func (db *DB) mmap(minsz int) error { db.mmaplock.Lock() defer db.mmaplock.Unlock() @@ -1597,7 +1721,7 @@ func (db *DB) mmap(minsz int) error { } // Ensure the size is at least the minimum size. - var size = int(info.Size()) + size := int(info.Size()) if size < minsz { size = minsz } @@ -1612,12 +1736,14 @@ func (db *DB) mmap(minsz int) error { } // Unmap existing data before continuing. - if err := db.munmap(); err != nil { + err = db.munmap() + if err != nil { return err } // Memory-map the data file as a byte slice. - if err := mmap(db, size); err != nil { + err = mmap(db, size) + if err != nil { return err } @@ -1625,9 +1751,9 @@ func (db *DB) mmap(minsz int) error { db.meta0 = db.page(0).meta() db.meta1 = db.page(1).meta() - // Validate the meta pages. We only return an error if both meta pages fail - // validation, since meta0 failing validation means that it wasn't saved - // properly -- but we can recover using meta1. And vice-versa. + // Validate the meta pages. We only return an error if both meta pages + // fail validation, since meta0 failing validation means that it wasn't + // saved properly -- but we can recover using meta1. And vice-versa. err0 := db.meta0.validate() err1 := db.meta1.validate() if err0 != nil && err1 != nil { @@ -1637,17 +1763,19 @@ func (db *DB) mmap(minsz int) error { return nil } -// munmap unmaps the data file from memory. +/// DB.munmap() unmaps the data file from memory. func (db *DB) munmap() error { - if err := munmap(db); err != nil { + err := munmap(db) + if err != nil { return fmt.Errorf("unmap error: " + err.Error()) } return nil } -// mmapSize determines the appropriate size for the mmap given the current size -// of the database. The minimum size is 32KB and doubles until it reaches 1GB. -// Returns an error if the new mmap size is greater than the max allowed. +/// DB. mmapSize() determines the appropriate size for the mmap given the +/// current size of the database. The minimum size is 32KB and doubles until it +/// reaches 1GB. Returns an error if the new mmap size is greater than the max +/// allowed. func (db *DB) mmapSize(size int) (int, error) { // Double the size from 32KB until 1GB. for i := uint(15); i <= 30; i++ { @@ -1663,7 +1791,8 @@ func (db *DB) mmapSize(size int) (int, error) { // If larger than 1GB then grow by 1GB at a time. sz := int64(size) - if remainder := sz % int64(maxMmapStep); remainder > 0 { + remainder := sz % int64(maxMmapStep) + if remainder > 0 { sz += int64(maxMmapStep) - remainder } @@ -1682,8 +1811,8 @@ func (db *DB) mmapSize(size int) (int, error) { return int(sz), nil } -// Close releases all database resources. -// All transactions must be closed before closing the database. +/// DB.Close() releases all database resources. All transactions must be closed +/// before closing the database. func (db *DB) Close() error { db.rwlock.Lock() defer db.rwlock.Unlock() @@ -1707,7 +1836,8 @@ func (db *DB) close() error { db.freelist = nil // Close the mmap. - if err := db.munmap(); err != nil { + err := db.munmap() + if err != nil { return err } @@ -1715,12 +1845,14 @@ func (db *DB) close() error { if db.file != nil { // No need to unlock read-only file. // Unlock the file. - if err := funlock(db); err != nil { - log.Printf("bolt.Close(): funlock error: %s", err) + err := funlock(db) + if err != nil { + log.Printf("dedo.Close(): funlock error: %s", err) } // Close the file descriptor. - if err := db.file.Close(); err != nil { + err = db.file.Close() + if err != nil { return fmt.Errorf("db file close: %s", err) } db.file = nil @@ -1730,23 +1862,22 @@ func (db *DB) close() error { return nil } -// Begin starts a new transaction. -// Multiple read-only transactions can be used concurrently but only one -// write transaction can be used at a time. Starting multiple write transactions -// will cause the calls to block and be serialized until the current write -// transaction finishes. -// -// Transactions should not be dependent on one another. Opening a read -// transaction and a write transaction in the same goroutine can cause the -// writer to deadlock because the database periodically needs to re-mmap itself -// as it grows and it cannot do that while a read transaction is open. -// -// If a long running read transaction (for example, a snapshot transaction) is -// needed, you might want to set DB.InitialMmapSize to a large enough value -// to avoid potential blocking of write transaction. -// -// IMPORTANT: You must close read-only transactions after you are finished or -// else the database will not reclaim old pages. +/// DB.Begin() starts a new transaction. Multiple read-only transactions can be +/// used concurrently but only one write transaction can be used at a time. +/// Starting multiple write transactions will cause the calls to block and be +/// serialized until the current write transaction finishes. +/// +/// Transactions should not be dependent on one another. Opening a read +/// transaction and a write transaction in the same goroutine can cause the +/// writer to deadlock because the database periodically needs to re-mmap itself +/// as it grows and it cannot do that while a read transaction is open. +/// +/// If a long running read transaction (for example, a snapshot transaction) is +/// needed, you might want to set DB.InitialMmapSize to a large enough value to +/// avoid potential blocking of write transaction. +/// +/// IMPORTANT: You must close read-only transactions after you are finished or +/// else the database will not reclaim old pages. func (db *DB) Begin(writable bool) (*Tx, error) { if writable { return db.beginRWTx() @@ -1756,14 +1887,14 @@ func (db *DB) Begin(writable bool) (*Tx, error) { } func (db *DB) beginTx() (*Tx, error) { - // Lock the meta pages while we initialize the transaction. We obtain + // Lock the meta pages while we initialize the transaction. We obtain // the meta lock before the mmap lock because that's the order that the // write transaction will obtain them. db.metalock.Lock() - // Obtain a read-only lock on the mmap. When the mmap is remapped it will - // obtain a write lock so all transactions must finish before it can be - // remapped. + // Obtain a read-only lock on the mmap. When the mmap is remapped it + // will obtain a write lock so all transactions must finish before it + // can be remapped. db.mmaplock.RLock() // Exit if the database is not open yet. @@ -1787,8 +1918,8 @@ func (db *DB) beginTx() (*Tx, error) { } func (db *DB) beginRWTx() (*Tx, error) { - // Obtain writer lock. This is released by the transaction when it closes. - // This enforces only one writer transaction at a time. + // Obtain writer lock. This is released by the transaction when it + // closes. This enforces only one writer transaction at a time. db.rwlock.Lock() // Once we have the writer lock then we can lock the meta pages so that @@ -1808,7 +1939,7 @@ func (db *DB) beginRWTx() (*Tx, error) { db.rwtx = t // Free any pages associated with closed read-only transactions. - var minid txid = 0xFFFFFFFFFFFFFFFF + minid := txid(0xFFFFFFFFFFFFFFFF) for _, t := range db.txs { if t.meta.txid < minid { minid = t.meta.txid @@ -1821,7 +1952,7 @@ func (db *DB) beginRWTx() (*Tx, error) { return t, nil } -// removeTx removes a transaction from the database. +/// DB.removeTx() removes a transaction from the database. func (db *DB) removeTx(tx *Tx) { // Release the read lock on the mmap. db.mmaplock.RUnlock() @@ -1844,13 +1975,14 @@ func (db *DB) removeTx(tx *Tx) { db.metalock.Unlock() } -// Update executes a function within the context of a read-write managed transaction. -// If no error is returned from the function then the transaction is committed. -// If an error is returned then the entire transaction is rolled back. -// Any error that is returned from the function or returned from the commit is -// returned from the Update() method. -// -// Attempting to manually commit or rollback within the function will cause a panic. +/// DB.Update() executes a function within the context of a read-write managed +/// transaction. If no error is returned from the function then the transaction +/// is committed. If an error is returned then the entire transaction is rolled +/// back. Any error that is returned from the function or returned from the +/// commit is returned from the DB.Update() method. +/// +/// Attempting to manually commit or rollback within the function will cause a +/// panic. func (db *DB) Update(fn func(*Tx) error) error { t, err := db.Begin(true) if err != nil { @@ -1864,10 +1996,12 @@ func (db *DB) Update(fn func(*Tx) error) error { } }() - // Mark as a managed tx so that the inner function cannot manually commit. + // Mark as a managed tx so that the inner function cannot manually + // commit. t.managed = true - // If an error is returned from the function then rollback and return error. + // If an error is returned from the function then rollback and return + // error. err = fn(t) t.managed = false if err != nil { @@ -1878,10 +2012,11 @@ func (db *DB) Update(fn func(*Tx) error) error { return t.Commit() } -// View executes a function within the context of a managed read-only transaction. -// Any error that is returned from the function is returned from the View() method. -// -// Attempting to manually rollback within the function will cause a panic. +/// DB.View() executes a function within the context of a managed read-only +/// transaction. Any error that is returned from the function is returned from +/// the DB.View() method. +/// +/// Attempting to manually rollback within the function will cause a panic. func (db *DB) View(fn func(*Tx) error) error { t, err := db.Begin(false) if err != nil { @@ -1895,7 +2030,8 @@ func (db *DB) View(fn func(*Tx) error) error { } }() - // Mark as a managed tx so that the inner function cannot manually rollback. + // Mark as a managed tx so that the inner function cannot manually + // rollback. t.managed = true // If an error is returned from the function then pass it through. @@ -1906,40 +2042,48 @@ func (db *DB) View(fn func(*Tx) error) error { return err } - if err := t.Rollback(); err != nil { + err = t.Rollback() + if err != nil { return err } return nil } -// Batch calls fn as part of a batch. It behaves similar to Update, -// except: -// -// 1. concurrent Batch calls can be combined into a single Bolt -// transaction. -// -// 2. the function passed to Batch may be called multiple times, -// regardless of whether it returns error or not. -// -// This means that Batch function side effects must be idempotent and -// take permanent effect only after a successful return is seen in -// caller. -// -// The maximum batch size and delay can be adjusted with DB.MaxBatchSize -// and DB.MaxBatchDelay, respectively. -// -// Batch is only useful when there are multiple goroutines calling it. +func needsNewBatch(batch *batch, max int) bool { + return (batch == nil) || (batch != nil && len(batch.calls) >= max) +} + +/// DB.Batch() calls fn as part of a batch. It behaves similar to Update, +/// except: +/// +/// 1. concurrent DB.Batch() calls can be combined into a single Dedo +/// transaction. +/// +/// 2. the function passed to DB.Batch() may be called multiple times, +/// regardless of whether it returns error or not. +/// +/// This means that DB.Batch() function side effects must be idempotent and take +/// permanent effect only after a successful return is seen in caller. +/// +/// The maximum batch size and delay can be adjusted with DB.MaxBatchSize and +/// DB.MaxBatchDelay, respectively. +/// +/// DB.Batch() is only useful when there are multiple goroutines calling it. func (db *DB) Batch(fn func(*Tx) error) error { errCh := make(chan error, 1) db.batchMu.Lock() - if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) { - // There is no existing batch, or the existing batch is full; start a new one. + if needsNewBatch(db.batch, db.MaxBatchSize) { + // There is no existing batch, or the existing batch is full; + // start a new one. db.batch = &batch{ db: db, } - db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) + db.batch.timer = time.AfterFunc( + db.MaxBatchDelay, + db.batch.trigger, + ) } db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) if len(db.batch.calls) >= db.MaxBatchSize { @@ -1955,13 +2099,13 @@ func (db *DB) Batch(fn func(*Tx) error) error { return err } -// trigger runs the batch if it hasn't already been run. +/// batch.trigger() runs the batch if it hasn't already been run. func (b *batch) trigger() { b.start.Do(b.run) } -// run performs the transactions in the batch and communicates results -// back to DB.Batch. +/// batch.run() performs the transactions in the batch and communicates results +/// back to DB.Batch. func (b *batch) run() { b.db.batchMu.Lock() b.timer.Stop() @@ -1974,10 +2118,11 @@ func (b *batch) run() { retry: for len(b.calls) > 0 { - var failIdx = -1 + failIdx := -1 err := b.db.Update(func(tx *Tx) error { for i, c := range b.calls { - if err := safelyCall(c.fn, tx); err != nil { + err := safelyCall(c.fn, tx) + if err != nil { failIdx = i return err } @@ -1986,17 +2131,20 @@ retry: }) if failIdx >= 0 { - // take the failing transaction out of the batch. it's - // safe to shorten b.calls here because db.batch no longer - // points to us, and we hold the mutex anyway. + // take the failing transaction out of the batch. it's + // safe to shorten b.calls here because db.batch no + // longer points to us, and we hold the mutex anyway. c := b.calls[failIdx] - b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] - // tell the submitter re-run it solo, continue with the rest of the batch + b.calls[failIdx], b.calls = + b.calls[ len(b.calls) - 1], + b.calls[:len(b.calls) - 1] + // tell the submitter re-run it solo, continue with the + // rest of the batch c.err <- trySolo continue retry } - // pass success, or bolt internal errors, to all callers + // pass success, or dedo internal errors, to all callers for _, c := range b.calls { c.err <- err } @@ -2005,7 +2153,8 @@ retry: } func (p panicked) Error() string { - if err, ok := p.reason.(error); ok { + err, ok := p.reason.(error) + if ok { return err.Error() } return fmt.Sprintf("panic: %v", p.reason) @@ -2013,29 +2162,32 @@ func (p panicked) Error() string { func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { defer func() { - if p := recover(); p != nil { + p := recover() + if p != nil { err = panicked{p} } }() return fn(tx) } -// page retrieves a page reference from the mmap based on the current page size. +/// db.page() retrieves a page reference from the mmap based on the current page +/// size. func (db *DB) page(id pgid) *page { pos := id * pgid(db.pageSize) return (*page)(unsafe.Pointer(&db.data[pos])) } -// pageInBuffer retrieves a page reference from a given byte array based on the current page size. +/// pageInBuffer() retrieves a page reference from a given byte array based on +/// the current page size. func pageInBuffer(db *DB, b []byte, id pgid) *page { return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)])) } -// meta retrieves the current meta page reference. +/// db.meta() retrieves the current meta page reference. func (db *DB) meta() *meta { // We have to return the meta with the highest txid which doesn't fail - // validation. Otherwise, we can cause errors when in fact the database is - // in a consistent state. metaA is the one with the higher txid. + // validation. Otherwise, we can cause errors when in fact the database + // is in a consistent state. metaA is the one with the higher txid. metaA := db.meta0 metaB := db.meta1 if db.meta1.txid > db.meta0.txid { @@ -2043,19 +2195,24 @@ func (db *DB) meta() *meta { metaB = db.meta0 } - // Use higher meta page if valid. Otherwise fallback to previous, if valid. - if err := metaA.validate(); err == nil { + // Use higher meta page if valid. Otherwise fallback to previous, if + // valid. + err := metaA.validate() + if err == nil { return metaA - } else if err := metaB.validate(); err == nil { + } + + err = metaB.validate() + if err == nil { return metaB } - // This should never be reached, because both meta1 and meta0 were validated - // on mmap() and we do fsync() on every write. - panic("bolt.DB.meta(): invalid meta pages") + // This should never be reached, because both meta1 and meta0 were + // validated on mmap() and we do fsync() on every write. + panic("dedo.DB.meta(): invalid meta pages") } -// allocate returns a contiguous block of memory starting at a given page. +/// db.allocate() returns a contiguous block of memory starting at a given page. func (db *DB) allocate(count int) (*page, error) { // Allocate a temporary buffer for the page. var buf []byte @@ -2068,15 +2225,17 @@ func (db *DB) allocate(count int) (*page, error) { p.overflow = uint32(count - 1) // Use pages from the freelist if they are available. - if p.id = db.freelist.allocate(count); p.id != 0 { + p.id = db.freelist.allocate(count) + if p.id != 0 { return p, nil } // Resize mmap() if we're at the end. p.id = db.rwtx.meta.pgid - var minsz = int((p.id+pgid(count))+1) * db.pageSize + minsz := int((p.id+pgid(count))+1) * db.pageSize if minsz >= db.datasz { - if err := db.mmap(minsz); err != nil { + err := db.mmap(minsz) + if err != nil { return nil, fmt.Errorf("mmap allocate error: %s", err) } } @@ -2087,15 +2246,16 @@ func (db *DB) allocate(count int) (*page, error) { return p, nil } -// grow grows the size of the database to the given sz. +/// db.grow() grows the size of the database to the given sz. func (db *DB) grow(sz int) error { // Ignore if the new size is less than available file size. if sz <= db.filesz { return nil } - // If the data is smaller than the alloc size then only allocate what's needed. - // Once it goes over the allocation size then allocate in chunks. + // If the data is smaller than the alloc size then only allocate what's + // needed. Once it goes over the allocation size then allocate in + // chunks. if db.datasz < db.AllocSize { sz = db.datasz } else { @@ -2104,10 +2264,13 @@ func (db *DB) grow(sz int) error { // Truncate and fsync to ensure file size metadata is flushed. // https://github.com/boltdb/bolt/issues/284 - if err := db.file.Truncate(int64(sz)); err != nil { + err := db.file.Truncate(int64(sz)) + if err != nil { return fmt.Errorf("file resize error: %s", err) } - if err := db.file.Sync(); err != nil { + + err = db.file.Sync() + if err != nil { return fmt.Errorf("file sync error: %s", err) } @@ -2115,7 +2278,8 @@ func (db *DB) grow(sz int) error { return nil } -// validate checks the marker bytes and version of the meta page to ensure it matches this binary. +/// meta.validate() checks the marker bytes and version of the meta page to +/// ensure it matches this binary. func (m *meta) validate() error { if m.magic != magic { return ErrInvalid @@ -2127,20 +2291,29 @@ func (m *meta) validate() error { return nil } -// copy copies one meta object to another. +/// meta.copy() copies one meta object to another. func (m *meta) copy(dest *meta) { *dest = *m } -// write writes the meta onto a page. +/// meta.write() writes the meta onto a page. func (m *meta) write(p *page) { if m.root.root >= m.pgid { - panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)) + panic(fmt.Sprintf( + "root bucket pgid (%d) above high water mark (%d)", + m.root.root, + m.pgid, + )) } else if m.freelist >= m.pgid { - panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)) + panic(fmt.Sprintf( + "freelist pgid (%d) above high water mark (%d)", + m.freelist, + m.pgid, + )) } - // Page id is either going to be 0 or 1 which we can determine by the transaction ID. + // Page id is either going to be 0 or 1 which we can determine by the + // transaction ID. p.id = pgid(m.txid % 2) p.flags |= metaPageFlag @@ -2150,66 +2323,16 @@ func (m *meta) write(p *page) { m.copy(p.meta()) } -// generates the checksum for the meta. +/// meta.sum64() generates the checksum for the meta. func (m *meta) sum64() uint64 { - var h = fnv.New64a() - _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:]) + h := fnv.New64a() + _, _ = h.Write( + (*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:], + ) return h.Sum64() } -// _assert will panic with a given formatted message if the given condition is false. -func _assert(condition bool, msg string, v ...interface{}) { - if !condition { - panic(fmt.Sprintf("assertion failed: "+msg, v...)) - } -} - - -/* -Package bolt implements a low-level key/value store in pure Go. It supports -fully serializable transactions, ACID semantics, and lock-free MVCC with -multiple readers and a single writer. Bolt can be used for projects that -want a simple data store without the need to add large dependencies such as -Postgres or MySQL. - -Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is -optimized for fast read access and does not require recovery in the event of a -system crash. Transactions which have not finished committing will simply be -rolled back in the event of a crash. - -The design of Bolt is based on Howard Chu's LMDB database project. - -Bolt currently works on Windows, Mac OS X, and Linux. - - -Basics - -There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is -a collection of buckets and is represented by a single file on disk. A bucket is -a collection of unique keys that are associated with values. - -Transactions provide either read-only or read-write access to the database. -Read-only transactions can retrieve key/value pairs and can use Cursors to -iterate over the dataset sequentially. Read-write transactions can create and -delete buckets and can insert and remove keys. Only one read-write transaction -is allowed at a time. - - -Caveats - -The database uses a read-only, memory-mapped data file to ensure that -applications cannot corrupt the database, however, this means that keys and -values returned from Bolt cannot be changed. Writing to a read-only byte slice -will cause Go to panic. - -Keys and values retrieved from the database are only valid for the life of -the transaction. When used outside the transaction, these byte slices can -point to different data or can point to invalid memory which will cause a panic. - - -*/ - -// newFreelist returns an empty, initialized freelist. +/// newFreelist() returns an empty, initialized freelist. func newFreelist() *freelist { return &freelist{ pending: make(map[txid][]pgid), @@ -2217,37 +2340,39 @@ func newFreelist() *freelist { } } -// size returns the size of the page after serialization. +/// freelist.size() returns the size of the page after serialization. func (f *freelist) size() int { n := f.count() if n >= 0xFFFF { - // The first element will be used to store the count. See freelist.write. + // The first element will be used to store the count. See + // freelist.write. n++ } return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n) } -// count returns count of pages on the freelist +/// freelist.count() returns count of pages on the freelist func (f *freelist) count() int { return f.free_count() + f.pending_count() } -// free_count returns count of free pages +/// freelist.free_count() returns count of free pages func (f *freelist) free_count() int { return len(f.ids) } -// pending_count returns count of pending pages +/// freelist.pending_count() returns count of pending pages func (f *freelist) pending_count() int { - var count int + count := 0 for _, list := range f.pending { count += len(list) } return count } -// copyall copies into dst a list of all free ids and all pending ids in one sorted list. -// f.count returns the minimum length required for dst. +/// freelist.copyall() copies into dst a list of all free ids and all pending +/// ids in one sorted list. +/// f.count returns the minimum length required for dst. func (f *freelist) copyall(dst []pgid) { m := make(pgids, 0, f.pending_count()) for _, list := range f.pending { @@ -2257,14 +2382,16 @@ func (f *freelist) copyall(dst []pgid) { mergepgids(dst, f.ids, m) } -// allocate returns the starting page id of a contiguous list of pages of a given size. -// If a contiguous block cannot be found then 0 is returned. +/// freelist.allocate() returns the starting page id of a contiguous list of +/// pages of a given size. If a contiguous block cannot be found then 0 is +/// returned. func (f *freelist) allocate(n int) pgid { if len(f.ids) == 0 { return 0 } - var initial, previd pgid + initial := pgid(0) + previd := pgid(0) for i, id := range f.ids { if id <= 1 { panic(fmt.Sprintf("invalid page allocation: %d", id)) @@ -2277,10 +2404,10 @@ func (f *freelist) allocate(n int) pgid { // If we found a contiguous block then remove it and return it. if (id-initial)+1 == pgid(n) { - // If we're allocating off the beginning then take the fast path - // and just adjust the existing slice. This will use extra memory - // temporarily but the append() in free() will realloc the slice - // as is necessary. + // If we're allocating off the beginning then take the + // fast path and just adjust the existing slice. This + // will use extra memory temporarily but the append() in + // free() will realloc the slice as is necessary. if (i + 1) == n { f.ids = f.ids[i+1:] } else { @@ -2301,15 +2428,15 @@ func (f *freelist) allocate(n int) pgid { return 0 } -// free releases a page and its overflow for a given transaction id. -// If the page is already free then a panic will occur. +/// freelist.free() releases a page and its overflow for a given transaction id. +/// If the page is already free then a panic will occur. func (f *freelist) free(txid txid, p *page) { if p.id <= 1 { panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) } // Free page and all its overflow pages. - var ids = f.pending[txid] + ids := f.pending[txid] for id := p.id; id <= p.id+pgid(p.overflow); id++ { // Verify that page is not already free. if f.cache[id] { @@ -2323,13 +2450,14 @@ func (f *freelist) free(txid txid, p *page) { f.pending[txid] = ids } -// release moves all page ids for a transaction id (or older) to the freelist. +/// release moves all page ids for a transaction id (or older) to the freelist. func (f *freelist) release(txid txid) { m := make(pgids, 0) for tid, ids := range f.pending { if tid <= txid { - // Move transaction's pending pages to the available freelist. - // Don't remove from the cache since the page is still free. + // Move transaction's pending pages to the available + // freelist. Don't remove from the cache since the page + // is still free. m = append(m, ids...) delete(f.pending, tid) } @@ -2338,7 +2466,7 @@ func (f *freelist) release(txid txid) { f.ids = pgids(f.ids).merge(m) } -// rollback removes the pages from a given pending tx. +/// rollback removes the pages from a given pending tx. func (f *freelist) rollback(txid txid) { // Remove page ids from cache. for _, id := range f.pending[txid] { @@ -2349,15 +2477,16 @@ func (f *freelist) rollback(txid txid) { delete(f.pending, txid) } -// freed returns whether a given page is in the free list. +/// freed returns whether a given page is in the free list. func (f *freelist) freed(pgid pgid) bool { return f.cache[pgid] } -// read initializes the freelist from a freelist page. +/// freelist.read() initializes the freelist from a freelist page. func (f *freelist) read(p *page) { - // If the page.count is at the max uint16 value (64k) then it's considered - // an overflow and the size of the freelist is stored as the first element. + // If the page.count is at the max uint16 value (64k) then it's + // considered an overflow and the size of the freelist is stored as the + // first element. idx, count := 0, int(p.count) if count == 0xFFFF { idx = 1 @@ -2368,7 +2497,7 @@ func (f *freelist) read(p *page) { if count == 0 { f.ids = nil } else { - ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count] + ids := (*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr))[idx:count] f.ids = make([]pgid, len(ids)) copy(f.ids, ids) @@ -2380,17 +2509,18 @@ func (f *freelist) read(p *page) { f.reindex() } -// write writes the page ids onto a freelist page. All free and pending ids are -// saved to disk since in the event of a program crash, all pending ids will -// become free. +/// freelist.write() writes the page ids onto a freelist page. All free and +/// pending ids are saved to disk since in the event of a program crash, all +/// pending ids will become free. func (f *freelist) write(p *page) error { // Combine the old free pgids and pgids waiting on an open transaction. // Update the header flag. p.flags |= freelistPageFlag - // The page.count can only hold up to 64k elements so if we overflow that - // number then we handle it by putting the size in the first element. + // The page.count can only hold up to 64k elements so if we overflow + // that number then we handle it by putting the size in the first + // element. lenids := f.count() if lenids == 0 { p.count = uint16(lenids) @@ -2399,14 +2529,15 @@ func (f *freelist) write(p *page) error { f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:]) } else { p.count = 0xFFFF - ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids) + pageID := pgid(lenids) + ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pageID f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:]) } return nil } -// reload reads the freelist from a page and filters out pending items. +/// reload reads the freelist from a page and filters out pending items. func (f *freelist) reload(p *page) { f.read(p) @@ -2420,7 +2551,7 @@ func (f *freelist) reload(p *page) { // Check each page in the freelist and build a new available freelist // with any pages not in the pending lists. - var a []pgid + a := []pgid{} for _, id := range f.ids { if !pcache[id] { a = append(a, id) @@ -2428,12 +2559,13 @@ func (f *freelist) reload(p *page) { } f.ids = a - // Once the available list is rebuilt then rebuild the free cache so that - // it includes the available and pending free pages. + // Once the available list is rebuilt then rebuild the free cache so + // that it includes the available and pending free pages. f.reindex() } -// reindex rebuilds the free cache based on available and pending free lists. +/// freelist.reindex() rebuilds the free cache based on available and pending +/// free lists. func (f *freelist) reindex() { f.cache = make(map[pgid]bool, len(f.ids)) for _, id := range f.ids { @@ -2446,7 +2578,7 @@ func (f *freelist) reindex() { } } -// root returns the top-level node this node is attached to. +/// node.root() returns the top-level node this node is attached to. func (n *node) root() *node { if n.parent == nil { return n @@ -2454,7 +2586,7 @@ func (n *node) root() *node { return n.parent.root() } -// minKeys returns the minimum number of inodes this node should have. +/// node.minKeys() returns the minimum number of inodes this node should have. func (n *node) minKeys() int { if n.isLeaf { return 1 @@ -2462,7 +2594,7 @@ func (n *node) minKeys() int { return 2 } -// size returns the size of the node after serialization. +/// node.size() returns the size of the node after serialization. func (n *node) size() int { sz, elsz := pageHeaderSize, n.pageElementSize() for i := 0; i < len(n.inodes); i++ { @@ -2472,9 +2604,9 @@ func (n *node) size() int { return sz } -// sizeLessThan returns true if the node is less than a given size. -// This is an optimization to avoid calculating a large node when we only need -// to know if it fits inside a certain page size. +/// node.sizeLessThan() returns true if the node is less than a given size. +/// This is an optimization to avoid calculating a large node when we only need +/// to know if it fits inside a certain page size. func (n *node) sizeLessThan(v int) bool { sz, elsz := pageHeaderSize, n.pageElementSize() for i := 0; i < len(n.inodes); i++ { @@ -2487,15 +2619,17 @@ func (n *node) sizeLessThan(v int) bool { return true } -// pageElementSize returns the size of each page element based on the type of node. +/// node.pageElementSize() returns the size of each page element based on the +/// type of node. func (n *node) pageElementSize() int { if n.isLeaf { return leafPageElementSize + } else { + return branchPageElementSize } - return branchPageElementSize } -// childAt returns the child node at a given index. +/// node.childAt() returns the child node at a given index. func (n *node) childAt(index int) *node { if n.isLeaf { panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index)) @@ -2503,18 +2637,20 @@ func (n *node) childAt(index int) *node { return n.bucket.node(n.inodes[index].pgid, n) } -// childIndex returns the index of a given child node. +/// node.childIndex() returns the index of a given child node. func (n *node) childIndex(child *node) int { - index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 }) + index := sort.Search(len(n.inodes), func(i int) bool { + return bytes.Compare(n.inodes[i].key, child.key) != -1 + }) return index } -// numChildren returns the number of children. +/// node.numChildren() returns the number of children. func (n *node) numChildren() int { return len(n.inodes) } -// nextSibling returns the next node with the same parent. +/// node.nextSibling() returns the next node with the same parent. func (n *node) nextSibling() *node { if n.parent == nil { return nil @@ -2526,7 +2662,7 @@ func (n *node) nextSibling() *node { return n.parent.childAt(index + 1) } -// prevSibling returns the previous node with the same parent. +/// node.prevSibling() returns the previous node with the same parent. func (n *node) prevSibling() *node { if n.parent == nil { return nil @@ -2538,10 +2674,14 @@ func (n *node) prevSibling() *node { return n.parent.childAt(index - 1) } -// put inserts a key/value. +/// node.put() inserts a key/value. func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { if pgid >= n.bucket.tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid)) + panic(fmt.Sprintf( + "pgid (%d) above high water mark (%d)", + pgid, + n.bucket.tx.meta.pgid, + )) } else if len(oldKey) <= 0 { panic("put: zero-length old key") } else if len(newKey) <= 0 { @@ -2549,10 +2689,16 @@ func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { } // Find insertion index. - index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 }) + index := sort.Search(len(n.inodes), func(i int) bool { + return bytes.Compare(n.inodes[i].key, oldKey) != -1 + }) - // Add capacity and shift nodes if we don't have an exact match and need to insert. - exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)) + // Add capacity and shift nodes if we don't have an exact match and need + // to insert. + exact := + len(n.inodes) > 0 && + index < len(n.inodes) && + bytes.Equal(n.inodes[index].key, oldKey) if !exact { n.inodes = append(n.inodes, inode{}) copy(n.inodes[index+1:], n.inodes[index:]) @@ -2563,13 +2709,15 @@ func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { inode.key = newKey inode.value = value inode.pgid = pgid - _assert(len(inode.key) > 0, "put: zero-length inode key") + g.Assert(len(inode.key) > 0, "put: zero-length inode key") } -// del removes a key from the node. +/// node.del() removes a key from the node. func (n *node) del(key []byte) { // Find index of key. - index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 }) + index := sort.Search(len(n.inodes), func(i int) bool { + return bytes.Compare(n.inodes[i].key, key) != -1 + }) // Exit if the key isn't found. if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) { @@ -2583,7 +2731,7 @@ func (n *node) del(key []byte) { n.unbalanced = true } -// read initializes the node from a page. +/// node.read() initializes the node from a page. func (n *node) read(p *page) { n.pgid = p.id n.isLeaf = ((p.flags & leafPageFlag) != 0) @@ -2601,19 +2749,23 @@ func (n *node) read(p *page) { inode.pgid = elem.pgid inode.key = elem.key() } - _assert(len(inode.key) > 0, "read: zero-length inode key") + g.Assert(len(inode.key) > 0, "read: zero-length inode key") } // Save first key so we can find the node in the parent when we spill. if len(n.inodes) > 0 { n.key = n.inodes[0].key - _assert(len(n.key) > 0, "read: zero-length node key") + g.Assert(len(n.key) > 0, "read: zero-length node key") } else { n.key = nil } } -// write writes the items onto one or more pages. +func u32position(a unsafe.Pointer, b unsafe.Pointer) uint32 { + return uint32(uintptr(a) - uintptr(b)) +} + +/// node.write() writes the items onto one or more pages. func (n *node) write(p *page) { // Initialize page. if n.isLeaf { @@ -2623,7 +2775,11 @@ func (n *node) write(p *page) { } if len(n.inodes) >= 0xFFFF { - panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id)) + panic(fmt.Sprintf( + "inode overflow: %d (pgid=%d)", + len(n.inodes), + p.id, + )) } p.count = uint16(len(n.inodes)) @@ -2633,27 +2789,37 @@ func (n *node) write(p *page) { } // Loop over each item and write it to the page. - b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):] + nth := n.pageElementSize() * len(n.inodes) + b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[nth:] for i, item := range n.inodes { - _assert(len(item.key) > 0, "write: zero-length inode key") + g.Assert(len(item.key) > 0, "write: zero-length inode key") // Write the page element. if n.isLeaf { elem := p.leafPageElement(uint16(i)) - elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) + elem.pos = u32position( + unsafe.Pointer(&b[0]), + unsafe.Pointer(elem), + ) elem.flags = item.flags elem.ksize = uint32(len(item.key)) elem.vsize = uint32(len(item.value)) } else { elem := p.branchPageElement(uint16(i)) - elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) + elem.pos = u32position( + unsafe.Pointer(&b[0]), + unsafe.Pointer(elem), + ) elem.ksize = uint32(len(item.key)) elem.pgid = item.pgid - _assert(elem.pgid != p.id, "write: circular dependency occurred") + g.Assert( + elem.pgid != p.id, + "write: circular dependency occurred", + ) } - // If the length of key+value is larger than the max allocation size - // then we need to reallocate the byte array pointer. + // If the length of key+value is larger than the max allocation + // size then we need to reallocate the byte array pointer. // // See: https://github.com/boltdb/bolt/pull/335 klen, vlen := len(item.key), len(item.value) @@ -2667,15 +2833,12 @@ func (n *node) write(p *page) { copy(b[0:], item.value) b = b[vlen:] } - - // DEBUG ONLY: n.dump() } -// split breaks up a node into multiple smaller nodes, if appropriate. -// This should only be called from the spill() function. +/// node.split() breaks up a node into multiple smaller nodes, if appropriate. +/// This should only be called from the spill() function. func (n *node) split(pageSize int) []*node { - var nodes []*node - + nodes := []*node{} node := n for { // Split node into two. @@ -2694,8 +2857,8 @@ func (n *node) split(pageSize int) []*node { return nodes } -// splitTwo breaks up a node into two smaller nodes, if appropriate. -// This should only be called from the split() function. +/// node.splitTwo() breaks up a node into two smaller nodes, if appropriate. +/// This should only be called from the split() function. func (n *node) splitTwo(pageSize int) (*node, *node) { const fillPercent = 0.5 @@ -2727,20 +2890,25 @@ func (n *node) splitTwo(pageSize int) (*node, *node) { return n, next } -// splitIndex finds the position where a page will fill a given threshold. -// It returns the index as well as the size of the first page. -// This is only be called from split(). +/// node.splitIndex() finds the position where a page will fill a given +/// threshold. It returns the index as well as the size of the first page. +/// This is only be called from split(). func (n *node) splitIndex(threshold int) (index, sz int) { sz = pageHeaderSize - // Loop until we only have the minimum number of keys required for the second page. + // Loop until we only have the minimum number of keys required for the + // second page. for i := 0; i < len(n.inodes)-minKeysPerPage; i++ { index = i inode := n.inodes[i] - elsize := n.pageElementSize() + len(inode.key) + len(inode.value) - - // If we have at least the minimum number of keys and adding another - // node would put us over the threshold then exit and return. + elsize := + n.pageElementSize() + + len(inode.key) + + len(inode.value) + + // If we have at least the minimum number of keys and adding + // another node would put us over the threshold then exit and + // return. if i >= minKeysPerPage && sz+elsize > threshold { break } @@ -2752,29 +2920,31 @@ func (n *node) splitIndex(threshold int) (index, sz int) { return } -// spill writes the nodes to dirty pages and splits nodes as it goes. -// Returns an error if dirty pages cannot be allocated. +/// node.spill() writes the nodes to dirty pages and splits nodes as it goes. +/// Returns an error if dirty pages cannot be allocated. func (n *node) spill() error { - var tx = n.bucket.tx + tx := n.bucket.tx if n.spilled { return nil } - // Spill child nodes first. Child nodes can materialize sibling nodes in - // the case of split-merge so we cannot use a range loop. We have to check - // the children size on every loop iteration. + // Spill child nodes first. Child nodes can materialize sibling nodes + // in the case of split-merge so we cannot use a range loop. We have to + // check the children size on every loop iteration. sort.Sort(n.children) for i := 0; i < len(n.children); i++ { - if err := n.children[i].spill(); err != nil { + err := n.children[i].spill() + if err != nil { return err } } - // We no longer need the child list because it's only used for spill tracking. + // We no longer need the child list because it's only used for spill + // tracking. n.children = nil - // Split nodes into appropriate sizes. The first node will always be n. - var nodes = n.split(tx.db.pageSize) + // Split nodes into appropriate sizes. The first node will always be n. + nodes := n.split(tx.db.pageSize) for _, node := range nodes { // Add node's page to the freelist if it's not new. if node.pgid > 0 { @@ -2790,7 +2960,11 @@ func (n *node) spill() error { // Write the node. if p.id >= tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid)) + panic(fmt.Sprintf( + "pgid (%d) above high water mark (%d)", + p.id, + tx.meta.pgid, + )) } node.pgid = p.id node.write(p) @@ -2798,19 +2972,29 @@ func (n *node) spill() error { // Insert into parent inodes. if node.parent != nil { - var key = node.key + key := node.key if key == nil { key = node.inodes[0].key } - node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0) + node.parent.put( + key, + node.inodes[0].key, + nil, + node.pgid, + 0, + ) node.key = node.inodes[0].key - _assert(len(node.key) > 0, "spill: zero-length node key") + g.Assert( + len(node.key) > 0, + "spill: zero-length node key", + ) } } - // If the root node split and created a new root then we need to spill that - // as well. We'll clear out the children to make sure it doesn't try to respill. + // If the root node split and created a new root then we need to spill + // that as well. We'll clear out the children to make sure it doesn't + // try to respill. if n.parent != nil && n.parent.pgid == 0 { n.children = nil return n.parent.spill() @@ -2819,8 +3003,8 @@ func (n *node) spill() error { return nil } -// rebalance attempts to combine the node with sibling nodes if the node fill -// size is below a threshold or if there are not enough keys. +/// node.rebalance() attempts to combine the node with sibling nodes if the node +/// fill size is below a threshold or if there are not enough keys. func (n *node) rebalance() { if !n.unbalanced { return @@ -2828,14 +3012,15 @@ func (n *node) rebalance() { n.unbalanced = false // Ignore if node is above threshold (25%) and has enough keys. - var threshold = n.bucket.tx.db.pageSize / 4 + threshold := n.bucket.tx.db.pageSize / 4 if n.size() > threshold && len(n.inodes) > n.minKeys() { return } // Root node has special handling. if n.parent == nil { - // If root node is a branch and only has one node then collapse it. + // If root node is a branch and only has one node then collapse + // it. if !n.isLeaf && len(n.inodes) == 1 { // Move root's child up. child := n.bucket.node(n.inodes[0].pgid, n) @@ -2845,7 +3030,8 @@ func (n *node) rebalance() { // Reparent all child nodes being moved. for _, inode := range n.inodes { - if child, ok := n.bucket.nodes[inode.pgid]; ok { + child, ok := n.bucket.nodes[inode.pgid] + if ok { child.parent = n } } @@ -2869,11 +3055,15 @@ func (n *node) rebalance() { return } - _assert(n.parent.numChildren() > 1, "parent must have at least 2 children") + g.Assert( + n.parent.numChildren() > 1, + "parent must have at least 2 children", + ) - // Destination node is right sibling if idx == 0, otherwise left sibling. + // Destination node is right sibling if idx == 0, otherwise left + // sibling. var target *node - var useNextSibling = (n.parent.childIndex(n) == 0) + useNextSibling := n.parent.childIndex(n) == 0 if useNextSibling { target = n.nextSibling() } else { @@ -2884,10 +3074,14 @@ func (n *node) rebalance() { if useNextSibling { // Reparent all child nodes being moved. for _, inode := range target.inodes { - if child, ok := n.bucket.nodes[inode.pgid]; ok { + child, ok := n.bucket.nodes[inode.pgid] + if ok { child.parent.removeChild(child) child.parent = n - child.parent.children = append(child.parent.children, child) + child.parent.children = append( + child.parent.children, + child, + ) } } @@ -2900,10 +3094,14 @@ func (n *node) rebalance() { } else { // Reparent all child nodes being moved. for _, inode := range n.inodes { - if child, ok := n.bucket.nodes[inode.pgid]; ok { + child, ok := n.bucket.nodes[inode.pgid] + if ok { child.parent.removeChild(child) child.parent = target - child.parent.children = append(child.parent.children, child) + child.parent.children = append( + child.parent.children, + child, + ) } } @@ -2915,12 +3113,13 @@ func (n *node) rebalance() { n.free() } - // Either this node or the target node was deleted from the parent so rebalance it. + // Either this node or the target node was deleted from the parent so + // rebalance it. n.parent.rebalance() } -// removes a node from the list of in-memory children. -// This does not affect the inodes. +/// node.removeChild() removes a node from the list of in-memory children. This +/// does not affect the inodes. func (n *node) removeChild(target *node) { for i, child := range n.children { if child == target { @@ -2930,14 +3129,18 @@ func (n *node) removeChild(target *node) { } } -// dereference causes the node to copy all its inode key/value references to heap memory. -// This is required when the mmap is reallocated so inodes are not pointing to stale data. +/// node.dereference() causes the node to copy all its inode key/value +/// references to heap memory. This is required when the mmap is reallocated so +/// inodes are not pointing to stale data. func (n *node) dereference() { if n.key != nil { key := make([]byte, len(n.key)) copy(key, n.key) n.key = key - _assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node") + g.Assert( + n.pgid == 0 || len(n.key) > 0, + "dereference: zero-length node key on existing node", + ) } for i := range n.inodes { @@ -2946,7 +3149,10 @@ func (n *node) dereference() { key := make([]byte, len(inode.key)) copy(key, inode.key) inode.key = key - _assert(len(inode.key) > 0, "dereference: zero-length inode key") + g.Assert( + len(inode.key) > 0, + "dereference: zero-length inode key", + ) value := make([]byte, len(inode.value)) copy(value, inode.value) @@ -2959,19 +3165,30 @@ func (n *node) dereference() { } } -// free adds the node's underlying page to the freelist. +/// node.free() adds the node's underlying page to the freelist. func (n *node) free() { if n.pgid != 0 { - n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid)) + n.bucket.tx.db.freelist.free( + n.bucket.tx.meta.txid, + n.bucket.tx.page(n.pgid), + ) n.pgid = 0 } } -func (s nodes) Len() int { return len(s) } -func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 } +func (s nodes) Len() int { + return len(s) +} + +func (s nodes) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s nodes) Less(i, j int) bool { + return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 +} -// typ returns a human readable page type string used for debugging. +/// typ returns a human readable page type string used for debugging. func (p *page) typ() string { if (p.flags & branchPageFlag) != 0 { return "branch" @@ -2985,18 +3202,18 @@ func (p *page) typ() string { return fmt.Sprintf("unknown<%02x>", p.flags) } -// meta returns a pointer to the metadata section of the page. +/// page.meta() returns a pointer to the metadata section of the page. func (p *page) meta() *meta { return (*meta)(unsafe.Pointer(&p.ptr)) } -// leafPageElement retrieves the leaf node by index +/// page.leafPageElement() retrieves the leaf node by index func (p *page) leafPageElement(index uint16) *leafPageElement { n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] return n } -// leafPageElements retrieves a list of leaf nodes. +/// page.leafPageElements() retrieves a list of leaf nodes. func (p *page) leafPageElements() []leafPageElement { if p.count == 0 { return nil @@ -3004,12 +3221,12 @@ func (p *page) leafPageElements() []leafPageElement { return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:] } -// branchPageElement retrieves the branch node by index +/// page.branchPageElement() retrieves the branch node by index func (p *page) branchPageElement(index uint16) *branchPageElement { return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] } -// branchPageElements retrieves a list of branch nodes. +/// page.branchPageElements() retrieves a list of branch nodes. func (p *page) branchPageElements() []branchPageElement { if p.count == 0 { return nil @@ -3017,33 +3234,53 @@ func (p *page) branchPageElements() []branchPageElement { return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:] } -func (s pages) Len() int { return len(s) } -func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s pages) Less(i, j int) bool { return s[i].id < s[j].id } +func (s pages) Len() int { + return len(s) +} + +func (s pages) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s pages) Less(i, j int) bool { + return s[i].id < s[j].id +} -// key returns a byte slice of the node key. +/// branchPageElement.key() returns a byte slice of the node key. func (n *branchPageElement) key() []byte { buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] } -// key returns a byte slice of the node key. +/// leafPageElement.key() returns a byte slice of the node key. func (n *leafPageElement) key() []byte { buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] + return (*[maxAllocSize]byte)( + unsafe.Pointer(&buf[n.pos]), + )[:n.ksize:n.ksize] } -// value returns a byte slice of the node value. +/// leafPageElement.value() returns a byte slice of the node value. func (n *leafPageElement) value() []byte { buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] + return (*[maxAllocSize]byte)( + unsafe.Pointer(&buf[n.pos+n.ksize]), + )[:n.vsize:n.vsize] } -func (s pgids) Len() int { return len(s) } -func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s pgids) Less(i, j int) bool { return s[i] < s[j] } +func (s pgids) Len() int { + return len(s) +} -// merge returns the sorted union of a and b. +func (s pgids) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +func (s pgids) Less(i, j int) bool { + return s[i] < s[j] +} + +/// pgids.merge() returns the sorted union of a and b. func (a pgids) merge(b pgids) pgids { // Return the opposite slice if one is nil. if len(a) == 0 { @@ -3057,11 +3294,16 @@ func (a pgids) merge(b pgids) pgids { return merged } -// mergepgids copies the sorted union of a and b into dst. -// If dst is too small, it panics. +/// mergepgids() copies the sorted union of a and b into dst. If dst is too +/// small, it panics. func mergepgids(dst, a, b pgids) { if len(dst) < len(a)+len(b) { - panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b))) + panic(fmt.Errorf( + "mergepgids bad len %d < %d + %d", + len(dst), + len(a), + len(b), + )) } // Copy in the opposite slice if one is nil. if len(a) == 0 { @@ -3076,7 +3318,8 @@ func mergepgids(dst, a, b pgids) { // Merged will hold all elements from both lists. merged := dst[:0] - // Assign lead to the slice with a lower starting value, follow to the higher value. + // Assign lead to the slice with a lower starting value, follow to the + // higher value. lead, follow := a, b if b[0] < a[0] { lead, follow = b, a @@ -3085,7 +3328,9 @@ func mergepgids(dst, a, b pgids) { // Continue while there are elements in the lead. for len(lead) > 0 { // Merge largest prefix of lead that is ahead of follow[0]. - n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] }) + n := sort.Search(len(lead), func(i int) bool { + return lead[i] > follow[0] + }) merged = append(merged, lead[:n]...) if n >= len(lead) { break @@ -3099,7 +3344,7 @@ func mergepgids(dst, a, b pgids) { _ = append(merged, follow...) } -// init initializes the transaction. +/// Tx.init() initializes the transaction. func (tx *Tx) init(db *DB) { tx.db = db tx.pages = nil @@ -3113,90 +3358,96 @@ func (tx *Tx) init(db *DB) { tx.root.ref = &bucket{} *tx.root.ref = tx.meta.root - // Increment the transaction id and add a page cache for writable transactions. + // Increment the transaction id and add a page cache for writable + // transactions. if tx.writable { tx.pages = make(map[pgid]*page) tx.meta.txid += txid(1) } } -// ID returns the transaction id. +/// Tx.ID() returns the transaction id. func (tx *Tx) ID() int { return int(tx.meta.txid) } -// DB returns a reference to the database that created the transaction. +/// Tx.DB() returns a reference to the database that created the transaction. func (tx *Tx) DB() *DB { return tx.db } -// Size returns current database size in bytes as seen by this transaction. +/// Tx.Size() returns current database size in bytes as seen by this +/// transaction. func (tx *Tx) Size() int64 { return int64(tx.meta.pgid) * int64(tx.db.pageSize) } -// Writable returns whether the transaction can perform write operations. +/// Tx.Writable() returns whether the transaction can perform write operations. func (tx *Tx) Writable() bool { return tx.writable } -// Cursor creates a cursor associated with the root bucket. -// All items in the cursor will return a nil value because all root bucket keys point to buckets. -// The cursor is only valid as long as the transaction is open. -// Do not use a cursor after the transaction is closed. +/// Tx.Cursor() creates a cursor associated with the root bucket. All items in +/// the cursor will return a nil value because all root bucket keys point to +/// buckets. The cursor is only valid as long as the transaction is open. Do +/// not use a cursor after the transaction is closed. func (tx *Tx) Cursor() *Cursor { return tx.root.Cursor() } -// Bucket retrieves a bucket by name. -// Returns nil if the bucket does not exist. -// The bucket instance is only valid for the lifetime of the transaction. +/// Tx.Bucket() retrieves a bucket by name. Returns nil if the bucket does not +/// exist. The bucket instance is only valid for the lifetime of the +/// transaction. func (tx *Tx) Bucket(name []byte) *Bucket { return tx.root.Bucket(name) } -// CreateBucket creates a new bucket. -// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. +/// Tx.CreateBucket() creates a new bucket. Returns an error if the bucket +/// already exists, if the bucket name is blank, or if the bucket name is too +/// long. The bucket instance is only valid for the lifetime of the +/// transaction. func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) { return tx.root.CreateBucket(name) } -// CreateBucketIfNotExists creates a new bucket if it doesn't already exist. -// Returns an error if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. +/// Tx.CreateBucketIfNotExists() creates a new bucket if it doesn't already +/// exist. Returns an error if the bucket name is blank, or if the bucket name +/// is too long. The bucket instance is only valid for the lifetime of the +/// transaction. func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) { return tx.root.CreateBucketIfNotExists(name) } -// DeleteBucket deletes a bucket. -// Returns an error if the bucket cannot be found or if the key represents a non-bucket value. +/// Tx.DeleteBucket() deletes a bucket. Returns an error if the bucket cannot +/// be found or if the key represents a non-bucket value. func (tx *Tx) DeleteBucket(name []byte) error { return tx.root.DeleteBucket(name) } -// ForEach executes a function for each bucket in the root. -// If the provided function returns an error then the iteration is stopped and -// the error is returned to the caller. +/// Tx.ForEach() executes a function for each bucket in the root. If the +/// provided function returns an error then the iteration is stopped and the +/// error is returned to the caller. func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error { return tx.root.ForEach(func(k, v []byte) error { - if err := fn(k, tx.root.Bucket(k)); err != nil { + err := fn(k, tx.root.Bucket(k)) + if err != nil { return err } return nil }) } -// OnCommit adds a handler function to be executed after the transaction successfully commits. +/// Tx.OnCommit() adds a handler function to be executed after the transaction +/// successfully commits. func (tx *Tx) OnCommit(fn func()) { tx.commitHandlers = append(tx.commitHandlers, fn) } -// Commit writes all changes to disk and updates the meta page. -// Returns an error if a disk write error occurs, or if Commit is -// called on a read-only transaction. +/// Tx.Commit() writes all changes to disk and updates the meta page. Returns +/// an error if a disk write error occurs, or if Tx.Commiti() is called on a +/// read-only transaction. func (tx *Tx) Commit() error { - _assert(!tx.managed, "managed tx commit not allowed") + g.Assert(!tx.managed, "managed tx commit not allowed") if tx.db == nil { return ErrTxClosed } else if !tx.writable { @@ -3209,7 +3460,8 @@ func (tx *Tx) Commit() error { tx.root.rebalance() // spill data onto dirty pages. - if err := tx.root.spill(); err != nil { + err := tx.root.spill() + if err != nil { tx.rollback() return err } @@ -3219,30 +3471,36 @@ func (tx *Tx) Commit() error { opgid := tx.meta.pgid - // Free the freelist and allocate new pages for it. This will overestimate - // the size of the freelist but not underestimate the size (which would be bad). + // Free the freelist and allocate new pages for it. This will + // overestimate the size of the freelist but not underestimate the size + // (which would be bad). tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) if err != nil { tx.rollback() return err } - if err := tx.db.freelist.write(p); err != nil { + + err = tx.db.freelist.write(p) + if err != nil { tx.rollback() return err } tx.meta.freelist = p.id - // If the high water mark has moved up then attempt to grow the database. + // If the high water mark has moved up then attempt to grow the + // database. if tx.meta.pgid > opgid { - if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { + err := tx.db.grow(int(tx.meta.pgid + 1) * tx.db.pageSize) + if err != nil { tx.rollback() return err } } // Write dirty pages to disk. - if err := tx.write(); err != nil { + err = tx.write() + if err != nil { tx.rollback() return err } @@ -3251,7 +3509,7 @@ func (tx *Tx) Commit() error { // Only the first consistency error is reported in the panic. if tx.db.StrictMode { ch := tx.Check() - var errs []string + errs := []string{} for { err, ok := <-ch if !ok { @@ -3265,7 +3523,8 @@ func (tx *Tx) Commit() error { } // Write meta to disk. - if err := tx.writeMeta(); err != nil { + err = tx.writeMeta() + if err != nil { tx.rollback() return err } @@ -3281,10 +3540,10 @@ func (tx *Tx) Commit() error { return nil } -// Rollback closes the transaction and ignores all previous updates. Read-only -// transactions must be rolled back and not committed. +/// Tx.Rollback() closes the transaction and ignores all previous updates. +/// Read-only transactions must be rolled back and not committed. func (tx *Tx) Rollback() error { - _assert(!tx.managed, "managed tx rollback not allowed") + g.Assert(!tx.managed, "managed tx rollback not allowed") if tx.db == nil { return ErrTxClosed } @@ -3322,8 +3581,8 @@ func (tx *Tx) close() { tx.pages = nil } -// WriteTo writes the entire database to a writer. -// If err == nil then exactly tx.Size() bytes will be written into the writer. +/// Tx.WriteTo() writes the entire database to a writer. If err == nil then +/// exactly tx.Size() bytes will be written into the writer. func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { f, err := os.OpenFile(tx.db.path, os.O_RDONLY, 0) if err != nil { @@ -3331,7 +3590,7 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { } defer f.Close() - // Generate a meta page. We use the same page data for both meta pages. + // Generate a meta page. We use the same page data for both meta pages. buf := make([]byte, tx.db.pageSize) page := (*page)(unsafe.Pointer(&buf[0])) page.flags = metaPageFlag @@ -3357,7 +3616,8 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { } // Move past the meta pages in the file. - if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil { + _, err = f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET) + if err != nil { return n, fmt.Errorf("seek: %s", err) } @@ -3371,9 +3631,9 @@ func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { return n, f.Close() } -// CopyFile copies the entire database to file at the given path. -// A reader transaction is maintained during the copy so it is safe to continue -// using the database while a copy is in progress. +/// Tx.CopyFile() copies the entire database to file at the given path. A +/// reader transaction is maintained during the copy so it is safe to continue +/// using the database while a copy is in progress. func (tx *Tx) CopyFile(path string, mode os.FileMode) error { f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) if err != nil { @@ -3388,14 +3648,14 @@ func (tx *Tx) CopyFile(path string, mode os.FileMode) error { return f.Close() } -// Check performs several consistency checks on the database for this transaction. -// An error is returned if any inconsistency is found. -// -// It can be safely run concurrently on a writable transaction. However, this -// incurs a high cost for large databases and databases with a lot of subbuckets -// because of caching. This overhead can be removed if running on a read-only -// transaction, however, it is not safe to execute other writer transactions at -// the same time. +/// Tx.Check() performs several consistency checks on the database for this +/// transaction. An error is returned if any inconsistency is found. +/// +/// It can be safely run concurrently on a writable transaction. However, this +/// incurs a high cost for large databases and databases with a lot of +/// subbuckets because of caching. This overhead can be removed if running on a +/// read-only transaction, however, it is not safe to execute other writer +/// transactions at the same time. func (tx *Tx) Check() <-chan error { ch := make(chan error) go tx.check(ch) @@ -3437,7 +3697,20 @@ func (tx *Tx) check(ch chan error) { close(ch) } -func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) { +func isBranchPage(p *page) bool { + return (p.flags & branchPageFlag) == 0 +} + +func isLeafPage(p *page) bool { + return (p.flags & leafPageFlag) == 0 +} + +func (tx *Tx) checkBucket( + b *Bucket, + reachable map[pgid]*page, + freed map[pgid]bool, + ch chan error, +) { // Ignore inline buckets. if b.ref.root == 0 { return @@ -3446,14 +3719,22 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo // Check every page used by this bucket. b.tx.forEachPage(b.ref.root, 0, func(p *page, _ int) { if p.id > tx.meta.pgid { - ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid)) + ch <- fmt.Errorf( + "page %d: out of bounds: %d", + int(p.id), + int(b.tx.meta.pgid), + ) } // Ensure each page is only referenced once. for i := pgid(0); i <= pgid(p.overflow); i++ { - var id = p.id + i - if _, ok := reachable[id]; ok { - ch <- fmt.Errorf("page %d: multiple references", int(id)) + id := p.id + i + _, ok := reachable[id] + if ok { + ch <- fmt.Errorf( + "page %d: multiple references", + int(id), + ) } reachable[id] = p } @@ -3461,21 +3742,26 @@ func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bo // We should only encounter un-freed leaf and branch pages. if freed[p.id] { ch <- fmt.Errorf("page %d: reachable freed", int(p.id)) - } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 { - ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ()) + } else if isBranchPage(p) && isLeafPage(p) { + ch <- fmt.Errorf( + "page %d: invalid type: %s", + int(p.id), + p.typ(), + ) } }) // Check each bucket within this bucket. _ = b.ForEach(func(k, v []byte) error { - if child := b.Bucket(k); child != nil { + child := b.Bucket(k) + if child != nil { tx.checkBucket(child, reachable, freed, ch) } return nil }) } -// allocate returns a contiguous block of memory starting at a given page. +/// Tx.allocate() returns a contiguous block of memory starting at a given page. func (tx *Tx) allocate(count int) (*page, error) { p, err := tx.db.allocate(count) if err != nil { @@ -3488,7 +3774,7 @@ func (tx *Tx) allocate(count int) (*page, error) { return p, nil } -// write writes any dirty pages to disk. +/// Tx.write() writes any dirty pages to disk. func (tx *Tx) write() error { // Sort pages by id. pages := make(pages, 0, len(tx.pages)) @@ -3515,7 +3801,8 @@ func (tx *Tx) write() error { // Write chunk to disk. buf := ptr[:sz] - if _, err := tx.db.file.WriteAt(buf, offset); err != nil { + _, err := tx.db.file.WriteAt(buf, offset) + if err != nil { return err } @@ -3525,13 +3812,15 @@ func (tx *Tx) write() error { break } - // Otherwise move offset forward and move pointer to next chunk. + // Otherwise move offset forward and move pointer to + // next chunk. offset += int64(sz) ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz])) } } - if err := fdatasync(tx.db); err != nil { + err := fdatasync(tx.db) + if err != nil { return err } @@ -3544,8 +3833,6 @@ func (tx *Tx) write() error { } buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize] - - // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 for i := range buf { buf[i] = 0 } @@ -3555,7 +3842,7 @@ func (tx *Tx) write() error { return nil } -// writeMeta writes the meta to the disk. +/// Tx.writeMeta() writes the meta to the disk. func (tx *Tx) writeMeta() error { // Create a temporary buffer for the meta page. buf := make([]byte, tx.db.pageSize) @@ -3563,23 +3850,26 @@ func (tx *Tx) writeMeta() error { tx.meta.write(p) // Write the meta page to file. - if _, err := tx.db.file.WriteAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil { + _, err := tx.db.file.WriteAt(buf, int64(p.id)*int64(tx.db.pageSize)) + if err != nil { return err } - if err := fdatasync(tx.db); err != nil { + err = fdatasync(tx.db) + if err != nil { return err } return nil } -// page returns a reference to the page with a given id. -// If page has been written to then a temporary buffered page is returned. +/// Tx.page() returns a reference to the page with a given id. If page has been +/// written to then a temporary buffered page is returned. func (tx *Tx) page(id pgid) *page { // Check the dirty pages first. if tx.pages != nil { - if p, ok := tx.pages[id]; ok { + p, ok := tx.pages[id] + if ok { return p } } @@ -3588,7 +3878,8 @@ func (tx *Tx) page(id pgid) *page { return tx.db.page(id) } -// forEachPage iterates over every page within a given page and executes a function. +/// Tx.forEachPage() iterates over every page within a given page and executes a +/// function. func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) { p := tx.page(pgid) @@ -3604,8 +3895,8 @@ func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) { } } -// Page returns page information for a given page number. -// This is only safe for concurrent use when used by a writable transaction. +/// Tx.Page() returns page information for a given page number. This is only +/// safe for concurrent use when used by a writable transaction. func (tx *Tx) Page(id int) (*PageInfo, error) { if tx.db == nil { return nil, ErrTxClosed |