50 files changed, 12121 insertions, 12443 deletions
@@ -1,4 +1,14 @@ -*.prof -*.test -*.swp -/bin/ +/src/version.go +/*.bin +/src/*.a +/src/*.bin +/tests/*.a +/tests/*.bin +/tests/functional/*/*.a +/tests/functional/*/*.bin +/tests/fuzz/*/*.a +/tests/fuzz/*/*.bin +/tests/benchmarks/*/*.a +/tests/benchmarks/*/*.bin +/tests/benchmarks/*/*.txt +/tests/fuzz/corpus/ @@ -1,18 +1,170 @@ -BRANCH=`git rev-parse --abbrev-ref HEAD` -COMMIT=`git rev-parse --short HEAD` -GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" +.POSIX: +DATE = 1970-01-01 +VERSION = 0.1.0 +NAME = gkv +NAME_UC = $(NAME) +LANGUAGES = en +## Installation prefix. Defaults to "/usr". +PREFIX = /usr +BINDIR = $(PREFIX)/bin +LIBDIR = $(PREFIX)/lib +GOLIBDIR = $(LIBDIR)/go +INCLUDEDIR = $(PREFIX)/include +SRCDIR = $(PREFIX)/src/$(NAME) +SHAREDIR = $(PREFIX)/share +LOCALEDIR = $(SHAREDIR)/locale +MANDIR = $(SHAREDIR)/man +EXEC = ./ +## Where to store the installation. Empty by default. +DESTDIR = +LDLIBS = --static +GOCFLAGS = -I $(GOLIBDIR) +GOLDFLAGS = -L $(GOLIBDIR) -default: build -race: - @go test -v -race -test.run="TestSimulate_(100op|1000op)" -# go get github.com/kisielk/errcheck -errcheck: - @errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt +.SUFFIXES: +.SUFFIXES: .go .a .bin .bin-check -test: - @go test -v -cover . - @go test -v ./cmd/bolt +.go.a: + go tool compile $(GOCFLAGS) -I $(@D) -o $@ -p $(*F) \ + `find $< $$(if [ $(*F) != main ]; then \ + echo src/$(NAME).go src/version.go; fi) | uniq` -.PHONY: fmt test +.a.bin: + go tool link $(GOLDFLAGS) -L $(@D) -o $@ --extldflags '$(LDLIBS)' $< + + + +all: +include deps.mk + + +libs.a = $(libs.go:.go=.a) +mains.a = $(mains.go:.go=.a) +mains.bin = $(mains.go:.go=.bin) +functional-tests/lib.a = $(functional-tests/lib.go:.go=.a) +fuzz-targets/lib.a = $(fuzz-targets/lib.go:.go=.a) +benchmarks/lib.a = $(benchmarks/lib.go:.go=.a) + +sources = \ + src/$(NAME).go \ + src/version.go \ + src/main.go \ + + +derived-assets = \ + src/version.go \ + $(libs.a) \ + $(mains.a) \ + $(mains.bin) \ + $(NAME).bin \ + +side-assets = \ + tests/fuzz/corpus/ \ + tests/benchmarks/*/main.txt \ + + + +## Default target. Builds all artifacts required for testing +## and installation. +all: $(derived-assets) + + +$(libs.a): Makefile deps.mk +$(libs.a): src/$(NAME).go src/version.go + + +$(fuzz-targets/lib.a): + go tool compile $(GOCFLAGS) -o $@ -p $(NAME) -d=libfuzzer \ + $*.go src/$(NAME).go src/version.go + +src/version.go: Makefile + echo 'package $(NAME); const Version = "$(VERSION)"' > $@ + +$(NAME).bin: src/main.bin + ln -fs $? $@ + + + +tests.bin-check = \ + tests/main.bin-check \ + $(functional-tests/main.go:.go=.bin-check) \ + +$(tests.bin-check): + $(EXEC)$*.bin + +check-unit: $(tests.bin-check) + + +integration-tests = \ + tests/cli-opts.sh \ + tests/integration.sh \ + +.PRECIOUS: $(integration-tests) +$(integration-tests): $(NAME).bin +$(integration-tests): ALWAYS + sh $@ + +check-integration: $(integration-tests) +check-integration: fuzz + + +## Run all tests. Each test suite is isolated, so that a parallel +## build can run tests at the same time. The required artifacts +## are created if missing. 
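+## Behind the scenes, each binary is produced by the two suffix +## rules above; a sketch of what they expand to for src/main.bin, +## assuming the default PREFIX of "/usr" (illustrative only): +## +## go tool compile -I /usr/lib/go -I src -o src/main.a -p main src/main.go +## go tool link -L /usr/lib/go -L src -o src/main.bin --extldflags '--static' src/main.a +## +## A typical test-driving session, using only the variables and +## targets defined in this file (an illustrative transcript): +## +## $ make check # build missing artifacts, run every suite +## $ make FUZZSEC=30 fuzz # give each fuzz target a 30s budget +## $ make DESTDIR=/tmp/stage install # stage the installation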
+check: check-unit check-integration + + + +FUZZSEC=1 +fuzz-targets/main.bin-check = $(fuzz-targets/main.go:.go=.bin-check) +$(fuzz-targets/main.bin-check): + $(EXEC)$*.bin --test.fuzztime=$(FUZZSEC)s \ + --test.fuzz='.*' --test.fuzzcachedir=tests/fuzz/corpus + +fuzz: $(fuzz-targets/main.bin-check) + + + +benchmarks/main.bin-check = $(benchmarks/main.go:.go=.bin-check) +$(benchmarks/main.bin-check): + rm -f $*.txt + printf '%s\n' '$(EXEC)$*.bin' >> $*.txt + LANG=POSIX.UTF-8 time -p $(EXEC)$*.bin 2>> $*.txt + printf '%s\n' '$*.txt' + +bench: $(benchmarks/main.bin-check) + + + +## Remove *all* derived artifacts produced during the build. +## A dedicated test asserts that this is always true. +clean: + rm -rf $(derived-assets) $(side-assets) + + +## Installs into $(DESTDIR)$(PREFIX). Its dependency target +## ensures that all installable artifacts are crafted beforehand. +install: all + mkdir -p \ + '$(DESTDIR)$(BINDIR)' \ + '$(DESTDIR)$(GOLIBDIR)' \ + '$(DESTDIR)$(SRCDIR)' \ + + cp $(NAME).bin '$(DESTDIR)$(BINDIR)'/$(NAME) + cp src/$(NAME).a '$(DESTDIR)$(GOLIBDIR)' + cp $(sources) '$(DESTDIR)$(SRCDIR)' + +## Uninstalls from $(DESTDIR)$(PREFIX). This is a perfect mirror +## of the "install" target, and removes *all* that was installed. +## A dedicated test asserts that this is always true. +uninstall: + rm -rf \ + '$(DESTDIR)$(BINDIR)'/$(NAME) \ + '$(DESTDIR)$(GOLIBDIR)'/$(NAME).a \ + '$(DESTDIR)$(SRCDIR)' \ + + + +ALWAYS: diff --git a/cmd/bolt/main.go b/cmd/bolt/main.go deleted file mode 100644 index 057eca5..0000000 --- a/cmd/bolt/main.go +++ /dev/null @@ -1,1740 +0,0 @@ -package main - -import ( - "bytes" - "encoding/binary" - "errors" - "flag" - "fmt" - "io" - "io/ioutil" - "math/rand" - "os" - "runtime" - "runtime/pprof" - "strconv" - "strings" - "time" - "unicode" - "unicode/utf8" - "unsafe" - - "github.com/boltdb/bolt" -) - -var ( - // ErrUsage is returned when a usage message was printed and the process - // should simply exit with an error. - ErrUsage = errors.New("usage") - - // ErrUnknownCommand is returned when a CLI command is not recognized. - ErrUnknownCommand = errors.New("unknown command") - - // ErrPathRequired is returned when the path to a Bolt database is not specified. - ErrPathRequired = errors.New("path required") - - // ErrFileNotFound is returned when a Bolt database does not exist. - ErrFileNotFound = errors.New("file not found") - - // ErrInvalidValue is returned when a benchmark reads an unexpected value. - ErrInvalidValue = errors.New("invalid value") - - // ErrCorrupt is returned when checking a data file finds errors. - ErrCorrupt = errors.New("corrupt") - - // ErrNonDivisibleBatchSize is returned when the iteration count can't be evenly - // divided by the batch size. - ErrNonDivisibleBatchSize = errors.New("number of iterations must be divisible by the batch size") - - // ErrPageIDRequired is returned when a required page id is not specified. - ErrPageIDRequired = errors.New("page id required") - - // ErrPageNotFound is returned when specifying a page above the high water mark. - ErrPageNotFound = errors.New("page not found") - - // ErrPageFreed is returned when reading a page that has already been freed. - ErrPageFreed = errors.New("page freed") -) - -// PageHeaderSize represents the size of the bolt.page header.
-const PageHeaderSize = 16 - -func main() { - m := NewMain() - if err := m.Run(os.Args[1:]...); err == ErrUsage { - os.Exit(2) - } else if err != nil { - fmt.Println(err.Error()) - os.Exit(1) - } -} - -// Main represents the main program execution. -type Main struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// NewMain returns a new instance of Main connect to the standard input/output. -func NewMain() *Main { - return &Main{ - Stdin: os.Stdin, - Stdout: os.Stdout, - Stderr: os.Stderr, - } -} - -// Run executes the program. -func (m *Main) Run(args ...string) error { - // Require a command at the beginning. - if len(args) == 0 || strings.HasPrefix(args[0], "-") { - fmt.Fprintln(m.Stderr, m.Usage()) - return ErrUsage - } - - // Execute command. - switch args[0] { - case "help": - fmt.Fprintln(m.Stderr, m.Usage()) - return ErrUsage - case "bench": - return newBenchCommand(m).Run(args[1:]...) - case "check": - return newCheckCommand(m).Run(args[1:]...) - case "compact": - return newCompactCommand(m).Run(args[1:]...) - case "dump": - return newDumpCommand(m).Run(args[1:]...) - case "info": - return newInfoCommand(m).Run(args[1:]...) - case "page": - return newPageCommand(m).Run(args[1:]...) - case "pages": - return newPagesCommand(m).Run(args[1:]...) - case "stats": - return newStatsCommand(m).Run(args[1:]...) - default: - return ErrUnknownCommand - } -} - -// Usage returns the help message. -func (m *Main) Usage() string { - return strings.TrimLeft(` -Bolt is a tool for inspecting bolt databases. - -Usage: - - bolt command [arguments] - -The commands are: - - bench run synthetic benchmark against bolt - check verifies integrity of bolt database - compact copies a bolt database, compacting it in the process - info print basic info - help print this screen - pages print list of pages with their types - stats iterate over all pages and generate usage stats - -Use "bolt [command] -h" for more information about a command. -`, "\n") -} - -// CheckCommand represents the "check" command execution. -type CheckCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// NewCheckCommand returns a CheckCommand. -func newCheckCommand(m *Main) *CheckCommand { - return &CheckCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. -func (cmd *CheckCommand) Run(args ...string) error { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - help := fs.Bool("h", false, "") - if err := fs.Parse(args); err != nil { - return err - } else if *help { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } - - // Require database path. - path := fs.Arg(0) - if path == "" { - return ErrPathRequired - } else if _, err := os.Stat(path); os.IsNotExist(err) { - return ErrFileNotFound - } - - // Open database. - db, err := bolt.Open(path, 0666, nil) - if err != nil { - return err - } - defer db.Close() - - // Perform consistency check. - return db.View(func(tx *bolt.Tx) error { - var count int - ch := tx.Check() - loop: - for { - select { - case err, ok := <-ch: - if !ok { - break loop - } - fmt.Fprintln(cmd.Stdout, err) - count++ - } - } - - // Print summary of errors. - if count > 0 { - fmt.Fprintf(cmd.Stdout, "%d errors found\n", count) - return ErrCorrupt - } - - // Notify user that database is valid. - fmt.Fprintln(cmd.Stdout, "OK") - return nil - }) -} - -// Usage returns the help message. 
-func (cmd *CheckCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt check PATH - -Check opens a database at PATH and runs an exhaustive check to verify that -all pages are accessible or are marked as freed. It also verifies that no -pages are double referenced. - -Verification errors will stream out as they are found and the process will -return after all pages have been checked. -`, "\n") -} - -// InfoCommand represents the "info" command execution. -type InfoCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// NewInfoCommand returns a InfoCommand. -func newInfoCommand(m *Main) *InfoCommand { - return &InfoCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. -func (cmd *InfoCommand) Run(args ...string) error { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - help := fs.Bool("h", false, "") - if err := fs.Parse(args); err != nil { - return err - } else if *help { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } - - // Require database path. - path := fs.Arg(0) - if path == "" { - return ErrPathRequired - } else if _, err := os.Stat(path); os.IsNotExist(err) { - return ErrFileNotFound - } - - // Open the database. - db, err := bolt.Open(path, 0666, nil) - if err != nil { - return err - } - defer db.Close() - - // Print basic database info. - info := db.Info() - fmt.Fprintf(cmd.Stdout, "Page Size: %d\n", info.PageSize) - - return nil -} - -// Usage returns the help message. -func (cmd *InfoCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt info PATH - -Info prints basic information about the Bolt database at PATH. -`, "\n") -} - -// DumpCommand represents the "dump" command execution. -type DumpCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// newDumpCommand returns a DumpCommand. -func newDumpCommand(m *Main) *DumpCommand { - return &DumpCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. -func (cmd *DumpCommand) Run(args ...string) error { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - help := fs.Bool("h", false, "") - if err := fs.Parse(args); err != nil { - return err - } else if *help { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } - - // Require database path and page id. - path := fs.Arg(0) - if path == "" { - return ErrPathRequired - } else if _, err := os.Stat(path); os.IsNotExist(err) { - return ErrFileNotFound - } - - // Read page ids. - pageIDs, err := atois(fs.Args()[1:]) - if err != nil { - return err - } else if len(pageIDs) == 0 { - return ErrPageIDRequired - } - - // Open database to retrieve page size. - pageSize, err := ReadPageSize(path) - if err != nil { - return err - } - - // Open database file handler. - f, err := os.Open(path) - if err != nil { - return err - } - defer func() { _ = f.Close() }() - - // Print each page listed. - for i, pageID := range pageIDs { - // Print a separator. - if i > 0 { - fmt.Fprintln(cmd.Stdout, "===============================================") - } - - // Print page to stdout. - if err := cmd.PrintPage(cmd.Stdout, f, pageID, pageSize); err != nil { - return err - } - } - - return nil -} - -// PrintPage prints a given page as hexadecimal. -func (cmd *DumpCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSize int) error { - const bytesPerLineN = 16 - - // Read page into buffer. 
- buf := make([]byte, pageSize) - addr := pageID * pageSize - if n, err := r.ReadAt(buf, int64(addr)); err != nil { - return err - } else if n != pageSize { - return io.ErrUnexpectedEOF - } - - // Write out to writer in 16-byte lines. - var prev []byte - var skipped bool - for offset := 0; offset < pageSize; offset += bytesPerLineN { - // Retrieve current 16-byte line. - line := buf[offset : offset+bytesPerLineN] - isLastLine := (offset == (pageSize - bytesPerLineN)) - - // If it's the same as the previous line then print a skip. - if bytes.Equal(line, prev) && !isLastLine { - if !skipped { - fmt.Fprintf(w, "%07x *\n", addr+offset) - skipped = true - } - } else { - // Print line as hexadecimal in 2-byte groups. - fmt.Fprintf(w, "%07x %04x %04x %04x %04x %04x %04x %04x %04x\n", addr+offset, - line[0:2], line[2:4], line[4:6], line[6:8], - line[8:10], line[10:12], line[12:14], line[14:16], - ) - - skipped = false - } - - // Save the previous line. - prev = line - } - fmt.Fprint(w, "\n") - - return nil -} - -// Usage returns the help message. -func (cmd *DumpCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt dump -page PAGEID PATH - -Dump prints a hexadecimal dump of a single page. -`, "\n") -} - -// PageCommand represents the "page" command execution. -type PageCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// newPageCommand returns a PageCommand. -func newPageCommand(m *Main) *PageCommand { - return &PageCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. -func (cmd *PageCommand) Run(args ...string) error { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - help := fs.Bool("h", false, "") - if err := fs.Parse(args); err != nil { - return err - } else if *help { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } - - // Require database path and page id. - path := fs.Arg(0) - if path == "" { - return ErrPathRequired - } else if _, err := os.Stat(path); os.IsNotExist(err) { - return ErrFileNotFound - } - - // Read page ids. - pageIDs, err := atois(fs.Args()[1:]) - if err != nil { - return err - } else if len(pageIDs) == 0 { - return ErrPageIDRequired - } - - // Open database file handler. - f, err := os.Open(path) - if err != nil { - return err - } - defer func() { _ = f.Close() }() - - // Print each page listed. - for i, pageID := range pageIDs { - // Print a separator. - if i > 0 { - fmt.Fprintln(cmd.Stdout, "===============================================") - } - - // Retrieve page info and page size. - p, buf, err := ReadPage(path, pageID) - if err != nil { - return err - } - - // Print basic page info. - fmt.Fprintf(cmd.Stdout, "Page ID: %d\n", p.id) - fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.Type()) - fmt.Fprintf(cmd.Stdout, "Total Size: %d bytes\n", len(buf)) - - // Print type-specific data. - switch p.Type() { - case "meta": - err = cmd.PrintMeta(cmd.Stdout, buf) - case "leaf": - err = cmd.PrintLeaf(cmd.Stdout, buf) - case "branch": - err = cmd.PrintBranch(cmd.Stdout, buf) - case "freelist": - err = cmd.PrintFreelist(cmd.Stdout, buf) - } - if err != nil { - return err - } - } - - return nil -} - -// PrintMeta prints the data from the meta page. 
-func (cmd *PageCommand) PrintMeta(w io.Writer, buf []byte) error { - m := (*meta)(unsafe.Pointer(&buf[PageHeaderSize])) - fmt.Fprintf(w, "Version: %d\n", m.version) - fmt.Fprintf(w, "Page Size: %d bytes\n", m.pageSize) - fmt.Fprintf(w, "Flags: %08x\n", m.flags) - fmt.Fprintf(w, "Root: <pgid=%d>\n", m.root.root) - fmt.Fprintf(w, "Freelist: <pgid=%d>\n", m.freelist) - fmt.Fprintf(w, "HWM: <pgid=%d>\n", m.pgid) - fmt.Fprintf(w, "Txn ID: %d\n", m.txid) - fmt.Fprintf(w, "Checksum: %016x\n", m.checksum) - fmt.Fprintf(w, "\n") - return nil -} - -// PrintLeaf prints the data for a leaf page. -func (cmd *PageCommand) PrintLeaf(w io.Writer, buf []byte) error { - p := (*page)(unsafe.Pointer(&buf[0])) - - // Print number of items. - fmt.Fprintf(w, "Item Count: %d\n", p.count) - fmt.Fprintf(w, "\n") - - // Print each key/value. - for i := uint16(0); i < p.count; i++ { - e := p.leafPageElement(i) - - // Format key as string. - var k string - if isPrintable(string(e.key())) { - k = fmt.Sprintf("%q", string(e.key())) - } else { - k = fmt.Sprintf("%x", string(e.key())) - } - - // Format value as string. - var v string - if (e.flags & uint32(bucketLeafFlag)) != 0 { - b := (*bucket)(unsafe.Pointer(&e.value()[0])) - v = fmt.Sprintf("<pgid=%d,seq=%d>", b.root, b.sequence) - } else if isPrintable(string(e.value())) { - v = fmt.Sprintf("%q", string(e.value())) - } else { - v = fmt.Sprintf("%x", string(e.value())) - } - - fmt.Fprintf(w, "%s: %s\n", k, v) - } - fmt.Fprintf(w, "\n") - return nil -} - -// PrintBranch prints the data for a leaf page. -func (cmd *PageCommand) PrintBranch(w io.Writer, buf []byte) error { - p := (*page)(unsafe.Pointer(&buf[0])) - - // Print number of items. - fmt.Fprintf(w, "Item Count: %d\n", p.count) - fmt.Fprintf(w, "\n") - - // Print each key/value. - for i := uint16(0); i < p.count; i++ { - e := p.branchPageElement(i) - - // Format key as string. - var k string - if isPrintable(string(e.key())) { - k = fmt.Sprintf("%q", string(e.key())) - } else { - k = fmt.Sprintf("%x", string(e.key())) - } - - fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.pgid) - } - fmt.Fprintf(w, "\n") - return nil -} - -// PrintFreelist prints the data for a freelist page. -func (cmd *PageCommand) PrintFreelist(w io.Writer, buf []byte) error { - p := (*page)(unsafe.Pointer(&buf[0])) - - // Print number of items. - fmt.Fprintf(w, "Item Count: %d\n", p.count) - fmt.Fprintf(w, "\n") - - // Print each page in the freelist. - ids := (*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)) - for i := uint16(0); i < p.count; i++ { - fmt.Fprintf(w, "%d\n", ids[i]) - } - fmt.Fprintf(w, "\n") - return nil -} - -// PrintPage prints a given page as hexadecimal. -func (cmd *PageCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSize int) error { - const bytesPerLineN = 16 - - // Read page into buffer. - buf := make([]byte, pageSize) - addr := pageID * pageSize - if n, err := r.ReadAt(buf, int64(addr)); err != nil { - return err - } else if n != pageSize { - return io.ErrUnexpectedEOF - } - - // Write out to writer in 16-byte lines. - var prev []byte - var skipped bool - for offset := 0; offset < pageSize; offset += bytesPerLineN { - // Retrieve current 16-byte line. - line := buf[offset : offset+bytesPerLineN] - isLastLine := (offset == (pageSize - bytesPerLineN)) - - // If it's the same as the previous line then print a skip. - if bytes.Equal(line, prev) && !isLastLine { - if !skipped { - fmt.Fprintf(w, "%07x *\n", addr+offset) - skipped = true - } - } else { - // Print line as hexadecimal in 2-byte groups. 
- fmt.Fprintf(w, "%07x %04x %04x %04x %04x %04x %04x %04x %04x\n", addr+offset, - line[0:2], line[2:4], line[4:6], line[6:8], - line[8:10], line[10:12], line[12:14], line[14:16], - ) - - skipped = false - } - - // Save the previous line. - prev = line - } - fmt.Fprint(w, "\n") - - return nil -} - -// Usage returns the help message. -func (cmd *PageCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt page -page PATH pageid [pageid...] - -Page prints one or more pages in human readable format. -`, "\n") -} - -// PagesCommand represents the "pages" command execution. -type PagesCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// NewPagesCommand returns a PagesCommand. -func newPagesCommand(m *Main) *PagesCommand { - return &PagesCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. -func (cmd *PagesCommand) Run(args ...string) error { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - help := fs.Bool("h", false, "") - if err := fs.Parse(args); err != nil { - return err - } else if *help { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } - - // Require database path. - path := fs.Arg(0) - if path == "" { - return ErrPathRequired - } else if _, err := os.Stat(path); os.IsNotExist(err) { - return ErrFileNotFound - } - - // Open database. - db, err := bolt.Open(path, 0666, nil) - if err != nil { - return err - } - defer func() { _ = db.Close() }() - - // Write header. - fmt.Fprintln(cmd.Stdout, "ID TYPE ITEMS OVRFLW") - fmt.Fprintln(cmd.Stdout, "======== ========== ====== ======") - - return db.Update(func(tx *bolt.Tx) error { - var id int - for { - p, err := tx.Page(id) - if err != nil { - return &PageError{ID: id, Err: err} - } else if p == nil { - break - } - - // Only display count and overflow if this is a non-free page. - var count, overflow string - if p.Type != "free" { - count = strconv.Itoa(p.Count) - if p.OverflowCount > 0 { - overflow = strconv.Itoa(p.OverflowCount) - } - } - - // Print table row. - fmt.Fprintf(cmd.Stdout, "%-8d %-10s %-6s %-6s\n", p.ID, p.Type, count, overflow) - - // Move to the next non-overflow page. - id += 1 - if p.Type != "free" { - id += p.OverflowCount - } - } - return nil - }) -} - -// Usage returns the help message. -func (cmd *PagesCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt pages PATH - -Pages prints a table of pages with their type (meta, leaf, branch, freelist). -Leaf and branch pages will show a key count in the "items" column while the -freelist will show the number of free pages in the "items" column. - -The "overflow" column shows the number of blocks that the page spills over -into. Normally there is no overflow but large keys and values can cause -a single page to take up multiple blocks. -`, "\n") -} - -// StatsCommand represents the "stats" command execution. -type StatsCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// NewStatsCommand returns a StatsCommand. -func newStatsCommand(m *Main) *StatsCommand { - return &StatsCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. -func (cmd *StatsCommand) Run(args ...string) error { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - help := fs.Bool("h", false, "") - if err := fs.Parse(args); err != nil { - return err - } else if *help { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } - - // Require database path. 
- path, prefix := fs.Arg(0), fs.Arg(1) - if path == "" { - return ErrPathRequired - } else if _, err := os.Stat(path); os.IsNotExist(err) { - return ErrFileNotFound - } - - // Open database. - db, err := bolt.Open(path, 0666, nil) - if err != nil { - return err - } - defer db.Close() - - return db.View(func(tx *bolt.Tx) error { - var s bolt.BucketStats - var count int - if err := tx.ForEach(func(name []byte, b *bolt.Bucket) error { - if bytes.HasPrefix(name, []byte(prefix)) { - s.Add(b.Stats()) - count += 1 - } - return nil - }); err != nil { - return err - } - - fmt.Fprintf(cmd.Stdout, "Aggregate statistics for %d buckets\n\n", count) - - fmt.Fprintln(cmd.Stdout, "Page count statistics") - fmt.Fprintf(cmd.Stdout, "\tNumber of logical branch pages: %d\n", s.BranchPageN) - fmt.Fprintf(cmd.Stdout, "\tNumber of physical branch overflow pages: %d\n", s.BranchOverflowN) - fmt.Fprintf(cmd.Stdout, "\tNumber of logical leaf pages: %d\n", s.LeafPageN) - fmt.Fprintf(cmd.Stdout, "\tNumber of physical leaf overflow pages: %d\n", s.LeafOverflowN) - - fmt.Fprintln(cmd.Stdout, "Tree statistics") - fmt.Fprintf(cmd.Stdout, "\tNumber of keys/value pairs: %d\n", s.KeyN) - fmt.Fprintf(cmd.Stdout, "\tNumber of levels in B+tree: %d\n", s.Depth) - - fmt.Fprintln(cmd.Stdout, "Page size utilization") - fmt.Fprintf(cmd.Stdout, "\tBytes allocated for physical branch pages: %d\n", s.BranchAlloc) - var percentage int - if s.BranchAlloc != 0 { - percentage = int(float32(s.BranchInuse) * 100.0 / float32(s.BranchAlloc)) - } - fmt.Fprintf(cmd.Stdout, "\tBytes actually used for branch data: %d (%d%%)\n", s.BranchInuse, percentage) - fmt.Fprintf(cmd.Stdout, "\tBytes allocated for physical leaf pages: %d\n", s.LeafAlloc) - percentage = 0 - if s.LeafAlloc != 0 { - percentage = int(float32(s.LeafInuse) * 100.0 / float32(s.LeafAlloc)) - } - fmt.Fprintf(cmd.Stdout, "\tBytes actually used for leaf data: %d (%d%%)\n", s.LeafInuse, percentage) - - fmt.Fprintln(cmd.Stdout, "Bucket statistics") - fmt.Fprintf(cmd.Stdout, "\tTotal number of buckets: %d\n", s.BucketN) - percentage = 0 - if s.BucketN != 0 { - percentage = int(float32(s.InlineBucketN) * 100.0 / float32(s.BucketN)) - } - fmt.Fprintf(cmd.Stdout, "\tTotal number on inlined buckets: %d (%d%%)\n", s.InlineBucketN, percentage) - percentage = 0 - if s.LeafInuse != 0 { - percentage = int(float32(s.InlineBucketInuse) * 100.0 / float32(s.LeafInuse)) - } - fmt.Fprintf(cmd.Stdout, "\tBytes used for inlined buckets: %d (%d%%)\n", s.InlineBucketInuse, percentage) - - return nil - }) -} - -// Usage returns the help message. -func (cmd *StatsCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt stats PATH - -Stats performs an extensive search of the database to track every page -reference. It starts at the current meta page and recursively iterates -through every accessible bucket. - -The following errors can be reported: - - already freed - The page is referenced more than once in the freelist. - - unreachable unfreed - The page is not referenced by a bucket or in the freelist. - - reachable freed - The page is referenced by a bucket but is also in the freelist. - - out of bounds - A page is referenced that is above the high water mark. - - multiple references - A page is referenced by more than one other page. - - invalid type - The page type is not "meta", "leaf", "branch", or "freelist". - -No errors should occur in your database. 
However, if for some reason you -experience corruption, please submit a ticket to the Bolt project page: - - https://github.com/boltdb/bolt/issues -`, "\n") -} - -var benchBucketName = []byte("bench") - -// BenchCommand represents the "bench" command execution. -type BenchCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer -} - -// NewBenchCommand returns a BenchCommand using the -func newBenchCommand(m *Main) *BenchCommand { - return &BenchCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the "bench" command. -func (cmd *BenchCommand) Run(args ...string) error { - // Parse CLI arguments. - options, err := cmd.ParseFlags(args) - if err != nil { - return err - } - - // Remove path if "-work" is not set. Otherwise keep path. - if options.Work { - fmt.Fprintf(cmd.Stdout, "work: %s\n", options.Path) - } else { - defer os.Remove(options.Path) - } - - // Create database. - db, err := bolt.Open(options.Path, 0666, nil) - if err != nil { - return err - } - db.NoSync = options.NoSync - defer db.Close() - - // Write to the database. - var results BenchResults - if err := cmd.runWrites(db, options, &results); err != nil { - return fmt.Errorf("write: %v", err) - } - - // Read from the database. - if err := cmd.runReads(db, options, &results); err != nil { - return fmt.Errorf("bench: read: %s", err) - } - - // Print results. - fmt.Fprintf(os.Stderr, "# Write\t%v\t(%v/op)\t(%v op/sec)\n", results.WriteDuration, results.WriteOpDuration(), results.WriteOpsPerSecond()) - fmt.Fprintf(os.Stderr, "# Read\t%v\t(%v/op)\t(%v op/sec)\n", results.ReadDuration, results.ReadOpDuration(), results.ReadOpsPerSecond()) - fmt.Fprintln(os.Stderr, "") - return nil -} - -// ParseFlags parses the command line flags. -func (cmd *BenchCommand) ParseFlags(args []string) (*BenchOptions, error) { - var options BenchOptions - - // Parse flagset. - fs := flag.NewFlagSet("", flag.ContinueOnError) - fs.StringVar(&options.ProfileMode, "profile-mode", "rw", "") - fs.StringVar(&options.WriteMode, "write-mode", "seq", "") - fs.StringVar(&options.ReadMode, "read-mode", "seq", "") - fs.IntVar(&options.Iterations, "count", 1000, "") - fs.IntVar(&options.BatchSize, "batch-size", 0, "") - fs.IntVar(&options.KeySize, "key-size", 8, "") - fs.IntVar(&options.ValueSize, "value-size", 32, "") - fs.StringVar(&options.CPUProfile, "cpuprofile", "", "") - fs.StringVar(&options.MemProfile, "memprofile", "", "") - fs.StringVar(&options.BlockProfile, "blockprofile", "", "") - fs.Float64Var(&options.FillPercent, "fill-percent", bolt.DefaultFillPercent, "") - fs.BoolVar(&options.NoSync, "no-sync", false, "") - fs.BoolVar(&options.Work, "work", false, "") - fs.StringVar(&options.Path, "path", "", "") - fs.SetOutput(cmd.Stderr) - if err := fs.Parse(args); err != nil { - return nil, err - } - - // Set batch size to iteration size if not set. - // Require that batch size can be evenly divided by the iteration count. - if options.BatchSize == 0 { - options.BatchSize = options.Iterations - } else if options.Iterations%options.BatchSize != 0 { - return nil, ErrNonDivisibleBatchSize - } - - // Generate temp path if one is not passed in. - if options.Path == "" { - f, err := ioutil.TempFile("", "bolt-bench-") - if err != nil { - return nil, fmt.Errorf("temp file: %s", err) - } - f.Close() - os.Remove(f.Name()) - options.Path = f.Name() - } - - return &options, nil -} - -// Writes to the database. 
-func (cmd *BenchCommand) runWrites(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - // Start profiling for writes. - if options.ProfileMode == "rw" || options.ProfileMode == "w" { - cmd.startProfiling(options) - } - - t := time.Now() - - var err error - switch options.WriteMode { - case "seq": - err = cmd.runWritesSequential(db, options, results) - case "rnd": - err = cmd.runWritesRandom(db, options, results) - case "seq-nest": - err = cmd.runWritesSequentialNested(db, options, results) - case "rnd-nest": - err = cmd.runWritesRandomNested(db, options, results) - default: - return fmt.Errorf("invalid write mode: %s", options.WriteMode) - } - - // Save time to write. - results.WriteDuration = time.Since(t) - - // Stop profiling for writes only. - if options.ProfileMode == "w" { - cmd.stopProfiling() - } - - return err -} - -func (cmd *BenchCommand) runWritesSequential(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - var i = uint32(0) - return cmd.runWritesWithSource(db, options, results, func() uint32 { i++; return i }) -} - -func (cmd *BenchCommand) runWritesRandom(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - r := rand.New(rand.NewSource(time.Now().UnixNano())) - return cmd.runWritesWithSource(db, options, results, func() uint32 { return r.Uint32() }) -} - -func (cmd *BenchCommand) runWritesSequentialNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - var i = uint32(0) - return cmd.runWritesWithSource(db, options, results, func() uint32 { i++; return i }) -} - -func (cmd *BenchCommand) runWritesRandomNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - r := rand.New(rand.NewSource(time.Now().UnixNano())) - return cmd.runWritesWithSource(db, options, results, func() uint32 { return r.Uint32() }) -} - -func (cmd *BenchCommand) runWritesWithSource(db *bolt.DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error { - results.WriteOps = options.Iterations - - for i := 0; i < options.Iterations; i += options.BatchSize { - if err := db.Update(func(tx *bolt.Tx) error { - b, _ := tx.CreateBucketIfNotExists(benchBucketName) - b.FillPercent = options.FillPercent - - for j := 0; j < options.BatchSize; j++ { - key := make([]byte, options.KeySize) - value := make([]byte, options.ValueSize) - - // Write key as uint32. - binary.BigEndian.PutUint32(key, keySource()) - - // Insert key/value. - if err := b.Put(key, value); err != nil { - return err - } - } - - return nil - }); err != nil { - return err - } - } - return nil -} - -func (cmd *BenchCommand) runWritesNestedWithSource(db *bolt.DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error { - results.WriteOps = options.Iterations - - for i := 0; i < options.Iterations; i += options.BatchSize { - if err := db.Update(func(tx *bolt.Tx) error { - top, err := tx.CreateBucketIfNotExists(benchBucketName) - if err != nil { - return err - } - top.FillPercent = options.FillPercent - - // Create bucket key. - name := make([]byte, options.KeySize) - binary.BigEndian.PutUint32(name, keySource()) - - // Create bucket. - b, err := top.CreateBucketIfNotExists(name) - if err != nil { - return err - } - b.FillPercent = options.FillPercent - - for j := 0; j < options.BatchSize; j++ { - var key = make([]byte, options.KeySize) - var value = make([]byte, options.ValueSize) - - // Generate key as uint32. - binary.BigEndian.PutUint32(key, keySource()) - - // Insert value into subbucket. 
- if err := b.Put(key, value); err != nil { - return err - } - } - - return nil - }); err != nil { - return err - } - } - return nil -} - -// Reads from the database. -func (cmd *BenchCommand) runReads(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - // Start profiling for reads. - if options.ProfileMode == "r" { - cmd.startProfiling(options) - } - - t := time.Now() - - var err error - switch options.ReadMode { - case "seq": - switch options.WriteMode { - case "seq-nest", "rnd-nest": - err = cmd.runReadsSequentialNested(db, options, results) - default: - err = cmd.runReadsSequential(db, options, results) - } - default: - return fmt.Errorf("invalid read mode: %s", options.ReadMode) - } - - // Save read time. - results.ReadDuration = time.Since(t) - - // Stop profiling for reads. - if options.ProfileMode == "rw" || options.ProfileMode == "r" { - cmd.stopProfiling() - } - - return err -} - -func (cmd *BenchCommand) runReadsSequential(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - return db.View(func(tx *bolt.Tx) error { - t := time.Now() - - for { - var count int - - c := tx.Bucket(benchBucketName).Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - if v == nil { - return errors.New("invalid value") - } - count++ - } - - if options.WriteMode == "seq" && count != options.Iterations { - return fmt.Errorf("read seq: iter mismatch: expected %d, got %d", options.Iterations, count) - } - - results.ReadOps += count - - // Make sure we do this for at least a second. - if time.Since(t) >= time.Second { - break - } - } - - return nil - }) -} - -func (cmd *BenchCommand) runReadsSequentialNested(db *bolt.DB, options *BenchOptions, results *BenchResults) error { - return db.View(func(tx *bolt.Tx) error { - t := time.Now() - - for { - var count int - var top = tx.Bucket(benchBucketName) - if err := top.ForEach(func(name, _ []byte) error { - c := top.Bucket(name).Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - if v == nil { - return ErrInvalidValue - } - count++ - } - return nil - }); err != nil { - return err - } - - if options.WriteMode == "seq-nest" && count != options.Iterations { - return fmt.Errorf("read seq-nest: iter mismatch: expected %d, got %d", options.Iterations, count) - } - - results.ReadOps += count - - // Make sure we do this for at least a second. - if time.Since(t) >= time.Second { - break - } - } - - return nil - }) -} - -// File handlers for the various profiles. -var cpuprofile, memprofile, blockprofile *os.File - -// Starts all profiles set on the options. -func (cmd *BenchCommand) startProfiling(options *BenchOptions) { - var err error - - // Start CPU profiling. - if options.CPUProfile != "" { - cpuprofile, err = os.Create(options.CPUProfile) - if err != nil { - fmt.Fprintf(cmd.Stderr, "bench: could not create cpu profile %q: %v\n", options.CPUProfile, err) - os.Exit(1) - } - pprof.StartCPUProfile(cpuprofile) - } - - // Start memory profiling. - if options.MemProfile != "" { - memprofile, err = os.Create(options.MemProfile) - if err != nil { - fmt.Fprintf(cmd.Stderr, "bench: could not create memory profile %q: %v\n", options.MemProfile, err) - os.Exit(1) - } - runtime.MemProfileRate = 4096 - } - - // Start fatal profiling. - if options.BlockProfile != "" { - blockprofile, err = os.Create(options.BlockProfile) - if err != nil { - fmt.Fprintf(cmd.Stderr, "bench: could not create block profile %q: %v\n", options.BlockProfile, err) - os.Exit(1) - } - runtime.SetBlockProfileRate(1) - } -} - -// Stops all profiles. 
-func (cmd *BenchCommand) stopProfiling() { - if cpuprofile != nil { - pprof.StopCPUProfile() - cpuprofile.Close() - cpuprofile = nil - } - - if memprofile != nil { - pprof.Lookup("heap").WriteTo(memprofile, 0) - memprofile.Close() - memprofile = nil - } - - if blockprofile != nil { - pprof.Lookup("block").WriteTo(blockprofile, 0) - blockprofile.Close() - blockprofile = nil - runtime.SetBlockProfileRate(0) - } -} - -// BenchOptions represents the set of options that can be passed to "bolt bench". -type BenchOptions struct { - ProfileMode string - WriteMode string - ReadMode string - Iterations int - BatchSize int - KeySize int - ValueSize int - CPUProfile string - MemProfile string - BlockProfile string - StatsInterval time.Duration - FillPercent float64 - NoSync bool - Work bool - Path string -} - -// BenchResults represents the performance results of the benchmark. -type BenchResults struct { - WriteOps int - WriteDuration time.Duration - ReadOps int - ReadDuration time.Duration -} - -// Returns the duration for a single write operation. -func (r *BenchResults) WriteOpDuration() time.Duration { - if r.WriteOps == 0 { - return 0 - } - return r.WriteDuration / time.Duration(r.WriteOps) -} - -// Returns average number of write operations that can be performed per second. -func (r *BenchResults) WriteOpsPerSecond() int { - var op = r.WriteOpDuration() - if op == 0 { - return 0 - } - return int(time.Second) / int(op) -} - -// Returns the duration for a single read operation. -func (r *BenchResults) ReadOpDuration() time.Duration { - if r.ReadOps == 0 { - return 0 - } - return r.ReadDuration / time.Duration(r.ReadOps) -} - -// Returns average number of read operations that can be performed per second. -func (r *BenchResults) ReadOpsPerSecond() int { - var op = r.ReadOpDuration() - if op == 0 { - return 0 - } - return int(time.Second) / int(op) -} - -type PageError struct { - ID int - Err error -} - -func (e *PageError) Error() string { - return fmt.Sprintf("page error: id=%d, err=%s", e.ID, e.Err) -} - -// isPrintable returns true if the string is valid unicode and contains only printable runes. -func isPrintable(s string) bool { - if !utf8.ValidString(s) { - return false - } - for _, ch := range s { - if !unicode.IsPrint(ch) { - return false - } - } - return true -} - -// ReadPage reads page info & full page data from a path. -// This is not transactionally safe. -func ReadPage(path string, pageID int) (*page, []byte, error) { - // Find page size. - pageSize, err := ReadPageSize(path) - if err != nil { - return nil, nil, fmt.Errorf("read page size: %s", err) - } - - // Open database file. - f, err := os.Open(path) - if err != nil { - return nil, nil, err - } - defer f.Close() - - // Read one block into buffer. - buf := make([]byte, pageSize) - if n, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil { - return nil, nil, err - } else if n != len(buf) { - return nil, nil, io.ErrUnexpectedEOF - } - - // Determine total number of blocks. - p := (*page)(unsafe.Pointer(&buf[0])) - overflowN := p.overflow - - // Re-read entire page (with overflow) into buffer. - buf = make([]byte, (int(overflowN)+1)*pageSize) - if n, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil { - return nil, nil, err - } else if n != len(buf) { - return nil, nil, io.ErrUnexpectedEOF - } - p = (*page)(unsafe.Pointer(&buf[0])) - - return p, buf, nil -} - -// ReadPageSize reads page size a path. -// This is not transactionally safe. -func ReadPageSize(path string) (int, error) { - // Open database file. 
- f, err := os.Open(path) - if err != nil { - return 0, err - } - defer f.Close() - - // Read 4KB chunk. - buf := make([]byte, 4096) - if _, err := io.ReadFull(f, buf); err != nil { - return 0, err - } - - // Read page size from metadata. - m := (*meta)(unsafe.Pointer(&buf[PageHeaderSize])) - return int(m.pageSize), nil -} - -// atois parses a slice of strings into integers. -func atois(strs []string) ([]int, error) { - var a []int - for _, str := range strs { - i, err := strconv.Atoi(str) - if err != nil { - return nil, err - } - a = append(a, i) - } - return a, nil -} - -// DO NOT EDIT. Copied from the "bolt" package. -const maxAllocSize = 0xFFFFFFF - -// DO NOT EDIT. Copied from the "bolt" package. -const ( - branchPageFlag = 0x01 - leafPageFlag = 0x02 - metaPageFlag = 0x04 - freelistPageFlag = 0x10 -) - -// DO NOT EDIT. Copied from the "bolt" package. -const bucketLeafFlag = 0x01 - -// DO NOT EDIT. Copied from the "bolt" package. -type pgid uint64 - -// DO NOT EDIT. Copied from the "bolt" package. -type txid uint64 - -// DO NOT EDIT. Copied from the "bolt" package. -type meta struct { - magic uint32 - version uint32 - pageSize uint32 - flags uint32 - root bucket - freelist pgid - pgid pgid - txid txid - checksum uint64 -} - -// DO NOT EDIT. Copied from the "bolt" package. -type bucket struct { - root pgid - sequence uint64 -} - -// DO NOT EDIT. Copied from the "bolt" package. -type page struct { - id pgid - flags uint16 - count uint16 - overflow uint32 - ptr uintptr -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (p *page) Type() string { - if (p.flags & branchPageFlag) != 0 { - return "branch" - } else if (p.flags & leafPageFlag) != 0 { - return "leaf" - } else if (p.flags & metaPageFlag) != 0 { - return "meta" - } else if (p.flags & freelistPageFlag) != 0 { - return "freelist" - } - return fmt.Sprintf("unknown<%02x>", p.flags) -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (p *page) leafPageElement(index uint16) *leafPageElement { - n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] - return n -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (p *page) branchPageElement(index uint16) *branchPageElement { - return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] -} - -// DO NOT EDIT. Copied from the "bolt" package. -type branchPageElement struct { - pos uint32 - ksize uint32 - pgid pgid -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (n *branchPageElement) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos : n.pos+n.ksize] -} - -// DO NOT EDIT. Copied from the "bolt" package. -type leafPageElement struct { - flags uint32 - pos uint32 - ksize uint32 - vsize uint32 -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (n *leafPageElement) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos : n.pos+n.ksize] -} - -// DO NOT EDIT. Copied from the "bolt" package. -func (n *leafPageElement) value() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize] -} - -// CompactCommand represents the "compact" command execution. -type CompactCommand struct { - Stdin io.Reader - Stdout io.Writer - Stderr io.Writer - - SrcPath string - DstPath string - TxMaxSize int64 -} - -// newCompactCommand returns a CompactCommand. -func newCompactCommand(m *Main) *CompactCommand { - return &CompactCommand{ - Stdin: m.Stdin, - Stdout: m.Stdout, - Stderr: m.Stderr, - } -} - -// Run executes the command. 
-func (cmd *CompactCommand) Run(args ...string) (err error) { - // Parse flags. - fs := flag.NewFlagSet("", flag.ContinueOnError) - fs.SetOutput(ioutil.Discard) - fs.StringVar(&cmd.DstPath, "o", "", "") - fs.Int64Var(&cmd.TxMaxSize, "tx-max-size", 65536, "") - if err := fs.Parse(args); err == flag.ErrHelp { - fmt.Fprintln(cmd.Stderr, cmd.Usage()) - return ErrUsage - } else if err != nil { - return err - } else if cmd.DstPath == "" { - return fmt.Errorf("output file required") - } - - // Require database paths. - cmd.SrcPath = fs.Arg(0) - if cmd.SrcPath == "" { - return ErrPathRequired - } - - // Ensure source file exists. - fi, err := os.Stat(cmd.SrcPath) - if os.IsNotExist(err) { - return ErrFileNotFound - } else if err != nil { - return err - } - initialSize := fi.Size() - - // Open source database. - src, err := bolt.Open(cmd.SrcPath, 0444, nil) - if err != nil { - return err - } - defer src.Close() - - // Open destination database. - dst, err := bolt.Open(cmd.DstPath, fi.Mode(), nil) - if err != nil { - return err - } - defer dst.Close() - - // Run compaction. - if err := cmd.compact(dst, src); err != nil { - return err - } - - // Report stats on new size. - fi, err = os.Stat(cmd.DstPath) - if err != nil { - return err - } else if fi.Size() == 0 { - return fmt.Errorf("zero db size") - } - fmt.Fprintf(cmd.Stdout, "%d -> %d bytes (gain=%.2fx)\n", initialSize, fi.Size(), float64(initialSize)/float64(fi.Size())) - - return nil -} - -func (cmd *CompactCommand) compact(dst, src *bolt.DB) error { - // commit regularly, or we'll run out of memory for large datasets if using one transaction. - var size int64 - tx, err := dst.Begin(true) - if err != nil { - return err - } - defer tx.Rollback() - - if err := cmd.walk(src, func(keys [][]byte, k, v []byte, seq uint64) error { - // On each key/value, check if we have exceeded tx size. - sz := int64(len(k) + len(v)) - if size+sz > cmd.TxMaxSize && cmd.TxMaxSize != 0 { - // Commit previous transaction. - if err := tx.Commit(); err != nil { - return err - } - - // Start new transaction. - tx, err = dst.Begin(true) - if err != nil { - return err - } - size = 0 - } - size += sz - - // Create bucket on the root transaction if this is the first level. - nk := len(keys) - if nk == 0 { - bkt, err := tx.CreateBucket(k) - if err != nil { - return err - } - if err := bkt.SetSequence(seq); err != nil { - return err - } - return nil - } - - // Create buckets on subsequent levels, if necessary. - b := tx.Bucket(keys[0]) - if nk > 1 { - for _, k := range keys[1:] { - b = b.Bucket(k) - } - } - - // If there is no value then this is a bucket call. - if v == nil { - bkt, err := b.CreateBucket(k) - if err != nil { - return err - } - if err := bkt.SetSequence(seq); err != nil { - return err - } - return nil - } - - // Otherwise treat it as a key/value pair. - return b.Put(k, v) - }); err != nil { - return err - } - - return tx.Commit() -} - -// walkFunc is the type of the function called for keys (buckets and "normal" -// values) discovered by Walk. keys is the list of keys to descend to the bucket -// owning the discovered key/value pair k/v. -type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error - -// walk walks recursively the bolt database db, calling walkFn for each key it finds. 
-func (cmd *CompactCommand) walk(db *bolt.DB, walkFn walkFunc) error { - return db.View(func(tx *bolt.Tx) error { - return tx.ForEach(func(name []byte, b *bolt.Bucket) error { - return cmd.walkBucket(b, nil, name, nil, b.Sequence(), walkFn) - }) - }) -} - -func (cmd *CompactCommand) walkBucket(b *bolt.Bucket, keypath [][]byte, k, v []byte, seq uint64, fn walkFunc) error { - // Execute callback. - if err := fn(keypath, k, v, seq); err != nil { - return err - } - - // If this is not a bucket then stop. - if v != nil { - return nil - } - - // Iterate over each child key/value. - keypath = append(keypath, k) - return b.ForEach(func(k, v []byte) error { - if v == nil { - bkt := b.Bucket(k) - return cmd.walkBucket(bkt, keypath, k, nil, bkt.Sequence(), fn) - } - return cmd.walkBucket(b, keypath, k, v, b.Sequence(), fn) - }) -} - -// Usage returns the help message. -func (cmd *CompactCommand) Usage() string { - return strings.TrimLeft(` -usage: bolt compact [options] -o DST SRC - -Compact opens a database at SRC path and walks it recursively, copying keys -as they are found from all buckets, to a newly created database at DST path. - -The original database is left untouched. - -Additional options include: - - -tx-max-size NUM - Specifies the maximum size of individual transactions. - Defaults to 64KB. -`, "\n") -} diff --git a/cmd/bolt/main_test.go b/cmd/bolt/main_test.go deleted file mode 100644 index 0a11ff3..0000000 --- a/cmd/bolt/main_test.go +++ /dev/null @@ -1,356 +0,0 @@ -package main_test - -import ( - "bytes" - crypto "crypto/rand" - "encoding/binary" - "fmt" - "io" - "io/ioutil" - "math/rand" - "os" - "strconv" - "testing" - - "github.com/boltdb/bolt" - "github.com/boltdb/bolt/cmd/bolt" -) - -// Ensure the "info" command can print information about a database. -func TestInfoCommand_Run(t *testing.T) { - db := MustOpen(0666, nil) - db.DB.Close() - defer db.Close() - - // Run the info command. - m := NewMain() - if err := m.Run("info", db.Path); err != nil { - t.Fatal(err) - } -} - -// Ensure the "stats" command executes correctly with an empty database. -func TestStatsCommand_Run_EmptyDatabase(t *testing.T) { - // Ignore - if os.Getpagesize() != 4096 { - t.Skip("system does not use 4KB page size") - } - - db := MustOpen(0666, nil) - defer db.Close() - db.DB.Close() - - // Generate expected result. - exp := "Aggregate statistics for 0 buckets\n\n" + - "Page count statistics\n" + - "\tNumber of logical branch pages: 0\n" + - "\tNumber of physical branch overflow pages: 0\n" + - "\tNumber of logical leaf pages: 0\n" + - "\tNumber of physical leaf overflow pages: 0\n" + - "Tree statistics\n" + - "\tNumber of keys/value pairs: 0\n" + - "\tNumber of levels in B+tree: 0\n" + - "Page size utilization\n" + - "\tBytes allocated for physical branch pages: 0\n" + - "\tBytes actually used for branch data: 0 (0%)\n" + - "\tBytes allocated for physical leaf pages: 0\n" + - "\tBytes actually used for leaf data: 0 (0%)\n" + - "Bucket statistics\n" + - "\tTotal number of buckets: 0\n" + - "\tTotal number on inlined buckets: 0 (0%)\n" + - "\tBytes used for inlined buckets: 0 (0%)\n" - - // Run the command. - m := NewMain() - if err := m.Run("stats", db.Path); err != nil { - t.Fatal(err) - } else if m.Stdout.String() != exp { - t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String()) - } -} - -// Ensure the "stats" command can execute correctly. 
-func TestStatsCommand_Run(t *testing.T) { - // Ignore - if os.Getpagesize() != 4096 { - t.Skip("system does not use 4KB page size") - } - - db := MustOpen(0666, nil) - defer db.Close() - - if err := db.Update(func(tx *bolt.Tx) error { - // Create "foo" bucket. - b, err := tx.CreateBucket([]byte("foo")) - if err != nil { - return err - } - for i := 0; i < 10; i++ { - if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { - return err - } - } - - // Create "bar" bucket. - b, err = tx.CreateBucket([]byte("bar")) - if err != nil { - return err - } - for i := 0; i < 100; i++ { - if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { - return err - } - } - - // Create "baz" bucket. - b, err = tx.CreateBucket([]byte("baz")) - if err != nil { - return err - } - if err := b.Put([]byte("key"), []byte("value")); err != nil { - return err - } - - return nil - }); err != nil { - t.Fatal(err) - } - db.DB.Close() - - // Generate expected result. - exp := "Aggregate statistics for 3 buckets\n\n" + - "Page count statistics\n" + - "\tNumber of logical branch pages: 0\n" + - "\tNumber of physical branch overflow pages: 0\n" + - "\tNumber of logical leaf pages: 1\n" + - "\tNumber of physical leaf overflow pages: 0\n" + - "Tree statistics\n" + - "\tNumber of keys/value pairs: 111\n" + - "\tNumber of levels in B+tree: 1\n" + - "Page size utilization\n" + - "\tBytes allocated for physical branch pages: 0\n" + - "\tBytes actually used for branch data: 0 (0%)\n" + - "\tBytes allocated for physical leaf pages: 4096\n" + - "\tBytes actually used for leaf data: 1996 (48%)\n" + - "Bucket statistics\n" + - "\tTotal number of buckets: 3\n" + - "\tTotal number on inlined buckets: 2 (66%)\n" + - "\tBytes used for inlined buckets: 236 (11%)\n" - - // Run the command. - m := NewMain() - if err := m.Run("stats", db.Path); err != nil { - t.Fatal(err) - } else if m.Stdout.String() != exp { - t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String()) - } -} - -// Main represents a test wrapper for main.Main that records output. -type Main struct { - *main.Main - Stdin bytes.Buffer - Stdout bytes.Buffer - Stderr bytes.Buffer -} - -// NewMain returns a new instance of Main. -func NewMain() *Main { - m := &Main{Main: main.NewMain()} - m.Main.Stdin = &m.Stdin - m.Main.Stdout = &m.Stdout - m.Main.Stderr = &m.Stderr - return m -} - -// MustOpen creates a Bolt database in a temporary location. -func MustOpen(mode os.FileMode, options *bolt.Options) *DB { - // Create temporary path. - f, _ := ioutil.TempFile("", "bolt-") - f.Close() - os.Remove(f.Name()) - - db, err := bolt.Open(f.Name(), mode, options) - if err != nil { - panic(err.Error()) - } - return &DB{DB: db, Path: f.Name()} -} - -// DB is a test wrapper for bolt.DB. -type DB struct { - *bolt.DB - Path string -} - -// Close closes and removes the database. 
-func (db *DB) Close() error { - defer os.Remove(db.Path) - return db.DB.Close() -} - -func TestCompactCommand_Run(t *testing.T) { - var s int64 - if err := binary.Read(crypto.Reader, binary.BigEndian, &s); err != nil { - t.Fatal(err) - } - rand.Seed(s) - - dstdb := MustOpen(0666, nil) - dstdb.Close() - - // fill the db - db := MustOpen(0666, nil) - if err := db.Update(func(tx *bolt.Tx) error { - n := 2 + rand.Intn(5) - for i := 0; i < n; i++ { - k := []byte(fmt.Sprintf("b%d", i)) - b, err := tx.CreateBucketIfNotExists(k) - if err != nil { - return err - } - if err := b.SetSequence(uint64(i)); err != nil { - return err - } - if err := fillBucket(b, append(k, '.')); err != nil { - return err - } - } - return nil - }); err != nil { - db.Close() - t.Fatal(err) - } - - // make the db grow by adding large values, and delete them. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucketIfNotExists([]byte("large_vals")) - if err != nil { - return err - } - n := 5 + rand.Intn(5) - for i := 0; i < n; i++ { - v := make([]byte, 1000*1000*(1+rand.Intn(5))) - _, err := crypto.Read(v) - if err != nil { - return err - } - if err := b.Put([]byte(fmt.Sprintf("l%d", i)), v); err != nil { - return err - } - } - return nil - }); err != nil { - db.Close() - t.Fatal(err) - } - if err := db.Update(func(tx *bolt.Tx) error { - c := tx.Bucket([]byte("large_vals")).Cursor() - for k, _ := c.First(); k != nil; k, _ = c.Next() { - if err := c.Delete(); err != nil { - return err - } - } - return tx.DeleteBucket([]byte("large_vals")) - }); err != nil { - db.Close() - t.Fatal(err) - } - db.DB.Close() - defer db.Close() - defer dstdb.Close() - - dbChk, err := chkdb(db.Path) - if err != nil { - t.Fatal(err) - } - - m := NewMain() - if err := m.Run("compact", "-o", dstdb.Path, db.Path); err != nil { - t.Fatal(err) - } - - dbChkAfterCompact, err := chkdb(db.Path) - if err != nil { - t.Fatal(err) - } - - dstdbChk, err := chkdb(dstdb.Path) - if err != nil { - t.Fatal(err) - } - - if !bytes.Equal(dbChk, dbChkAfterCompact) { - t.Error("the original db has been touched") - } - if !bytes.Equal(dbChk, dstdbChk) { - t.Error("the compacted db data isn't the same than the original db") - } -} - -func fillBucket(b *bolt.Bucket, prefix []byte) error { - n := 10 + rand.Intn(50) - for i := 0; i < n; i++ { - v := make([]byte, 10*(1+rand.Intn(4))) - _, err := crypto.Read(v) - if err != nil { - return err - } - k := append(prefix, []byte(fmt.Sprintf("k%d", i))...) - if err := b.Put(k, v); err != nil { - return err - } - } - // limit depth of subbuckets - s := 2 + rand.Intn(4) - if len(prefix) > (2*s + 1) { - return nil - } - n = 1 + rand.Intn(3) - for i := 0; i < n; i++ { - k := append(prefix, []byte(fmt.Sprintf("b%d", i))...) - sb, err := b.CreateBucket(k) - if err != nil { - return err - } - if err := fillBucket(sb, append(k, '.')); err != nil { - return err - } - } - return nil -} - -func chkdb(path string) ([]byte, error) { - db, err := bolt.Open(path, 0666, nil) - if err != nil { - return nil, err - } - defer db.Close() - var buf bytes.Buffer - err = db.View(func(tx *bolt.Tx) error { - return tx.ForEach(func(name []byte, b *bolt.Bucket) error { - return walkBucket(b, name, nil, &buf) - }) - }) - if err != nil { - return nil, err - } - return buf.Bytes(), nil -} - -func walkBucket(parent *bolt.Bucket, k []byte, v []byte, w io.Writer) error { - if _, err := fmt.Fprintf(w, "%d:%x=%x\n", parent.Sequence(), k, v); err != nil { - return err - } - - // not a bucket, exit. 
-	if v != nil {
-		return nil
-	}
-	return parent.ForEach(func(k, v []byte) error {
-		if v == nil {
-			return walkBucket(parent.Bucket(k), k, nil, w)
-		}
-		return walkBucket(parent, k, v, w)
-	})
-}
diff --git a/deps.mk b/deps.mk
new file mode 100644
--- /dev/null
+++ b/deps.mk
@@ -0,0 +1,57 @@
+libs.go = \
+	src/gkv.go \
+	tests/benchmarks/multiple-writers/gkv.go \
+	tests/functional/read-snapshots/gkv.go \
+	tests/fuzz/api/gkv.go \
+	tests/gkv.go \
+
+mains.go = \
+	src/main.go \
+	tests/benchmarks/multiple-writers/main.go \
+	tests/functional/read-snapshots/main.go \
+	tests/fuzz/api/main.go \
+	tests/main.go \
+
+functional-tests/lib.go = \
+	tests/functional/read-snapshots/gkv.go \
+
+functional-tests/main.go = \
+	tests/functional/read-snapshots/main.go \
+
+fuzz-targets/lib.go = \
+	tests/fuzz/api/gkv.go \
+
+fuzz-targets/main.go = \
+	tests/fuzz/api/main.go \
+
+benchmarks/lib.go = \
+	tests/benchmarks/multiple-writers/gkv.go \
+
+benchmarks/main.go = \
+	tests/benchmarks/multiple-writers/main.go \
+
+src/gkv.a:	src/gkv.go
+src/main.a:	src/main.go
+tests/benchmarks/multiple-writers/gkv.a:	tests/benchmarks/multiple-writers/gkv.go
+tests/benchmarks/multiple-writers/main.a:	tests/benchmarks/multiple-writers/main.go
+tests/functional/read-snapshots/gkv.a:	tests/functional/read-snapshots/gkv.go
+tests/functional/read-snapshots/main.a:	tests/functional/read-snapshots/main.go
+tests/fuzz/api/gkv.a:	tests/fuzz/api/gkv.go
+tests/fuzz/api/main.a:	tests/fuzz/api/main.go
+tests/gkv.a:	tests/gkv.go
+tests/main.a:	tests/main.go
+src/main.bin:	src/main.a
+tests/benchmarks/multiple-writers/main.bin:	tests/benchmarks/multiple-writers/main.a
+tests/functional/read-snapshots/main.bin:	tests/functional/read-snapshots/main.a
+tests/fuzz/api/main.bin:	tests/fuzz/api/main.a
+tests/main.bin:	tests/main.a
+src/main.bin-check:	src/main.bin
+tests/benchmarks/multiple-writers/main.bin-check:	tests/benchmarks/multiple-writers/main.bin
+tests/functional/read-snapshots/main.bin-check:	tests/functional/read-snapshots/main.bin
+tests/fuzz/api/main.bin-check:	tests/fuzz/api/main.bin
+tests/main.bin-check:	tests/main.bin
+src/main.a:	src/$(NAME).a
+tests/benchmarks/multiple-writers/main.a:	tests/benchmarks/multiple-writers/$(NAME).a
+tests/functional/read-snapshots/main.a:	tests/functional/read-snapshots/$(NAME).a
+tests/fuzz/api/main.a:	tests/fuzz/api/$(NAME).a
+tests/main.a:	tests/$(NAME).a
diff --git a/mkdeps.sh b/mkdeps.sh
new file mode 100755
index 0000000..b1c61b3
--- /dev/null
+++ b/mkdeps.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+set -eu
+
+export LANG=POSIX.UTF-8
+
+
+libs() {
+	find src tests -name '*.go' | grep -v '/main\.go$' |
+		grep -v '/version\.go$'
+}
+
+mains() {
+	find src tests -name '*.go' | grep '/main\.go$'
+}
+
+# varlist is used below but its definition was missing here; this minimal
+# reconstruction is assumed from the deps.mk layout it generates.
+varlist() {
+	printf '%s = \\\n' "$1"
+	sed 's/^\(.*\)$/\t\1 \\/'
+	printf '\n'
+}
+
+libs | varlist 'libs.go'
+mains | varlist 'mains.go'
+
+find tests/functional/*/*.go ! -name main.go | varlist 'functional-tests/lib.go'
+find tests/functional/*/main.go | varlist 'functional-tests/main.go'
+find tests/fuzz/*/*.go ! -name main.go | varlist 'fuzz-targets/lib.go'
+find tests/fuzz/*/main.go | varlist 'fuzz-targets/main.go'
+find tests/benchmarks/*/*.go ! -name main.go | varlist 'benchmarks/lib.go'
+find tests/benchmarks/*/main.go | varlist 'benchmarks/main.go'
+
+{ libs; mains; } | sort | sed 's/^\(.*\)\.go$/\1.a:\t\1.go/'
+mains | sort | sed 's/^\(.*\)\.go$/\1.bin:\t\1.a/'
+mains | sort | sed 's/^\(.*\)\.go$/\1.bin-check:\t\1.bin/'
+mains | sort | sed 's|^\(.*\)/main\.go$|\1/main.a:\t\1/$(NAME).a|'
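Note on the two files added above: deps.mk is generated output and mkdeps.sh is its generator. A plausible regeneration step, assuming the script is invoked from the repository root (the exact workflow is an assumption, not shown in this patch), would be:

	$ ./mkdeps.sh > deps.mk

diff --git a/src/bolt_386.go b/src/bolt_386.go
deleted file mode 100644
index 820d533..0000000
--- a/src/bolt_386.go
+++ /dev/null
@@ -1,10 +0,0 @@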
-package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0x7FFFFFFF // 2GB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0xFFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned = false diff --git a/src/bolt_amd64.go b/src/bolt_amd64.go deleted file mode 100644 index 98fafdb..0000000 --- a/src/bolt_amd64.go +++ /dev/null @@ -1,10 +0,0 @@ -package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0xFFFFFFFFFFFF // 256TB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0x7FFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned = false diff --git a/src/bolt_arm.go b/src/bolt_arm.go deleted file mode 100644 index 7e5cb4b..0000000 --- a/src/bolt_arm.go +++ /dev/null @@ -1,28 +0,0 @@ -package bolt - -import "unsafe" - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0x7FFFFFFF // 2GB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0xFFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned bool - -func init() { - // Simple check to see whether this arch handles unaligned load/stores - // correctly. - - // ARM9 and older devices require load/stores to be from/to aligned - // addresses. If not, the lower 2 bits are cleared and that address is - // read in a jumbled up order. - - // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka15414.html - - raw := [6]byte{0xfe, 0xef, 0x11, 0x22, 0x22, 0x11} - val := *(*uint32)(unsafe.Pointer(uintptr(unsafe.Pointer(&raw)) + 2)) - - brokenUnaligned = val != 0x11222211 -} diff --git a/src/bolt_arm64.go b/src/bolt_arm64.go deleted file mode 100644 index b26d84f..0000000 --- a/src/bolt_arm64.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build arm64 - -package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0xFFFFFFFFFFFF // 256TB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0x7FFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned = false diff --git a/src/bolt_linux.go b/src/bolt_linux.go deleted file mode 100644 index 2b67666..0000000 --- a/src/bolt_linux.go +++ /dev/null @@ -1,10 +0,0 @@ -package bolt - -import ( - "syscall" -) - -// fdatasync flushes written data to a file descriptor. -func fdatasync(db *DB) error { - return syscall.Fdatasync(int(db.file.Fd())) -} diff --git a/src/bolt_openbsd.go b/src/bolt_openbsd.go deleted file mode 100644 index 7058c3d..0000000 --- a/src/bolt_openbsd.go +++ /dev/null @@ -1,27 +0,0 @@ -package bolt - -import ( - "syscall" - "unsafe" -) - -const ( - msAsync = 1 << iota // perform asynchronous writes - msSync // perform synchronous writes - msInvalidate // invalidate cached data -) - -func msync(db *DB) error { - _, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate) - if errno != 0 { - return errno - } - return nil -} - -func fdatasync(db *DB) error { - if db.data != nil { - return msync(db) - } - return db.file.Sync() -} diff --git a/src/bolt_ppc.go b/src/bolt_ppc.go deleted file mode 100644 index 645ddc3..0000000 --- a/src/bolt_ppc.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build ppc - -package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. 
-const maxMapSize = 0x7FFFFFFF // 2GB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0xFFFFFFF diff --git a/src/bolt_ppc64.go b/src/bolt_ppc64.go deleted file mode 100644 index 9331d97..0000000 --- a/src/bolt_ppc64.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build ppc64 - -package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0xFFFFFFFFFFFF // 256TB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0x7FFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned = false diff --git a/src/bolt_ppc64le.go b/src/bolt_ppc64le.go deleted file mode 100644 index 8c143bc..0000000 --- a/src/bolt_ppc64le.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build ppc64le - -package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0xFFFFFFFFFFFF // 256TB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0x7FFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned = false diff --git a/src/bolt_s390x.go b/src/bolt_s390x.go deleted file mode 100644 index d7c39af..0000000 --- a/src/bolt_s390x.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build s390x - -package bolt - -// maxMapSize represents the largest mmap size supported by Bolt. -const maxMapSize = 0xFFFFFFFFFFFF // 256TB - -// maxAllocSize is the size used when creating array pointers. -const maxAllocSize = 0x7FFFFFFF - -// Are unaligned load/stores broken on this arch? -var brokenUnaligned = false diff --git a/src/bolt_unix.go b/src/bolt_unix.go deleted file mode 100644 index cad62dd..0000000 --- a/src/bolt_unix.go +++ /dev/null @@ -1,89 +0,0 @@ -// +build !windows,!plan9,!solaris - -package bolt - -import ( - "fmt" - "os" - "syscall" - "time" - "unsafe" -) - -// flock acquires an advisory lock on a file descriptor. -func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { - var t time.Time - for { - // If we're beyond our timeout then return an error. - // This can only occur after we've attempted a flock once. - if t.IsZero() { - t = time.Now() - } else if timeout > 0 && time.Since(t) > timeout { - return ErrTimeout - } - flag := syscall.LOCK_SH - if exclusive { - flag = syscall.LOCK_EX - } - - // Otherwise attempt to obtain an exclusive lock. - err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) - if err == nil { - return nil - } else if err != syscall.EWOULDBLOCK { - return err - } - - // Wait for a bit and try again. - time.Sleep(50 * time.Millisecond) - } -} - -// funlock releases an advisory lock on a file descriptor. -func funlock(db *DB) error { - return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) -} - -// mmap memory maps a DB's data file. -func mmap(db *DB, sz int) error { - // Map the data file to memory. - b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) - if err != nil { - return err - } - - // Advise the kernel that the mmap is accessed randomly. - if err := madvise(b, syscall.MADV_RANDOM); err != nil { - return fmt.Errorf("madvise: %s", err) - } - - // Save the original byte slice and convert to a byte array pointer. - db.dataref = b - db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0])) - db.datasz = sz - return nil -} - -// munmap unmaps a DB's data file from memory. -func munmap(db *DB) error { - // Ignore the unmap if we have no mapped data. 
- if db.dataref == nil { - return nil - } - - // Unmap using the original byte slice. - err := syscall.Munmap(db.dataref) - db.dataref = nil - db.data = nil - db.datasz = 0 - return err -} - -// NOTE: This function is copied from stdlib because it is not available on darwin. -func madvise(b []byte, advice int) (err error) { - _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice)) - if e1 != 0 { - err = e1 - } - return -} diff --git a/src/bolt_unix_solaris.go b/src/bolt_unix_solaris.go deleted file mode 100644 index 307bf2b..0000000 --- a/src/bolt_unix_solaris.go +++ /dev/null @@ -1,90 +0,0 @@ -package bolt - -import ( - "fmt" - "os" - "syscall" - "time" - "unsafe" - - "golang.org/x/sys/unix" -) - -// flock acquires an advisory lock on a file descriptor. -func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { - var t time.Time - for { - // If we're beyond our timeout then return an error. - // This can only occur after we've attempted a flock once. - if t.IsZero() { - t = time.Now() - } else if timeout > 0 && time.Since(t) > timeout { - return ErrTimeout - } - var lock syscall.Flock_t - lock.Start = 0 - lock.Len = 0 - lock.Pid = 0 - lock.Whence = 0 - lock.Pid = 0 - if exclusive { - lock.Type = syscall.F_WRLCK - } else { - lock.Type = syscall.F_RDLCK - } - err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock) - if err == nil { - return nil - } else if err != syscall.EAGAIN { - return err - } - - // Wait for a bit and try again. - time.Sleep(50 * time.Millisecond) - } -} - -// funlock releases an advisory lock on a file descriptor. -func funlock(db *DB) error { - var lock syscall.Flock_t - lock.Start = 0 - lock.Len = 0 - lock.Type = syscall.F_UNLCK - lock.Whence = 0 - return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock) -} - -// mmap memory maps a DB's data file. -func mmap(db *DB, sz int) error { - // Map the data file to memory. - b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) - if err != nil { - return err - } - - // Advise the kernel that the mmap is accessed randomly. - if err := unix.Madvise(b, syscall.MADV_RANDOM); err != nil { - return fmt.Errorf("madvise: %s", err) - } - - // Save the original byte slice and convert to a byte array pointer. - db.dataref = b - db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0])) - db.datasz = sz - return nil -} - -// munmap unmaps a DB's data file from memory. -func munmap(db *DB) error { - // Ignore the unmap if we have no mapped data. - if db.dataref == nil { - return nil - } - - // Unmap using the original byte slice. 
- err := unix.Munmap(db.dataref) - db.dataref = nil - db.data = nil - db.datasz = 0 - return err -} diff --git a/src/bolt_windows.go b/src/bolt_windows.go deleted file mode 100644 index b00fb07..0000000 --- a/src/bolt_windows.go +++ /dev/null @@ -1,144 +0,0 @@ -package bolt - -import ( - "fmt" - "os" - "syscall" - "time" - "unsafe" -) - -// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1 -var ( - modkernel32 = syscall.NewLazyDLL("kernel32.dll") - procLockFileEx = modkernel32.NewProc("LockFileEx") - procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") -) - -const ( - lockExt = ".lock" - - // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx - flagLockExclusive = 2 - flagLockFailImmediately = 1 - - // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx - errLockViolation syscall.Errno = 0x21 -) - -func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { - r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol))) - if r == 0 { - return err - } - return nil -} - -func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { - r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0) - if r == 0 { - return err - } - return nil -} - -// fdatasync flushes written data to a file descriptor. -func fdatasync(db *DB) error { - return db.file.Sync() -} - -// flock acquires an advisory lock on a file descriptor. -func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { - // Create a separate lock file on windows because a process - // cannot share an exclusive lock on the same file. This is - // needed during Tx.WriteTo(). - f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode) - if err != nil { - return err - } - db.lockfile = f - - var t time.Time - for { - // If we're beyond our timeout then return an error. - // This can only occur after we've attempted a flock once. - if t.IsZero() { - t = time.Now() - } else if timeout > 0 && time.Since(t) > timeout { - return ErrTimeout - } - - var flag uint32 = flagLockFailImmediately - if exclusive { - flag |= flagLockExclusive - } - - err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{}) - if err == nil { - return nil - } else if err != errLockViolation { - return err - } - - // Wait for a bit and try again. - time.Sleep(50 * time.Millisecond) - } -} - -// funlock releases an advisory lock on a file descriptor. -func funlock(db *DB) error { - err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{}) - db.lockfile.Close() - os.Remove(db.path + lockExt) - return err -} - -// mmap memory maps a DB's data file. -// Based on: https://github.com/edsrzf/mmap-go -func mmap(db *DB, sz int) error { - if !db.readOnly { - // Truncate the database to the size of the mmap. - if err := db.file.Truncate(int64(sz)); err != nil { - return fmt.Errorf("truncate: %s", err) - } - } - - // Open a file mapping handle. - sizelo := uint32(sz >> 32) - sizehi := uint32(sz) & 0xffffffff - h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil) - if h == 0 { - return os.NewSyscallError("CreateFileMapping", errno) - } - - // Create the memory map. 
- addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz)) - if addr == 0 { - return os.NewSyscallError("MapViewOfFile", errno) - } - - // Close mapping handle. - if err := syscall.CloseHandle(syscall.Handle(h)); err != nil { - return os.NewSyscallError("CloseHandle", err) - } - - // Convert to a byte array. - db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr))) - db.datasz = sz - - return nil -} - -// munmap unmaps a pointer from a file. -// Based on: https://github.com/edsrzf/mmap-go -func munmap(db *DB) error { - if db.data == nil { - return nil - } - - addr := (uintptr)(unsafe.Pointer(&db.data[0])) - if err := syscall.UnmapViewOfFile(addr); err != nil { - return os.NewSyscallError("UnmapViewOfFile", err) - } - return nil -} diff --git a/src/boltsync_unix.go b/src/boltsync_unix.go deleted file mode 100644 index f504425..0000000 --- a/src/boltsync_unix.go +++ /dev/null @@ -1,8 +0,0 @@ -// +build !windows,!plan9,!linux,!openbsd - -package bolt - -// fdatasync flushes written data to a file descriptor. -func fdatasync(db *DB) error { - return db.file.Sync() -} diff --git a/src/bucket.go b/src/bucket.go deleted file mode 100644 index 0c5bf27..0000000 --- a/src/bucket.go +++ /dev/null @@ -1,777 +0,0 @@ -package bolt - -import ( - "bytes" - "fmt" - "unsafe" -) - -const ( - // MaxKeySize is the maximum length of a key, in bytes. - MaxKeySize = 32768 - - // MaxValueSize is the maximum length of a value, in bytes. - MaxValueSize = (1 << 31) - 2 -) - -const ( - maxUint = ^uint(0) - minUint = 0 - maxInt = int(^uint(0) >> 1) - minInt = -maxInt - 1 -) - -const bucketHeaderSize = int(unsafe.Sizeof(bucket{})) - -const ( - minFillPercent = 0.1 - maxFillPercent = 1.0 -) - -// DefaultFillPercent is the percentage that split pages are filled. -// This value can be changed by setting Bucket.FillPercent. -const DefaultFillPercent = 0.5 - -// Bucket represents a collection of key/value pairs inside the database. -type Bucket struct { - *bucket - tx *Tx // the associated transaction - buckets map[string]*Bucket // subbucket cache - page *page // inline page reference - rootNode *node // materialized node for the root page. - nodes map[pgid]*node // node cache - - // Sets the threshold for filling nodes when they split. By default, - // the bucket will fill to 50% but it can be useful to increase this - // amount if you know that your write workloads are mostly append-only. - // - // This is non-persisted across transactions so it must be set in every Tx. - FillPercent float64 -} - -// bucket represents the on-file representation of a bucket. -// This is stored as the "value" of a bucket key. If the bucket is small enough, -// then its root page can be stored inline in the "value", after the bucket -// header. In the case of inline buckets, the "root" will be 0. -type bucket struct { - root pgid // page id of the bucket's root-level page - sequence uint64 // monotonically incrementing, used by NextSequence() -} - -// newBucket returns a new bucket associated with a transaction. -func newBucket(tx *Tx) Bucket { - var b = Bucket{tx: tx, FillPercent: DefaultFillPercent} - if tx.writable { - b.buckets = make(map[string]*Bucket) - b.nodes = make(map[pgid]*node) - } - return b -} - -// Tx returns the tx of the bucket. -func (b *Bucket) Tx() *Tx { - return b.tx -} - -// Root returns the root of the bucket. -func (b *Bucket) Root() pgid { - return b.root -} - -// Writable returns whether the bucket is writable. 
-func (b *Bucket) Writable() bool { - return b.tx.writable -} - -// Cursor creates a cursor associated with the bucket. -// The cursor is only valid as long as the transaction is open. -// Do not use a cursor after the transaction is closed. -func (b *Bucket) Cursor() *Cursor { - // Update transaction statistics. - b.tx.stats.CursorCount++ - - // Allocate and return a cursor. - return &Cursor{ - bucket: b, - stack: make([]elemRef, 0), - } -} - -// Bucket retrieves a nested bucket by name. -// Returns nil if the bucket does not exist. -// The bucket instance is only valid for the lifetime of the transaction. -func (b *Bucket) Bucket(name []byte) *Bucket { - if b.buckets != nil { - if child := b.buckets[string(name)]; child != nil { - return child - } - } - - // Move cursor to key. - c := b.Cursor() - k, v, flags := c.seek(name) - - // Return nil if the key doesn't exist or it is not a bucket. - if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 { - return nil - } - - // Otherwise create a bucket and cache it. - var child = b.openBucket(v) - if b.buckets != nil { - b.buckets[string(name)] = child - } - - return child -} - -// Helper method that re-interprets a sub-bucket value -// from a parent into a Bucket -func (b *Bucket) openBucket(value []byte) *Bucket { - var child = newBucket(b.tx) - - // If unaligned load/stores are broken on this arch and value is - // unaligned simply clone to an aligned byte array. - unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0 - - if unaligned { - value = cloneBytes(value) - } - - // If this is a writable transaction then we need to copy the bucket entry. - // Read-only transactions can point directly at the mmap entry. - if b.tx.writable && !unaligned { - child.bucket = &bucket{} - *child.bucket = *(*bucket)(unsafe.Pointer(&value[0])) - } else { - child.bucket = (*bucket)(unsafe.Pointer(&value[0])) - } - - // Save a reference to the inline page if the bucket is inline. - if child.root == 0 { - child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) - } - - return &child -} - -// CreateBucket creates a new bucket at the given key and returns the new bucket. -// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. -func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) { - if b.tx.db == nil { - return nil, ErrTxClosed - } else if !b.tx.writable { - return nil, ErrTxNotWritable - } else if len(key) == 0 { - return nil, ErrBucketNameRequired - } - - // Move cursor to correct position. - c := b.Cursor() - k, _, flags := c.seek(key) - - // Return an error if there is an existing key. - if bytes.Equal(key, k) { - if (flags & bucketLeafFlag) != 0 { - return nil, ErrBucketExists - } - return nil, ErrIncompatibleValue - } - - // Create empty, inline bucket. - var bucket = Bucket{ - bucket: &bucket{}, - rootNode: &node{isLeaf: true}, - FillPercent: DefaultFillPercent, - } - var value = bucket.write() - - // Insert into node. - key = cloneBytes(key) - c.node().put(key, key, value, 0, bucketLeafFlag) - - // Since subbuckets are not allowed on inline buckets, we need to - // dereference the inline page, if it exists. This will cause the bucket - // to be treated as a regular, non-inline bucket for the rest of the tx. - b.page = nil - - return b.Bucket(key), nil -} - -// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it. 
-// Returns an error if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. -func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) { - child, err := b.CreateBucket(key) - if err == ErrBucketExists { - return b.Bucket(key), nil - } else if err != nil { - return nil, err - } - return child, nil -} - -// DeleteBucket deletes a bucket at the given key. -// Returns an error if the bucket does not exists, or if the key represents a non-bucket value. -func (b *Bucket) DeleteBucket(key []byte) error { - if b.tx.db == nil { - return ErrTxClosed - } else if !b.Writable() { - return ErrTxNotWritable - } - - // Move cursor to correct position. - c := b.Cursor() - k, _, flags := c.seek(key) - - // Return an error if bucket doesn't exist or is not a bucket. - if !bytes.Equal(key, k) { - return ErrBucketNotFound - } else if (flags & bucketLeafFlag) == 0 { - return ErrIncompatibleValue - } - - // Recursively delete all child buckets. - child := b.Bucket(key) - err := child.ForEach(func(k, v []byte) error { - if v == nil { - if err := child.DeleteBucket(k); err != nil { - return fmt.Errorf("delete bucket: %s", err) - } - } - return nil - }) - if err != nil { - return err - } - - // Remove cached copy. - delete(b.buckets, string(key)) - - // Release all bucket pages to freelist. - child.nodes = nil - child.rootNode = nil - child.free() - - // Delete the node if we have a matching key. - c.node().del(key) - - return nil -} - -// Get retrieves the value for a key in the bucket. -// Returns a nil value if the key does not exist or if the key is a nested bucket. -// The returned value is only valid for the life of the transaction. -func (b *Bucket) Get(key []byte) []byte { - k, v, flags := b.Cursor().seek(key) - - // Return nil if this is a bucket. - if (flags & bucketLeafFlag) != 0 { - return nil - } - - // If our target node isn't the same key as what's passed in then return nil. - if !bytes.Equal(key, k) { - return nil - } - return v -} - -// Put sets the value for a key in the bucket. -// If the key exist then its previous value will be overwritten. -// Supplied value must remain valid for the life of the transaction. -// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large. -func (b *Bucket) Put(key []byte, value []byte) error { - if b.tx.db == nil { - return ErrTxClosed - } else if !b.Writable() { - return ErrTxNotWritable - } else if len(key) == 0 { - return ErrKeyRequired - } else if len(key) > MaxKeySize { - return ErrKeyTooLarge - } else if int64(len(value)) > MaxValueSize { - return ErrValueTooLarge - } - - // Move cursor to correct position. - c := b.Cursor() - k, _, flags := c.seek(key) - - // Return an error if there is an existing key with a bucket value. - if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 { - return ErrIncompatibleValue - } - - // Insert into node. - key = cloneBytes(key) - c.node().put(key, key, value, 0, 0) - - return nil -} - -// Delete removes a key from the bucket. -// If the key does not exist then nothing is done and a nil error is returned. -// Returns an error if the bucket was created from a read-only transaction. -func (b *Bucket) Delete(key []byte) error { - if b.tx.db == nil { - return ErrTxClosed - } else if !b.Writable() { - return ErrTxNotWritable - } - - // Move cursor to correct position. 
- c := b.Cursor() - _, _, flags := c.seek(key) - - // Return an error if there is already existing bucket value. - if (flags & bucketLeafFlag) != 0 { - return ErrIncompatibleValue - } - - // Delete the node if we have a matching key. - c.node().del(key) - - return nil -} - -// Sequence returns the current integer for the bucket without incrementing it. -func (b *Bucket) Sequence() uint64 { return b.bucket.sequence } - -// SetSequence updates the sequence number for the bucket. -func (b *Bucket) SetSequence(v uint64) error { - if b.tx.db == nil { - return ErrTxClosed - } else if !b.Writable() { - return ErrTxNotWritable - } - - // Materialize the root node if it hasn't been already so that the - // bucket will be saved during commit. - if b.rootNode == nil { - _ = b.node(b.root, nil) - } - - // Increment and return the sequence. - b.bucket.sequence = v - return nil -} - -// NextSequence returns an autoincrementing integer for the bucket. -func (b *Bucket) NextSequence() (uint64, error) { - if b.tx.db == nil { - return 0, ErrTxClosed - } else if !b.Writable() { - return 0, ErrTxNotWritable - } - - // Materialize the root node if it hasn't been already so that the - // bucket will be saved during commit. - if b.rootNode == nil { - _ = b.node(b.root, nil) - } - - // Increment and return the sequence. - b.bucket.sequence++ - return b.bucket.sequence, nil -} - -// ForEach executes a function for each key/value pair in a bucket. -// If the provided function returns an error then the iteration is stopped and -// the error is returned to the caller. The provided function must not modify -// the bucket; this will result in undefined behavior. -func (b *Bucket) ForEach(fn func(k, v []byte) error) error { - if b.tx.db == nil { - return ErrTxClosed - } - c := b.Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - if err := fn(k, v); err != nil { - return err - } - } - return nil -} - -// Stat returns stats on a bucket. -func (b *Bucket) Stats() BucketStats { - var s, subStats BucketStats - pageSize := b.tx.db.pageSize - s.BucketN += 1 - if b.root == 0 { - s.InlineBucketN += 1 - } - b.forEachPage(func(p *page, depth int) { - if (p.flags & leafPageFlag) != 0 { - s.KeyN += int(p.count) - - // used totals the used bytes for the page - used := pageHeaderSize - - if p.count != 0 { - // If page has any elements, add all element headers. - used += leafPageElementSize * int(p.count-1) - - // Add all element key, value sizes. - // The computation takes advantage of the fact that the position - // of the last element's key/value equals to the total of the sizes - // of all previous elements' keys and values. - // It also includes the last element's header. - lastElement := p.leafPageElement(p.count - 1) - used += int(lastElement.pos + lastElement.ksize + lastElement.vsize) - } - - if b.root == 0 { - // For inlined bucket just update the inline stats - s.InlineBucketInuse += used - } else { - // For non-inlined bucket update all the leaf stats - s.LeafPageN++ - s.LeafInuse += used - s.LeafOverflowN += int(p.overflow) - - // Collect stats from sub-buckets. - // Do that by iterating over all element headers - // looking for the ones with the bucketLeafFlag. - for i := uint16(0); i < p.count; i++ { - e := p.leafPageElement(i) - if (e.flags & bucketLeafFlag) != 0 { - // For any bucket element, open the element value - // and recursively call Stats on the contained bucket. 
- subStats.Add(b.openBucket(e.value()).Stats()) - } - } - } - } else if (p.flags & branchPageFlag) != 0 { - s.BranchPageN++ - lastElement := p.branchPageElement(p.count - 1) - - // used totals the used bytes for the page - // Add header and all element headers. - used := pageHeaderSize + (branchPageElementSize * int(p.count-1)) - - // Add size of all keys and values. - // Again, use the fact that last element's position equals to - // the total of key, value sizes of all previous elements. - used += int(lastElement.pos + lastElement.ksize) - s.BranchInuse += used - s.BranchOverflowN += int(p.overflow) - } - - // Keep track of maximum page depth. - if depth+1 > s.Depth { - s.Depth = (depth + 1) - } - }) - - // Alloc stats can be computed from page counts and pageSize. - s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize - s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize - - // Add the max depth of sub-buckets to get total nested depth. - s.Depth += subStats.Depth - // Add the stats for all sub-buckets - s.Add(subStats) - return s -} - -// forEachPage iterates over every page in a bucket, including inline pages. -func (b *Bucket) forEachPage(fn func(*page, int)) { - // If we have an inline page then just use that. - if b.page != nil { - fn(b.page, 0) - return - } - - // Otherwise traverse the page hierarchy. - b.tx.forEachPage(b.root, 0, fn) -} - -// forEachPageNode iterates over every page (or node) in a bucket. -// This also includes inline pages. -func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { - // If we have an inline page or root node then just use that. - if b.page != nil { - fn(b.page, nil, 0) - return - } - b._forEachPageNode(b.root, 0, fn) -} - -func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) { - var p, n = b.pageNode(pgid) - - // Execute function. - fn(p, n, depth) - - // Recursively loop over children. - if p != nil { - if (p.flags & branchPageFlag) != 0 { - for i := 0; i < int(p.count); i++ { - elem := p.branchPageElement(uint16(i)) - b._forEachPageNode(elem.pgid, depth+1, fn) - } - } - } else { - if !n.isLeaf { - for _, inode := range n.inodes { - b._forEachPageNode(inode.pgid, depth+1, fn) - } - } - } -} - -// spill writes all the nodes for this bucket to dirty pages. -func (b *Bucket) spill() error { - // Spill all child buckets first. - for name, child := range b.buckets { - // If the child bucket is small enough and it has no child buckets then - // write it inline into the parent bucket's page. Otherwise spill it - // like a normal bucket and make the parent value a pointer to the page. - var value []byte - if child.inlineable() { - child.free() - value = child.write() - } else { - if err := child.spill(); err != nil { - return err - } - - // Update the child bucket header in this bucket. - value = make([]byte, unsafe.Sizeof(bucket{})) - var bucket = (*bucket)(unsafe.Pointer(&value[0])) - *bucket = *child.bucket - } - - // Skip writing the bucket if there are no materialized nodes. - if child.rootNode == nil { - continue - } - - // Update parent node. - var c = b.Cursor() - k, _, flags := c.seek([]byte(name)) - if !bytes.Equal([]byte(name), k) { - panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k)) - } - if flags&bucketLeafFlag == 0 { - panic(fmt.Sprintf("unexpected bucket header flag: %x", flags)) - } - c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag) - } - - // Ignore if there's not a materialized root node. 
- if b.rootNode == nil { - return nil - } - - // Spill nodes. - if err := b.rootNode.spill(); err != nil { - return err - } - b.rootNode = b.rootNode.root() - - // Update the root node for this bucket. - if b.rootNode.pgid >= b.tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid)) - } - b.root = b.rootNode.pgid - - return nil -} - -// inlineable returns true if a bucket is small enough to be written inline -// and if it contains no subbuckets. Otherwise returns false. -func (b *Bucket) inlineable() bool { - var n = b.rootNode - - // Bucket must only contain a single leaf node. - if n == nil || !n.isLeaf { - return false - } - - // Bucket is not inlineable if it contains subbuckets or if it goes beyond - // our threshold for inline bucket size. - var size = pageHeaderSize - for _, inode := range n.inodes { - size += leafPageElementSize + len(inode.key) + len(inode.value) - - if inode.flags&bucketLeafFlag != 0 { - return false - } else if size > b.maxInlineBucketSize() { - return false - } - } - - return true -} - -// Returns the maximum total size of a bucket to make it a candidate for inlining. -func (b *Bucket) maxInlineBucketSize() int { - return b.tx.db.pageSize / 4 -} - -// write allocates and writes a bucket to a byte slice. -func (b *Bucket) write() []byte { - // Allocate the appropriate size. - var n = b.rootNode - var value = make([]byte, bucketHeaderSize+n.size()) - - // Write a bucket header. - var bucket = (*bucket)(unsafe.Pointer(&value[0])) - *bucket = *b.bucket - - // Convert byte slice to a fake page and write the root node. - var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) - n.write(p) - - return value -} - -// rebalance attempts to balance all nodes. -func (b *Bucket) rebalance() { - for _, n := range b.nodes { - n.rebalance() - } - for _, child := range b.buckets { - child.rebalance() - } -} - -// node creates a node from a page and associates it with a given parent. -func (b *Bucket) node(pgid pgid, parent *node) *node { - _assert(b.nodes != nil, "nodes map expected") - - // Retrieve node if it's already been created. - if n := b.nodes[pgid]; n != nil { - return n - } - - // Otherwise create a node and cache it. - n := &node{bucket: b, parent: parent} - if parent == nil { - b.rootNode = n - } else { - parent.children = append(parent.children, n) - } - - // Use the inline page if this is an inline bucket. - var p = b.page - if p == nil { - p = b.tx.page(pgid) - } - - // Read the page into the node and cache it. - n.read(p) - b.nodes[pgid] = n - - // Update statistics. - b.tx.stats.NodeCount++ - - return n -} - -// free recursively frees all pages in the bucket. -func (b *Bucket) free() { - if b.root == 0 { - return - } - - var tx = b.tx - b.forEachPageNode(func(p *page, n *node, _ int) { - if p != nil { - tx.db.freelist.free(tx.meta.txid, p) - } else { - n.free() - } - }) - b.root = 0 -} - -// dereference removes all references to the old mmap. -func (b *Bucket) dereference() { - if b.rootNode != nil { - b.rootNode.root().dereference() - } - - for _, child := range b.buckets { - child.dereference() - } -} - -// pageNode returns the in-memory node, if it exists. -// Otherwise returns the underlying page. -func (b *Bucket) pageNode(id pgid) (*page, *node) { - // Inline buckets have a fake page embedded in their value so treat them - // differently. We'll return the rootNode (if available) or the fake page. 
- if b.root == 0 { - if id != 0 { - panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id)) - } - if b.rootNode != nil { - return nil, b.rootNode - } - return b.page, nil - } - - // Check the node cache for non-inline buckets. - if b.nodes != nil { - if n := b.nodes[id]; n != nil { - return nil, n - } - } - - // Finally lookup the page from the transaction if no node is materialized. - return b.tx.page(id), nil -} - -// BucketStats records statistics about resources used by a bucket. -type BucketStats struct { - // Page count statistics. - BranchPageN int // number of logical branch pages - BranchOverflowN int // number of physical branch overflow pages - LeafPageN int // number of logical leaf pages - LeafOverflowN int // number of physical leaf overflow pages - - // Tree statistics. - KeyN int // number of keys/value pairs - Depth int // number of levels in B+tree - - // Page size utilization. - BranchAlloc int // bytes allocated for physical branch pages - BranchInuse int // bytes actually used for branch data - LeafAlloc int // bytes allocated for physical leaf pages - LeafInuse int // bytes actually used for leaf data - - // Bucket statistics - BucketN int // total number of buckets including the top bucket - InlineBucketN int // total number on inlined buckets - InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse) -} - -func (s *BucketStats) Add(other BucketStats) { - s.BranchPageN += other.BranchPageN - s.BranchOverflowN += other.BranchOverflowN - s.LeafPageN += other.LeafPageN - s.LeafOverflowN += other.LeafOverflowN - s.KeyN += other.KeyN - if s.Depth < other.Depth { - s.Depth = other.Depth - } - s.BranchAlloc += other.BranchAlloc - s.BranchInuse += other.BranchInuse - s.LeafAlloc += other.LeafAlloc - s.LeafInuse += other.LeafInuse - - s.BucketN += other.BucketN - s.InlineBucketN += other.InlineBucketN - s.InlineBucketInuse += other.InlineBucketInuse -} - -// cloneBytes returns a copy of a given slice. -func cloneBytes(v []byte) []byte { - var clone = make([]byte, len(v)) - copy(clone, v) - return clone -} diff --git a/src/cursor.go b/src/cursor.go deleted file mode 100644 index 1be9f35..0000000 --- a/src/cursor.go +++ /dev/null @@ -1,400 +0,0 @@ -package bolt - -import ( - "bytes" - "fmt" - "sort" -) - -// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order. -// Cursors see nested buckets with value == nil. -// Cursors can be obtained from a transaction and are valid as long as the transaction is open. -// -// Keys and values returned from the cursor are only valid for the life of the transaction. -// -// Changing data while traversing with a cursor may cause it to be invalidated -// and return unexpected keys and/or values. You must reposition your cursor -// after mutating data. -type Cursor struct { - bucket *Bucket - stack []elemRef -} - -// Bucket returns the bucket that this cursor was created from. -func (c *Cursor) Bucket() *Bucket { - return c.bucket -} - -// First moves the cursor to the first item in the bucket and returns its key and value. -// If the bucket is empty then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. 
-func (c *Cursor) First() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") - c.stack = c.stack[:0] - p, n := c.bucket.pageNode(c.bucket.root) - c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) - c.first() - - // If we land on an empty page then move to the next value. - // https://github.com/boltdb/bolt/issues/450 - if c.stack[len(c.stack)-1].count() == 0 { - c.next() - } - - k, v, flags := c.keyValue() - if (flags & uint32(bucketLeafFlag)) != 0 { - return k, nil - } - return k, v - -} - -// Last moves the cursor to the last item in the bucket and returns its key and value. -// If the bucket is empty then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. -func (c *Cursor) Last() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") - c.stack = c.stack[:0] - p, n := c.bucket.pageNode(c.bucket.root) - ref := elemRef{page: p, node: n} - ref.index = ref.count() - 1 - c.stack = append(c.stack, ref) - c.last() - k, v, flags := c.keyValue() - if (flags & uint32(bucketLeafFlag)) != 0 { - return k, nil - } - return k, v -} - -// Next moves the cursor to the next item in the bucket and returns its key and value. -// If the cursor is at the end of the bucket then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. -func (c *Cursor) Next() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") - k, v, flags := c.next() - if (flags & uint32(bucketLeafFlag)) != 0 { - return k, nil - } - return k, v -} - -// Prev moves the cursor to the previous item in the bucket and returns its key and value. -// If the cursor is at the beginning of the bucket then a nil key and value are returned. -// The returned key and value are only valid for the life of the transaction. -func (c *Cursor) Prev() (key []byte, value []byte) { - _assert(c.bucket.tx.db != nil, "tx closed") - - // Attempt to move back one element until we're successful. - // Move up the stack as we hit the beginning of each page in our stack. - for i := len(c.stack) - 1; i >= 0; i-- { - elem := &c.stack[i] - if elem.index > 0 { - elem.index-- - break - } - c.stack = c.stack[:i] - } - - // If we've hit the end then return nil. - if len(c.stack) == 0 { - return nil, nil - } - - // Move down the stack to find the last element of the last leaf under this branch. - c.last() - k, v, flags := c.keyValue() - if (flags & uint32(bucketLeafFlag)) != 0 { - return k, nil - } - return k, v -} - -// Seek moves the cursor to a given key and returns it. -// If the key does not exist then the next key is used. If no keys -// follow, a nil key is returned. -// The returned key and value are only valid for the life of the transaction. -func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { - k, v, flags := c.seek(seek) - - // If we ended up after the last element of a page then move to the next one. - if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() { - k, v, flags = c.next() - } - - if k == nil { - return nil, nil - } else if (flags & uint32(bucketLeafFlag)) != 0 { - return k, nil - } - return k, v -} - -// Delete removes the current key/value under the cursor from the bucket. -// Delete fails if current key/value is a bucket or if the transaction is not writable. 
-func (c *Cursor) Delete() error { - if c.bucket.tx.db == nil { - return ErrTxClosed - } else if !c.bucket.Writable() { - return ErrTxNotWritable - } - - key, _, flags := c.keyValue() - // Return an error if current value is a bucket. - if (flags & bucketLeafFlag) != 0 { - return ErrIncompatibleValue - } - c.node().del(key) - - return nil -} - -// seek moves the cursor to a given key and returns it. -// If the key does not exist then the next key is used. -func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { - _assert(c.bucket.tx.db != nil, "tx closed") - - // Start from root page/node and traverse to correct page. - c.stack = c.stack[:0] - c.search(seek, c.bucket.root) - ref := &c.stack[len(c.stack)-1] - - // If the cursor is pointing to the end of page/node then return nil. - if ref.index >= ref.count() { - return nil, nil, 0 - } - - // If this is a bucket then return a nil value. - return c.keyValue() -} - -// first moves the cursor to the first leaf element under the last page in the stack. -func (c *Cursor) first() { - for { - // Exit when we hit a leaf page. - var ref = &c.stack[len(c.stack)-1] - if ref.isLeaf() { - break - } - - // Keep adding pages pointing to the first element to the stack. - var pgid pgid - if ref.node != nil { - pgid = ref.node.inodes[ref.index].pgid - } else { - pgid = ref.page.branchPageElement(uint16(ref.index)).pgid - } - p, n := c.bucket.pageNode(pgid) - c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) - } -} - -// last moves the cursor to the last leaf element under the last page in the stack. -func (c *Cursor) last() { - for { - // Exit when we hit a leaf page. - ref := &c.stack[len(c.stack)-1] - if ref.isLeaf() { - break - } - - // Keep adding pages pointing to the last element in the stack. - var pgid pgid - if ref.node != nil { - pgid = ref.node.inodes[ref.index].pgid - } else { - pgid = ref.page.branchPageElement(uint16(ref.index)).pgid - } - p, n := c.bucket.pageNode(pgid) - - var nextRef = elemRef{page: p, node: n} - nextRef.index = nextRef.count() - 1 - c.stack = append(c.stack, nextRef) - } -} - -// next moves to the next leaf element and returns the key and value. -// If the cursor is at the last leaf element then it stays there and returns nil. -func (c *Cursor) next() (key []byte, value []byte, flags uint32) { - for { - // Attempt to move over one element until we're successful. - // Move up the stack as we hit the end of each page in our stack. - var i int - for i = len(c.stack) - 1; i >= 0; i-- { - elem := &c.stack[i] - if elem.index < elem.count()-1 { - elem.index++ - break - } - } - - // If we've hit the root page then stop and return. This will leave the - // cursor on the last element of the last page. - if i == -1 { - return nil, nil, 0 - } - - // Otherwise start from where we left off in the stack and find the - // first element of the first leaf page. - c.stack = c.stack[:i+1] - c.first() - - // If this is an empty page then restart and move back up the stack. - // https://github.com/boltdb/bolt/issues/450 - if c.stack[len(c.stack)-1].count() == 0 { - continue - } - - return c.keyValue() - } -} - -// search recursively performs a binary search against a given page/node until it finds a given key. 
-func (c *Cursor) search(key []byte, pgid pgid) { - p, n := c.bucket.pageNode(pgid) - if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 { - panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags)) - } - e := elemRef{page: p, node: n} - c.stack = append(c.stack, e) - - // If we're on a leaf page/node then find the specific node. - if e.isLeaf() { - c.nsearch(key) - return - } - - if n != nil { - c.searchNode(key, n) - return - } - c.searchPage(key, p) -} - -func (c *Cursor) searchNode(key []byte, n *node) { - var exact bool - index := sort.Search(len(n.inodes), func(i int) bool { - // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. - // sort.Search() finds the lowest index where f() != -1 but we need the highest index. - ret := bytes.Compare(n.inodes[i].key, key) - if ret == 0 { - exact = true - } - return ret != -1 - }) - if !exact && index > 0 { - index-- - } - c.stack[len(c.stack)-1].index = index - - // Recursively search to the next page. - c.search(key, n.inodes[index].pgid) -} - -func (c *Cursor) searchPage(key []byte, p *page) { - // Binary search for the correct range. - inodes := p.branchPageElements() - - var exact bool - index := sort.Search(int(p.count), func(i int) bool { - // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. - // sort.Search() finds the lowest index where f() != -1 but we need the highest index. - ret := bytes.Compare(inodes[i].key(), key) - if ret == 0 { - exact = true - } - return ret != -1 - }) - if !exact && index > 0 { - index-- - } - c.stack[len(c.stack)-1].index = index - - // Recursively search to the next page. - c.search(key, inodes[index].pgid) -} - -// nsearch searches the leaf node on the top of the stack for a key. -func (c *Cursor) nsearch(key []byte) { - e := &c.stack[len(c.stack)-1] - p, n := e.page, e.node - - // If we have a node then search its inodes. - if n != nil { - index := sort.Search(len(n.inodes), func(i int) bool { - return bytes.Compare(n.inodes[i].key, key) != -1 - }) - e.index = index - return - } - - // If we have a page then search its leaf elements. - inodes := p.leafPageElements() - index := sort.Search(int(p.count), func(i int) bool { - return bytes.Compare(inodes[i].key(), key) != -1 - }) - e.index = index -} - -// keyValue returns the key and value of the current leaf element. -func (c *Cursor) keyValue() ([]byte, []byte, uint32) { - ref := &c.stack[len(c.stack)-1] - if ref.count() == 0 || ref.index >= ref.count() { - return nil, nil, 0 - } - - // Retrieve value from node. - if ref.node != nil { - inode := &ref.node.inodes[ref.index] - return inode.key, inode.value, inode.flags - } - - // Or retrieve value from page. - elem := ref.page.leafPageElement(uint16(ref.index)) - return elem.key(), elem.value(), elem.flags -} - -// node returns the node that the cursor is currently positioned on. -func (c *Cursor) node() *node { - _assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack") - - // If the top of the stack is a leaf node then just return it. - if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() { - return ref.node - } - - // Start from root and traverse down the hierarchy. 
- var n = c.stack[0].node - if n == nil { - n = c.bucket.node(c.stack[0].page.id, nil) - } - for _, ref := range c.stack[:len(c.stack)-1] { - _assert(!n.isLeaf, "expected branch node") - n = n.childAt(int(ref.index)) - } - _assert(n.isLeaf, "expected leaf node") - return n -} - -// elemRef represents a reference to an element on a given page/node. -type elemRef struct { - page *page - node *node - index int -} - -// isLeaf returns whether the ref is pointing at a leaf page/node. -func (r *elemRef) isLeaf() bool { - if r.node != nil { - return r.node.isLeaf - } - return (r.page.flags & leafPageFlag) != 0 -} - -// count returns the number of inodes or page elements. -func (r *elemRef) count() int { - if r.node != nil { - return len(r.node.inodes) - } - return int(r.page.count) -} diff --git a/src/db.go b/src/db.go deleted file mode 100644 index cc3596f..0000000 --- a/src/db.go +++ /dev/null @@ -1,1037 +0,0 @@ -package bolt - -import ( - "errors" - "fmt" - "hash/fnv" - "log" - "os" - "runtime" - "runtime/debug" - "strings" - "sync" - "time" - "unsafe" -) - -// The largest step that can be taken when remapping the mmap. -const maxMmapStep = 1 << 30 // 1GB - -// The data file format version. -const version = 2 - -// Represents a marker value to indicate that a file is a Bolt DB. -const magic uint32 = 0xED0CDAED - -// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when -// syncing changes to a file. This is required as some operating systems, -// such as OpenBSD, do not have a unified buffer cache (UBC) and writes -// must be synchronized using the msync(2) syscall. -const IgnoreNoSync = runtime.GOOS == "openbsd" - -// Default values if not set in a DB instance. -const ( - DefaultMaxBatchSize int = 1000 - DefaultMaxBatchDelay = 10 * time.Millisecond - DefaultAllocSize = 16 * 1024 * 1024 -) - -// default page size for db is set to the OS page size. -var defaultPageSize = os.Getpagesize() - -// DB represents a collection of buckets persisted to a file on disk. -// All data access is performed through transactions which can be obtained through the DB. -// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. -type DB struct { - // When enabled, the database will perform a Check() after every commit. - // A panic is issued if the database is in an inconsistent state. This - // flag has a large performance impact so it should only be used for - // debugging purposes. - StrictMode bool - - // Setting the NoSync flag will cause the database to skip fsync() - // calls after each commit. This can be useful when bulk loading data - // into a database and you can restart the bulk load in the event of - // a system failure or database corruption. Do not set this flag for - // normal use. - // - // If the package global IgnoreNoSync constant is true, this value is - // ignored. See the comment on that constant for more details. - // - // THIS IS UNSAFE. PLEASE USE WITH CAUTION. - NoSync bool - - // When true, skips the truncate call when growing the database. - // Setting this to true is only safe on non-ext3/ext4 systems. - // Skipping truncation avoids preallocation of hard drive space and - // bypasses a truncate() and fsync() syscall on remapping. - // - // https://github.com/boltdb/bolt/issues/284 - NoGrowSync bool - - // If you want to read the entire database fast, you can set MmapFlag to - // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. - MmapFlags int - - // MaxBatchSize is the maximum size of a batch. 
Default value is - // copied from DefaultMaxBatchSize in Open. - // - // If <=0, disables batching. - // - // Do not change concurrently with calls to Batch. - MaxBatchSize int - - // MaxBatchDelay is the maximum delay before a batch starts. - // Default value is copied from DefaultMaxBatchDelay in Open. - // - // If <=0, effectively disables batching. - // - // Do not change concurrently with calls to Batch. - MaxBatchDelay time.Duration - - // AllocSize is the amount of space allocated when the database - // needs to create new pages. This is done to amortize the cost - // of truncate() and fsync() when growing the data file. - AllocSize int - - path string - file *os.File - lockfile *os.File // windows only - dataref []byte // mmap'ed readonly, write throws SEGV - data *[maxMapSize]byte - datasz int - filesz int // current on disk file size - meta0 *meta - meta1 *meta - pageSize int - opened bool - rwtx *Tx - txs []*Tx - freelist *freelist - stats Stats - - pagePool sync.Pool - - batchMu sync.Mutex - batch *batch - - rwlock sync.Mutex // Allows only one writer at a time. - metalock sync.Mutex // Protects meta page access. - mmaplock sync.RWMutex // Protects mmap access during remapping. - statlock sync.RWMutex // Protects stats access. - - ops struct { - writeAt func(b []byte, off int64) (n int, err error) - } - - // Read only mode. - // When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately. - readOnly bool -} - -// Path returns the path to currently open database file. -func (db *DB) Path() string { - return db.path -} - -// GoString returns the Go string representation of the database. -func (db *DB) GoString() string { - return fmt.Sprintf("bolt.DB{path:%q}", db.path) -} - -// String returns the string representation of the database. -func (db *DB) String() string { - return fmt.Sprintf("DB<%q>", db.path) -} - -// Open creates and opens a database at the given path. -// If the file does not exist then it will be created automatically. -// Passing in nil options will cause Bolt to open the database with the default options. -func Open(path string, mode os.FileMode, options *Options) (*DB, error) { - var db = &DB{opened: true} - - // Set default options if no options are provided. - if options == nil { - options = DefaultOptions - } - db.NoGrowSync = options.NoGrowSync - db.MmapFlags = options.MmapFlags - - // Set default values for later DB operations. - db.MaxBatchSize = DefaultMaxBatchSize - db.MaxBatchDelay = DefaultMaxBatchDelay - db.AllocSize = DefaultAllocSize - - flag := os.O_RDWR - if options.ReadOnly { - flag = os.O_RDONLY - db.readOnly = true - } - - // Open data file and separate sync handler for metadata writes. - db.path = path - var err error - if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { - _ = db.close() - return nil, err - } - - // Lock file so that other processes using Bolt in read-write mode cannot - // use the database at the same time. This would cause corruption since - // the two processes would write meta pages and free pages separately. - // The database file is locked exclusively (only one process can grab the lock) - // if !options.ReadOnly. - // The database file is locked using the shared lock (more than one process may - // hold a lock at the same time) otherwise (options.ReadOnly is set). 
- if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { - _ = db.close() - return nil, err - } - - // Default values for test hooks - db.ops.writeAt = db.file.WriteAt - - // Initialize the database if it doesn't exist. - if info, err := db.file.Stat(); err != nil { - return nil, err - } else if info.Size() == 0 { - // Initialize new files with meta pages. - if err := db.init(); err != nil { - return nil, err - } - } else { - // Read the first meta page to determine the page size. - var buf [0x1000]byte - if _, err := db.file.ReadAt(buf[:], 0); err == nil { - m := db.pageInBuffer(buf[:], 0).meta() - if err := m.validate(); err != nil { - // If we can't read the page size, we can assume it's the same - // as the OS -- since that's how the page size was chosen in the - // first place. - // - // If the first page is invalid and this OS uses a different - // page size than what the database was created with then we - // are out of luck and cannot access the database. - db.pageSize = os.Getpagesize() - } else { - db.pageSize = int(m.pageSize) - } - } - } - - // Initialize page pool. - db.pagePool = sync.Pool{ - New: func() interface{} { - return make([]byte, db.pageSize) - }, - } - - // Memory map the data file. - if err := db.mmap(options.InitialMmapSize); err != nil { - _ = db.close() - return nil, err - } - - // Read in the freelist. - db.freelist = newFreelist() - db.freelist.read(db.page(db.meta().freelist)) - - // Mark the database as opened and return. - return db, nil -} - -// mmap opens the underlying memory-mapped file and initializes the meta references. -// minsz is the minimum size that the new mmap can be. -func (db *DB) mmap(minsz int) error { - db.mmaplock.Lock() - defer db.mmaplock.Unlock() - - info, err := db.file.Stat() - if err != nil { - return fmt.Errorf("mmap stat error: %s", err) - } else if int(info.Size()) < db.pageSize*2 { - return fmt.Errorf("file size too small") - } - - // Ensure the size is at least the minimum size. - var size = int(info.Size()) - if size < minsz { - size = minsz - } - size, err = db.mmapSize(size) - if err != nil { - return err - } - - // Dereference all mmap references before unmapping. - if db.rwtx != nil { - db.rwtx.root.dereference() - } - - // Unmap existing data before continuing. - if err := db.munmap(); err != nil { - return err - } - - // Memory-map the data file as a byte slice. - if err := mmap(db, size); err != nil { - return err - } - - // Save references to the meta pages. - db.meta0 = db.page(0).meta() - db.meta1 = db.page(1).meta() - - // Validate the meta pages. We only return an error if both meta pages fail - // validation, since meta0 failing validation means that it wasn't saved - // properly -- but we can recover using meta1. And vice-versa. - err0 := db.meta0.validate() - err1 := db.meta1.validate() - if err0 != nil && err1 != nil { - return err0 - } - - return nil -} - -// munmap unmaps the data file from memory. -func (db *DB) munmap() error { - if err := munmap(db); err != nil { - return fmt.Errorf("unmap error: " + err.Error()) - } - return nil -} - -// mmapSize determines the appropriate size for the mmap given the current size -// of the database. The minimum size is 32KB and doubles until it reaches 1GB. -// Returns an error if the new mmap size is greater than the max allowed. -func (db *DB) mmapSize(size int) (int, error) { - // Double the size from 32KB until 1GB. 
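The doubling rule that comment describes can be pinned down with a hedged white-box check. This is a sketch only: it assumes mmapSize and the pageSize field are carried into src/gkv.go as-is, and that a test file can live in the same package:

package gkv

import "testing"

// TestMmapSizeDoubling checks the documented growth curve: a 32KB floor,
// powers of two up to 1GB, and 1GB as the last doubling step.
func TestMmapSizeDoubling(t *testing.T) {
	db := &DB{pageSize: 4096}
	cases := []struct{ in, want int }{
		{0, 1 << 15},       // floor: 32KB
		{50000, 1 << 16},   // rounded up to the next power of two
		{1 << 30, 1 << 30}, // 1GB is the last doubling step
	}
	for _, c := range cases {
		got, err := db.mmapSize(c.in)
		if err != nil {
			t.Fatal(err)
		}
		if got != c.want {
			t.Fatalf("mmapSize(%d) = %d, want %d", c.in, got, c.want)
		}
	}
}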
- for i := uint(15); i <= 30; i++ { - if size <= 1<<i { - return 1 << i, nil - } - } - - // Verify the requested size is not above the maximum allowed. - if size > maxMapSize { - return 0, fmt.Errorf("mmap too large") - } - - // If larger than 1GB then grow by 1GB at a time. - sz := int64(size) - if remainder := sz % int64(maxMmapStep); remainder > 0 { - sz += int64(maxMmapStep) - remainder - } - - // Ensure that the mmap size is a multiple of the page size. - // This should always be true since we're incrementing in MBs. - pageSize := int64(db.pageSize) - if (sz % pageSize) != 0 { - sz = ((sz / pageSize) + 1) * pageSize - } - - // If we've exceeded the max size then only grow up to the max size. - if sz > maxMapSize { - sz = maxMapSize - } - - return int(sz), nil -} - -// init creates a new database file and initializes its meta pages. -func (db *DB) init() error { - // Set the page size to the OS page size. - db.pageSize = os.Getpagesize() - - // Create two meta pages on a buffer. - buf := make([]byte, db.pageSize*4) - for i := 0; i < 2; i++ { - p := db.pageInBuffer(buf[:], pgid(i)) - p.id = pgid(i) - p.flags = metaPageFlag - - // Initialize the meta page. - m := p.meta() - m.magic = magic - m.version = version - m.pageSize = uint32(db.pageSize) - m.freelist = 2 - m.root = bucket{root: 3} - m.pgid = 4 - m.txid = txid(i) - m.checksum = m.sum64() - } - - // Write an empty freelist at page 3. - p := db.pageInBuffer(buf[:], pgid(2)) - p.id = pgid(2) - p.flags = freelistPageFlag - p.count = 0 - - // Write an empty leaf page at page 4. - p = db.pageInBuffer(buf[:], pgid(3)) - p.id = pgid(3) - p.flags = leafPageFlag - p.count = 0 - - // Write the buffer to our data file. - if _, err := db.ops.writeAt(buf, 0); err != nil { - return err - } - if err := fdatasync(db); err != nil { - return err - } - - return nil -} - -// Close releases all database resources. -// All transactions must be closed before closing the database. -func (db *DB) Close() error { - db.rwlock.Lock() - defer db.rwlock.Unlock() - - db.metalock.Lock() - defer db.metalock.Unlock() - - db.mmaplock.RLock() - defer db.mmaplock.RUnlock() - - return db.close() -} - -func (db *DB) close() error { - if !db.opened { - return nil - } - - db.opened = false - - db.freelist = nil - - // Clear ops. - db.ops.writeAt = nil - - // Close the mmap. - if err := db.munmap(); err != nil { - return err - } - - // Close file handles. - if db.file != nil { - // No need to unlock read-only file. - if !db.readOnly { - // Unlock the file. - if err := funlock(db); err != nil { - log.Printf("bolt.Close(): funlock error: %s", err) - } - } - - // Close the file descriptor. - if err := db.file.Close(); err != nil { - return fmt.Errorf("db file close: %s", err) - } - db.file = nil - } - - db.path = "" - return nil -} - -// Begin starts a new transaction. -// Multiple read-only transactions can be used concurrently but only one -// write transaction can be used at a time. Starting multiple write transactions -// will cause the calls to block and be serialized until the current write -// transaction finishes. -// -// Transactions should not be dependent on one another. Opening a read -// transaction and a write transaction in the same goroutine can cause the -// writer to deadlock because the database periodically needs to re-mmap itself -// as it grows and it cannot do that while a read transaction is open. 
-// -// If a long running read transaction (for example, a snapshot transaction) is -// needed, you might want to set DB.InitialMmapSize to a large enough value -// to avoid potential blocking of write transaction. -// -// IMPORTANT: You must close read-only transactions after you are finished or -// else the database will not reclaim old pages. -func (db *DB) Begin(writable bool) (*Tx, error) { - if writable { - return db.beginRWTx() - } - return db.beginTx() -} - -func (db *DB) beginTx() (*Tx, error) { - // Lock the meta pages while we initialize the transaction. We obtain - // the meta lock before the mmap lock because that's the order that the - // write transaction will obtain them. - db.metalock.Lock() - - // Obtain a read-only lock on the mmap. When the mmap is remapped it will - // obtain a write lock so all transactions must finish before it can be - // remapped. - db.mmaplock.RLock() - - // Exit if the database is not open yet. - if !db.opened { - db.mmaplock.RUnlock() - db.metalock.Unlock() - return nil, ErrDatabaseNotOpen - } - - // Create a transaction associated with the database. - t := &Tx{} - t.init(db) - - // Keep track of transaction until it closes. - db.txs = append(db.txs, t) - n := len(db.txs) - - // Unlock the meta pages. - db.metalock.Unlock() - - // Update the transaction stats. - db.statlock.Lock() - db.stats.TxN++ - db.stats.OpenTxN = n - db.statlock.Unlock() - - return t, nil -} - -func (db *DB) beginRWTx() (*Tx, error) { - // If the database was opened with Options.ReadOnly, return an error. - if db.readOnly { - return nil, ErrDatabaseReadOnly - } - - // Obtain writer lock. This is released by the transaction when it closes. - // This enforces only one writer transaction at a time. - db.rwlock.Lock() - - // Once we have the writer lock then we can lock the meta pages so that - // we can set up the transaction. - db.metalock.Lock() - defer db.metalock.Unlock() - - // Exit if the database is not open yet. - if !db.opened { - db.rwlock.Unlock() - return nil, ErrDatabaseNotOpen - } - - // Create a transaction associated with the database. - t := &Tx{writable: true} - t.init(db) - db.rwtx = t - - // Free any pages associated with closed read-only transactions. - var minid txid = 0xFFFFFFFFFFFFFFFF - for _, t := range db.txs { - if t.meta.txid < minid { - minid = t.meta.txid - } - } - if minid > 0 { - db.freelist.release(minid - 1) - } - - return t, nil -} - -// removeTx removes a transaction from the database. -func (db *DB) removeTx(tx *Tx) { - // Release the read lock on the mmap. - db.mmaplock.RUnlock() - - // Use the meta lock to restrict access to the DB object. - db.metalock.Lock() - - // Remove the transaction. - for i, t := range db.txs { - if t == tx { - last := len(db.txs) - 1 - db.txs[i] = db.txs[last] - db.txs[last] = nil - db.txs = db.txs[:last] - break - } - } - n := len(db.txs) - - // Unlock the meta pages. - db.metalock.Unlock() - - // Merge statistics. - db.statlock.Lock() - db.stats.OpenTxN = n - db.stats.TxStats.add(&tx.stats) - db.statlock.Unlock() -} - -// Update executes a function within the context of a read-write managed transaction. -// If no error is returned from the function then the transaction is committed. -// If an error is returned then the entire transaction is rolled back. -// Any error that is returned from the function or returned from the commit is -// returned from the Update() method. -// -// Attempting to manually commit or rollback within the function will cause a panic. 
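Before Update below, here is how the long-running-reader guidance above plays out in practice. A sketch, with the import path and the 1GB figure as assumptions; Begin, Options.InitialMmapSize, and Tx.Rollback are the APIs defined in this file:

// snapshot holds a long-lived read-only transaction open. InitialMmapSize
// is set up front so the writer never needs to re-mmap (and thus never
// blocks on) this reader.
func snapshot(path string) error {
	db, err := gkv.Open(path, 0600, &gkv.Options{
		InitialMmapSize: 1 << 30, // comfortably above the expected data size
	})
	if err != nil {
		return err
	}
	defer db.Close()

	tx, err := db.Begin(false)
	if err != nil {
		return err
	}
	// Read-only transactions must be closed, or old pages are never reclaimed.
	defer tx.Rollback()

	// ... walk the snapshot through tx ...
	return nil
}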
-func (db *DB) Update(fn func(*Tx) error) error { - t, err := db.Begin(true) - if err != nil { - return err - } - - // Make sure the transaction rolls back in the event of a panic. - defer func() { - if t.db != nil { - t.rollback() - } - }() - - // Mark as a managed tx so that the inner function cannot manually commit. - t.managed = true - - // If an error is returned from the function then rollback and return error. - err = fn(t) - t.managed = false - if err != nil { - _ = t.Rollback() - return err - } - - return t.Commit() -} - -// View executes a function within the context of a managed read-only transaction. -// Any error that is returned from the function is returned from the View() method. -// -// Attempting to manually rollback within the function will cause a panic. -func (db *DB) View(fn func(*Tx) error) error { - t, err := db.Begin(false) - if err != nil { - return err - } - - // Make sure the transaction rolls back in the event of a panic. - defer func() { - if t.db != nil { - t.rollback() - } - }() - - // Mark as a managed tx so that the inner function cannot manually rollback. - t.managed = true - - // If an error is returned from the function then pass it through. - err = fn(t) - t.managed = false - if err != nil { - _ = t.Rollback() - return err - } - - if err := t.Rollback(); err != nil { - return err - } - - return nil -} - -// Batch calls fn as part of a batch. It behaves similar to Update, -// except: -// -// 1. concurrent Batch calls can be combined into a single Bolt -// transaction. -// -// 2. the function passed to Batch may be called multiple times, -// regardless of whether it returns error or not. -// -// This means that Batch function side effects must be idempotent and -// take permanent effect only after a successful return is seen in -// caller. -// -// The maximum batch size and delay can be adjusted with DB.MaxBatchSize -// and DB.MaxBatchDelay, respectively. -// -// Batch is only useful when there are multiple goroutines calling it. -func (db *DB) Batch(fn func(*Tx) error) error { - errCh := make(chan error, 1) - - db.batchMu.Lock() - if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) { - // There is no existing batch, or the existing batch is full; start a new one. - db.batch = &batch{ - db: db, - } - db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) - } - db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh}) - if len(db.batch.calls) >= db.MaxBatchSize { - // wake up batch, it's ready to run - go db.batch.trigger() - } - db.batchMu.Unlock() - - err := <-errCh - if err == trySolo { - err = db.Update(fn) - } - return err -} - -type call struct { - fn func(*Tx) error - err chan<- error -} - -type batch struct { - db *DB - timer *time.Timer - start sync.Once - calls []call -} - -// trigger runs the batch if it hasn't already been run. -func (b *batch) trigger() { - b.start.Do(b.run) -} - -// run performs the transactions in the batch and communicates results -// back to DB.Batch. -func (b *batch) run() { - b.db.batchMu.Lock() - b.timer.Stop() - // Make sure no new work is added to this batch, but don't break - // other batches. - if b.db.batch == b { - b.db.batch = nil - } - b.db.batchMu.Unlock() - -retry: - for len(b.calls) > 0 { - var failIdx = -1 - err := b.db.Update(func(tx *Tx) error { - for i, c := range b.calls { - if err := safelyCall(c.fn, tx); err != nil { - failIdx = i - return err - } - } - return nil - }) - - if failIdx >= 0 { - // take the failing transaction out of the batch. 
it's - // safe to shorten b.calls here because db.batch no longer - // points to us, and we hold the mutex anyway. - c := b.calls[failIdx] - b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] - // tell the submitter re-run it solo, continue with the rest of the batch - c.err <- trySolo - continue retry - } - - // pass success, or bolt internal errors, to all callers - for _, c := range b.calls { - c.err <- err - } - break retry - } -} - -// trySolo is a special sentinel error value used for signaling that a -// transaction function should be re-run. It should never be seen by -// callers. -var trySolo = errors.New("batch function returned an error and should be re-run solo") - -type panicked struct { - reason interface{} -} - -func (p panicked) Error() string { - if err, ok := p.reason.(error); ok { - return err.Error() - } - return fmt.Sprintf("panic: %v", p.reason) -} - -func safelyCall(fn func(*Tx) error, tx *Tx) (err error) { - defer func() { - if p := recover(); p != nil { - err = panicked{p} - } - }() - return fn(tx) -} - -// Sync executes fdatasync() against the database file handle. -// -// This is not necessary under normal operation, however, if you use NoSync -// then it allows you to force the database file to sync against the disk. -func (db *DB) Sync() error { return fdatasync(db) } - -// Stats retrieves ongoing performance stats for the database. -// This is only updated when a transaction closes. -func (db *DB) Stats() Stats { - db.statlock.RLock() - defer db.statlock.RUnlock() - return db.stats -} - -// This is for internal access to the raw data bytes from the C cursor, use -// carefully, or not at all. -func (db *DB) Info() *Info { - return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize} -} - -// page retrieves a page reference from the mmap based on the current page size. -func (db *DB) page(id pgid) *page { - pos := id * pgid(db.pageSize) - return (*page)(unsafe.Pointer(&db.data[pos])) -} - -// pageInBuffer retrieves a page reference from a given byte array based on the current page size. -func (db *DB) pageInBuffer(b []byte, id pgid) *page { - return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)])) -} - -// meta retrieves the current meta page reference. -func (db *DB) meta() *meta { - // We have to return the meta with the highest txid which doesn't fail - // validation. Otherwise, we can cause errors when in fact the database is - // in a consistent state. metaA is the one with the higher txid. - metaA := db.meta0 - metaB := db.meta1 - if db.meta1.txid > db.meta0.txid { - metaA = db.meta1 - metaB = db.meta0 - } - - // Use higher meta page if valid. Otherwise fallback to previous, if valid. - if err := metaA.validate(); err == nil { - return metaA - } else if err := metaB.validate(); err == nil { - return metaB - } - - // This should never be reached, because both meta1 and meta0 were validated - // on mmap() and we do fsync() on every write. - panic("bolt.DB.meta(): invalid meta pages") -} - -// allocate returns a contiguous block of memory starting at a given page. -func (db *DB) allocate(count int) (*page, error) { - // Allocate a temporary buffer for the page. - var buf []byte - if count == 1 { - buf = db.pagePool.Get().([]byte) - } else { - buf = make([]byte, count*db.pageSize) - } - p := (*page)(unsafe.Pointer(&buf[0])) - p.overflow = uint32(count - 1) - - // Use pages from the freelist if they are available. - if p.id = db.freelist.allocate(count); p.id != 0 { - return p, nil - } - - // Resize mmap() if we're at the end. 
- p.id = db.rwtx.meta.pgid - var minsz = int((p.id+pgid(count))+1) * db.pageSize - if minsz >= db.datasz { - if err := db.mmap(minsz); err != nil { - return nil, fmt.Errorf("mmap allocate error: %s", err) - } - } - - // Move the page id high water mark. - db.rwtx.meta.pgid += pgid(count) - - return p, nil -} - -// grow grows the size of the database to the given sz. -func (db *DB) grow(sz int) error { - // Ignore if the new size is less than available file size. - if sz <= db.filesz { - return nil - } - - // If the data is smaller than the alloc size then only allocate what's needed. - // Once it goes over the allocation size then allocate in chunks. - if db.datasz < db.AllocSize { - sz = db.datasz - } else { - sz += db.AllocSize - } - - // Truncate and fsync to ensure file size metadata is flushed. - // https://github.com/boltdb/bolt/issues/284 - if !db.NoGrowSync && !db.readOnly { - if runtime.GOOS != "windows" { - if err := db.file.Truncate(int64(sz)); err != nil { - return fmt.Errorf("file resize error: %s", err) - } - } - if err := db.file.Sync(); err != nil { - return fmt.Errorf("file sync error: %s", err) - } - } - - db.filesz = sz - return nil -} - -func (db *DB) IsReadOnly() bool { - return db.readOnly -} - -// Options represents the options that can be set when opening a database. -type Options struct { - // Timeout is the amount of time to wait to obtain a file lock. - // When set to zero it will wait indefinitely. This option is only - // available on Darwin and Linux. - Timeout time.Duration - - // Sets the DB.NoGrowSync flag before memory mapping the file. - NoGrowSync bool - - // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to - // grab a shared lock (UNIX). - ReadOnly bool - - // Sets the DB.MmapFlags flag before memory mapping the file. - MmapFlags int - - // InitialMmapSize is the initial mmap size of the database - // in bytes. Read transactions won't block write transaction - // if the InitialMmapSize is large enough to hold database mmap - // size. (See DB.Begin for more information) - // - // If <=0, the initial map size is 0. - // If initialMmapSize is smaller than the previous database size, - // it takes no effect. - InitialMmapSize int -} - -// DefaultOptions represent the options used if nil options are passed into Open(). -// No timeout is used which will cause Bolt to wait indefinitely for a lock. -var DefaultOptions = &Options{ - Timeout: 0, - NoGrowSync: false, -} - -// Stats represents statistics about the database. -type Stats struct { - // Freelist stats - FreePageN int // total number of free pages on the freelist - PendingPageN int // total number of pending pages on the freelist - FreeAlloc int // total bytes allocated in free pages - FreelistInuse int // total bytes used by the freelist - - // Transaction stats - TxN int // total number of started read transactions - OpenTxN int // number of currently open read transactions - - TxStats TxStats // global, ongoing stats. -} - -// Sub calculates and returns the difference between two sets of database stats. -// This is useful when obtaining stats at two different points and time and -// you need the performance counters that occurred within that time span. 
-func (s *Stats) Sub(other *Stats) Stats { - if other == nil { - return *s - } - var diff Stats - diff.FreePageN = s.FreePageN - diff.PendingPageN = s.PendingPageN - diff.FreeAlloc = s.FreeAlloc - diff.FreelistInuse = s.FreelistInuse - diff.TxN = s.TxN - other.TxN - diff.TxStats = s.TxStats.Sub(&other.TxStats) - return diff -} - -func (s *Stats) add(other *Stats) { - s.TxStats.add(&other.TxStats) -} - -type Info struct { - Data uintptr - PageSize int -} - -type meta struct { - magic uint32 - version uint32 - pageSize uint32 - flags uint32 - root bucket - freelist pgid - pgid pgid - txid txid - checksum uint64 -} - -// validate checks the marker bytes and version of the meta page to ensure it matches this binary. -func (m *meta) validate() error { - if m.magic != magic { - return ErrInvalid - } else if m.version != version { - return ErrVersionMismatch - } else if m.checksum != 0 && m.checksum != m.sum64() { - return ErrChecksum - } - return nil -} - -// copy copies one meta object to another. -func (m *meta) copy(dest *meta) { - *dest = *m -} - -// write writes the meta onto a page. -func (m *meta) write(p *page) { - if m.root.root >= m.pgid { - panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid)) - } else if m.freelist >= m.pgid { - panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid)) - } - - // Page id is either going to be 0 or 1 which we can determine by the transaction ID. - p.id = pgid(m.txid % 2) - p.flags |= metaPageFlag - - // Calculate the checksum. - m.checksum = m.sum64() - - m.copy(p.meta()) -} - -// generates the checksum for the meta. -func (m *meta) sum64() uint64 { - var h = fnv.New64a() - _, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:]) - return h.Sum64() -} - -// _assert will panic with a given formatted message if the given condition is false. -func _assert(condition bool, msg string, v ...interface{}) { - if !condition { - panic(fmt.Sprintf("assertion failed: "+msg, v...)) - } -} - -func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) } -func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) } - -func printstack() { - stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n") - fmt.Fprintln(os.Stderr, stack) -} diff --git a/src/doc.go b/src/doc.go deleted file mode 100644 index cc93784..0000000 --- a/src/doc.go +++ /dev/null @@ -1,44 +0,0 @@ -/* -Package bolt implements a low-level key/value store in pure Go. It supports -fully serializable transactions, ACID semantics, and lock-free MVCC with -multiple readers and a single writer. Bolt can be used for projects that -want a simple data store without the need to add large dependencies such as -Postgres or MySQL. - -Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is -optimized for fast read access and does not require recovery in the event of a -system crash. Transactions which have not finished committing will simply be -rolled back in the event of a crash. - -The design of Bolt is based on Howard Chu's LMDB database project. - -Bolt currently works on Windows, Mac OS X, and Linux. - - -Basics - -There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is -a collection of buckets and is represented by a single file on disk. A bucket is -a collection of unique keys that are associated with values. - -Transactions provide either read-only or read-write access to the database. 
-Read-only transactions can retrieve key/value pairs and can use Cursors to -iterate over the dataset sequentially. Read-write transactions can create and -delete buckets and can insert and remove keys. Only one read-write transaction -is allowed at a time. - - -Caveats - -The database uses a read-only, memory-mapped data file to ensure that -applications cannot corrupt the database, however, this means that keys and -values returned from Bolt cannot be changed. Writing to a read-only byte slice -will cause Go to panic. - -Keys and values retrieved from the database are only valid for the life of -the transaction. When used outside the transaction, these byte slices can -point to different data or can point to invalid memory which will cause a panic. - - -*/ -package bolt diff --git a/src/errors.go b/src/errors.go deleted file mode 100644 index a3620a3..0000000 --- a/src/errors.go +++ /dev/null @@ -1,71 +0,0 @@ -package bolt - -import "errors" - -// These errors can be returned when opening or calling methods on a DB. -var ( - // ErrDatabaseNotOpen is returned when a DB instance is accessed before it - // is opened or after it is closed. - ErrDatabaseNotOpen = errors.New("database not open") - - // ErrDatabaseOpen is returned when opening a database that is - // already open. - ErrDatabaseOpen = errors.New("database already open") - - // ErrInvalid is returned when both meta pages on a database are invalid. - // This typically occurs when a file is not a bolt database. - ErrInvalid = errors.New("invalid database") - - // ErrVersionMismatch is returned when the data file was created with a - // different version of Bolt. - ErrVersionMismatch = errors.New("version mismatch") - - // ErrChecksum is returned when either meta page checksum does not match. - ErrChecksum = errors.New("checksum error") - - // ErrTimeout is returned when a database cannot obtain an exclusive lock - // on the data file after the timeout passed to Open(). - ErrTimeout = errors.New("timeout") -) - -// These errors can occur when beginning or committing a Tx. -var ( - // ErrTxNotWritable is returned when performing a write operation on a - // read-only transaction. - ErrTxNotWritable = errors.New("tx not writable") - - // ErrTxClosed is returned when committing or rolling back a transaction - // that has already been committed or rolled back. - ErrTxClosed = errors.New("tx closed") - - // ErrDatabaseReadOnly is returned when a mutating transaction is started on a - // read-only database. - ErrDatabaseReadOnly = errors.New("database is in read-only mode") -) - -// These errors can occur when putting or deleting a value or a bucket. -var ( - // ErrBucketNotFound is returned when trying to access a bucket that has - // not been created yet. - ErrBucketNotFound = errors.New("bucket not found") - - // ErrBucketExists is returned when creating a bucket that already exists. - ErrBucketExists = errors.New("bucket already exists") - - // ErrBucketNameRequired is returned when creating a bucket with a blank name. - ErrBucketNameRequired = errors.New("bucket name required") - - // ErrKeyRequired is returned when inserting a zero-length key. - ErrKeyRequired = errors.New("key required") - - // ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize. - ErrKeyTooLarge = errors.New("key too large") - - // ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize. 
- ErrValueTooLarge = errors.New("value too large") - - // ErrIncompatibleValue is returned when trying create or delete a bucket - // on an existing non-bucket key or when trying to create or delete a - // non-bucket key on an existing bucket key. - ErrIncompatibleValue = errors.New("incompatible value") -) diff --git a/src/freelist.go b/src/freelist.go deleted file mode 100644 index aba48f5..0000000 --- a/src/freelist.go +++ /dev/null @@ -1,252 +0,0 @@ -package bolt - -import ( - "fmt" - "sort" - "unsafe" -) - -// freelist represents a list of all pages that are available for allocation. -// It also tracks pages that have been freed but are still in use by open transactions. -type freelist struct { - ids []pgid // all free and available free page ids. - pending map[txid][]pgid // mapping of soon-to-be free page ids by tx. - cache map[pgid]bool // fast lookup of all free and pending page ids. -} - -// newFreelist returns an empty, initialized freelist. -func newFreelist() *freelist { - return &freelist{ - pending: make(map[txid][]pgid), - cache: make(map[pgid]bool), - } -} - -// size returns the size of the page after serialization. -func (f *freelist) size() int { - n := f.count() - if n >= 0xFFFF { - // The first element will be used to store the count. See freelist.write. - n++ - } - return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n) -} - -// count returns count of pages on the freelist -func (f *freelist) count() int { - return f.free_count() + f.pending_count() -} - -// free_count returns count of free pages -func (f *freelist) free_count() int { - return len(f.ids) -} - -// pending_count returns count of pending pages -func (f *freelist) pending_count() int { - var count int - for _, list := range f.pending { - count += len(list) - } - return count -} - -// copyall copies into dst a list of all free ids and all pending ids in one sorted list. -// f.count returns the minimum length required for dst. -func (f *freelist) copyall(dst []pgid) { - m := make(pgids, 0, f.pending_count()) - for _, list := range f.pending { - m = append(m, list...) - } - sort.Sort(m) - mergepgids(dst, f.ids, m) -} - -// allocate returns the starting page id of a contiguous list of pages of a given size. -// If a contiguous block cannot be found then 0 is returned. -func (f *freelist) allocate(n int) pgid { - if len(f.ids) == 0 { - return 0 - } - - var initial, previd pgid - for i, id := range f.ids { - if id <= 1 { - panic(fmt.Sprintf("invalid page allocation: %d", id)) - } - - // Reset initial page if this is not contiguous. - if previd == 0 || id-previd != 1 { - initial = id - } - - // If we found a contiguous block then remove it and return it. - if (id-initial)+1 == pgid(n) { - // If we're allocating off the beginning then take the fast path - // and just adjust the existing slice. This will use extra memory - // temporarily but the append() in free() will realloc the slice - // as is necessary. - if (i + 1) == n { - f.ids = f.ids[i+1:] - } else { - copy(f.ids[i-n+1:], f.ids[i+1:]) - f.ids = f.ids[:len(f.ids)-n] - } - - // Remove from the free cache. - for i := pgid(0); i < pgid(n); i++ { - delete(f.cache, initial+i) - } - - return initial - } - - previd = id - } - return 0 -} - -// free releases a page and its overflow for a given transaction id. -// If the page is already free then a panic will occur. -func (f *freelist) free(txid txid, p *page) { - if p.id <= 1 { - panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id)) - } - - // Free page and all its overflow pages. 
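The contiguous-run search that allocate's comment above promises can be demonstrated with a white-box sketch, assuming the freelist moves into src/gkv.go unchanged and a same-package test file:

package gkv

import "testing"

// TestFreelistAllocate checks that allocate returns the start of a
// contiguous run of the requested length, or 0 when none exists.
func TestFreelistAllocate(t *testing.T) {
	f := newFreelist()
	f.ids = []pgid{3, 4, 5, 6, 9} // 3-6 are contiguous; 9 is not
	if id := f.allocate(3); id != 3 {
		t.Fatalf("expected the run starting at 3, got %d", id)
	}
	if id := f.allocate(3); id != 0 { // only 6 and 9 remain: no run of 3
		t.Fatalf("expected 0 for an unsatisfiable request, got %d", id)
	}
}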
- var ids = f.pending[txid] - for id := p.id; id <= p.id+pgid(p.overflow); id++ { - // Verify that page is not already free. - if f.cache[id] { - panic(fmt.Sprintf("page %d already freed", id)) - } - - // Add to the freelist and cache. - ids = append(ids, id) - f.cache[id] = true - } - f.pending[txid] = ids -} - -// release moves all page ids for a transaction id (or older) to the freelist. -func (f *freelist) release(txid txid) { - m := make(pgids, 0) - for tid, ids := range f.pending { - if tid <= txid { - // Move transaction's pending pages to the available freelist. - // Don't remove from the cache since the page is still free. - m = append(m, ids...) - delete(f.pending, tid) - } - } - sort.Sort(m) - f.ids = pgids(f.ids).merge(m) -} - -// rollback removes the pages from a given pending tx. -func (f *freelist) rollback(txid txid) { - // Remove page ids from cache. - for _, id := range f.pending[txid] { - delete(f.cache, id) - } - - // Remove pages from pending list. - delete(f.pending, txid) -} - -// freed returns whether a given page is in the free list. -func (f *freelist) freed(pgid pgid) bool { - return f.cache[pgid] -} - -// read initializes the freelist from a freelist page. -func (f *freelist) read(p *page) { - // If the page.count is at the max uint16 value (64k) then it's considered - // an overflow and the size of the freelist is stored as the first element. - idx, count := 0, int(p.count) - if count == 0xFFFF { - idx = 1 - count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0]) - } - - // Copy the list of page ids from the freelist. - if count == 0 { - f.ids = nil - } else { - ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx:count] - f.ids = make([]pgid, len(ids)) - copy(f.ids, ids) - - // Make sure they're sorted. - sort.Sort(pgids(f.ids)) - } - - // Rebuild the page cache. - f.reindex() -} - -// write writes the page ids onto a freelist page. All free and pending ids are -// saved to disk since in the event of a program crash, all pending ids will -// become free. -func (f *freelist) write(p *page) error { - // Combine the old free pgids and pgids waiting on an open transaction. - - // Update the header flag. - p.flags |= freelistPageFlag - - // The page.count can only hold up to 64k elements so if we overflow that - // number then we handle it by putting the size in the first element. - lenids := f.count() - if lenids == 0 { - p.count = uint16(lenids) - } else if lenids < 0xFFFF { - p.count = uint16(lenids) - f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:]) - } else { - p.count = 0xFFFF - ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids) - f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:]) - } - - return nil -} - -// reload reads the freelist from a page and filters out pending items. -func (f *freelist) reload(p *page) { - f.read(p) - - // Build a cache of only pending pages. - pcache := make(map[pgid]bool) - for _, pendingIDs := range f.pending { - for _, pendingID := range pendingIDs { - pcache[pendingID] = true - } - } - - // Check each page in the freelist and build a new available freelist - // with any pages not in the pending lists. - var a []pgid - for _, id := range f.ids { - if !pcache[id] { - a = append(a, id) - } - } - f.ids = a - - // Once the available list is rebuilt then rebuild the free cache so that - // it includes the available and pending free pages. - f.reindex() -} - -// reindex rebuilds the free cache based on available and pending free lists. 
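The free/release lifecycle above (pages go pending under the freeing transaction's id, then become allocatable once release is called for that id) can likewise be pinned down in a hedged same-package test, again assuming the freelist is carried over intact:

package gkv

import "testing"

// TestFreelistRelease checks that freed pages stay pending until their
// transaction id is released, at which point they become allocatable.
func TestFreelistRelease(t *testing.T) {
	f := newFreelist()
	f.free(100, &page{id: 12, overflow: 1}) // pages 12 and 13 pend under tx 100
	if !f.freed(12) || !f.freed(13) {
		t.Fatal("pages should be tracked as pending")
	}
	if f.free_count() != 0 {
		t.Fatal("pending pages must not be allocatable yet")
	}
	f.release(100) // everything pending at txid <= 100 becomes free
	if f.free_count() != 2 {
		t.Fatalf("expected 2 free pages, got %d", f.free_count())
	}
}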
-func (f *freelist) reindex() { - f.cache = make(map[pgid]bool, len(f.ids)) - for _, id := range f.ids { - f.cache[id] = true - } - for _, pendingIDs := range f.pending { - for _, pendingID := range pendingIDs { - f.cache[pendingID] = true - } - } -} diff --git a/src/gkv.go b/src/gkv.go new file mode 100644 index 0000000..715f6f6 --- /dev/null +++ b/src/gkv.go @@ -0,0 +1,5749 @@ +package gkv + +import ( + "errors" + "fmt" + "hash/fnv" + "log" + "os" + "runtime" + "runtime/debug" + "strings" + "sync" + "time" + "unsafe" + "bytes" + "sort" + "syscall" + "io" + "encoding/binary" + "flag" + "io/ioutil" + "math/rand" + "runtime/pprof" + "strconv" + "unicode" + "unicode/utf8" +) + + + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0xFFFFFFFFFFFF // 256TB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0x7FFFFFFF + +// Are unaligned load/stores broken on this arch? +var brokenUnaligned = false + + +// fdatasync flushes written data to a file descriptor. +func fdatasync(db *DB) error { + return db.file.Sync() +} + + +// flock acquires an advisory lock on a file descriptor. +func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error { + var t time.Time + for { + // If we're beyond our timeout then return an error. + // This can only occur after we've attempted a flock once. + if t.IsZero() { + t = time.Now() + } else if timeout > 0 && time.Since(t) > timeout { + return ErrTimeout + } + flag := syscall.LOCK_SH + if exclusive { + flag = syscall.LOCK_EX + } + + // Otherwise attempt to obtain an exclusive lock. + err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) + if err == nil { + return nil + } else if err != syscall.EWOULDBLOCK { + return err + } + + // Wait for a bit and try again. + time.Sleep(50 * time.Millisecond) + } +} + +// funlock releases an advisory lock on a file descriptor. +func funlock(db *DB) error { + return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) +} + +// mmap memory maps a DB's data file. +func mmap(db *DB, sz int) error { + // Map the data file to memory. + b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) + if err != nil { + return err + } + + // Advise the kernel that the mmap is accessed randomly. + if err := madvise(b, syscall.MADV_RANDOM); err != nil { + return fmt.Errorf("madvise: %s", err) + } + + // Save the original byte slice and convert to a byte array pointer. + db.dataref = b + db.data = (*[maxMapSize]byte)(unsafe.Pointer(&b[0])) + db.datasz = sz + return nil +} + +// munmap unmaps a DB's data file from memory. +func munmap(db *DB) error { + // Ignore the unmap if we have no mapped data. + if db.dataref == nil { + return nil + } + + // Unmap using the original byte slice. + err := syscall.Munmap(db.dataref) + db.dataref = nil + db.data = nil + db.datasz = 0 + return err +} + +// NOTE: This function is copied from stdlib because it is not available on darwin. +func madvise(b []byte, advice int) (err error) { + _, _, e1 := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice)) + if e1 != 0 { + err = e1 + } + return +} + +const ( + // MaxKeySize is the maximum length of a key, in bytes. + MaxKeySize = 32768 + + // MaxValueSize is the maximum length of a value, in bytes. 
+ MaxValueSize = (1 << 31) - 2 +) + +const ( + maxUint = ^uint(0) + minUint = 0 + maxInt = int(^uint(0) >> 1) + minInt = -maxInt - 1 +) + +const bucketHeaderSize = int(unsafe.Sizeof(bucket{})) + +const ( + minFillPercent = 0.1 + maxFillPercent = 1.0 +) + +// DefaultFillPercent is the percentage that split pages are filled. +// This value can be changed by setting Bucket.FillPercent. +const DefaultFillPercent = 0.5 + +// Bucket represents a collection of key/value pairs inside the database. +type Bucket struct { + *bucket + tx *Tx // the associated transaction + buckets map[string]*Bucket // subbucket cache + page *page // inline page reference + rootNode *node // materialized node for the root page. + nodes map[pgid]*node // node cache + + // Sets the threshold for filling nodes when they split. By default, + // the bucket will fill to 50% but it can be useful to increase this + // amount if you know that your write workloads are mostly append-only. + // + // This is non-persisted across transactions so it must be set in every Tx. + FillPercent float64 +} + +// bucket represents the on-file representation of a bucket. +// This is stored as the "value" of a bucket key. If the bucket is small enough, +// then its root page can be stored inline in the "value", after the bucket +// header. In the case of inline buckets, the "root" will be 0. +type bucket struct { + root pgid // page id of the bucket's root-level page + sequence uint64 // monotonically incrementing, used by NextSequence() +} + +// newBucket returns a new bucket associated with a transaction. +func newBucket(tx *Tx) Bucket { + var b = Bucket{tx: tx, FillPercent: DefaultFillPercent} + if tx.writable { + b.buckets = make(map[string]*Bucket) + b.nodes = make(map[pgid]*node) + } + return b +} + +// Tx returns the tx of the bucket. +func (b *Bucket) Tx() *Tx { + return b.tx +} + +// Root returns the root of the bucket. +func (b *Bucket) Root() pgid { + return b.root +} + +// Writable returns whether the bucket is writable. +func (b *Bucket) Writable() bool { + return b.tx.writable +} + +// Cursor creates a cursor associated with the bucket. +// The cursor is only valid as long as the transaction is open. +// Do not use a cursor after the transaction is closed. +func (b *Bucket) Cursor() *Cursor { + // Update transaction statistics. + b.tx.stats.CursorCount++ + + // Allocate and return a cursor. + return &Cursor{ + bucket: b, + stack: make([]elemRef, 0), + } +} + +// Bucket retrieves a nested bucket by name. +// Returns nil if the bucket does not exist. +// The bucket instance is only valid for the lifetime of the transaction. +func (b *Bucket) Bucket(name []byte) *Bucket { + if b.buckets != nil { + if child := b.buckets[string(name)]; child != nil { + return child + } + } + + // Move cursor to key. + c := b.Cursor() + k, v, flags := c.seek(name) + + // Return nil if the key doesn't exist or it is not a bucket. + if !bytes.Equal(name, k) || (flags&bucketLeafFlag) == 0 { + return nil + } + + // Otherwise create a bucket and cache it. + var child = b.openBucket(v) + if b.buckets != nil { + b.buckets[string(name)] = child + } + + return child +} + +// Helper method that re-interprets a sub-bucket value +// from a parent into a Bucket +func (b *Bucket) openBucket(value []byte) *Bucket { + var child = newBucket(b.tx) + + // If unaligned load/stores are broken on this arch and value is + // unaligned simply clone to an aligned byte array. 
+	unaligned := brokenUnaligned && uintptr(unsafe.Pointer(&value[0]))&3 != 0
+
+	if unaligned {
+		value = cloneBytes(value)
+	}
+
+	// If this is a writable transaction then we need to copy the bucket entry.
+	// Read-only transactions can point directly at the mmap entry.
+	if b.tx.writable && !unaligned {
+		child.bucket = &bucket{}
+		*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
+	} else {
+		child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
+	}
+
+	// Save a reference to the inline page if the bucket is inline.
+	if child.root == 0 {
+		child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
+	}
+
+	return &child
+}
+
+// CreateBucket creates a new bucket at the given key and returns the new bucket.
+// Returns an error if the key already exists, if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
+	if b.tx.db == nil {
+		return nil, ErrTxClosed
+	} else if !b.tx.writable {
+		return nil, ErrTxNotWritable
+	} else if len(key) == 0 {
+		return nil, ErrBucketNameRequired
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return an error if there is an existing key.
+	if bytes.Equal(key, k) {
+		if (flags & bucketLeafFlag) != 0 {
+			return nil, ErrBucketExists
+		}
+		return nil, ErrIncompatibleValue
+	}
+
+	// Create empty, inline bucket.
+	var bucket = Bucket{
+		bucket:      &bucket{},
+		rootNode:    &node{isLeaf: true},
+		FillPercent: DefaultFillPercent,
+	}
+	var value = bucket.write()
+
+	// Insert into node.
+	key = cloneBytes(key)
+	c.node().put(key, key, value, 0, bucketLeafFlag)
+
+	// Since subbuckets are not allowed on inline buckets, we need to
+	// dereference the inline page, if it exists. This will cause the bucket
+	// to be treated as a regular, non-inline bucket for the rest of the tx.
+	b.page = nil
+
+	return b.Bucket(key), nil
+}
+
+// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
+// Returns an error if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
+	child, err := b.CreateBucket(key)
+	if err == ErrBucketExists {
+		return b.Bucket(key), nil
+	} else if err != nil {
+		return nil, err
+	}
+	return child, nil
+}
+
+// DeleteBucket deletes a bucket at the given key.
+// Returns an error if the bucket does not exist, or if the key represents a non-bucket value.
+func (b *Bucket) DeleteBucket(key []byte) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return an error if bucket doesn't exist or is not a bucket.
+	if !bytes.Equal(key, k) {
+		return ErrBucketNotFound
+	} else if (flags & bucketLeafFlag) == 0 {
+		return ErrIncompatibleValue
+	}
+
+	// Recursively delete all child buckets.
+	child := b.Bucket(key)
+	err := child.ForEach(func(k, v []byte) error {
+		if v == nil {
+			if err := child.DeleteBucket(k); err != nil {
+				return fmt.Errorf("delete bucket: %s", err)
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		return err
+	}
+
+	// Remove cached copy.
+	delete(b.buckets, string(key))
+
+	// Release all bucket pages to freelist.
+	child.nodes = nil
+	child.rootNode = nil
+	child.free()
+
+	// Delete the node if we have a matching key.
+	c.node().del(key)
+
+	return nil
+}
+
+// Get retrieves the value for a key in the bucket.
+// Returns a nil value if the key does not exist or if the key is a nested bucket.
+// The returned value is only valid for the life of the transaction.
+func (b *Bucket) Get(key []byte) []byte {
+	k, v, flags := b.Cursor().seek(key)
+
+	// Return nil if this is a bucket.
+	if (flags & bucketLeafFlag) != 0 {
+		return nil
+	}
+
+	// If our target node isn't the same key as what's passed in then return nil.
+	if !bytes.Equal(key, k) {
+		return nil
+	}
+	return v
+}
+
+// Put sets the value for a key in the bucket.
+// If the key exists then its previous value will be overwritten.
+// Supplied value must remain valid for the life of the transaction.
+// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
+func (b *Bucket) Put(key []byte, value []byte) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	} else if len(key) == 0 {
+		return ErrKeyRequired
+	} else if len(key) > MaxKeySize {
+		return ErrKeyTooLarge
+	} else if int64(len(value)) > MaxValueSize {
+		return ErrValueTooLarge
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	k, _, flags := c.seek(key)
+
+	// Return an error if there is an existing key with a bucket value.
+	if bytes.Equal(key, k) && (flags&bucketLeafFlag) != 0 {
+		return ErrIncompatibleValue
+	}
+
+	// Insert into node.
+	key = cloneBytes(key)
+	c.node().put(key, key, value, 0, 0)
+
+	return nil
+}
+
+// Delete removes a key from the bucket.
+// If the key does not exist then nothing is done and a nil error is returned.
+// Returns an error if the bucket was created from a read-only transaction.
+func (b *Bucket) Delete(key []byte) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Move cursor to correct position.
+	c := b.Cursor()
+	_, _, flags := c.seek(key)
+
+	// Return an error if there is an existing bucket value.
+	if (flags & bucketLeafFlag) != 0 {
+		return ErrIncompatibleValue
+	}
+
+	// Delete the node if we have a matching key.
+	c.node().del(key)
+
+	return nil
+}
+
+// Sequence returns the current integer for the bucket without incrementing it.
+func (b *Bucket) Sequence() uint64 { return b.bucket.sequence }
+
+// SetSequence updates the sequence number for the bucket.
+func (b *Bucket) SetSequence(v uint64) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	} else if !b.Writable() {
+		return ErrTxNotWritable
+	}
+
+	// Materialize the root node if it hasn't been already so that the
+	// bucket will be saved during commit.
+	if b.rootNode == nil {
+		_ = b.node(b.root, nil)
+	}
+
+	// Set the sequence.
+	b.bucket.sequence = v
+	return nil
+}
+
+// NextSequence returns an autoincrementing integer for the bucket.
+func (b *Bucket) NextSequence() (uint64, error) {
+	if b.tx.db == nil {
+		return 0, ErrTxClosed
+	} else if !b.Writable() {
+		return 0, ErrTxNotWritable
+	}
+
+	// Materialize the root node if it hasn't been already so that the
+	// bucket will be saved during commit.
+	if b.rootNode == nil {
+		_ = b.node(b.root, nil)
+	}
+
+	// Increment and return the sequence.
+	b.bucket.sequence++
+	return b.bucket.sequence, nil
+}
+
+// ForEach executes a function for each key/value pair in a bucket.
+// If the provided function returns an error then the iteration is stopped and
+// the error is returned to the caller. The provided function must not modify
+// the bucket; this will result in undefined behavior.
+func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
+	if b.tx.db == nil {
+		return ErrTxClosed
+	}
+	c := b.Cursor()
+	for k, v := c.First(); k != nil; k, v = c.Next() {
+		if err := fn(k, v); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Stats returns stats on a bucket.
+func (b *Bucket) Stats() BucketStats {
+	var s, subStats BucketStats
+	pageSize := b.tx.db.pageSize
+	s.BucketN += 1
+	if b.root == 0 {
+		s.InlineBucketN += 1
+	}
+	b.forEachPage(func(p *page, depth int) {
+		if (p.flags & leafPageFlag) != 0 {
+			s.KeyN += int(p.count)
+
+			// used totals the used bytes for the page
+			used := pageHeaderSize
+
+			if p.count != 0 {
+				// If page has any elements, add all element headers.
+				used += leafPageElementSize * int(p.count-1)
+
+				// Add all element key, value sizes.
+				// The computation takes advantage of the fact that the position
+				// of the last element's key/value equals the total of the sizes
+				// of all previous elements' keys and values.
+				// It also includes the last element's header.
+				lastElement := p.leafPageElement(p.count - 1)
+				used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
+			}
+
+			if b.root == 0 {
+				// For inlined bucket just update the inline stats
+				s.InlineBucketInuse += used
+			} else {
+				// For non-inlined bucket update all the leaf stats
+				s.LeafPageN++
+				s.LeafInuse += used
+				s.LeafOverflowN += int(p.overflow)
+
+				// Collect stats from sub-buckets.
+				// Do that by iterating over all element headers
+				// looking for the ones with the bucketLeafFlag.
+				for i := uint16(0); i < p.count; i++ {
+					e := p.leafPageElement(i)
+					if (e.flags & bucketLeafFlag) != 0 {
+						// For any bucket element, open the element value
+						// and recursively call Stats on the contained bucket.
+						subStats.Add(b.openBucket(e.value()).Stats())
+					}
+				}
+			}
+		} else if (p.flags & branchPageFlag) != 0 {
+			s.BranchPageN++
+			lastElement := p.branchPageElement(p.count - 1)
+
+			// used totals the used bytes for the page
+			// Add header and all element headers.
+			used := pageHeaderSize + (branchPageElementSize * int(p.count-1))
+
+			// Add size of all keys and values.
+			// Again, use the fact that the last element's position equals
+			// the total of key, value sizes of all previous elements.
+			used += int(lastElement.pos + lastElement.ksize)
+			s.BranchInuse += used
+			s.BranchOverflowN += int(p.overflow)
+		}
+
+		// Keep track of maximum page depth.
+		if depth+1 > s.Depth {
+			s.Depth = (depth + 1)
+		}
+	})
+
+	// Alloc stats can be computed from page counts and pageSize.
+	s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
+	s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize
+
+	// Add the max depth of sub-buckets to get total nested depth.
+	s.Depth += subStats.Depth
+	// Add the stats for all sub-buckets
+	s.Add(subStats)
+	return s
+}
+
+// forEachPage iterates over every page in a bucket, including inline pages.
+func (b *Bucket) forEachPage(fn func(*page, int)) {
+	// If we have an inline page then just use that.
+	if b.page != nil {
+		fn(b.page, 0)
+		return
+	}
+
+	// Otherwise traverse the page hierarchy.
+	b.tx.forEachPage(b.root, 0, fn)
+}
+
+// forEachPageNode iterates over every page (or node) in a bucket.
+// This also includes inline pages.
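A usage sketch for the ForEach contract above. Nested buckets arrive with a nil value, mirroring the cursor documentation later in this file; Tx.Bucket is again the assumed bolt-style accessor from the transaction code outside this hunk, and imports are as in the earlier sketches:

func dump(db *gkv.DB) error {
	return db.View(func(tx *gkv.Tx) error {
		b := tx.Bucket([]byte("users"))
		if b == nil {
			return nil // bucket does not exist yet
		}
		return b.ForEach(func(k, v []byte) error {
			if v == nil {
				fmt.Printf("%s/ (sub-bucket)\n", k)
			} else {
				fmt.Printf("%s = %s\n", k, v)
			}
			return nil // returning an error here would stop the iteration
		})
	})
}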
+func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { + // If we have an inline page or root node then just use that. + if b.page != nil { + fn(b.page, nil, 0) + return + } + b._forEachPageNode(b.root, 0, fn) +} + +func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) { + var p, n = b.pageNode(pgid) + + // Execute function. + fn(p, n, depth) + + // Recursively loop over children. + if p != nil { + if (p.flags & branchPageFlag) != 0 { + for i := 0; i < int(p.count); i++ { + elem := p.branchPageElement(uint16(i)) + b._forEachPageNode(elem.pgid, depth+1, fn) + } + } + } else { + if !n.isLeaf { + for _, inode := range n.inodes { + b._forEachPageNode(inode.pgid, depth+1, fn) + } + } + } +} + +// spill writes all the nodes for this bucket to dirty pages. +func (b *Bucket) spill() error { + // Spill all child buckets first. + for name, child := range b.buckets { + // If the child bucket is small enough and it has no child buckets then + // write it inline into the parent bucket's page. Otherwise spill it + // like a normal bucket and make the parent value a pointer to the page. + var value []byte + if child.inlineable() { + child.free() + value = child.write() + } else { + if err := child.spill(); err != nil { + return err + } + + // Update the child bucket header in this bucket. + value = make([]byte, unsafe.Sizeof(bucket{})) + var bucket = (*bucket)(unsafe.Pointer(&value[0])) + *bucket = *child.bucket + } + + // Skip writing the bucket if there are no materialized nodes. + if child.rootNode == nil { + continue + } + + // Update parent node. + var c = b.Cursor() + k, _, flags := c.seek([]byte(name)) + if !bytes.Equal([]byte(name), k) { + panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k)) + } + if flags&bucketLeafFlag == 0 { + panic(fmt.Sprintf("unexpected bucket header flag: %x", flags)) + } + c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag) + } + + // Ignore if there's not a materialized root node. + if b.rootNode == nil { + return nil + } + + // Spill nodes. + if err := b.rootNode.spill(); err != nil { + return err + } + b.rootNode = b.rootNode.root() + + // Update the root node for this bucket. + if b.rootNode.pgid >= b.tx.meta.pgid { + panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid)) + } + b.root = b.rootNode.pgid + + return nil +} + +// inlineable returns true if a bucket is small enough to be written inline +// and if it contains no subbuckets. Otherwise returns false. +func (b *Bucket) inlineable() bool { + var n = b.rootNode + + // Bucket must only contain a single leaf node. + if n == nil || !n.isLeaf { + return false + } + + // Bucket is not inlineable if it contains subbuckets or if it goes beyond + // our threshold for inline bucket size. + var size = pageHeaderSize + for _, inode := range n.inodes { + size += leafPageElementSize + len(inode.key) + len(inode.value) + + if inode.flags&bucketLeafFlag != 0 { + return false + } else if size > b.maxInlineBucketSize() { + return false + } + } + + return true +} + +// Returns the maximum total size of a bucket to make it a candidate for inlining. +func (b *Bucket) maxInlineBucketSize() int { + return b.tx.db.pageSize / 4 +} + +// write allocates and writes a bucket to a byte slice. +func (b *Bucket) write() []byte { + // Allocate the appropriate size. + var n = b.rootNode + var value = make([]byte, bucketHeaderSize+n.size()) + + // Write a bucket header. 
+ var bucket = (*bucket)(unsafe.Pointer(&value[0])) + *bucket = *b.bucket + + // Convert byte slice to a fake page and write the root node. + var p = (*page)(unsafe.Pointer(&value[bucketHeaderSize])) + n.write(p) + + return value +} + +// rebalance attempts to balance all nodes. +func (b *Bucket) rebalance() { + for _, n := range b.nodes { + n.rebalance() + } + for _, child := range b.buckets { + child.rebalance() + } +} + +// node creates a node from a page and associates it with a given parent. +func (b *Bucket) node(pgid pgid, parent *node) *node { + _assert(b.nodes != nil, "nodes map expected") + + // Retrieve node if it's already been created. + if n := b.nodes[pgid]; n != nil { + return n + } + + // Otherwise create a node and cache it. + n := &node{bucket: b, parent: parent} + if parent == nil { + b.rootNode = n + } else { + parent.children = append(parent.children, n) + } + + // Use the inline page if this is an inline bucket. + var p = b.page + if p == nil { + p = b.tx.page(pgid) + } + + // Read the page into the node and cache it. + n.read(p) + b.nodes[pgid] = n + + // Update statistics. + b.tx.stats.NodeCount++ + + return n +} + +// free recursively frees all pages in the bucket. +func (b *Bucket) free() { + if b.root == 0 { + return + } + + var tx = b.tx + b.forEachPageNode(func(p *page, n *node, _ int) { + if p != nil { + tx.db.freelist.free(tx.meta.txid, p) + } else { + n.free() + } + }) + b.root = 0 +} + +// dereference removes all references to the old mmap. +func (b *Bucket) dereference() { + if b.rootNode != nil { + b.rootNode.root().dereference() + } + + for _, child := range b.buckets { + child.dereference() + } +} + +// pageNode returns the in-memory node, if it exists. +// Otherwise returns the underlying page. +func (b *Bucket) pageNode(id pgid) (*page, *node) { + // Inline buckets have a fake page embedded in their value so treat them + // differently. We'll return the rootNode (if available) or the fake page. + if b.root == 0 { + if id != 0 { + panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id)) + } + if b.rootNode != nil { + return nil, b.rootNode + } + return b.page, nil + } + + // Check the node cache for non-inline buckets. + if b.nodes != nil { + if n := b.nodes[id]; n != nil { + return nil, n + } + } + + // Finally lookup the page from the transaction if no node is materialized. + return b.tx.page(id), nil +} + +// BucketStats records statistics about resources used by a bucket. +type BucketStats struct { + // Page count statistics. + BranchPageN int // number of logical branch pages + BranchOverflowN int // number of physical branch overflow pages + LeafPageN int // number of logical leaf pages + LeafOverflowN int // number of physical leaf overflow pages + + // Tree statistics. + KeyN int // number of keys/value pairs + Depth int // number of levels in B+tree + + // Page size utilization. 
+	BranchAlloc int // bytes allocated for physical branch pages
+	BranchInuse int // bytes actually used for branch data
+	LeafAlloc   int // bytes allocated for physical leaf pages
+	LeafInuse   int // bytes actually used for leaf data
+
+	// Bucket statistics.
+	BucketN           int // total number of buckets including the top bucket
+	InlineBucketN     int // total number of inlined buckets
+	InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
+}
+
+func (s *BucketStats) Add(other BucketStats) {
+	s.BranchPageN += other.BranchPageN
+	s.BranchOverflowN += other.BranchOverflowN
+	s.LeafPageN += other.LeafPageN
+	s.LeafOverflowN += other.LeafOverflowN
+	s.KeyN += other.KeyN
+	if s.Depth < other.Depth {
+		s.Depth = other.Depth
+	}
+	s.BranchAlloc += other.BranchAlloc
+	s.BranchInuse += other.BranchInuse
+	s.LeafAlloc += other.LeafAlloc
+	s.LeafInuse += other.LeafInuse
+
+	s.BucketN += other.BucketN
+	s.InlineBucketN += other.InlineBucketN
+	s.InlineBucketInuse += other.InlineBucketInuse
+}
+
+// cloneBytes returns a copy of a given slice.
+func cloneBytes(v []byte) []byte {
+	var clone = make([]byte, len(v))
+	copy(clone, v)
+	return clone
+}
+
+// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
+// Cursors see nested buckets with value == nil.
+// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
+//
+// Keys and values returned from the cursor are only valid for the life of the transaction.
+//
+// Changing data while traversing with a cursor may cause it to be invalidated
+// and return unexpected keys and/or values. You must reposition your cursor
+// after mutating data.
+type Cursor struct {
+	bucket *Bucket
+	stack  []elemRef
+}
+
+// Bucket returns the bucket that this cursor was created from.
+func (c *Cursor) Bucket() *Bucket {
+	return c.bucket
+}
+
+// First moves the cursor to the first item in the bucket and returns its key and value.
+// If the bucket is empty then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) First() (key []byte, value []byte) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+	c.stack = c.stack[:0]
+	p, n := c.bucket.pageNode(c.bucket.root)
+	c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
+	c.first()
+
+	// If we land on an empty page then move to the next value.
+	// https://github.com/boltdb/bolt/issues/450
+	if c.stack[len(c.stack)-1].count() == 0 {
+		c.next()
+	}
+
+	k, v, flags := c.keyValue()
+	if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+}
+
+// Last moves the cursor to the last item in the bucket and returns its key and value.
+// If the bucket is empty then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
+func (c *Cursor) Last() (key []byte, value []byte) {
+	_assert(c.bucket.tx.db != nil, "tx closed")
+	c.stack = c.stack[:0]
+	p, n := c.bucket.pageNode(c.bucket.root)
+	ref := elemRef{page: p, node: n}
+	ref.index = ref.count() - 1
+	c.stack = append(c.stack, ref)
+	c.last()
+	k, v, flags := c.keyValue()
+	if (flags & uint32(bucketLeafFlag)) != 0 {
+		return k, nil
+	}
+	return k, v
+}
+
+// Next moves the cursor to the next item in the bucket and returns its key and value.
+// If the cursor is at the end of the bucket then a nil key and value are returned.
+// The returned key and value are only valid for the life of the transaction.
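+//
+// The typical full scan pairs First with Next; an illustrative sketch
+// (bucket setup and error handling omitted):
+//
+//	for k, v := c.First(); k != nil; k, v = c.Next() {
+//		fmt.Printf("%s=%s\n", k, v)
+//	}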
+func (c *Cursor) Next() (key []byte, value []byte) { + _assert(c.bucket.tx.db != nil, "tx closed") + k, v, flags := c.next() + if (flags & uint32(bucketLeafFlag)) != 0 { + return k, nil + } + return k, v +} + +// Prev moves the cursor to the previous item in the bucket and returns its key and value. +// If the cursor is at the beginning of the bucket then a nil key and value are returned. +// The returned key and value are only valid for the life of the transaction. +func (c *Cursor) Prev() (key []byte, value []byte) { + _assert(c.bucket.tx.db != nil, "tx closed") + + // Attempt to move back one element until we're successful. + // Move up the stack as we hit the beginning of each page in our stack. + for i := len(c.stack) - 1; i >= 0; i-- { + elem := &c.stack[i] + if elem.index > 0 { + elem.index-- + break + } + c.stack = c.stack[:i] + } + + // If we've hit the end then return nil. + if len(c.stack) == 0 { + return nil, nil + } + + // Move down the stack to find the last element of the last leaf under this branch. + c.last() + k, v, flags := c.keyValue() + if (flags & uint32(bucketLeafFlag)) != 0 { + return k, nil + } + return k, v +} + +// Seek moves the cursor to a given key and returns it. +// If the key does not exist then the next key is used. If no keys +// follow, a nil key is returned. +// The returned key and value are only valid for the life of the transaction. +func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { + k, v, flags := c.seek(seek) + + // If we ended up after the last element of a page then move to the next one. + if ref := &c.stack[len(c.stack)-1]; ref.index >= ref.count() { + k, v, flags = c.next() + } + + if k == nil { + return nil, nil + } else if (flags & uint32(bucketLeafFlag)) != 0 { + return k, nil + } + return k, v +} + +// Delete removes the current key/value under the cursor from the bucket. +// Delete fails if current key/value is a bucket or if the transaction is not writable. +func (c *Cursor) Delete() error { + if c.bucket.tx.db == nil { + return ErrTxClosed + } else if !c.bucket.Writable() { + return ErrTxNotWritable + } + + key, _, flags := c.keyValue() + // Return an error if current value is a bucket. + if (flags & bucketLeafFlag) != 0 { + return ErrIncompatibleValue + } + c.node().del(key) + + return nil +} + +// seek moves the cursor to a given key and returns it. +// If the key does not exist then the next key is used. +func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { + _assert(c.bucket.tx.db != nil, "tx closed") + + // Start from root page/node and traverse to correct page. + c.stack = c.stack[:0] + c.search(seek, c.bucket.root) + ref := &c.stack[len(c.stack)-1] + + // If the cursor is pointing to the end of page/node then return nil. + if ref.index >= ref.count() { + return nil, nil, 0 + } + + // If this is a bucket then return a nil value. + return c.keyValue() +} + +// first moves the cursor to the first leaf element under the last page in the stack. +func (c *Cursor) first() { + for { + // Exit when we hit a leaf page. + var ref = &c.stack[len(c.stack)-1] + if ref.isLeaf() { + break + } + + // Keep adding pages pointing to the first element to the stack. + var pgid pgid + if ref.node != nil { + pgid = ref.node.inodes[ref.index].pgid + } else { + pgid = ref.page.branchPageElement(uint16(ref.index)).pgid + } + p, n := c.bucket.pageNode(pgid) + c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) + } +} + +// last moves the cursor to the last leaf element under the last page in the stack. 
+func (c *Cursor) last() { + for { + // Exit when we hit a leaf page. + ref := &c.stack[len(c.stack)-1] + if ref.isLeaf() { + break + } + + // Keep adding pages pointing to the last element in the stack. + var pgid pgid + if ref.node != nil { + pgid = ref.node.inodes[ref.index].pgid + } else { + pgid = ref.page.branchPageElement(uint16(ref.index)).pgid + } + p, n := c.bucket.pageNode(pgid) + + var nextRef = elemRef{page: p, node: n} + nextRef.index = nextRef.count() - 1 + c.stack = append(c.stack, nextRef) + } +} + +// next moves to the next leaf element and returns the key and value. +// If the cursor is at the last leaf element then it stays there and returns nil. +func (c *Cursor) next() (key []byte, value []byte, flags uint32) { + for { + // Attempt to move over one element until we're successful. + // Move up the stack as we hit the end of each page in our stack. + var i int + for i = len(c.stack) - 1; i >= 0; i-- { + elem := &c.stack[i] + if elem.index < elem.count()-1 { + elem.index++ + break + } + } + + // If we've hit the root page then stop and return. This will leave the + // cursor on the last element of the last page. + if i == -1 { + return nil, nil, 0 + } + + // Otherwise start from where we left off in the stack and find the + // first element of the first leaf page. + c.stack = c.stack[:i+1] + c.first() + + // If this is an empty page then restart and move back up the stack. + // https://github.com/boltdb/bolt/issues/450 + if c.stack[len(c.stack)-1].count() == 0 { + continue + } + + return c.keyValue() + } +} + +// search recursively performs a binary search against a given page/node until it finds a given key. +func (c *Cursor) search(key []byte, pgid pgid) { + p, n := c.bucket.pageNode(pgid) + if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 { + panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags)) + } + e := elemRef{page: p, node: n} + c.stack = append(c.stack, e) + + // If we're on a leaf page/node then find the specific node. + if e.isLeaf() { + c.nsearch(key) + return + } + + if n != nil { + c.searchNode(key, n) + return + } + c.searchPage(key, p) +} + +func (c *Cursor) searchNode(key []byte, n *node) { + var exact bool + index := sort.Search(len(n.inodes), func(i int) bool { + // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. + // sort.Search() finds the lowest index where f() != -1 but we need the highest index. + ret := bytes.Compare(n.inodes[i].key, key) + if ret == 0 { + exact = true + } + return ret != -1 + }) + if !exact && index > 0 { + index-- + } + c.stack[len(c.stack)-1].index = index + + // Recursively search to the next page. + c.search(key, n.inodes[index].pgid) +} + +func (c *Cursor) searchPage(key []byte, p *page) { + // Binary search for the correct range. + inodes := p.branchPageElements() + + var exact bool + index := sort.Search(int(p.count), func(i int) bool { + // TODO(benbjohnson): Optimize this range search. It's a bit hacky right now. + // sort.Search() finds the lowest index where f() != -1 but we need the highest index. + ret := bytes.Compare(inodes[i].key(), key) + if ret == 0 { + exact = true + } + return ret != -1 + }) + if !exact && index > 0 { + index-- + } + c.stack[len(c.stack)-1].index = index + + // Recursively search to the next page. + c.search(key, inodes[index].pgid) +} + +// nsearch searches the leaf node on the top of the stack for a key. 
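+// Unlike searchNode/searchPage above, nsearch never backtracks: the cursor
+// index lands on the first element whose key is >= the search key.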
+func (c *Cursor) nsearch(key []byte) {
+	e := &c.stack[len(c.stack)-1]
+	p, n := e.page, e.node
+
+	// If we have a node then search its inodes.
+	if n != nil {
+		index := sort.Search(len(n.inodes), func(i int) bool {
+			return bytes.Compare(n.inodes[i].key, key) != -1
+		})
+		e.index = index
+		return
+	}
+
+	// If we have a page then search its leaf elements.
+	inodes := p.leafPageElements()
+	index := sort.Search(int(p.count), func(i int) bool {
+		return bytes.Compare(inodes[i].key(), key) != -1
+	})
+	e.index = index
+}
+
+// keyValue returns the key and value of the current leaf element.
+func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
+	ref := &c.stack[len(c.stack)-1]
+	if ref.count() == 0 || ref.index >= ref.count() {
+		return nil, nil, 0
+	}
+
+	// Retrieve value from node.
+	if ref.node != nil {
+		inode := &ref.node.inodes[ref.index]
+		return inode.key, inode.value, inode.flags
+	}
+
+	// Or retrieve value from page.
+	elem := ref.page.leafPageElement(uint16(ref.index))
+	return elem.key(), elem.value(), elem.flags
+}
+
+// node returns the node that the cursor is currently positioned on.
+func (c *Cursor) node() *node {
+	_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")
+
+	// If the top of the stack is a leaf node then just return it.
+	if ref := &c.stack[len(c.stack)-1]; ref.node != nil && ref.isLeaf() {
+		return ref.node
+	}
+
+	// Start from root and traverse down the hierarchy.
+	var n = c.stack[0].node
+	if n == nil {
+		n = c.bucket.node(c.stack[0].page.id, nil)
+	}
+	for _, ref := range c.stack[:len(c.stack)-1] {
+		_assert(!n.isLeaf, "expected branch node")
+		n = n.childAt(int(ref.index))
+	}
+	_assert(n.isLeaf, "expected leaf node")
+	return n
+}
+
+// elemRef represents a reference to an element on a given page/node.
+type elemRef struct {
+	page  *page
+	node  *node
+	index int
+}
+
+// isLeaf returns whether the ref is pointing at a leaf page/node.
+func (r *elemRef) isLeaf() bool {
+	if r.node != nil {
+		return r.node.isLeaf
+	}
+	return (r.page.flags & leafPageFlag) != 0
+}
+
+// count returns the number of inodes or page elements.
+func (r *elemRef) count() int {
+	if r.node != nil {
+		return len(r.node.inodes)
+	}
+	return int(r.page.count)
+}
+
+// The largest step that can be taken when remapping the mmap.
+const maxMmapStep = 1 << 30 // 1GB
+
+// The data file format version.
+const version = 2
+
+// Represents a marker value to indicate that a file is a Bolt DB.
+const magic uint32 = 0xED0CDAED
+
+// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
+// syncing changes to a file. This is required as some operating systems,
+// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
+// must be synchronized using the msync(2) syscall.
+const IgnoreNoSync = runtime.GOOS == "openbsd"
+
+// Default values if not set in a DB instance.
+const (
+	DefaultMaxBatchSize  int = 1000
+	DefaultMaxBatchDelay     = 10 * time.Millisecond
+	DefaultAllocSize         = 16 * 1024 * 1024
+)
+
+// default page size for db is set to the OS page size.
+var defaultPageSize = os.Getpagesize()
+
+// DB represents a collection of buckets persisted to a file on disk.
+// All data access is performed through transactions which can be obtained through the DB.
+// All the functions on DB will return an ErrDatabaseNotOpen if accessed before Open() is called.
+type DB struct {
+	// When enabled, the database will perform a Check() after every commit.
+	// A panic is issued if the database is in an inconsistent state. This
+	// flag has a large performance impact so it should only be used for
+	// debugging purposes.
+	StrictMode bool
+
+	// Setting the NoSync flag will cause the database to skip fsync()
+	// calls after each commit. This can be useful when bulk loading data
+	// into a database and you can restart the bulk load in the event of
+	// a system failure or database corruption. Do not set this flag for
+	// normal use.
+	//
+	// If the package global IgnoreNoSync constant is true, this value is
+	// ignored. See the comment on that constant for more details.
+	//
+	// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
+	NoSync bool
+
+	// When true, skips the truncate call when growing the database.
+	// Setting this to true is only safe on non-ext3/ext4 systems.
+	// Skipping truncation avoids preallocation of hard drive space and
+	// bypasses a truncate() and fsync() syscall on remapping.
+	//
+	// https://github.com/boltdb/bolt/issues/284
+	NoGrowSync bool
+
+	// If you want to read the entire database fast, you can set MmapFlags to
+	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
+	MmapFlags int
+
+	// MaxBatchSize is the maximum size of a batch. Default value is
+	// copied from DefaultMaxBatchSize in Open.
+	//
+	// If <=0, disables batching.
+	//
+	// Do not change concurrently with calls to Batch.
+	MaxBatchSize int
+
+	// MaxBatchDelay is the maximum delay before a batch starts.
+	// Default value is copied from DefaultMaxBatchDelay in Open.
+	//
+	// If <=0, effectively disables batching.
+	//
+	// Do not change concurrently with calls to Batch.
+	MaxBatchDelay time.Duration
+
+	// AllocSize is the amount of space allocated when the database
+	// needs to create new pages. This is done to amortize the cost
+	// of truncate() and fsync() when growing the data file.
+	AllocSize int
+
+	path     string
+	file     *os.File
+	lockfile *os.File // windows only
+	dataref  []byte   // mmap'ed readonly, write throws SEGV
+	data     *[maxMapSize]byte
+	datasz   int
+	filesz   int // current on disk file size
+	meta0    *meta
+	meta1    *meta
+	pageSize int
+	opened   bool
+	rwtx     *Tx
+	txs      []*Tx
+	freelist *freelist
+	stats    Stats
+
+	pagePool sync.Pool
+
+	batchMu sync.Mutex
+	batch   *batch
+
+	rwlock   sync.Mutex   // Allows only one writer at a time.
+	metalock sync.Mutex   // Protects meta page access.
+	mmaplock sync.RWMutex // Protects mmap access during remapping.
+	statlock sync.RWMutex // Protects stats access.
+
+	ops struct {
+		writeAt func(b []byte, off int64) (n int, err error)
+	}
+
+	// Read only mode.
+	// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
+	readOnly bool
+}
+
+// Path returns the path to the currently open database file.
+func (db *DB) Path() string {
+	return db.path
+}
+
+// GoString returns the Go string representation of the database.
+func (db *DB) GoString() string {
+	return fmt.Sprintf("bolt.DB{path:%q}", db.path)
+}
+
+// String returns the string representation of the database.
+func (db *DB) String() string {
+	return fmt.Sprintf("DB<%q>", db.path)
+}
+
+// Open creates and opens a database at the given path.
+// If the file does not exist then it will be created automatically.
+// Passing in nil options will cause Bolt to open the database with the default options.
+func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
+	var db = &DB{opened: true}
+
+	// Set default options if no options are provided.
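+	// (A nil *Options falls back to DefaultOptions below: no lock timeout
+	// and grow-sync left enabled.)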
+ if options == nil { + options = DefaultOptions + } + db.NoGrowSync = options.NoGrowSync + db.MmapFlags = options.MmapFlags + + // Set default values for later DB operations. + db.MaxBatchSize = DefaultMaxBatchSize + db.MaxBatchDelay = DefaultMaxBatchDelay + db.AllocSize = DefaultAllocSize + + flag := os.O_RDWR + if options.ReadOnly { + flag = os.O_RDONLY + db.readOnly = true + } + + // Open data file and separate sync handler for metadata writes. + db.path = path + var err error + if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil { + _ = db.close() + return nil, err + } + + // Lock file so that other processes using Bolt in read-write mode cannot + // use the database at the same time. This would cause corruption since + // the two processes would write meta pages and free pages separately. + // The database file is locked exclusively (only one process can grab the lock) + // if !options.ReadOnly. + // The database file is locked using the shared lock (more than one process may + // hold a lock at the same time) otherwise (options.ReadOnly is set). + if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil { + _ = db.close() + return nil, err + } + + // Default values for test hooks + db.ops.writeAt = db.file.WriteAt + + // Initialize the database if it doesn't exist. + if info, err := db.file.Stat(); err != nil { + return nil, err + } else if info.Size() == 0 { + // Initialize new files with meta pages. + if err := db.init(); err != nil { + return nil, err + } + } else { + // Read the first meta page to determine the page size. + var buf [0x1000]byte + if _, err := db.file.ReadAt(buf[:], 0); err == nil { + m := db.pageInBuffer(buf[:], 0).meta() + if err := m.validate(); err != nil { + // If we can't read the page size, we can assume it's the same + // as the OS -- since that's how the page size was chosen in the + // first place. + // + // If the first page is invalid and this OS uses a different + // page size than what the database was created with then we + // are out of luck and cannot access the database. + db.pageSize = os.Getpagesize() + } else { + db.pageSize = int(m.pageSize) + } + } + } + + // Initialize page pool. + db.pagePool = sync.Pool{ + New: func() interface{} { + return make([]byte, db.pageSize) + }, + } + + // Memory map the data file. + if err := db.mmap(options.InitialMmapSize); err != nil { + _ = db.close() + return nil, err + } + + // Read in the freelist. + db.freelist = newFreelist() + db.freelist.read(db.page(db.meta().freelist)) + + // Mark the database as opened and return. + return db, nil +} + +// mmap opens the underlying memory-mapped file and initializes the meta references. +// minsz is the minimum size that the new mmap can be. +func (db *DB) mmap(minsz int) error { + db.mmaplock.Lock() + defer db.mmaplock.Unlock() + + info, err := db.file.Stat() + if err != nil { + return fmt.Errorf("mmap stat error: %s", err) + } else if int(info.Size()) < db.pageSize*2 { + return fmt.Errorf("file size too small") + } + + // Ensure the size is at least the minimum size. + var size = int(info.Size()) + if size < minsz { + size = minsz + } + size, err = db.mmapSize(size) + if err != nil { + return err + } + + // Dereference all mmap references before unmapping. + if db.rwtx != nil { + db.rwtx.root.dereference() + } + + // Unmap existing data before continuing. + if err := db.munmap(); err != nil { + return err + } + + // Memory-map the data file as a byte slice. 
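+	// (The platform-specific mmap helper is expected to populate db.data,
+	// db.dataref, and db.datasz.)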
+	if err := mmap(db, size); err != nil {
+		return err
+	}
+
+	// Save references to the meta pages.
+	db.meta0 = db.page(0).meta()
+	db.meta1 = db.page(1).meta()
+
+	// Validate the meta pages. We only return an error if both meta pages fail
+	// validation, since meta0 failing validation means that it wasn't saved
+	// properly -- but we can recover using meta1. And vice-versa.
+	err0 := db.meta0.validate()
+	err1 := db.meta1.validate()
+	if err0 != nil && err1 != nil {
+		return err0
+	}
+
+	return nil
+}
+
+// munmap unmaps the data file from memory.
+func (db *DB) munmap() error {
+	if err := munmap(db); err != nil {
+		return fmt.Errorf("unmap error: %s", err)
+	}
+	return nil
+}
+
+// mmapSize determines the appropriate size for the mmap given the current size
+// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
+// Returns an error if the new mmap size is greater than the max allowed.
+func (db *DB) mmapSize(size int) (int, error) {
+	// Double the size from 32KB until 1GB.
+	for i := uint(15); i <= 30; i++ {
+		if size <= 1<<i {
+			return 1 << i, nil
+		}
+	}
+
+	// Verify the requested size is not above the maximum allowed.
+	if size > maxMapSize {
+		return 0, fmt.Errorf("mmap too large")
+	}
+
+	// If larger than 1GB then grow by 1GB at a time.
+	sz := int64(size)
+	if remainder := sz % int64(maxMmapStep); remainder > 0 {
+		sz += int64(maxMmapStep) - remainder
+	}
+
+	// Ensure that the mmap size is a multiple of the page size.
+	// This should always hold, since sizes below 1GB are powers of two
+	// and larger sizes grow in whole 1GB steps.
+	pageSize := int64(db.pageSize)
+	if (sz % pageSize) != 0 {
+		sz = ((sz / pageSize) + 1) * pageSize
+	}
+
+	// If we've exceeded the max size then only grow up to the max size.
+	if sz > maxMapSize {
+		sz = maxMapSize
+	}
+
+	return int(sz), nil
+}
+
+// init creates a new database file and initializes its meta pages.
+func (db *DB) init() error {
+	// Set the page size to the OS page size.
+	db.pageSize = os.Getpagesize()
+
+	// Create two meta pages on a buffer.
+	buf := make([]byte, db.pageSize*4)
+	for i := 0; i < 2; i++ {
+		p := db.pageInBuffer(buf[:], pgid(i))
+		p.id = pgid(i)
+		p.flags = metaPageFlag
+
+		// Initialize the meta page.
+		m := p.meta()
+		m.magic = magic
+		m.version = version
+		m.pageSize = uint32(db.pageSize)
+		m.freelist = 2
+		m.root = bucket{root: 3}
+		m.pgid = 4
+		m.txid = txid(i)
+		m.checksum = m.sum64()
+	}
+
+	// Write an empty freelist at pgid 2.
+	p := db.pageInBuffer(buf[:], pgid(2))
+	p.id = pgid(2)
+	p.flags = freelistPageFlag
+	p.count = 0
+
+	// Write an empty leaf page at pgid 3.
+	p = db.pageInBuffer(buf[:], pgid(3))
+	p.id = pgid(3)
+	p.flags = leafPageFlag
+	p.count = 0
+
+	// Write the buffer to our data file.
+	if _, err := db.ops.writeAt(buf, 0); err != nil {
+		return err
+	}
+	if err := fdatasync(db); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Close releases all database resources.
+// All transactions must be closed before closing the database.
+func (db *DB) Close() error {
+	db.rwlock.Lock()
+	defer db.rwlock.Unlock()
+
+	db.metalock.Lock()
+	defer db.metalock.Unlock()
+
+	db.mmaplock.RLock()
+	defer db.mmaplock.RUnlock()
+
+	return db.close()
+}
+
+func (db *DB) close() error {
+	if !db.opened {
+		return nil
+	}
+
+	db.opened = false
+
+	db.freelist = nil
+
+	// Clear ops.
+	db.ops.writeAt = nil
+
+	// Close the mmap.
+	if err := db.munmap(); err != nil {
+		return err
+	}
+
+	// Close file handles.
+	if db.file != nil {
+		// No need to unlock read-only file.
+		if !db.readOnly {
+			// Unlock the file.
+			if err := funlock(db); err != nil {
+				log.Printf("bolt.Close(): funlock error: %s", err)
+			}
+		}
+
+		// Close the file descriptor.
+		if err := db.file.Close(); err != nil {
+			return fmt.Errorf("db file close: %s", err)
+		}
+		db.file = nil
+	}
+
+	db.path = ""
+	return nil
+}
+
+// Begin starts a new transaction.
+// Multiple read-only transactions can be used concurrently but only one
+// write transaction can be used at a time. Starting multiple write transactions
+// will cause the calls to block and be serialized until the current write
+// transaction finishes.
+//
+// Transactions should not be dependent on one another. Opening a read
+// transaction and a write transaction in the same goroutine can cause the
+// writer to deadlock because the database periodically needs to re-mmap itself
+// as it grows and it cannot do that while a read transaction is open.
+//
+// If a long running read transaction (for example, a snapshot transaction) is
+// needed, you might want to set DB.InitialMmapSize to a large enough value
+// to avoid potential blocking of the write transaction.
+//
+// IMPORTANT: You must close read-only transactions after you are finished or
+// else the database will not reclaim old pages.
+func (db *DB) Begin(writable bool) (*Tx, error) {
+	if writable {
+		return db.beginRWTx()
+	}
+	return db.beginTx()
+}
+
+func (db *DB) beginTx() (*Tx, error) {
+	// Lock the meta pages while we initialize the transaction. We obtain
+	// the meta lock before the mmap lock because that's the order that the
+	// write transaction will obtain them.
+	db.metalock.Lock()
+
+	// Obtain a read-only lock on the mmap. When the mmap is remapped it will
+	// obtain a write lock so all transactions must finish before it can be
+	// remapped.
+	db.mmaplock.RLock()
+
+	// Exit if the database is not open yet.
+	if !db.opened {
+		db.mmaplock.RUnlock()
+		db.metalock.Unlock()
+		return nil, ErrDatabaseNotOpen
+	}
+
+	// Create a transaction associated with the database.
+	t := &Tx{}
+	t.init(db)
+
+	// Keep track of transaction until it closes.
+	db.txs = append(db.txs, t)
+	n := len(db.txs)
+
+	// Unlock the meta pages.
+	db.metalock.Unlock()
+
+	// Update the transaction stats.
+	db.statlock.Lock()
+	db.stats.TxN++
+	db.stats.OpenTxN = n
+	db.statlock.Unlock()
+
+	return t, nil
+}
+
+func (db *DB) beginRWTx() (*Tx, error) {
+	// If the database was opened with Options.ReadOnly, return an error.
+	if db.readOnly {
+		return nil, ErrDatabaseReadOnly
+	}
+
+	// Obtain writer lock. This is released by the transaction when it closes.
+	// This enforces only one writer transaction at a time.
+	db.rwlock.Lock()
+
+	// Once we have the writer lock then we can lock the meta pages so that
+	// we can set up the transaction.
+	db.metalock.Lock()
+	defer db.metalock.Unlock()
+
+	// Exit if the database is not open yet.
+	if !db.opened {
+		db.rwlock.Unlock()
+		return nil, ErrDatabaseNotOpen
+	}
+
+	// Create a transaction associated with the database.
+	t := &Tx{writable: true}
+	t.init(db)
+	db.rwtx = t
+
+	// Free any pages associated with closed read-only transactions.
+	var minid txid = 0xFFFFFFFFFFFFFFFF
+	for _, t := range db.txs {
+		if t.meta.txid < minid {
+			minid = t.meta.txid
+		}
+	}
+	if minid > 0 {
+		db.freelist.release(minid - 1)
+	}
+
+	return t, nil
+}
+
+// removeTx removes a transaction from the database.
+func (db *DB) removeTx(tx *Tx) {
+	// Release the read lock on the mmap.
+	db.mmaplock.RUnlock()
+
+	// Use the meta lock to restrict access to the DB object.
+	db.metalock.Lock()
+
+	// Remove the transaction.
+	for i, t := range db.txs {
+		if t == tx {
+			last := len(db.txs) - 1
+			db.txs[i] = db.txs[last]
+			db.txs[last] = nil
+			db.txs = db.txs[:last]
+			break
+		}
+	}
+	n := len(db.txs)
+
+	// Unlock the meta pages.
+	db.metalock.Unlock()
+
+	// Merge statistics.
+	db.statlock.Lock()
+	db.stats.OpenTxN = n
+	db.stats.TxStats.add(&tx.stats)
+	db.statlock.Unlock()
+}
+
+// Update executes a function within the context of a read-write managed transaction.
+// If no error is returned from the function then the transaction is committed.
+// If an error is returned then the entire transaction is rolled back.
+// Any error that is returned from the function or returned from the commit is
+// returned from the Update() method.
+//
+// Attempting to manually commit or rollback within the function will cause a panic.
+func (db *DB) Update(fn func(*Tx) error) error {
+	t, err := db.Begin(true)
+	if err != nil {
+		return err
+	}
+
+	// Make sure the transaction rolls back in the event of a panic.
+	defer func() {
+		if t.db != nil {
+			t.rollback()
+		}
+	}()
+
+	// Mark as a managed tx so that the inner function cannot manually commit.
+	t.managed = true
+
+	// If an error is returned from the function then rollback and return error.
+	err = fn(t)
+	t.managed = false
+	if err != nil {
+		_ = t.Rollback()
+		return err
+	}
+
+	return t.Commit()
+}
+
+// View executes a function within the context of a managed read-only transaction.
+// Any error that is returned from the function is returned from the View() method.
+//
+// Attempting to manually rollback within the function will cause a panic.
+func (db *DB) View(fn func(*Tx) error) error {
+	t, err := db.Begin(false)
+	if err != nil {
+		return err
+	}
+
+	// Make sure the transaction rolls back in the event of a panic.
+	defer func() {
+		if t.db != nil {
+			t.rollback()
+		}
+	}()
+
+	// Mark as a managed tx so that the inner function cannot manually rollback.
+	t.managed = true
+
+	// If an error is returned from the function then pass it through.
+	err = fn(t)
+	t.managed = false
+	if err != nil {
+		_ = t.Rollback()
+		return err
+	}
+
+	if err := t.Rollback(); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// Batch calls fn as part of a batch. It behaves similarly to Update,
+// except:
+//
+// 1. concurrent Batch calls can be combined into a single Bolt
+// transaction.
+//
+// 2. the function passed to Batch may be called multiple times,
+// regardless of whether it returns error or not.
+//
+// This means that Batch function side effects must be idempotent and
+// take permanent effect only after a successful return is seen in the
+// caller.
+//
+// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
+// and DB.MaxBatchDelay, respectively.
+//
+// Batch is only useful when there are multiple goroutines calling it.
+func (db *DB) Batch(fn func(*Tx) error) error {
+	errCh := make(chan error, 1)
+
+	db.batchMu.Lock()
+	if db.batch == nil || len(db.batch.calls) >= db.MaxBatchSize {
+		// There is no existing batch, or the existing batch is full; start a new one.
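+		// The timer enforces MaxBatchDelay; the size check below triggers
+		// the batch early once it fills up. batch.trigger runs through
+		// sync.Once, so the two paths cannot both run the batch.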
+		db.batch = &batch{
+			db: db,
+		}
+		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
+	}
+	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
+	if len(db.batch.calls) >= db.MaxBatchSize {
+		// wake up batch, it's ready to run
+		go db.batch.trigger()
+	}
+	db.batchMu.Unlock()
+
+	err := <-errCh
+	if err == trySolo {
+		err = db.Update(fn)
+	}
+	return err
+}
+
+type call struct {
+	fn  func(*Tx) error
+	err chan<- error
+}
+
+type batch struct {
+	db    *DB
+	timer *time.Timer
+	start sync.Once
+	calls []call
+}
+
+// trigger runs the batch if it hasn't already been run.
+func (b *batch) trigger() {
+	b.start.Do(b.run)
+}
+
+// run performs the transactions in the batch and communicates results
+// back to DB.Batch.
+func (b *batch) run() {
+	b.db.batchMu.Lock()
+	b.timer.Stop()
+	// Make sure no new work is added to this batch, but don't break
+	// other batches.
+	if b.db.batch == b {
+		b.db.batch = nil
+	}
+	b.db.batchMu.Unlock()
+
+retry:
+	for len(b.calls) > 0 {
+		var failIdx = -1
+		err := b.db.Update(func(tx *Tx) error {
+			for i, c := range b.calls {
+				if err := safelyCall(c.fn, tx); err != nil {
+					failIdx = i
+					return err
+				}
+			}
+			return nil
+		})
+
+		if failIdx >= 0 {
+			// take the failing transaction out of the batch. it's
+			// safe to shorten b.calls here because db.batch no longer
+			// points to us, and we hold the mutex anyway.
+			c := b.calls[failIdx]
+			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
+			// tell the submitter to re-run it solo, then continue with the rest of the batch
+			c.err <- trySolo
+			continue retry
+		}
+
+		// pass success, or bolt internal errors, to all callers
+		for _, c := range b.calls {
+			c.err <- err
+		}
+		break retry
+	}
+}
+
+// trySolo is a special sentinel error value used for signaling that a
+// transaction function should be re-run. It should never be seen by
+// callers.
+var trySolo = errors.New("batch function returned an error and should be re-run solo")
+
+type panicked struct {
+	reason interface{}
+}
+
+func (p panicked) Error() string {
+	if err, ok := p.reason.(error); ok {
+		return err.Error()
+	}
+	return fmt.Sprintf("panic: %v", p.reason)
+}
+
+func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
+	defer func() {
+		if p := recover(); p != nil {
+			err = panicked{p}
+		}
+	}()
+	return fn(tx)
+}
+
+// Sync executes fdatasync() against the database file handle.
+//
+// This is not necessary under normal operation, however, if you use NoSync
+// then it allows you to force the database file to sync against the disk.
+func (db *DB) Sync() error { return fdatasync(db) }
+
+// Stats retrieves ongoing performance stats for the database.
+// This is only updated when a transaction closes.
+func (db *DB) Stats() Stats {
+	db.statlock.RLock()
+	defer db.statlock.RUnlock()
+	return db.stats
+}
+
+// Info is for internal access to the raw data bytes from the C cursor.
+// Use carefully, or not at all.
+func (db *DB) Info() *Info {
+	return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
+}
+
+// page retrieves a page reference from the mmap based on the current page size.
+func (db *DB) page(id pgid) *page {
+	pos := id * pgid(db.pageSize)
+	return (*page)(unsafe.Pointer(&db.data[pos]))
+}
+
+// pageInBuffer retrieves a page reference from a given byte array based on the current page size.
+func (db *DB) pageInBuffer(b []byte, id pgid) *page {
+	return (*page)(unsafe.Pointer(&b[id*pgid(db.pageSize)]))
+}
+
+// meta retrieves the current meta page reference.
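+// Concretely: if a crash tears the write of the newer meta page, its
+// checksum no longer validates and the older page (with the previous txid)
+// is used instead, rolling back to the last fully committed transaction.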
+func (db *DB) meta() *meta {
+	// We have to return the meta with the highest txid which doesn't fail
+	// validation. Otherwise, we can cause errors when in fact the database is
+	// in a consistent state. metaA is the one with the higher txid.
+	metaA := db.meta0
+	metaB := db.meta1
+	if db.meta1.txid > db.meta0.txid {
+		metaA = db.meta1
+		metaB = db.meta0
+	}
+
+	// Use higher meta page if valid. Otherwise fallback to previous, if valid.
+	if err := metaA.validate(); err == nil {
+		return metaA
+	} else if err := metaB.validate(); err == nil {
+		return metaB
+	}
+
+	// This should never be reached, because both meta1 and meta0 were validated
+	// on mmap() and we do fsync() on every write.
+	panic("bolt.DB.meta(): invalid meta pages")
+}
+
+// allocate returns a contiguous block of memory starting at a given page.
+func (db *DB) allocate(count int) (*page, error) {
+	// Allocate a temporary buffer for the page.
+	var buf []byte
+	if count == 1 {
+		buf = db.pagePool.Get().([]byte)
+	} else {
+		buf = make([]byte, count*db.pageSize)
+	}
+	p := (*page)(unsafe.Pointer(&buf[0]))
+	p.overflow = uint32(count - 1)
+
+	// Use pages from the freelist if they are available.
+	if p.id = db.freelist.allocate(count); p.id != 0 {
+		return p, nil
+	}
+
+	// Resize mmap() if we're at the end.
+	p.id = db.rwtx.meta.pgid
+	var minsz = int((p.id+pgid(count))+1) * db.pageSize
+	if minsz >= db.datasz {
+		if err := db.mmap(minsz); err != nil {
+			return nil, fmt.Errorf("mmap allocate error: %s", err)
+		}
+	}
+
+	// Move the page id high water mark.
+	db.rwtx.meta.pgid += pgid(count)
+
+	return p, nil
+}
+
+// grow grows the size of the database to the given sz.
+func (db *DB) grow(sz int) error {
+	// Ignore if the new size is less than available file size.
+	if sz <= db.filesz {
+		return nil
+	}
+
+	// If the data is smaller than the alloc size then only allocate what's needed.
+	// Once it goes over the allocation size then allocate in chunks.
+	if db.datasz < db.AllocSize {
+		sz = db.datasz
+	} else {
+		sz += db.AllocSize
+	}
+
+	// Truncate and fsync to ensure file size metadata is flushed.
+	// https://github.com/boltdb/bolt/issues/284
+	if !db.NoGrowSync && !db.readOnly {
+		if runtime.GOOS != "windows" {
+			if err := db.file.Truncate(int64(sz)); err != nil {
+				return fmt.Errorf("file resize error: %s", err)
+			}
+		}
+		if err := db.file.Sync(); err != nil {
+			return fmt.Errorf("file sync error: %s", err)
+		}
+	}
+
+	db.filesz = sz
+	return nil
+}
+
+// IsReadOnly returns whether the database was opened with Options.ReadOnly.
+func (db *DB) IsReadOnly() bool {
+	return db.readOnly
+}
+
+// Options represents the options that can be set when opening a database.
+type Options struct {
+	// Timeout is the amount of time to wait to obtain a file lock.
+	// When set to zero it will wait indefinitely. This option is only
+	// available on Darwin and Linux.
+	Timeout time.Duration
+
+	// Sets the DB.NoGrowSync flag before memory mapping the file.
+	NoGrowSync bool
+
+	// Open database in read-only mode. Uses flock(..., LOCK_SH|LOCK_NB) to
+	// grab a shared lock (UNIX).
+	ReadOnly bool
+
+	// Sets the DB.MmapFlags flag before memory mapping the file.
+	MmapFlags int
+
+	// InitialMmapSize is the initial mmap size of the database
+	// in bytes. Read transactions won't block the write transaction
+	// if InitialMmapSize is large enough to hold the database mmap
+	// size. (See DB.Begin for more information.)
+	//
+	// If <=0, the initial map size is 0.
+	// If InitialMmapSize is smaller than the previous database size,
+	// it has no effect.
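+	//
+	// For example, an InitialMmapSize of 1<<30 maps 1GB up front, so reads
+	// and writes can proceed without a remap until the data file outgrows
+	// that size.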
+	InitialMmapSize int
+}
+
+// DefaultOptions represents the options used if nil options are passed into Open().
+// No timeout is used which will cause Bolt to wait indefinitely for a lock.
+var DefaultOptions = &Options{
+	Timeout:    0,
+	NoGrowSync: false,
+}
+
+// Stats represents statistics about the database.
+type Stats struct {
+	// Freelist stats
+	FreePageN     int // total number of free pages on the freelist
+	PendingPageN  int // total number of pending pages on the freelist
+	FreeAlloc     int // total bytes allocated in free pages
+	FreelistInuse int // total bytes used by the freelist
+
+	// Transaction stats
+	TxN     int // total number of started read transactions
+	OpenTxN int // number of currently open read transactions
+
+	TxStats TxStats // global, ongoing stats.
+}
+
+// Sub calculates and returns the difference between two sets of database stats.
+// This is useful when obtaining stats at two different points in time and
+// you need the performance counters that occurred within that time span.
+func (s *Stats) Sub(other *Stats) Stats {
+	if other == nil {
+		return *s
+	}
+	var diff Stats
+	diff.FreePageN = s.FreePageN
+	diff.PendingPageN = s.PendingPageN
+	diff.FreeAlloc = s.FreeAlloc
+	diff.FreelistInuse = s.FreelistInuse
+	diff.TxN = s.TxN - other.TxN
+	diff.TxStats = s.TxStats.Sub(&other.TxStats)
+	return diff
+}
+
+func (s *Stats) add(other *Stats) {
+	s.TxStats.add(&other.TxStats)
+}
+
+type Info struct {
+	Data     uintptr
+	PageSize int
+}
+
+type meta struct {
+	magic    uint32
+	version  uint32
+	pageSize uint32
+	flags    uint32
+	root     bucket
+	freelist pgid
+	pgid     pgid
+	txid     txid
+	checksum uint64
+}
+
+// validate checks the marker bytes and version of the meta page to ensure it matches this binary.
+func (m *meta) validate() error {
+	if m.magic != magic {
+		return ErrInvalid
+	} else if m.version != version {
+		return ErrVersionMismatch
+	} else if m.checksum != 0 && m.checksum != m.sum64() {
+		return ErrChecksum
+	}
+	return nil
+}
+
+// copy copies one meta object to another.
+func (m *meta) copy(dest *meta) {
+	*dest = *m
+}
+
+// write writes the meta onto a page.
+func (m *meta) write(p *page) {
+	if m.root.root >= m.pgid {
+		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
+	} else if m.freelist >= m.pgid {
+		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
+	}
+
+	// Page id is either going to be 0 or 1 which we can determine by the transaction ID.
+	p.id = pgid(m.txid % 2)
+	p.flags |= metaPageFlag
+
+	// Calculate the checksum.
+	m.checksum = m.sum64()
+
+	m.copy(p.meta())
+}
+
+// generates the checksum for the meta.
+func (m *meta) sum64() uint64 {
+	var h = fnv.New64a()
+	_, _ = h.Write((*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))[:])
+	return h.Sum64()
+}
+
+// _assert will panic with a given formatted message if the given condition is false.
+func _assert(condition bool, msg string, v ...interface{}) {
+	if !condition {
+		panic(fmt.Sprintf("assertion failed: "+msg, v...))
+	}
+}
+
+func warn(v ...interface{})              { fmt.Fprintln(os.Stderr, v...) }
+func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
+
+func printstack() {
+	stack := strings.Join(strings.Split(string(debug.Stack()), "\n")[2:], "\n")
+	fmt.Fprintln(os.Stderr, stack)
+}
+/*
+Package bolt implements a low-level key/value store in pure Go. It supports
+fully serializable transactions, ACID semantics, and lock-free MVCC with
+multiple readers and a single writer. 
Bolt can be used for projects that +want a simple data store without the need to add large dependencies such as +Postgres or MySQL. + +Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is +optimized for fast read access and does not require recovery in the event of a +system crash. Transactions which have not finished committing will simply be +rolled back in the event of a crash. + +The design of Bolt is based on Howard Chu's LMDB database project. + +Bolt currently works on Windows, Mac OS X, and Linux. + + +Basics + +There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is +a collection of buckets and is represented by a single file on disk. A bucket is +a collection of unique keys that are associated with values. + +Transactions provide either read-only or read-write access to the database. +Read-only transactions can retrieve key/value pairs and can use Cursors to +iterate over the dataset sequentially. Read-write transactions can create and +delete buckets and can insert and remove keys. Only one read-write transaction +is allowed at a time. + + +Caveats + +The database uses a read-only, memory-mapped data file to ensure that +applications cannot corrupt the database, however, this means that keys and +values returned from Bolt cannot be changed. Writing to a read-only byte slice +will cause Go to panic. + +Keys and values retrieved from the database are only valid for the life of +the transaction. When used outside the transaction, these byte slices can +point to different data or can point to invalid memory which will cause a panic. + + +*/ + +// These errors can be returned when opening or calling methods on a DB. +var ( + // ErrDatabaseNotOpen is returned when a DB instance is accessed before it + // is opened or after it is closed. + ErrDatabaseNotOpen = errors.New("database not open") + + // ErrDatabaseOpen is returned when opening a database that is + // already open. + ErrDatabaseOpen = errors.New("database already open") + + // ErrInvalid is returned when both meta pages on a database are invalid. + // This typically occurs when a file is not a bolt database. + ErrInvalid = errors.New("invalid database") + + // ErrVersionMismatch is returned when the data file was created with a + // different version of Bolt. + ErrVersionMismatch = errors.New("version mismatch") + + // ErrChecksum is returned when either meta page checksum does not match. + ErrChecksum = errors.New("checksum error") + + // ErrTimeout is returned when a database cannot obtain an exclusive lock + // on the data file after the timeout passed to Open(). + ErrTimeout = errors.New("timeout") +) + +// These errors can occur when beginning or committing a Tx. +var ( + // ErrTxNotWritable is returned when performing a write operation on a + // read-only transaction. + ErrTxNotWritable = errors.New("tx not writable") + + // ErrTxClosed is returned when committing or rolling back a transaction + // that has already been committed or rolled back. + ErrTxClosed = errors.New("tx closed") + + // ErrDatabaseReadOnly is returned when a mutating transaction is started on a + // read-only database. + ErrDatabaseReadOnly = errors.New("database is in read-only mode") +) + +// These errors can occur when putting or deleting a value or a bucket. +var ( + // ErrBucketNotFound is returned when trying to access a bucket that has + // not been created yet. + ErrBucketNotFound = errors.New("bucket not found") + + // ErrBucketExists is returned when creating a bucket that already exists. 
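+	// (CreateBucketIfNotExists is the non-failing alternative.)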
+	ErrBucketExists = errors.New("bucket already exists")
+
+	// ErrBucketNameRequired is returned when creating a bucket with a blank name.
+	ErrBucketNameRequired = errors.New("bucket name required")
+
+	// ErrKeyRequired is returned when inserting a zero-length key.
+	ErrKeyRequired = errors.New("key required")
+
+	// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
+	ErrKeyTooLarge = errors.New("key too large")
+
+	// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
+	ErrValueTooLarge = errors.New("value too large")
+
+	// ErrIncompatibleValue is returned when trying to create or delete a bucket
+	// on an existing non-bucket key or when trying to create or delete a
+	// non-bucket key on an existing bucket key.
+	ErrIncompatibleValue = errors.New("incompatible value")
+)
+
+// freelist represents a list of all pages that are available for allocation.
+// It also tracks pages that have been freed but are still in use by open transactions.
+type freelist struct {
+	ids     []pgid          // all free and available free page ids.
+	pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
+	cache   map[pgid]bool   // fast lookup of all free and pending page ids.
+}
+
+// newFreelist returns an empty, initialized freelist.
+func newFreelist() *freelist {
+	return &freelist{
+		pending: make(map[txid][]pgid),
+		cache:   make(map[pgid]bool),
+	}
+}
+
+// size returns the size of the page after serialization.
+func (f *freelist) size() int {
+	n := f.count()
+	if n >= 0xFFFF {
+		// The first element will be used to store the count. See freelist.write.
+		n++
+	}
+	return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
+}
+
+// count returns count of pages on the freelist
+func (f *freelist) count() int {
+	return f.free_count() + f.pending_count()
+}
+
+// free_count returns count of free pages
+func (f *freelist) free_count() int {
+	return len(f.ids)
+}
+
+// pending_count returns count of pending pages
+func (f *freelist) pending_count() int {
+	var count int
+	for _, list := range f.pending {
+		count += len(list)
+	}
+	return count
+}
+
+// copyall copies into dst a list of all free ids and all pending ids in one sorted list.
+// f.count returns the minimum length required for dst.
+func (f *freelist) copyall(dst []pgid) {
+	m := make(pgids, 0, f.pending_count())
+	for _, list := range f.pending {
+		m = append(m, list...)
+	}
+	sort.Sort(m)
+	mergepgids(dst, f.ids, m)
+}
+
+// allocate returns the starting page id of a contiguous list of pages of a given size.
+// If a contiguous block cannot be found then 0 is returned.
+func (f *freelist) allocate(n int) pgid {
+	if len(f.ids) == 0 {
+		return 0
+	}
+
+	var initial, previd pgid
+	for i, id := range f.ids {
+		if id <= 1 {
+			panic(fmt.Sprintf("invalid page allocation: %d", id))
+		}
+
+		// Reset initial page if this is not contiguous.
+		if previd == 0 || id-previd != 1 {
+			initial = id
+		}
+
+		// If we found a contiguous block then remove it and return it.
+		if (id-initial)+1 == pgid(n) {
+			// If we're allocating off the beginning then take the fast path
+			// and just adjust the existing slice. This will use extra memory
+			// temporarily but the append() in free() will realloc the slice
+			// as is necessary.
+			if (i + 1) == n {
+				f.ids = f.ids[i+1:]
+			} else {
+				copy(f.ids[i-n+1:], f.ids[i+1:])
+				f.ids = f.ids[:len(f.ids)-n]
+			}
+
+			// Remove from the free cache.
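+			// (Example: with f.ids = [3 4 5 9] and n = 2, the run 3,4 was
+			// carved out above, so ids 3 and 4 are evicted here and [5 9]
+			// remain free.)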
+			for i := pgid(0); i < pgid(n); i++ {
+				delete(f.cache, initial+i)
+			}
+
+			return initial
+		}
+
+		previd = id
+	}
+	return 0
+}
+
+// free releases a page and its overflow for a given transaction id.
+// If the page is already free then a panic will occur.
+func (f *freelist) free(txid txid, p *page) {
+	if p.id <= 1 {
+		panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
+	}
+
+	// Free page and all its overflow pages.
+	var ids = f.pending[txid]
+	for id := p.id; id <= p.id+pgid(p.overflow); id++ {
+		// Verify that page is not already free.
+		if f.cache[id] {
+			panic(fmt.Sprintf("page %d already freed", id))
+		}
+
+		// Add to the freelist and cache.
+		ids = append(ids, id)
+		f.cache[id] = true
+	}
+	f.pending[txid] = ids
+}
+
+// release moves all page ids for a transaction id (or older) to the freelist.
+func (f *freelist) release(txid txid) {
+	m := make(pgids, 0)
+	for tid, ids := range f.pending {
+		if tid <= txid {
+			// Move transaction's pending pages to the available freelist.
+			// Don't remove from the cache since the page is still free.
+			m = append(m, ids...)
+			delete(f.pending, tid)
+		}
+	}
+	sort.Sort(m)
+	f.ids = pgids(f.ids).merge(m)
+}
+
+// rollback removes the pages from a given pending tx.
+func (f *freelist) rollback(txid txid) {
+	// Remove page ids from cache.
+	for _, id := range f.pending[txid] {
+		delete(f.cache, id)
+	}
+
+	// Remove pages from pending list.
+	delete(f.pending, txid)
+}
+
+// freed returns whether a given page is in the free list.
+func (f *freelist) freed(pgid pgid) bool {
+	return f.cache[pgid]
+}
+
+// read initializes the freelist from a freelist page.
+func (f *freelist) read(p *page) {
+	// If the page.count is at the max uint16 value (64k) then it's considered
+	// an overflow and the size of the freelist is stored as the first element.
+	idx, count := 0, int(p.count)
+	if count == 0xFFFF {
+		idx = 1
+		count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
+	}
+
+	// Copy the list of page ids from the freelist. The ids occupy the
+	// elements [idx, idx+count); see freelist.write for the layout.
+	if count == 0 {
+		f.ids = nil
+	} else {
+		ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
+		f.ids = make([]pgid, len(ids))
+		copy(f.ids, ids)
+
+		// Make sure they're sorted.
+		sort.Sort(pgids(f.ids))
+	}
+
+	// Rebuild the page cache.
+	f.reindex()
+}
+
+// write writes the page ids onto a freelist page. All free and pending ids are
+// saved to disk since in the event of a program crash, all pending ids will
+// become free.
+func (f *freelist) write(p *page) error {
+	// Combine the old free pgids and pgids waiting on an open transaction.
+
+	// Update the header flag.
+	p.flags |= freelistPageFlag
+
+	// The page.count can only hold up to 64k elements so if we overflow that
+	// number then we handle it by putting the size in the first element.
+	lenids := f.count()
+	if lenids == 0 {
+		p.count = uint16(lenids)
+	} else if lenids < 0xFFFF {
+		p.count = uint16(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[:])
+	} else {
+		p.count = 0xFFFF
+		((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0] = pgid(lenids)
+		f.copyall(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[1:])
+	}
+
+	return nil
+}
+
+// reload reads the freelist from a page and filters out pending items.
+func (f *freelist) reload(p *page) {
+	f.read(p)
+
+	// Build a cache of only pending pages.
+ pcache := make(map[pgid]bool) + for _, pendingIDs := range f.pending { + for _, pendingID := range pendingIDs { + pcache[pendingID] = true + } + } + + // Check each page in the freelist and build a new available freelist + // with any pages not in the pending lists. + var a []pgid + for _, id := range f.ids { + if !pcache[id] { + a = append(a, id) + } + } + f.ids = a + + // Once the available list is rebuilt then rebuild the free cache so that + // it includes the available and pending free pages. + f.reindex() +} + +// reindex rebuilds the free cache based on available and pending free lists. +func (f *freelist) reindex() { + f.cache = make(map[pgid]bool, len(f.ids)) + for _, id := range f.ids { + f.cache[id] = true + } + for _, pendingIDs := range f.pending { + for _, pendingID := range pendingIDs { + f.cache[pendingID] = true + } + } +} + +// node represents an in-memory, deserialized page. +type node struct { + bucket *Bucket + isLeaf bool + unbalanced bool + spilled bool + key []byte + pgid pgid + parent *node + children nodes + inodes inodes +} + +// root returns the top-level node this node is attached to. +func (n *node) root() *node { + if n.parent == nil { + return n + } + return n.parent.root() +} + +// minKeys returns the minimum number of inodes this node should have. +func (n *node) minKeys() int { + if n.isLeaf { + return 1 + } + return 2 +} + +// size returns the size of the node after serialization. +func (n *node) size() int { + sz, elsz := pageHeaderSize, n.pageElementSize() + for i := 0; i < len(n.inodes); i++ { + item := &n.inodes[i] + sz += elsz + len(item.key) + len(item.value) + } + return sz +} + +// sizeLessThan returns true if the node is less than a given size. +// This is an optimization to avoid calculating a large node when we only need +// to know if it fits inside a certain page size. +func (n *node) sizeLessThan(v int) bool { + sz, elsz := pageHeaderSize, n.pageElementSize() + for i := 0; i < len(n.inodes); i++ { + item := &n.inodes[i] + sz += elsz + len(item.key) + len(item.value) + if sz >= v { + return false + } + } + return true +} + +// pageElementSize returns the size of each page element based on the type of node. +func (n *node) pageElementSize() int { + if n.isLeaf { + return leafPageElementSize + } + return branchPageElementSize +} + +// childAt returns the child node at a given index. +func (n *node) childAt(index int) *node { + if n.isLeaf { + panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index)) + } + return n.bucket.node(n.inodes[index].pgid, n) +} + +// childIndex returns the index of a given child node. +func (n *node) childIndex(child *node) int { + index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 }) + return index +} + +// numChildren returns the number of children. +func (n *node) numChildren() int { + return len(n.inodes) +} + +// nextSibling returns the next node with the same parent. +func (n *node) nextSibling() *node { + if n.parent == nil { + return nil + } + index := n.parent.childIndex(n) + if index >= n.parent.numChildren()-1 { + return nil + } + return n.parent.childAt(index + 1) +} + +// prevSibling returns the previous node with the same parent. +func (n *node) prevSibling() *node { + if n.parent == nil { + return nil + } + index := n.parent.childIndex(n) + if index == 0 { + return nil + } + return n.parent.childAt(index - 1) +} + +// put inserts a key/value. 
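+//
+// oldKey addresses the slot being replaced (it is what is searched for),
+// while newKey is what actually gets stored; the two differ during spill(),
+// when a node is re-registered in its parent under its new first key. Branch
+// entries carry a pgid and a nil value; leaf entries use pgid 0 and may set
+// bucketLeafFlag in flags.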
+func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { + if pgid >= n.bucket.tx.meta.pgid { + panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid)) + } else if len(oldKey) <= 0 { + panic("put: zero-length old key") + } else if len(newKey) <= 0 { + panic("put: zero-length new key") + } + + // Find insertion index. + index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 }) + + // Add capacity and shift nodes if we don't have an exact match and need to insert. + exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)) + if !exact { + n.inodes = append(n.inodes, inode{}) + copy(n.inodes[index+1:], n.inodes[index:]) + } + + inode := &n.inodes[index] + inode.flags = flags + inode.key = newKey + inode.value = value + inode.pgid = pgid + _assert(len(inode.key) > 0, "put: zero-length inode key") +} + +// del removes a key from the node. +func (n *node) del(key []byte) { + // Find index of key. + index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 }) + + // Exit if the key isn't found. + if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) { + return + } + + // Delete inode from the node. + n.inodes = append(n.inodes[:index], n.inodes[index+1:]...) + + // Mark the node as needing rebalancing. + n.unbalanced = true +} + +// read initializes the node from a page. +func (n *node) read(p *page) { + n.pgid = p.id + n.isLeaf = ((p.flags & leafPageFlag) != 0) + n.inodes = make(inodes, int(p.count)) + + for i := 0; i < int(p.count); i++ { + inode := &n.inodes[i] + if n.isLeaf { + elem := p.leafPageElement(uint16(i)) + inode.flags = elem.flags + inode.key = elem.key() + inode.value = elem.value() + } else { + elem := p.branchPageElement(uint16(i)) + inode.pgid = elem.pgid + inode.key = elem.key() + } + _assert(len(inode.key) > 0, "read: zero-length inode key") + } + + // Save first key so we can find the node in the parent when we spill. + if len(n.inodes) > 0 { + n.key = n.inodes[0].key + _assert(len(n.key) > 0, "read: zero-length node key") + } else { + n.key = nil + } +} + +// write writes the items onto one or more pages. +func (n *node) write(p *page) { + // Initialize page. + if n.isLeaf { + p.flags |= leafPageFlag + } else { + p.flags |= branchPageFlag + } + + if len(n.inodes) >= 0xFFFF { + panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id)) + } + p.count = uint16(len(n.inodes)) + + // Stop here if there are no items to write. + if p.count == 0 { + return + } + + // Loop over each item and write it to the page. + b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):] + for i, item := range n.inodes { + _assert(len(item.key) > 0, "write: zero-length inode key") + + // Write the page element. + if n.isLeaf { + elem := p.leafPageElement(uint16(i)) + elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) + elem.flags = item.flags + elem.ksize = uint32(len(item.key)) + elem.vsize = uint32(len(item.value)) + } else { + elem := p.branchPageElement(uint16(i)) + elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) + elem.ksize = uint32(len(item.key)) + elem.pgid = item.pgid + _assert(elem.pgid != p.id, "write: circular dependency occurred") + } + + // If the length of key+value is larger than the max allocation size + // then we need to reallocate the byte array pointer. 
+		//
+		// See: https://github.com/boltdb/bolt/pull/335
+		klen, vlen := len(item.key), len(item.value)
+		if len(b) < klen+vlen {
+			b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
+		}
+
+		// Write data for the element to the end of the page.
+		copy(b[0:], item.key)
+		b = b[klen:]
+		copy(b[0:], item.value)
+		b = b[vlen:]
+	}
+
+	// DEBUG ONLY: n.dump()
+}
+
+// split breaks up a node into multiple smaller nodes, if appropriate.
+// This should only be called from the spill() function.
+func (n *node) split(pageSize int) []*node {
+	var nodes []*node
+
+	node := n
+	for {
+		// Split node into two.
+		a, b := node.splitTwo(pageSize)
+		nodes = append(nodes, a)
+
+		// If we can't split then exit the loop.
+		if b == nil {
+			break
+		}
+
+		// Set node to b so it gets split on the next iteration.
+		node = b
+	}
+
+	return nodes
+}
+
+// splitTwo breaks up a node into two smaller nodes, if appropriate.
+// This should only be called from the split() function.
+func (n *node) splitTwo(pageSize int) (*node, *node) {
+	// Ignore the split if the page doesn't have at least enough nodes for
+	// two pages or if the nodes can fit in a single page.
+	if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
+		return n, nil
+	}
+
+	// Determine the threshold before starting a new node.
+	var fillPercent = n.bucket.FillPercent
+	if fillPercent < minFillPercent {
+		fillPercent = minFillPercent
+	} else if fillPercent > maxFillPercent {
+		fillPercent = maxFillPercent
+	}
+	threshold := int(float64(pageSize) * fillPercent)
+
+	// Determine split position and sizes of the two pages.
+	splitIndex, _ := n.splitIndex(threshold)
+
+	// Split node into two separate nodes.
+	// If there's no parent then we'll need to create one.
+	if n.parent == nil {
+		n.parent = &node{bucket: n.bucket, children: []*node{n}}
+	}
+
+	// Create a new node and add it to the parent.
+	next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
+	n.parent.children = append(n.parent.children, next)
+
+	// Split inodes across two nodes.
+	next.inodes = n.inodes[splitIndex:]
+	n.inodes = n.inodes[:splitIndex]
+
+	// Update the statistics.
+	n.bucket.tx.stats.Split++
+
+	return n, next
+}
+
+// splitIndex finds the position where a page will fill a given threshold.
+// It returns the index as well as the size of the first page.
+// This is only called from splitTwo().
+func (n *node) splitIndex(threshold int) (index, sz int) {
+	sz = pageHeaderSize
+
+	// Loop until we only have the minimum number of keys required for the second page.
+	for i := 0; i < len(n.inodes)-minKeysPerPage; i++ {
+		index = i
+		inode := n.inodes[i]
+		elsize := n.pageElementSize() + len(inode.key) + len(inode.value)
+
+		// If we have at least the minimum number of keys and adding another
+		// node would put us over the threshold then exit and return.
+		if i >= minKeysPerPage && sz+elsize > threshold {
+			break
+		}
+
+		// Add the element size to the total size.
+		sz += elsize
+	}
+
+	return
+}
+
+// spill writes the nodes to dirty pages and splits nodes as it goes.
+// Returns an error if dirty pages cannot be allocated.
+func (n *node) spill() error {
+	var tx = n.bucket.tx
+	if n.spilled {
+		return nil
+	}
+
+	// Spill child nodes first. Child nodes can materialize sibling nodes in
+	// the case of split-merge so we cannot use a range loop. We have to check
+	// the children size on every loop iteration.
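+	// (For instance, if child i splits, the freshly created sibling is
+	// appended to n.children and must itself be spilled by a later
+	// iteration of the index-based loop below.)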
+ sort.Sort(n.children) + for i := 0; i < len(n.children); i++ { + if err := n.children[i].spill(); err != nil { + return err + } + } + + // We no longer need the child list because it's only used for spill tracking. + n.children = nil + + // Split nodes into appropriate sizes. The first node will always be n. + var nodes = n.split(tx.db.pageSize) + for _, node := range nodes { + // Add node's page to the freelist if it's not new. + if node.pgid > 0 { + tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid)) + node.pgid = 0 + } + + // Allocate contiguous space for the node. + p, err := tx.allocate((node.size() / tx.db.pageSize) + 1) + if err != nil { + return err + } + + // Write the node. + if p.id >= tx.meta.pgid { + panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid)) + } + node.pgid = p.id + node.write(p) + node.spilled = true + + // Insert into parent inodes. + if node.parent != nil { + var key = node.key + if key == nil { + key = node.inodes[0].key + } + + node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0) + node.key = node.inodes[0].key + _assert(len(node.key) > 0, "spill: zero-length node key") + } + + // Update the statistics. + tx.stats.Spill++ + } + + // If the root node split and created a new root then we need to spill that + // as well. We'll clear out the children to make sure it doesn't try to respill. + if n.parent != nil && n.parent.pgid == 0 { + n.children = nil + return n.parent.spill() + } + + return nil +} + +// rebalance attempts to combine the node with sibling nodes if the node fill +// size is below a threshold or if there are not enough keys. +func (n *node) rebalance() { + if !n.unbalanced { + return + } + n.unbalanced = false + + // Update statistics. + n.bucket.tx.stats.Rebalance++ + + // Ignore if node is above threshold (25%) and has enough keys. + var threshold = n.bucket.tx.db.pageSize / 4 + if n.size() > threshold && len(n.inodes) > n.minKeys() { + return + } + + // Root node has special handling. + if n.parent == nil { + // If root node is a branch and only has one node then collapse it. + if !n.isLeaf && len(n.inodes) == 1 { + // Move root's child up. + child := n.bucket.node(n.inodes[0].pgid, n) + n.isLeaf = child.isLeaf + n.inodes = child.inodes[:] + n.children = child.children + + // Reparent all child nodes being moved. + for _, inode := range n.inodes { + if child, ok := n.bucket.nodes[inode.pgid]; ok { + child.parent = n + } + } + + // Remove old child. + child.parent = nil + delete(n.bucket.nodes, child.pgid) + child.free() + } + + return + } + + // If node has no keys then just remove it. + if n.numChildren() == 0 { + n.parent.del(n.key) + n.parent.removeChild(n) + delete(n.bucket.nodes, n.pgid) + n.free() + n.parent.rebalance() + return + } + + _assert(n.parent.numChildren() > 1, "parent must have at least 2 children") + + // Destination node is right sibling if idx == 0, otherwise left sibling. + var target *node + var useNextSibling = (n.parent.childIndex(n) == 0) + if useNextSibling { + target = n.nextSibling() + } else { + target = n.prevSibling() + } + + // If both this node and the target node are too small then merge them. + if useNextSibling { + // Reparent all child nodes being moved. + for _, inode := range target.inodes { + if child, ok := n.bucket.nodes[inode.pgid]; ok { + child.parent.removeChild(child) + child.parent = n + child.parent.children = append(child.parent.children, child) + } + } + + // Copy over inodes from target and remove target. + n.inodes = append(n.inodes, target.inodes...) 
+ n.parent.del(target.key) + n.parent.removeChild(target) + delete(n.bucket.nodes, target.pgid) + target.free() + } else { + // Reparent all child nodes being moved. + for _, inode := range n.inodes { + if child, ok := n.bucket.nodes[inode.pgid]; ok { + child.parent.removeChild(child) + child.parent = target + child.parent.children = append(child.parent.children, child) + } + } + + // Copy over inodes to target and remove node. + target.inodes = append(target.inodes, n.inodes...) + n.parent.del(n.key) + n.parent.removeChild(n) + delete(n.bucket.nodes, n.pgid) + n.free() + } + + // Either this node or the target node was deleted from the parent so rebalance it. + n.parent.rebalance() +} + +// removes a node from the list of in-memory children. +// This does not affect the inodes. +func (n *node) removeChild(target *node) { + for i, child := range n.children { + if child == target { + n.children = append(n.children[:i], n.children[i+1:]...) + return + } + } +} + +// dereference causes the node to copy all its inode key/value references to heap memory. +// This is required when the mmap is reallocated so inodes are not pointing to stale data. +func (n *node) dereference() { + if n.key != nil { + key := make([]byte, len(n.key)) + copy(key, n.key) + n.key = key + _assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node") + } + + for i := range n.inodes { + inode := &n.inodes[i] + + key := make([]byte, len(inode.key)) + copy(key, inode.key) + inode.key = key + _assert(len(inode.key) > 0, "dereference: zero-length inode key") + + value := make([]byte, len(inode.value)) + copy(value, inode.value) + inode.value = value + } + + // Recursively dereference children. + for _, child := range n.children { + child.dereference() + } + + // Update statistics. + n.bucket.tx.stats.NodeDeref++ +} + +// free adds the node's underlying page to the freelist. +func (n *node) free() { + if n.pgid != 0 { + n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid)) + n.pgid = 0 + } +} + +// dump writes the contents of the node to STDERR for debugging purposes. +/* +func (n *node) dump() { + // Write node header. + var typ = "branch" + if n.isLeaf { + typ = "leaf" + } + warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes)) + + // Write out abbreviated version of each item. + for _, item := range n.inodes { + if n.isLeaf { + if item.flags&bucketLeafFlag != 0 { + bucket := (*bucket)(unsafe.Pointer(&item.value[0])) + warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root) + } else { + warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4)) + } + } else { + warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid) + } + } + warn("") +} +*/ + +type nodes []*node + +func (s nodes) Len() int { return len(s) } +func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 } + +// inode represents an internal node inside of a node. +// It can be used to point to elements in a page or point +// to an element which hasn't been added to a page yet. 
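+//
+// Which fields are populated depends on the node type (mirroring
+// node.read above); roughly:
+//
+//	leaf:   inode{flags: f, key: k, value: v}
+//	branch: inode{pgid: childPageID, key: childFirstKey}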
+type inode struct { + flags uint32 + pgid pgid + key []byte + value []byte +} + +type inodes []inode + +const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr)) + +const minKeysPerPage = 2 + +const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{})) +const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) + +const ( + branchPageFlag = 0x01 + leafPageFlag = 0x02 + metaPageFlag = 0x04 + freelistPageFlag = 0x10 +) + +const ( + bucketLeafFlag = 0x01 +) + +type pgid uint64 + +type page struct { + id pgid + flags uint16 + count uint16 + overflow uint32 + ptr uintptr +} + +// typ returns a human readable page type string used for debugging. +func (p *page) typ() string { + if (p.flags & branchPageFlag) != 0 { + return "branch" + } else if (p.flags & leafPageFlag) != 0 { + return "leaf" + } else if (p.flags & metaPageFlag) != 0 { + return "meta" + } else if (p.flags & freelistPageFlag) != 0 { + return "freelist" + } + return fmt.Sprintf("unknown<%02x>", p.flags) +} + +// meta returns a pointer to the metadata section of the page. +func (p *page) meta() *meta { + return (*meta)(unsafe.Pointer(&p.ptr)) +} + +// leafPageElement retrieves the leaf node by index +func (p *page) leafPageElement(index uint16) *leafPageElement { + n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] + return n +} + +// leafPageElements retrieves a list of leaf nodes. +func (p *page) leafPageElements() []leafPageElement { + if p.count == 0 { + return nil + } + return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:] +} + +// branchPageElement retrieves the branch node by index +func (p *page) branchPageElement(index uint16) *branchPageElement { + return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] +} + +// branchPageElements retrieves a list of branch nodes. +func (p *page) branchPageElements() []branchPageElement { + if p.count == 0 { + return nil + } + return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:] +} + +// dump writes n bytes of the page to STDERR as hex output. +func (p *page) hexdump(n int) { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n] + fmt.Fprintf(os.Stderr, "%x\n", buf) +} + +type pages []*page + +func (s pages) Len() int { return len(s) } +func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s pages) Less(i, j int) bool { return s[i].id < s[j].id } + +// branchPageElement represents a node on a branch page. +type branchPageElement struct { + pos uint32 + ksize uint32 + pgid pgid +} + +// key returns a byte slice of the node key. +func (n *branchPageElement) key() []byte { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) + return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] +} + +// leafPageElement represents a node on a leaf page. +type leafPageElement struct { + flags uint32 + pos uint32 + ksize uint32 + vsize uint32 +} + +// key returns a byte slice of the node key. +func (n *leafPageElement) key() []byte { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) + return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] +} + +// value returns a byte slice of the node value. +func (n *leafPageElement) value() []byte { + buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) + return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] +} + +// PageInfo represents human readable information about a page. 
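+// For example, Tx.Page could describe a leaf page that spills into two
+// overflow blocks as (values illustrative):
+//
+//	PageInfo{ID: 7, Type: "leaf", Count: 12, OverflowCount: 2}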
+type PageInfo struct { + ID int + Type string + Count int + OverflowCount int +} + +type pgids []pgid + +func (s pgids) Len() int { return len(s) } +func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (s pgids) Less(i, j int) bool { return s[i] < s[j] } + +// merge returns the sorted union of a and b. +func (a pgids) merge(b pgids) pgids { + // Return the opposite slice if one is nil. + if len(a) == 0 { + return b + } + if len(b) == 0 { + return a + } + merged := make(pgids, len(a)+len(b)) + mergepgids(merged, a, b) + return merged +} + +// mergepgids copies the sorted union of a and b into dst. +// If dst is too small, it panics. +func mergepgids(dst, a, b pgids) { + if len(dst) < len(a)+len(b) { + panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b))) + } + // Copy in the opposite slice if one is nil. + if len(a) == 0 { + copy(dst, b) + return + } + if len(b) == 0 { + copy(dst, a) + return + } + + // Merged will hold all elements from both lists. + merged := dst[:0] + + // Assign lead to the slice with a lower starting value, follow to the higher value. + lead, follow := a, b + if b[0] < a[0] { + lead, follow = b, a + } + + // Continue while there are elements in the lead. + for len(lead) > 0 { + // Merge largest prefix of lead that is ahead of follow[0]. + n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] }) + merged = append(merged, lead[:n]...) + if n >= len(lead) { + break + } + + // Swap lead and follow. + lead, follow = follow, lead[n:] + } + + // Append what's left in follow. + _ = append(merged, follow...) +} + +// txid represents the internal transaction identifier. +type txid uint64 + +// Tx represents a read-only or read/write transaction on the database. +// Read-only transactions can be used for retrieving values for keys and creating cursors. +// Read/write transactions can create and remove buckets and create and remove keys. +// +// IMPORTANT: You must commit or rollback transactions when you are done with +// them. Pages can not be reclaimed by the writer until no more transactions +// are using them. A long running read transaction can cause the database to +// quickly grow. +type Tx struct { + writable bool + managed bool + db *DB + meta *meta + root Bucket + pages map[pgid]*page + stats TxStats + commitHandlers []func() + + // WriteFlag specifies the flag for write-related methods like WriteTo(). + // Tx opens the database file with the specified flag to copy the data. + // + // By default, the flag is unset, which works well for mostly in-memory + // workloads. For databases that are much larger than available RAM, + // set the flag to syscall.O_DIRECT to avoid trashing the page cache. + WriteFlag int +} + +// init initializes the transaction. +func (tx *Tx) init(db *DB) { + tx.db = db + tx.pages = nil + + // Copy the meta page since it can be changed by the writer. + tx.meta = &meta{} + db.meta().copy(tx.meta) + + // Copy over the root bucket. + tx.root = newBucket(tx) + tx.root.bucket = &bucket{} + *tx.root.bucket = tx.meta.root + + // Increment the transaction id and add a page cache for writable transactions. + if tx.writable { + tx.pages = make(map[pgid]*page) + tx.meta.txid += txid(1) + } +} + +// ID returns the transaction id. +func (tx *Tx) ID() int { + return int(tx.meta.txid) +} + +// DB returns a reference to the database that created the transaction. +func (tx *Tx) DB() *DB { + return tx.db +} + +// Size returns current database size in bytes as seen by this transaction. 
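+// The figure is the high-water mark times the page size rather than the
+// bytes actually in use: e.g. with meta.pgid == 10 and a 4096-byte page
+// size, Size reports 40960 even if some of those pages sit on the freelist.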
+func (tx *Tx) Size() int64 {
+	return int64(tx.meta.pgid) * int64(tx.db.pageSize)
+}
+
+// Writable returns whether the transaction can perform write operations.
+func (tx *Tx) Writable() bool {
+	return tx.writable
+}
+
+// Cursor creates a cursor associated with the root bucket.
+// All items in the cursor will return a nil value because all root bucket keys point to buckets.
+// The cursor is only valid as long as the transaction is open.
+// Do not use a cursor after the transaction is closed.
+func (tx *Tx) Cursor() *Cursor {
+	return tx.root.Cursor()
+}
+
+// Stats retrieves a copy of the current transaction statistics.
+func (tx *Tx) Stats() TxStats {
+	return tx.stats
+}
+
+// Bucket retrieves a bucket by name.
+// Returns nil if the bucket does not exist.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (tx *Tx) Bucket(name []byte) *Bucket {
+	return tx.root.Bucket(name)
+}
+
+// CreateBucket creates a new bucket.
+// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
+	return tx.root.CreateBucket(name)
+}
+
+// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
+// Returns an error if the bucket name is blank, or if the bucket name is too long.
+// The bucket instance is only valid for the lifetime of the transaction.
+func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
+	return tx.root.CreateBucketIfNotExists(name)
+}
+
+// DeleteBucket deletes a bucket.
+// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
+func (tx *Tx) DeleteBucket(name []byte) error {
+	return tx.root.DeleteBucket(name)
+}
+
+// ForEach executes a function for each bucket in the root.
+// If the provided function returns an error then the iteration is stopped and
+// the error is returned to the caller.
+func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
+	return tx.root.ForEach(func(k, v []byte) error {
+		if err := fn(k, tx.root.Bucket(k)); err != nil {
+			return err
+		}
+		return nil
+	})
+}
+
+// OnCommit adds a handler function to be executed after the transaction successfully commits.
+func (tx *Tx) OnCommit(fn func()) {
+	tx.commitHandlers = append(tx.commitHandlers, fn)
+}
+
+// Commit writes all changes to disk and updates the meta page.
+// Returns an error if a disk write error occurs, or if Commit is
+// called on a read-only transaction.
+func (tx *Tx) Commit() error {
+	_assert(!tx.managed, "managed tx commit not allowed")
+	if tx.db == nil {
+		return ErrTxClosed
+	} else if !tx.writable {
+		return ErrTxNotWritable
+	}
+
+	// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
+
+	// Rebalance nodes which have had deletions.
+	var startTime = time.Now()
+	tx.root.rebalance()
+	if tx.stats.Rebalance > 0 {
+		tx.stats.RebalanceTime += time.Since(startTime)
+	}
+
+	// Spill data onto dirty pages.
+	startTime = time.Now()
+	if err := tx.root.spill(); err != nil {
+		tx.rollback()
+		return err
+	}
+	tx.stats.SpillTime += time.Since(startTime)
+
+	// Point the meta page at the transaction's (possibly relocated) root bucket.
+	tx.meta.root.root = tx.root.root
+
+	opgid := tx.meta.pgid
+
+	// Free the freelist and allocate new pages for it. This will overestimate
+	// the size of the freelist but not underestimate the size (which would be bad).
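+	// For example (figures illustrative), a freelist that serializes to
+	// 5000 bytes with a 4096-byte page size allocates 5000/4096+1 == 2
+	// pages below; one of exactly 4096 bytes still allocates 2, wasting a
+	// page rather than risking an undersized allocation.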
+ tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) + p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) + if err != nil { + tx.rollback() + return err + } + if err := tx.db.freelist.write(p); err != nil { + tx.rollback() + return err + } + tx.meta.freelist = p.id + + // If the high water mark has moved up then attempt to grow the database. + if tx.meta.pgid > opgid { + if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { + tx.rollback() + return err + } + } + + // Write dirty pages to disk. + startTime = time.Now() + if err := tx.write(); err != nil { + tx.rollback() + return err + } + + // If strict mode is enabled then perform a consistency check. + // Only the first consistency error is reported in the panic. + if tx.db.StrictMode { + ch := tx.Check() + var errs []string + for { + err, ok := <-ch + if !ok { + break + } + errs = append(errs, err.Error()) + } + if len(errs) > 0 { + panic("check fail: " + strings.Join(errs, "\n")) + } + } + + // Write meta to disk. + if err := tx.writeMeta(); err != nil { + tx.rollback() + return err + } + tx.stats.WriteTime += time.Since(startTime) + + // Finalize the transaction. + tx.close() + + // Execute commit handlers now that the locks have been removed. + for _, fn := range tx.commitHandlers { + fn() + } + + return nil +} + +// Rollback closes the transaction and ignores all previous updates. Read-only +// transactions must be rolled back and not committed. +func (tx *Tx) Rollback() error { + _assert(!tx.managed, "managed tx rollback not allowed") + if tx.db == nil { + return ErrTxClosed + } + tx.rollback() + return nil +} + +func (tx *Tx) rollback() { + if tx.db == nil { + return + } + if tx.writable { + tx.db.freelist.rollback(tx.meta.txid) + tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + } + tx.close() +} + +func (tx *Tx) close() { + if tx.db == nil { + return + } + if tx.writable { + // Grab freelist stats. + var freelistFreeN = tx.db.freelist.free_count() + var freelistPendingN = tx.db.freelist.pending_count() + var freelistAlloc = tx.db.freelist.size() + + // Remove transaction ref & writer lock. + tx.db.rwtx = nil + tx.db.rwlock.Unlock() + + // Merge statistics. + tx.db.statlock.Lock() + tx.db.stats.FreePageN = freelistFreeN + tx.db.stats.PendingPageN = freelistPendingN + tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize + tx.db.stats.FreelistInuse = freelistAlloc + tx.db.stats.TxStats.add(&tx.stats) + tx.db.statlock.Unlock() + } else { + tx.db.removeTx(tx) + } + + // Clear all references. + tx.db = nil + tx.meta = nil + tx.root = Bucket{tx: tx} + tx.pages = nil +} + +// Copy writes the entire database to a writer. +// This function exists for backwards compatibility. +// +// Deprecated; Use WriteTo() instead. +func (tx *Tx) Copy(w io.Writer) error { + _, err := tx.WriteTo(w) + return err +} + +// WriteTo writes the entire database to a writer. +// If err == nil then exactly tx.Size() bytes will be written into the writer. +func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { + // Attempt to open reader with WriteFlag + f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) + if err != nil { + return 0, err + } + defer func() { _ = f.Close() }() + + // Generate a meta page. We use the same page data for both meta pages. + buf := make([]byte, tx.db.pageSize) + page := (*page)(unsafe.Pointer(&buf[0])) + page.flags = metaPageFlag + *page.meta() = *tx.meta + + // Write meta 0. 
+ page.id = 0 + page.meta().checksum = page.meta().sum64() + nn, err := w.Write(buf) + n += int64(nn) + if err != nil { + return n, fmt.Errorf("meta 0 copy: %s", err) + } + + // Write meta 1 with a lower transaction id. + page.id = 1 + page.meta().txid -= 1 + page.meta().checksum = page.meta().sum64() + nn, err = w.Write(buf) + n += int64(nn) + if err != nil { + return n, fmt.Errorf("meta 1 copy: %s", err) + } + + // Move past the meta pages in the file. + if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil { + return n, fmt.Errorf("seek: %s", err) + } + + // Copy data pages. + wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2)) + n += wn + if err != nil { + return n, err + } + + return n, f.Close() +} + +// CopyFile copies the entire database to file at the given path. +// A reader transaction is maintained during the copy so it is safe to continue +// using the database while a copy is in progress. +func (tx *Tx) CopyFile(path string, mode os.FileMode) error { + f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) + if err != nil { + return err + } + + err = tx.Copy(f) + if err != nil { + _ = f.Close() + return err + } + return f.Close() +} + +// Check performs several consistency checks on the database for this transaction. +// An error is returned if any inconsistency is found. +// +// It can be safely run concurrently on a writable transaction. However, this +// incurs a high cost for large databases and databases with a lot of subbuckets +// because of caching. This overhead can be removed if running on a read-only +// transaction, however, it is not safe to execute other writer transactions at +// the same time. +func (tx *Tx) Check() <-chan error { + ch := make(chan error) + go tx.check(ch) + return ch +} + +func (tx *Tx) check(ch chan error) { + // Check if any pages are double freed. + freed := make(map[pgid]bool) + all := make([]pgid, tx.db.freelist.count()) + tx.db.freelist.copyall(all) + for _, id := range all { + if freed[id] { + ch <- fmt.Errorf("page %d: already freed", id) + } + freed[id] = true + } + + // Track every reachable page. + reachable := make(map[pgid]*page) + reachable[0] = tx.page(0) // meta0 + reachable[1] = tx.page(1) // meta1 + for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { + reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) + } + + // Recursively check buckets. + tx.checkBucket(&tx.root, reachable, freed, ch) + + // Ensure all pages below high water mark are either reachable or freed. + for i := pgid(0); i < tx.meta.pgid; i++ { + _, isReachable := reachable[i] + if !isReachable && !freed[i] { + ch <- fmt.Errorf("page %d: unreachable unfreed", int(i)) + } + } + + // Close the channel to signal completion. + close(ch) +} + +func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) { + // Ignore inline buckets. + if b.root == 0 { + return + } + + // Check every page used by this bucket. + b.tx.forEachPage(b.root, 0, func(p *page, _ int) { + if p.id > tx.meta.pgid { + ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid)) + } + + // Ensure each page is only referenced once. + for i := pgid(0); i <= pgid(p.overflow); i++ { + var id = p.id + i + if _, ok := reachable[id]; ok { + ch <- fmt.Errorf("page %d: multiple references", int(id)) + } + reachable[id] = p + } + + // We should only encounter un-freed leaf and branch pages. 
+ if freed[p.id] { + ch <- fmt.Errorf("page %d: reachable freed", int(p.id)) + } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 { + ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ()) + } + }) + + // Check each bucket within this bucket. + _ = b.ForEach(func(k, v []byte) error { + if child := b.Bucket(k); child != nil { + tx.checkBucket(child, reachable, freed, ch) + } + return nil + }) +} + +// allocate returns a contiguous block of memory starting at a given page. +func (tx *Tx) allocate(count int) (*page, error) { + p, err := tx.db.allocate(count) + if err != nil { + return nil, err + } + + // Save to our page cache. + tx.pages[p.id] = p + + // Update statistics. + tx.stats.PageCount++ + tx.stats.PageAlloc += count * tx.db.pageSize + + return p, nil +} + +// write writes any dirty pages to disk. +func (tx *Tx) write() error { + // Sort pages by id. + pages := make(pages, 0, len(tx.pages)) + for _, p := range tx.pages { + pages = append(pages, p) + } + // Clear out page cache early. + tx.pages = make(map[pgid]*page) + sort.Sort(pages) + + // Write pages to disk in order. + for _, p := range pages { + size := (int(p.overflow) + 1) * tx.db.pageSize + offset := int64(p.id) * int64(tx.db.pageSize) + + // Write out page in "max allocation" sized chunks. + ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p)) + for { + // Limit our write to our max allocation size. + sz := size + if sz > maxAllocSize-1 { + sz = maxAllocSize - 1 + } + + // Write chunk to disk. + buf := ptr[:sz] + if _, err := tx.db.ops.writeAt(buf, offset); err != nil { + return err + } + + // Update statistics. + tx.stats.Write++ + + // Exit inner for loop if we've written all the chunks. + size -= sz + if size == 0 { + break + } + + // Otherwise move offset forward and move pointer to next chunk. + offset += int64(sz) + ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz])) + } + } + + // Ignore file sync if flag is set on DB. + if !tx.db.NoSync || IgnoreNoSync { + if err := fdatasync(tx.db); err != nil { + return err + } + } + + // Put small pages back to page pool. + for _, p := range pages { + // Ignore page sizes over 1 page. + // These are allocated using make() instead of the page pool. + if int(p.overflow) != 0 { + continue + } + + buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize] + + // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 + for i := range buf { + buf[i] = 0 + } + tx.db.pagePool.Put(buf) + } + + return nil +} + +// writeMeta writes the meta to the disk. +func (tx *Tx) writeMeta() error { + // Create a temporary buffer for the meta page. + buf := make([]byte, tx.db.pageSize) + p := tx.db.pageInBuffer(buf, 0) + tx.meta.write(p) + + // Write the meta page to file. + if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil { + return err + } + if !tx.db.NoSync || IgnoreNoSync { + if err := fdatasync(tx.db); err != nil { + return err + } + } + + // Update statistics. + tx.stats.Write++ + + return nil +} + +// page returns a reference to the page with a given id. +// If page has been written to then a temporary buffered page is returned. +func (tx *Tx) page(id pgid) *page { + // Check the dirty pages first. + if tx.pages != nil { + if p, ok := tx.pages[id]; ok { + return p + } + } + + // Otherwise return directly from the mmap. + return tx.db.page(id) +} + +// forEachPage iterates over every page within a given page and executes a function. 
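+// An illustrative usage sketch (checkBucket above uses the same shape):
+// walk a bucket's subtree from its root page, printing each page visited.
+//
+//	tx.forEachPage(b.root, 0, func(p *page, depth int) {
+//		fmt.Printf("%*s%s page: id=%d\n", depth*2, "", p.typ(), p.id)
+//	})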
+func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
+	p := tx.page(pgid)
+
+	// Execute function.
+	fn(p, depth)
+
+	// Recursively loop over children.
+	if (p.flags & branchPageFlag) != 0 {
+		for i := 0; i < int(p.count); i++ {
+			elem := p.branchPageElement(uint16(i))
+			tx.forEachPage(elem.pgid, depth+1, fn)
+		}
+	}
+}
+
+// Page returns page information for a given page number.
+// This is only safe for concurrent use when used by a writable transaction.
+func (tx *Tx) Page(id int) (*PageInfo, error) {
+	if tx.db == nil {
+		return nil, ErrTxClosed
+	} else if pgid(id) >= tx.meta.pgid {
+		return nil, nil
+	}
+
+	// Build the page info.
+	p := tx.db.page(pgid(id))
+	info := &PageInfo{
+		ID:            id,
+		Count:         int(p.count),
+		OverflowCount: int(p.overflow),
+	}
+
+	// Determine the type (or if it's free).
+	if tx.db.freelist.freed(pgid(id)) {
+		info.Type = "free"
+	} else {
+		info.Type = p.typ()
+	}
+
+	return info, nil
+}
+
+// TxStats represents statistics about the actions performed by the transaction.
+type TxStats struct {
+	// Page statistics.
+	PageCount int // number of page allocations
+	PageAlloc int // total bytes allocated
+
+	// Cursor statistics.
+	CursorCount int // number of cursors created
+
+	// Node statistics.
+	NodeCount int // number of node allocations
+	NodeDeref int // number of node dereferences
+
+	// Rebalance statistics.
+	Rebalance     int           // number of node rebalances
+	RebalanceTime time.Duration // total time spent rebalancing
+
+	// Split/Spill statistics.
+	Split     int           // number of nodes split
+	Spill     int           // number of nodes spilled
+	SpillTime time.Duration // total time spent spilling
+
+	// Write statistics.
+	Write     int           // number of writes performed
+	WriteTime time.Duration // total time spent writing to disk
+}
+
+func (s *TxStats) add(other *TxStats) {
+	s.PageCount += other.PageCount
+	s.PageAlloc += other.PageAlloc
+	s.CursorCount += other.CursorCount
+	s.NodeCount += other.NodeCount
+	s.NodeDeref += other.NodeDeref
+	s.Rebalance += other.Rebalance
+	s.RebalanceTime += other.RebalanceTime
+	s.Split += other.Split
+	s.Spill += other.Spill
+	s.SpillTime += other.SpillTime
+	s.Write += other.Write
+	s.WriteTime += other.WriteTime
+}
+
+// Sub calculates and returns the difference between two sets of transaction stats.
+// This is useful when obtaining stats at two different points in time and
+// you need the performance counters that occurred within that time span.
+func (s *TxStats) Sub(other *TxStats) TxStats {
+	var diff TxStats
+	diff.PageCount = s.PageCount - other.PageCount
+	diff.PageAlloc = s.PageAlloc - other.PageAlloc
+	diff.CursorCount = s.CursorCount - other.CursorCount
+	diff.NodeCount = s.NodeCount - other.NodeCount
+	diff.NodeDeref = s.NodeDeref - other.NodeDeref
+	diff.Rebalance = s.Rebalance - other.Rebalance
+	diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime
+	diff.Split = s.Split - other.Split
+	diff.Spill = s.Spill - other.Spill
+	diff.SpillTime = s.SpillTime - other.SpillTime
+	diff.Write = s.Write - other.Write
+	diff.WriteTime = s.WriteTime - other.WriteTime
+	return diff
+}
+
+var (
+	// ErrUsage is returned when a usage message was printed and the process
+	// should simply exit with an error.
+	ErrUsage = errors.New("usage")
+
+	// ErrUnknownCommand is returned when an unknown CLI command is specified.
+	ErrUnknownCommand = errors.New("unknown command")
+
+	// ErrPathRequired is returned when the path to a Bolt database is not specified.
+	ErrPathRequired = errors.New("path required")
+
+	// ErrFileNotFound is returned when a Bolt database does not exist.
+	ErrFileNotFound = errors.New("file not found")
+
+	// ErrInvalidValue is returned when a benchmark reads an unexpected value.
+	ErrInvalidValue = errors.New("invalid value")
+
+	// ErrCorrupt is returned when checking a data file finds errors.
+	ErrCorrupt = errors.New("corrupt database")
+
+	// ErrNonDivisibleBatchSize is returned when the batch size can't be evenly
+	// divided by the iteration count.
+	ErrNonDivisibleBatchSize = errors.New("number of iterations must be divisible by the batch size")
+
+	// ErrPageIDRequired is returned when a required page id is not specified.
+	ErrPageIDRequired = errors.New("page id required")
+
+	// ErrPageNotFound is returned when specifying a page above the high water mark.
+	ErrPageNotFound = errors.New("page not found")
+
+	// ErrPageFreed is returned when reading a page that has already been freed.
+	ErrPageFreed = errors.New("page freed")
+)
+
+// PageHeaderSize represents the size of the bolt.page header.
+const PageHeaderSize = 16
+
+func Main() {
+	m := NewMain()
+	if err := m.Run(os.Args[1:]...); err == ErrUsage {
+		os.Exit(2)
+	} else if err != nil {
+		fmt.Println(err.Error())
+		os.Exit(1)
+	}
+}
+
+// MainT represents the main program execution.
+type MainT struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// NewMain returns a new MainT connected to the standard input/output.
+func NewMain() *MainT {
+	return &MainT{
+		Stdin:  os.Stdin,
+		Stdout: os.Stdout,
+		Stderr: os.Stderr,
+	}
+}
+
+// Run executes the program.
+func (m *MainT) Run(args ...string) error {
+	// Require a command at the beginning.
+	if len(args) == 0 || strings.HasPrefix(args[0], "-") {
+		fmt.Fprintln(m.Stderr, m.Usage())
+		return ErrUsage
+	}
+
+	// Execute command.
+	switch args[0] {
+	case "help":
+		fmt.Fprintln(m.Stderr, m.Usage())
+		return ErrUsage
+	case "bench":
+		return newBenchCommand(m).Run(args[1:]...)
+	case "check":
+		return newCheckCommand(m).Run(args[1:]...)
+	case "compact":
+		return newCompactCommand(m).Run(args[1:]...)
+	case "dump":
+		return newDumpCommand(m).Run(args[1:]...)
+	case "info":
+		return newInfoCommand(m).Run(args[1:]...)
+	case "page":
+		return newPageCommand(m).Run(args[1:]...)
+	case "pages":
+		return newPagesCommand(m).Run(args[1:]...)
+	case "stats":
+		return newStatsCommand(m).Run(args[1:]...)
+	default:
+		return ErrUnknownCommand
+	}
+}
+
+// Usage returns the help message.
+func (m *MainT) Usage() string {
+	return strings.TrimLeft(`
+Bolt is a tool for inspecting bolt databases.
+
+Usage:
+
+	bolt command [arguments]
+
+The commands are:
+
+	bench       run synthetic benchmark against bolt
+	check       verifies integrity of bolt database
+	compact     copies a bolt database, compacting it in the process
+	dump        print a hexadecimal dump of one or more pages
+	info        print basic info
+	help        print this screen
+	page        print one or more pages in human readable format
+	pages       print list of pages with their types
+	stats       iterate over all pages and generate usage stats
+
+Use "bolt [command] -h" for more information about a command.
+`, "\n")
+}
+
+// CheckCommand represents the "check" command execution.
+type CheckCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newCheckCommand returns a CheckCommand.
+func newCheckCommand(m *MainT) *CheckCommand {
+	return &CheckCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *CheckCommand) Run(args ...string) error {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	help := fs.Bool("h", false, "")
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if *help {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	}
+
+	// Require database path.
+	path := fs.Arg(0)
+	if path == "" {
+		return ErrPathRequired
+	} else if _, err := os.Stat(path); os.IsNotExist(err) {
+		return ErrFileNotFound
+	}
+
+	// Open database.
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		return err
+	}
+	defer db.Close()
+
+	// Perform consistency check.
+	return db.View(func(tx *Tx) error {
+		// Check() closes the channel once the scan completes, so a plain
+		// range loop is enough to drain it.
+		var count int
+		for err := range tx.Check() {
+			fmt.Fprintln(cmd.Stdout, err)
+			count++
+		}
+
+		// Print summary of errors.
+		if count > 0 {
+			fmt.Fprintf(cmd.Stdout, "%d errors found\n", count)
+			return ErrCorrupt
+		}
+
+		// Notify user that database is valid.
+		fmt.Fprintln(cmd.Stdout, "OK")
+		return nil
+	})
+}
+
+// Usage returns the help message.
+func (cmd *CheckCommand) Usage() string {
+	return strings.TrimLeft(`
+usage: bolt check PATH
+
+Check opens a database at PATH and runs an exhaustive check to verify that
+all pages are accessible or are marked as freed. It also verifies that no
+pages are double referenced.
+
+Verification errors will stream out as they are found and the process will
+return after all pages have been checked.
+`, "\n")
+}
+
+// InfoCommand represents the "info" command execution.
+type InfoCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newInfoCommand returns an InfoCommand.
+func newInfoCommand(m *MainT) *InfoCommand {
+	return &InfoCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *InfoCommand) Run(args ...string) error {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	help := fs.Bool("h", false, "")
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if *help {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	}
+
+	// Require database path.
+	path := fs.Arg(0)
+	if path == "" {
+		return ErrPathRequired
+	} else if _, err := os.Stat(path); os.IsNotExist(err) {
+		return ErrFileNotFound
+	}
+
+	// Open the database.
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		return err
+	}
+	defer db.Close()
+
+	// Print basic database info.
+	info := db.Info()
+	fmt.Fprintf(cmd.Stdout, "Page Size: %d\n", info.PageSize)
+
+	return nil
+}
+
+// Usage returns the help message.
+func (cmd *InfoCommand) Usage() string {
+	return strings.TrimLeft(`
+usage: bolt info PATH
+
+Info prints basic information about the Bolt database at PATH.
+`, "\n")
+}
+
+// DumpCommand represents the "dump" command execution.
+type DumpCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newDumpCommand returns a DumpCommand.
+func newDumpCommand(m *MainT) *DumpCommand {
+	return &DumpCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *DumpCommand) Run(args ...string) error {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	help := fs.Bool("h", false, "")
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if *help {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	}
+
+	// Require database path and page id.
+	path := fs.Arg(0)
+	if path == "" {
+		return ErrPathRequired
+	} else if _, err := os.Stat(path); os.IsNotExist(err) {
+		return ErrFileNotFound
+	}
+
+	// Read page ids.
+	pageIDs, err := atois(fs.Args()[1:])
+	if err != nil {
+		return err
+	} else if len(pageIDs) == 0 {
+		return ErrPageIDRequired
+	}
+
+	// Open database to retrieve page size.
+	pageSize, err := ReadPageSize(path)
+	if err != nil {
+		return err
+	}
+
+	// Open database file handler.
+	f, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = f.Close() }()
+
+	// Print each page listed.
+	for i, pageID := range pageIDs {
+		// Print a separator.
+		if i > 0 {
+			fmt.Fprintln(cmd.Stdout, "===============================================")
+		}
+
+		// Print page to stdout.
+		if err := cmd.PrintPage(cmd.Stdout, f, pageID, pageSize); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// PrintPage prints a given page as hexadecimal.
+func (cmd *DumpCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSize int) error {
+	const bytesPerLineN = 16
+
+	// Read page into buffer.
+	buf := make([]byte, pageSize)
+	addr := pageID * pageSize
+	if n, err := r.ReadAt(buf, int64(addr)); err != nil {
+		return err
+	} else if n != pageSize {
+		return io.ErrUnexpectedEOF
+	}
+
+	// Write out to writer in 16-byte lines.
+	var prev []byte
+	var skipped bool
+	for offset := 0; offset < pageSize; offset += bytesPerLineN {
+		// Retrieve current 16-byte line.
+		line := buf[offset : offset+bytesPerLineN]
+		isLastLine := (offset == (pageSize - bytesPerLineN))
+
+		// If it's the same as the previous line then print a skip.
+		if bytes.Equal(line, prev) && !isLastLine {
+			if !skipped {
+				fmt.Fprintf(w, "%07x *\n", addr+offset)
+				skipped = true
+			}
+		} else {
+			// Print line as hexadecimal in 2-byte groups.
+			fmt.Fprintf(w, "%07x %04x %04x %04x %04x %04x %04x %04x %04x\n", addr+offset,
+				line[0:2], line[2:4], line[4:6], line[6:8],
+				line[8:10], line[10:12], line[12:14], line[14:16],
+			)
+
+			skipped = false
+		}
+
+		// Save the previous line.
+		prev = line
+	}
+	fmt.Fprint(w, "\n")
+
+	return nil
+}
+
+// Usage returns the help message.
+func (cmd *DumpCommand) Usage() string {
+	return strings.TrimLeft(`
+usage: bolt dump PATH pageid [pageid...]
+
+Dump prints a hexadecimal dump of one or more pages.
+`, "\n")
+}
+
+// PageCommand represents the "page" command execution.
+type PageCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newPageCommand returns a PageCommand.
+func newPageCommand(m *MainT) *PageCommand {
+	return &PageCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *PageCommand) Run(args ...string) error {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	help := fs.Bool("h", false, "")
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if *help {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	}
+
+	// Require database path and page id.
+	path := fs.Arg(0)
+	if path == "" {
+		return ErrPathRequired
+	} else if _, err := os.Stat(path); os.IsNotExist(err) {
+		return ErrFileNotFound
+	}
+
+	// Read page ids.
+	pageIDs, err := atois(fs.Args()[1:])
+	if err != nil {
+		return err
+	} else if len(pageIDs) == 0 {
+		return ErrPageIDRequired
+	}
+
+	// Open database file handler.
+	f, err := os.Open(path)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = f.Close() }()
+
+	// Print each page listed.
+	for i, pageID := range pageIDs {
+		// Print a separator.
+		if i > 0 {
+			fmt.Fprintln(cmd.Stdout, "===============================================")
+		}
+
+		// Retrieve page info and page size.
+		p, buf, err := ReadPage(path, pageID)
+		if err != nil {
+			return err
+		}
+
+		// Print basic page info.
+		fmt.Fprintf(cmd.Stdout, "Page ID: %d\n", p.id)
+		fmt.Fprintf(cmd.Stdout, "Page Type: %s\n", p.typ())
+		fmt.Fprintf(cmd.Stdout, "Total Size: %d bytes\n", len(buf))
+
+		// Print type-specific data.
+		switch p.typ() {
+		case "meta":
+			err = cmd.PrintMeta(cmd.Stdout, buf)
+		case "leaf":
+			err = cmd.PrintLeaf(cmd.Stdout, buf)
+		case "branch":
+			err = cmd.PrintBranch(cmd.Stdout, buf)
+		case "freelist":
+			err = cmd.PrintFreelist(cmd.Stdout, buf)
+		}
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// PrintMeta prints the data from the meta page.
+func (cmd *PageCommand) PrintMeta(w io.Writer, buf []byte) error {
+	m := (*meta)(unsafe.Pointer(&buf[PageHeaderSize]))
+	fmt.Fprintf(w, "Version: %d\n", m.version)
+	fmt.Fprintf(w, "Page Size: %d bytes\n", m.pageSize)
+	fmt.Fprintf(w, "Flags: %08x\n", m.flags)
+	fmt.Fprintf(w, "Root: <pgid=%d>\n", m.root.root)
+	fmt.Fprintf(w, "Freelist: <pgid=%d>\n", m.freelist)
+	fmt.Fprintf(w, "HWM: <pgid=%d>\n", m.pgid)
+	fmt.Fprintf(w, "Txn ID: %d\n", m.txid)
+	fmt.Fprintf(w, "Checksum: %016x\n", m.checksum)
+	fmt.Fprintf(w, "\n")
+	return nil
+}
+
+// PrintLeaf prints the data for a leaf page.
+func (cmd *PageCommand) PrintLeaf(w io.Writer, buf []byte) error {
+	p := (*page)(unsafe.Pointer(&buf[0]))
+
+	// Print number of items.
+	fmt.Fprintf(w, "Item Count: %d\n", p.count)
+	fmt.Fprintf(w, "\n")
+
+	// Print each key/value.
+	for i := uint16(0); i < p.count; i++ {
+		e := p.leafPageElement(i)
+
+		// Format key as string.
+		var k string
+		if isPrintable(string(e.key())) {
+			k = fmt.Sprintf("%q", string(e.key()))
+		} else {
+			k = fmt.Sprintf("%x", string(e.key()))
+		}
+
+		// Format value as string.
+		var v string
+		if (e.flags & uint32(bucketLeafFlag)) != 0 {
+			b := (*bucket)(unsafe.Pointer(&e.value()[0]))
+			v = fmt.Sprintf("<pgid=%d,seq=%d>", b.root, b.sequence)
+		} else if isPrintable(string(e.value())) {
+			v = fmt.Sprintf("%q", string(e.value()))
+		} else {
+			v = fmt.Sprintf("%x", string(e.value()))
+		}
+
+		fmt.Fprintf(w, "%s: %s\n", k, v)
+	}
+	fmt.Fprintf(w, "\n")
+	return nil
+}
+
+// PrintBranch prints the data for a branch page.
+func (cmd *PageCommand) PrintBranch(w io.Writer, buf []byte) error {
+	p := (*page)(unsafe.Pointer(&buf[0]))
+
+	// Print number of items.
+	fmt.Fprintf(w, "Item Count: %d\n", p.count)
+	fmt.Fprintf(w, "\n")
+
+	// Print each key/value.
+	for i := uint16(0); i < p.count; i++ {
+		e := p.branchPageElement(i)
+
+		// Format key as string.
+		var k string
+		if isPrintable(string(e.key())) {
+			k = fmt.Sprintf("%q", string(e.key()))
+		} else {
+			k = fmt.Sprintf("%x", string(e.key()))
+		}
+
+		fmt.Fprintf(w, "%s: <pgid=%d>\n", k, e.pgid)
+	}
+	fmt.Fprintf(w, "\n")
+	return nil
+}
+
+// PrintFreelist prints the data for a freelist page.
+func (cmd *PageCommand) PrintFreelist(w io.Writer, buf []byte) error {
+	p := (*page)(unsafe.Pointer(&buf[0]))
+
+	// Print number of items.
+	fmt.Fprintf(w, "Item Count: %d\n", p.count)
+	fmt.Fprintf(w, "\n")
+
+	// Print each page in the freelist.
+	ids := (*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr))
+	for i := uint16(0); i < p.count; i++ {
+		fmt.Fprintf(w, "%d\n", ids[i])
+	}
+	fmt.Fprintf(w, "\n")
+	return nil
+}
+
+// PrintPage prints a given page as hexadecimal.
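+// Runs of identical 16-byte lines are collapsed into a single "*" marker,
+// so output looks roughly like (bytes illustrative):
+//
+//	0000000 0700 0000 0200 0000 0c00 0000 0000 0000
+//	0000010 *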
+func (cmd *PageCommand) PrintPage(w io.Writer, r io.ReaderAt, pageID int, pageSize int) error {
+	const bytesPerLineN = 16
+
+	// Read page into buffer.
+	buf := make([]byte, pageSize)
+	addr := pageID * pageSize
+	if n, err := r.ReadAt(buf, int64(addr)); err != nil {
+		return err
+	} else if n != pageSize {
+		return io.ErrUnexpectedEOF
+	}
+
+	// Write out to writer in 16-byte lines.
+	var prev []byte
+	var skipped bool
+	for offset := 0; offset < pageSize; offset += bytesPerLineN {
+		// Retrieve current 16-byte line.
+		line := buf[offset : offset+bytesPerLineN]
+		isLastLine := (offset == (pageSize - bytesPerLineN))
+
+		// If it's the same as the previous line then print a skip.
+		if bytes.Equal(line, prev) && !isLastLine {
+			if !skipped {
+				fmt.Fprintf(w, "%07x *\n", addr+offset)
+				skipped = true
+			}
+		} else {
+			// Print line as hexadecimal in 2-byte groups.
+			fmt.Fprintf(w, "%07x %04x %04x %04x %04x %04x %04x %04x %04x\n", addr+offset,
+				line[0:2], line[2:4], line[4:6], line[6:8],
+				line[8:10], line[10:12], line[12:14], line[14:16],
+			)
+
+			skipped = false
+		}
+
+		// Save the previous line.
+		prev = line
+	}
+	fmt.Fprint(w, "\n")
+
+	return nil
+}
+
+// Usage returns the help message.
+func (cmd *PageCommand) Usage() string {
+	return strings.TrimLeft(`
+usage: bolt page PATH pageid [pageid...]
+
+Page prints one or more pages in human readable format.
+`, "\n")
+}
+
+// PagesCommand represents the "pages" command execution.
+type PagesCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newPagesCommand returns a PagesCommand.
+func newPagesCommand(m *MainT) *PagesCommand {
+	return &PagesCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *PagesCommand) Run(args ...string) error {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	help := fs.Bool("h", false, "")
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if *help {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	}
+
+	// Require database path.
+	path := fs.Arg(0)
+	if path == "" {
+		return ErrPathRequired
+	} else if _, err := os.Stat(path); os.IsNotExist(err) {
+		return ErrFileNotFound
+	}
+
+	// Open database.
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		return err
+	}
+	defer func() { _ = db.Close() }()
+
+	// Write header.
+	fmt.Fprintln(cmd.Stdout, "ID       TYPE       ITEMS  OVRFLW")
+	fmt.Fprintln(cmd.Stdout, "======== ========== ====== ======")
+
+	return db.Update(func(tx *Tx) error {
+		var id int
+		for {
+			p, err := tx.Page(id)
+			if err != nil {
+				return &PageError{ID: id, Err: err}
+			} else if p == nil {
+				break
+			}
+
+			// Only display count and overflow if this is a non-free page.
+			var count, overflow string
+			if p.Type != "free" {
+				count = strconv.Itoa(p.Count)
+				if p.OverflowCount > 0 {
+					overflow = strconv.Itoa(p.OverflowCount)
+				}
+			}
+
+			// Print table row.
+			fmt.Fprintf(cmd.Stdout, "%-8d %-10s %-6s %-6s\n", p.ID, p.Type, count, overflow)
+
+			// Move to the next non-overflow page.
+			id += 1
+			if p.Type != "free" {
+				id += p.OverflowCount
+			}
+		}
+		return nil
+	})
+}
+
+// Usage returns the help message.
+func (cmd *PagesCommand) Usage() string {
+	return strings.TrimLeft(`
+usage: bolt pages PATH
+
+Pages prints a table of pages with their type (meta, leaf, branch, freelist).
+Leaf and branch pages will show a key count in the "items" column while the
+freelist will show the number of free pages in the "items" column.
+
+The "overflow" column shows the number of blocks that the page spills over
+into. Normally there is no overflow but large keys and values can cause
+a single page to take up multiple blocks.
+`, "\n")
+}
+
+// StatsCommand represents the "stats" command execution.
+type StatsCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newStatsCommand returns a StatsCommand.
+func newStatsCommand(m *MainT) *StatsCommand {
+	return &StatsCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *StatsCommand) Run(args ...string) error {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	help := fs.Bool("h", false, "")
+	if err := fs.Parse(args); err != nil {
+		return err
+	} else if *help {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	}
+
+	// Require database path.
+	path, prefix := fs.Arg(0), fs.Arg(1)
+	if path == "" {
+		return ErrPathRequired
+	} else if _, err := os.Stat(path); os.IsNotExist(err) {
+		return ErrFileNotFound
+	}
+
+	// Open database.
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		return err
+	}
+	defer db.Close()
+
+	return db.View(func(tx *Tx) error {
+		var s BucketStats
+		var count int
+		if err := tx.ForEach(func(name []byte, b *Bucket) error {
+			if bytes.HasPrefix(name, []byte(prefix)) {
+				s.Add(b.Stats())
+				count += 1
+			}
+			return nil
+		}); err != nil {
+			return err
+		}
+
+		fmt.Fprintf(cmd.Stdout, "Aggregate statistics for %d buckets\n\n", count)
+
+		fmt.Fprintln(cmd.Stdout, "Page count statistics")
+		fmt.Fprintf(cmd.Stdout, "\tNumber of logical branch pages: %d\n", s.BranchPageN)
+		fmt.Fprintf(cmd.Stdout, "\tNumber of physical branch overflow pages: %d\n", s.BranchOverflowN)
+		fmt.Fprintf(cmd.Stdout, "\tNumber of logical leaf pages: %d\n", s.LeafPageN)
+		fmt.Fprintf(cmd.Stdout, "\tNumber of physical leaf overflow pages: %d\n", s.LeafOverflowN)
+
+		fmt.Fprintln(cmd.Stdout, "Tree statistics")
+		fmt.Fprintf(cmd.Stdout, "\tNumber of key/value pairs: %d\n", s.KeyN)
+		fmt.Fprintf(cmd.Stdout, "\tNumber of levels in B+tree: %d\n", s.Depth)
+
+		fmt.Fprintln(cmd.Stdout, "Page size utilization")
+		fmt.Fprintf(cmd.Stdout, "\tBytes allocated for physical branch pages: %d\n", s.BranchAlloc)
+		var percentage int
+		if s.BranchAlloc != 0 {
+			percentage = int(float32(s.BranchInuse) * 100.0 / float32(s.BranchAlloc))
+		}
+		fmt.Fprintf(cmd.Stdout, "\tBytes actually used for branch data: %d (%d%%)\n", s.BranchInuse, percentage)
+		fmt.Fprintf(cmd.Stdout, "\tBytes allocated for physical leaf pages: %d\n", s.LeafAlloc)
+		percentage = 0
+		if s.LeafAlloc != 0 {
+			percentage = int(float32(s.LeafInuse) * 100.0 / float32(s.LeafAlloc))
+		}
+		fmt.Fprintf(cmd.Stdout, "\tBytes actually used for leaf data: %d (%d%%)\n", s.LeafInuse, percentage)
+
+		fmt.Fprintln(cmd.Stdout, "Bucket statistics")
+		fmt.Fprintf(cmd.Stdout, "\tTotal number of buckets: %d\n", s.BucketN)
+		percentage = 0
+		if s.BucketN != 0 {
+			percentage = int(float32(s.InlineBucketN) * 100.0 / float32(s.BucketN))
+		}
+		fmt.Fprintf(cmd.Stdout, "\tTotal number of inlined buckets: %d (%d%%)\n", s.InlineBucketN, percentage)
+		percentage = 0
+		if s.LeafInuse != 0 {
+			percentage = int(float32(s.InlineBucketInuse) * 100.0 / float32(s.LeafInuse))
+		}
+		fmt.Fprintf(cmd.Stdout, "\tBytes used for inlined buckets: %d (%d%%)\n", s.InlineBucketInuse, percentage)
+
+		return nil
+	})
+}
+
+// Usage returns the help message.
+func (cmd *StatsCommand) Usage() string {
+	return strings.TrimLeft(`
+usage: bolt stats PATH
+
+Stats performs an extensive search of the database to track every page
+reference. It starts at the current meta page and recursively iterates
+through every accessible bucket.
+
+The following errors can be reported:
+
+	already freed
+		The page is referenced more than once in the freelist.
+
+	unreachable unfreed
+		The page is not referenced by a bucket or in the freelist.
+
+	reachable freed
+		The page is referenced by a bucket but is also in the freelist.
+
+	out of bounds
+		A page is referenced that is above the high water mark.
+
+	multiple references
+		A page is referenced by more than one other page.
+
+	invalid type
+		The page type is not "meta", "leaf", "branch", or "freelist".
+
+No errors should occur in your database. However, if for some reason you
+experience corruption, please submit a ticket to the Bolt project page:
+
+	https://github.com/boltdb/bolt/issues
+`, "\n")
+}
+
+var benchBucketName = []byte("bench")
+
+// BenchCommand represents the "bench" command execution.
+type BenchCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+}
+
+// newBenchCommand returns a BenchCommand attached to the given MainT's
+// standard input/output streams.
+func newBenchCommand(m *MainT) *BenchCommand {
+	return &BenchCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the "bench" command.
+func (cmd *BenchCommand) Run(args ...string) error {
+	// Parse CLI arguments.
+	options, err := cmd.ParseFlags(args)
+	if err != nil {
+		return err
+	}
+
+	// Remove path if "-work" is not set. Otherwise keep path.
+	if options.Work {
+		fmt.Fprintf(cmd.Stdout, "work: %s\n", options.Path)
+	} else {
+		defer os.Remove(options.Path)
+	}
+
+	// Create database.
+	db, err := Open(options.Path, 0666, nil)
+	if err != nil {
+		return err
+	}
+	db.NoSync = options.NoSync
+	defer db.Close()
+
+	// Write to the database.
+	var results BenchResults
+	if err := cmd.runWrites(db, options, &results); err != nil {
+		return fmt.Errorf("bench: write: %s", err)
+	}
+
+	// Read from the database.
+	if err := cmd.runReads(db, options, &results); err != nil {
+		return fmt.Errorf("bench: read: %s", err)
+	}
+
+	// Print results.
+	fmt.Fprintf(os.Stderr, "# Write\t%v\t(%v/op)\t(%v op/sec)\n", results.WriteDuration, results.WriteOpDuration(), results.WriteOpsPerSecond())
+	fmt.Fprintf(os.Stderr, "# Read\t%v\t(%v/op)\t(%v op/sec)\n", results.ReadDuration, results.ReadOpDuration(), results.ReadOpsPerSecond())
+	fmt.Fprintln(os.Stderr, "")
+	return nil
+}
+
+// ParseFlags parses the command line flags.
+func (cmd *BenchCommand) ParseFlags(args []string) (*BenchOptions, error) {
+	var options BenchOptions
+
+	// Parse flagset.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	fs.StringVar(&options.ProfileMode, "profile-mode", "rw", "")
+	fs.StringVar(&options.WriteMode, "write-mode", "seq", "")
+	fs.StringVar(&options.ReadMode, "read-mode", "seq", "")
+	fs.IntVar(&options.Iterations, "count", 1000, "")
+	fs.IntVar(&options.BatchSize, "batch-size", 0, "")
+	fs.IntVar(&options.KeySize, "key-size", 8, "")
+	fs.IntVar(&options.ValueSize, "value-size", 32, "")
+	fs.StringVar(&options.CPUProfile, "cpuprofile", "", "")
+	fs.StringVar(&options.MemProfile, "memprofile", "", "")
+	fs.StringVar(&options.BlockProfile, "blockprofile", "", "")
+	fs.Float64Var(&options.FillPercent, "fill-percent", DefaultFillPercent, "")
+	fs.BoolVar(&options.NoSync, "no-sync", false, "")
+	fs.BoolVar(&options.Work, "work", false, "")
+	fs.StringVar(&options.Path, "path", "", "")
+	fs.SetOutput(cmd.Stderr)
+	if err := fs.Parse(args); err != nil {
+		return nil, err
+	}
+
+	// Set batch size to the iteration count if not set.
+	// Require that the iteration count be evenly divisible by the batch
+	// size (e.g. -count 1000 -batch-size 100 yields ten transactions of
+	// one hundred puts each; -batch-size 300 would be rejected).
+	if options.BatchSize == 0 {
+		options.BatchSize = options.Iterations
+	} else if options.Iterations%options.BatchSize != 0 {
+		return nil, ErrNonDivisibleBatchSize
+	}
+
+	// Generate temp path if one is not passed in.
+	if options.Path == "" {
+		f, err := ioutil.TempFile("", "bolt-bench-")
+		if err != nil {
+			return nil, fmt.Errorf("temp file: %s", err)
+		}
+		f.Close()
+		os.Remove(f.Name())
+		options.Path = f.Name()
+	}
+
+	return &options, nil
+}
+
+// runWrites writes to the database.
+func (cmd *BenchCommand) runWrites(db *DB, options *BenchOptions, results *BenchResults) error {
+	// Start profiling for writes.
+	if options.ProfileMode == "rw" || options.ProfileMode == "w" {
+		cmd.startProfiling(options)
+	}
+
+	t := time.Now()
+
+	var err error
+	switch options.WriteMode {
+	case "seq":
+		err = cmd.runWritesSequential(db, options, results)
+	case "rnd":
+		err = cmd.runWritesRandom(db, options, results)
+	case "seq-nest":
+		err = cmd.runWritesSequentialNested(db, options, results)
+	case "rnd-nest":
+		err = cmd.runWritesRandomNested(db, options, results)
+	default:
+		return fmt.Errorf("invalid write mode: %s", options.WriteMode)
+	}
+
+	// Save time to write.
+	results.WriteDuration = time.Since(t)
+
+	// Stop profiling for writes only.
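+	// In "rw" mode the profile is deliberately left running here so the
+	// read phase lands in the same profile; runReads stops it instead.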
+	if options.ProfileMode == "w" {
+		cmd.stopProfiling()
+	}
+
+	return err
+}
+
+func (cmd *BenchCommand) runWritesSequential(db *DB, options *BenchOptions, results *BenchResults) error {
+	var i = uint32(0)
+	return cmd.runWritesWithSource(db, options, results, func() uint32 { i++; return i })
+}
+
+func (cmd *BenchCommand) runWritesRandom(db *DB, options *BenchOptions, results *BenchResults) error {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	return cmd.runWritesWithSource(db, options, results, func() uint32 { return r.Uint32() })
+}
+
+func (cmd *BenchCommand) runWritesSequentialNested(db *DB, options *BenchOptions, results *BenchResults) error {
+	var i = uint32(0)
+	return cmd.runWritesNestedWithSource(db, options, results, func() uint32 { i++; return i })
+}
+
+func (cmd *BenchCommand) runWritesRandomNested(db *DB, options *BenchOptions, results *BenchResults) error {
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	return cmd.runWritesNestedWithSource(db, options, results, func() uint32 { return r.Uint32() })
+}
+
+func (cmd *BenchCommand) runWritesWithSource(db *DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error {
+	results.WriteOps = options.Iterations
+
+	for i := 0; i < options.Iterations; i += options.BatchSize {
+		if err := db.Update(func(tx *Tx) error {
+			b, err := tx.CreateBucketIfNotExists(benchBucketName)
+			if err != nil {
+				return err
+			}
+			b.FillPercent = options.FillPercent
+
+			for j := 0; j < options.BatchSize; j++ {
+				key := make([]byte, options.KeySize)
+				value := make([]byte, options.ValueSize)
+
+				// Write key as uint32.
+				binary.BigEndian.PutUint32(key, keySource())
+
+				// Insert key/value.
+				if err := b.Put(key, value); err != nil {
+					return err
+				}
+			}
+
+			return nil
+		}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (cmd *BenchCommand) runWritesNestedWithSource(db *DB, options *BenchOptions, results *BenchResults, keySource func() uint32) error {
+	results.WriteOps = options.Iterations
+
+	for i := 0; i < options.Iterations; i += options.BatchSize {
+		if err := db.Update(func(tx *Tx) error {
+			top, err := tx.CreateBucketIfNotExists(benchBucketName)
+			if err != nil {
+				return err
+			}
+			top.FillPercent = options.FillPercent
+
+			// Create bucket key.
+			name := make([]byte, options.KeySize)
+			binary.BigEndian.PutUint32(name, keySource())
+
+			// Create bucket.
+			b, err := top.CreateBucketIfNotExists(name)
+			if err != nil {
+				return err
+			}
+			b.FillPercent = options.FillPercent
+
+			for j := 0; j < options.BatchSize; j++ {
+				var key = make([]byte, options.KeySize)
+				var value = make([]byte, options.ValueSize)
+
+				// Generate key as uint32.
+				binary.BigEndian.PutUint32(key, keySource())
+
+				// Insert value into subbucket.
+				if err := b.Put(key, value); err != nil {
+					return err
+				}
+			}
+
+			return nil
+		}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// runReads reads from the database.
+func (cmd *BenchCommand) runReads(db *DB, options *BenchOptions, results *BenchResults) error {
+	// Start profiling for reads.
+	if options.ProfileMode == "r" {
+		cmd.startProfiling(options)
+	}
+
+	t := time.Now()
+
+	var err error
+	switch options.ReadMode {
+	case "seq":
+		switch options.WriteMode {
+		case "seq-nest", "rnd-nest":
+			err = cmd.runReadsSequentialNested(db, options, results)
+		default:
+			err = cmd.runReadsSequential(db, options, results)
+		}
+	default:
+		return fmt.Errorf("invalid read mode: %s", options.ReadMode)
+	}
+
+	// Save read time.
+	results.ReadDuration = time.Since(t)
+
+	// Stop profiling for reads.
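+	// An "rw" profile was started before the writes and an "r" profile at
+	// the top of this method; both end here, after the reads.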
+	if options.ProfileMode == "rw" || options.ProfileMode == "r" {
+		cmd.stopProfiling()
+	}
+
+	return err
+}
+
+func (cmd *BenchCommand) runReadsSequential(db *DB, options *BenchOptions, results *BenchResults) error {
+	return db.View(func(tx *Tx) error {
+		t := time.Now()
+
+		for {
+			var count int
+
+			c := tx.Bucket(benchBucketName).Cursor()
+			for k, v := c.First(); k != nil; k, v = c.Next() {
+				if v == nil {
+					return ErrInvalidValue
+				}
+				count++
+			}
+
+			if options.WriteMode == "seq" && count != options.Iterations {
+				return fmt.Errorf("read seq: iter mismatch: expected %d, got %d", options.Iterations, count)
+			}
+
+			results.ReadOps += count
+
+			// Make sure we do this for at least a second.
+			if time.Since(t) >= time.Second {
+				break
+			}
+		}
+
+		return nil
+	})
+}
+
+func (cmd *BenchCommand) runReadsSequentialNested(db *DB, options *BenchOptions, results *BenchResults) error {
+	return db.View(func(tx *Tx) error {
+		t := time.Now()
+
+		for {
+			var count int
+			var top = tx.Bucket(benchBucketName)
+			if err := top.ForEach(func(name, _ []byte) error {
+				c := top.Bucket(name).Cursor()
+				for k, v := c.First(); k != nil; k, v = c.Next() {
+					if v == nil {
+						return ErrInvalidValue
+					}
+					count++
+				}
+				return nil
+			}); err != nil {
+				return err
+			}
+
+			if options.WriteMode == "seq-nest" && count != options.Iterations {
+				return fmt.Errorf("read seq-nest: iter mismatch: expected %d, got %d", options.Iterations, count)
+			}
+
+			results.ReadOps += count
+
+			// Make sure we do this for at least a second.
+			if time.Since(t) >= time.Second {
+				break
+			}
+		}
+
+		return nil
+	})
+}
+
+// File handlers for the various profiles.
+var cpuprofile, memprofile, blockprofile *os.File
+
+// startProfiling starts all profiles set on the options.
+func (cmd *BenchCommand) startProfiling(options *BenchOptions) {
+	var err error
+
+	// Start CPU profiling.
+	if options.CPUProfile != "" {
+		cpuprofile, err = os.Create(options.CPUProfile)
+		if err != nil {
+			fmt.Fprintf(cmd.Stderr, "bench: could not create cpu profile %q: %v\n", options.CPUProfile, err)
+			os.Exit(1)
+		}
+		pprof.StartCPUProfile(cpuprofile)
+	}
+
+	// Start memory profiling.
+	if options.MemProfile != "" {
+		memprofile, err = os.Create(options.MemProfile)
+		if err != nil {
+			fmt.Fprintf(cmd.Stderr, "bench: could not create memory profile %q: %v\n", options.MemProfile, err)
+			os.Exit(1)
+		}
+		runtime.MemProfileRate = 4096
+	}
+
+	// Start block profiling.
+	if options.BlockProfile != "" {
+		blockprofile, err = os.Create(options.BlockProfile)
+		if err != nil {
+			fmt.Fprintf(cmd.Stderr, "bench: could not create block profile %q: %v\n", options.BlockProfile, err)
+			os.Exit(1)
+		}
+		runtime.SetBlockProfileRate(1)
+	}
+}
+
+// stopProfiling stops all profiles.
+func (cmd *BenchCommand) stopProfiling() {
+	if cpuprofile != nil {
+		pprof.StopCPUProfile()
+		cpuprofile.Close()
+		cpuprofile = nil
+	}
+
+	if memprofile != nil {
+		pprof.Lookup("heap").WriteTo(memprofile, 0)
+		memprofile.Close()
+		memprofile = nil
+	}
+
+	if blockprofile != nil {
+		pprof.Lookup("block").WriteTo(blockprofile, 0)
+		blockprofile.Close()
+		blockprofile = nil
+		runtime.SetBlockProfileRate(0)
+	}
+}
+
+// BenchOptions represents the set of options that can be passed to "bolt bench".
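The profiling helpers above are thin wrappers over runtime/pprof and the
runtime profile rates. A minimal self-contained sketch of the same CPU
profile lifecycle, with a hypothetical output filename:

package main

import (
	"log"
	"os"
	"runtime/pprof"
)

func main() {
	f, err := os.Create("cpu.pprof") // hypothetical output path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Everything executed between Start and Stop is sampled.
	if err := pprof.StartCPUProfile(f); err != nil {
		log.Fatal(err)
	}
	defer pprof.StopCPUProfile()

	// ... benchmark workload would run here ...
}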
+type BenchOptions struct {
+	ProfileMode   string
+	WriteMode     string
+	ReadMode      string
+	Iterations    int
+	BatchSize     int
+	KeySize       int
+	ValueSize     int
+	CPUProfile    string
+	MemProfile    string
+	BlockProfile  string
+	StatsInterval time.Duration
+	FillPercent   float64
+	NoSync        bool
+	Work          bool
+	Path          string
+}
+
+// BenchResults represents the performance results of the benchmark.
+type BenchResults struct {
+	WriteOps      int
+	WriteDuration time.Duration
+	ReadOps       int
+	ReadDuration  time.Duration
+}
+
+// WriteOpDuration returns the duration for a single write operation.
+func (r *BenchResults) WriteOpDuration() time.Duration {
+	if r.WriteOps == 0 {
+		return 0
+	}
+	return r.WriteDuration / time.Duration(r.WriteOps)
+}
+
+// WriteOpsPerSecond returns the average number of write operations that can be performed per second.
+func (r *BenchResults) WriteOpsPerSecond() int {
+	var op = r.WriteOpDuration()
+	if op == 0 {
+		return 0
+	}
+	return int(time.Second) / int(op)
+}
+
+// ReadOpDuration returns the duration for a single read operation.
+func (r *BenchResults) ReadOpDuration() time.Duration {
+	if r.ReadOps == 0 {
+		return 0
+	}
+	return r.ReadDuration / time.Duration(r.ReadOps)
+}
+
+// ReadOpsPerSecond returns the average number of read operations that can be performed per second.
+func (r *BenchResults) ReadOpsPerSecond() int {
+	var op = r.ReadOpDuration()
+	if op == 0 {
+		return 0
+	}
+	return int(time.Second) / int(op)
+}
+
+// PageError represents an error found while reading a single page.
+type PageError struct {
+	ID  int
+	Err error
+}
+
+func (e *PageError) Error() string {
+	return fmt.Sprintf("page error: id=%d, err=%s", e.ID, e.Err)
+}
+
+// isPrintable returns true if the string is valid unicode and contains only printable runes.
+func isPrintable(s string) bool {
+	if !utf8.ValidString(s) {
+		return false
+	}
+	for _, ch := range s {
+		if !unicode.IsPrint(ch) {
+			return false
+		}
+	}
+	return true
+}
+
+// ReadPage reads page info & full page data from a path.
+// This is not transactionally safe.
+func ReadPage(path string, pageID int) (*page, []byte, error) {
+	// Find page size.
+	pageSize, err := ReadPageSize(path)
+	if err != nil {
+		return nil, nil, fmt.Errorf("read page size: %s", err)
+	}
+
+	// Open database file.
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, nil, err
+	}
+	defer f.Close()
+
+	// Read one block into buffer.
+	buf := make([]byte, pageSize)
+	if n, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil {
+		return nil, nil, err
+	} else if n != len(buf) {
+		return nil, nil, io.ErrUnexpectedEOF
+	}
+
+	// Determine total number of blocks.
+	p := (*page)(unsafe.Pointer(&buf[0]))
+	overflowN := p.overflow
+
+	// Re-read entire page (with overflow) into buffer.
+	buf = make([]byte, (int(overflowN)+1)*pageSize)
+	if n, err := f.ReadAt(buf, int64(pageID*pageSize)); err != nil {
+		return nil, nil, err
+	} else if n != len(buf) {
+		return nil, nil, io.ErrUnexpectedEOF
+	}
+	p = (*page)(unsafe.Pointer(&buf[0]))
+
+	return p, buf, nil
+}
+
+// ReadPageSize reads the page size from a path.
+// This is not transactionally safe.
+func ReadPageSize(path string) (int, error) {
+	// Open database file.
+	f, err := os.Open(path)
+	if err != nil {
+		return 0, err
+	}
+	defer f.Close()
+
+	// Read 4KB chunk.
+	buf := make([]byte, 4096)
+	if _, err := io.ReadFull(f, buf); err != nil {
+		return 0, err
+	}
+
+	// Read page size from metadata.
+	m := (*meta)(unsafe.Pointer(&buf[PageHeaderSize]))
+	return int(m.pageSize), nil
+}
+
+// atois parses a slice of strings into integers.
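The BenchResults accessors above reduce to simple arithmetic: duration per
operation is the total duration over the op count, and throughput is one
second over that. A worked sketch with made-up numbers:

package main

import (
	"fmt"
	"time"
)

func main() {
	writeOps := 1000
	writeDuration := 2 * time.Second

	perOp := writeDuration / time.Duration(writeOps) // 2ms, as in WriteOpDuration
	opsPerSec := int(time.Second) / int(perOp)       // 500, as in WriteOpsPerSecond
	fmt.Println(perOp, opsPerSec)
}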
+func atois(strs []string) ([]int, error) {
+	var a []int
+	for _, str := range strs {
+		i, err := strconv.Atoi(str)
+		if err != nil {
+			return nil, err
+		}
+		a = append(a, i)
+	}
+	return a, nil
+}
+
+// DO NOT EDIT. Copied from the "bolt" package.
+func (p *page) Type() string {
+	if (p.flags & branchPageFlag) != 0 {
+		return "branch"
+	} else if (p.flags & leafPageFlag) != 0 {
+		return "leaf"
+	} else if (p.flags & metaPageFlag) != 0 {
+		return "meta"
+	} else if (p.flags & freelistPageFlag) != 0 {
+		return "freelist"
+	}
+	return fmt.Sprintf("unknown<%02x>", p.flags)
+}
+
+// CompactCommand represents the "compact" command execution.
+type CompactCommand struct {
+	Stdin  io.Reader
+	Stdout io.Writer
+	Stderr io.Writer
+
+	SrcPath   string
+	DstPath   string
+	TxMaxSize int64
+}
+
+// newCompactCommand returns a CompactCommand.
+func newCompactCommand(m *MainT) *CompactCommand {
+	return &CompactCommand{
+		Stdin:  m.Stdin,
+		Stdout: m.Stdout,
+		Stderr: m.Stderr,
+	}
+}
+
+// Run executes the command.
+func (cmd *CompactCommand) Run(args ...string) (err error) {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	fs.SetOutput(ioutil.Discard)
+	fs.StringVar(&cmd.DstPath, "o", "", "")
+	fs.Int64Var(&cmd.TxMaxSize, "tx-max-size", 65536, "")
+	if err := fs.Parse(args); err == flag.ErrHelp {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	} else if err != nil {
+		return err
+	} else if cmd.DstPath == "" {
+		return fmt.Errorf("output file required")
+	}
+
+	// Require database paths.
+	cmd.SrcPath = fs.Arg(0)
+	if cmd.SrcPath == "" {
+		return ErrPathRequired
+	}
+
+	// Ensure source file exists.
+	fi, err := os.Stat(cmd.SrcPath)
+	if os.IsNotExist(err) {
+		return ErrFileNotFound
+	} else if err != nil {
+		return err
+	}
+	initialSize := fi.Size()
+
+	// Open source database.
+	src, err := Open(cmd.SrcPath, 0444, nil)
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	// Open destination database.
+	dst, err := Open(cmd.DstPath, fi.Mode(), nil)
+	if err != nil {
+		return err
+	}
+	defer dst.Close()
+
+	// Run compaction.
+	if err := cmd.compact(dst, src); err != nil {
+		return err
+	}
+
+	// Report stats on new size.
+	fi, err = os.Stat(cmd.DstPath)
+	if err != nil {
+		return err
+	} else if fi.Size() == 0 {
+		return fmt.Errorf("zero db size")
+	}
+	fmt.Fprintf(cmd.Stdout, "%d -> %d bytes (gain=%.2fx)\n", initialSize, fi.Size(), float64(initialSize)/float64(fi.Size()))
+
+	return nil
+}
+
+func (cmd *CompactCommand) compact(dst, src *DB) error {
+	// Commit regularly, or we'll run out of memory for large datasets if using one transaction.
+	var size int64
+	tx, err := dst.Begin(true)
+	if err != nil {
+		return err
+	}
+	// Use a closure so the deferred rollback sees the latest transaction;
+	// a plain "defer tx.Rollback()" would bind the first tx only.
+	defer func() { _ = tx.Rollback() }()
+
+	if err := cmd.walk(src, func(keys [][]byte, k, v []byte, seq uint64) error {
+		// On each key/value, check if we have exceeded tx size.
+		sz := int64(len(k) + len(v))
+		if size+sz > cmd.TxMaxSize && cmd.TxMaxSize != 0 {
+			// Commit previous transaction.
+			if err := tx.Commit(); err != nil {
+				return err
+			}
+
+			// Start new transaction.
+			tx, err = dst.Begin(true)
+			if err != nil {
+				return err
+			}
+			size = 0
+		}
+		size += sz
+
+		// Create bucket on the root transaction if this is the first level.
+		nk := len(keys)
+		if nk == 0 {
+			bkt, err := tx.CreateBucket(k)
+			if err != nil {
+				return err
+			}
+			if err := bkt.SetSequence(seq); err != nil {
+				return err
+			}
+			return nil
+		}
+
+		// Create buckets on subsequent levels, if necessary.
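+		// keys[0] names the top-level bucket and any remaining entries the
+		// chain of sub-buckets; e.g. keys = ["a", "b"] with k = "c" places
+		// "c" inside bucket "a", sub-bucket "b".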
+ b := tx.Bucket(keys[0]) + if nk > 1 { + for _, k := range keys[1:] { + b = b.Bucket(k) + } + } + + // If there is no value then this is a bucket call. + if v == nil { + bkt, err := b.CreateBucket(k) + if err != nil { + return err + } + if err := bkt.SetSequence(seq); err != nil { + return err + } + return nil + } + + // Otherwise treat it as a key/value pair. + return b.Put(k, v) + }); err != nil { + return err + } + + return tx.Commit() +} + +// walkFunc is the type of the function called for keys (buckets and "normal" +// values) discovered by Walk. keys is the list of keys to descend to the bucket +// owning the discovered key/value pair k/v. +type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error + +// walk walks recursively the bolt database db, calling walkFn for each key it finds. +func (cmd *CompactCommand) walk(db *DB, walkFn walkFunc) error { + return db.View(func(tx *Tx) error { + return tx.ForEach(func(name []byte, b *Bucket) error { + return cmd.walkBucket(b, nil, name, nil, b.Sequence(), walkFn) + }) + }) +} + +func (cmd *CompactCommand) walkBucket(b *Bucket, keypath [][]byte, k, v []byte, seq uint64, fn walkFunc) error { + // Execute callback. + if err := fn(keypath, k, v, seq); err != nil { + return err + } + + // If this is not a bucket then stop. + if v != nil { + return nil + } + + // Iterate over each child key/value. + keypath = append(keypath, k) + return b.ForEach(func(k, v []byte) error { + if v == nil { + bkt := b.Bucket(k) + return cmd.walkBucket(bkt, keypath, k, nil, bkt.Sequence(), fn) + } + return cmd.walkBucket(b, keypath, k, v, b.Sequence(), fn) + }) +} + +// Usage returns the help message. +func (cmd *CompactCommand) Usage() string { + return strings.TrimLeft(` +usage: bolt compact [options] -o DST SRC + +Compact opens a database at SRC path and walks it recursively, copying keys +as they are found from all buckets, to a newly created database at DST path. + +The original database is left untouched. + +Additional options include: + + -tx-max-size NUM + Specifies the maximum size of individual transactions. + Defaults to 64KB. +`, "\n") +} diff --git a/src/main.go b/src/main.go new file mode 100644 index 0000000..996b5b4 --- /dev/null +++ b/src/main.go @@ -0,0 +1,7 @@ +package main + +import "gkv" + +func main() { + gkv.Main() +} diff --git a/src/node.go b/src/node.go deleted file mode 100644 index 159318b..0000000 --- a/src/node.go +++ /dev/null @@ -1,604 +0,0 @@ -package bolt - -import ( - "bytes" - "fmt" - "sort" - "unsafe" -) - -// node represents an in-memory, deserialized page. -type node struct { - bucket *Bucket - isLeaf bool - unbalanced bool - spilled bool - key []byte - pgid pgid - parent *node - children nodes - inodes inodes -} - -// root returns the top-level node this node is attached to. -func (n *node) root() *node { - if n.parent == nil { - return n - } - return n.parent.root() -} - -// minKeys returns the minimum number of inodes this node should have. -func (n *node) minKeys() int { - if n.isLeaf { - return 1 - } - return 2 -} - -// size returns the size of the node after serialization. -func (n *node) size() int { - sz, elsz := pageHeaderSize, n.pageElementSize() - for i := 0; i < len(n.inodes); i++ { - item := &n.inodes[i] - sz += elsz + len(item.key) + len(item.value) - } - return sz -} - -// sizeLessThan returns true if the node is less than a given size. -// This is an optimization to avoid calculating a large node when we only need -// to know if it fits inside a certain page size. 
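For a concrete sense of the accounting in size() and sizeLessThan above: the
serialized size is the page header plus, per inode, one element header plus
the raw key and value bytes. A standalone sketch; the 16-byte constants
follow from the struct definitions in page.go further down in this diff:

package main

import "fmt"

func main() {
	// Header: id(8) + flags(2) + count(2) + overflow(4) = 16 bytes.
	// Leaf element: four uint32 fields = 16 bytes.
	const pageHeaderSize, leafPageElementSize = 16, 16

	keys, values := []string{"foo", "bar"}, []string{"hello", "world"}
	sz := pageHeaderSize
	for i := range keys {
		sz += leafPageElementSize + len(keys[i]) + len(values[i])
	}
	fmt.Println(sz) // 16 + 2*(16+3+5) = 64 bytes for this leaf node
}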
-func (n *node) sizeLessThan(v int) bool { - sz, elsz := pageHeaderSize, n.pageElementSize() - for i := 0; i < len(n.inodes); i++ { - item := &n.inodes[i] - sz += elsz + len(item.key) + len(item.value) - if sz >= v { - return false - } - } - return true -} - -// pageElementSize returns the size of each page element based on the type of node. -func (n *node) pageElementSize() int { - if n.isLeaf { - return leafPageElementSize - } - return branchPageElementSize -} - -// childAt returns the child node at a given index. -func (n *node) childAt(index int) *node { - if n.isLeaf { - panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index)) - } - return n.bucket.node(n.inodes[index].pgid, n) -} - -// childIndex returns the index of a given child node. -func (n *node) childIndex(child *node) int { - index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, child.key) != -1 }) - return index -} - -// numChildren returns the number of children. -func (n *node) numChildren() int { - return len(n.inodes) -} - -// nextSibling returns the next node with the same parent. -func (n *node) nextSibling() *node { - if n.parent == nil { - return nil - } - index := n.parent.childIndex(n) - if index >= n.parent.numChildren()-1 { - return nil - } - return n.parent.childAt(index + 1) -} - -// prevSibling returns the previous node with the same parent. -func (n *node) prevSibling() *node { - if n.parent == nil { - return nil - } - index := n.parent.childIndex(n) - if index == 0 { - return nil - } - return n.parent.childAt(index - 1) -} - -// put inserts a key/value. -func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { - if pgid >= n.bucket.tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid)) - } else if len(oldKey) <= 0 { - panic("put: zero-length old key") - } else if len(newKey) <= 0 { - panic("put: zero-length new key") - } - - // Find insertion index. - index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, oldKey) != -1 }) - - // Add capacity and shift nodes if we don't have an exact match and need to insert. - exact := (len(n.inodes) > 0 && index < len(n.inodes) && bytes.Equal(n.inodes[index].key, oldKey)) - if !exact { - n.inodes = append(n.inodes, inode{}) - copy(n.inodes[index+1:], n.inodes[index:]) - } - - inode := &n.inodes[index] - inode.flags = flags - inode.key = newKey - inode.value = value - inode.pgid = pgid - _assert(len(inode.key) > 0, "put: zero-length inode key") -} - -// del removes a key from the node. -func (n *node) del(key []byte) { - // Find index of key. - index := sort.Search(len(n.inodes), func(i int) bool { return bytes.Compare(n.inodes[i].key, key) != -1 }) - - // Exit if the key isn't found. - if index >= len(n.inodes) || !bytes.Equal(n.inodes[index].key, key) { - return - } - - // Delete inode from the node. - n.inodes = append(n.inodes[:index], n.inodes[index+1:]...) - - // Mark the node as needing rebalancing. - n.unbalanced = true -} - -// read initializes the node from a page. 
-func (n *node) read(p *page) { - n.pgid = p.id - n.isLeaf = ((p.flags & leafPageFlag) != 0) - n.inodes = make(inodes, int(p.count)) - - for i := 0; i < int(p.count); i++ { - inode := &n.inodes[i] - if n.isLeaf { - elem := p.leafPageElement(uint16(i)) - inode.flags = elem.flags - inode.key = elem.key() - inode.value = elem.value() - } else { - elem := p.branchPageElement(uint16(i)) - inode.pgid = elem.pgid - inode.key = elem.key() - } - _assert(len(inode.key) > 0, "read: zero-length inode key") - } - - // Save first key so we can find the node in the parent when we spill. - if len(n.inodes) > 0 { - n.key = n.inodes[0].key - _assert(len(n.key) > 0, "read: zero-length node key") - } else { - n.key = nil - } -} - -// write writes the items onto one or more pages. -func (n *node) write(p *page) { - // Initialize page. - if n.isLeaf { - p.flags |= leafPageFlag - } else { - p.flags |= branchPageFlag - } - - if len(n.inodes) >= 0xFFFF { - panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id)) - } - p.count = uint16(len(n.inodes)) - - // Stop here if there are no items to write. - if p.count == 0 { - return - } - - // Loop over each item and write it to the page. - b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):] - for i, item := range n.inodes { - _assert(len(item.key) > 0, "write: zero-length inode key") - - // Write the page element. - if n.isLeaf { - elem := p.leafPageElement(uint16(i)) - elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) - elem.flags = item.flags - elem.ksize = uint32(len(item.key)) - elem.vsize = uint32(len(item.value)) - } else { - elem := p.branchPageElement(uint16(i)) - elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) - elem.ksize = uint32(len(item.key)) - elem.pgid = item.pgid - _assert(elem.pgid != p.id, "write: circular dependency occurred") - } - - // If the length of key+value is larger than the max allocation size - // then we need to reallocate the byte array pointer. - // - // See: https://github.com/boltdb/bolt/pull/335 - klen, vlen := len(item.key), len(item.value) - if len(b) < klen+vlen { - b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:] - } - - // Write data for the element to the end of the page. - copy(b[0:], item.key) - b = b[klen:] - copy(b[0:], item.value) - b = b[vlen:] - } - - // DEBUG ONLY: n.dump() -} - -// split breaks up a node into multiple smaller nodes, if appropriate. -// This should only be called from the spill() function. -func (n *node) split(pageSize int) []*node { - var nodes []*node - - node := n - for { - // Split node into two. - a, b := node.splitTwo(pageSize) - nodes = append(nodes, a) - - // If we can't split then exit the loop. - if b == nil { - break - } - - // Set node to b so it gets split on the next iteration. - node = b - } - - return nodes -} - -// splitTwo breaks up a node into two smaller nodes, if appropriate. -// This should only be called from the split() function. -func (n *node) splitTwo(pageSize int) (*node, *node) { - // Ignore the split if the page doesn't have at least enough nodes for - // two pages or if the nodes can fit in a single page. - if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) { - return n, nil - } - - // Determine the threshold before starting a new node. 
- var fillPercent = n.bucket.FillPercent - if fillPercent < minFillPercent { - fillPercent = minFillPercent - } else if fillPercent > maxFillPercent { - fillPercent = maxFillPercent - } - threshold := int(float64(pageSize) * fillPercent) - - // Determine split position and sizes of the two pages. - splitIndex, _ := n.splitIndex(threshold) - - // Split node into two separate nodes. - // If there's no parent then we'll need to create one. - if n.parent == nil { - n.parent = &node{bucket: n.bucket, children: []*node{n}} - } - - // Create a new node and add it to the parent. - next := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent} - n.parent.children = append(n.parent.children, next) - - // Split inodes across two nodes. - next.inodes = n.inodes[splitIndex:] - n.inodes = n.inodes[:splitIndex] - - // Update the statistics. - n.bucket.tx.stats.Split++ - - return n, next -} - -// splitIndex finds the position where a page will fill a given threshold. -// It returns the index as well as the size of the first page. -// This is only be called from split(). -func (n *node) splitIndex(threshold int) (index, sz int) { - sz = pageHeaderSize - - // Loop until we only have the minimum number of keys required for the second page. - for i := 0; i < len(n.inodes)-minKeysPerPage; i++ { - index = i - inode := n.inodes[i] - elsize := n.pageElementSize() + len(inode.key) + len(inode.value) - - // If we have at least the minimum number of keys and adding another - // node would put us over the threshold then exit and return. - if i >= minKeysPerPage && sz+elsize > threshold { - break - } - - // Add the element size to the total size. - sz += elsize - } - - return -} - -// spill writes the nodes to dirty pages and splits nodes as it goes. -// Returns an error if dirty pages cannot be allocated. -func (n *node) spill() error { - var tx = n.bucket.tx - if n.spilled { - return nil - } - - // Spill child nodes first. Child nodes can materialize sibling nodes in - // the case of split-merge so we cannot use a range loop. We have to check - // the children size on every loop iteration. - sort.Sort(n.children) - for i := 0; i < len(n.children); i++ { - if err := n.children[i].spill(); err != nil { - return err - } - } - - // We no longer need the child list because it's only used for spill tracking. - n.children = nil - - // Split nodes into appropriate sizes. The first node will always be n. - var nodes = n.split(tx.db.pageSize) - for _, node := range nodes { - // Add node's page to the freelist if it's not new. - if node.pgid > 0 { - tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid)) - node.pgid = 0 - } - - // Allocate contiguous space for the node. - p, err := tx.allocate((node.size() / tx.db.pageSize) + 1) - if err != nil { - return err - } - - // Write the node. - if p.id >= tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid)) - } - node.pgid = p.id - node.write(p) - node.spilled = true - - // Insert into parent inodes. - if node.parent != nil { - var key = node.key - if key == nil { - key = node.inodes[0].key - } - - node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0) - node.key = node.inodes[0].key - _assert(len(node.key) > 0, "spill: zero-length node key") - } - - // Update the statistics. - tx.stats.Spill++ - } - - // If the root node split and created a new root then we need to spill that - // as well. We'll clear out the children to make sure it doesn't try to respill. 
- if n.parent != nil && n.parent.pgid == 0 { - n.children = nil - return n.parent.spill() - } - - return nil -} - -// rebalance attempts to combine the node with sibling nodes if the node fill -// size is below a threshold or if there are not enough keys. -func (n *node) rebalance() { - if !n.unbalanced { - return - } - n.unbalanced = false - - // Update statistics. - n.bucket.tx.stats.Rebalance++ - - // Ignore if node is above threshold (25%) and has enough keys. - var threshold = n.bucket.tx.db.pageSize / 4 - if n.size() > threshold && len(n.inodes) > n.minKeys() { - return - } - - // Root node has special handling. - if n.parent == nil { - // If root node is a branch and only has one node then collapse it. - if !n.isLeaf && len(n.inodes) == 1 { - // Move root's child up. - child := n.bucket.node(n.inodes[0].pgid, n) - n.isLeaf = child.isLeaf - n.inodes = child.inodes[:] - n.children = child.children - - // Reparent all child nodes being moved. - for _, inode := range n.inodes { - if child, ok := n.bucket.nodes[inode.pgid]; ok { - child.parent = n - } - } - - // Remove old child. - child.parent = nil - delete(n.bucket.nodes, child.pgid) - child.free() - } - - return - } - - // If node has no keys then just remove it. - if n.numChildren() == 0 { - n.parent.del(n.key) - n.parent.removeChild(n) - delete(n.bucket.nodes, n.pgid) - n.free() - n.parent.rebalance() - return - } - - _assert(n.parent.numChildren() > 1, "parent must have at least 2 children") - - // Destination node is right sibling if idx == 0, otherwise left sibling. - var target *node - var useNextSibling = (n.parent.childIndex(n) == 0) - if useNextSibling { - target = n.nextSibling() - } else { - target = n.prevSibling() - } - - // If both this node and the target node are too small then merge them. - if useNextSibling { - // Reparent all child nodes being moved. - for _, inode := range target.inodes { - if child, ok := n.bucket.nodes[inode.pgid]; ok { - child.parent.removeChild(child) - child.parent = n - child.parent.children = append(child.parent.children, child) - } - } - - // Copy over inodes from target and remove target. - n.inodes = append(n.inodes, target.inodes...) - n.parent.del(target.key) - n.parent.removeChild(target) - delete(n.bucket.nodes, target.pgid) - target.free() - } else { - // Reparent all child nodes being moved. - for _, inode := range n.inodes { - if child, ok := n.bucket.nodes[inode.pgid]; ok { - child.parent.removeChild(child) - child.parent = target - child.parent.children = append(child.parent.children, child) - } - } - - // Copy over inodes to target and remove node. - target.inodes = append(target.inodes, n.inodes...) - n.parent.del(n.key) - n.parent.removeChild(n) - delete(n.bucket.nodes, n.pgid) - n.free() - } - - // Either this node or the target node was deleted from the parent so rebalance it. - n.parent.rebalance() -} - -// removes a node from the list of in-memory children. -// This does not affect the inodes. -func (n *node) removeChild(target *node) { - for i, child := range n.children { - if child == target { - n.children = append(n.children[:i], n.children[i+1:]...) - return - } - } -} - -// dereference causes the node to copy all its inode key/value references to heap memory. -// This is required when the mmap is reallocated so inodes are not pointing to stale data. 
-func (n *node) dereference() { - if n.key != nil { - key := make([]byte, len(n.key)) - copy(key, n.key) - n.key = key - _assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node") - } - - for i := range n.inodes { - inode := &n.inodes[i] - - key := make([]byte, len(inode.key)) - copy(key, inode.key) - inode.key = key - _assert(len(inode.key) > 0, "dereference: zero-length inode key") - - value := make([]byte, len(inode.value)) - copy(value, inode.value) - inode.value = value - } - - // Recursively dereference children. - for _, child := range n.children { - child.dereference() - } - - // Update statistics. - n.bucket.tx.stats.NodeDeref++ -} - -// free adds the node's underlying page to the freelist. -func (n *node) free() { - if n.pgid != 0 { - n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid)) - n.pgid = 0 - } -} - -// dump writes the contents of the node to STDERR for debugging purposes. -/* -func (n *node) dump() { - // Write node header. - var typ = "branch" - if n.isLeaf { - typ = "leaf" - } - warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes)) - - // Write out abbreviated version of each item. - for _, item := range n.inodes { - if n.isLeaf { - if item.flags&bucketLeafFlag != 0 { - bucket := (*bucket)(unsafe.Pointer(&item.value[0])) - warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root) - } else { - warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4)) - } - } else { - warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid) - } - } - warn("") -} -*/ - -type nodes []*node - -func (s nodes) Len() int { return len(s) } -func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 } - -// inode represents an internal node inside of a node. -// It can be used to point to elements in a page or point -// to an element which hasn't been added to a page yet. -type inode struct { - flags uint32 - pgid pgid - key []byte - value []byte -} - -type inodes []inode diff --git a/src/page.go b/src/page.go deleted file mode 100644 index cde403a..0000000 --- a/src/page.go +++ /dev/null @@ -1,197 +0,0 @@ -package bolt - -import ( - "fmt" - "os" - "sort" - "unsafe" -) - -const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr)) - -const minKeysPerPage = 2 - -const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{})) -const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{})) - -const ( - branchPageFlag = 0x01 - leafPageFlag = 0x02 - metaPageFlag = 0x04 - freelistPageFlag = 0x10 -) - -const ( - bucketLeafFlag = 0x01 -) - -type pgid uint64 - -type page struct { - id pgid - flags uint16 - count uint16 - overflow uint32 - ptr uintptr -} - -// typ returns a human readable page type string used for debugging. -func (p *page) typ() string { - if (p.flags & branchPageFlag) != 0 { - return "branch" - } else if (p.flags & leafPageFlag) != 0 { - return "leaf" - } else if (p.flags & metaPageFlag) != 0 { - return "meta" - } else if (p.flags & freelistPageFlag) != 0 { - return "freelist" - } - return fmt.Sprintf("unknown<%02x>", p.flags) -} - -// meta returns a pointer to the metadata section of the page. 
-func (p *page) meta() *meta { - return (*meta)(unsafe.Pointer(&p.ptr)) -} - -// leafPageElement retrieves the leaf node by index -func (p *page) leafPageElement(index uint16) *leafPageElement { - n := &((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[index] - return n -} - -// leafPageElements retrieves a list of leaf nodes. -func (p *page) leafPageElements() []leafPageElement { - if p.count == 0 { - return nil - } - return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:] -} - -// branchPageElement retrieves the branch node by index -func (p *page) branchPageElement(index uint16) *branchPageElement { - return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index] -} - -// branchPageElements retrieves a list of branch nodes. -func (p *page) branchPageElements() []branchPageElement { - if p.count == 0 { - return nil - } - return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:] -} - -// dump writes n bytes of the page to STDERR as hex output. -func (p *page) hexdump(n int) { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:n] - fmt.Fprintf(os.Stderr, "%x\n", buf) -} - -type pages []*page - -func (s pages) Len() int { return len(s) } -func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s pages) Less(i, j int) bool { return s[i].id < s[j].id } - -// branchPageElement represents a node on a branch page. -type branchPageElement struct { - pos uint32 - ksize uint32 - pgid pgid -} - -// key returns a byte slice of the node key. -func (n *branchPageElement) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] -} - -// leafPageElement represents a node on a leaf page. -type leafPageElement struct { - flags uint32 - pos uint32 - ksize uint32 - vsize uint32 -} - -// key returns a byte slice of the node key. -func (n *leafPageElement) key() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] -} - -// value returns a byte slice of the node value. -func (n *leafPageElement) value() []byte { - buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) - return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] -} - -// PageInfo represents human readable information about a page. -type PageInfo struct { - ID int - Type string - Count int - OverflowCount int -} - -type pgids []pgid - -func (s pgids) Len() int { return len(s) } -func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] } -func (s pgids) Less(i, j int) bool { return s[i] < s[j] } - -// merge returns the sorted union of a and b. -func (a pgids) merge(b pgids) pgids { - // Return the opposite slice if one is nil. - if len(a) == 0 { - return b - } - if len(b) == 0 { - return a - } - merged := make(pgids, len(a)+len(b)) - mergepgids(merged, a, b) - return merged -} - -// mergepgids copies the sorted union of a and b into dst. -// If dst is too small, it panics. -func mergepgids(dst, a, b pgids) { - if len(dst) < len(a)+len(b) { - panic(fmt.Errorf("mergepgids bad len %d < %d + %d", len(dst), len(a), len(b))) - } - // Copy in the opposite slice if one is nil. - if len(a) == 0 { - copy(dst, b) - return - } - if len(b) == 0 { - copy(dst, a) - return - } - - // Merged will hold all elements from both lists. - merged := dst[:0] - - // Assign lead to the slice with a lower starting value, follow to the higher value. 
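The loop that follows merges two sorted pgid slices by repeatedly appending
the longest prefix of lead that sorts before follow's head, then swapping
the roles. A standalone sketch of the same two-pointer merge with made-up
page ids:

package main

import (
	"fmt"
	"sort"
)

func main() {
	a := []uint64{4, 9, 13}
	b := []uint64{1, 3, 8, 9, 16}
	merged := make([]uint64, 0, len(a)+len(b))

	lead, follow := a, b
	if b[0] < a[0] {
		lead, follow = b, a
	}
	for len(lead) > 0 {
		// Largest prefix of lead whose values do not exceed follow[0].
		n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] })
		merged = append(merged, lead[:n]...)
		if n >= len(lead) {
			break
		}
		lead, follow = follow, lead[n:]
	}
	merged = append(merged, follow...)
	fmt.Println(merged) // [1 3 4 8 9 9 13 16]
}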
- lead, follow := a, b - if b[0] < a[0] { - lead, follow = b, a - } - - // Continue while there are elements in the lead. - for len(lead) > 0 { - // Merge largest prefix of lead that is ahead of follow[0]. - n := sort.Search(len(lead), func(i int) bool { return lead[i] > follow[0] }) - merged = append(merged, lead[:n]...) - if n >= len(lead) { - break - } - - // Swap lead and follow. - lead, follow = follow, lead[n:] - } - - // Append what's left in follow. - _ = append(merged, follow...) -} diff --git a/src/tx.go b/src/tx.go deleted file mode 100644 index bbb60ac..0000000 --- a/src/tx.go +++ /dev/null @@ -1,686 +0,0 @@ -package bolt - -import ( - "fmt" - "io" - "os" - "sort" - "strings" - "time" - "unsafe" -) - -// txid represents the internal transaction identifier. -type txid uint64 - -// Tx represents a read-only or read/write transaction on the database. -// Read-only transactions can be used for retrieving values for keys and creating cursors. -// Read/write transactions can create and remove buckets and create and remove keys. -// -// IMPORTANT: You must commit or rollback transactions when you are done with -// them. Pages can not be reclaimed by the writer until no more transactions -// are using them. A long running read transaction can cause the database to -// quickly grow. -type Tx struct { - writable bool - managed bool - db *DB - meta *meta - root Bucket - pages map[pgid]*page - stats TxStats - commitHandlers []func() - - // WriteFlag specifies the flag for write-related methods like WriteTo(). - // Tx opens the database file with the specified flag to copy the data. - // - // By default, the flag is unset, which works well for mostly in-memory - // workloads. For databases that are much larger than available RAM, - // set the flag to syscall.O_DIRECT to avoid trashing the page cache. - WriteFlag int -} - -// init initializes the transaction. -func (tx *Tx) init(db *DB) { - tx.db = db - tx.pages = nil - - // Copy the meta page since it can be changed by the writer. - tx.meta = &meta{} - db.meta().copy(tx.meta) - - // Copy over the root bucket. - tx.root = newBucket(tx) - tx.root.bucket = &bucket{} - *tx.root.bucket = tx.meta.root - - // Increment the transaction id and add a page cache for writable transactions. - if tx.writable { - tx.pages = make(map[pgid]*page) - tx.meta.txid += txid(1) - } -} - -// ID returns the transaction id. -func (tx *Tx) ID() int { - return int(tx.meta.txid) -} - -// DB returns a reference to the database that created the transaction. -func (tx *Tx) DB() *DB { - return tx.db -} - -// Size returns current database size in bytes as seen by this transaction. -func (tx *Tx) Size() int64 { - return int64(tx.meta.pgid) * int64(tx.db.pageSize) -} - -// Writable returns whether the transaction can perform write operations. -func (tx *Tx) Writable() bool { - return tx.writable -} - -// Cursor creates a cursor associated with the root bucket. -// All items in the cursor will return a nil value because all root bucket keys point to buckets. -// The cursor is only valid as long as the transaction is open. -// Do not use a cursor after the transaction is closed. -func (tx *Tx) Cursor() *Cursor { - return tx.root.Cursor() -} - -// Stats retrieves a copy of the current transaction statistics. -func (tx *Tx) Stats() TxStats { - return tx.stats -} - -// Bucket retrieves a bucket by name. -// Returns nil if the bucket does not exist. -// The bucket instance is only valid for the lifetime of the transaction. 
-func (tx *Tx) Bucket(name []byte) *Bucket { - return tx.root.Bucket(name) -} - -// CreateBucket creates a new bucket. -// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. -func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) { - return tx.root.CreateBucket(name) -} - -// CreateBucketIfNotExists creates a new bucket if it doesn't already exist. -// Returns an error if the bucket name is blank, or if the bucket name is too long. -// The bucket instance is only valid for the lifetime of the transaction. -func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) { - return tx.root.CreateBucketIfNotExists(name) -} - -// DeleteBucket deletes a bucket. -// Returns an error if the bucket cannot be found or if the key represents a non-bucket value. -func (tx *Tx) DeleteBucket(name []byte) error { - return tx.root.DeleteBucket(name) -} - -// ForEach executes a function for each bucket in the root. -// If the provided function returns an error then the iteration is stopped and -// the error is returned to the caller. -func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error { - return tx.root.ForEach(func(k, v []byte) error { - if err := fn(k, tx.root.Bucket(k)); err != nil { - return err - } - return nil - }) -} - -// OnCommit adds a handler function to be executed after the transaction successfully commits. -func (tx *Tx) OnCommit(fn func()) { - tx.commitHandlers = append(tx.commitHandlers, fn) -} - -// Commit writes all changes to disk and updates the meta page. -// Returns an error if a disk write error occurs, or if Commit is -// called on a read-only transaction. -func (tx *Tx) Commit() error { - _assert(!tx.managed, "managed tx commit not allowed") - if tx.db == nil { - return ErrTxClosed - } else if !tx.writable { - return ErrTxNotWritable - } - - // TODO(benbjohnson): Use vectorized I/O to write out dirty pages. - - // Rebalance nodes which have had deletions. - var startTime = time.Now() - tx.root.rebalance() - if tx.stats.Rebalance > 0 { - tx.stats.RebalanceTime += time.Since(startTime) - } - - // spill data onto dirty pages. - startTime = time.Now() - if err := tx.root.spill(); err != nil { - tx.rollback() - return err - } - tx.stats.SpillTime += time.Since(startTime) - - // Free the old root bucket. - tx.meta.root.root = tx.root.root - - opgid := tx.meta.pgid - - // Free the freelist and allocate new pages for it. This will overestimate - // the size of the freelist but not underestimate the size (which would be bad). - tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) - p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) - if err != nil { - tx.rollback() - return err - } - if err := tx.db.freelist.write(p); err != nil { - tx.rollback() - return err - } - tx.meta.freelist = p.id - - // If the high water mark has moved up then attempt to grow the database. - if tx.meta.pgid > opgid { - if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { - tx.rollback() - return err - } - } - - // Write dirty pages to disk. - startTime = time.Now() - if err := tx.write(); err != nil { - tx.rollback() - return err - } - - // If strict mode is enabled then perform a consistency check. - // Only the first consistency error is reported in the panic. 
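The strict-mode block that follows drains the error channel returned by
Check until it closes. The same pattern works from application code inside
a read transaction; a sketch, with a hypothetical database path:

package main

import (
	"fmt"
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("my.db", 0666, nil) // hypothetical path
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := db.View(func(tx *bolt.Tx) error {
		// Check sends one error per inconsistency and closes the channel
		// once every page has been visited.
		for cerr := range tx.Check() {
			fmt.Println("consistency error:", cerr)
		}
		return nil
	}); err != nil {
		log.Fatal(err)
	}
}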
- if tx.db.StrictMode { - ch := tx.Check() - var errs []string - for { - err, ok := <-ch - if !ok { - break - } - errs = append(errs, err.Error()) - } - if len(errs) > 0 { - panic("check fail: " + strings.Join(errs, "\n")) - } - } - - // Write meta to disk. - if err := tx.writeMeta(); err != nil { - tx.rollback() - return err - } - tx.stats.WriteTime += time.Since(startTime) - - // Finalize the transaction. - tx.close() - - // Execute commit handlers now that the locks have been removed. - for _, fn := range tx.commitHandlers { - fn() - } - - return nil -} - -// Rollback closes the transaction and ignores all previous updates. Read-only -// transactions must be rolled back and not committed. -func (tx *Tx) Rollback() error { - _assert(!tx.managed, "managed tx rollback not allowed") - if tx.db == nil { - return ErrTxClosed - } - tx.rollback() - return nil -} - -func (tx *Tx) rollback() { - if tx.db == nil { - return - } - if tx.writable { - tx.db.freelist.rollback(tx.meta.txid) - tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) - } - tx.close() -} - -func (tx *Tx) close() { - if tx.db == nil { - return - } - if tx.writable { - // Grab freelist stats. - var freelistFreeN = tx.db.freelist.free_count() - var freelistPendingN = tx.db.freelist.pending_count() - var freelistAlloc = tx.db.freelist.size() - - // Remove transaction ref & writer lock. - tx.db.rwtx = nil - tx.db.rwlock.Unlock() - - // Merge statistics. - tx.db.statlock.Lock() - tx.db.stats.FreePageN = freelistFreeN - tx.db.stats.PendingPageN = freelistPendingN - tx.db.stats.FreeAlloc = (freelistFreeN + freelistPendingN) * tx.db.pageSize - tx.db.stats.FreelistInuse = freelistAlloc - tx.db.stats.TxStats.add(&tx.stats) - tx.db.statlock.Unlock() - } else { - tx.db.removeTx(tx) - } - - // Clear all references. - tx.db = nil - tx.meta = nil - tx.root = Bucket{tx: tx} - tx.pages = nil -} - -// Copy writes the entire database to a writer. -// This function exists for backwards compatibility. -// -// Deprecated; Use WriteTo() instead. -func (tx *Tx) Copy(w io.Writer) error { - _, err := tx.WriteTo(w) - return err -} - -// WriteTo writes the entire database to a writer. -// If err == nil then exactly tx.Size() bytes will be written into the writer. -func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) { - // Attempt to open reader with WriteFlag - f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) - if err != nil { - return 0, err - } - defer func() { _ = f.Close() }() - - // Generate a meta page. We use the same page data for both meta pages. - buf := make([]byte, tx.db.pageSize) - page := (*page)(unsafe.Pointer(&buf[0])) - page.flags = metaPageFlag - *page.meta() = *tx.meta - - // Write meta 0. - page.id = 0 - page.meta().checksum = page.meta().sum64() - nn, err := w.Write(buf) - n += int64(nn) - if err != nil { - return n, fmt.Errorf("meta 0 copy: %s", err) - } - - // Write meta 1 with a lower transaction id. - page.id = 1 - page.meta().txid -= 1 - page.meta().checksum = page.meta().sum64() - nn, err = w.Write(buf) - n += int64(nn) - if err != nil { - return n, fmt.Errorf("meta 1 copy: %s", err) - } - - // Move past the meta pages in the file. - if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil { - return n, fmt.Errorf("seek: %s", err) - } - - // Copy data pages. - wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2)) - n += wn - if err != nil { - return n, err - } - - return n, f.Close() -} - -// CopyFile copies the entire database to file at the given path. 
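CopyFile, defined next, is the usual entry point for taking a hot backup on
top of the WriteTo implementation above. A minimal sketch; both paths are
hypothetical:

package main

import (
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("my.db", 0666, nil) // hypothetical source path
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The read transaction pins a consistent snapshot while the copy runs,
	// so writers are not blocked.
	if err := db.View(func(tx *bolt.Tx) error {
		return tx.CopyFile("backup.db", 0666) // hypothetical destination
	}); err != nil {
		log.Fatal(err)
	}
}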
-// A reader transaction is maintained during the copy so it is safe to continue -// using the database while a copy is in progress. -func (tx *Tx) CopyFile(path string, mode os.FileMode) error { - f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, mode) - if err != nil { - return err - } - - err = tx.Copy(f) - if err != nil { - _ = f.Close() - return err - } - return f.Close() -} - -// Check performs several consistency checks on the database for this transaction. -// An error is returned if any inconsistency is found. -// -// It can be safely run concurrently on a writable transaction. However, this -// incurs a high cost for large databases and databases with a lot of subbuckets -// because of caching. This overhead can be removed if running on a read-only -// transaction, however, it is not safe to execute other writer transactions at -// the same time. -func (tx *Tx) Check() <-chan error { - ch := make(chan error) - go tx.check(ch) - return ch -} - -func (tx *Tx) check(ch chan error) { - // Check if any pages are double freed. - freed := make(map[pgid]bool) - all := make([]pgid, tx.db.freelist.count()) - tx.db.freelist.copyall(all) - for _, id := range all { - if freed[id] { - ch <- fmt.Errorf("page %d: already freed", id) - } - freed[id] = true - } - - // Track every reachable page. - reachable := make(map[pgid]*page) - reachable[0] = tx.page(0) // meta0 - reachable[1] = tx.page(1) // meta1 - for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { - reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) - } - - // Recursively check buckets. - tx.checkBucket(&tx.root, reachable, freed, ch) - - // Ensure all pages below high water mark are either reachable or freed. - for i := pgid(0); i < tx.meta.pgid; i++ { - _, isReachable := reachable[i] - if !isReachable && !freed[i] { - ch <- fmt.Errorf("page %d: unreachable unfreed", int(i)) - } - } - - // Close the channel to signal completion. - close(ch) -} - -func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) { - // Ignore inline buckets. - if b.root == 0 { - return - } - - // Check every page used by this bucket. - b.tx.forEachPage(b.root, 0, func(p *page, _ int) { - if p.id > tx.meta.pgid { - ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid)) - } - - // Ensure each page is only referenced once. - for i := pgid(0); i <= pgid(p.overflow); i++ { - var id = p.id + i - if _, ok := reachable[id]; ok { - ch <- fmt.Errorf("page %d: multiple references", int(id)) - } - reachable[id] = p - } - - // We should only encounter un-freed leaf and branch pages. - if freed[p.id] { - ch <- fmt.Errorf("page %d: reachable freed", int(p.id)) - } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 { - ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ()) - } - }) - - // Check each bucket within this bucket. - _ = b.ForEach(func(k, v []byte) error { - if child := b.Bucket(k); child != nil { - tx.checkBucket(child, reachable, freed, ch) - } - return nil - }) -} - -// allocate returns a contiguous block of memory starting at a given page. -func (tx *Tx) allocate(count int) (*page, error) { - p, err := tx.db.allocate(count) - if err != nil { - return nil, err - } - - // Save to our page cache. - tx.pages[p.id] = p - - // Update statistics. - tx.stats.PageCount++ - tx.stats.PageAlloc += count * tx.db.pageSize - - return p, nil -} - -// write writes any dirty pages to disk. -func (tx *Tx) write() error { - // Sort pages by id. 
- pages := make(pages, 0, len(tx.pages)) - for _, p := range tx.pages { - pages = append(pages, p) - } - // Clear out page cache early. - tx.pages = make(map[pgid]*page) - sort.Sort(pages) - - // Write pages to disk in order. - for _, p := range pages { - size := (int(p.overflow) + 1) * tx.db.pageSize - offset := int64(p.id) * int64(tx.db.pageSize) - - // Write out page in "max allocation" sized chunks. - ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p)) - for { - // Limit our write to our max allocation size. - sz := size - if sz > maxAllocSize-1 { - sz = maxAllocSize - 1 - } - - // Write chunk to disk. - buf := ptr[:sz] - if _, err := tx.db.ops.writeAt(buf, offset); err != nil { - return err - } - - // Update statistics. - tx.stats.Write++ - - // Exit inner for loop if we've written all the chunks. - size -= sz - if size == 0 { - break - } - - // Otherwise move offset forward and move pointer to next chunk. - offset += int64(sz) - ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz])) - } - } - - // Ignore file sync if flag is set on DB. - if !tx.db.NoSync || IgnoreNoSync { - if err := fdatasync(tx.db); err != nil { - return err - } - } - - // Put small pages back to page pool. - for _, p := range pages { - // Ignore page sizes over 1 page. - // These are allocated using make() instead of the page pool. - if int(p.overflow) != 0 { - continue - } - - buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize] - - // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 - for i := range buf { - buf[i] = 0 - } - tx.db.pagePool.Put(buf) - } - - return nil -} - -// writeMeta writes the meta to the disk. -func (tx *Tx) writeMeta() error { - // Create a temporary buffer for the meta page. - buf := make([]byte, tx.db.pageSize) - p := tx.db.pageInBuffer(buf, 0) - tx.meta.write(p) - - // Write the meta page to file. - if _, err := tx.db.ops.writeAt(buf, int64(p.id)*int64(tx.db.pageSize)); err != nil { - return err - } - if !tx.db.NoSync || IgnoreNoSync { - if err := fdatasync(tx.db); err != nil { - return err - } - } - - // Update statistics. - tx.stats.Write++ - - return nil -} - -// page returns a reference to the page with a given id. -// If page has been written to then a temporary buffered page is returned. -func (tx *Tx) page(id pgid) *page { - // Check the dirty pages first. - if tx.pages != nil { - if p, ok := tx.pages[id]; ok { - return p - } - } - - // Otherwise return directly from the mmap. - return tx.db.page(id) -} - -// forEachPage iterates over every page within a given page and executes a function. -func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) { - p := tx.page(pgid) - - // Execute function. - fn(p, depth) - - // Recursively loop over children. - if (p.flags & branchPageFlag) != 0 { - for i := 0; i < int(p.count); i++ { - elem := p.branchPageElement(uint16(i)) - tx.forEachPage(elem.pgid, depth+1, fn) - } - } -} - -// Page returns page information for a given page number. -// This is only safe for concurrent use when used by a writable transaction. -func (tx *Tx) Page(id int) (*PageInfo, error) { - if tx.db == nil { - return nil, ErrTxClosed - } else if pgid(id) >= tx.meta.pgid { - return nil, nil - } - - // Build the page info. - p := tx.db.page(pgid(id)) - info := &PageInfo{ - ID: id, - Count: int(p.count), - OverflowCount: int(p.overflow), - } - - // Determine the type (or if it's free). 
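Page, continued just below, surfaces this information as a PageInfo. A usage
sketch; the database path and page id are arbitrary:

package main

import (
	"fmt"
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("my.db", 0666, nil) // hypothetical path
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Per its doc comment, Page is only safe alongside concurrent use when
	// called from a writable transaction.
	if err := db.Update(func(tx *bolt.Tx) error {
		info, err := tx.Page(2) // arbitrary page id
		if err != nil {
			return err
		}
		if info == nil {
			fmt.Println("page id is beyond the high water mark")
			return nil
		}
		fmt.Printf("id=%d type=%s count=%d overflow=%d\n",
			info.ID, info.Type, info.Count, info.OverflowCount)
		return nil
	}); err != nil {
		log.Fatal(err)
	}
}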
- if tx.db.freelist.freed(pgid(id)) { - info.Type = "free" - } else { - info.Type = p.typ() - } - - return info, nil -} - -// TxStats represents statistics about the actions performed by the transaction. -type TxStats struct { - // Page statistics. - PageCount int // number of page allocations - PageAlloc int // total bytes allocated - - // Cursor statistics. - CursorCount int // number of cursors created - - // Node statistics - NodeCount int // number of node allocations - NodeDeref int // number of node dereferences - - // Rebalance statistics. - Rebalance int // number of node rebalances - RebalanceTime time.Duration // total time spent rebalancing - - // Split/Spill statistics. - Split int // number of nodes split - Spill int // number of nodes spilled - SpillTime time.Duration // total time spent spilling - - // Write statistics. - Write int // number of writes performed - WriteTime time.Duration // total time spent writing to disk -} - -func (s *TxStats) add(other *TxStats) { - s.PageCount += other.PageCount - s.PageAlloc += other.PageAlloc - s.CursorCount += other.CursorCount - s.NodeCount += other.NodeCount - s.NodeDeref += other.NodeDeref - s.Rebalance += other.Rebalance - s.RebalanceTime += other.RebalanceTime - s.Split += other.Split - s.Spill += other.Spill - s.SpillTime += other.SpillTime - s.Write += other.Write - s.WriteTime += other.WriteTime -} - -// Sub calculates and returns the difference between two sets of transaction stats. -// This is useful when obtaining stats at two different points and time and -// you need the performance counters that occurred within that time span. -func (s *TxStats) Sub(other *TxStats) TxStats { - var diff TxStats - diff.PageCount = s.PageCount - other.PageCount - diff.PageAlloc = s.PageAlloc - other.PageAlloc - diff.CursorCount = s.CursorCount - other.CursorCount - diff.NodeCount = s.NodeCount - other.NodeCount - diff.NodeDeref = s.NodeDeref - other.NodeDeref - diff.Rebalance = s.Rebalance - other.Rebalance - diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime - diff.Split = s.Split - other.Split - diff.Spill = s.Spill - other.Spill - diff.SpillTime = s.SpillTime - other.SpillTime - diff.Write = s.Write - other.Write - diff.WriteTime = s.WriteTime - other.WriteTime - return diff -} diff --git a/tests/benchmarks/multiple-writers/gkv.go b/tests/benchmarks/multiple-writers/gkv.go new file mode 100644 index 0000000..1d9ac9d --- /dev/null +++ b/tests/benchmarks/multiple-writers/gkv.go @@ -0,0 +1,23 @@ +package gkv + +import ( + "flag" + "time" +) + + + +var nFlag = flag.Int( + "n", + 1_000, + "The number of iterations to execute", +) + +func MainTest() { + flag.Parse() + n := *nFlag + + for i := 0; i < n; i++ { + time.Sleep(time.Millisecond * 1) + } +} diff --git a/tests/benchmarks/multiple-writers/main.go b/tests/benchmarks/multiple-writers/main.go new file mode 120000 index 0000000..f67563d --- /dev/null +++ b/tests/benchmarks/multiple-writers/main.go @@ -0,0 +1 @@ +../../main.go
diff --git a/tests/benchmarks/multiple-writers/gkv.go b/tests/benchmarks/multiple-writers/gkv.go
new file mode 100644
index 0000000..1d9ac9d
--- /dev/null
+++ b/tests/benchmarks/multiple-writers/gkv.go
@@ -0,0 +1,23 @@
+package gkv
+
+import (
+	"flag"
+	"time"
+)
+
+
+
+var nFlag = flag.Int(
+	"n",
+	1_000,
+	"The number of iterations to execute",
+)
+
+func MainTest() {
+	flag.Parse()
+	n := *nFlag
+
+	for i := 0; i < n; i++ {
+		time.Sleep(time.Millisecond * 1)
+	}
+}
diff --git a/tests/benchmarks/multiple-writers/main.go b/tests/benchmarks/multiple-writers/main.go
new file mode 120000
index 0000000..f67563d
--- /dev/null
+++ b/tests/benchmarks/multiple-writers/main.go
@@ -0,0 +1 @@
+../../main.go
\ No newline at end of file
diff --git a/tests/bucket_test.go b/tests/bucket_test.go
deleted file mode 100644
index cddbe27..0000000
--- a/tests/bucket_test.go
+++ /dev/null
@@ -1,1909 +0,0 @@
-package bolt_test
-
-import (
-	"bytes"
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"log"
-	"math/rand"
-	"os"
-	"strconv"
-	"strings"
-	"testing"
-	"testing/quick"
-
-	"github.com/boltdb/bolt"
-)
-
-// Ensure that a bucket that gets a non-existent key returns nil.
-func TestBucket_Get_NonExistent(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if v := b.Get([]byte("foo")); v != nil {
-			t.Fatal("expected nil value")
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a bucket can read a value that is not flushed yet.
-func TestBucket_Get_FromNode(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-		if v := b.Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) {
-			t.Fatalf("unexpected value: %v", v)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a bucket retrieved via Get() returns nil.
-func TestBucket_Get_IncompatibleValue(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-
-		if tx.Bucket([]byte("widgets")).Get([]byte("foo")) != nil {
-			t.Fatal("expected nil value")
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a slice returned from a bucket has a capacity equal to its length.
-// This also allows slices to be appended to since it will require a realloc by Go.
-//
-// https://github.com/boltdb/bolt/issues/544
-func TestBucket_Get_Capacity(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	// Write key to a bucket.
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("bucket"))
-		if err != nil {
-			return err
-		}
-		return b.Put([]byte("key"), []byte("val"))
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	// Retrieve value and attempt to append to it.
-	if err := db.Update(func(tx *bolt.Tx) error {
-		k, v := tx.Bucket([]byte("bucket")).Cursor().First()
-
-		// Verify capacity.
-		if len(k) != cap(k) {
-			t.Fatalf("unexpected key slice capacity: %d", cap(k))
-		} else if len(v) != cap(v) {
-			t.Fatalf("unexpected value slice capacity: %d", cap(v))
-		}
-
-		// Ensure slice can be appended to without a segfault.
-		k = append(k, []byte("123")...)
-		v = append(v, []byte("123")...)
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
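The capacity test above pins down a subtle contract: slices returned by Get point into the read-only mmap and are valid only for the life of the transaction, and len == cap forces any append to reallocate instead of writing into the mapping. A hedged sketch of the usual copy-out pattern under the same assumed API (file, bucket, and key names are illustrative):

package main

import (
	"fmt"
	"log"

	"github.com/boltdb/bolt"
)

func main() {
	db, err := bolt.Open("copy.db", 0666, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("bucket"))
		if err != nil {
			return err
		}
		return b.Put([]byte("key"), []byte("val"))
	}); err != nil {
		log.Fatal(err)
	}

	var out []byte
	if err := db.View(func(tx *bolt.Tx) error {
		// Copy the value out of the mmap before the transaction ends.
		v := tx.Bucket([]byte("bucket")).Get([]byte("key"))
		out = append([]byte(nil), v...)
		return nil
	}); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("value: %s\n", out)
}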
-// Ensure that a bucket can write a key/value.
-func TestBucket_Put(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-
-		v := tx.Bucket([]byte("widgets")).Get([]byte("foo"))
-		if !bytes.Equal([]byte("bar"), v) {
-			t.Fatalf("unexpected value: %v", v)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a bucket can rewrite a key in the same transaction.
-func TestBucket_Put_Repeat(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("baz")); err != nil {
-			t.Fatal(err)
-		}
-
-		value := tx.Bucket([]byte("widgets")).Get([]byte("foo"))
-		if !bytes.Equal([]byte("baz"), value) {
-			t.Fatalf("unexpected value: %v", value)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a bucket can write a bunch of large values.
-func TestBucket_Put_Large(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	count, factor := 100, 200
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		for i := 1; i < count; i++ {
-			if err := b.Put([]byte(strings.Repeat("0", i*factor)), []byte(strings.Repeat("X", (count-i)*factor))); err != nil {
-				t.Fatal(err)
-			}
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		b := tx.Bucket([]byte("widgets"))
-		for i := 1; i < count; i++ {
-			value := b.Get([]byte(strings.Repeat("0", i*factor)))
-			if !bytes.Equal(value, []byte(strings.Repeat("X", (count-i)*factor))) {
-				t.Fatalf("unexpected value: %v", value)
-			}
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a database can perform multiple large appends safely.
-func TestDB_Put_VeryLarge(t *testing.T) {
-	if testing.Short() {
-		t.Skip("skipping test in short mode.")
-	}
-
-	n, batchN := 400000, 200000
-	ksize, vsize := 8, 500
-
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	for i := 0; i < n; i += batchN {
-		if err := db.Update(func(tx *bolt.Tx) error {
-			b, err := tx.CreateBucketIfNotExists([]byte("widgets"))
-			if err != nil {
-				t.Fatal(err)
-			}
-			for j := 0; j < batchN; j++ {
-				k, v := make([]byte, ksize), make([]byte, vsize)
-				binary.BigEndian.PutUint32(k, uint32(i+j))
-				if err := b.Put(k, v); err != nil {
-					t.Fatal(err)
-				}
-			}
-			return nil
-		}); err != nil {
-			t.Fatal(err)
-		}
-	}
-}
-
-// Ensure that setting a value on a key with a bucket value returns an error.
-func TestBucket_Put_IncompatibleValue(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b0, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b0.Put([]byte("foo"), []byte("bar")); err != bolt.ErrIncompatibleValue {
-			t.Fatalf("unexpected error: %s", err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that setting a value while the transaction is closed returns an error.
-func TestBucket_Put_Closed(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - tx, err := db.Begin(true) - if err != nil { - t.Fatal(err) - } - - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - if err := tx.Rollback(); err != nil { - t.Fatal(err) - } - - if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxClosed { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that setting a value on a read-only bucket returns an error. -func TestBucket_Put_ReadOnly(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - if err := b.Put([]byte("foo"), []byte("bar")); err != bolt.ErrTxNotWritable { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket can delete an existing key. -func TestBucket_Delete(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if err := b.Delete([]byte("foo")); err != nil { - t.Fatal(err) - } - if v := b.Get([]byte("foo")); v != nil { - t.Fatalf("unexpected value: %v", v) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that deleting a large set of keys will work correctly. -func TestBucket_Delete_Large(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - for i := 0; i < 100; i++ { - if err := b.Put([]byte(strconv.Itoa(i)), []byte(strings.Repeat("*", 1024))); err != nil { - t.Fatal(err) - } - } - - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for i := 0; i < 100; i++ { - if err := b.Delete([]byte(strconv.Itoa(i))); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for i := 0; i < 100; i++ { - if v := b.Get([]byte(strconv.Itoa(i))); v != nil { - t.Fatalf("unexpected value: %v, i=%d", v, i) - } - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Deleting a very large list of keys will cause the freelist to use overflow. 
-func TestBucket_Delete_FreelistOverflow(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } - - db := MustOpenDB() - defer db.MustClose() - - k := make([]byte, 16) - for i := uint64(0); i < 10000; i++ { - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucketIfNotExists([]byte("0")) - if err != nil { - t.Fatalf("bucket error: %s", err) - } - - for j := uint64(0); j < 1000; j++ { - binary.BigEndian.PutUint64(k[:8], i) - binary.BigEndian.PutUint64(k[8:], j) - if err := b.Put(k, nil); err != nil { - t.Fatalf("put error: %s", err) - } - } - - return nil - }); err != nil { - t.Fatal(err) - } - } - - // Delete all of them in one large transaction - if err := db.Update(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("0")) - c := b.Cursor() - for k, _ := c.First(); k != nil; k, _ = c.Next() { - if err := c.Delete(); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that accessing and updating nested buckets is ok across transactions. -func TestBucket_Nested(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - // Create a widgets bucket. - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - // Create a widgets/foo bucket. - _, err = b.CreateBucket([]byte("foo")) - if err != nil { - t.Fatal(err) - } - - // Create a widgets/bar key. - if err := b.Put([]byte("bar"), []byte("0000")); err != nil { - t.Fatal(err) - } - - return nil - }); err != nil { - t.Fatal(err) - } - db.MustCheck() - - // Update widgets/bar. - if err := db.Update(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - if err := b.Put([]byte("bar"), []byte("xxxx")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - db.MustCheck() - - // Cause a split. - if err := db.Update(func(tx *bolt.Tx) error { - var b = tx.Bucket([]byte("widgets")) - for i := 0; i < 10000; i++ { - if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - db.MustCheck() - - // Insert into widgets/foo/baz. - if err := db.Update(func(tx *bolt.Tx) error { - var b = tx.Bucket([]byte("widgets")) - if err := b.Bucket([]byte("foo")).Put([]byte("baz"), []byte("yyyy")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - db.MustCheck() - - // Verify. - if err := db.View(func(tx *bolt.Tx) error { - var b = tx.Bucket([]byte("widgets")) - if v := b.Bucket([]byte("foo")).Get([]byte("baz")); !bytes.Equal(v, []byte("yyyy")) { - t.Fatalf("unexpected value: %v", v) - } - if v := b.Get([]byte("bar")); !bytes.Equal(v, []byte("xxxx")) { - t.Fatalf("unexpected value: %v", v) - } - for i := 0; i < 10000; i++ { - if v := b.Get([]byte(strconv.Itoa(i))); !bytes.Equal(v, []byte(strconv.Itoa(i))) { - t.Fatalf("unexpected value: %v", v) - } - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that deleting a bucket using Delete() returns an error. 
-func TestBucket_Delete_Bucket(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Delete([]byte("foo")); err != bolt.ErrIncompatibleValue {
-			t.Fatalf("unexpected error: %s", err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that deleting a key on a read-only bucket returns an error.
-func TestBucket_Delete_ReadOnly(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != bolt.ErrTxNotWritable {
-			t.Fatalf("unexpected error: %s", err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that deleting a value while the transaction is closed returns an error.
-func TestBucket_Delete_Closed(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	tx, err := db.Begin(true)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	b, err := tx.CreateBucket([]byte("widgets"))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	if err := tx.Rollback(); err != nil {
-		t.Fatal(err)
-	}
-	if err := b.Delete([]byte("foo")); err != bolt.ErrTxClosed {
-		t.Fatalf("unexpected error: %s", err)
-	}
-}
-
-// Ensure that deleting a bucket causes nested buckets to be deleted.
-func TestBucket_DeleteBucket_Nested(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		widgets, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		foo, err := widgets.CreateBucket([]byte("foo"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		bar, err := foo.CreateBucket([]byte("bar"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := bar.Put([]byte("baz"), []byte("bat")); err != nil {
-			t.Fatal(err)
-		}
-		if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that deleting a bucket causes nested buckets to be deleted after they have been committed.
-func TestBucket_DeleteBucket_Nested2(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - widgets, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - foo, err := widgets.CreateBucket([]byte("foo")) - if err != nil { - t.Fatal(err) - } - - bar, err := foo.CreateBucket([]byte("bar")) - if err != nil { - t.Fatal(err) - } - - if err := bar.Put([]byte("baz"), []byte("bat")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - widgets := tx.Bucket([]byte("widgets")) - if widgets == nil { - t.Fatal("expected widgets bucket") - } - - foo := widgets.Bucket([]byte("foo")) - if foo == nil { - t.Fatal("expected foo bucket") - } - - bar := foo.Bucket([]byte("bar")) - if bar == nil { - t.Fatal("expected bar bucket") - } - - if v := bar.Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) { - t.Fatalf("unexpected value: %v", v) - } - if err := tx.DeleteBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - if tx.Bucket([]byte("widgets")) != nil { - t.Fatal("expected bucket to be deleted") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that deleting a child bucket with multiple pages causes all pages to get collected. -// NOTE: Consistency check in bolt_test.DB.Close() will panic if pages not freed properly. -func TestBucket_DeleteBucket_Large(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - widgets, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - foo, err := widgets.CreateBucket([]byte("foo")) - if err != nil { - t.Fatal(err) - } - - for i := 0; i < 1000; i++ { - if err := foo.Put([]byte(fmt.Sprintf("%d", i)), []byte(fmt.Sprintf("%0100d", i))); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.DeleteBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a simple value retrieved via Bucket() returns a nil. -func TestBucket_Bucket_IncompatibleValue(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - widgets, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if b := tx.Bucket([]byte("widgets")).Bucket([]byte("foo")); b != nil { - t.Fatal("expected nil bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that creating a bucket on an existing non-bucket key returns an error. -func TestBucket_CreateBucket_IncompatibleValue(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - widgets, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if _, err := widgets.CreateBucket([]byte("foo")); err != bolt.ErrIncompatibleValue { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that deleting a bucket on an existing non-bucket key returns an error. 
-func TestBucket_DeleteBucket_IncompatibleValue(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		widgets, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-		if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != bolt.ErrIncompatibleValue {
-			t.Fatalf("unexpected error: %s", err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure bucket can set and update its sequence number.
-func TestBucket_Sequence(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		bkt, err := tx.CreateBucket([]byte("0"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		// Retrieve sequence.
-		if v := bkt.Sequence(); v != 0 {
-			t.Fatalf("unexpected sequence: %d", v)
-		}
-
-		// Update sequence.
-		if err := bkt.SetSequence(1000); err != nil {
-			t.Fatal(err)
-		}
-
-		// Read sequence again.
-		if v := bkt.Sequence(); v != 1000 {
-			t.Fatalf("unexpected sequence: %d", v)
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	// Verify sequence in separate transaction.
-	if err := db.View(func(tx *bolt.Tx) error {
-		if v := tx.Bucket([]byte("0")).Sequence(); v != 1000 {
-			t.Fatalf("unexpected sequence: %d", v)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a bucket can return an autoincrementing sequence.
-func TestBucket_NextSequence(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		widgets, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		woojits, err := tx.CreateBucket([]byte("woojits"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		// Make sure sequence increments.
-		if seq, err := widgets.NextSequence(); err != nil {
-			t.Fatal(err)
-		} else if seq != 1 {
-			t.Fatalf("unexpected sequence: %d", seq)
-		}
-
-		if seq, err := widgets.NextSequence(); err != nil {
-			t.Fatal(err)
-		} else if seq != 2 {
-			t.Fatalf("unexpected sequence: %d", seq)
-		}
-
-		// Buckets should be separate.
-		if seq, err := woojits.NextSequence(); err != nil {
-			t.Fatal(err)
-		} else if seq != 1 {
-			t.Fatalf("unexpected sequence: %d", seq)
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a bucket will persist an autoincrementing sequence even if it's
-// the only thing updated on the bucket.
-// https://github.com/boltdb/bolt/issues/296
-func TestBucket_NextSequence_Persist(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		if _, err := tx.Bucket([]byte("widgets")).NextSequence(); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		seq, err := tx.Bucket([]byte("widgets")).NextSequence()
-		if err != nil {
-			t.Fatalf("unexpected error: %s", err)
-		} else if seq != 2 {
-			t.Fatalf("unexpected sequence: %d", seq)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
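The sequence tests above exercise the common reason NextSequence exists: deriving monotonically increasing, insert-ordered keys inside a writable transaction. A minimal sketch under the same assumed boltdb API (itob, the file name, and the bucket name are illustrative):

package main

import (
	"encoding/binary"
	"log"

	"github.com/boltdb/bolt"
)

// itob encodes a sequence number as a big-endian, sortable 8-byte key.
func itob(v uint64) []byte {
	b := make([]byte, 8)
	binary.BigEndian.PutUint64(b, v)
	return b
}

func main() {
	db, err := bolt.Open("seq.db", 0666, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	if err := db.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists([]byte("events"))
		if err != nil {
			return err
		}
		// NextSequence only works in a writable transaction; the new
		// value is persisted when the transaction commits.
		seq, err := b.NextSequence()
		if err != nil {
			return err
		}
		return b.Put(itob(seq), []byte("payload"))
	}); err != nil {
		log.Fatal(err)
	}
}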
-// Ensure that retrieving the next sequence on a read-only bucket returns an error.
-func TestBucket_NextSequence_ReadOnly(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		_, err := tx.Bucket([]byte("widgets")).NextSequence()
-		if err != bolt.ErrTxNotWritable {
-			t.Fatalf("unexpected error: %s", err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that retrieving the next sequence for a bucket on a closed database returns an error.
-func TestBucket_NextSequence_Closed(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	tx, err := db.Begin(true)
-	if err != nil {
-		t.Fatal(err)
-	}
-	b, err := tx.CreateBucket([]byte("widgets"))
-	if err != nil {
-		t.Fatal(err)
-	}
-	if err := tx.Rollback(); err != nil {
-		t.Fatal(err)
-	}
-	if _, err := b.NextSequence(); err != bolt.ErrTxClosed {
-		t.Fatal(err)
-	}
-}
-
-// Ensure a user can loop over all key/value pairs in a bucket.
-func TestBucket_ForEach(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("0000")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("baz"), []byte("0001")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("bar"), []byte("0002")); err != nil {
-			t.Fatal(err)
-		}
-
-		var index int
-		if err := b.ForEach(func(k, v []byte) error {
-			switch index {
-			case 0:
-				if !bytes.Equal(k, []byte("bar")) {
-					t.Fatalf("unexpected key: %v", k)
-				} else if !bytes.Equal(v, []byte("0002")) {
-					t.Fatalf("unexpected value: %v", v)
-				}
-			case 1:
-				if !bytes.Equal(k, []byte("baz")) {
-					t.Fatalf("unexpected key: %v", k)
-				} else if !bytes.Equal(v, []byte("0001")) {
-					t.Fatalf("unexpected value: %v", v)
-				}
-			case 2:
-				if !bytes.Equal(k, []byte("foo")) {
-					t.Fatalf("unexpected key: %v", k)
-				} else if !bytes.Equal(v, []byte("0000")) {
-					t.Fatalf("unexpected value: %v", v)
-				}
-			}
-			index++
-			return nil
-		}); err != nil {
-			t.Fatal(err)
-		}
-
-		if index != 3 {
-			t.Fatalf("unexpected index: %d", index)
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure a database can stop iteration early.
-func TestBucket_ForEach_ShortCircuit(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("bar"), []byte("0000")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("baz"), []byte("0000")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("0000")); err != nil {
-			t.Fatal(err)
-		}
-
-		var index int
-		if err := tx.Bucket([]byte("widgets")).ForEach(func(k, v []byte) error {
-			index++
-			if bytes.Equal(k, []byte("baz")) {
-				return errors.New("marker")
-			}
-			return nil
-		}); err == nil || err.Error() != "marker" {
-			t.Fatalf("unexpected error: %s", err)
-		}
-		if index != 2 {
-			t.Fatalf("unexpected index: %d", index)
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that looping over a bucket on a closed database returns an error.
-func TestBucket_ForEach_Closed(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - tx, err := db.Begin(true) - if err != nil { - t.Fatal(err) - } - - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - if err := tx.Rollback(); err != nil { - t.Fatal(err) - } - - if err := b.ForEach(func(k, v []byte) error { return nil }); err != bolt.ErrTxClosed { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that an error is returned when inserting with an empty key. -func TestBucket_Put_EmptyKey(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte(""), []byte("bar")); err != bolt.ErrKeyRequired { - t.Fatalf("unexpected error: %s", err) - } - if err := b.Put(nil, []byte("bar")); err != bolt.ErrKeyRequired { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that an error is returned when inserting with a key that's too large. -func TestBucket_Put_KeyTooLarge(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put(make([]byte, 32769), []byte("bar")); err != bolt.ErrKeyTooLarge { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that an error is returned when inserting a value that's too large. -func TestBucket_Put_ValueTooLarge(t *testing.T) { - // Skip this test on DroneCI because the machine is resource constrained. - if os.Getenv("DRONE") == "true" { - t.Skip("not enough RAM for test") - } - - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), make([]byte, bolt.MaxValueSize+1)); err != bolt.ErrValueTooLarge { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure a bucket can calculate stats. -func TestBucket_Stats(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - // Add bucket with fewer keys but one big value. 
- bigKey := []byte("really-big-value") - for i := 0; i < 500; i++ { - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucketIfNotExists([]byte("woojits")) - if err != nil { - t.Fatal(err) - } - - if err := b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i))); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - } - if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.Bucket([]byte("woojits")).Put(bigKey, []byte(strings.Repeat("*", 10000))); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - db.MustCheck() - - if err := db.View(func(tx *bolt.Tx) error { - stats := tx.Bucket([]byte("woojits")).Stats() - if stats.BranchPageN != 1 { - t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) - } else if stats.BranchOverflowN != 0 { - t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) - } else if stats.LeafPageN != 7 { - t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) - } else if stats.LeafOverflowN != 2 { - t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) - } else if stats.KeyN != 501 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } else if stats.Depth != 2 { - t.Fatalf("unexpected Depth: %d", stats.Depth) - } - - branchInuse := 16 // branch page header - branchInuse += 7 * 16 // branch elements - branchInuse += 7 * 3 // branch keys (6 3-byte keys) - if stats.BranchInuse != branchInuse { - t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) - } - - leafInuse := 7 * 16 // leaf page header - leafInuse += 501 * 16 // leaf elements - leafInuse += 500*3 + len(bigKey) // leaf keys - leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values - if stats.LeafInuse != leafInuse { - t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) - } - - // Only check allocations for 4KB pages. - if os.Getpagesize() == 4096 { - if stats.BranchAlloc != 4096 { - t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) - } else if stats.LeafAlloc != 36864 { - t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) - } - } - - if stats.BucketN != 1 { - t.Fatalf("unexpected BucketN: %d", stats.BucketN) - } else if stats.InlineBucketN != 0 { - t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) - } else if stats.InlineBucketInuse != 0 { - t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure a bucket with random insertion utilizes fill percentage correctly. -func TestBucket_Stats_RandomFill(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } else if os.Getpagesize() != 4096 { - t.Skip("invalid page size for test") - } - - db := MustOpenDB() - defer db.MustClose() - - // Add a set of values in random order. It will be the same random - // order so we can maintain consistency between test runs. 
- var count int - rand := rand.New(rand.NewSource(42)) - for _, i := range rand.Perm(1000) { - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucketIfNotExists([]byte("woojits")) - if err != nil { - t.Fatal(err) - } - b.FillPercent = 0.9 - for _, j := range rand.Perm(100) { - index := (j * 10000) + i - if err := b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000")); err != nil { - t.Fatal(err) - } - count++ - } - return nil - }); err != nil { - t.Fatal(err) - } - } - - db.MustCheck() - - if err := db.View(func(tx *bolt.Tx) error { - stats := tx.Bucket([]byte("woojits")).Stats() - if stats.KeyN != 100000 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } - - if stats.BranchPageN != 98 { - t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) - } else if stats.BranchOverflowN != 0 { - t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) - } else if stats.BranchInuse != 130984 { - t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) - } else if stats.BranchAlloc != 401408 { - t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) - } - - if stats.LeafPageN != 3412 { - t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) - } else if stats.LeafOverflowN != 0 { - t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) - } else if stats.LeafInuse != 4742482 { - t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) - } else if stats.LeafAlloc != 13975552 { - t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure a bucket can calculate stats. -func TestBucket_Stats_Small(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - // Add a bucket that fits on a single root leaf. 
- b, err := tx.CreateBucket([]byte("whozawhats")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - - return nil - }); err != nil { - t.Fatal(err) - } - - db.MustCheck() - - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("whozawhats")) - stats := b.Stats() - if stats.BranchPageN != 0 { - t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) - } else if stats.BranchOverflowN != 0 { - t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) - } else if stats.LeafPageN != 0 { - t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) - } else if stats.LeafOverflowN != 0 { - t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) - } else if stats.KeyN != 1 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } else if stats.Depth != 1 { - t.Fatalf("unexpected Depth: %d", stats.Depth) - } else if stats.BranchInuse != 0 { - t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) - } else if stats.LeafInuse != 0 { - t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) - } - - if os.Getpagesize() == 4096 { - if stats.BranchAlloc != 0 { - t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) - } else if stats.LeafAlloc != 0 { - t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) - } - } - - if stats.BucketN != 1 { - t.Fatalf("unexpected BucketN: %d", stats.BucketN) - } else if stats.InlineBucketN != 1 { - t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) - } else if stats.InlineBucketInuse != 16+16+6 { - t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -func TestBucket_Stats_EmptyBucket(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - // Add a bucket that fits on a single root leaf. - if _, err := tx.CreateBucket([]byte("whozawhats")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - db.MustCheck() - - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("whozawhats")) - stats := b.Stats() - if stats.BranchPageN != 0 { - t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) - } else if stats.BranchOverflowN != 0 { - t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) - } else if stats.LeafPageN != 0 { - t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) - } else if stats.LeafOverflowN != 0 { - t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) - } else if stats.KeyN != 0 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } else if stats.Depth != 1 { - t.Fatalf("unexpected Depth: %d", stats.Depth) - } else if stats.BranchInuse != 0 { - t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) - } else if stats.LeafInuse != 0 { - t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) - } - - if os.Getpagesize() == 4096 { - if stats.BranchAlloc != 0 { - t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) - } else if stats.LeafAlloc != 0 { - t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) - } - } - - if stats.BucketN != 1 { - t.Fatalf("unexpected BucketN: %d", stats.BucketN) - } else if stats.InlineBucketN != 1 { - t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) - } else if stats.InlineBucketInuse != 16 { - t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure a bucket can calculate stats. 
-func TestBucket_Stats_Nested(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("foo")) - if err != nil { - t.Fatal(err) - } - for i := 0; i < 100; i++ { - if err := b.Put([]byte(fmt.Sprintf("%02d", i)), []byte(fmt.Sprintf("%02d", i))); err != nil { - t.Fatal(err) - } - } - - bar, err := b.CreateBucket([]byte("bar")) - if err != nil { - t.Fatal(err) - } - for i := 0; i < 10; i++ { - if err := bar.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { - t.Fatal(err) - } - } - - baz, err := bar.CreateBucket([]byte("baz")) - if err != nil { - t.Fatal(err) - } - for i := 0; i < 10; i++ { - if err := baz.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { - t.Fatal(err) - } - } - - return nil - }); err != nil { - t.Fatal(err) - } - - db.MustCheck() - - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("foo")) - stats := b.Stats() - if stats.BranchPageN != 0 { - t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) - } else if stats.BranchOverflowN != 0 { - t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) - } else if stats.LeafPageN != 2 { - t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) - } else if stats.LeafOverflowN != 0 { - t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) - } else if stats.KeyN != 122 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } else if stats.Depth != 3 { - t.Fatalf("unexpected Depth: %d", stats.Depth) - } else if stats.BranchInuse != 0 { - t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) - } - - foo := 16 // foo (pghdr) - foo += 101 * 16 // foo leaf elements - foo += 100*2 + 100*2 // foo leaf key/values - foo += 3 + 16 // foo -> bar key/value - - bar := 16 // bar (pghdr) - bar += 11 * 16 // bar leaf elements - bar += 10 + 10 // bar leaf key/values - bar += 3 + 16 // bar -> baz key/value - - baz := 16 // baz (inline) (pghdr) - baz += 10 * 16 // baz leaf elements - baz += 10 + 10 // baz leaf key/values - - if stats.LeafInuse != foo+bar+baz { - t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) - } - - if os.Getpagesize() == 4096 { - if stats.BranchAlloc != 0 { - t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) - } else if stats.LeafAlloc != 8192 { - t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) - } - } - - if stats.BucketN != 3 { - t.Fatalf("unexpected BucketN: %d", stats.BucketN) - } else if stats.InlineBucketN != 1 { - t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) - } else if stats.InlineBucketInuse != baz { - t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure a large bucket can calculate stats. -func TestBucket_Stats_Large(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } - - db := MustOpenDB() - defer db.MustClose() - - var index int - for i := 0; i < 100; i++ { - // Add bucket with lots of keys. 
- if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucketIfNotExists([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - for i := 0; i < 1000; i++ { - if err := b.Put([]byte(strconv.Itoa(index)), []byte(strconv.Itoa(index))); err != nil { - t.Fatal(err) - } - index++ - } - return nil - }); err != nil { - t.Fatal(err) - } - } - - db.MustCheck() - - if err := db.View(func(tx *bolt.Tx) error { - stats := tx.Bucket([]byte("widgets")).Stats() - if stats.BranchPageN != 13 { - t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) - } else if stats.BranchOverflowN != 0 { - t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) - } else if stats.LeafPageN != 1196 { - t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) - } else if stats.LeafOverflowN != 0 { - t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) - } else if stats.KeyN != 100000 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } else if stats.Depth != 3 { - t.Fatalf("unexpected Depth: %d", stats.Depth) - } else if stats.BranchInuse != 25257 { - t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) - } else if stats.LeafInuse != 2596916 { - t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) - } - - if os.Getpagesize() == 4096 { - if stats.BranchAlloc != 53248 { - t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) - } else if stats.LeafAlloc != 4898816 { - t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) - } - } - - if stats.BucketN != 1 { - t.Fatalf("unexpected BucketN: %d", stats.BucketN) - } else if stats.InlineBucketN != 0 { - t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) - } else if stats.InlineBucketInuse != 0 { - t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket can write random keys and values across multiple transactions. -func TestBucket_Put_Single(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } - - index := 0 - if err := quick.Check(func(items testdata) bool { - db := MustOpenDB() - defer db.MustClose() - - m := make(map[string][]byte) - - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - for _, item := range items { - if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.Bucket([]byte("widgets")).Put(item.Key, item.Value); err != nil { - panic("put error: " + err.Error()) - } - m[string(item.Key)] = item.Value - return nil - }); err != nil { - t.Fatal(err) - } - - // Verify all key/values so far. - if err := db.View(func(tx *bolt.Tx) error { - i := 0 - for k, v := range m { - value := tx.Bucket([]byte("widgets")).Get([]byte(k)) - if !bytes.Equal(value, v) { - t.Logf("value mismatch [run %d] (%d of %d):\nkey: %x\ngot: %x\nexp: %x", index, i, len(m), []byte(k), value, v) - db.CopyTempFile() - t.FailNow() - } - i++ - } - return nil - }); err != nil { - t.Fatal(err) - } - } - - index++ - return true - }, nil); err != nil { - t.Error(err) - } -} - -// Ensure that a transaction can insert multiple key/value pairs at once. -func TestBucket_Put_Multiple(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } - - if err := quick.Check(func(items testdata) bool { - db := MustOpenDB() - defer db.MustClose() - - // Bulk insert all values. 
- if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for _, item := range items { - if err := b.Put(item.Key, item.Value); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Verify all items exist. - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for _, item := range items { - value := b.Get(item.Key) - if !bytes.Equal(item.Value, value) { - db.CopyTempFile() - t.Fatalf("exp=%x; got=%x", item.Value, value) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - - return true - }, qconfig()); err != nil { - t.Error(err) - } -} - -// Ensure that a transaction can delete all key/value pairs and return to a single leaf page. -func TestBucket_Delete_Quick(t *testing.T) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } - - if err := quick.Check(func(items testdata) bool { - db := MustOpenDB() - defer db.MustClose() - - // Bulk insert all values. - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for _, item := range items { - if err := b.Put(item.Key, item.Value); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Remove items one at a time and check consistency. - for _, item := range items { - if err := db.Update(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("widgets")).Delete(item.Key) - }); err != nil { - t.Fatal(err) - } - } - - // Anything before our deletion index should be nil. - if err := db.View(func(tx *bolt.Tx) error { - if err := tx.Bucket([]byte("widgets")).ForEach(func(k, v []byte) error { - t.Fatalf("bucket should be empty; found: %06x", trunc(k, 3)) - return nil - }); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - return true - }, qconfig()); err != nil { - t.Error(err) - } -} - -func ExampleBucket_Put() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Start a write transaction. - if err := db.Update(func(tx *bolt.Tx) error { - // Create a bucket. - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - return err - } - - // Set the value "bar" for the key "foo". - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - return err - } - return nil - }); err != nil { - log.Fatal(err) - } - - // Read value back in a different read-only transaction. - if err := db.View(func(tx *bolt.Tx) error { - value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) - fmt.Printf("The value of 'foo' is: %s\n", value) - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // The value of 'foo' is: bar -} - -func ExampleBucket_Delete() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Start a write transaction. - if err := db.Update(func(tx *bolt.Tx) error { - // Create a bucket. 
- b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - return err - } - - // Set the value "bar" for the key "foo". - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - return err - } - - // Retrieve the key back from the database and verify it. - value := b.Get([]byte("foo")) - fmt.Printf("The value of 'foo' was: %s\n", value) - - return nil - }); err != nil { - log.Fatal(err) - } - - // Delete the key in a different write transaction. - if err := db.Update(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("widgets")).Delete([]byte("foo")) - }); err != nil { - log.Fatal(err) - } - - // Retrieve the key again. - if err := db.View(func(tx *bolt.Tx) error { - value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) - if value == nil { - fmt.Printf("The value of 'foo' is now: nil\n") - } - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // The value of 'foo' was: bar - // The value of 'foo' is now: nil -} - -func ExampleBucket_ForEach() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Insert data into a bucket. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("animals")) - if err != nil { - return err - } - - if err := b.Put([]byte("dog"), []byte("fun")); err != nil { - return err - } - if err := b.Put([]byte("cat"), []byte("lame")); err != nil { - return err - } - if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { - return err - } - - // Iterate over items in sorted key order. - if err := b.ForEach(func(k, v []byte) error { - fmt.Printf("A %s is %s.\n", k, v) - return nil - }); err != nil { - return err - } - - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // A cat is lame. - // A dog is fun. - // A liger is awesome. -} diff --git a/tests/cli-opts.sh b/tests/cli-opts.sh new file mode 100755 index 0000000..e69de29 --- /dev/null +++ b/tests/cli-opts.sh diff --git a/tests/cursor_test.go b/tests/cursor_test.go deleted file mode 100644 index 562d60f..0000000 --- a/tests/cursor_test.go +++ /dev/null @@ -1,817 +0,0 @@ -package bolt_test - -import ( - "bytes" - "encoding/binary" - "fmt" - "log" - "os" - "reflect" - "sort" - "testing" - "testing/quick" - - "github.com/boltdb/bolt" -) - -// Ensure that a cursor can return a reference to the bucket that created it. -func TestCursor_Bucket(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if cb := b.Cursor().Bucket(); !reflect.DeepEqual(cb, b) { - t.Fatal("cursor bucket mismatch") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a Tx cursor can seek to the appropriate keys. 
-func TestCursor_Seek(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("0001")); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("bar"), []byte("0002")); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("baz"), []byte("0003")); err != nil { - t.Fatal(err) - } - - if _, err := b.CreateBucket([]byte("bkt")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - c := tx.Bucket([]byte("widgets")).Cursor() - - // Exact match should go to the key. - if k, v := c.Seek([]byte("bar")); !bytes.Equal(k, []byte("bar")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte("0002")) { - t.Fatalf("unexpected value: %v", v) - } - - // Inexact match should go to the next key. - if k, v := c.Seek([]byte("bas")); !bytes.Equal(k, []byte("baz")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte("0003")) { - t.Fatalf("unexpected value: %v", v) - } - - // Low key should go to the first key. - if k, v := c.Seek([]byte("")); !bytes.Equal(k, []byte("bar")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte("0002")) { - t.Fatalf("unexpected value: %v", v) - } - - // High key should return no key. - if k, v := c.Seek([]byte("zzz")); k != nil { - t.Fatalf("expected nil key: %v", k) - } else if v != nil { - t.Fatalf("expected nil value: %v", v) - } - - // Buckets should return their key but no value. - if k, v := c.Seek([]byte("bkt")); !bytes.Equal(k, []byte("bkt")) { - t.Fatalf("unexpected key: %v", k) - } else if v != nil { - t.Fatalf("expected nil value: %v", v) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -func TestCursor_Delete(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - const count = 1000 - - // Insert every other key between 0 and $count. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - for i := 0; i < count; i += 1 { - k := make([]byte, 8) - binary.BigEndian.PutUint64(k, uint64(i)) - if err := b.Put(k, make([]byte, 100)); err != nil { - t.Fatal(err) - } - } - if _, err := b.CreateBucket([]byte("sub")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - c := tx.Bucket([]byte("widgets")).Cursor() - bound := make([]byte, 8) - binary.BigEndian.PutUint64(bound, uint64(count/2)) - for key, _ := c.First(); bytes.Compare(key, bound) < 0; key, _ = c.Next() { - if err := c.Delete(); err != nil { - t.Fatal(err) - } - } - - c.Seek([]byte("sub")) - if err := c.Delete(); err != bolt.ErrIncompatibleValue { - t.Fatalf("unexpected error: %s", err) - } - - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - stats := tx.Bucket([]byte("widgets")).Stats() - if stats.KeyN != count/2+1 { - t.Fatalf("unexpected KeyN: %d", stats.KeyN) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a Tx cursor can seek to the appropriate keys when there are a -// large number of keys. This test also checks that seek will always move -// forward to the next key. 
-//
-// Related: https://github.com/boltdb/bolt/pull/187
-func TestCursor_Seek_Large(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	var count = 10000
-
-	// Insert every other key between 0 and $count.
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		for i := 0; i < count; i += 100 {
-			for j := i; j < i+100; j += 2 {
-				k := make([]byte, 8)
-				binary.BigEndian.PutUint64(k, uint64(j))
-				if err := b.Put(k, make([]byte, 100)); err != nil {
-					t.Fatal(err)
-				}
-			}
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		for i := 0; i < count; i++ {
-			seek := make([]byte, 8)
-			binary.BigEndian.PutUint64(seek, uint64(i))
-
-			k, _ := c.Seek(seek)
-
-			// The last seek is beyond the end of the range so
-			// it should return nil.
-			if i == count-1 {
-				if k != nil {
-					t.Fatal("expected nil key")
-				}
-				continue
-			}
-
-			// Otherwise we should seek to the exact key or the next key.
-			num := binary.BigEndian.Uint64(k)
-			if i%2 == 0 {
-				if num != uint64(i) {
-					t.Fatalf("unexpected num: %d", num)
-				}
-			} else {
-				if num != uint64(i+1) {
-					t.Fatalf("unexpected num: %d", num)
-				}
-			}
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a cursor can iterate over an empty bucket without error.
-func TestCursor_EmptyBucket(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte("widgets"))
-		return err
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		k, v := c.First()
-		if k != nil {
-			t.Fatalf("unexpected key: %v", k)
-		} else if v != nil {
-			t.Fatalf("unexpected value: %v", v)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a Tx cursor can reverse iterate over an empty bucket without error.
-func TestCursor_EmptyBucketReverse(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte("widgets"))
-		return err
-	}); err != nil {
-		t.Fatal(err)
-	}
-	if err := db.View(func(tx *bolt.Tx) error {
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		k, v := c.Last()
-		if k != nil {
-			t.Fatalf("unexpected key: %v", k)
-		} else if v != nil {
-			t.Fatalf("unexpected value: %v", v)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a Tx cursor can iterate over a single root with a couple elements.
-func TestCursor_Iterate_Leaf(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("baz"), []byte{}); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte{0}); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("bar"), []byte{1}); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - tx, err := db.Begin(false) - if err != nil { - t.Fatal(err) - } - defer func() { _ = tx.Rollback() }() - - c := tx.Bucket([]byte("widgets")).Cursor() - - k, v := c.First() - if !bytes.Equal(k, []byte("bar")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte{1}) { - t.Fatalf("unexpected value: %v", v) - } - - k, v = c.Next() - if !bytes.Equal(k, []byte("baz")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte{}) { - t.Fatalf("unexpected value: %v", v) - } - - k, v = c.Next() - if !bytes.Equal(k, []byte("foo")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte{0}) { - t.Fatalf("unexpected value: %v", v) - } - - k, v = c.Next() - if k != nil { - t.Fatalf("expected nil key: %v", k) - } else if v != nil { - t.Fatalf("expected nil value: %v", v) - } - - k, v = c.Next() - if k != nil { - t.Fatalf("expected nil key: %v", k) - } else if v != nil { - t.Fatalf("expected nil value: %v", v) - } - - if err := tx.Rollback(); err != nil { - t.Fatal(err) - } -} - -// Ensure that a Tx cursor can iterate in reverse over a single root with a couple elements. -func TestCursor_LeafRootReverse(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("baz"), []byte{}); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte{0}); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("bar"), []byte{1}); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - tx, err := db.Begin(false) - if err != nil { - t.Fatal(err) - } - c := tx.Bucket([]byte("widgets")).Cursor() - - if k, v := c.Last(); !bytes.Equal(k, []byte("foo")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte{0}) { - t.Fatalf("unexpected value: %v", v) - } - - if k, v := c.Prev(); !bytes.Equal(k, []byte("baz")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte{}) { - t.Fatalf("unexpected value: %v", v) - } - - if k, v := c.Prev(); !bytes.Equal(k, []byte("bar")) { - t.Fatalf("unexpected key: %v", k) - } else if !bytes.Equal(v, []byte{1}) { - t.Fatalf("unexpected value: %v", v) - } - - if k, v := c.Prev(); k != nil { - t.Fatalf("expected nil key: %v", k) - } else if v != nil { - t.Fatalf("expected nil value: %v", v) - } - - if k, v := c.Prev(); k != nil { - t.Fatalf("expected nil key: %v", k) - } else if v != nil { - t.Fatalf("expected nil value: %v", v) - } - - if err := tx.Rollback(); err != nil { - t.Fatal(err) - } -} - -// Ensure that a Tx cursor can restart from the beginning. 
-func TestCursor_Restart(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("bar"), []byte{}); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte{}); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	tx, err := db.Begin(false)
-	if err != nil {
-		t.Fatal(err)
-	}
-	c := tx.Bucket([]byte("widgets")).Cursor()
-
-	if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) {
-		t.Fatalf("unexpected key: %v", k)
-	}
-	if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) {
-		t.Fatalf("unexpected key: %v", k)
-	}
-
-	if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) {
-		t.Fatalf("unexpected key: %v", k)
-	}
-	if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) {
-		t.Fatalf("unexpected key: %v", k)
-	}
-
-	if err := tx.Rollback(); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a cursor can skip over empty pages that have been deleted.
-func TestCursor_First_EmptyPages(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	// Create 1000 keys in the "widgets" bucket.
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		for i := 0; i < 1000; i++ {
-			if err := b.Put(u64tob(uint64(i)), []byte{}); err != nil {
-				t.Fatal(err)
-			}
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	// Delete 600 of the 1000 keys and then try to iterate.
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b := tx.Bucket([]byte("widgets"))
-		for i := 0; i < 600; i++ {
-			if err := b.Delete(u64tob(uint64(i))); err != nil {
-				t.Fatal(err)
-			}
-		}
-
-		c := b.Cursor()
-		var n int
-		for k, _ := c.First(); k != nil; k, _ = c.Next() {
-			n++
-		}
-		if n != 400 {
-			t.Fatalf("unexpected key count: %d", n)
-		}
-
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a Tx can iterate over all elements in a bucket.
-func TestCursor_QuickCheck(t *testing.T) {
-	f := func(items testdata) bool {
-		db := MustOpenDB()
-		defer db.MustClose()
-
-		// Bulk insert all values.
-		tx, err := db.Begin(true)
-		if err != nil {
-			t.Fatal(err)
-		}
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		for _, item := range items {
-			if err := b.Put(item.Key, item.Value); err != nil {
-				t.Fatal(err)
-			}
-		}
-		if err := tx.Commit(); err != nil {
-			t.Fatal(err)
-		}
-
-		// Sort test data.
-		sort.Sort(items)
-
-		// Iterate over all items and check consistency.
-		var index = 0
-		tx, err = db.Begin(false)
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		for k, v := c.First(); k != nil && index < len(items); k, v = c.Next() {
-			if !bytes.Equal(k, items[index].Key) {
-				t.Fatalf("unexpected key: %v", k)
-			} else if !bytes.Equal(v, items[index].Value) {
-				t.Fatalf("unexpected value: %v", v)
-			}
-			index++
-		}
-		if len(items) != index {
-			t.Fatalf("unexpected item count: %v, expected %v", index, len(items))
-		}
-
-		if err := tx.Rollback(); err != nil {
-			t.Fatal(err)
-		}
-
-		return true
-	}
-	if err := quick.Check(f, qconfig()); err != nil {
-		t.Error(err)
-	}
-}
-
-// Ensure that a transaction can iterate over all elements in a bucket in reverse.
-func TestCursor_QuickCheck_Reverse(t *testing.T) {
-	f := func(items testdata) bool {
-		db := MustOpenDB()
-		defer db.MustClose()
-
-		// Bulk insert all values.
-		tx, err := db.Begin(true)
-		if err != nil {
-			t.Fatal(err)
-		}
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		for _, item := range items {
-			if err := b.Put(item.Key, item.Value); err != nil {
-				t.Fatal(err)
-			}
-		}
-		if err := tx.Commit(); err != nil {
-			t.Fatal(err)
-		}
-
-		// Sort test data.
-		sort.Sort(revtestdata(items))
-
-		// Iterate over all items and check consistency.
-		var index = 0
-		tx, err = db.Begin(false)
-		if err != nil {
-			t.Fatal(err)
-		}
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		for k, v := c.Last(); k != nil && index < len(items); k, v = c.Prev() {
-			if !bytes.Equal(k, items[index].Key) {
-				t.Fatalf("unexpected key: %v", k)
-			} else if !bytes.Equal(v, items[index].Value) {
-				t.Fatalf("unexpected value: %v", v)
-			}
-			index++
-		}
-		if len(items) != index {
-			t.Fatalf("unexpected item count: %v, expected %v", index, len(items))
-		}
-
-		if err := tx.Rollback(); err != nil {
-			t.Fatal(err)
-		}
-
-		return true
-	}
-	if err := quick.Check(f, qconfig()); err != nil {
-		t.Error(err)
-	}
-}
-
-// Ensure that a Tx cursor can iterate over subbuckets.
-func TestCursor_QuickCheck_BucketsOnly(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("baz")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		var names []string
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		for k, v := c.First(); k != nil; k, v = c.Next() {
-			names = append(names, string(k))
-			if v != nil {
-				t.Fatalf("unexpected value: %v", v)
-			}
-		}
-		if !reflect.DeepEqual(names, []string{"bar", "baz", "foo"}) {
-			t.Fatalf("unexpected names: %+v", names)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a Tx cursor can reverse iterate over subbuckets.
-func TestCursor_QuickCheck_BucketsOnly_Reverse(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-		if _, err := b.CreateBucket([]byte("baz")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-
-	if err := db.View(func(tx *bolt.Tx) error {
-		var names []string
-		c := tx.Bucket([]byte("widgets")).Cursor()
-		for k, v := c.Last(); k != nil; k, v = c.Prev() {
-			names = append(names, string(k))
-			if v != nil {
-				t.Fatalf("unexpected value: %v", v)
-			}
-		}
-		if !reflect.DeepEqual(names, []string{"foo", "baz", "bar"}) {
-			t.Fatalf("unexpected names: %+v", names)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-func ExampleCursor() {
-	// Open the database.
-	db, err := bolt.Open(tempfile(), 0666, nil)
-	if err != nil {
-		log.Fatal(err)
-	}
-	defer os.Remove(db.Path())
-
-	// Start a read-write transaction.
-	if err := db.Update(func(tx *bolt.Tx) error {
-		// Create a new bucket.
-		b, err := tx.CreateBucket([]byte("animals"))
-		if err != nil {
-			return err
-		}
-
-		// Insert data into a bucket.
- if err := b.Put([]byte("dog"), []byte("fun")); err != nil { - log.Fatal(err) - } - if err := b.Put([]byte("cat"), []byte("lame")); err != nil { - log.Fatal(err) - } - if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { - log.Fatal(err) - } - - // Create a cursor for iteration. - c := b.Cursor() - - // Iterate over items in sorted key order. This starts from the - // first key/value pair and updates the k/v variables to the - // next key/value on each iteration. - // - // The loop finishes at the end of the cursor when a nil key is returned. - for k, v := c.First(); k != nil; k, v = c.Next() { - fmt.Printf("A %s is %s.\n", k, v) - } - - return nil - }); err != nil { - log.Fatal(err) - } - - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // A cat is lame. - // A dog is fun. - // A liger is awesome. -} - -func ExampleCursor_reverse() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Start a read-write transaction. - if err := db.Update(func(tx *bolt.Tx) error { - // Create a new bucket. - b, err := tx.CreateBucket([]byte("animals")) - if err != nil { - return err - } - - // Insert data into a bucket. - if err := b.Put([]byte("dog"), []byte("fun")); err != nil { - log.Fatal(err) - } - if err := b.Put([]byte("cat"), []byte("lame")); err != nil { - log.Fatal(err) - } - if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { - log.Fatal(err) - } - - // Create a cursor for iteration. - c := b.Cursor() - - // Iterate over items in reverse sorted key order. This starts - // from the last key/value pair and updates the k/v variables to - // the previous key/value on each iteration. - // - // The loop finishes at the beginning of the cursor when a nil key - // is returned. - for k, v := c.Last(); k != nil; k, v = c.Prev() { - fmt.Printf("A %s is %s.\n", k, v) - } - - return nil - }); err != nil { - log.Fatal(err) - } - - // Close the database to release the file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // A liger is awesome. - // A dog is fun. - // A cat is lame. -} diff --git a/tests/db_test.go b/tests/db_test.go deleted file mode 100644 index 3034d4f..0000000 --- a/tests/db_test.go +++ /dev/null @@ -1,1545 +0,0 @@ -package bolt_test - -import ( - "bytes" - "encoding/binary" - "errors" - "flag" - "fmt" - "hash/fnv" - "io/ioutil" - "log" - "os" - "path/filepath" - "regexp" - "sort" - "strings" - "sync" - "testing" - "time" - "unsafe" - - "github.com/boltdb/bolt" -) - -var statsFlag = flag.Bool("stats", false, "show performance stats") - -// version is the data file format version. -const version = 2 - -// magic is the marker value to indicate that a file is a Bolt DB. -const magic uint32 = 0xED0CDAED - -// pageSize is the size of one page in the data file. -const pageSize = 4096 - -// pageHeaderSize is the size of a page header. -const pageHeaderSize = 16 - -// meta represents a simplified version of a database meta page for testing. -type meta struct { - magic uint32 - version uint32 - _ uint32 - _ uint32 - _ [16]byte - _ uint64 - pgid uint64 - _ uint64 - checksum uint64 -} - -// Ensure that a database can be opened without error. 
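-//
-// The lifecycle under test, in sketch form:
-//
-//	db, err := bolt.Open(path, 0666, nil) // creates the file if missing
-//	db.Path()                             // -> path
-//	db.Close()                            // releases the file lock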
-func TestOpen(t *testing.T) { - path := tempfile() - db, err := bolt.Open(path, 0666, nil) - if err != nil { - t.Fatal(err) - } else if db == nil { - t.Fatal("expected db") - } - - if s := db.Path(); s != path { - t.Fatalf("unexpected path: %s", s) - } - - if err := db.Close(); err != nil { - t.Fatal(err) - } -} - -// Ensure that opening a database with a blank path returns an error. -func TestOpen_ErrPathRequired(t *testing.T) { - _, err := bolt.Open("", 0666, nil) - if err == nil { - t.Fatalf("expected error") - } -} - -// Ensure that opening a database with a bad path returns an error. -func TestOpen_ErrNotExists(t *testing.T) { - _, err := bolt.Open(filepath.Join(tempfile(), "bad-path"), 0666, nil) - if err == nil { - t.Fatal("expected error") - } -} - -// Ensure that opening a file that is not a Bolt database returns ErrInvalid. -func TestOpen_ErrInvalid(t *testing.T) { - path := tempfile() - - f, err := os.Create(path) - if err != nil { - t.Fatal(err) - } - if _, err := fmt.Fprintln(f, "this is not a bolt database"); err != nil { - t.Fatal(err) - } - if err := f.Close(); err != nil { - t.Fatal(err) - } - defer os.Remove(path) - - if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrInvalid { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that opening a file with two invalid versions returns ErrVersionMismatch. -func TestOpen_ErrVersionMismatch(t *testing.T) { - if pageSize != os.Getpagesize() { - t.Skip("page size mismatch") - } - - // Create empty database. - db := MustOpenDB() - path := db.Path() - defer db.MustClose() - - // Close database. - if err := db.DB.Close(); err != nil { - t.Fatal(err) - } - - // Read data file. - buf, err := ioutil.ReadFile(path) - if err != nil { - t.Fatal(err) - } - - // Rewrite meta pages. - meta0 := (*meta)(unsafe.Pointer(&buf[pageHeaderSize])) - meta0.version++ - meta1 := (*meta)(unsafe.Pointer(&buf[pageSize+pageHeaderSize])) - meta1.version++ - if err := ioutil.WriteFile(path, buf, 0666); err != nil { - t.Fatal(err) - } - - // Reopen data file. - if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrVersionMismatch { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that opening a file with two invalid checksums returns ErrChecksum. -func TestOpen_ErrChecksum(t *testing.T) { - if pageSize != os.Getpagesize() { - t.Skip("page size mismatch") - } - - // Create empty database. - db := MustOpenDB() - path := db.Path() - defer db.MustClose() - - // Close database. - if err := db.DB.Close(); err != nil { - t.Fatal(err) - } - - // Read data file. - buf, err := ioutil.ReadFile(path) - if err != nil { - t.Fatal(err) - } - - // Rewrite meta pages. - meta0 := (*meta)(unsafe.Pointer(&buf[pageHeaderSize])) - meta0.pgid++ - meta1 := (*meta)(unsafe.Pointer(&buf[pageSize+pageHeaderSize])) - meta1.pgid++ - if err := ioutil.WriteFile(path, buf, 0666); err != nil { - t.Fatal(err) - } - - // Reopen data file. - if _, err := bolt.Open(path, 0666, nil); err != bolt.ErrChecksum { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that opening a database does not increase its size. -// https://github.com/boltdb/bolt/issues/291 -func TestOpen_Size(t *testing.T) { - // Open a data file. - db := MustOpenDB() - path := db.Path() - defer db.MustClose() - - pagesize := db.Info().PageSize - - // Insert until we get above the minimum 4MB size. 
- if err := db.Update(func(tx *bolt.Tx) error { - b, _ := tx.CreateBucketIfNotExists([]byte("data")) - for i := 0; i < 10000; i++ { - if err := b.Put([]byte(fmt.Sprintf("%04d", i)), make([]byte, 1000)); err != nil { - t.Fatal(err) - } - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Close database and grab the size. - if err := db.DB.Close(); err != nil { - t.Fatal(err) - } - sz := fileSize(path) - if sz == 0 { - t.Fatalf("unexpected new file size: %d", sz) - } - - // Reopen database, update, and check size again. - db0, err := bolt.Open(path, 0666, nil) - if err != nil { - t.Fatal(err) - } - if err := db0.Update(func(tx *bolt.Tx) error { - if err := tx.Bucket([]byte("data")).Put([]byte{0}, []byte{0}); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - if err := db0.Close(); err != nil { - t.Fatal(err) - } - newSz := fileSize(path) - if newSz == 0 { - t.Fatalf("unexpected new file size: %d", newSz) - } - - // Compare the original size with the new size. - // db size might increase by a few page sizes due to the new small update. - if sz < newSz-5*int64(pagesize) { - t.Fatalf("unexpected file growth: %d => %d", sz, newSz) - } -} - -// Ensure that opening a database beyond the max step size does not increase its size. -// https://github.com/boltdb/bolt/issues/303 -func TestOpen_Size_Large(t *testing.T) { - if testing.Short() { - t.Skip("short mode") - } - - // Open a data file. - db := MustOpenDB() - path := db.Path() - defer db.MustClose() - - pagesize := db.Info().PageSize - - // Insert until we get above the minimum 4MB size. - var index uint64 - for i := 0; i < 10000; i++ { - if err := db.Update(func(tx *bolt.Tx) error { - b, _ := tx.CreateBucketIfNotExists([]byte("data")) - for j := 0; j < 1000; j++ { - if err := b.Put(u64tob(index), make([]byte, 50)); err != nil { - t.Fatal(err) - } - index++ - } - return nil - }); err != nil { - t.Fatal(err) - } - } - - // Close database and grab the size. - if err := db.DB.Close(); err != nil { - t.Fatal(err) - } - sz := fileSize(path) - if sz == 0 { - t.Fatalf("unexpected new file size: %d", sz) - } else if sz < (1 << 30) { - t.Fatalf("expected larger initial size: %d", sz) - } - - // Reopen database, update, and check size again. - db0, err := bolt.Open(path, 0666, nil) - if err != nil { - t.Fatal(err) - } - if err := db0.Update(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("data")).Put([]byte{0}, []byte{0}) - }); err != nil { - t.Fatal(err) - } - if err := db0.Close(); err != nil { - t.Fatal(err) - } - - newSz := fileSize(path) - if newSz == 0 { - t.Fatalf("unexpected new file size: %d", newSz) - } - - // Compare the original size with the new size. - // db size might increase by a few page sizes due to the new small update. - if sz < newSz-5*int64(pagesize) { - t.Fatalf("unexpected file growth: %d => %d", sz, newSz) - } -} - -// Ensure that a re-opened database is consistent. -func TestOpen_Check(t *testing.T) { - path := tempfile() - - db, err := bolt.Open(path, 0666, nil) - if err != nil { - t.Fatal(err) - } - if err := db.View(func(tx *bolt.Tx) error { return <-tx.Check() }); err != nil { - t.Fatal(err) - } - if err := db.Close(); err != nil { - t.Fatal(err) - } - - db, err = bolt.Open(path, 0666, nil) - if err != nil { - t.Fatal(err) - } - if err := db.View(func(tx *bolt.Tx) error { return <-tx.Check() }); err != nil { - t.Fatal(err) - } - if err := db.Close(); err != nil { - t.Fatal(err) - } -} - -// Ensure that write errors to the meta file handler during initialization are returned. 
-func TestOpen_MetaInitWriteError(t *testing.T) {
-	t.Skip("pending")
-}
-
-// Ensure that a database that is too small returns an error.
-func TestOpen_FileTooSmall(t *testing.T) {
-	path := tempfile()
-
-	db, err := bolt.Open(path, 0666, nil)
-	if err != nil {
-		t.Fatal(err)
-	}
-	if err := db.Close(); err != nil {
-		t.Fatal(err)
-	}
-
-	// Corrupt the database by truncating it below the minimum size.
-	if err := os.Truncate(path, int64(os.Getpagesize())); err != nil {
-		t.Fatal(err)
-	}
-
-	db, err = bolt.Open(path, 0666, nil)
-	if err == nil || err.Error() != "file size too small" {
-		t.Fatalf("unexpected error: %s", err)
-	}
-}
-
-// TestDB_Open_InitialMmapSize tests whether an InitialMmapSize large
-// enough to hold the data of a concurrent write transaction avoids the
-// deadlock in which a read transaction blocks the write transaction.
-// This is a very hacky test since the mmap size is not exposed.
-func TestDB_Open_InitialMmapSize(t *testing.T) {
-	path := tempfile()
-	defer os.Remove(path)
-
-	initMmapSize := 1 << 31  // 2GB
-	testWriteSize := 1 << 27 // 134MB
-
-	db, err := bolt.Open(path, 0666, &bolt.Options{InitialMmapSize: initMmapSize})
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Create a long-running read transaction that is not closed until
-	// after the write completes.
-	rtx, err := db.Begin(false)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Create a write transaction.
-	wtx, err := db.Begin(true)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	b, err := wtx.CreateBucket([]byte("test"))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Stage a large write; it is committed in the goroutine below.
-	err = b.Put([]byte("foo"), make([]byte, testWriteSize))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	done := make(chan struct{})
-
-	go func() {
-		// t.Fatal must not be called from a non-test goroutine;
-		// report the error and let the test fail instead.
-		if err := wtx.Commit(); err != nil {
-			t.Error(err)
-		}
-		done <- struct{}{}
-	}()
-
-	select {
-	case <-time.After(5 * time.Second):
-		t.Errorf("reader unexpectedly blocked the writer")
-	case <-done:
-	}
-
-	if err := rtx.Rollback(); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that a database cannot open a transaction when it's not open.
-func TestDB_Begin_ErrDatabaseNotOpen(t *testing.T) {
-	var db bolt.DB
-	if _, err := db.Begin(false); err != bolt.ErrDatabaseNotOpen {
-		t.Fatalf("unexpected error: %s", err)
-	}
-}
-
-// Ensure that a read-write transaction can be retrieved.
-func TestDB_BeginRW(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	tx, err := db.Begin(true)
-	if err != nil {
-		t.Fatal(err)
-	} else if tx == nil {
-		t.Fatal("expected tx")
-	}
-
-	if tx.DB() != db.DB {
-		t.Fatal("unexpected tx database")
-	} else if !tx.Writable() {
-		t.Fatal("expected writable tx")
-	}
-
-	if err := tx.Commit(); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure that opening a transaction while the DB is closed returns an error.
-func TestDB_BeginRW_Closed(t *testing.T) {
-	var db bolt.DB
-	if _, err := db.Begin(true); err != bolt.ErrDatabaseNotOpen {
-		t.Fatalf("unexpected error: %s", err)
-	}
-}
-
-func TestDB_Close_PendingTx_RW(t *testing.T) { testDB_Close_PendingTx(t, true) }
-func TestDB_Close_PendingTx_RO(t *testing.T) { testDB_Close_PendingTx(t, false) }
-
-// Ensure that a database cannot close while transactions are open.
-func testDB_Close_PendingTx(t *testing.T, writable bool) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	// Start transaction.
-	tx, err := db.Begin(writable)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Close the database in a separate goroutine.
-	done := make(chan struct{})
-	go func() {
-		// t.Fatal must not be called from a non-test goroutine;
-		// report the error and let the test fail instead.
-		if err := db.Close(); err != nil {
-			t.Error(err)
-		}
-		close(done)
-	}()
-
-	// Ensure database hasn't closed.
-	time.Sleep(100 * time.Millisecond)
-	select {
-	case <-done:
-		t.Fatal("database closed too early")
-	default:
-	}
-
-	// Commit transaction.
-	if err := tx.Commit(); err != nil {
-		t.Fatal(err)
-	}
-
-	// Ensure database closed now.
-	time.Sleep(100 * time.Millisecond)
-	select {
-	case <-done:
-	default:
-		t.Fatal("database did not close")
-	}
-}
-
-// Ensure a database can provide a transactional block.
-func TestDB_Update(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		b, err := tx.CreateBucket([]byte("widgets"))
-		if err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Put([]byte("baz"), []byte("bat")); err != nil {
-			t.Fatal(err)
-		}
-		if err := b.Delete([]byte("foo")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-	if err := db.View(func(tx *bolt.Tx) error {
-		b := tx.Bucket([]byte("widgets"))
-		if v := b.Get([]byte("foo")); v != nil {
-			t.Fatalf("expected nil value, got: %v", v)
-		}
-		if v := b.Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) {
-			t.Fatalf("unexpected value: %v", v)
-		}
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	}
-}
-
-// Ensure a closed database returns an error while running a transaction block.
-func TestDB_Update_Closed(t *testing.T) {
-	var db bolt.DB
-	if err := db.Update(func(tx *bolt.Tx) error {
-		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
-			t.Fatal(err)
-		}
-		return nil
-	}); err != bolt.ErrDatabaseNotOpen {
-		t.Fatalf("unexpected error: %s", err)
-	}
-}
-
-// Ensure a panic occurs while trying to commit a managed transaction.
-func TestDB_Update_ManualCommit(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	var panicked bool
-	if err := db.Update(func(tx *bolt.Tx) error {
-		func() {
-			defer func() {
-				if r := recover(); r != nil {
-					panicked = true
-				}
-			}()
-
-			if err := tx.Commit(); err != nil {
-				t.Fatal(err)
-			}
-		}()
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	} else if !panicked {
-		t.Fatal("expected panic")
-	}
-}
-
-// Ensure a panic occurs while trying to rollback a managed transaction.
-func TestDB_Update_ManualRollback(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	var panicked bool
-	if err := db.Update(func(tx *bolt.Tx) error {
-		func() {
-			defer func() {
-				if r := recover(); r != nil {
-					panicked = true
-				}
-			}()
-
-			if err := tx.Rollback(); err != nil {
-				t.Fatal(err)
-			}
-		}()
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	} else if !panicked {
-		t.Fatal("expected panic")
-	}
-}
-
-// Ensure a panic occurs while trying to commit a managed transaction.
-func TestDB_View_ManualCommit(t *testing.T) {
-	db := MustOpenDB()
-	defer db.MustClose()
-
-	var panicked bool
-	if err := db.View(func(tx *bolt.Tx) error {
-		func() {
-			defer func() {
-				if r := recover(); r != nil {
-					panicked = true
-				}
-			}()
-
-			if err := tx.Commit(); err != nil {
-				t.Fatal(err)
-			}
-		}()
-		return nil
-	}); err != nil {
-		t.Fatal(err)
-	} else if !panicked {
-		t.Fatal("expected panic")
-	}
-}
-
-// Ensure a panic occurs while trying to rollback a managed transaction.
-func TestDB_View_ManualRollback(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - var panicked bool - if err := db.View(func(tx *bolt.Tx) error { - func() { - defer func() { - if r := recover(); r != nil { - panicked = true - } - }() - - if err := tx.Rollback(); err != nil { - t.Fatal(err) - } - }() - return nil - }); err != nil { - t.Fatal(err) - } else if !panicked { - t.Fatal("expected panic") - } -} - -// Ensure a write transaction that panics does not hold open locks. -func TestDB_Update_Panic(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - // Panic during update but recover. - func() { - defer func() { - if r := recover(); r != nil { - t.Log("recover: update", r) - } - }() - - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - panic("omg") - }); err != nil { - t.Fatal(err) - } - }() - - // Verify we can update again. - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Verify that our change persisted. - if err := db.Update(func(tx *bolt.Tx) error { - if tx.Bucket([]byte("widgets")) == nil { - t.Fatal("expected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure a database can return an error through a read-only transactional block. -func TestDB_View_Error(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.View(func(tx *bolt.Tx) error { - return errors.New("xxx") - }); err == nil || err.Error() != "xxx" { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure a read transaction that panics does not hold open locks. -func TestDB_View_Panic(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Panic during view transaction but recover. - func() { - defer func() { - if r := recover(); r != nil { - t.Log("recover: view", r) - } - }() - - if err := db.View(func(tx *bolt.Tx) error { - if tx.Bucket([]byte("widgets")) == nil { - t.Fatal("expected bucket") - } - panic("omg") - }); err != nil { - t.Fatal(err) - } - }() - - // Verify that we can still use read transactions. - if err := db.View(func(tx *bolt.Tx) error { - if tx.Bucket([]byte("widgets")) == nil { - t.Fatal("expected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that DB stats can be returned. -func TestDB_Stats(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("widgets")) - return err - }); err != nil { - t.Fatal(err) - } - - stats := db.Stats() - if stats.TxStats.PageCount != 2 { - t.Fatalf("unexpected TxStats.PageCount: %d", stats.TxStats.PageCount) - } else if stats.FreePageN != 0 { - t.Fatalf("unexpected FreePageN != 0: %d", stats.FreePageN) - } else if stats.PendingPageN != 2 { - t.Fatalf("unexpected PendingPageN != 2: %d", stats.PendingPageN) - } -} - -// Ensure that database pages are in expected order and type. 
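-//
-// For a freshly created database with a single bucket and a few
-// overwrites of one key, the assertions below expect this layout:
-//
-//	pages 0-1: meta
-//	pages 2-3: free
-//	page  4:   leaf
-//	page  5:   freelist
-//	page  6:   beyond the high water mark (must not exist)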
-func TestDB_Consistency(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("widgets")) - return err - }); err != nil { - t.Fatal(err) - } - - for i := 0; i < 10; i++ { - if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - } - - if err := db.Update(func(tx *bolt.Tx) error { - if p, _ := tx.Page(0); p == nil { - t.Fatal("expected page") - } else if p.Type != "meta" { - t.Fatalf("unexpected page type: %s", p.Type) - } - - if p, _ := tx.Page(1); p == nil { - t.Fatal("expected page") - } else if p.Type != "meta" { - t.Fatalf("unexpected page type: %s", p.Type) - } - - if p, _ := tx.Page(2); p == nil { - t.Fatal("expected page") - } else if p.Type != "free" { - t.Fatalf("unexpected page type: %s", p.Type) - } - - if p, _ := tx.Page(3); p == nil { - t.Fatal("expected page") - } else if p.Type != "free" { - t.Fatalf("unexpected page type: %s", p.Type) - } - - if p, _ := tx.Page(4); p == nil { - t.Fatal("expected page") - } else if p.Type != "leaf" { - t.Fatalf("unexpected page type: %s", p.Type) - } - - if p, _ := tx.Page(5); p == nil { - t.Fatal("expected page") - } else if p.Type != "freelist" { - t.Fatalf("unexpected page type: %s", p.Type) - } - - if p, _ := tx.Page(6); p != nil { - t.Fatal("unexpected page") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that DB stats can be subtracted from one another. -func TestDBStats_Sub(t *testing.T) { - var a, b bolt.Stats - a.TxStats.PageCount = 3 - a.FreePageN = 4 - b.TxStats.PageCount = 10 - b.FreePageN = 14 - diff := b.Sub(&a) - if diff.TxStats.PageCount != 7 { - t.Fatalf("unexpected TxStats.PageCount: %d", diff.TxStats.PageCount) - } - - // free page stats are copied from the receiver and not subtracted - if diff.FreePageN != 14 { - t.Fatalf("unexpected FreePageN: %d", diff.FreePageN) - } -} - -// Ensure two functions can perform updates in a single batch. -func TestDB_Batch(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Iterate over multiple updates in separate goroutines. - n := 2 - ch := make(chan error) - for i := 0; i < n; i++ { - go func(i int) { - ch <- db.Batch(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) - }) - }(i) - } - - // Check all responses to make sure there's no error. - for i := 0; i < n; i++ { - if err := <-ch; err != nil { - t.Fatal(err) - } - } - - // Ensure data is correct. - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for i := 0; i < n; i++ { - if v := b.Get(u64tob(uint64(i))); v == nil { - t.Errorf("key not found: %d", i) - } - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -func TestDB_Batch_Panic(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - var sentinel int - var bork = &sentinel - var problem interface{} - var err error - - // Execute a function inside a batch that panics. - func() { - defer func() { - if p := recover(); p != nil { - problem = p - } - }() - err = db.Batch(func(tx *bolt.Tx) error { - panic(bork) - }) - }() - - // Verify there is no error. 
- if g, e := err, error(nil); g != e { - t.Fatalf("wrong error: %v != %v", g, e) - } - // Verify the panic was captured. - if g, e := problem, bork; g != e { - t.Fatalf("wrong error: %v != %v", g, e) - } -} - -func TestDB_BatchFull(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("widgets")) - return err - }); err != nil { - t.Fatal(err) - } - - const size = 3 - // buffered so we never leak goroutines - ch := make(chan error, size) - put := func(i int) { - ch <- db.Batch(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) - }) - } - - db.MaxBatchSize = size - // high enough to never trigger here - db.MaxBatchDelay = 1 * time.Hour - - go put(1) - go put(2) - - // Give the batch a chance to exhibit bugs. - time.Sleep(10 * time.Millisecond) - - // not triggered yet - select { - case <-ch: - t.Fatalf("batch triggered too early") - default: - } - - go put(3) - - // Check all responses to make sure there's no error. - for i := 0; i < size; i++ { - if err := <-ch; err != nil { - t.Fatal(err) - } - } - - // Ensure data is correct. - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for i := 1; i <= size; i++ { - if v := b.Get(u64tob(uint64(i))); v == nil { - t.Errorf("key not found: %d", i) - } - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -func TestDB_BatchTime(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("widgets")) - return err - }); err != nil { - t.Fatal(err) - } - - const size = 1 - // buffered so we never leak goroutines - ch := make(chan error, size) - put := func(i int) { - ch <- db.Batch(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) - }) - } - - db.MaxBatchSize = 1000 - db.MaxBatchDelay = 0 - - go put(1) - - // Batch must trigger by time alone. - - // Check all responses to make sure there's no error. - for i := 0; i < size; i++ { - if err := <-ch; err != nil { - t.Fatal(err) - } - } - - // Ensure data is correct. - if err := db.View(func(tx *bolt.Tx) error { - b := tx.Bucket([]byte("widgets")) - for i := 1; i <= size; i++ { - if v := b.Get(u64tob(uint64(i))); v == nil { - t.Errorf("key not found: %d", i) - } - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -func ExampleDB_Update() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Execute several commands within a read-write transaction. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - return err - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - return err - } - return nil - }); err != nil { - log.Fatal(err) - } - - // Read the value back from a separate read-only transaction. - if err := db.View(func(tx *bolt.Tx) error { - value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) - fmt.Printf("The value of 'foo' is: %s\n", value) - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release the file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // The value of 'foo' is: bar -} - -func ExampleDB_View() { - // Open the database. 
- db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Insert data into a bucket. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("people")) - if err != nil { - return err - } - if err := b.Put([]byte("john"), []byte("doe")); err != nil { - return err - } - if err := b.Put([]byte("susy"), []byte("que")); err != nil { - return err - } - return nil - }); err != nil { - log.Fatal(err) - } - - // Access data from within a read-only transactional block. - if err := db.View(func(tx *bolt.Tx) error { - v := tx.Bucket([]byte("people")).Get([]byte("john")) - fmt.Printf("John's last name is %s.\n", v) - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release the file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // John's last name is doe. -} - -func ExampleDB_Begin_ReadOnly() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Create a bucket using a read-write transaction. - if err := db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("widgets")) - return err - }); err != nil { - log.Fatal(err) - } - - // Create several keys in a transaction. - tx, err := db.Begin(true) - if err != nil { - log.Fatal(err) - } - b := tx.Bucket([]byte("widgets")) - if err := b.Put([]byte("john"), []byte("blue")); err != nil { - log.Fatal(err) - } - if err := b.Put([]byte("abby"), []byte("red")); err != nil { - log.Fatal(err) - } - if err := b.Put([]byte("zephyr"), []byte("purple")); err != nil { - log.Fatal(err) - } - if err := tx.Commit(); err != nil { - log.Fatal(err) - } - - // Iterate over the values in sorted key order. 
-	tx, err = db.Begin(false)
-	if err != nil {
-		log.Fatal(err)
-	}
-	c := tx.Bucket([]byte("widgets")).Cursor()
-	for k, v := c.First(); k != nil; k, v = c.Next() {
-		fmt.Printf("%s likes %s\n", k, v)
-	}
-
-	if err := tx.Rollback(); err != nil {
-		log.Fatal(err)
-	}
-
-	if err := db.Close(); err != nil {
-		log.Fatal(err)
-	}
-
-	// Output:
-	// abby likes red
-	// john likes blue
-	// zephyr likes purple
-}
-
-func BenchmarkDBBatchAutomatic(b *testing.B) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte("bench"))
-		return err
-	}); err != nil {
-		b.Fatal(err)
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		start := make(chan struct{})
-		var wg sync.WaitGroup
-
-		for round := 0; round < 1000; round++ {
-			wg.Add(1)
-
-			go func(id uint32) {
-				defer wg.Done()
-				<-start
-
-				h := fnv.New32a()
-				buf := make([]byte, 4)
-				binary.LittleEndian.PutUint32(buf, id)
-				_, _ = h.Write(buf[:])
-				k := h.Sum(nil)
-				insert := func(tx *bolt.Tx) error {
-					b := tx.Bucket([]byte("bench"))
-					return b.Put(k, []byte("filler"))
-				}
-				if err := db.Batch(insert); err != nil {
-					b.Error(err)
-					return
-				}
-			}(uint32(round))
-		}
-		close(start)
-		wg.Wait()
-	}
-
-	b.StopTimer()
-	validateBatchBench(b, db)
-}
-
-func BenchmarkDBBatchSingle(b *testing.B) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte("bench"))
-		return err
-	}); err != nil {
-		b.Fatal(err)
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		start := make(chan struct{})
-		var wg sync.WaitGroup
-
-		for round := 0; round < 1000; round++ {
-			wg.Add(1)
-			go func(id uint32) {
-				defer wg.Done()
-				<-start
-
-				h := fnv.New32a()
-				buf := make([]byte, 4)
-				binary.LittleEndian.PutUint32(buf, id)
-				_, _ = h.Write(buf[:])
-				k := h.Sum(nil)
-				insert := func(tx *bolt.Tx) error {
-					b := tx.Bucket([]byte("bench"))
-					return b.Put(k, []byte("filler"))
-				}
-				if err := db.Update(insert); err != nil {
-					b.Error(err)
-					return
-				}
-			}(uint32(round))
-		}
-		close(start)
-		wg.Wait()
-	}
-
-	b.StopTimer()
-	validateBatchBench(b, db)
-}
-
-func BenchmarkDBBatchManual10x100(b *testing.B) {
-	db := MustOpenDB()
-	defer db.MustClose()
-	if err := db.Update(func(tx *bolt.Tx) error {
-		_, err := tx.CreateBucket([]byte("bench"))
-		return err
-	}); err != nil {
-		b.Fatal(err)
-	}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		start := make(chan struct{})
-		var wg sync.WaitGroup
-
-		for major := 0; major < 10; major++ {
-			wg.Add(1)
-			go func(id uint32) {
-				defer wg.Done()
-				<-start
-
-				insert100 := func(tx *bolt.Tx) error {
-					h := fnv.New32a()
-					buf := make([]byte, 4)
-					for minor := uint32(0); minor < 100; minor++ {
-						binary.LittleEndian.PutUint32(buf, uint32(id*100+minor))
-						h.Reset()
-						_, _ = h.Write(buf[:])
-						k := h.Sum(nil)
-						b := tx.Bucket([]byte("bench"))
-						if err := b.Put(k, []byte("filler")); err != nil {
-							return err
-						}
-					}
-					return nil
-				}
-				if err := db.Update(insert100); err != nil {
-					// b.Fatal must not be called from a non-test
-					// goroutine; report the error instead.
-					b.Error(err)
-					return
-				}
-			}(uint32(major))
-		}
-		close(start)
-		wg.Wait()
-	}
-
-	b.StopTimer()
-	validateBatchBench(b, db)
-}
-
-func validateBatchBench(b *testing.B, db *DB) {
-	var rollback = errors.New("sentinel error to cause rollback")
-	validate := func(tx *bolt.Tx) error {
-		bucket := tx.Bucket([]byte("bench"))
-		h := fnv.New32a()
-		buf := make([]byte, 4)
-		for id := uint32(0); id < 1000; id++ {
-			binary.LittleEndian.PutUint32(buf, id)
-			h.Reset()
-			_, _ = h.Write(buf[:])
-			k := h.Sum(nil)
-			v := bucket.Get(k)
-			if v == nil {
b.Errorf("not found id=%d key=%x", id, k) - continue - } - if g, e := v, []byte("filler"); !bytes.Equal(g, e) { - b.Errorf("bad value for id=%d key=%x: %s != %q", id, k, g, e) - } - if err := bucket.Delete(k); err != nil { - return err - } - } - // should be empty now - c := bucket.Cursor() - for k, v := c.First(); k != nil; k, v = c.Next() { - b.Errorf("unexpected key: %x = %q", k, v) - } - return rollback - } - if err := db.Update(validate); err != nil && err != rollback { - b.Error(err) - } -} - -// DB is a test wrapper for bolt.DB. -type DB struct { - *bolt.DB -} - -// MustOpenDB returns a new, open DB at a temporary location. -func MustOpenDB() *DB { - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - panic(err) - } - return &DB{db} -} - -// Close closes the database and deletes the underlying file. -func (db *DB) Close() error { - // Log statistics. - if *statsFlag { - db.PrintStats() - } - - // Check database consistency after every test. - db.MustCheck() - - // Close database and remove file. - defer os.Remove(db.Path()) - return db.DB.Close() -} - -// MustClose closes the database and deletes the underlying file. Panic on error. -func (db *DB) MustClose() { - if err := db.Close(); err != nil { - panic(err) - } -} - -// PrintStats prints the database stats -func (db *DB) PrintStats() { - var stats = db.Stats() - fmt.Printf("[db] %-20s %-20s %-20s\n", - fmt.Sprintf("pg(%d/%d)", stats.TxStats.PageCount, stats.TxStats.PageAlloc), - fmt.Sprintf("cur(%d)", stats.TxStats.CursorCount), - fmt.Sprintf("node(%d/%d)", stats.TxStats.NodeCount, stats.TxStats.NodeDeref), - ) - fmt.Printf(" %-20s %-20s %-20s\n", - fmt.Sprintf("rebal(%d/%v)", stats.TxStats.Rebalance, truncDuration(stats.TxStats.RebalanceTime)), - fmt.Sprintf("spill(%d/%v)", stats.TxStats.Spill, truncDuration(stats.TxStats.SpillTime)), - fmt.Sprintf("w(%d/%v)", stats.TxStats.Write, truncDuration(stats.TxStats.WriteTime)), - ) -} - -// MustCheck runs a consistency check on the database and panics if any errors are found. -func (db *DB) MustCheck() { - if err := db.Update(func(tx *bolt.Tx) error { - // Collect all the errors. - var errors []error - for err := range tx.Check() { - errors = append(errors, err) - if len(errors) > 10 { - break - } - } - - // If errors occurred, copy the DB and print the errors. - if len(errors) > 0 { - var path = tempfile() - if err := tx.CopyFile(path, 0600); err != nil { - panic(err) - } - - // Print errors. - fmt.Print("\n\n") - fmt.Printf("consistency check failed (%d errors)\n", len(errors)) - for _, err := range errors { - fmt.Println(err) - } - fmt.Println("") - fmt.Println("db saved to:") - fmt.Println(path) - fmt.Print("\n\n") - os.Exit(-1) - } - - return nil - }); err != nil && err != bolt.ErrDatabaseNotOpen { - panic(err) - } -} - -// CopyTempFile copies a database to a temporary file. -func (db *DB) CopyTempFile() { - path := tempfile() - if err := db.View(func(tx *bolt.Tx) error { - return tx.CopyFile(path, 0600) - }); err != nil { - panic(err) - } - fmt.Println("db copied to: ", path) -} - -// tempfile returns a temporary file path. -func tempfile() string { - f, err := ioutil.TempFile("", "bolt-") - if err != nil { - panic(err) - } - if err := f.Close(); err != nil { - panic(err) - } - if err := os.Remove(f.Name()); err != nil { - panic(err) - } - return f.Name() -} - -// mustContainKeys checks that a bucket contains a given set of keys. 
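-//
-// Typical (hypothetical) use from inside a transaction; the map values
-// are ignored, only the key set matters:
-//
-//	mustContainKeys(tx.Bucket([]byte("widgets")), map[string]string{
-//		"foo": "", "bar": "",
-//	})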
-func mustContainKeys(b *bolt.Bucket, m map[string]string) {
-	found := make(map[string]string)
-	if err := b.ForEach(func(k, _ []byte) error {
-		found[string(k)] = ""
-		return nil
-	}); err != nil {
-		panic(err)
-	}
-
-	// Check for keys found in bucket that shouldn't be there.
-	var keys []string
-	for k := range found {
-		if _, ok := m[k]; !ok {
-			keys = append(keys, k)
-		}
-	}
-	if len(keys) > 0 {
-		sort.Strings(keys)
-		panic(fmt.Sprintf("keys found(%d): %s", len(keys), strings.Join(keys, ",")))
-	}
-
-	// Check for keys not found in bucket that should be there.
-	for k := range m {
-		if _, ok := found[k]; !ok {
-			keys = append(keys, k)
-		}
-	}
-	if len(keys) > 0 {
-		sort.Strings(keys)
-		panic(fmt.Sprintf("keys not found(%d): %s", len(keys), strings.Join(keys, ",")))
-	}
-}
-
-func trunc(b []byte, length int) []byte {
-	if length < len(b) {
-		return b[:length]
-	}
-	return b
-}
-
-func truncDuration(d time.Duration) string {
-	return regexp.MustCompile(`^(\d+)(\.\d+)`).ReplaceAllString(d.String(), "$1")
-}
-
-func fileSize(path string) int64 {
-	fi, err := os.Stat(path)
-	if err != nil {
-		return 0
-	}
-	return fi.Size()
-}
-
-func warn(v ...interface{})              { fmt.Fprintln(os.Stderr, v...) }
-func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
-
-// u64tob converts a uint64 into an 8-byte slice.
-func u64tob(v uint64) []byte {
-	b := make([]byte, 8)
-	binary.BigEndian.PutUint64(b, v)
-	return b
-}
-
-// btou64 converts an 8-byte slice into an uint64.
-func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) }
diff --git a/tests/freelist_test.go b/tests/freelist_test.go
deleted file mode 100644
index 4e9b3a8..0000000
--- a/tests/freelist_test.go
+++ /dev/null
@@ -1,158 +0,0 @@
-package bolt
-
-import (
-	"math/rand"
-	"reflect"
-	"sort"
-	"testing"
-	"unsafe"
-)
-
-// Ensure that a page is added to a transaction's freelist.
-func TestFreelist_free(t *testing.T) {
-	f := newFreelist()
-	f.free(100, &page{id: 12})
-	if !reflect.DeepEqual([]pgid{12}, f.pending[100]) {
-		t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100])
-	}
-}
-
-// Ensure that a page and its overflow are added to a transaction's freelist.
-func TestFreelist_free_overflow(t *testing.T) {
-	f := newFreelist()
-	f.free(100, &page{id: 12, overflow: 3})
-	if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100]) {
-		t.Fatalf("exp=%v; got=%v", exp, f.pending[100])
-	}
-}
-
-// Ensure that a transaction's free pages can be released.
-func TestFreelist_release(t *testing.T) {
-	f := newFreelist()
-	f.free(100, &page{id: 12, overflow: 1})
-	f.free(100, &page{id: 9})
-	f.free(102, &page{id: 39})
-	f.release(100)
-	f.release(101)
-	if exp := []pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.ids) {
-		t.Fatalf("exp=%v; got=%v", exp, f.ids)
-	}
-
-	f.release(102)
-	if exp := []pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.ids) {
-		t.Fatalf("exp=%v; got=%v", exp, f.ids)
-	}
-}
-
-// Ensure that a freelist can find contiguous blocks of pages.
-func TestFreelist_allocate(t *testing.T) {
-	f := &freelist{ids: []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}}
-	if id := int(f.allocate(3)); id != 3 {
-		t.Fatalf("exp=3; got=%v", id)
-	}
-	if id := int(f.allocate(1)); id != 6 {
-		t.Fatalf("exp=6; got=%v", id)
-	}
-	if id := int(f.allocate(3)); id != 0 {
-		t.Fatalf("exp=0; got=%v", id)
-	}
-	if id := int(f.allocate(2)); id != 12 {
-		t.Fatalf("exp=12; got=%v", id)
-	}
-	if id := int(f.allocate(1)); id != 7 {
-		t.Fatalf("exp=7; got=%v", id)
-	}
-	if id := int(f.allocate(0)); id != 0 {
-		t.Fatalf("exp=0; got=%v", id)
-	}
-	if id := int(f.allocate(0)); id != 0 {
-		t.Fatalf("exp=0; got=%v", id)
-	}
-	if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.ids) {
-		t.Fatalf("exp=%v; got=%v", exp, f.ids)
-	}
-
-	if id := int(f.allocate(1)); id != 9 {
-		t.Fatalf("exp=9; got=%v", id)
-	}
-	if id := int(f.allocate(1)); id != 18 {
-		t.Fatalf("exp=18; got=%v", id)
-	}
-	if id := int(f.allocate(1)); id != 0 {
-		t.Fatalf("exp=0; got=%v", id)
-	}
-	if exp := []pgid{}; !reflect.DeepEqual(exp, f.ids) {
-		t.Fatalf("exp=%v; got=%v", exp, f.ids)
-	}
-}
-
-// Ensure that a freelist can deserialize from a freelist page.
-func TestFreelist_read(t *testing.T) {
-	// Create a page.
-	var buf [4096]byte
-	page := (*page)(unsafe.Pointer(&buf[0]))
-	page.flags = freelistPageFlag
-	page.count = 2
-
-	// Insert 2 page ids.
-	ids := (*[3]pgid)(unsafe.Pointer(&page.ptr))
-	ids[0] = 23
-	ids[1] = 50
-
-	// Deserialize page into a freelist.
-	f := newFreelist()
-	f.read(page)
-
-	// Ensure that there are two page ids in the freelist.
-	if exp := []pgid{23, 50}; !reflect.DeepEqual(exp, f.ids) {
-		t.Fatalf("exp=%v; got=%v", exp, f.ids)
-	}
-}
-
-// Ensure that a freelist can serialize into a freelist page.
-func TestFreelist_write(t *testing.T) {
-	// Create a freelist and write it to a page.
-	var buf [4096]byte
-	f := &freelist{ids: []pgid{12, 39}, pending: make(map[txid][]pgid)}
-	f.pending[100] = []pgid{28, 11}
-	f.pending[101] = []pgid{3}
-	p := (*page)(unsafe.Pointer(&buf[0]))
-	if err := f.write(p); err != nil {
-		t.Fatal(err)
-	}
-
-	// Read the page back out.
-	f2 := newFreelist()
-	f2.read(p)
-
-	// Ensure that the freelist is correct.
-	// All pages should be present and in sorted order.
- if exp := []pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.ids) { - t.Fatalf("exp=%v; got=%v", exp, f2.ids) - } -} - -func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) } -func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) } -func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) } -func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) } - -func benchmark_FreelistRelease(b *testing.B, size int) { - ids := randomPgids(size) - pending := randomPgids(len(ids) / 400) - b.ResetTimer() - for i := 0; i < b.N; i++ { - f := &freelist{ids: ids, pending: map[txid][]pgid{1: pending}} - f.release(1) - } -} - -func randomPgids(n int) []pgid { - rand.Seed(42) - pgids := make(pgids, n) - for i := range pgids { - pgids[i] = pgid(rand.Int63()) - } - sort.Sort(pgids) - return pgids -} diff --git a/tests/functional/read-snapshots/gkv.go b/tests/functional/read-snapshots/gkv.go new file mode 100644 index 0000000..c5c6aa4 --- /dev/null +++ b/tests/functional/read-snapshots/gkv.go @@ -0,0 +1,12 @@ +package gkv + +import ( + g "gobang" +) + + + +func MainTest() { + g.Testing("a read snapshot won't change, despite write update", func() { + }) +} diff --git a/tests/functional/read-snapshots/main.go b/tests/functional/read-snapshots/main.go new file mode 120000 index 0000000..f67563d --- /dev/null +++ b/tests/functional/read-snapshots/main.go @@ -0,0 +1 @@ +../../main.go
\ No newline at end of file
diff --git a/tests/fuzz/api/gkv.go b/tests/fuzz/api/gkv.go
new file mode 100644
index 0000000..f7f0309
--- /dev/null
+++ b/tests/fuzz/api/gkv.go
@@ -0,0 +1,38 @@
+package gkv
+
+import (
+	"os"
+	"testing"
+	"testing/internal/testdeps"
+)
+
+
+
+// api is a smoke-test fuzz target: for an integer n the nested condition
+// can never hold, so it exercises the fuzzing harness without failing.
+func api(f *testing.F) {
+	f.Fuzz(func(t *testing.T, n int) {
+		if n > 1 {
+			if n < 2 {
+				t.Errorf("Failed n: %v\n", n)
+			}
+		}
+	})
+}
+
+
+
+// MainTest stands in for the main function that "go test" would
+// generate, registering the fuzz target via testing.MainStart.
+func MainTest() {
+	fuzzTargets := []testing.InternalFuzzTarget{
+		{"api", api},
+	}
+
+	deps := testdeps.TestDeps{}
+	tests := []testing.InternalTest{}
+	benchmarks := []testing.InternalBenchmark{}
+	examples := []testing.InternalExample{}
+	m := testing.MainStart(deps, tests, benchmarks, fuzzTargets, examples)
+	os.Exit(m.Run())
+}
diff --git a/tests/fuzz/api/main.go b/tests/fuzz/api/main.go
new file mode 120000
index 0000000..f67563d
--- /dev/null
+++ b/tests/fuzz/api/main.go
@@ -0,0 +1 @@
+../../main.go
\ No newline at end of file
diff --git a/tests/gkv.go b/tests/gkv.go
new file mode 100644
index 0000000..63464a7
--- /dev/null
+++ b/tests/gkv.go
@@ -0,0 +1,6021 @@
+package gkv
+
+import (
+	"bytes"
+	crypto "crypto/rand"
+	"encoding/binary"
+	"errors"
+	"flag"
+	"fmt"
+	"hash/fnv"
+	"io"
+	"io/ioutil"
+	"log"
+	"math/rand"
+	"os"
+	"path/filepath"
+	"reflect"
+	"regexp"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"testing"
+	"testing/quick"
+	"time"
+	"unsafe"
+)
+
+// Ensure that getting a non-existent key from a bucket returns nil.
+func TestBucket_Get_NonExistent(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if v := b.Get([]byte("foo")); v != nil {
+			t.Fatal("expected nil value")
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can read a value that is not flushed yet.
+func TestBucket_Get_FromNode(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
+			t.Fatal(err)
+		}
+		if v := b.Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) {
+			t.Fatalf("unexpected value: %v", v)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that Get() on a key that holds a nested bucket returns nil.
+func TestBucket_Get_IncompatibleValue(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		_, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil {
+			t.Fatal(err)
+		}
+
+		if tx.Bucket([]byte("widgets")).Get([]byte("foo")) != nil {
+			t.Fatal("expected nil value")
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a slice returned from a bucket has a capacity equal to its length.
+// This guarantees that appending to the slice forces Go to reallocate, rather
+// than write past the value's bytes.
+//
+// https://github.com/boltdb/bolt/issues/544
+func TestBucket_Get_Capacity(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	// Write key to a bucket.
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("bucket"))
+		if err != nil {
+			return err
+		}
+		return b.Put([]byte("key"), []byte("val"))
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Retrieve value and attempt to append to it.
+	if err := db.Update(func(tx *Tx) error {
+		k, v := tx.Bucket([]byte("bucket")).Cursor().First()
+
+		// Verify capacity.
+		if len(k) != cap(k) {
+			t.Fatalf("unexpected key slice capacity: %d", cap(k))
+		} else if len(v) != cap(v) {
+			t.Fatalf("unexpected value slice capacity: %d", cap(v))
+		}
+
+		// Ensure slice can be appended to without a segfault.
+		k = append(k, []byte("123")...)
+		v = append(v, []byte("123")...)
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can write a key/value.
+func TestBucket_Put(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
+			t.Fatal(err)
+		}
+
+		v := tx.Bucket([]byte("widgets")).Get([]byte("foo"))
+		if !bytes.Equal([]byte("bar"), v) {
+			t.Fatalf("unexpected value: %v", v)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can rewrite a key in the same transaction.
+func TestBucket_Put_Repeat(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Put([]byte("foo"), []byte("baz")); err != nil {
+			t.Fatal(err)
+		}
+
+		value := tx.Bucket([]byte("widgets")).Get([]byte("foo"))
+		if !bytes.Equal([]byte("baz"), value) {
+			t.Fatalf("unexpected value: %v", value)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can write a bunch of large values.
+func TestBucket_Put_Large(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	count, factor := 100, 200
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		for i := 1; i < count; i++ {
+			if err := b.Put([]byte(strings.Repeat("0", i*factor)), []byte(strings.Repeat("X", (count-i)*factor))); err != nil {
+				t.Fatal(err)
+			}
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.View(func(tx *Tx) error {
+		b := tx.Bucket([]byte("widgets"))
+		for i := 1; i < count; i++ {
+			value := b.Get([]byte(strings.Repeat("0", i*factor)))
+			if !bytes.Equal(value, []byte(strings.Repeat("X", (count-i)*factor))) {
+				t.Fatalf("unexpected value: %v", value)
+			}
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a database can perform multiple large appends safely.
+func TestDB_Put_VeryLarge(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode.")
+	}
+
+	n, batchN := 400000, 200000
+	ksize, vsize := 8, 500
+
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	for i := 0; i < n; i += batchN {
+		if err := db.Update(func(tx *Tx) error {
+			b, err := tx.CreateBucketIfNotExists([]byte("widgets"))
+			if err != nil {
+				t.Fatal(err)
+			}
+			for j := 0; j < batchN; j++ {
+				k, v := make([]byte, ksize), make([]byte, vsize)
+				binary.BigEndian.PutUint32(k, uint32(i+j))
+				if err := b.Put(k, v); err != nil {
+					t.Fatal(err)
+				}
+			}
+			return nil
+		}); err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// Ensure that setting a value on a key that holds a nested bucket returns an error.
+func TestBucket_Put_IncompatibleValue(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		b0, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if _, err := tx.Bucket([]byte("widgets")).CreateBucket([]byte("foo")); err != nil {
+			t.Fatal(err)
+		}
+		if err := b0.Put([]byte("foo"), []byte("bar")); err != ErrIncompatibleValue {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that setting a value while the transaction is closed returns an error.
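+//
+// The failure mode under test, in sketch form:
+//
+//	tx, _ := db.Begin(true)
+//	b, _ := tx.CreateBucket([]byte("widgets"))
+//	_ = tx.Rollback()
+//	b.Put([]byte("foo"), []byte("bar")) // == ErrTxClosed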
+func TestBucket_Put_Closed(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + + if err := b.Put([]byte("foo"), []byte("bar")); err != ErrTxClosed { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that setting a value on a read-only bucket returns an error. +func TestBucket_Put_ReadOnly(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + if err := b.Put([]byte("foo"), []byte("bar")); err != ErrTxNotWritable { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a bucket can delete an existing key. +func TestBucket_Delete(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + if err := b.Delete([]byte("foo")); err != nil { + t.Fatal(err) + } + if v := b.Get([]byte("foo")); v != nil { + t.Fatalf("unexpected value: %v", v) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that deleting a large set of keys will work correctly. +func TestBucket_Delete_Large(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 100; i++ { + if err := b.Put([]byte(strconv.Itoa(i)), []byte(strings.Repeat("*", 1024))); err != nil { + t.Fatal(err) + } + } + + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.Update(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 0; i < 100; i++ { + if err := b.Delete([]byte(strconv.Itoa(i))); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 0; i < 100; i++ { + if v := b.Get([]byte(strconv.Itoa(i))); v != nil { + t.Fatalf("unexpected value: %v, i=%d", v, i) + } + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Deleting a very large list of keys will cause the freelist to use overflow. 
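+// (Background, stated as an assumption about the on-disk format: freed
+// page ids are persisted on a single freelist page, so releasing enough
+// pages in one transaction forces the list onto overflow pages; the bulk
+// delete below is sized to hit that path.)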
+func TestBucket_Delete_FreelistOverflow(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + db := MustOpenDB() + defer db.MustClose() + + k := make([]byte, 16) + for i := uint64(0); i < 10000; i++ { + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucketIfNotExists([]byte("0")) + if err != nil { + t.Fatalf("bucket error: %s", err) + } + + for j := uint64(0); j < 1000; j++ { + binary.BigEndian.PutUint64(k[:8], i) + binary.BigEndian.PutUint64(k[8:], j) + if err := b.Put(k, nil); err != nil { + t.Fatalf("put error: %s", err) + } + } + + return nil + }); err != nil { + t.Fatal(err) + } + } + + // Delete all of them in one large transaction + if err := db.Update(func(tx *Tx) error { + b := tx.Bucket([]byte("0")) + c := b.Cursor() + for k, _ := c.First(); k != nil; k, _ = c.Next() { + if err := c.Delete(); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that accessing and updating nested buckets is ok across transactions. +func TestBucket_Nested(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + // Create a widgets bucket. + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + // Create a widgets/foo bucket. + _, err = b.CreateBucket([]byte("foo")) + if err != nil { + t.Fatal(err) + } + + // Create a widgets/bar key. + if err := b.Put([]byte("bar"), []byte("0000")); err != nil { + t.Fatal(err) + } + + return nil + }); err != nil { + t.Fatal(err) + } + db.MustCheck() + + // Update widgets/bar. + if err := db.Update(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + if err := b.Put([]byte("bar"), []byte("xxxx")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + db.MustCheck() + + // Cause a split. + if err := db.Update(func(tx *Tx) error { + var b = tx.Bucket([]byte("widgets")) + for i := 0; i < 10000; i++ { + if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + db.MustCheck() + + // Insert into widgets/foo/baz. + if err := db.Update(func(tx *Tx) error { + var b = tx.Bucket([]byte("widgets")) + if err := b.Bucket([]byte("foo")).Put([]byte("baz"), []byte("yyyy")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + db.MustCheck() + + // Verify. + if err := db.View(func(tx *Tx) error { + var b = tx.Bucket([]byte("widgets")) + if v := b.Bucket([]byte("foo")).Get([]byte("baz")); !bytes.Equal(v, []byte("yyyy")) { + t.Fatalf("unexpected value: %v", v) + } + if v := b.Get([]byte("bar")); !bytes.Equal(v, []byte("xxxx")) { + t.Fatalf("unexpected value: %v", v) + } + for i := 0; i < 10000; i++ { + if v := b.Get([]byte(strconv.Itoa(i))); !bytes.Equal(v, []byte(strconv.Itoa(i))) { + t.Fatalf("unexpected value: %v", v) + } + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that deleting a bucket using Delete() returns an error. 
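+// (Keys holding sub-buckets must be removed with DeleteBucket(); plain
+// Delete() on such a key is expected to fail with ErrIncompatibleValue,
+// as asserted below.)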
+func TestBucket_Delete_Bucket(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if _, err := b.CreateBucket([]byte("foo")); err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Delete([]byte("foo")); err != ErrIncompatibleValue {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that deleting a key on a read-only bucket returns an error.
+func TestBucket_Delete_ReadOnly(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.View(func(tx *Tx) error {
+		if err := tx.Bucket([]byte("widgets")).Delete([]byte("foo")); err != ErrTxNotWritable {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that deleting a value while the transaction is closed returns an error.
+func TestBucket_Delete_Closed(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	tx, err := db.Begin(true)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := tx.CreateBucket([]byte("widgets"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if err := tx.Rollback(); err != nil {
+		t.Fatal(err)
+	}
+	if err := b.Delete([]byte("foo")); err != ErrTxClosed {
+		t.Fatalf("unexpected error: %s", err)
+	}
+}
+
+// Ensure that deleting a bucket causes nested buckets to be deleted.
+func TestBucket_DeleteBucket_Nested(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		widgets, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		foo, err := widgets.CreateBucket([]byte("foo"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		bar, err := foo.CreateBucket([]byte("bar"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := bar.Put([]byte("baz"), []byte("bat")); err != nil {
+			t.Fatal(err)
+		}
+		if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that deleting a bucket causes nested buckets to be deleted after they have been committed.
+func TestBucket_DeleteBucket_Nested2(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + widgets, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + foo, err := widgets.CreateBucket([]byte("foo")) + if err != nil { + t.Fatal(err) + } + + bar, err := foo.CreateBucket([]byte("bar")) + if err != nil { + t.Fatal(err) + } + + if err := bar.Put([]byte("baz"), []byte("bat")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.Update(func(tx *Tx) error { + widgets := tx.Bucket([]byte("widgets")) + if widgets == nil { + t.Fatal("expected widgets bucket") + } + + foo := widgets.Bucket([]byte("foo")) + if foo == nil { + t.Fatal("expected foo bucket") + } + + bar := foo.Bucket([]byte("bar")) + if bar == nil { + t.Fatal("expected bar bucket") + } + + if v := bar.Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) { + t.Fatalf("unexpected value: %v", v) + } + if err := tx.DeleteBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + if tx.Bucket([]byte("widgets")) != nil { + t.Fatal("expected bucket to be deleted") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that deleting a child bucket with multiple pages causes all pages to get collected. +// NOTE: Consistency check in bolt_test.DB.Close() will panic if pages not freed properly. +func TestBucket_DeleteBucket_Large(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + widgets, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + foo, err := widgets.CreateBucket([]byte("foo")) + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 1000; i++ { + if err := foo.Put([]byte(fmt.Sprintf("%d", i)), []byte(fmt.Sprintf("%0100d", i))); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.Update(func(tx *Tx) error { + if err := tx.DeleteBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a simple value retrieved via Bucket() returns a nil. +func TestBucket_Bucket_IncompatibleValue(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + widgets, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + if b := tx.Bucket([]byte("widgets")).Bucket([]byte("foo")); b != nil { + t.Fatal("expected nil bucket") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that creating a bucket on an existing non-bucket key returns an error. +func TestBucket_CreateBucket_IncompatibleValue(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + widgets, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + if _, err := widgets.CreateBucket([]byte("foo")); err != ErrIncompatibleValue { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that deleting a bucket on an existing non-bucket key returns an error. 
+func TestBucket_DeleteBucket_IncompatibleValue(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		widgets, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := widgets.Put([]byte("foo"), []byte("bar")); err != nil {
+			t.Fatal(err)
+		}
+		if err := tx.Bucket([]byte("widgets")).DeleteBucket([]byte("foo")); err != ErrIncompatibleValue {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure bucket can set and update its sequence number.
+func TestBucket_Sequence(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		bkt, err := tx.CreateBucket([]byte("0"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// Retrieve sequence.
+		if v := bkt.Sequence(); v != 0 {
+			t.Fatalf("unexpected sequence: %d", v)
+		}
+
+		// Update sequence.
+		if err := bkt.SetSequence(1000); err != nil {
+			t.Fatal(err)
+		}
+
+		// Read sequence again.
+		if v := bkt.Sequence(); v != 1000 {
+			t.Fatalf("unexpected sequence: %d", v)
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Verify sequence in separate transaction.
+	if err := db.View(func(tx *Tx) error {
+		if v := tx.Bucket([]byte("0")).Sequence(); v != 1000 {
+			t.Fatalf("unexpected sequence: %d", v)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can return an autoincrementing sequence.
+func TestBucket_NextSequence(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		widgets, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		woojits, err := tx.CreateBucket([]byte("woojits"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		// Make sure sequence increments.
+		if seq, err := widgets.NextSequence(); err != nil {
+			t.Fatal(err)
+		} else if seq != 1 {
+			t.Fatalf("unexpected sequence: %d", seq)
+		}
+
+		if seq, err := widgets.NextSequence(); err != nil {
+			t.Fatal(err)
+		} else if seq != 2 {
+			t.Fatalf("unexpected sequence: %d", seq)
+		}
+
+		// Buckets should be separate.
+		if seq, err := woojits.NextSequence(); err != nil {
+			t.Fatal(err)
+		} else if seq != 1 {
+			t.Fatalf("unexpected sequence: %d", seq)
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket will persist an autoincrementing sequence even if it's
+// the only thing updated on the bucket.
+// https://github.com/boltdb/bolt/issues/296
+func TestBucket_NextSequence_Persist(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	if err := db.Update(func(tx *Tx) error {
+		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.Update(func(tx *Tx) error {
+		if _, err := tx.Bucket([]byte("widgets")).NextSequence(); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.Update(func(tx *Tx) error {
+		seq, err := tx.Bucket([]byte("widgets")).NextSequence()
+		if err != nil {
+			t.Fatalf("unexpected error: %s", err)
+		} else if seq != 2 {
+			t.Fatalf("unexpected sequence: %d", seq)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that retrieving the next sequence on a read-only bucket returns an error.
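+// A sketch of the expected contract (assumption: sequences may only be
+// advanced inside a writable transaction):
+//
+//	_ = db.View(func(tx *Tx) error {
+//		_, err := tx.Bucket([]byte("widgets")).NextSequence()
+//		// err == ErrTxNotWritable
+//		return nil
+//	})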
+func TestBucket_NextSequence_ReadOnly(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + _, err := tx.Bucket([]byte("widgets")).NextSequence() + if err != ErrTxNotWritable { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that retrieving the next sequence for a bucket on a closed database return an error. +func TestBucket_NextSequence_Closed(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + if _, err := b.NextSequence(); err != ErrTxClosed { + t.Fatal(err) + } +} + +// Ensure a user can loop over all key/value pairs in a bucket. +func TestBucket_ForEach(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("0000")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte("0001")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("bar"), []byte("0002")); err != nil { + t.Fatal(err) + } + + var index int + if err := b.ForEach(func(k, v []byte) error { + switch index { + case 0: + if !bytes.Equal(k, []byte("bar")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte("0002")) { + t.Fatalf("unexpected value: %v", v) + } + case 1: + if !bytes.Equal(k, []byte("baz")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte("0001")) { + t.Fatalf("unexpected value: %v", v) + } + case 2: + if !bytes.Equal(k, []byte("foo")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte("0000")) { + t.Fatalf("unexpected value: %v", v) + } + } + index++ + return nil + }); err != nil { + t.Fatal(err) + } + + if index != 3 { + t.Fatalf("unexpected index: %d", index) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure a database can stop iteration early. +func TestBucket_ForEach_ShortCircuit(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("bar"), []byte("0000")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte("0000")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("0000")); err != nil { + t.Fatal(err) + } + + var index int + if err := tx.Bucket([]byte("widgets")).ForEach(func(k, v []byte) error { + index++ + if bytes.Equal(k, []byte("baz")) { + return errors.New("marker") + } + return nil + }); err == nil || err.Error() != "marker" { + t.Fatalf("unexpected error: %s", err) + } + if index != 2 { + t.Fatalf("unexpected index: %d", index) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that looping over a bucket on a closed database returns an error. 
+func TestBucket_ForEach_Closed(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + + if err := b.ForEach(func(k, v []byte) error { return nil }); err != ErrTxClosed { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that an error is returned when inserting with an empty key. +func TestBucket_Put_EmptyKey(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte(""), []byte("bar")); err != ErrKeyRequired { + t.Fatalf("unexpected error: %s", err) + } + if err := b.Put(nil, []byte("bar")); err != ErrKeyRequired { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that an error is returned when inserting with a key that's too large. +func TestBucket_Put_KeyTooLarge(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put(make([]byte, 32769), []byte("bar")); err != ErrKeyTooLarge { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that an error is returned when inserting a value that's too large. +func TestBucket_Put_ValueTooLarge(t *testing.T) { + // Skip this test on DroneCI because the machine is resource constrained. + if os.Getenv("DRONE") == "true" { + t.Skip("not enough RAM for test") + } + + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), make([]byte, MaxValueSize+1)); err != ErrValueTooLarge { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure a bucket can calculate stats. +func TestBucket_Stats(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + // Add bucket with fewer keys but one big value. 
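+	// (500 small key/value pairs plus one ~10KB value: the oversized value
+	// is what produces the leaf overflow pages counted in the assertions
+	// below.)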
+	bigKey := []byte("really-big-value")
+	for i := 0; i < 500; i++ {
+		if err := db.Update(func(tx *Tx) error {
+			b, err := tx.CreateBucketIfNotExists([]byte("woojits"))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			if err := b.Put([]byte(fmt.Sprintf("%03d", i)), []byte(strconv.Itoa(i))); err != nil {
+				t.Fatal(err)
+			}
+			return nil
+		}); err != nil {
+			t.Fatal(err)
+		}
+	}
+	if err := db.Update(func(tx *Tx) error {
+		if err := tx.Bucket([]byte("woojits")).Put(bigKey, []byte(strings.Repeat("*", 10000))); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	db.MustCheck()
+
+	if err := db.View(func(tx *Tx) error {
+		stats := tx.Bucket([]byte("woojits")).Stats()
+		if stats.BranchPageN != 1 {
+			t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN)
+		} else if stats.BranchOverflowN != 0 {
+			t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN)
+		} else if stats.LeafPageN != 7 {
+			t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN)
+		} else if stats.LeafOverflowN != 2 {
+			t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN)
+		} else if stats.KeyN != 501 {
+			t.Fatalf("unexpected KeyN: %d", stats.KeyN)
+		} else if stats.Depth != 2 {
+			t.Fatalf("unexpected Depth: %d", stats.Depth)
+		}
+
+		branchInuse := 16     // branch page header
+		branchInuse += 7 * 16 // branch elements
+		branchInuse += 7 * 3  // branch keys (7 3-byte keys)
+		if stats.BranchInuse != branchInuse {
+			t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse)
+		}
+
+		leafInuse := 7 * 16   // leaf page headers (one per leaf page)
+		leafInuse += 501 * 16 // leaf elements
+		leafInuse += 500*3 + len(bigKey) // leaf keys
+		leafInuse += 1*10 + 2*90 + 3*400 + 10000 // leaf values (10 one-digit, 90 two-digit, 400 three-digit, plus the 10000-byte value)
+		if stats.LeafInuse != leafInuse {
+			t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse)
+		}
+
+		// Only check allocations for 4KB pages.
+		if os.Getpagesize() == 4096 {
+			if stats.BranchAlloc != 4096 {
+				t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc)
+			} else if stats.LeafAlloc != 36864 {
+				t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc)
+			}
+		}
+
+		if stats.BucketN != 1 {
+			t.Fatalf("unexpected BucketN: %d", stats.BucketN)
+		} else if stats.InlineBucketN != 0 {
+			t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN)
+		} else if stats.InlineBucketInuse != 0 {
+			t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse)
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure a bucket with random insertion utilizes fill percentage correctly.
+func TestBucket_Stats_RandomFill(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode.")
+	} else if os.Getpagesize() != 4096 {
+		t.Skip("invalid page size for test")
+	}
+
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	// Add a set of values in random order. It will be the same random
+	// order so we can maintain consistency between test runs.
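+	// (Assumption made explicit: the fixed seed 42 keeps rand.Perm
+	// deterministic, so the exact page and byte counts asserted below are
+	// stable from run to run.)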
+ var count int + rand := rand.New(rand.NewSource(42)) + for _, i := range rand.Perm(1000) { + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucketIfNotExists([]byte("woojits")) + if err != nil { + t.Fatal(err) + } + b.FillPercent = 0.9 + for _, j := range rand.Perm(100) { + index := (j * 10000) + i + if err := b.Put([]byte(fmt.Sprintf("%d000000000000000", index)), []byte("0000000000")); err != nil { + t.Fatal(err) + } + count++ + } + return nil + }); err != nil { + t.Fatal(err) + } + } + + db.MustCheck() + + if err := db.View(func(tx *Tx) error { + stats := tx.Bucket([]byte("woojits")).Stats() + if stats.KeyN != 100000 { + t.Fatalf("unexpected KeyN: %d", stats.KeyN) + } + + if stats.BranchPageN != 98 { + t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) + } else if stats.BranchOverflowN != 0 { + t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) + } else if stats.BranchInuse != 130984 { + t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) + } else if stats.BranchAlloc != 401408 { + t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) + } + + if stats.LeafPageN != 3412 { + t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) + } else if stats.LeafOverflowN != 0 { + t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) + } else if stats.LeafInuse != 4742482 { + t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) + } else if stats.LeafAlloc != 13975552 { + t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure a bucket can calculate stats. +func TestBucket_Stats_Small(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + // Add a bucket that fits on a single root leaf. 
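+		// (Hedged reading of the format: a bucket this small is stored
+		// inline in its parent's leaf page, which is why every page count
+		// asserted below is zero.)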
+ b, err := tx.CreateBucket([]byte("whozawhats")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + + return nil + }); err != nil { + t.Fatal(err) + } + + db.MustCheck() + + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("whozawhats")) + stats := b.Stats() + if stats.BranchPageN != 0 { + t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) + } else if stats.BranchOverflowN != 0 { + t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) + } else if stats.LeafPageN != 0 { + t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) + } else if stats.LeafOverflowN != 0 { + t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) + } else if stats.KeyN != 1 { + t.Fatalf("unexpected KeyN: %d", stats.KeyN) + } else if stats.Depth != 1 { + t.Fatalf("unexpected Depth: %d", stats.Depth) + } else if stats.BranchInuse != 0 { + t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) + } else if stats.LeafInuse != 0 { + t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) + } + + if os.Getpagesize() == 4096 { + if stats.BranchAlloc != 0 { + t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) + } else if stats.LeafAlloc != 0 { + t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) + } + } + + if stats.BucketN != 1 { + t.Fatalf("unexpected BucketN: %d", stats.BucketN) + } else if stats.InlineBucketN != 1 { + t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) + } else if stats.InlineBucketInuse != 16+16+6 { + t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +func TestBucket_Stats_EmptyBucket(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + // Add a bucket that fits on a single root leaf. + if _, err := tx.CreateBucket([]byte("whozawhats")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + db.MustCheck() + + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("whozawhats")) + stats := b.Stats() + if stats.BranchPageN != 0 { + t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) + } else if stats.BranchOverflowN != 0 { + t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) + } else if stats.LeafPageN != 0 { + t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) + } else if stats.LeafOverflowN != 0 { + t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) + } else if stats.KeyN != 0 { + t.Fatalf("unexpected KeyN: %d", stats.KeyN) + } else if stats.Depth != 1 { + t.Fatalf("unexpected Depth: %d", stats.Depth) + } else if stats.BranchInuse != 0 { + t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) + } else if stats.LeafInuse != 0 { + t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) + } + + if os.Getpagesize() == 4096 { + if stats.BranchAlloc != 0 { + t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) + } else if stats.LeafAlloc != 0 { + t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) + } + } + + if stats.BucketN != 1 { + t.Fatalf("unexpected BucketN: %d", stats.BucketN) + } else if stats.InlineBucketN != 1 { + t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) + } else if stats.InlineBucketInuse != 16 { + t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure a bucket can calculate stats. 
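+// The byte accounting in the nested test below follows the convention the
+// sums themselves imply (an assumption about the format): a 16-byte page
+// header, 16 bytes per element, plus the raw key and value bytes.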
+func TestBucket_Stats_Nested(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("foo")) + if err != nil { + t.Fatal(err) + } + for i := 0; i < 100; i++ { + if err := b.Put([]byte(fmt.Sprintf("%02d", i)), []byte(fmt.Sprintf("%02d", i))); err != nil { + t.Fatal(err) + } + } + + bar, err := b.CreateBucket([]byte("bar")) + if err != nil { + t.Fatal(err) + } + for i := 0; i < 10; i++ { + if err := bar.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { + t.Fatal(err) + } + } + + baz, err := bar.CreateBucket([]byte("baz")) + if err != nil { + t.Fatal(err) + } + for i := 0; i < 10; i++ { + if err := baz.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil { + t.Fatal(err) + } + } + + return nil + }); err != nil { + t.Fatal(err) + } + + db.MustCheck() + + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("foo")) + stats := b.Stats() + if stats.BranchPageN != 0 { + t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) + } else if stats.BranchOverflowN != 0 { + t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) + } else if stats.LeafPageN != 2 { + t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) + } else if stats.LeafOverflowN != 0 { + t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) + } else if stats.KeyN != 122 { + t.Fatalf("unexpected KeyN: %d", stats.KeyN) + } else if stats.Depth != 3 { + t.Fatalf("unexpected Depth: %d", stats.Depth) + } else if stats.BranchInuse != 0 { + t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) + } + + foo := 16 // foo (pghdr) + foo += 101 * 16 // foo leaf elements + foo += 100*2 + 100*2 // foo leaf key/values + foo += 3 + 16 // foo -> bar key/value + + bar := 16 // bar (pghdr) + bar += 11 * 16 // bar leaf elements + bar += 10 + 10 // bar leaf key/values + bar += 3 + 16 // bar -> baz key/value + + baz := 16 // baz (inline) (pghdr) + baz += 10 * 16 // baz leaf elements + baz += 10 + 10 // baz leaf key/values + + if stats.LeafInuse != foo+bar+baz { + t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) + } + + if os.Getpagesize() == 4096 { + if stats.BranchAlloc != 0 { + t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) + } else if stats.LeafAlloc != 8192 { + t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) + } + } + + if stats.BucketN != 3 { + t.Fatalf("unexpected BucketN: %d", stats.BucketN) + } else if stats.InlineBucketN != 1 { + t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) + } else if stats.InlineBucketInuse != baz { + t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure a large bucket can calculate stats. +func TestBucket_Stats_Large(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + db := MustOpenDB() + defer db.MustClose() + + var index int + for i := 0; i < 100; i++ { + // Add bucket with lots of keys. 
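+		// (100 transactions x 1,000 puts = 100,000 sequential keys; the
+		// stats assertions below assume exactly this volume.)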
+ if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucketIfNotExists([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + for i := 0; i < 1000; i++ { + if err := b.Put([]byte(strconv.Itoa(index)), []byte(strconv.Itoa(index))); err != nil { + t.Fatal(err) + } + index++ + } + return nil + }); err != nil { + t.Fatal(err) + } + } + + db.MustCheck() + + if err := db.View(func(tx *Tx) error { + stats := tx.Bucket([]byte("widgets")).Stats() + if stats.BranchPageN != 13 { + t.Fatalf("unexpected BranchPageN: %d", stats.BranchPageN) + } else if stats.BranchOverflowN != 0 { + t.Fatalf("unexpected BranchOverflowN: %d", stats.BranchOverflowN) + } else if stats.LeafPageN != 1196 { + t.Fatalf("unexpected LeafPageN: %d", stats.LeafPageN) + } else if stats.LeafOverflowN != 0 { + t.Fatalf("unexpected LeafOverflowN: %d", stats.LeafOverflowN) + } else if stats.KeyN != 100000 { + t.Fatalf("unexpected KeyN: %d", stats.KeyN) + } else if stats.Depth != 3 { + t.Fatalf("unexpected Depth: %d", stats.Depth) + } else if stats.BranchInuse != 25257 { + t.Fatalf("unexpected BranchInuse: %d", stats.BranchInuse) + } else if stats.LeafInuse != 2596916 { + t.Fatalf("unexpected LeafInuse: %d", stats.LeafInuse) + } + + if os.Getpagesize() == 4096 { + if stats.BranchAlloc != 53248 { + t.Fatalf("unexpected BranchAlloc: %d", stats.BranchAlloc) + } else if stats.LeafAlloc != 4898816 { + t.Fatalf("unexpected LeafAlloc: %d", stats.LeafAlloc) + } + } + + if stats.BucketN != 1 { + t.Fatalf("unexpected BucketN: %d", stats.BucketN) + } else if stats.InlineBucketN != 0 { + t.Fatalf("unexpected InlineBucketN: %d", stats.InlineBucketN) + } else if stats.InlineBucketInuse != 0 { + t.Fatalf("unexpected InlineBucketInuse: %d", stats.InlineBucketInuse) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a bucket can write random keys and values across multiple transactions. +func TestBucket_Put_Single(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + index := 0 + if err := quick.Check(func(items testdata) bool { + db := MustOpenDB() + defer db.MustClose() + + m := make(map[string][]byte) + + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + for _, item := range items { + if err := db.Update(func(tx *Tx) error { + if err := tx.Bucket([]byte("widgets")).Put(item.Key, item.Value); err != nil { + panic("put error: " + err.Error()) + } + m[string(item.Key)] = item.Value + return nil + }); err != nil { + t.Fatal(err) + } + + // Verify all key/values so far. + if err := db.View(func(tx *Tx) error { + i := 0 + for k, v := range m { + value := tx.Bucket([]byte("widgets")).Get([]byte(k)) + if !bytes.Equal(value, v) { + t.Logf("value mismatch [run %d] (%d of %d):\nkey: %x\ngot: %x\nexp: %x", index, i, len(m), []byte(k), value, v) + db.CopyTempFile() + t.FailNow() + } + i++ + } + return nil + }); err != nil { + t.Fatal(err) + } + } + + index++ + return true + }, nil); err != nil { + t.Error(err) + } +} + +// Ensure that a transaction can insert multiple key/value pairs at once. +func TestBucket_Put_Multiple(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + if err := quick.Check(func(items testdata) bool { + db := MustOpenDB() + defer db.MustClose() + + // Bulk insert all values. 
+ if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.Update(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for _, item := range items { + if err := b.Put(item.Key, item.Value); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Verify all items exist. + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for _, item := range items { + value := b.Get(item.Key) + if !bytes.Equal(item.Value, value) { + db.CopyTempFile() + t.Fatalf("exp=%x; got=%x", item.Value, value) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + + return true + }, qconfig()); err != nil { + t.Error(err) + } +} + +// Ensure that a transaction can delete all key/value pairs and return to a single leaf page. +func TestBucket_Delete_Quick(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + if err := quick.Check(func(items testdata) bool { + db := MustOpenDB() + defer db.MustClose() + + // Bulk insert all values. + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.Update(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for _, item := range items { + if err := b.Put(item.Key, item.Value); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Remove items one at a time and check consistency. + for _, item := range items { + if err := db.Update(func(tx *Tx) error { + return tx.Bucket([]byte("widgets")).Delete(item.Key) + }); err != nil { + t.Fatal(err) + } + } + + // Anything before our deletion index should be nil. + if err := db.View(func(tx *Tx) error { + if err := tx.Bucket([]byte("widgets")).ForEach(func(k, v []byte) error { + t.Fatalf("bucket should be empty; found: %06x", trunc(k, 3)) + return nil + }); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + return true + }, qconfig()); err != nil { + t.Error(err) + } +} + +func ExampleBucket_Put() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Start a write transaction. + if err := db.Update(func(tx *Tx) error { + // Create a bucket. + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + return err + } + + // Set the value "bar" for the key "foo". + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + return err + } + return nil + }); err != nil { + log.Fatal(err) + } + + // Read value back in a different read-only transaction. + if err := db.View(func(tx *Tx) error { + value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) + fmt.Printf("The value of 'foo' is: %s\n", value) + return nil + }); err != nil { + log.Fatal(err) + } + + // Close database to release file lock. + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // The value of 'foo' is: bar +} + +func ExampleBucket_Delete() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Start a write transaction. + if err := db.Update(func(tx *Tx) error { + // Create a bucket. + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + return err + } + + // Set the value "bar" for the key "foo". 
+ if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + return err + } + + // Retrieve the key back from the database and verify it. + value := b.Get([]byte("foo")) + fmt.Printf("The value of 'foo' was: %s\n", value) + + return nil + }); err != nil { + log.Fatal(err) + } + + // Delete the key in a different write transaction. + if err := db.Update(func(tx *Tx) error { + return tx.Bucket([]byte("widgets")).Delete([]byte("foo")) + }); err != nil { + log.Fatal(err) + } + + // Retrieve the key again. + if err := db.View(func(tx *Tx) error { + value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) + if value == nil { + fmt.Printf("The value of 'foo' is now: nil\n") + } + return nil + }); err != nil { + log.Fatal(err) + } + + // Close database to release file lock. + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // The value of 'foo' was: bar + // The value of 'foo' is now: nil +} + +func ExampleBucket_ForEach() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Insert data into a bucket. + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("animals")) + if err != nil { + return err + } + + if err := b.Put([]byte("dog"), []byte("fun")); err != nil { + return err + } + if err := b.Put([]byte("cat"), []byte("lame")); err != nil { + return err + } + if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { + return err + } + + // Iterate over items in sorted key order. + if err := b.ForEach(func(k, v []byte) error { + fmt.Printf("A %s is %s.\n", k, v) + return nil + }); err != nil { + return err + } + + return nil + }); err != nil { + log.Fatal(err) + } + + // Close database to release file lock. + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // A cat is lame. + // A dog is fun. + // A liger is awesome. +} + +// Ensure that a cursor can return a reference to the bucket that created it. +func TestCursor_Bucket(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if cb := b.Cursor().Bucket(); !reflect.DeepEqual(cb, b) { + t.Fatal("cursor bucket mismatch") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx cursor can seek to the appropriate keys. +func TestCursor_Seek(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("0001")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("bar"), []byte("0002")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte("0003")); err != nil { + t.Fatal(err) + } + + if _, err := b.CreateBucket([]byte("bkt")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + c := tx.Bucket([]byte("widgets")).Cursor() + + // Exact match should go to the key. + if k, v := c.Seek([]byte("bar")); !bytes.Equal(k, []byte("bar")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte("0002")) { + t.Fatalf("unexpected value: %v", v) + } + + // Inexact match should go to the next key. 
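+		// (Assumed Seek contract: it returns the first key greater than or
+		// equal to the given key, so "bas" lands on "baz".)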
+		if k, v := c.Seek([]byte("bas")); !bytes.Equal(k, []byte("baz")) {
+			t.Fatalf("unexpected key: %v", k)
+		} else if !bytes.Equal(v, []byte("0003")) {
+			t.Fatalf("unexpected value: %v", v)
+		}
+
+		// Low key should go to the first key.
+		if k, v := c.Seek([]byte("")); !bytes.Equal(k, []byte("bar")) {
+			t.Fatalf("unexpected key: %v", k)
+		} else if !bytes.Equal(v, []byte("0002")) {
+			t.Fatalf("unexpected value: %v", v)
+		}
+
+		// High key should return no key.
+		if k, v := c.Seek([]byte("zzz")); k != nil {
+			t.Fatalf("expected nil key: %v", k)
+		} else if v != nil {
+			t.Fatalf("expected nil value: %v", v)
+		}
+
+		// Buckets should return their key but no value.
+		if k, v := c.Seek([]byte("bkt")); !bytes.Equal(k, []byte("bkt")) {
+			t.Fatalf("unexpected key: %v", k)
+		} else if v != nil {
+			t.Fatalf("expected nil value: %v", v)
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestCursor_Delete(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	const count = 1000
+
+	// Insert every key between 0 and $count, plus one sub-bucket.
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		for i := 0; i < count; i++ {
+			k := make([]byte, 8)
+			binary.BigEndian.PutUint64(k, uint64(i))
+			if err := b.Put(k, make([]byte, 100)); err != nil {
+				t.Fatal(err)
+			}
+		}
+		if _, err := b.CreateBucket([]byte("sub")); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.Update(func(tx *Tx) error {
+		c := tx.Bucket([]byte("widgets")).Cursor()
+		bound := make([]byte, 8)
+		binary.BigEndian.PutUint64(bound, uint64(count/2))
+		for key, _ := c.First(); bytes.Compare(key, bound) < 0; key, _ = c.Next() {
+			if err := c.Delete(); err != nil {
+				t.Fatal(err)
+			}
+		}
+
+		c.Seek([]byte("sub"))
+		if err := c.Delete(); err != ErrIncompatibleValue {
+			t.Fatalf("unexpected error: %s", err)
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.View(func(tx *Tx) error {
+		stats := tx.Bucket([]byte("widgets")).Stats()
+		if stats.KeyN != count/2+1 {
+			t.Fatalf("unexpected KeyN: %d", stats.KeyN)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a Tx cursor can seek to the appropriate keys when there are a
+// large number of keys. This test also checks that seek will always move
+// forward to the next key.
+//
+// Related: https://github.com/boltdb/bolt/pull/187
+func TestCursor_Seek_Large(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	var count = 10000
+
+	// Insert every other key between 0 and $count.
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		for i := 0; i < count; i += 100 {
+			for j := i; j < i+100; j += 2 {
+				k := make([]byte, 8)
+				binary.BigEndian.PutUint64(k, uint64(j))
+				if err := b.Put(k, make([]byte, 100)); err != nil {
+					t.Fatal(err)
+				}
+			}
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := db.View(func(tx *Tx) error {
+		c := tx.Bucket([]byte("widgets")).Cursor()
+		for i := 0; i < count; i++ {
+			seek := make([]byte, 8)
+			binary.BigEndian.PutUint64(seek, uint64(i))
+
+			k, _ := c.Seek(seek)
+
+			// The last seek is beyond the end of the range so
+			// it should return nil.
+			if i == count-1 {
+				if k != nil {
+					t.Fatal("expected nil key")
+				}
+				continue
+			}
+
+			// Otherwise we should seek to the exact key or the next key.
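+			// (Even values of i were inserted above, so the seek is exact;
+			// odd values are absent and should land on i+1.)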
+ num := binary.BigEndian.Uint64(k) + if i%2 == 0 { + if num != uint64(i) { + t.Fatalf("unexpected num: %d", num) + } + } else { + if num != uint64(i+1) { + t.Fatalf("unexpected num: %d", num) + } + } + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a cursor can iterate over an empty bucket without error. +func TestCursor_EmptyBucket(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + c := tx.Bucket([]byte("widgets")).Cursor() + k, v := c.First() + if k != nil { + t.Fatalf("unexpected key: %v", k) + } else if v != nil { + t.Fatalf("unexpected value: %v", v) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx cursor can reverse iterate over an empty bucket without error. +func TestCursor_EmptyBucketReverse(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + t.Fatal(err) + } + if err := db.View(func(tx *Tx) error { + c := tx.Bucket([]byte("widgets")).Cursor() + k, v := c.Last() + if k != nil { + t.Fatalf("unexpected key: %v", k) + } else if v != nil { + t.Fatalf("unexpected value: %v", v) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx cursor can iterate over a single root with a couple elements. +func TestCursor_Iterate_Leaf(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte{}); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte{0}); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("bar"), []byte{1}); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + tx, err := db.Begin(false) + if err != nil { + t.Fatal(err) + } + defer func() { _ = tx.Rollback() }() + + c := tx.Bucket([]byte("widgets")).Cursor() + + k, v := c.First() + if !bytes.Equal(k, []byte("bar")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte{1}) { + t.Fatalf("unexpected value: %v", v) + } + + k, v = c.Next() + if !bytes.Equal(k, []byte("baz")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte{}) { + t.Fatalf("unexpected value: %v", v) + } + + k, v = c.Next() + if !bytes.Equal(k, []byte("foo")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte{0}) { + t.Fatalf("unexpected value: %v", v) + } + + k, v = c.Next() + if k != nil { + t.Fatalf("expected nil key: %v", k) + } else if v != nil { + t.Fatalf("expected nil value: %v", v) + } + + k, v = c.Next() + if k != nil { + t.Fatalf("expected nil key: %v", k) + } else if v != nil { + t.Fatalf("expected nil value: %v", v) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx cursor can iterate in reverse over a single root with a couple elements. 
+func TestCursor_LeafRootReverse(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte{}); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte{0}); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("bar"), []byte{1}); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + tx, err := db.Begin(false) + if err != nil { + t.Fatal(err) + } + c := tx.Bucket([]byte("widgets")).Cursor() + + if k, v := c.Last(); !bytes.Equal(k, []byte("foo")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte{0}) { + t.Fatalf("unexpected value: %v", v) + } + + if k, v := c.Prev(); !bytes.Equal(k, []byte("baz")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte{}) { + t.Fatalf("unexpected value: %v", v) + } + + if k, v := c.Prev(); !bytes.Equal(k, []byte("bar")) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, []byte{1}) { + t.Fatalf("unexpected value: %v", v) + } + + if k, v := c.Prev(); k != nil { + t.Fatalf("expected nil key: %v", k) + } else if v != nil { + t.Fatalf("expected nil value: %v", v) + } + + if k, v := c.Prev(); k != nil { + t.Fatalf("expected nil key: %v", k) + } else if v != nil { + t.Fatalf("expected nil value: %v", v) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx cursor can restart from the beginning. +func TestCursor_Restart(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("bar"), []byte{}); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte{}); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + tx, err := db.Begin(false) + if err != nil { + t.Fatal(err) + } + c := tx.Bucket([]byte("widgets")).Cursor() + + if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) { + t.Fatalf("unexpected key: %v", k) + } + if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) { + t.Fatalf("unexpected key: %v", k) + } + + if k, _ := c.First(); !bytes.Equal(k, []byte("bar")) { + t.Fatalf("unexpected key: %v", k) + } + if k, _ := c.Next(); !bytes.Equal(k, []byte("foo")) { + t.Fatalf("unexpected key: %v", k) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } +} + +// Ensure that a cursor can skip over empty pages that have been deleted. +func TestCursor_First_EmptyPages(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + // Create 1000 keys in the "widgets" bucket. + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + + for i := 0; i < 1000; i++ { + if err := b.Put(u64tob(uint64(i)), []byte{}); err != nil { + t.Fatal(err) + } + } + + return nil + }); err != nil { + t.Fatal(err) + } + + // Delete half the keys and then try to iterate. 
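+	// (Strictly, 600 of the 1,000 keys are deleted, leaving the 400
+	// counted below; the cursor has to skip the leaf pages this empties.)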
+ if err := db.Update(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 0; i < 600; i++ { + if err := b.Delete(u64tob(uint64(i))); err != nil { + t.Fatal(err) + } + } + + c := b.Cursor() + var n int + for k, _ := c.First(); k != nil; k, _ = c.Next() { + n++ + } + if n != 400 { + t.Fatalf("unexpected key count: %d", n) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx can iterate over all elements in a bucket. +func TestCursor_QuickCheck(t *testing.T) { + f := func(items testdata) bool { + db := MustOpenDB() + defer db.MustClose() + + // Bulk insert all values. + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + for _, item := range items { + if err := b.Put(item.Key, item.Value); err != nil { + t.Fatal(err) + } + } + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + + // Sort test data. + sort.Sort(items) + + // Iterate over all items and check consistency. + var index = 0 + tx, err = db.Begin(false) + if err != nil { + t.Fatal(err) + } + + c := tx.Bucket([]byte("widgets")).Cursor() + for k, v := c.First(); k != nil && index < len(items); k, v = c.Next() { + if !bytes.Equal(k, items[index].Key) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, items[index].Value) { + t.Fatalf("unexpected value: %v", v) + } + index++ + } + if len(items) != index { + t.Fatalf("unexpected item count: %v, expected %v", len(items), index) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + + return true + } + if err := quick.Check(f, qconfig()); err != nil { + t.Error(err) + } +} + +// Ensure that a transaction can iterate over all elements in a bucket in reverse. +func TestCursor_QuickCheck_Reverse(t *testing.T) { + f := func(items testdata) bool { + db := MustOpenDB() + defer db.MustClose() + + // Bulk insert all values. + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + for _, item := range items { + if err := b.Put(item.Key, item.Value); err != nil { + t.Fatal(err) + } + } + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + + // Sort test data. + sort.Sort(revtestdata(items)) + + // Iterate over all items and check consistency. + var index = 0 + tx, err = db.Begin(false) + if err != nil { + t.Fatal(err) + } + c := tx.Bucket([]byte("widgets")).Cursor() + for k, v := c.Last(); k != nil && index < len(items); k, v = c.Prev() { + if !bytes.Equal(k, items[index].Key) { + t.Fatalf("unexpected key: %v", k) + } else if !bytes.Equal(v, items[index].Value) { + t.Fatalf("unexpected value: %v", v) + } + index++ + } + if len(items) != index { + t.Fatalf("unexpected item count: %v, expected %v", len(items), index) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + + return true + } + if err := quick.Check(f, qconfig()); err != nil { + t.Error(err) + } +} + +// Ensure that a Tx cursor can iterate over subbuckets. 
+func TestCursor_QuickCheck_BucketsOnly(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if _, err := b.CreateBucket([]byte("foo")); err != nil { + t.Fatal(err) + } + if _, err := b.CreateBucket([]byte("bar")); err != nil { + t.Fatal(err) + } + if _, err := b.CreateBucket([]byte("baz")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + var names []string + c := tx.Bucket([]byte("widgets")).Cursor() + for k, v := c.First(); k != nil; k, v = c.Next() { + names = append(names, string(k)) + if v != nil { + t.Fatalf("unexpected value: %v", v) + } + } + if !reflect.DeepEqual(names, []string{"bar", "baz", "foo"}) { + t.Fatalf("unexpected names: %+v", names) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a Tx cursor can reverse iterate over subbuckets. +func TestCursor_QuickCheck_BucketsOnly_Reverse(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if _, err := b.CreateBucket([]byte("foo")); err != nil { + t.Fatal(err) + } + if _, err := b.CreateBucket([]byte("bar")); err != nil { + t.Fatal(err) + } + if _, err := b.CreateBucket([]byte("baz")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + var names []string + c := tx.Bucket([]byte("widgets")).Cursor() + for k, v := c.Last(); k != nil; k, v = c.Prev() { + names = append(names, string(k)) + if v != nil { + t.Fatalf("unexpected value: %v", v) + } + } + if !reflect.DeepEqual(names, []string{"foo", "baz", "bar"}) { + t.Fatalf("unexpected names: %+v", names) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +func ExampleCursor() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Start a read-write transaction. + if err := db.Update(func(tx *Tx) error { + // Create a new bucket. + b, err := tx.CreateBucket([]byte("animals")) + if err != nil { + return err + } + + // Insert data into a bucket. + if err := b.Put([]byte("dog"), []byte("fun")); err != nil { + log.Fatal(err) + } + if err := b.Put([]byte("cat"), []byte("lame")); err != nil { + log.Fatal(err) + } + if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { + log.Fatal(err) + } + + // Create a cursor for iteration. + c := b.Cursor() + + // Iterate over items in sorted key order. This starts from the + // first key/value pair and updates the k/v variables to the + // next key/value on each iteration. + // + // The loop finishes at the end of the cursor when a nil key is returned. + for k, v := c.First(); k != nil; k, v = c.Next() { + fmt.Printf("A %s is %s.\n", k, v) + } + + return nil + }); err != nil { + log.Fatal(err) + } + + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // A cat is lame. + // A dog is fun. + // A liger is awesome. +} + +func ExampleCursor_reverse() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Start a read-write transaction. + if err := db.Update(func(tx *Tx) error { + // Create a new bucket. 
+ b, err := tx.CreateBucket([]byte("animals")) + if err != nil { + return err + } + + // Insert data into a bucket. + if err := b.Put([]byte("dog"), []byte("fun")); err != nil { + log.Fatal(err) + } + if err := b.Put([]byte("cat"), []byte("lame")); err != nil { + log.Fatal(err) + } + if err := b.Put([]byte("liger"), []byte("awesome")); err != nil { + log.Fatal(err) + } + + // Create a cursor for iteration. + c := b.Cursor() + + // Iterate over items in reverse sorted key order. This starts + // from the last key/value pair and updates the k/v variables to + // the previous key/value on each iteration. + // + // The loop finishes at the beginning of the cursor when a nil key + // is returned. + for k, v := c.Last(); k != nil; k, v = c.Prev() { + fmt.Printf("A %s is %s.\n", k, v) + } + + return nil + }); err != nil { + log.Fatal(err) + } + + // Close the database to release the file lock. + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // A liger is awesome. + // A dog is fun. + // A cat is lame. +} + +var statsFlag = flag.Bool("stats", false, "show performance stats") + +// pageSize is the size of one page in the data file. +const pageSize = 4096 + +// Ensure that a database can be opened without error. +func TestOpen(t *testing.T) { + path := tempfile() + db, err := Open(path, 0666, nil) + if err != nil { + t.Fatal(err) + } else if db == nil { + t.Fatal("expected db") + } + + if s := db.Path(); s != path { + t.Fatalf("unexpected path: %s", s) + } + + if err := db.Close(); err != nil { + t.Fatal(err) + } +} + +// Ensure that opening a database with a blank path returns an error. +func TestOpen_ErrPathRequired(t *testing.T) { + _, err := Open("", 0666, nil) + if err == nil { + t.Fatalf("expected error") + } +} + +// Ensure that opening a database with a bad path returns an error. +func TestOpen_ErrNotExists(t *testing.T) { + _, err := Open(filepath.Join(tempfile(), "bad-path"), 0666, nil) + if err == nil { + t.Fatal("expected error") + } +} + +// Ensure that opening a file that is not a Bolt database returns ErrInvalid. +func TestOpen_ErrInvalid(t *testing.T) { + path := tempfile() + + f, err := os.Create(path) + if err != nil { + t.Fatal(err) + } + if _, err := fmt.Fprintln(f, "this is not a bolt database"); err != nil { + t.Fatal(err) + } + if err := f.Close(); err != nil { + t.Fatal(err) + } + defer os.Remove(path) + + if _, err := Open(path, 0666, nil); err != ErrInvalid { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that opening a file with two invalid versions returns ErrVersionMismatch. +func TestOpen_ErrVersionMismatch(t *testing.T) { + if pageSize != os.Getpagesize() { + t.Skip("page size mismatch") + } + + // Create empty database. + db := MustOpenDB() + path := db.Path() + defer db.MustClose() + + // Close database. + if err := db.DB.Close(); err != nil { + t.Fatal(err) + } + + // Read data file. + buf, err := ioutil.ReadFile(path) + if err != nil { + t.Fatal(err) + } + + // Rewrite meta pages. + meta0 := (*meta)(unsafe.Pointer(&buf[pageHeaderSize])) + meta0.version++ + meta1 := (*meta)(unsafe.Pointer(&buf[pageSize+pageHeaderSize])) + meta1.version++ + if err := ioutil.WriteFile(path, buf, 0666); err != nil { + t.Fatal(err) + } + + // Reopen data file. + if _, err := Open(path, 0666, nil); err != ErrVersionMismatch { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that opening a file with two invalid checksums returns ErrChecksum. 
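+// Both meta pages are corrupted on purpose: Open falls back to the
+// alternate meta page when one fails validation, so a single bad
+// checksum is silently recovered rather than reported.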
+func TestOpen_ErrChecksum(t *testing.T) { + if pageSize != os.Getpagesize() { + t.Skip("page size mismatch") + } + + // Create empty database. + db := MustOpenDB() + path := db.Path() + defer db.MustClose() + + // Close database. + if err := db.DB.Close(); err != nil { + t.Fatal(err) + } + + // Read data file. + buf, err := ioutil.ReadFile(path) + if err != nil { + t.Fatal(err) + } + + // Rewrite meta pages. + meta0 := (*meta)(unsafe.Pointer(&buf[pageHeaderSize])) + meta0.pgid++ + meta1 := (*meta)(unsafe.Pointer(&buf[pageSize+pageHeaderSize])) + meta1.pgid++ + if err := ioutil.WriteFile(path, buf, 0666); err != nil { + t.Fatal(err) + } + + // Reopen data file. + if _, err := Open(path, 0666, nil); err != ErrChecksum { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that opening a database does not increase its size. +// https://github.com/boltdb/bolt/issues/291 +func TestOpen_Size(t *testing.T) { + // Open a data file. + db := MustOpenDB() + path := db.Path() + defer db.MustClose() + + pagesize := db.Info().PageSize + + // Insert until we get above the minimum 4MB size. + if err := db.Update(func(tx *Tx) error { + b, _ := tx.CreateBucketIfNotExists([]byte("data")) + for i := 0; i < 10000; i++ { + if err := b.Put([]byte(fmt.Sprintf("%04d", i)), make([]byte, 1000)); err != nil { + t.Fatal(err) + } + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Close database and grab the size. + if err := db.DB.Close(); err != nil { + t.Fatal(err) + } + sz := fileSize(path) + if sz == 0 { + t.Fatalf("unexpected new file size: %d", sz) + } + + // Reopen database, update, and check size again. + db0, err := Open(path, 0666, nil) + if err != nil { + t.Fatal(err) + } + if err := db0.Update(func(tx *Tx) error { + if err := tx.Bucket([]byte("data")).Put([]byte{0}, []byte{0}); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + if err := db0.Close(); err != nil { + t.Fatal(err) + } + newSz := fileSize(path) + if newSz == 0 { + t.Fatalf("unexpected new file size: %d", newSz) + } + + // Compare the original size with the new size. + // db size might increase by a few page sizes due to the new small update. + if sz < newSz-5*int64(pagesize) { + t.Fatalf("unexpected file growth: %d => %d", sz, newSz) + } +} + +// Ensure that opening a database beyond the max step size does not increase its size. +// https://github.com/boltdb/bolt/issues/303 +func TestOpen_Size_Large(t *testing.T) { + if testing.Short() { + t.Skip("short mode") + } + + // Open a data file. + db := MustOpenDB() + path := db.Path() + defer db.MustClose() + + pagesize := db.Info().PageSize + + // Insert until we get above the minimum 4MB size. + var index uint64 + for i := 0; i < 10000; i++ { + if err := db.Update(func(tx *Tx) error { + b, _ := tx.CreateBucketIfNotExists([]byte("data")) + for j := 0; j < 1000; j++ { + if err := b.Put(u64tob(index), make([]byte, 50)); err != nil { + t.Fatal(err) + } + index++ + } + return nil + }); err != nil { + t.Fatal(err) + } + } + + // Close database and grab the size. + if err := db.DB.Close(); err != nil { + t.Fatal(err) + } + sz := fileSize(path) + if sz == 0 { + t.Fatalf("unexpected new file size: %d", sz) + } else if sz < (1 << 30) { + t.Fatalf("expected larger initial size: %d", sz) + } + + // Reopen database, update, and check size again. 
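+	// The file should not regrow significantly here: the data file only
+	// grows when new pages are allocated, and a one-key update can reuse
+	// pages from the freelist.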
+	db0, err := Open(path, 0666, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := db0.Update(func(tx *Tx) error {
+		return tx.Bucket([]byte("data")).Put([]byte{0}, []byte{0})
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := db0.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	newSz := fileSize(path)
+	if newSz == 0 {
+		t.Fatalf("unexpected new file size: %d", newSz)
+	}
+
+	// Compare the original size with the new size.
+	// db size might increase by a few page sizes due to the new small update.
+	if sz < newSz-5*int64(pagesize) {
+		t.Fatalf("unexpected file growth: %d => %d", sz, newSz)
+	}
+}
+
+// Ensure that a re-opened database is consistent.
+func TestOpen_Check(t *testing.T) {
+	path := tempfile()
+
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := db.View(func(tx *Tx) error { return <-tx.Check() }); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	db, err = Open(path, 0666, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := db.View(func(tx *Tx) error { return <-tx.Check() }); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.Close(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that write errors to the meta file handler during initialization are returned.
+func TestOpen_MetaInitWriteError(t *testing.T) {
+	t.Skip("pending")
+}
+
+// Ensure that a database that is too small returns an error.
+func TestOpen_FileTooSmall(t *testing.T) {
+	path := tempfile()
+
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := db.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	// corrupt the database
+	if err := os.Truncate(path, int64(os.Getpagesize())); err != nil {
+		t.Fatal(err)
+	}
+
+	db, err = Open(path, 0666, nil)
+	if err == nil || err.Error() != "file size too small" {
+		t.Fatalf("unexpected error: %s", err)
+	}
+}
+
+// TestDB_Open_InitialMmapSize verifies that an InitialMmapSize large enough
+// to hold the data written by a concurrent write transaction prevents a
+// long-running read transaction from blocking the writer and deadlocking.
+// This is a very hacky test since the mmap size is not exposed.
+func TestDB_Open_InitialMmapSize(t *testing.T) {
+	path := tempfile()
+	defer os.Remove(path)
+
+	initMmapSize := 1 << 31  // 2GB
+	testWriteSize := 1 << 27 // 134MB
+
+	db, err := Open(path, 0666, &Options{InitialMmapSize: initMmapSize})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// create a long-running read transaction
+	// that never gets closed while writing
+	rtx, err := db.Begin(false)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// create a write transaction
+	wtx, err := db.Begin(true)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	b, err := wtx.CreateBucket([]byte("test"))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// and commit a large write
+	err = b.Put([]byte("foo"), make([]byte, testWriteSize))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	done := make(chan struct{})
+
+	go func() {
+		// t.Fatal must not be called from a goroutine other than the one
+		// running the test, so report failures with t.Error instead.
+		if err := wtx.Commit(); err != nil {
+			t.Error(err)
+		}
+		done <- struct{}{}
+	}()
+
+	select {
+	case <-time.After(5 * time.Second):
+		t.Errorf("reader unexpectedly blocked the writer")
+	case <-done:
+	}
+
+	if err := rtx.Rollback(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a database cannot open a transaction when it's not open.
+func TestDB_Begin_ErrDatabaseNotOpen(t *testing.T) {
+	var db DB
+	if _, err := db.Begin(false); err != ErrDatabaseNotOpen {
+		t.Fatalf("unexpected error: %s", err)
+	}
+}
+
+// Ensure that a read-write transaction can be retrieved.
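+// A minimal sketch of the manual pattern being exercised (error
+// handling elided):
+//
+//	tx, _ := db.Begin(true)
+//	defer tx.Rollback()
+//	// ... reads and writes on tx ...
+//	tx.Commit()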
+func TestDB_BeginRW(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	tx, err := db.Begin(true)
+	if err != nil {
+		t.Fatal(err)
+	} else if tx == nil {
+		t.Fatal("expected tx")
+	}
+
+	if tx.DB() != db.DB {
+		t.Fatal("unexpected tx database")
+	} else if !tx.Writable() {
+		t.Fatal("expected writable tx")
+	}
+
+	if err := tx.Commit(); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that opening a transaction while the DB is closed returns an error.
+func TestDB_BeginRW_Closed(t *testing.T) {
+	var db DB
+	if _, err := db.Begin(true); err != ErrDatabaseNotOpen {
+		t.Fatalf("unexpected error: %s", err)
+	}
+}
+
+func TestDB_Close_PendingTx_RW(t *testing.T) { testDB_Close_PendingTx(t, true) }
+func TestDB_Close_PendingTx_RO(t *testing.T) { testDB_Close_PendingTx(t, false) }
+
+// Ensure that a database cannot close while transactions are open.
+func testDB_Close_PendingTx(t *testing.T, writable bool) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	// Start transaction. Note: a writable transaction is opened for both
+	// variants, since Close is only guaranteed to block on a pending writer.
+	tx, err := db.Begin(true)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Close the database in a separate goroutine.
+	done := make(chan struct{})
+	go func() {
+		// t.Fatal must not be called from a non-test goroutine.
+		if err := db.Close(); err != nil {
+			t.Error(err)
+		}
+		close(done)
+	}()
+
+	// Ensure database hasn't closed.
+	time.Sleep(100 * time.Millisecond)
+	select {
+	case <-done:
+		t.Fatal("database closed too early")
+	default:
+	}
+
+	// Commit transaction.
+	if err := tx.Commit(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Ensure database closed now.
+	time.Sleep(100 * time.Millisecond)
+	select {
+	case <-done:
+	default:
+		t.Fatal("database did not close")
+	}
+}
+
+// Ensure a database can provide a transactional block.
+func TestDB_Update(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Put([]byte("baz"), []byte("bat")); err != nil {
+			t.Fatal(err)
+		}
+		if err := b.Delete([]byte("foo")); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := db.View(func(tx *Tx) error {
+		b := tx.Bucket([]byte("widgets"))
+		if v := b.Get([]byte("foo")); v != nil {
+			t.Fatalf("expected nil value, got: %v", v)
+		}
+		if v := b.Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) {
+			t.Fatalf("unexpected value: %v", v)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure a closed database returns an error while running a transaction block.
+func TestDB_Update_Closed(t *testing.T) {
+	var db DB
+	if err := db.Update(func(tx *Tx) error {
+		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
+			t.Fatal(err)
+		}
+		return nil
+	}); err != ErrDatabaseNotOpen {
+		t.Fatalf("unexpected error: %s", err)
+	}
+}
+
+// Ensure a panic occurs while trying to commit a managed transaction.
+func TestDB_Update_ManualCommit(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	var panicked bool
+	if err := db.Update(func(tx *Tx) error {
+		func() {
+			defer func() {
+				if r := recover(); r != nil {
+					panicked = true
+				}
+			}()
+
+			if err := tx.Commit(); err != nil {
+				t.Fatal(err)
+			}
+		}()
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	} else if !panicked {
+		t.Fatal("expected panic")
+	}
+}
+
+// Ensure a panic occurs while trying to rollback a managed transaction.
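+// Update and View own the transaction lifecycle, so a manual Commit or
+// Rollback inside the managed function would be applied twice; both
+// calls therefore panic instead.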
+func TestDB_Update_ManualRollback(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + var panicked bool + if err := db.Update(func(tx *Tx) error { + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + }() + return nil + }); err != nil { + t.Fatal(err) + } else if !panicked { + t.Fatal("expected panic") + } +} + +// Ensure a panic occurs while trying to commit a managed transaction. +func TestDB_View_ManualCommit(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + var panicked bool + if err := db.View(func(tx *Tx) error { + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + }() + return nil + }); err != nil { + t.Fatal(err) + } else if !panicked { + t.Fatal("expected panic") + } +} + +// Ensure a panic occurs while trying to rollback a managed transaction. +func TestDB_View_ManualRollback(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + var panicked bool + if err := db.View(func(tx *Tx) error { + func() { + defer func() { + if r := recover(); r != nil { + panicked = true + } + }() + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + }() + return nil + }); err != nil { + t.Fatal(err) + } else if !panicked { + t.Fatal("expected panic") + } +} + +// Ensure a write transaction that panics does not hold open locks. +func TestDB_Update_Panic(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + // Panic during update but recover. + func() { + defer func() { + if r := recover(); r != nil { + t.Log("recover: update", r) + } + }() + + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + panic("omg") + }); err != nil { + t.Fatal(err) + } + }() + + // Verify we can update again. + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Verify that our change persisted. + if err := db.Update(func(tx *Tx) error { + if tx.Bucket([]byte("widgets")) == nil { + t.Fatal("expected bucket") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure a database can return an error through a read-only transactional block. +func TestDB_View_Error(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.View(func(tx *Tx) error { + return errors.New("xxx") + }); err == nil || err.Error() != "xxx" { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure a read transaction that panics does not hold open locks. +func TestDB_View_Panic(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Panic during view transaction but recover. + func() { + defer func() { + if r := recover(); r != nil { + t.Log("recover: view", r) + } + }() + + if err := db.View(func(tx *Tx) error { + if tx.Bucket([]byte("widgets")) == nil { + t.Fatal("expected bucket") + } + panic("omg") + }); err != nil { + t.Fatal(err) + } + }() + + // Verify that we can still use read transactions. 
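+	// The managed rollback runs in a deferred handler, so the panicking
+	// user function above released its locks before the panic propagated.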
+ if err := db.View(func(tx *Tx) error { + if tx.Bucket([]byte("widgets")) == nil { + t.Fatal("expected bucket") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that DB stats can be returned. +func TestDB_Stats(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + t.Fatal(err) + } + + stats := db.Stats() + if stats.TxStats.PageCount != 2 { + t.Fatalf("unexpected TxStats.PageCount: %d", stats.TxStats.PageCount) + } else if stats.FreePageN != 0 { + t.Fatalf("unexpected FreePageN != 0: %d", stats.FreePageN) + } else if stats.PendingPageN != 2 { + t.Fatalf("unexpected PendingPageN != 2: %d", stats.PendingPageN) + } +} + +// Ensure that database pages are in expected order and type. +func TestDB_Consistency(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + t.Fatal(err) + } + + for i := 0; i < 10; i++ { + if err := db.Update(func(tx *Tx) error { + if err := tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + } + + if err := db.Update(func(tx *Tx) error { + if p, _ := tx.Page(0); p == nil { + t.Fatal("expected page") + } else if p.Type != "meta" { + t.Fatalf("unexpected page type: %s", p.Type) + } + + if p, _ := tx.Page(1); p == nil { + t.Fatal("expected page") + } else if p.Type != "meta" { + t.Fatalf("unexpected page type: %s", p.Type) + } + + if p, _ := tx.Page(2); p == nil { + t.Fatal("expected page") + } else if p.Type != "free" { + t.Fatalf("unexpected page type: %s", p.Type) + } + + if p, _ := tx.Page(3); p == nil { + t.Fatal("expected page") + } else if p.Type != "free" { + t.Fatalf("unexpected page type: %s", p.Type) + } + + if p, _ := tx.Page(4); p == nil { + t.Fatal("expected page") + } else if p.Type != "leaf" { + t.Fatalf("unexpected page type: %s", p.Type) + } + + if p, _ := tx.Page(5); p == nil { + t.Fatal("expected page") + } else if p.Type != "freelist" { + t.Fatalf("unexpected page type: %s", p.Type) + } + + if p, _ := tx.Page(6); p != nil { + t.Fatal("unexpected page") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that DB stats can be subtracted from one another. +func TestDBStats_Sub(t *testing.T) { + var a, b Stats + a.TxStats.PageCount = 3 + a.FreePageN = 4 + b.TxStats.PageCount = 10 + b.FreePageN = 14 + diff := b.Sub(&a) + if diff.TxStats.PageCount != 7 { + t.Fatalf("unexpected TxStats.PageCount: %d", diff.TxStats.PageCount) + } + + // free page stats are copied from the receiver and not subtracted + if diff.FreePageN != 14 { + t.Fatalf("unexpected FreePageN: %d", diff.FreePageN) + } +} + +// Ensure two functions can perform updates in a single batch. +func TestDB_Batch(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Iterate over multiple updates in separate goroutines. + n := 2 + ch := make(chan error) + for i := 0; i < n; i++ { + go func(i int) { + ch <- db.Batch(func(tx *Tx) error { + return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) + }) + }(i) + } + + // Check all responses to make sure there's no error. 
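+	// Batch may coalesce both calls into one transaction or split them
+	// across several; it only promises that each function runs, so the
+	// errors are collected through the channel.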
+ for i := 0; i < n; i++ { + if err := <-ch; err != nil { + t.Fatal(err) + } + } + + // Ensure data is correct. + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 0; i < n; i++ { + if v := b.Get(u64tob(uint64(i))); v == nil { + t.Errorf("key not found: %d", i) + } + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +func TestDB_Batch_Panic(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + var sentinel int + var bork = &sentinel + var problem interface{} + var err error + + // Execute a function inside a batch that panics. + func() { + defer func() { + if p := recover(); p != nil { + problem = p + } + }() + err = db.Batch(func(tx *Tx) error { + panic(bork) + }) + }() + + // Verify there is no error. + if g, e := err, error(nil); g != e { + t.Fatalf("wrong error: %v != %v", g, e) + } + // Verify the panic was captured. + if g, e := problem, bork; g != e { + t.Fatalf("wrong error: %v != %v", g, e) + } +} + +func TestDB_BatchFull(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + t.Fatal(err) + } + + const size = 3 + // buffered so we never leak goroutines + ch := make(chan error, size) + put := func(i int) { + ch <- db.Batch(func(tx *Tx) error { + return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) + }) + } + + db.MaxBatchSize = size + // high enough to never trigger here + db.MaxBatchDelay = 1 * time.Hour + + go put(1) + go put(2) + + // Give the batch a chance to exhibit bugs. + time.Sleep(10 * time.Millisecond) + + // not triggered yet + select { + case <-ch: + t.Fatalf("batch triggered too early") + default: + } + + go put(3) + + // Check all responses to make sure there's no error. + for i := 0; i < size; i++ { + if err := <-ch; err != nil { + t.Fatal(err) + } + } + + // Ensure data is correct. + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 1; i <= size; i++ { + if v := b.Get(u64tob(uint64(i))); v == nil { + t.Errorf("key not found: %d", i) + } + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +func TestDB_BatchTime(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + t.Fatal(err) + } + + const size = 1 + // buffered so we never leak goroutines + ch := make(chan error, size) + put := func(i int) { + ch <- db.Batch(func(tx *Tx) error { + return tx.Bucket([]byte("widgets")).Put(u64tob(uint64(i)), []byte{}) + }) + } + + db.MaxBatchSize = 1000 + db.MaxBatchDelay = 0 + + go put(1) + + // Batch must trigger by time alone. + + // Check all responses to make sure there's no error. + for i := 0; i < size; i++ { + if err := <-ch; err != nil { + t.Fatal(err) + } + } + + // Ensure data is correct. + if err := db.View(func(tx *Tx) error { + b := tx.Bucket([]byte("widgets")) + for i := 1; i <= size; i++ { + if v := b.Get(u64tob(uint64(i))); v == nil { + t.Errorf("key not found: %d", i) + } + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +func ExampleDB_Update() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Execute several commands within a read-write transaction. 
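+	// Update commits the transaction when the function returns nil and
+	// rolls it back when it returns an error or panics.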
+ if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + return err + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + return err + } + return nil + }); err != nil { + log.Fatal(err) + } + + // Read the value back from a separate read-only transaction. + if err := db.View(func(tx *Tx) error { + value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) + fmt.Printf("The value of 'foo' is: %s\n", value) + return nil + }); err != nil { + log.Fatal(err) + } + + // Close database to release the file lock. + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // The value of 'foo' is: bar +} + +func ExampleDB_View() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Insert data into a bucket. + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("people")) + if err != nil { + return err + } + if err := b.Put([]byte("john"), []byte("doe")); err != nil { + return err + } + if err := b.Put([]byte("susy"), []byte("que")); err != nil { + return err + } + return nil + }); err != nil { + log.Fatal(err) + } + + // Access data from within a read-only transactional block. + if err := db.View(func(tx *Tx) error { + v := tx.Bucket([]byte("people")).Get([]byte("john")) + fmt.Printf("John's last name is %s.\n", v) + return nil + }); err != nil { + log.Fatal(err) + } + + // Close database to release the file lock. + if err := db.Close(); err != nil { + log.Fatal(err) + } + + // Output: + // John's last name is doe. +} + +func ExampleDB_Begin_ReadOnly() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Create a bucket using a read-write transaction. + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + log.Fatal(err) + } + + // Create several keys in a transaction. + tx, err := db.Begin(true) + if err != nil { + log.Fatal(err) + } + b := tx.Bucket([]byte("widgets")) + if err := b.Put([]byte("john"), []byte("blue")); err != nil { + log.Fatal(err) + } + if err := b.Put([]byte("abby"), []byte("red")); err != nil { + log.Fatal(err) + } + if err := b.Put([]byte("zephyr"), []byte("purple")); err != nil { + log.Fatal(err) + } + if err := tx.Commit(); err != nil { + log.Fatal(err) + } + + // Iterate over the values in sorted key order. 
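+	// A read-only transaction must be closed with Rollback; Commit on it
+	// would return ErrTxNotWritable.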
+	tx, err = db.Begin(false)
+	if err != nil {
+		log.Fatal(err)
+	}
+	c := tx.Bucket([]byte("widgets")).Cursor()
+	for k, v := c.First(); k != nil; k, v = c.Next() {
+		fmt.Printf("%s likes %s\n", k, v)
+	}
+
+	if err := tx.Rollback(); err != nil {
+		log.Fatal(err)
+	}
+
+	if err := db.Close(); err != nil {
+		log.Fatal(err)
+	}
+
+	// Output:
+	// abby likes red
+	// john likes blue
+	// zephyr likes purple
+}
+
+func BenchmarkDBBatchAutomatic(b *testing.B) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		_, err := tx.CreateBucket([]byte("bench"))
+		return err
+	}); err != nil {
+		b.Fatal(err)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		start := make(chan struct{})
+		var wg sync.WaitGroup
+
+		for round := 0; round < 1000; round++ {
+			wg.Add(1)
+
+			go func(id uint32) {
+				defer wg.Done()
+				<-start
+
+				h := fnv.New32a()
+				buf := make([]byte, 4)
+				binary.LittleEndian.PutUint32(buf, id)
+				_, _ = h.Write(buf[:])
+				k := h.Sum(nil)
+				insert := func(tx *Tx) error {
+					b := tx.Bucket([]byte("bench"))
+					return b.Put(k, []byte("filler"))
+				}
+				if err := db.Batch(insert); err != nil {
+					b.Error(err)
+					return
+				}
+			}(uint32(round))
+		}
+		close(start)
+		wg.Wait()
+	}
+
+	b.StopTimer()
+	validateBatchBench(b, db)
+}
+
+func BenchmarkDBBatchSingle(b *testing.B) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		_, err := tx.CreateBucket([]byte("bench"))
+		return err
+	}); err != nil {
+		b.Fatal(err)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		start := make(chan struct{})
+		var wg sync.WaitGroup
+
+		for round := 0; round < 1000; round++ {
+			wg.Add(1)
+			go func(id uint32) {
+				defer wg.Done()
+				<-start
+
+				h := fnv.New32a()
+				buf := make([]byte, 4)
+				binary.LittleEndian.PutUint32(buf, id)
+				_, _ = h.Write(buf[:])
+				k := h.Sum(nil)
+				insert := func(tx *Tx) error {
+					b := tx.Bucket([]byte("bench"))
+					return b.Put(k, []byte("filler"))
+				}
+				if err := db.Update(insert); err != nil {
+					b.Error(err)
+					return
+				}
+			}(uint32(round))
+		}
+		close(start)
+		wg.Wait()
+	}
+
+	b.StopTimer()
+	validateBatchBench(b, db)
+}
+
+func BenchmarkDBBatchManual10x100(b *testing.B) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		_, err := tx.CreateBucket([]byte("bench"))
+		return err
+	}); err != nil {
+		b.Fatal(err)
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		start := make(chan struct{})
+		var wg sync.WaitGroup
+
+		for major := 0; major < 10; major++ {
+			wg.Add(1)
+			go func(id uint32) {
+				defer wg.Done()
+				<-start
+
+				insert100 := func(tx *Tx) error {
+					h := fnv.New32a()
+					buf := make([]byte, 4)
+					for minor := uint32(0); minor < 100; minor++ {
+						binary.LittleEndian.PutUint32(buf, uint32(id*100+minor))
+						h.Reset()
+						_, _ = h.Write(buf[:])
+						k := h.Sum(nil)
+						b := tx.Bucket([]byte("bench"))
+						if err := b.Put(k, []byte("filler")); err != nil {
+							return err
+						}
+					}
+					return nil
+				}
+				// b.Fatal must not be called from a spawned goroutine.
+				if err := db.Update(insert100); err != nil {
+					b.Error(err)
+					return
+				}
+			}(uint32(major))
+		}
+		close(start)
+		wg.Wait()
+	}
+
+	b.StopTimer()
+	validateBatchBench(b, db)
+}
+
+func validateBatchBench(b *testing.B, db *WDB) {
+	var rollback = errors.New("sentinel error to cause rollback")
+	validate := func(tx *Tx) error {
+		bucket := tx.Bucket([]byte("bench"))
+		h := fnv.New32a()
+		buf := make([]byte, 4)
+		for id := uint32(0); id < 1000; id++ {
+			binary.LittleEndian.PutUint32(buf, id)
+			h.Reset()
+			_, _ = h.Write(buf[:])
+			k := h.Sum(nil)
+			v := bucket.Get(k)
+			if v == nil {
+				b.Errorf("not found id=%d key=%x", id, k)
+				continue
+			}
+			if g, e := v, []byte("filler"); !bytes.Equal(g, e) {
+				b.Errorf("bad value for id=%d key=%x: %s != %q", id, k, g, e)
+			}
+			if err := bucket.Delete(k); err != nil {
+				return err
+			}
+		}
+		// should be empty now
+		c := bucket.Cursor()
+		for k, v := c.First(); k != nil; k, v = c.Next() {
+			b.Errorf("unexpected key: %x = %q", k, v)
+		}
+		return rollback
+	}
+	if err := db.Update(validate); err != nil && err != rollback {
+		b.Error(err)
+	}
+}
+
+// WDB is a test wrapper for DB.
+type WDB struct {
+	*DB
+}
+
+// MustOpenDB returns a new, open DB at a temporary location.
+func MustOpenDB() *WDB {
+	db, err := Open(tempfile(), 0666, nil)
+	if err != nil {
+		panic(err)
+	}
+	return &WDB{db}
+}
+
+// Close closes the database and deletes the underlying file.
+func (db *WDB) Close() error {
+	// Log statistics.
+	if *statsFlag {
+		db.PrintStats()
+	}
+
+	// Check database consistency after every test.
+	db.MustCheck()
+
+	// Close database and remove file.
+	defer os.Remove(db.Path())
+	return db.DB.Close()
+}
+
+// MustClose closes the database and deletes the underlying file. Panic on error.
+func (db *WDB) MustClose() {
+	if err := db.Close(); err != nil {
+		panic(err)
+	}
+}
+
+// PrintStats prints the database stats.
+func (db *WDB) PrintStats() {
+	var stats = db.Stats()
+	fmt.Printf("[db] %-20s %-20s %-20s\n",
+		fmt.Sprintf("pg(%d/%d)", stats.TxStats.PageCount, stats.TxStats.PageAlloc),
+		fmt.Sprintf("cur(%d)", stats.TxStats.CursorCount),
+		fmt.Sprintf("node(%d/%d)", stats.TxStats.NodeCount, stats.TxStats.NodeDeref),
+	)
+	fmt.Printf("     %-20s %-20s %-20s\n",
+		fmt.Sprintf("rebal(%d/%v)", stats.TxStats.Rebalance, truncDuration(stats.TxStats.RebalanceTime)),
+		fmt.Sprintf("spill(%d/%v)", stats.TxStats.Spill, truncDuration(stats.TxStats.SpillTime)),
+		fmt.Sprintf("w(%d/%v)", stats.TxStats.Write, truncDuration(stats.TxStats.WriteTime)),
+	)
+}
+
+// MustCheck runs a consistency check on the database and panics if any errors are found.
+func (db *WDB) MustCheck() {
+	if err := db.Update(func(tx *Tx) error {
+		// Collect all the errors.
+		var errors []error
+		for err := range tx.Check() {
+			errors = append(errors, err)
+			if len(errors) > 10 {
+				break
+			}
+		}
+
+		// If errors occurred, copy the DB and print the errors.
+		if len(errors) > 0 {
+			var path = tempfile()
+			if err := tx.CopyFile(path, 0600); err != nil {
+				panic(err)
+			}
+
+			// Print errors.
+			fmt.Print("\n\n")
+			fmt.Printf("consistency check failed (%d errors)\n", len(errors))
+			for _, err := range errors {
+				fmt.Println(err)
+			}
+			fmt.Println("")
+			fmt.Println("db saved to:")
+			fmt.Println(path)
+			fmt.Print("\n\n")
+			os.Exit(-1)
+		}
+
+		return nil
+	}); err != nil && err != ErrDatabaseNotOpen {
+		panic(err)
+	}
+}
+
+// CopyTempFile copies a database to a temporary file.
+func (db *WDB) CopyTempFile() {
+	path := tempfile()
+	if err := db.View(func(tx *Tx) error {
+		return tx.CopyFile(path, 0600)
+	}); err != nil {
+		panic(err)
+	}
+	fmt.Println("db copied to: ", path)
+}
+
+// tempfile returns a temporary file path.
+func tempfile() string {
+	f, err := ioutil.TempFile("", "bolt-")
+	if err != nil {
+		panic(err)
+	}
+	if err := f.Close(); err != nil {
+		panic(err)
+	}
+	if err := os.Remove(f.Name()); err != nil {
+		panic(err)
+	}
+	return f.Name()
+}
+
+// mustContainKeys checks that a bucket contains a given set of keys.
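+// Only the key set of m matters; its values are ignored. A typical
+// (hypothetical) call:
+//
+//	mustContainKeys(b, map[string]string{"foo": "", "bar": ""})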
+func mustContainKeys(b *Bucket, m map[string]string) { + found := make(map[string]string) + if err := b.ForEach(func(k, _ []byte) error { + found[string(k)] = "" + return nil + }); err != nil { + panic(err) + } + + // Check for keys found in bucket that shouldn't be there. + var keys []string + for k, _ := range found { + if _, ok := m[string(k)]; !ok { + keys = append(keys, k) + } + } + if len(keys) > 0 { + sort.Strings(keys) + panic(fmt.Sprintf("keys found(%d): %s", len(keys), strings.Join(keys, ","))) + } + + // Check for keys not found in bucket that should be there. + for k, _ := range m { + if _, ok := found[string(k)]; !ok { + keys = append(keys, k) + } + } + if len(keys) > 0 { + sort.Strings(keys) + panic(fmt.Sprintf("keys not found(%d): %s", len(keys), strings.Join(keys, ","))) + } +} + +func trunc(b []byte, length int) []byte { + if length < len(b) { + return b[:length] + } + return b +} + +func truncDuration(d time.Duration) string { + return regexp.MustCompile(`^(\d+)(\.\d+)`).ReplaceAllString(d.String(), "$1") +} + +func fileSize(path string) int64 { + fi, err := os.Stat(path) + if err != nil { + return 0 + } + return fi.Size() +} + +// u64tob converts a uint64 into an 8-byte slice. +func u64tob(v uint64) []byte { + b := make([]byte, 8) + binary.BigEndian.PutUint64(b, v) + return b +} + +// btou64 converts an 8-byte slice into an uint64. +func btou64(b []byte) uint64 { return binary.BigEndian.Uint64(b) } + +// Ensure that a page is added to a transaction's freelist. +func TestFreelist_free(t *testing.T) { + f := newFreelist() + f.free(100, &page{id: 12}) + if !reflect.DeepEqual([]pgid{12}, f.pending[100]) { + t.Fatalf("exp=%v; got=%v", []pgid{12}, f.pending[100]) + } +} + +// Ensure that a page and its overflow is added to a transaction's freelist. +func TestFreelist_free_overflow(t *testing.T) { + f := newFreelist() + f.free(100, &page{id: 12, overflow: 3}) + if exp := []pgid{12, 13, 14, 15}; !reflect.DeepEqual(exp, f.pending[100]) { + t.Fatalf("exp=%v; got=%v", exp, f.pending[100]) + } +} + +// Ensure that a transaction's free pages can be released. +func TestFreelist_release(t *testing.T) { + f := newFreelist() + f.free(100, &page{id: 12, overflow: 1}) + f.free(100, &page{id: 9}) + f.free(102, &page{id: 39}) + f.release(100) + f.release(101) + if exp := []pgid{9, 12, 13}; !reflect.DeepEqual(exp, f.ids) { + t.Fatalf("exp=%v; got=%v", exp, f.ids) + } + + f.release(102) + if exp := []pgid{9, 12, 13, 39}; !reflect.DeepEqual(exp, f.ids) { + t.Fatalf("exp=%v; got=%v", exp, f.ids) + } +} + +// Ensure that a freelist can find contiguous blocks of pages. 
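+// allocate(n) returns the starting id of a run of n contiguous free pages,
+// removing the run from the list, or 0 when no such run (or n == 0) exists.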
+func TestFreelist_allocate(t *testing.T) { + f := &freelist{ids: []pgid{3, 4, 5, 6, 7, 9, 12, 13, 18}} + if id := int(f.allocate(3)); id != 3 { + t.Fatalf("exp=3; got=%v", id) + } + if id := int(f.allocate(1)); id != 6 { + t.Fatalf("exp=6; got=%v", id) + } + if id := int(f.allocate(3)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if id := int(f.allocate(2)); id != 12 { + t.Fatalf("exp=12; got=%v", id) + } + if id := int(f.allocate(1)); id != 7 { + t.Fatalf("exp=7; got=%v", id) + } + if id := int(f.allocate(0)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if id := int(f.allocate(0)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if exp := []pgid{9, 18}; !reflect.DeepEqual(exp, f.ids) { + t.Fatalf("exp=%v; got=%v", exp, f.ids) + } + + if id := int(f.allocate(1)); id != 9 { + t.Fatalf("exp=9; got=%v", id) + } + if id := int(f.allocate(1)); id != 18 { + t.Fatalf("exp=18; got=%v", id) + } + if id := int(f.allocate(1)); id != 0 { + t.Fatalf("exp=0; got=%v", id) + } + if exp := []pgid{}; !reflect.DeepEqual(exp, f.ids) { + t.Fatalf("exp=%v; got=%v", exp, f.ids) + } +} + +// Ensure that a freelist can deserialize from a freelist page. +func TestFreelist_read(t *testing.T) { + // Create a page. + var buf [4096]byte + page := (*page)(unsafe.Pointer(&buf[0])) + page.flags = freelistPageFlag + page.count = 2 + + // Insert 2 page ids. + ids := (*[3]pgid)(unsafe.Pointer(&page.ptr)) + ids[0] = 23 + ids[1] = 50 + + // Deserialize page into a freelist. + f := newFreelist() + f.read(page) + + // Ensure that there are two page ids in the freelist. + if exp := []pgid{23, 50}; !reflect.DeepEqual(exp, f.ids) { + t.Fatalf("exp=%v; got=%v", exp, f.ids) + } +} + +// Ensure that a freelist can serialize into a freelist page. +func TestFreelist_write(t *testing.T) { + // Create a freelist and write it to a page. + var buf [4096]byte + f := &freelist{ids: []pgid{12, 39}, pending: make(map[txid][]pgid)} + f.pending[100] = []pgid{28, 11} + f.pending[101] = []pgid{3} + p := (*page)(unsafe.Pointer(&buf[0])) + if err := f.write(p); err != nil { + t.Fatal(err) + } + + // Read the page back out. + f2 := newFreelist() + f2.read(p) + + // Ensure that the freelist is correct. + // All pages should be present and in reverse order. + if exp := []pgid{3, 11, 12, 28, 39}; !reflect.DeepEqual(exp, f2.ids) { + t.Fatalf("exp=%v; got=%v", exp, f2.ids) + } +} + +func Benchmark_FreelistRelease10K(b *testing.B) { benchmark_FreelistRelease(b, 10000) } +func Benchmark_FreelistRelease100K(b *testing.B) { benchmark_FreelistRelease(b, 100000) } +func Benchmark_FreelistRelease1000K(b *testing.B) { benchmark_FreelistRelease(b, 1000000) } +func Benchmark_FreelistRelease10000K(b *testing.B) { benchmark_FreelistRelease(b, 10000000) } + +func benchmark_FreelistRelease(b *testing.B, size int) { + ids := randomPgids(size) + pending := randomPgids(len(ids) / 400) + b.ResetTimer() + for i := 0; i < b.N; i++ { + f := &freelist{ids: ids, pending: map[txid][]pgid{1: pending}} + f.release(1) + } +} + +func randomPgids(n int) []pgid { + rand.Seed(42) + pgids := make(pgids, n) + for i := range pgids { + pgids[i] = pgid(rand.Int63()) + } + sort.Sort(pgids) + return pgids +} + +// Ensure that a node can insert a key/value. 
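+// put keeps the inode list sorted by key and overwrites an existing entry
+// in place, which is why the duplicate "foo" below leaves only three inodes.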
+func TestNode_put(t *testing.T) { + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: &meta{pgid: 1}}}} + n.put([]byte("baz"), []byte("baz"), []byte("2"), 0, 0) + n.put([]byte("foo"), []byte("foo"), []byte("0"), 0, 0) + n.put([]byte("bar"), []byte("bar"), []byte("1"), 0, 0) + n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, leafPageFlag) + + if len(n.inodes) != 3 { + t.Fatalf("exp=3; got=%d", len(n.inodes)) + } + if k, v := n.inodes[0].key, n.inodes[0].value; string(k) != "bar" || string(v) != "1" { + t.Fatalf("exp=<bar,1>; got=<%s,%s>", k, v) + } + if k, v := n.inodes[1].key, n.inodes[1].value; string(k) != "baz" || string(v) != "2" { + t.Fatalf("exp=<baz,2>; got=<%s,%s>", k, v) + } + if k, v := n.inodes[2].key, n.inodes[2].value; string(k) != "foo" || string(v) != "3" { + t.Fatalf("exp=<foo,3>; got=<%s,%s>", k, v) + } + if n.inodes[2].flags != uint32(leafPageFlag) { + t.Fatalf("not a leaf: %d", n.inodes[2].flags) + } +} + +// Ensure that a node can deserialize from a leaf page. +func TestNode_read_LeafPage(t *testing.T) { + // Create a page. + var buf [4096]byte + page := (*page)(unsafe.Pointer(&buf[0])) + page.flags = leafPageFlag + page.count = 2 + + // Insert 2 elements at the beginning. sizeof(leafPageElement) == 16 + nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr)) + nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2 + nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4 + + // Write data for the nodes at the end. + data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) + copy(data[:], []byte("barfooz")) + copy(data[7:], []byte("helloworldbye")) + + // Deserialize page into a leaf. + n := &node{} + n.read(page) + + // Check that there are two inodes with correct data. + if !n.isLeaf { + t.Fatal("expected leaf") + } + if len(n.inodes) != 2 { + t.Fatalf("exp=2; got=%d", len(n.inodes)) + } + if k, v := n.inodes[0].key, n.inodes[0].value; string(k) != "bar" || string(v) != "fooz" { + t.Fatalf("exp=<bar,fooz>; got=<%s,%s>", k, v) + } + if k, v := n.inodes[1].key, n.inodes[1].value; string(k) != "helloworld" || string(v) != "bye" { + t.Fatalf("exp=<helloworld,bye>; got=<%s,%s>", k, v) + } +} + +// Ensure that a node can serialize into a leaf page. +func TestNode_write_LeafPage(t *testing.T) { + // Create a node. + n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + n.put([]byte("susy"), []byte("susy"), []byte("que"), 0, 0) + n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0, 0) + n.put([]byte("john"), []byte("john"), []byte("johnson"), 0, 0) + + // Write it to a page. + var buf [4096]byte + p := (*page)(unsafe.Pointer(&buf[0])) + n.write(p) + + // Read the page back in. + n2 := &node{} + n2.read(p) + + // Check that the two pages are the same. + if len(n2.inodes) != 3 { + t.Fatalf("exp=3; got=%d", len(n2.inodes)) + } + if k, v := n2.inodes[0].key, n2.inodes[0].value; string(k) != "john" || string(v) != "johnson" { + t.Fatalf("exp=<john,johnson>; got=<%s,%s>", k, v) + } + if k, v := n2.inodes[1].key, n2.inodes[1].value; string(k) != "ricki" || string(v) != "lake" { + t.Fatalf("exp=<ricki,lake>; got=<%s,%s>", k, v) + } + if k, v := n2.inodes[2].key, n2.inodes[2].value; string(k) != "susy" || string(v) != "que" { + t.Fatalf("exp=<susy,que>; got=<%s,%s>", k, v) + } +} + +// Ensure that a node can split into appropriate subgroups. +func TestNode_split(t *testing.T) { + // Create a node. 
+ n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0, 0) + + // Split between 2 & 3. + n.split(100) + + var parent = n.parent + if len(parent.children) != 2 { + t.Fatalf("exp=2; got=%d", len(parent.children)) + } + if len(parent.children[0].inodes) != 2 { + t.Fatalf("exp=2; got=%d", len(parent.children[0].inodes)) + } + if len(parent.children[1].inodes) != 3 { + t.Fatalf("exp=3; got=%d", len(parent.children[1].inodes)) + } +} + +// Ensure that a page with the minimum number of inodes just returns a single node. +func TestNode_split_MinKeys(t *testing.T) { + // Create a node. + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) + + // Split. + n.split(20) + if n.parent != nil { + t.Fatalf("expected nil parent") + } +} + +// Ensure that a node that has keys that all fit on a page just returns one leaf. +func TestNode_split_SinglePage(t *testing.T) { + // Create a node. + n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} + n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0, 0) + n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0, 0) + + // Split. + n.split(4096) + if n.parent != nil { + t.Fatalf("expected nil parent") + } +} + +// Ensure that the page type can be returned in human readable format. +func TestPage_typ(t *testing.T) { + if typ := (&page{flags: branchPageFlag}).typ(); typ != "branch" { + t.Fatalf("exp=branch; got=%v", typ) + } + if typ := (&page{flags: leafPageFlag}).typ(); typ != "leaf" { + t.Fatalf("exp=leaf; got=%v", typ) + } + if typ := (&page{flags: metaPageFlag}).typ(); typ != "meta" { + t.Fatalf("exp=meta; got=%v", typ) + } + if typ := (&page{flags: freelistPageFlag}).typ(); typ != "freelist" { + t.Fatalf("exp=freelist; got=%v", typ) + } + if typ := (&page{flags: 20000}).typ(); typ != "unknown<4e20>" { + t.Fatalf("exp=unknown<4e20>; got=%v", typ) + } +} + +// Ensure that the hexdump debugging function doesn't blow up. +func TestPage_dump(t *testing.T) { + (&page{id: 256}).hexdump(16) +} + +func TestPgids_merge(t *testing.T) { + a := pgids{4, 5, 6, 10, 11, 12, 13, 27} + b := pgids{1, 3, 8, 9, 25, 30} + c := a.merge(b) + if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) { + t.Errorf("mismatch: %v", c) + } + + a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36} + b = pgids{8, 9, 25, 30} + c = a.merge(b) + if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) { + t.Errorf("mismatch: %v", c) + } +} + +func TestPgids_merge_quick(t *testing.T) { + if err := quick.Check(func(a, b pgids) bool { + // Sort incoming lists. 
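+		// merge assumes both inputs are already sorted; quick.Check
+		// generates arbitrary slices, so sort them before merging.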
+ sort.Sort(a) + sort.Sort(b) + + // Merge the two lists together. + got := a.merge(b) + + // The expected value should be the two lists combined and sorted. + exp := append(a, b...) + sort.Sort(exp) + + if !reflect.DeepEqual(exp, got) { + t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got) + return false + } + + return true + }, nil); err != nil { + t.Fatal(err) + } +} + +// testing/quick defaults to 5 iterations and a random seed. +// You can override these settings from the command line: +// +// -quick.count The number of iterations to perform. +// -quick.seed The seed to use for randomizing. +// -quick.maxitems The maximum number of items to insert into a DB. +// -quick.maxksize The maximum size of a key. +// -quick.maxvsize The maximum size of a value. +// + +var qcount, qseed, qmaxitems, qmaxksize, qmaxvsize int + +func init() { + flag.IntVar(&qcount, "quick.count", 5, "") + flag.IntVar(&qseed, "quick.seed", int(time.Now().UnixNano())%100000, "") + flag.IntVar(&qmaxitems, "quick.maxitems", 1000, "") + flag.IntVar(&qmaxksize, "quick.maxksize", 1024, "") + flag.IntVar(&qmaxvsize, "quick.maxvsize", 1024, "") + flag.Parse() + fmt.Fprintln(os.Stderr, "seed:", qseed) + fmt.Fprintf(os.Stderr, "quick settings: count=%v, items=%v, ksize=%v, vsize=%v\n", qcount, qmaxitems, qmaxksize, qmaxvsize) +} + +func qconfig() *quick.Config { + return &quick.Config{ + MaxCount: qcount, + Rand: rand.New(rand.NewSource(int64(qseed))), + } +} + +type testdata []testdataitem + +func (t testdata) Len() int { return len(t) } +func (t testdata) Swap(i, j int) { t[i], t[j] = t[j], t[i] } +func (t testdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == -1 } + +func (t testdata) Generate(rand *rand.Rand, size int) reflect.Value { + n := rand.Intn(qmaxitems-1) + 1 + items := make(testdata, n) + used := make(map[string]bool) + for i := 0; i < n; i++ { + item := &items[i] + // Ensure that keys are unique by looping until we find one that we have not already used. 
+		for {
+			item.Key = randByteSlice(rand, 1, qmaxksize)
+			if !used[string(item.Key)] {
+				used[string(item.Key)] = true
+				break
+			}
+		}
+		item.Value = randByteSlice(rand, 0, qmaxvsize)
+	}
+	return reflect.ValueOf(items)
+}
+
+type revtestdata []testdataitem
+
+func (t revtestdata) Len() int           { return len(t) }
+func (t revtestdata) Swap(i, j int)      { t[i], t[j] = t[j], t[i] }
+func (t revtestdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == 1 }
+
+type testdataitem struct {
+	Key   []byte
+	Value []byte
+}
+
+func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte {
+	n := rand.Intn(maxSize-minSize) + minSize
+	b := make([]byte, n)
+	for i := 0; i < n; i++ {
+		b[i] = byte(rand.Intn(255))
+	}
+	return b
+}
+
+func TestSimulate_1op_1p(t *testing.T)     { testSimulate(t, 1, 1) }
+func TestSimulate_10op_1p(t *testing.T)    { testSimulate(t, 10, 1) }
+func TestSimulate_100op_1p(t *testing.T)   { testSimulate(t, 100, 1) }
+func TestSimulate_1000op_1p(t *testing.T)  { testSimulate(t, 1000, 1) }
+func TestSimulate_10000op_1p(t *testing.T) { testSimulate(t, 10000, 1) }
+
+func TestSimulate_10op_10p(t *testing.T)    { testSimulate(t, 10, 10) }
+func TestSimulate_100op_10p(t *testing.T)   { testSimulate(t, 100, 10) }
+func TestSimulate_1000op_10p(t *testing.T)  { testSimulate(t, 1000, 10) }
+func TestSimulate_10000op_10p(t *testing.T) { testSimulate(t, 10000, 10) }
+
+func TestSimulate_100op_100p(t *testing.T)   { testSimulate(t, 100, 100) }
+func TestSimulate_1000op_100p(t *testing.T)  { testSimulate(t, 1000, 100) }
+func TestSimulate_10000op_100p(t *testing.T) { testSimulate(t, 10000, 100) }
+
+func TestSimulate_10000op_1000p(t *testing.T) { testSimulate(t, 10000, 1000) }
+
+// Randomly generate operations on a given database with multiple clients to ensure consistency and thread safety.
+func testSimulate(t *testing.T, threadCount, parallelism int) {
+	if testing.Short() {
+		t.Skip("skipping test in short mode.")
+	}
+
+	rand.Seed(int64(qseed))
+
+	// A list of operations that readers and writers can perform.
+	var readerHandlers = []simulateHandler{simulateGetHandler}
+	var writerHandlers = []simulateHandler{simulateGetHandler, simulatePutHandler}
+
+	var versions = make(map[int]*QuickDB)
+	versions[1] = NewQuickDB()
+
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	var mutex sync.Mutex
+
+	// Run n threads in parallel, each with their own operation.
+	var wg sync.WaitGroup
+	var threads = make(chan bool, parallelism)
+	var i int
+	for {
+		threads <- true
+		wg.Add(1)
+		writable := ((rand.Int() % 100) < 20) // 20% writers
+
+		// Choose an operation to execute.
+		var handler simulateHandler
+		if writable {
+			handler = writerHandlers[rand.Intn(len(writerHandlers))]
+		} else {
+			handler = readerHandlers[rand.Intn(len(readerHandlers))]
+		}
+
+		// Execute a thread for the given operation.
+		go func(writable bool, handler simulateHandler) {
+			defer wg.Done()
+
+			// Start transaction. t.Fatal must not be called from a
+			// spawned goroutine, so report failures with t.Error.
+			tx, err := db.Begin(writable)
+			if err != nil {
+				t.Error("tx begin: ", err)
+				<-threads
+				return
+			}
+
+			// Obtain current state of the dataset.
+			mutex.Lock()
+			var qdb = versions[tx.ID()]
+			if writable {
+				qdb = versions[tx.ID()-1].Copy()
+			}
+			mutex.Unlock()
+
+			// Make sure we commit/rollback the tx at the end and update the state.
+			if writable {
+				defer func() {
+					mutex.Lock()
+					versions[tx.ID()] = qdb
+					mutex.Unlock()
+
+					if err := tx.Commit(); err != nil {
+						t.Error(err)
+					}
+				}()
+			} else {
+				defer func() { _ = tx.Rollback() }()
+			}
+
+			// Ignore operation if we don't have data yet.
+			if qdb == nil {
+				return
+			}
+
+			// Execute handler.
+			handler(tx, qdb)
+
+			// Release a thread back to the scheduling loop.
+			<-threads
+		}(writable, handler)
+
+		i++
+		if i > threadCount {
+			break
+		}
+	}
+
+	// Wait until all threads are done.
+	wg.Wait()
+}
+
+type simulateHandler func(tx *Tx, qdb *QuickDB)
+
+// Retrieves a key from the database and verifies that it is what is expected.
+func simulateGetHandler(tx *Tx, qdb *QuickDB) {
+	// Randomly retrieve an existing key path.
+	keys := qdb.Rand()
+	if len(keys) == 0 {
+		return
+	}
+
+	// Retrieve root bucket.
+	b := tx.Bucket(keys[0])
+	if b == nil {
+		panic(fmt.Sprintf("bucket[0] expected: %08x\n", trunc(keys[0], 4)))
+	}
+
+	// Drill into nested buckets.
+	for _, key := range keys[1 : len(keys)-1] {
+		b = b.Bucket(key)
+		if b == nil {
+			panic(fmt.Sprintf("bucket[n] expected: %v -> %v\n", keys, key))
+		}
+	}
+
+	// Verify key/value on the final bucket.
+	expected := qdb.Get(keys)
+	actual := b.Get(keys[len(keys)-1])
+	if !bytes.Equal(actual, expected) {
+		fmt.Println("=== EXPECTED ===")
+		fmt.Println(expected)
+		fmt.Println("=== ACTUAL ===")
+		fmt.Println(actual)
+		fmt.Println("=== END ===")
+		panic("value mismatch")
+	}
+}
+
+// Inserts a key into the database.
+func simulatePutHandler(tx *Tx, qdb *QuickDB) {
+	var err error
+	keys, value := randKeys(), randValue()
+
+	// Retrieve root bucket.
+	b := tx.Bucket(keys[0])
+	if b == nil {
+		b, err = tx.CreateBucket(keys[0])
+		if err != nil {
+			panic("create bucket: " + err.Error())
+		}
+	}
+
+	// Create nested buckets, if necessary.
+	for _, key := range keys[1 : len(keys)-1] {
+		child := b.Bucket(key)
+		if child != nil {
+			b = child
+		} else {
+			b, err = b.CreateBucket(key)
+			if err != nil {
+				panic("create bucket: " + err.Error())
+			}
+		}
+	}
+
+	// Insert into database.
+	if err := b.Put(keys[len(keys)-1], value); err != nil {
+		panic("put: " + err.Error())
+	}
+
+	// Insert into in-memory database.
+	qdb.Put(keys, value)
+}
+
+// QuickDB is an in-memory database that replicates the functionality of the
+// Bolt DB type. It is meant for testing that the Bolt database is consistent.
+type QuickDB struct {
+	sync.RWMutex
+	m map[string]interface{}
+}
+
+// NewQuickDB returns an instance of QuickDB.
+func NewQuickDB() *QuickDB {
+	return &QuickDB{m: make(map[string]interface{})}
+}
+
+// Get retrieves the value at a key path.
+func (db *QuickDB) Get(keys [][]byte) []byte {
+	db.RLock()
+	defer db.RUnlock()
+
+	m := db.m
+	for _, key := range keys[:len(keys)-1] {
+		value := m[string(key)]
+		if value == nil {
+			return nil
+		}
+		switch value := value.(type) {
+		case map[string]interface{}:
+			m = value
+		case []byte:
+			return nil
+		}
+	}
+
+	// Only return if it's a simple value.
+	if value, ok := m[string(keys[len(keys)-1])].([]byte); ok {
+		return value
+	}
+	return nil
+}
+
+// Put inserts a value into a key path.
+func (db *QuickDB) Put(keys [][]byte, value []byte) {
+	db.Lock()
+	defer db.Unlock()
+
+	// Build buckets all the way down the key path.
+	m := db.m
+	for _, key := range keys[:len(keys)-1] {
+		if _, ok := m[string(key)].([]byte); ok {
+			return // Keypath intersects with a simple value. Do nothing.
+		}
+
+		if m[string(key)] == nil {
+			m[string(key)] = make(map[string]interface{})
+		}
+		m = m[string(key)].(map[string]interface{})
+	}
+
+	// Insert value into the last key.
+	m[string(keys[len(keys)-1])] = value
+}
+
+// Rand returns a random key path that points to a simple value.
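+// The returned path mirrors the shape the simulate handlers expect: the
+// first element names a root bucket, any middle elements name nested
+// buckets, and the last element is the key itself.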
+func (db *QuickDB) Rand() [][]byte { + db.RLock() + defer db.RUnlock() + if len(db.m) == 0 { + return nil + } + var keys [][]byte + db.rand(db.m, &keys) + return keys +} + +func (db *QuickDB) rand(m map[string]interface{}, keys *[][]byte) { + i, index := 0, rand.Intn(len(m)) + for k, v := range m { + if i == index { + *keys = append(*keys, []byte(k)) + if v, ok := v.(map[string]interface{}); ok { + db.rand(v, keys) + } + return + } + i++ + } + panic("quickdb rand: out-of-range") +} + +// Copy copies the entire database. +func (db *QuickDB) Copy() *QuickDB { + db.RLock() + defer db.RUnlock() + return &QuickDB{m: db.copy(db.m)} +} + +func (db *QuickDB) copy(m map[string]interface{}) map[string]interface{} { + clone := make(map[string]interface{}, len(m)) + for k, v := range m { + switch v := v.(type) { + case map[string]interface{}: + clone[k] = db.copy(v) + default: + clone[k] = v + } + } + return clone +} + +func randKey() []byte { + var min, max = 1, 1024 + n := rand.Intn(max-min) + min + b := make([]byte, n) + for i := 0; i < n; i++ { + b[i] = byte(rand.Intn(255)) + } + return b +} + +func randKeys() [][]byte { + var keys [][]byte + var count = rand.Intn(2) + 2 + for i := 0; i < count; i++ { + keys = append(keys, randKey()) + } + return keys +} + +func randValue() []byte { + n := rand.Intn(8192) + b := make([]byte, n) + for i := 0; i < n; i++ { + b[i] = byte(rand.Intn(255)) + } + return b +} + +// Ensure that committing a closed transaction returns an error. +func TestTx_Commit_ErrTxClosed(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + + if _, err := tx.CreateBucket([]byte("foo")); err != nil { + t.Fatal(err) + } + + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + + if err := tx.Commit(); err != ErrTxClosed { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that rolling back a closed transaction returns an error. +func TestTx_Rollback_ErrTxClosed(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + + if err := tx.Rollback(); err != nil { + t.Fatal(err) + } + if err := tx.Rollback(); err != ErrTxClosed { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that committing a read-only transaction returns an error. +func TestTx_Commit_ErrTxNotWritable(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + tx, err := db.Begin(false) + if err != nil { + t.Fatal(err) + } + if err := tx.Commit(); err != ErrTxNotWritable { + t.Fatal(err) + } +} + +// Ensure that a transaction can retrieve a cursor on the root bucket. 
+func TestTx_Cursor(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
+			t.Fatal(err)
+		}
+
+		if _, err := tx.CreateBucket([]byte("woojits")); err != nil {
+			t.Fatal(err)
+		}
+
+		c := tx.Cursor()
+		if k, v := c.First(); !bytes.Equal(k, []byte("widgets")) {
+			t.Fatalf("unexpected key: %v", k)
+		} else if v != nil {
+			t.Fatalf("unexpected value: %v", v)
+		}
+
+		if k, v := c.Next(); !bytes.Equal(k, []byte("woojits")) {
+			t.Fatalf("unexpected key: %v", k)
+		} else if v != nil {
+			t.Fatalf("unexpected value: %v", v)
+		}
+
+		if k, v := c.Next(); k != nil {
+			t.Fatalf("unexpected key: %v", k)
+		} else if v != nil {
+			t.Fatalf("unexpected value: %v", v)
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that creating a bucket with a read-only transaction returns an error.
+func TestTx_CreateBucket_ErrTxNotWritable(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.View(func(tx *Tx) error {
+		_, err := tx.CreateBucket([]byte("foo"))
+		if err != ErrTxNotWritable {
+			t.Fatalf("unexpected error: %s", err)
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that creating a bucket on a closed transaction returns an error.
+func TestTx_CreateBucket_ErrTxClosed(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	tx, err := db.Begin(true)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := tx.Commit(); err != nil {
+		t.Fatal(err)
+	}
+
+	if _, err := tx.CreateBucket([]byte("foo")); err != ErrTxClosed {
+		t.Fatalf("unexpected error: %s", err)
+	}
+}
+
+// Ensure that a Tx can retrieve a bucket.
+func TestTx_Bucket(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		if _, err := tx.CreateBucket([]byte("widgets")); err != nil {
+			t.Fatal(err)
+		}
+		if tx.Bucket([]byte("widgets")) == nil {
+			t.Fatal("expected bucket")
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a Tx retrieving a non-existent key returns nil.
+func TestTx_Get_NotFound(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
+			t.Fatal(err)
+		}
+		if b.Get([]byte("no_such_key")) != nil {
+			t.Fatal("expected nil value")
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can be created and retrieved.
+func TestTx_CreateBucket(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+
+	// Create a bucket.
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			t.Fatal(err)
+		} else if b == nil {
+			t.Fatal("expected bucket")
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	// Read the bucket through a separate transaction.
+	if err := db.View(func(tx *Tx) error {
+		if tx.Bucket([]byte("widgets")) == nil {
+			t.Fatal("expected bucket")
+		}
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure that a bucket can be created if it doesn't already exist.
+func TestTx_CreateBucketIfNotExists(t *testing.T) {
+	db := MustOpenDB()
+	defer db.MustClose()
+	if err := db.Update(func(tx *Tx) error {
+		// Create bucket.
+		if b, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil {
+			t.Fatal(err)
+		} else if b == nil {
+			t.Fatal("expected bucket")
+		}
+
+		// Create bucket again.
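+		// Unlike CreateBucket, which would fail here with ErrBucketExists,
+		// the second call must return the existing bucket without error.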
+ if b, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil { + t.Fatal(err) + } else if b == nil { + t.Fatal("expected bucket") + } + + return nil + }); err != nil { + t.Fatal(err) + } + + // Read the bucket through a separate transaction. + if err := db.View(func(tx *Tx) error { + if tx.Bucket([]byte("widgets")) == nil { + t.Fatal("expected bucket") + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure transaction returns an error if creating an unnamed bucket. +func TestTx_CreateBucketIfNotExists_ErrBucketNameRequired(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucketIfNotExists([]byte{}); err != ErrBucketNameRequired { + t.Fatalf("unexpected error: %s", err) + } + + if _, err := tx.CreateBucketIfNotExists(nil); err != ErrBucketNameRequired { + t.Fatalf("unexpected error: %s", err) + } + + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a bucket cannot be created twice. +func TestTx_CreateBucket_ErrBucketExists(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + // Create a bucket. + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Create the same bucket again. + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket([]byte("widgets")); err != ErrBucketExists { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a bucket is created with a non-blank name. +func TestTx_CreateBucket_ErrBucketNameRequired(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + if _, err := tx.CreateBucket(nil); err != ErrBucketNameRequired { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that a bucket can be deleted. +func TestTx_DeleteBucket(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + // Create a bucket and add a value. + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + // Delete the bucket and make sure we can't get the value. + if err := db.Update(func(tx *Tx) error { + if err := tx.DeleteBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + if tx.Bucket([]byte("widgets")) != nil { + t.Fatal("unexpected bucket") + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.Update(func(tx *Tx) error { + // Create the bucket again and make sure there's not a phantom value. + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if v := b.Get([]byte("foo")); v != nil { + t.Fatalf("unexpected phantom value: %v", v) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that deleting a bucket on a closed transaction returns an error. +func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + tx, err := db.Begin(true) + if err != nil { + t.Fatal(err) + } + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + if err := tx.DeleteBucket([]byte("foo")); err != ErrTxClosed { + t.Fatalf("unexpected error: %s", err) + } +} + +// Ensure that deleting a bucket with a read-only transaction returns an error. 
+func TestTx_DeleteBucket_ReadOnly(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.View(func(tx *Tx) error { + if err := tx.DeleteBucket([]byte("foo")); err != ErrTxNotWritable { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that nothing happens when deleting a bucket that doesn't exist. +func TestTx_DeleteBucket_NotFound(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + if err := tx.DeleteBucket([]byte("widgets")); err != ErrBucketNotFound { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that no error is returned when a tx.ForEach function does not return +// an error. +func TestTx_ForEach_NoError(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + + if err := tx.ForEach(func(name []byte, b *Bucket) error { + return nil + }); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that an error is returned when a tx.ForEach function returns an error. +func TestTx_ForEach_WithError(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + + marker := errors.New("marker") + if err := tx.ForEach(func(name []byte, b *Bucket) error { + return marker + }); err != marker { + t.Fatalf("unexpected error: %s", err) + } + return nil + }); err != nil { + t.Fatal(err) + } +} + +// Ensure that Tx commit handlers are called after a transaction successfully commits. +func TestTx_OnCommit(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + var x int + if err := db.Update(func(tx *Tx) error { + tx.OnCommit(func() { x += 1 }) + tx.OnCommit(func() { x += 2 }) + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } else if x != 3 { + t.Fatalf("unexpected x: %d", x) + } +} + +// Ensure that Tx commit handlers are NOT called after a transaction rolls back. +func TestTx_OnCommit_Rollback(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + var x int + if err := db.Update(func(tx *Tx) error { + tx.OnCommit(func() { x += 1 }) + tx.OnCommit(func() { x += 2 }) + if _, err := tx.CreateBucket([]byte("widgets")); err != nil { + t.Fatal(err) + } + return errors.New("rollback this commit") + }); err == nil || err.Error() != "rollback this commit" { + t.Fatalf("unexpected error: %s", err) + } else if x != 0 { + t.Fatalf("unexpected x: %d", x) + } +} + +// Ensure that the database can be copied to a file path. 
+func TestTx_CopyFile(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + + path := tempfile() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte("bat")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + return tx.CopyFile(path, 0600) + }); err != nil { + t.Fatal(err) + } + + db2, err := Open(path, 0600, nil) + if err != nil { + t.Fatal(err) + } + + if err := db2.View(func(tx *Tx) error { + if v := tx.Bucket([]byte("widgets")).Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) { + t.Fatalf("unexpected value: %v", v) + } + if v := tx.Bucket([]byte("widgets")).Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) { + t.Fatalf("unexpected value: %v", v) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db2.Close(); err != nil { + t.Fatal(err) + } +} + +type failWriterError struct{} + +func (failWriterError) Error() string { + return "error injected for tests" +} + +type failWriter struct { + // fail after this many bytes + After int +} + +func (f *failWriter) Write(p []byte) (n int, err error) { + n = len(p) + if n > f.After { + n = f.After + err = failWriterError{} + } + f.After -= n + return n, err +} + +// Ensure that Copy handles write errors right. +func TestTx_CopyFile_Error_Meta(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte("bat")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + return tx.Copy(&failWriter{}) + }); err == nil || err.Error() != "meta 0 copy: error injected for tests" { + t.Fatalf("unexpected error: %v", err) + } +} + +// Ensure that Copy handles write errors right. +func TestTx_CopyFile_Error_Normal(t *testing.T) { + db := MustOpenDB() + defer db.MustClose() + if err := db.Update(func(tx *Tx) error { + b, err := tx.CreateBucket([]byte("widgets")) + if err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("foo"), []byte("bar")); err != nil { + t.Fatal(err) + } + if err := b.Put([]byte("baz"), []byte("bat")); err != nil { + t.Fatal(err) + } + return nil + }); err != nil { + t.Fatal(err) + } + + if err := db.View(func(tx *Tx) error { + return tx.Copy(&failWriter{3 * db.Info().PageSize}) + }); err == nil || err.Error() != "error injected for tests" { + t.Fatalf("unexpected error: %v", err) + } +} + +func ExampleTx_Rollback() { + // Open the database. + db, err := Open(tempfile(), 0666, nil) + if err != nil { + log.Fatal(err) + } + defer os.Remove(db.Path()) + + // Create a bucket. + if err := db.Update(func(tx *Tx) error { + _, err := tx.CreateBucket([]byte("widgets")) + return err + }); err != nil { + log.Fatal(err) + } + + // Set a value for a key. + if err := db.Update(func(tx *Tx) error { + return tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar")) + }); err != nil { + log.Fatal(err) + } + + // Update the key but rollback the transaction so it never saves. 
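+	// (db.Begin is used directly instead of db.Update so that the rollback
+	// is explicit rather than triggered by a returned error.)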
+	tx, err := db.Begin(true)
+	if err != nil {
+		log.Fatal(err)
+	}
+	b := tx.Bucket([]byte("widgets"))
+	if err := b.Put([]byte("foo"), []byte("baz")); err != nil {
+		log.Fatal(err)
+	}
+	if err := tx.Rollback(); err != nil {
+		log.Fatal(err)
+	}
+
+	// Ensure that our original value is still set.
+	if err := db.View(func(tx *Tx) error {
+		value := tx.Bucket([]byte("widgets")).Get([]byte("foo"))
+		fmt.Printf("The value for 'foo' is still: %s\n", value)
+		return nil
+	}); err != nil {
+		log.Fatal(err)
+	}
+
+	// Close database to release file lock.
+	if err := db.Close(); err != nil {
+		log.Fatal(err)
+	}
+
+	// Output:
+	// The value for 'foo' is still: bar
+}
+
+func ExampleTx_CopyFile() {
+	// Open the database.
+	db, err := Open(tempfile(), 0666, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer os.Remove(db.Path())
+
+	// Create a bucket and a key.
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucket([]byte("widgets"))
+		if err != nil {
+			return err
+		}
+		if err := b.Put([]byte("foo"), []byte("bar")); err != nil {
+			return err
+		}
+		return nil
+	}); err != nil {
+		log.Fatal(err)
+	}
+
+	// Copy the database to another file.
+	toFile := tempfile()
+	if err := db.View(func(tx *Tx) error {
+		return tx.CopyFile(toFile, 0666)
+	}); err != nil {
+		log.Fatal(err)
+	}
+	defer os.Remove(toFile)
+
+	// Open the cloned database.
+	db2, err := Open(toFile, 0666, nil)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Ensure that the key exists in the copy.
+	if err := db2.View(func(tx *Tx) error {
+		value := tx.Bucket([]byte("widgets")).Get([]byte("foo"))
+		fmt.Printf("The value for 'foo' in the clone is: %s\n", value)
+		return nil
+	}); err != nil {
+		log.Fatal(err)
+	}
+
+	// Close database to release file lock.
+	if err := db.Close(); err != nil {
+		log.Fatal(err)
+	}
+
+	if err := db2.Close(); err != nil {
+		log.Fatal(err)
+	}
+
+	// Output:
+	// The value for 'foo' in the clone is: bar
+}
+
+// Ensure the "info" command can print information about a database.
+func TestInfoCommand_Run(t *testing.T) {
+	db := MustOpen2(0666, nil)
+	db.DB.Close()
+	defer db.Close()
+
+	// Run the info command.
+	m := NewMain()
+	if err := m.Run("info", db.Path); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// Ensure the "stats" command executes correctly with an empty database.
+func TestStatsCommand_Run_EmptyDatabase(t *testing.T) {
+	// Skip if the OS page size differs from the 4KB the expected output assumes.
+	if os.Getpagesize() != 4096 {
+		t.Skip("system does not use 4KB page size")
+	}
+
+	db := MustOpen2(0666, nil)
+	defer db.Close()
+	db.DB.Close()
+
+	// Generate expected result.
+	exp := "Aggregate statistics for 0 buckets\n\n" +
+		"Page count statistics\n" +
+		"\tNumber of logical branch pages: 0\n" +
+		"\tNumber of physical branch overflow pages: 0\n" +
+		"\tNumber of logical leaf pages: 0\n" +
+		"\tNumber of physical leaf overflow pages: 0\n" +
+		"Tree statistics\n" +
+		"\tNumber of keys/value pairs: 0\n" +
+		"\tNumber of levels in B+tree: 0\n" +
+		"Page size utilization\n" +
+		"\tBytes allocated for physical branch pages: 0\n" +
+		"\tBytes actually used for branch data: 0 (0%)\n" +
+		"\tBytes allocated for physical leaf pages: 0\n" +
+		"\tBytes actually used for leaf data: 0 (0%)\n" +
+		"Bucket statistics\n" +
+		"\tTotal number of buckets: 0\n" +
+		"\tTotal number on inlined buckets: 0 (0%)\n" +
+		"\tBytes used for inlined buckets: 0 (0%)\n"
+
+	// Run the command.
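+	// NewMainTW (defined below) swaps in in-memory buffers, so the command's
+	// stdout can be compared against exp.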
+	m := NewMainTW()
+	if err := m.Run("stats", db.Path); err != nil {
+		t.Fatal(err)
+	} else if m.Stdout.String() != exp {
+		t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String())
+	}
+}
+
+// Ensure the "stats" command can execute correctly.
+func TestStatsCommand_Run(t *testing.T) {
+	// Skip if the OS page size differs from the 4KB the expected output assumes.
+	if os.Getpagesize() != 4096 {
+		t.Skip("system does not use 4KB page size")
+	}
+
+	db := MustOpen2(0666, nil)
+	defer db.Close()
+
+	if err := db.Update(func(tx *Tx) error {
+		// Create "foo" bucket.
+		b, err := tx.CreateBucket([]byte("foo"))
+		if err != nil {
+			return err
+		}
+		for i := 0; i < 10; i++ {
+			if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
+				return err
+			}
+		}
+
+		// Create "bar" bucket.
+		b, err = tx.CreateBucket([]byte("bar"))
+		if err != nil {
+			return err
+		}
+		for i := 0; i < 100; i++ {
+			if err := b.Put([]byte(strconv.Itoa(i)), []byte(strconv.Itoa(i))); err != nil {
+				return err
+			}
+		}
+
+		// Create "baz" bucket.
+		b, err = tx.CreateBucket([]byte("baz"))
+		if err != nil {
+			return err
+		}
+		if err := b.Put([]byte("key"), []byte("value")); err != nil {
+			return err
+		}
+
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+	db.DB.Close()
+
+	// Generate expected result.
+	exp := "Aggregate statistics for 3 buckets\n\n" +
+		"Page count statistics\n" +
+		"\tNumber of logical branch pages: 0\n" +
+		"\tNumber of physical branch overflow pages: 0\n" +
+		"\tNumber of logical leaf pages: 1\n" +
+		"\tNumber of physical leaf overflow pages: 0\n" +
+		"Tree statistics\n" +
+		"\tNumber of keys/value pairs: 111\n" +
+		"\tNumber of levels in B+tree: 1\n" +
+		"Page size utilization\n" +
+		"\tBytes allocated for physical branch pages: 0\n" +
+		"\tBytes actually used for branch data: 0 (0%)\n" +
+		"\tBytes allocated for physical leaf pages: 4096\n" +
+		"\tBytes actually used for leaf data: 1996 (48%)\n" +
+		"Bucket statistics\n" +
+		"\tTotal number of buckets: 3\n" +
+		"\tTotal number on inlined buckets: 2 (66%)\n" +
+		"\tBytes used for inlined buckets: 236 (11%)\n"
+
+	// Run the command.
+	m := NewMainTW()
+	if err := m.Run("stats", db.Path); err != nil {
+		t.Fatal(err)
+	} else if m.Stdout.String() != exp {
+		t.Fatalf("unexpected stdout:\n\n%s", m.Stdout.String())
+	}
+}
+
+// MainTW is a test wrapper around MainT that captures stdin, stdout, and stderr.
+type MainTW struct {
+	*MainT
+	Stdin  bytes.Buffer
+	Stdout bytes.Buffer
+	Stderr bytes.Buffer
+}
+
+// NewMainTW returns a new instance of MainTW.
+func NewMainTW() *MainTW {
+	m := &MainTW{MainT: NewMain()}
+	m.MainT.Stdin = &m.Stdin
+	m.MainT.Stdout = &m.Stdout
+	m.MainT.Stderr = &m.Stderr
+	return m
+}
+
+// MustOpen2 creates a database in a temporary location.
+func MustOpen2(mode os.FileMode, options *Options) *WDB2 {
+	// Create temporary path.
+	f, _ := ioutil.TempFile("", "bolt-")
+	f.Close()
+	os.Remove(f.Name())
+
+	db, err := Open(f.Name(), mode, options)
+	if err != nil {
+		panic(err.Error())
+	}
+	return &WDB2{DB: db, Path: f.Name()}
+}
+
+// WDB2 is a test wrapper for DB that also records the database's file path.
+type WDB2 struct {
+	*DB
+	Path string
+}
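+
+// Usage sketch (illustrative): open a scratch database, close the underlying
+// handle so a CLI command can reopen the file, then inspect captured output:
+//
+//	db := MustOpen2(0666, nil)
+//	defer db.Close()
+//	db.DB.Close()
+//
+//	m := NewMainTW()
+//	if err := m.Run("info", db.Path); err != nil {
+//		t.Fatal(err)
+//	}
+//	t.Log(m.Stdout.String())
+
+// Close closes and removes the database.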
+func (db *WDB2) Close() error {
+	defer os.Remove(db.Path)
+	return db.DB.Close()
+}
+
+func TestCompactCommand_Run(t *testing.T) {
+	var s int64
+	if err := binary.Read(crypto.Reader, binary.BigEndian, &s); err != nil {
+		t.Fatal(err)
+	}
+	rand.Seed(s)
+
+	dstdb := MustOpen2(0666, nil)
+	dstdb.Close()
+
+	// fill the db
+	db := MustOpen2(0666, nil)
+	if err := db.Update(func(tx *Tx) error {
+		n := 2 + rand.Intn(5)
+		for i := 0; i < n; i++ {
+			k := []byte(fmt.Sprintf("b%d", i))
+			b, err := tx.CreateBucketIfNotExists(k)
+			if err != nil {
+				return err
+			}
+			if err := b.SetSequence(uint64(i)); err != nil {
+				return err
+			}
+			if err := fillBucket(b, append(k, '.')); err != nil {
+				return err
+			}
+		}
+		return nil
+	}); err != nil {
+		db.Close()
+		t.Fatal(err)
+	}
+
+	// make the db grow by adding large values, and delete them.
+	if err := db.Update(func(tx *Tx) error {
+		b, err := tx.CreateBucketIfNotExists([]byte("large_vals"))
+		if err != nil {
+			return err
+		}
+		n := 5 + rand.Intn(5)
+		for i := 0; i < n; i++ {
+			v := make([]byte, 1000*1000*(1+rand.Intn(5)))
+			_, err := crypto.Read(v)
+			if err != nil {
+				return err
+			}
+			if err := b.Put([]byte(fmt.Sprintf("l%d", i)), v); err != nil {
+				return err
+			}
+		}
+		return nil
+	}); err != nil {
+		db.Close()
+		t.Fatal(err)
+	}
+	if err := db.Update(func(tx *Tx) error {
+		c := tx.Bucket([]byte("large_vals")).Cursor()
+		for k, _ := c.First(); k != nil; k, _ = c.Next() {
+			if err := c.Delete(); err != nil {
+				return err
+			}
+		}
+		return tx.DeleteBucket([]byte("large_vals"))
+	}); err != nil {
+		db.Close()
+		t.Fatal(err)
+	}
+	db.DB.Close()
+	defer db.Close()
+	defer dstdb.Close()
+
+	dbChk, err := chkdb(db.Path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	m := NewMainTW()
+	if err := m.Run("compact", "-o", dstdb.Path, db.Path); err != nil {
+		t.Fatal(err)
+	}
+
+	dbChkAfterCompact, err := chkdb(db.Path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	dstdbChk, err := chkdb(dstdb.Path)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if !bytes.Equal(dbChk, dbChkAfterCompact) {
+		t.Error("the original db has been touched")
+	}
+	if !bytes.Equal(dbChk, dstdbChk) {
+		t.Error("the compacted db data isn't the same as the original db")
+	}
+}
+
+func fillBucket(b *Bucket, prefix []byte) error {
+	n := 10 + rand.Intn(50)
+	for i := 0; i < n; i++ {
+		v := make([]byte, 10*(1+rand.Intn(4)))
+		_, err := crypto.Read(v)
+		if err != nil {
+			return err
+		}
+		k := append(prefix, []byte(fmt.Sprintf("k%d", i))...)
+		if err := b.Put(k, v); err != nil {
+			return err
+		}
+	}
+	// limit depth of subbuckets
+	s := 2 + rand.Intn(4)
+	if len(prefix) > (2*s + 1) {
+		return nil
+	}
+	n = 1 + rand.Intn(3)
+	for i := 0; i < n; i++ {
+		k := append(prefix, []byte(fmt.Sprintf("b%d", i))...)
+		sb, err := b.CreateBucket(k)
+		if err != nil {
+			return err
+		}
+		if err := fillBucket(sb, append(k, '.')); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func chkdb(path string) ([]byte, error) {
+	db, err := Open(path, 0666, nil)
+	if err != nil {
+		return nil, err
+	}
+	defer db.Close()
+	var buf bytes.Buffer
+	err = db.View(func(tx *Tx) error {
+		return tx.ForEach(func(name []byte, b *Bucket) error {
+			return walkBucket(b, name, nil, &buf)
+		})
+	})
+	if err != nil {
+		return nil, err
+	}
+	return buf.Bytes(), nil
+}
+
+func walkBucket(parent *Bucket, k []byte, v []byte, w io.Writer) error {
+	if _, err := fmt.Fprintf(w, "%d:%x=%x\n", parent.Sequence(), k, v); err != nil {
+		return err
+	}
+
+	// not a bucket, exit.
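+	// (Callers pass v == nil for sub-buckets, so a non-nil v means k/v was a
+	// plain key/value pair that has already been printed above.)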
+ if v != nil { + return nil + } + return parent.ForEach(func(k, v []byte) error { + if v == nil { + return walkBucket(parent.Bucket(k), k, nil, w) + } + return walkBucket(parent, k, v, w) + }) +} + + +func MainTest() { +} diff --git a/tests/integration.sh b/tests/integration.sh new file mode 100755 index 0000000..e69de29 --- /dev/null +++ b/tests/integration.sh diff --git a/tests/main.go b/tests/main.go new file mode 100644 index 0000000..26555fe --- /dev/null +++ b/tests/main.go @@ -0,0 +1,7 @@ +package main + +import "gkv" + +func main() { + gkv.MainTest() +} diff --git a/tests/node_test.go b/tests/node_test.go deleted file mode 100644 index fa5d10f..0000000 --- a/tests/node_test.go +++ /dev/null @@ -1,156 +0,0 @@ -package bolt - -import ( - "testing" - "unsafe" -) - -// Ensure that a node can insert a key/value. -func TestNode_put(t *testing.T) { - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{meta: &meta{pgid: 1}}}} - n.put([]byte("baz"), []byte("baz"), []byte("2"), 0, 0) - n.put([]byte("foo"), []byte("foo"), []byte("0"), 0, 0) - n.put([]byte("bar"), []byte("bar"), []byte("1"), 0, 0) - n.put([]byte("foo"), []byte("foo"), []byte("3"), 0, leafPageFlag) - - if len(n.inodes) != 3 { - t.Fatalf("exp=3; got=%d", len(n.inodes)) - } - if k, v := n.inodes[0].key, n.inodes[0].value; string(k) != "bar" || string(v) != "1" { - t.Fatalf("exp=<bar,1>; got=<%s,%s>", k, v) - } - if k, v := n.inodes[1].key, n.inodes[1].value; string(k) != "baz" || string(v) != "2" { - t.Fatalf("exp=<baz,2>; got=<%s,%s>", k, v) - } - if k, v := n.inodes[2].key, n.inodes[2].value; string(k) != "foo" || string(v) != "3" { - t.Fatalf("exp=<foo,3>; got=<%s,%s>", k, v) - } - if n.inodes[2].flags != uint32(leafPageFlag) { - t.Fatalf("not a leaf: %d", n.inodes[2].flags) - } -} - -// Ensure that a node can deserialize from a leaf page. -func TestNode_read_LeafPage(t *testing.T) { - // Create a page. - var buf [4096]byte - page := (*page)(unsafe.Pointer(&buf[0])) - page.flags = leafPageFlag - page.count = 2 - - // Insert 2 elements at the beginning. sizeof(leafPageElement) == 16 - nodes := (*[3]leafPageElement)(unsafe.Pointer(&page.ptr)) - nodes[0] = leafPageElement{flags: 0, pos: 32, ksize: 3, vsize: 4} // pos = sizeof(leafPageElement) * 2 - nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4 - - // Write data for the nodes at the end. - data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) - copy(data[:], []byte("barfooz")) - copy(data[7:], []byte("helloworldbye")) - - // Deserialize page into a leaf. - n := &node{} - n.read(page) - - // Check that there are two inodes with correct data. - if !n.isLeaf { - t.Fatal("expected leaf") - } - if len(n.inodes) != 2 { - t.Fatalf("exp=2; got=%d", len(n.inodes)) - } - if k, v := n.inodes[0].key, n.inodes[0].value; string(k) != "bar" || string(v) != "fooz" { - t.Fatalf("exp=<bar,fooz>; got=<%s,%s>", k, v) - } - if k, v := n.inodes[1].key, n.inodes[1].value; string(k) != "helloworld" || string(v) != "bye" { - t.Fatalf("exp=<helloworld,bye>; got=<%s,%s>", k, v) - } -} - -// Ensure that a node can serialize into a leaf page. -func TestNode_write_LeafPage(t *testing.T) { - // Create a node. - n := &node{isLeaf: true, inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} - n.put([]byte("susy"), []byte("susy"), []byte("que"), 0, 0) - n.put([]byte("ricki"), []byte("ricki"), []byte("lake"), 0, 0) - n.put([]byte("john"), []byte("john"), []byte("johnson"), 0, 0) - - // Write it to a page. 
- var buf [4096]byte - p := (*page)(unsafe.Pointer(&buf[0])) - n.write(p) - - // Read the page back in. - n2 := &node{} - n2.read(p) - - // Check that the two pages are the same. - if len(n2.inodes) != 3 { - t.Fatalf("exp=3; got=%d", len(n2.inodes)) - } - if k, v := n2.inodes[0].key, n2.inodes[0].value; string(k) != "john" || string(v) != "johnson" { - t.Fatalf("exp=<john,johnson>; got=<%s,%s>", k, v) - } - if k, v := n2.inodes[1].key, n2.inodes[1].value; string(k) != "ricki" || string(v) != "lake" { - t.Fatalf("exp=<ricki,lake>; got=<%s,%s>", k, v) - } - if k, v := n2.inodes[2].key, n2.inodes[2].value; string(k) != "susy" || string(v) != "que" { - t.Fatalf("exp=<susy,que>; got=<%s,%s>", k, v) - } -} - -// Ensure that a node can split into appropriate subgroups. -func TestNode_split(t *testing.T) { - // Create a node. - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} - n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0, 0) - - // Split between 2 & 3. - n.split(100) - - var parent = n.parent - if len(parent.children) != 2 { - t.Fatalf("exp=2; got=%d", len(parent.children)) - } - if len(parent.children[0].inodes) != 2 { - t.Fatalf("exp=2; got=%d", len(parent.children[0].inodes)) - } - if len(parent.children[1].inodes) != 3 { - t.Fatalf("exp=3; got=%d", len(parent.children[1].inodes)) - } -} - -// Ensure that a page with the minimum number of inodes just returns a single node. -func TestNode_split_MinKeys(t *testing.T) { - // Create a node. - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} - n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) - - // Split. - n.split(20) - if n.parent != nil { - t.Fatalf("expected nil parent") - } -} - -// Ensure that a node that has keys that all fit on a page just returns one leaf. -func TestNode_split_SinglePage(t *testing.T) { - // Create a node. - n := &node{inodes: make(inodes, 0), bucket: &Bucket{tx: &Tx{db: &DB{}, meta: &meta{pgid: 1}}}} - n.put([]byte("00000001"), []byte("00000001"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000002"), []byte("00000002"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000003"), []byte("00000003"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000004"), []byte("00000004"), []byte("0123456701234567"), 0, 0) - n.put([]byte("00000005"), []byte("00000005"), []byte("0123456701234567"), 0, 0) - - // Split. - n.split(4096) - if n.parent != nil { - t.Fatalf("expected nil parent") - } -} diff --git a/tests/page_test.go b/tests/page_test.go deleted file mode 100644 index 59f4a30..0000000 --- a/tests/page_test.go +++ /dev/null @@ -1,72 +0,0 @@ -package bolt - -import ( - "reflect" - "sort" - "testing" - "testing/quick" -) - -// Ensure that the page type can be returned in human readable format. 
-func TestPage_typ(t *testing.T) { - if typ := (&page{flags: branchPageFlag}).typ(); typ != "branch" { - t.Fatalf("exp=branch; got=%v", typ) - } - if typ := (&page{flags: leafPageFlag}).typ(); typ != "leaf" { - t.Fatalf("exp=leaf; got=%v", typ) - } - if typ := (&page{flags: metaPageFlag}).typ(); typ != "meta" { - t.Fatalf("exp=meta; got=%v", typ) - } - if typ := (&page{flags: freelistPageFlag}).typ(); typ != "freelist" { - t.Fatalf("exp=freelist; got=%v", typ) - } - if typ := (&page{flags: 20000}).typ(); typ != "unknown<4e20>" { - t.Fatalf("exp=unknown<4e20>; got=%v", typ) - } -} - -// Ensure that the hexdump debugging function doesn't blow up. -func TestPage_dump(t *testing.T) { - (&page{id: 256}).hexdump(16) -} - -func TestPgids_merge(t *testing.T) { - a := pgids{4, 5, 6, 10, 11, 12, 13, 27} - b := pgids{1, 3, 8, 9, 25, 30} - c := a.merge(b) - if !reflect.DeepEqual(c, pgids{1, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30}) { - t.Errorf("mismatch: %v", c) - } - - a = pgids{4, 5, 6, 10, 11, 12, 13, 27, 35, 36} - b = pgids{8, 9, 25, 30} - c = a.merge(b) - if !reflect.DeepEqual(c, pgids{4, 5, 6, 8, 9, 10, 11, 12, 13, 25, 27, 30, 35, 36}) { - t.Errorf("mismatch: %v", c) - } -} - -func TestPgids_merge_quick(t *testing.T) { - if err := quick.Check(func(a, b pgids) bool { - // Sort incoming lists. - sort.Sort(a) - sort.Sort(b) - - // Merge the two lists together. - got := a.merge(b) - - // The expected value should be the two lists combined and sorted. - exp := append(a, b...) - sort.Sort(exp) - - if !reflect.DeepEqual(exp, got) { - t.Errorf("\nexp=%+v\ngot=%+v\n", exp, got) - return false - } - - return true - }, nil); err != nil { - t.Fatal(err) - } -} diff --git a/tests/quick_test.go b/tests/quick_test.go deleted file mode 100644 index 9e27792..0000000 --- a/tests/quick_test.go +++ /dev/null @@ -1,87 +0,0 @@ -package bolt_test - -import ( - "bytes" - "flag" - "fmt" - "math/rand" - "os" - "reflect" - "testing/quick" - "time" -) - -// testing/quick defaults to 5 iterations and a random seed. -// You can override these settings from the command line: -// -// -quick.count The number of iterations to perform. -// -quick.seed The seed to use for randomizing. -// -quick.maxitems The maximum number of items to insert into a DB. -// -quick.maxksize The maximum size of a key. -// -quick.maxvsize The maximum size of a value. 
-// - -var qcount, qseed, qmaxitems, qmaxksize, qmaxvsize int - -func init() { - flag.IntVar(&qcount, "quick.count", 5, "") - flag.IntVar(&qseed, "quick.seed", int(time.Now().UnixNano())%100000, "") - flag.IntVar(&qmaxitems, "quick.maxitems", 1000, "") - flag.IntVar(&qmaxksize, "quick.maxksize", 1024, "") - flag.IntVar(&qmaxvsize, "quick.maxvsize", 1024, "") - flag.Parse() - fmt.Fprintln(os.Stderr, "seed:", qseed) - fmt.Fprintf(os.Stderr, "quick settings: count=%v, items=%v, ksize=%v, vsize=%v\n", qcount, qmaxitems, qmaxksize, qmaxvsize) -} - -func qconfig() *quick.Config { - return &quick.Config{ - MaxCount: qcount, - Rand: rand.New(rand.NewSource(int64(qseed))), - } -} - -type testdata []testdataitem - -func (t testdata) Len() int { return len(t) } -func (t testdata) Swap(i, j int) { t[i], t[j] = t[j], t[i] } -func (t testdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == -1 } - -func (t testdata) Generate(rand *rand.Rand, size int) reflect.Value { - n := rand.Intn(qmaxitems-1) + 1 - items := make(testdata, n) - used := make(map[string]bool) - for i := 0; i < n; i++ { - item := &items[i] - // Ensure that keys are unique by looping until we find one that we have not already used. - for { - item.Key = randByteSlice(rand, 1, qmaxksize) - if !used[string(item.Key)] { - used[string(item.Key)] = true - break - } - } - item.Value = randByteSlice(rand, 0, qmaxvsize) - } - return reflect.ValueOf(items) -} - -type revtestdata []testdataitem - -func (t revtestdata) Len() int { return len(t) } -func (t revtestdata) Swap(i, j int) { t[i], t[j] = t[j], t[i] } -func (t revtestdata) Less(i, j int) bool { return bytes.Compare(t[i].Key, t[j].Key) == 1 } - -type testdataitem struct { - Key []byte - Value []byte -} - -func randByteSlice(rand *rand.Rand, minSize, maxSize int) []byte { - n := rand.Intn(maxSize-minSize) + minSize - b := make([]byte, n) - for i := 0; i < n; i++ { - b[i] = byte(rand.Intn(255)) - } - return b -} diff --git a/tests/simulation_test.go b/tests/simulation_test.go deleted file mode 100644 index 3831016..0000000 --- a/tests/simulation_test.go +++ /dev/null @@ -1,329 +0,0 @@ -package bolt_test - -import ( - "bytes" - "fmt" - "math/rand" - "sync" - "testing" - - "github.com/boltdb/bolt" -) - -func TestSimulate_1op_1p(t *testing.T) { testSimulate(t, 1, 1) } -func TestSimulate_10op_1p(t *testing.T) { testSimulate(t, 10, 1) } -func TestSimulate_100op_1p(t *testing.T) { testSimulate(t, 100, 1) } -func TestSimulate_1000op_1p(t *testing.T) { testSimulate(t, 1000, 1) } -func TestSimulate_10000op_1p(t *testing.T) { testSimulate(t, 10000, 1) } - -func TestSimulate_10op_10p(t *testing.T) { testSimulate(t, 10, 10) } -func TestSimulate_100op_10p(t *testing.T) { testSimulate(t, 100, 10) } -func TestSimulate_1000op_10p(t *testing.T) { testSimulate(t, 1000, 10) } -func TestSimulate_10000op_10p(t *testing.T) { testSimulate(t, 10000, 10) } - -func TestSimulate_100op_100p(t *testing.T) { testSimulate(t, 100, 100) } -func TestSimulate_1000op_100p(t *testing.T) { testSimulate(t, 1000, 100) } -func TestSimulate_10000op_100p(t *testing.T) { testSimulate(t, 10000, 100) } - -func TestSimulate_10000op_1000p(t *testing.T) { testSimulate(t, 10000, 1000) } - -// Randomly generate operations on a given database with multiple clients to ensure consistency and thread safety. -func testSimulate(t *testing.T, threadCount, parallelism int) { - if testing.Short() { - t.Skip("skipping test in short mode.") - } - - rand.Seed(int64(qseed)) - - // A list of operations that readers and writers can perform. 
- var readerHandlers = []simulateHandler{simulateGetHandler} - var writerHandlers = []simulateHandler{simulateGetHandler, simulatePutHandler} - - var versions = make(map[int]*QuickDB) - versions[1] = NewQuickDB() - - db := MustOpenDB() - defer db.MustClose() - - var mutex sync.Mutex - - // Run n threads in parallel, each with their own operation. - var wg sync.WaitGroup - var threads = make(chan bool, parallelism) - var i int - for { - threads <- true - wg.Add(1) - writable := ((rand.Int() % 100) < 20) // 20% writers - - // Choose an operation to execute. - var handler simulateHandler - if writable { - handler = writerHandlers[rand.Intn(len(writerHandlers))] - } else { - handler = readerHandlers[rand.Intn(len(readerHandlers))] - } - - // Execute a thread for the given operation. - go func(writable bool, handler simulateHandler) { - defer wg.Done() - - // Start transaction. - tx, err := db.Begin(writable) - if err != nil { - t.Fatal("tx begin: ", err) - } - - // Obtain current state of the dataset. - mutex.Lock() - var qdb = versions[tx.ID()] - if writable { - qdb = versions[tx.ID()-1].Copy() - } - mutex.Unlock() - - // Make sure we commit/rollback the tx at the end and update the state. - if writable { - defer func() { - mutex.Lock() - versions[tx.ID()] = qdb - mutex.Unlock() - - if err := tx.Commit(); err != nil { - t.Fatal(err) - } - }() - } else { - defer func() { _ = tx.Rollback() }() - } - - // Ignore operation if we don't have data yet. - if qdb == nil { - return - } - - // Execute handler. - handler(tx, qdb) - - // Release a thread back to the scheduling loop. - <-threads - }(writable, handler) - - i++ - if i > threadCount { - break - } - } - - // Wait until all threads are done. - wg.Wait() -} - -type simulateHandler func(tx *bolt.Tx, qdb *QuickDB) - -// Retrieves a key from the database and verifies that it is what is expected. -func simulateGetHandler(tx *bolt.Tx, qdb *QuickDB) { - // Randomly retrieve an existing exist. - keys := qdb.Rand() - if len(keys) == 0 { - return - } - - // Retrieve root bucket. - b := tx.Bucket(keys[0]) - if b == nil { - panic(fmt.Sprintf("bucket[0] expected: %08x\n", trunc(keys[0], 4))) - } - - // Drill into nested buckets. - for _, key := range keys[1 : len(keys)-1] { - b = b.Bucket(key) - if b == nil { - panic(fmt.Sprintf("bucket[n] expected: %v -> %v\n", keys, key)) - } - } - - // Verify key/value on the final bucket. - expected := qdb.Get(keys) - actual := b.Get(keys[len(keys)-1]) - if !bytes.Equal(actual, expected) { - fmt.Println("=== EXPECTED ===") - fmt.Println(expected) - fmt.Println("=== ACTUAL ===") - fmt.Println(actual) - fmt.Println("=== END ===") - panic("value mismatch") - } -} - -// Inserts a key into the database. -func simulatePutHandler(tx *bolt.Tx, qdb *QuickDB) { - var err error - keys, value := randKeys(), randValue() - - // Retrieve root bucket. - b := tx.Bucket(keys[0]) - if b == nil { - b, err = tx.CreateBucket(keys[0]) - if err != nil { - panic("create bucket: " + err.Error()) - } - } - - // Create nested buckets, if necessary. - for _, key := range keys[1 : len(keys)-1] { - child := b.Bucket(key) - if child != nil { - b = child - } else { - b, err = b.CreateBucket(key) - if err != nil { - panic("create bucket: " + err.Error()) - } - } - } - - // Insert into database. - if err := b.Put(keys[len(keys)-1], value); err != nil { - panic("put: " + err.Error()) - } - - // Insert into in-memory database. 
- qdb.Put(keys, value) -} - -// QuickDB is an in-memory database that replicates the functionality of the -// Bolt DB type except that it is entirely in-memory. It is meant for testing -// that the Bolt database is consistent. -type QuickDB struct { - sync.RWMutex - m map[string]interface{} -} - -// NewQuickDB returns an instance of QuickDB. -func NewQuickDB() *QuickDB { - return &QuickDB{m: make(map[string]interface{})} -} - -// Get retrieves the value at a key path. -func (db *QuickDB) Get(keys [][]byte) []byte { - db.RLock() - defer db.RUnlock() - - m := db.m - for _, key := range keys[:len(keys)-1] { - value := m[string(key)] - if value == nil { - return nil - } - switch value := value.(type) { - case map[string]interface{}: - m = value - case []byte: - return nil - } - } - - // Only return if it's a simple value. - if value, ok := m[string(keys[len(keys)-1])].([]byte); ok { - return value - } - return nil -} - -// Put inserts a value into a key path. -func (db *QuickDB) Put(keys [][]byte, value []byte) { - db.Lock() - defer db.Unlock() - - // Build buckets all the way down the key path. - m := db.m - for _, key := range keys[:len(keys)-1] { - if _, ok := m[string(key)].([]byte); ok { - return // Keypath intersects with a simple value. Do nothing. - } - - if m[string(key)] == nil { - m[string(key)] = make(map[string]interface{}) - } - m = m[string(key)].(map[string]interface{}) - } - - // Insert value into the last key. - m[string(keys[len(keys)-1])] = value -} - -// Rand returns a random key path that points to a simple value. -func (db *QuickDB) Rand() [][]byte { - db.RLock() - defer db.RUnlock() - if len(db.m) == 0 { - return nil - } - var keys [][]byte - db.rand(db.m, &keys) - return keys -} - -func (db *QuickDB) rand(m map[string]interface{}, keys *[][]byte) { - i, index := 0, rand.Intn(len(m)) - for k, v := range m { - if i == index { - *keys = append(*keys, []byte(k)) - if v, ok := v.(map[string]interface{}); ok { - db.rand(v, keys) - } - return - } - i++ - } - panic("quickdb rand: out-of-range") -} - -// Copy copies the entire database. -func (db *QuickDB) Copy() *QuickDB { - db.RLock() - defer db.RUnlock() - return &QuickDB{m: db.copy(db.m)} -} - -func (db *QuickDB) copy(m map[string]interface{}) map[string]interface{} { - clone := make(map[string]interface{}, len(m)) - for k, v := range m { - switch v := v.(type) { - case map[string]interface{}: - clone[k] = db.copy(v) - default: - clone[k] = v - } - } - return clone -} - -func randKey() []byte { - var min, max = 1, 1024 - n := rand.Intn(max-min) + min - b := make([]byte, n) - for i := 0; i < n; i++ { - b[i] = byte(rand.Intn(255)) - } - return b -} - -func randKeys() [][]byte { - var keys [][]byte - var count = rand.Intn(2) + 2 - for i := 0; i < count; i++ { - keys = append(keys, randKey()) - } - return keys -} - -func randValue() []byte { - n := rand.Intn(8192) - b := make([]byte, n) - for i := 0; i < n; i++ { - b[i] = byte(rand.Intn(255)) - } - return b -} diff --git a/tests/tx_test.go b/tests/tx_test.go deleted file mode 100644 index 2201e79..0000000 --- a/tests/tx_test.go +++ /dev/null @@ -1,716 +0,0 @@ -package bolt_test - -import ( - "bytes" - "errors" - "fmt" - "log" - "os" - "testing" - - "github.com/boltdb/bolt" -) - -// Ensure that committing a closed transaction returns an error. 
-func TestTx_Commit_ErrTxClosed(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - tx, err := db.Begin(true) - if err != nil { - t.Fatal(err) - } - - if _, err := tx.CreateBucket([]byte("foo")); err != nil { - t.Fatal(err) - } - - if err := tx.Commit(); err != nil { - t.Fatal(err) - } - - if err := tx.Commit(); err != bolt.ErrTxClosed { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that rolling back a closed transaction returns an error. -func TestTx_Rollback_ErrTxClosed(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - tx, err := db.Begin(true) - if err != nil { - t.Fatal(err) - } - - if err := tx.Rollback(); err != nil { - t.Fatal(err) - } - if err := tx.Rollback(); err != bolt.ErrTxClosed { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that committing a read-only transaction returns an error. -func TestTx_Commit_ErrTxNotWritable(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - tx, err := db.Begin(false) - if err != nil { - t.Fatal(err) - } - if err := tx.Commit(); err != bolt.ErrTxNotWritable { - t.Fatal(err) - } -} - -// Ensure that a transaction can retrieve a cursor on the root bucket. -func TestTx_Cursor(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - - if _, err := tx.CreateBucket([]byte("woojits")); err != nil { - t.Fatal(err) - } - - c := tx.Cursor() - if k, v := c.First(); !bytes.Equal(k, []byte("widgets")) { - t.Fatalf("unexpected key: %v", k) - } else if v != nil { - t.Fatalf("unexpected value: %v", v) - } - - if k, v := c.Next(); !bytes.Equal(k, []byte("woojits")) { - t.Fatalf("unexpected key: %v", k) - } else if v != nil { - t.Fatalf("unexpected value: %v", v) - } - - if k, v := c.Next(); k != nil { - t.Fatalf("unexpected key: %v", k) - } else if v != nil { - t.Fatalf("unexpected value: %v", k) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that creating a bucket with a read-only transaction returns an error. -func TestTx_CreateBucket_ErrTxNotWritable(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.View(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("foo")) - if err != bolt.ErrTxNotWritable { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that creating a bucket on a closed transaction returns an error. -func TestTx_CreateBucket_ErrTxClosed(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - tx, err := db.Begin(true) - if err != nil { - t.Fatal(err) - } - if err := tx.Commit(); err != nil { - t.Fatal(err) - } - - if _, err := tx.CreateBucket([]byte("foo")); err != bolt.ErrTxClosed { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that a Tx can retrieve a bucket. -func TestTx_Bucket(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - if tx.Bucket([]byte("widgets")) == nil { - t.Fatal("expected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a Tx retrieving a non-existent key returns nil. 
-func TestTx_Get_NotFound(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if b.Get([]byte("no_such_key")) != nil { - t.Fatal("expected nil value") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket can be created and retrieved. -func TestTx_CreateBucket(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - // Create a bucket. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } else if b == nil { - t.Fatal("expected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Read the bucket through a separate transaction. - if err := db.View(func(tx *bolt.Tx) error { - if tx.Bucket([]byte("widgets")) == nil { - t.Fatal("expected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket can be created if it doesn't already exist. -func TestTx_CreateBucketIfNotExists(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - // Create bucket. - if b, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil { - t.Fatal(err) - } else if b == nil { - t.Fatal("expected bucket") - } - - // Create bucket again. - if b, err := tx.CreateBucketIfNotExists([]byte("widgets")); err != nil { - t.Fatal(err) - } else if b == nil { - t.Fatal("expected bucket") - } - - return nil - }); err != nil { - t.Fatal(err) - } - - // Read the bucket through a separate transaction. - if err := db.View(func(tx *bolt.Tx) error { - if tx.Bucket([]byte("widgets")) == nil { - t.Fatal("expected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure transaction returns an error if creating an unnamed bucket. -func TestTx_CreateBucketIfNotExists_ErrBucketNameRequired(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucketIfNotExists([]byte{}); err != bolt.ErrBucketNameRequired { - t.Fatalf("unexpected error: %s", err) - } - - if _, err := tx.CreateBucketIfNotExists(nil); err != bolt.ErrBucketNameRequired { - t.Fatalf("unexpected error: %s", err) - } - - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket cannot be created twice. -func TestTx_CreateBucket_ErrBucketExists(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - // Create a bucket. - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Create the same bucket again. - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket([]byte("widgets")); err != bolt.ErrBucketExists { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket is created with a non-blank name. -func TestTx_CreateBucket_ErrBucketNameRequired(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - if _, err := tx.CreateBucket(nil); err != bolt.ErrBucketNameRequired { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that a bucket can be deleted. 
-func TestTx_DeleteBucket(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - // Create a bucket and add a value. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - // Delete the bucket and make sure we can't get the value. - if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.DeleteBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - if tx.Bucket([]byte("widgets")) != nil { - t.Fatal("unexpected bucket") - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.Update(func(tx *bolt.Tx) error { - // Create the bucket again and make sure there's not a phantom value. - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if v := b.Get([]byte("foo")); v != nil { - t.Fatalf("unexpected phantom value: %v", v) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that deleting a bucket on a closed transaction returns an error. -func TestTx_DeleteBucket_ErrTxClosed(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - tx, err := db.Begin(true) - if err != nil { - t.Fatal(err) - } - if err := tx.Commit(); err != nil { - t.Fatal(err) - } - if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxClosed { - t.Fatalf("unexpected error: %s", err) - } -} - -// Ensure that deleting a bucket with a read-only transaction returns an error. -func TestTx_DeleteBucket_ReadOnly(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.View(func(tx *bolt.Tx) error { - if err := tx.DeleteBucket([]byte("foo")); err != bolt.ErrTxNotWritable { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that nothing happens when deleting a bucket that doesn't exist. -func TestTx_DeleteBucket_NotFound(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - if err := tx.DeleteBucket([]byte("widgets")); err != bolt.ErrBucketNotFound { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that no error is returned when a tx.ForEach function does not return -// an error. -func TestTx_ForEach_NoError(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - - if err := tx.ForEach(func(name []byte, b *bolt.Bucket) error { - return nil - }); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that an error is returned when a tx.ForEach function returns an error. 
-func TestTx_ForEach_WithError(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - - marker := errors.New("marker") - if err := tx.ForEach(func(name []byte, b *bolt.Bucket) error { - return marker - }); err != marker { - t.Fatalf("unexpected error: %s", err) - } - return nil - }); err != nil { - t.Fatal(err) - } -} - -// Ensure that Tx commit handlers are called after a transaction successfully commits. -func TestTx_OnCommit(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - var x int - if err := db.Update(func(tx *bolt.Tx) error { - tx.OnCommit(func() { x += 1 }) - tx.OnCommit(func() { x += 2 }) - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } else if x != 3 { - t.Fatalf("unexpected x: %d", x) - } -} - -// Ensure that Tx commit handlers are NOT called after a transaction rolls back. -func TestTx_OnCommit_Rollback(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - var x int - if err := db.Update(func(tx *bolt.Tx) error { - tx.OnCommit(func() { x += 1 }) - tx.OnCommit(func() { x += 2 }) - if _, err := tx.CreateBucket([]byte("widgets")); err != nil { - t.Fatal(err) - } - return errors.New("rollback this commit") - }); err == nil || err.Error() != "rollback this commit" { - t.Fatalf("unexpected error: %s", err) - } else if x != 0 { - t.Fatalf("unexpected x: %d", x) - } -} - -// Ensure that the database can be copied to a file path. -func TestTx_CopyFile(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - - path := tempfile() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("baz"), []byte("bat")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - return tx.CopyFile(path, 0600) - }); err != nil { - t.Fatal(err) - } - - db2, err := bolt.Open(path, 0600, nil) - if err != nil { - t.Fatal(err) - } - - if err := db2.View(func(tx *bolt.Tx) error { - if v := tx.Bucket([]byte("widgets")).Get([]byte("foo")); !bytes.Equal(v, []byte("bar")) { - t.Fatalf("unexpected value: %v", v) - } - if v := tx.Bucket([]byte("widgets")).Get([]byte("baz")); !bytes.Equal(v, []byte("bat")) { - t.Fatalf("unexpected value: %v", v) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db2.Close(); err != nil { - t.Fatal(err) - } -} - -type failWriterError struct{} - -func (failWriterError) Error() string { - return "error injected for tests" -} - -type failWriter struct { - // fail after this many bytes - After int -} - -func (f *failWriter) Write(p []byte) (n int, err error) { - n = len(p) - if n > f.After { - n = f.After - err = failWriterError{} - } - f.After -= n - return n, err -} - -// Ensure that Copy handles write errors right. 
-func TestTx_CopyFile_Error_Meta(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("baz"), []byte("bat")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - return tx.Copy(&failWriter{}) - }); err == nil || err.Error() != "meta 0 copy: error injected for tests" { - t.Fatalf("unexpected error: %v", err) - } -} - -// Ensure that Copy handles write errors right. -func TestTx_CopyFile_Error_Normal(t *testing.T) { - db := MustOpenDB() - defer db.MustClose() - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - t.Fatal(err) - } - if err := b.Put([]byte("baz"), []byte("bat")); err != nil { - t.Fatal(err) - } - return nil - }); err != nil { - t.Fatal(err) - } - - if err := db.View(func(tx *bolt.Tx) error { - return tx.Copy(&failWriter{3 * db.Info().PageSize}) - }); err == nil || err.Error() != "error injected for tests" { - t.Fatalf("unexpected error: %v", err) - } -} - -func ExampleTx_Rollback() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Create a bucket. - if err := db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucket([]byte("widgets")) - return err - }); err != nil { - log.Fatal(err) - } - - // Set a value for a key. - if err := db.Update(func(tx *bolt.Tx) error { - return tx.Bucket([]byte("widgets")).Put([]byte("foo"), []byte("bar")) - }); err != nil { - log.Fatal(err) - } - - // Update the key but rollback the transaction so it never saves. - tx, err := db.Begin(true) - if err != nil { - log.Fatal(err) - } - b := tx.Bucket([]byte("widgets")) - if err := b.Put([]byte("foo"), []byte("baz")); err != nil { - log.Fatal(err) - } - if err := tx.Rollback(); err != nil { - log.Fatal(err) - } - - // Ensure that our original value is still set. - if err := db.View(func(tx *bolt.Tx) error { - value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) - fmt.Printf("The value for 'foo' is still: %s\n", value) - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // The value for 'foo' is still: bar -} - -func ExampleTx_CopyFile() { - // Open the database. - db, err := bolt.Open(tempfile(), 0666, nil) - if err != nil { - log.Fatal(err) - } - defer os.Remove(db.Path()) - - // Create a bucket and a key. - if err := db.Update(func(tx *bolt.Tx) error { - b, err := tx.CreateBucket([]byte("widgets")) - if err != nil { - return err - } - if err := b.Put([]byte("foo"), []byte("bar")); err != nil { - return err - } - return nil - }); err != nil { - log.Fatal(err) - } - - // Copy the database to another file. - toFile := tempfile() - if err := db.View(func(tx *bolt.Tx) error { - return tx.CopyFile(toFile, 0666) - }); err != nil { - log.Fatal(err) - } - defer os.Remove(toFile) - - // Open the cloned database. - db2, err := bolt.Open(toFile, 0666, nil) - if err != nil { - log.Fatal(err) - } - - // Ensure that the key exists in the copy. 
- if err := db2.View(func(tx *bolt.Tx) error { - value := tx.Bucket([]byte("widgets")).Get([]byte("foo")) - fmt.Printf("The value for 'foo' in the clone is: %s\n", value) - return nil - }); err != nil { - log.Fatal(err) - } - - // Close database to release file lock. - if err := db.Close(); err != nil { - log.Fatal(err) - } - - if err := db2.Close(); err != nil { - log.Fatal(err) - } - - // Output: - // The value for 'foo' in the clone is: bar -} |
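Both the new test file above and this deleted tx_test.go rely on the same failWriter fault-injection idiom: a writer that accepts a fixed number of bytes and then fails, which drives Tx.Copy deterministically into each of its error paths. A minimal standalone sketch of the same idea (the demo program and its names are illustrative, not taken from the diff):

	package main

	import (
		"errors"
		"fmt"
		"io"
		"strings"
	)

	// failWriter accepts at most After bytes, then returns an injected error.
	type failWriter struct{ After int }

	func (f *failWriter) Write(p []byte) (int, error) {
		n := len(p)
		if n > f.After {
			n = f.After
			f.After = 0
			return n, errors.New("error injected for tests")
		}
		f.After -= n
		return n, nil
	}

	func main() {
		// io.Copy stops and surfaces the injected error after 5 bytes.
		n, err := io.Copy(&failWriter{After: 5}, strings.NewReader("hello, world"))
		fmt.Println(n, err) // 5 error injected for tests
	}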