1 files changed, 208 insertions, 0 deletions
diff --git a/cmd/bolt/main.go b/cmd/bolt/main.go
index b96e6f7..a132ec0 100644
--- a/cmd/bolt/main.go
+++ b/cmd/bolt/main.go
@@ -102,6 +102,8 @@ func (m *Main) Run(args ...string) error {
 		return newBenchCommand(m).Run(args[1:]...)
 	case "check":
 		return newCheckCommand(m).Run(args[1:]...)
+	case "compact":
+		return newCompactCommand(m).Run(args[1:]...)
 	case "dump":
 		return newDumpCommand(m).Run(args[1:]...)
 	case "info":
@@ -130,6 +132,7 @@ The commands are:
 
     bench       run synthetic benchmark against bolt
     check       verifies integrity of bolt database
+    compact     copies a bolt database, compacting it in the process
     info        print basic info
     help        print this screen
     pages       print list of pages with their types
@@ -1530,3 +1533,208 @@ func (n *leafPageElement) value() []byte {
 	buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
 	return buf[n.pos+n.ksize : n.pos+n.ksize+n.vsize]
 }
+
+// CompactCommand holds the I/O streams and parsed options for one run of the
+// "compact" command.
+type CompactCommand struct {
+	Stdin          io.Reader
+	Stdout, Stderr io.Writer
+
+	// SrcPath and DstPath are the source and destination database paths.
+	SrcPath, DstPath string
+	TxMaxSize        int64 // byte budget per write transaction; 0 disables splitting
+}
+
+// newCompactCommand builds a CompactCommand that shares the standard streams
+// of m; the paths and TxMaxSize stay zero until Run parses its arguments.
+func newCompactCommand(m *Main) *CompactCommand {
+	c := new(CompactCommand)
+	c.Stdin = m.Stdin
+	c.Stdout, c.Stderr = m.Stdout, m.Stderr
+	return c
+}
+
+// Run parses args, copies the SRC database into the file named by -o via
+// compact, and prints both sizes to cmd.Stdout. It returns ErrUsage when help
+// is requested, ErrPathRequired without SRC and ErrFileNotFound if SRC is missing.
+func (cmd *CompactCommand) Run(args ...string) (err error) {
+	// Parse flags.
+	fs := flag.NewFlagSet("", flag.ContinueOnError)
+	fs.SetOutput(ioutil.Discard)
+	fs.StringVar(&cmd.DstPath, "o", "", "")
+	fs.Int64Var(&cmd.TxMaxSize, "tx-max-size", 65536, "")
+	if err := fs.Parse(args); err == flag.ErrHelp {
+		fmt.Fprintln(cmd.Stderr, cmd.Usage())
+		return ErrUsage
+	} else if err != nil {
+		return err
+	} else if cmd.DstPath == "" {
+		return fmt.Errorf("output file required")
+	}
+
+	// Require database paths.
+	if cmd.SrcPath = fs.Arg(0); cmd.SrcPath == "" {
+		return ErrPathRequired
+	}
+
+	// Ensure source file exists.
+	fi, err := os.Stat(cmd.SrcPath)
+	if os.IsNotExist(err) {
+		return ErrFileNotFound
+	} else if err != nil {
+		return err
+	}
+	initialSize := fi.Size()
+
+	// Open source database read-only; compaction must never write to it.
+	src, err := bolt.Open(cmd.SrcPath, 0444, &bolt.Options{ReadOnly: true})
+	if err != nil {
+		return err
+	}
+	defer src.Close()
+
+	// Open destination database.
+	dst, err := bolt.Open(cmd.DstPath, fi.Mode(), nil)
+	if err != nil {
+		return err
+	}
+	defer dst.Close()
+
+	// Run compaction.
+	if err := cmd.compact(dst, src); err != nil {
+		return err
+	}
+
+	// Report stats on new size.
+	fi, err = os.Stat(cmd.DstPath)
+	if err != nil {
+		return err
+	} else if fi.Size() == 0 {
+		return fmt.Errorf("zero db size")
+	}
+	fmt.Fprintf(cmd.Stdout, "%d -> %d bytes (gain=%.2fx)\n", initialSize, fi.Size(), float64(initialSize)/float64(fi.Size()))
+	return nil
+}
+
+// compact copies every bucket and key/value pair that walk finds in src into
+// dst. Writes are batched: once the key and value bytes added to the current
+// dst transaction would exceed cmd.TxMaxSize, it is committed and a new one is
+// begun. A TxMaxSize of zero keeps everything in one transaction.
+func (cmd *CompactCommand) compact(dst, src *bolt.DB) error {
+	// commit regularly, or we'll run out of memory for large datasets if using one transaction.
+	var size int64
+	tx, err := dst.Begin(true)
+	if err != nil {
+		return err
+	}
+	// Roll back whichever transaction is current on exit. "defer tx.Rollback()"
+	// would bind the first transaction only, leaving a later one open on error.
+	// tx is nil when starting a replacement transaction failed.
+	defer func() {
+		if tx != nil {
+			_ = tx.Rollback() // best-effort cleanup; the error is deliberately ignored
+		}
+	}()
+
+	if err := cmd.walk(src, func(keys [][]byte, k, v []byte, seq uint64) error {
+		// On each key/value, check if we have exceeded tx size.
+		sz := int64(len(k) + len(v))
+		if size+sz > cmd.TxMaxSize && cmd.TxMaxSize != 0 {
+			// Commit previous transaction.
+			if err := tx.Commit(); err != nil {
+				return err
+			}
+			// Start new transaction.
+			if tx, err = dst.Begin(true); err != nil {
+				return err
+			}
+			size = 0
+		}
+		size += sz
+
+		// Create bucket on the root transaction if this is the first level.
+		if len(keys) == 0 {
+			bkt, err := tx.CreateBucket(k)
+			if err != nil {
+				return err
+			}
+			return bkt.SetSequence(seq)
+		}
+
+		// Descend to the bucket that owns k.
+		b := tx.Bucket(keys[0])
+		for _, name := range keys[1:] {
+			b = b.Bucket(name)
+		}
+
+		// If there is no value then this is a bucket call.
+		if v == nil {
+			bkt, err := b.CreateBucket(k)
+			if err != nil {
+				return err
+			}
+			return bkt.SetSequence(seq)
+		}
+
+		// Otherwise treat it as a key/value pair.
+		return b.Put(k, v)
+	}); err != nil {
+		return err
+	}
+
+	return tx.Commit()
+}
+
+// walkFunc is the callback walk invokes for each bucket and key/value pair it
+// finds. keys names the chain of buckets owning k; a nil v marks k as a bucket.
+// seq is the sequence of that bucket, or of the owning bucket for a pair.
+type walkFunc func(keys [][]byte, k, v []byte, seq uint64) error
+
+// walk opens a single db.View transaction and passes each top-level bucket to
+// walkBucket, which reports the bucket and everything beneath it to walkFn.
+func (cmd *CompactCommand) walk(db *bolt.DB, walkFn walkFunc) error {
+	visitRoot := func(name []byte, b *bolt.Bucket) error {
+		return cmd.walkBucket(b, nil, name, nil, b.Sequence(), walkFn)
+	}
+	return db.View(func(tx *bolt.Tx) error { return tx.ForEach(visitRoot) })
+}
+
+// walkBucket reports the entry k/v to fn. A non-nil v is a plain key/value
+// pair and ends the walk there; a nil v means k names the bucket b, whose
+// entries are then visited recursively with k appended to keypath.
+func (cmd *CompactCommand) walkBucket(b *bolt.Bucket, keypath [][]byte, k, v []byte, seq uint64, fn walkFunc) error {
+	if err := fn(keypath, k, v, seq); err != nil || v != nil {
+		// Either the callback failed or there is nothing below a key/value pair.
+		return err
+	}
+
+	// From here on k is a bucket: extend the path and visit its entries.
+	keypath = append(keypath, k)
+	return b.ForEach(func(ck, cv []byte) error {
+		// Key/value pairs are reported against b itself; a nil value marks
+		// a nested bucket, which is walked in its own right.
+		child := b
+		if cv == nil {
+			child = b.Bucket(ck)
+		}
+		return cmd.walkBucket(child, keypath, ck, cv, child.Sequence(), fn)
+	})
+}
+
+// Usage returns the help message that Run prints when help is requested.
+func (cmd *CompactCommand) Usage() string {
+	return strings.TrimPrefix(`
+usage: bolt compact [options] -o DST SRC
+
+Compact opens a database at SRC path and walks it recursively, copying keys
+as they are found from all buckets, to a newly created database at DST path.
+
+The original database is left untouched.
+
+Additional options include:
+
+	-tx-max-size NUM
+		Specifies the maximum size of individual transactions.
+		Defaults to 64KB.
+`, "\n")
+}