#!/bin/sh
set -eu

# git-blob: keep large binary artifacts out of the Git object store.
#
# Files matched by the "blob" filter in .gitattributes are replaced in the
# repository by a small pointer file carrying their SHA-512 checksum.  The real
# content lives under "$GIT_DIR/blob/objects/<checksum>" and is synchronised
# with "<origin URL>/blob/" using rsync.
#
# External commands used: git, rsync, sha512sum, awk, uuid and assert-arg.


# Print the short synopsis on stdout.
usage() {
	cat <<-'EOF'
		Usage:
		  git blob init
		  git blob -h
	EOF
}

# Print the long description on stdout (meant to follow usage).
help() {
	cat <<-'EOF'

		Options:
		  -h, --help    show this message


		Helper to store binary data on Git.

		The trouble with adding large binary data to Git repositories is
		that as change of such data accumulates over time, all of its
		revisions are managed by Git, and as the repository can only grow
		it can take a big amount of space.  However that data is usually
		less valuable, as we often don't fiddle with it directly, and
		treat it as big opaque files, which we aren't so sure if we should
		keep all of its revisions forever.

		Instead "git-blob" allows one to manage these large artifacts
		out-of-band, and only keep pointer files versioned in Git.  We use
		rsync to push and pull these artifacts automatically, relying on
		the existing SSH infrastructure in place.


		Examples:

		  Init "git-blob":

		    $ git blob init


		  Make "resources/*" be handled by "git-blob":

		    $ echo 'resources/* filter=blob diff=blob -text' >> .gitattributes
	EOF
}

# Succeed only when $1 has the shape of sha512sum output: exactly 128
# lowercase hexadecimal digits.  Anything else (empty, several lines, slashes,
# dots, ...) must never be used as a file name under blob/objects.
is_checksum() {
	case "$1" in
		'' | *[!0-9a-f]*)
			return 1
			;;
		*)
			;;
	esac
	[ "${#1}" -eq 128 ]
}


# getopts only understands short options, so look for --help by hand first.
# A literal "--" ends option processing.
for flag in "$@"; do
	case "$flag" in
		--)
			break
			;;
		--help)
			usage
			help
			exit
			;;
		*)
			;;
	esac
done

while getopts 'h' flag; do
	case "$flag" in
		h)
			usage
			help
			exit
			;;
		*)
			usage >&2
			exit 2
			;;
	esac
done
shift $((OPTIND - 1))

ACTION="${1:-}"
# NOTE(review): assert-arg is an external helper not defined in this file;
# presumably it prints shell code that aborts when ACTION is empty - confirm.
eval "$(assert-arg "$ACTION" 'ACTION')"
shift

DIR="$(git rev-parse --git-dir)"

case "$ACTION" in
	init)
		# Register the clean/smudge filter and the diff driver named "blob".
		git config filter.blob.clean    'git blob filter-clean'
		git config filter.blob.smudge   'git blob filter-smudge'
		git config diff.blob.binary     true
		git config diff.blob.textconv   'git blob diff-textconv'
		mkdir -p -- \
			"$DIR"/hooks    \
			"$DIR"/blob/tmp \
			"$DIR"/blob/objects
		# The redirection replaces any pre-push hook already installed.
		cat <<-'EOF' > "$DIR"/hooks/pre-push
			#!/bin/sh
			set -eu

			exec git blob pre-push "$@"
		EOF
		chmod +x "$DIR"/hooks/pre-push
		;;
	diff-textconv)
		# Diffs compare pointer files; this runs filter-clean, so the content
		# is also stored under blob/objects as a side effect.
		cat -- "$@" | git blob filter-clean
		;;
	filter-clean)
		# stdin: real content.  stdout: pointer file.
		# The content is spooled to a temporary file while it is being hashed,
		# then moved into the object store under its checksum.
		F="$DIR"/blob/tmp/"$(uuid)"
		trap 'rm -f -- "$F"' EXIT
		CHECKSUM="$(tee -- "$F" | sha512sum | cut -d' ' -f1)"
		mv -- "$F" "$DIR"/blob/objects/"$CHECKSUM"
		cat <<-EOF
			git-blob v0
			sha512sum $CHECKSUM
		EOF
		;;
	filter-smudge)
		# stdin: pointer file.  stdout: real content.
		CHECKSUM="$(awk '/^sha512sum / { print $2 }')"
		# The checksum comes from repository content and becomes a path
		# component, so reject anything that is not a plain SHA-512 digest.
		# An empty value would otherwise make "$F" the objects directory
		# itself, and a value with "/" or ".." could escape it.
		if ! is_checksum "$CHECKSUM"; then
			printf 'Invalid git-blob pointer: missing or malformed sha512sum.\n' >&2
			exit 1
		fi
		F="$DIR"/blob/objects/"$CHECKSUM"
		if [ ! -e "$F" ]; then
			# rsync output goes to stderr: stdout is reserved for the content.
			rsync -va -- "$(git remote get-url origin)"/blob/"$CHECKSUM" "$F" >&2
		fi
		cat -- "$F"
		;;
	pre-push)
		# Upload every local object before the push proceeds.  The arguments
		# forwarded by the hook are not used: blobs always go to "origin".
		rsync -va -- "$DIR"/blob/objects/ "$(git remote get-url origin)"/blob/
		;;
	*)
		printf 'Unsupported ACTION: "%s".\n\n' "$ACTION" >&2
		usage >&2
		exit 2
		;;
esac