aboutsummaryrefslogtreecommitdiff
#!/bin/sh
set -eu

# Print the short synopsis of the supported invocations on stdout.
# Callers redirect it to stderr themselves when reporting a usage error.
usage() {
	printf '%s\n' \
		'Usage:' \
		'  git blob init' \
		'  git blob -h'
}

# Print the long help text on stdout: the option list, a description of what
# "git-blob" is for, and usage examples.  It is meant to be printed right
# after usage(), hence the two leading blank lines.
help() {
	printf '%s\n' \
		'' \
		'' \
		'Options:' \
		'  -h, --help    show this message' \
		'' \
		'' \
		'Helper to store binary data on Git.' \
		'' \
		'The trouble with adding large binary data to Git repositories is that' \
		'as change of such data accumulates over time, all of its revisions are' \
		'managed by Git, and as the repository can only grow it can take a big' \
		'amount of space.  However that data is usually less valuable, as we' \
		"often don't fiddle with it directly, and treat it as big opaque files," \
		"which we aren't so sure if we should keep all of its revisions forever." \
		'' \
		'Instead "git-blob" allows one to manage these large artifacts' \
		'out-of-band, and only keep pointer files versioned in Git.' \
		'' \
		'We use rsync to push and pull these artifacts automatically, relying' \
		'on the existing SSH infrastructure in place.' \
		'' \
		'' \
		'Examples:' \
		'' \
		'  Init "git-blob":' \
		'' \
		'    $ git blob init' \
		'' \
		'' \
		'  Make "resources/*" be handled by "git-blob":' \
		'' \
		"    \$ echo 'resources/* filter=blob diff=blob -text' >> .gitattributes"
}


# getopts only understands short options, so look for the long "--help"
# spelling by hand before option parsing proper.  The scan covers every
# argument up to (not including) the first "--"; all other arguments are
# left alone.
for arg do
	if [ "$arg" = '--' ]; then
		break
	elif [ "$arg" = '--help' ]; then
		usage
		help
		exit
	fi
done

# Parse the short options.  "-h" prints the usage followed by the full help
# on stdout and exits; any option getopts does not recognise prints the
# usage on stderr and exits with status 2.
while getopts 'h' opt; do
	if [ "$opt" = 'h' ]; then
		usage
		help
		exit
	fi
	usage >&2
	exit 2
done
# Drop the options consumed above so that "$@" starts at the first operand.
shift "$((OPTIND - 1))"


# The first operand selects the sub-command.  Default to the empty string so
# that `set -u` does not abort right here when no operand was given.
ACTION="${1:-}"
# NOTE(review): `assert-arg` is an external helper that is not defined in
# this file.  Presumably it prints shell code that rejects an empty ACTION
# (which is why its output is eval'ed) -- confirm against the helper.
eval "$(assert-arg "$ACTION" 'ACTION')"
# Discard ACTION; from here on "$@" holds only the sub-command's arguments.
shift


# Dispatch on ACTION.  All local state lives below the Git directory:
#   $DIR/blob/tmp      scratch files while incoming content is spooled
#   $DIR/blob/objects  stored blobs, each named after its SHA-512 hex digest
# The remote counterpart is the "blob/" directory below origin's URL, which
# is reached with rsync.
DIR="$(git rev-parse --git-dir)"

# is_checksum VALUE
# Succeed only when VALUE is exactly 128 lowercase hexadecimal digits, i.e.
# has the shape of a sha512sum digest.  Checksums are used to build local and
# remote paths, so anything else (empty, several lines, "../" components,
# ...) must be rejected before it gets near the file system or rsync.
is_checksum() {
	case "$1" in
		*[!0123456789abcdef]*)
			return 1
			;;
	esac
	[ "${#1}" -eq 128 ]
}

case "$ACTION" in
	init)
		# Register the "blob" filter and diff drivers that .gitattributes
		# entries such as "filter=blob diff=blob" refer to.
		git config filter.blob.clean  'git blob filter-clean'
		git config filter.blob.smudge 'git blob filter-smudge'
		git config diff.blob.binary true
		git config diff.blob.textconv 'git blob diff-textconv'
		mkdir -p -- \
			"$DIR"/hooks    \
			"$DIR"/blob/tmp \
			"$DIR"/blob/objects
		# Install a pre-push hook that forwards to "git blob pre-push".
		# Note that this replaces any pre-push hook already present.
		printf '%s\n' \
			'#!/bin/sh' \
			'set -eu' \
			'' \
			'exec git blob pre-push "$@"' \
			> "$DIR"/hooks/pre-push
		chmod +x "$DIR"/hooks/pre-push
		;;
	diff-textconv)
		# Diff the pointer representation of the given file(s) instead of
		# their raw binary content.
		cat -- "$@" | git blob filter-clean
		;;
	filter-clean)
		# stdin: real file content; stdout: pointer file to be versioned.
		F="$DIR"/blob/tmp/"$(uuid)"
		trap 'rm -f -- "$F"' EXIT
		# Spool first and hash afterwards.  Hashing through a
		# "tee | sha512sum" pipeline would hide a failed write (only the
		# status of the last pipeline stage is checked), and a truncated
		# file could then be stored under the digest of the full content.
		cat > "$F"
		CHECKSUM="$(sha512sum < "$F" | cut -d' ' -f1)"
		# Guards against a missing/failed sha512sum: an empty CHECKSUM
		# would make the mv below target the objects directory itself.
		if ! is_checksum "$CHECKSUM"; then
			printf 'Could not compute the SHA-512 checksum of the input.\n' >&2
			exit 1
		fi
		mv -- "$F" "$DIR"/blob/objects/"$CHECKSUM"
		printf 'git-blob v0\nsha512sum %s\n' "$CHECKSUM"
		;;
	filter-smudge)
		# stdin: pointer file; stdout: real file content.
		CHECKSUM="$(awk '/^sha512sum / { print $2 }')"
		# The pointer comes from repository content, so validate it before
		# using it in a path: this rejects non-pointer input (empty
		# CHECKSUM) as well as values that would escape the object store.
		if ! is_checksum "$CHECKSUM"; then
			printf 'Input is not a valid git-blob pointer file.\n' >&2
			exit 1
		fi
		F="$DIR"/blob/objects/"$CHECKSUM"
		if [ ! -e "$F" ]; then
			# Not available locally yet: fetch it from origin.  rsync's
			# chatter goes to stderr because stdout is the file content.
			rsync -va -- "$(git remote get-url origin)"/blob/"$CHECKSUM" "$F" >&2
		fi
		cat -- "$F"
		;;
	pre-push)
		# Upload the local object store.  The arguments given to the hook
		# are not used: blobs are always sent to origin, which is also
		# where filter-smudge fetches them from.
		rsync -va -- "$DIR"/blob/objects/ "$(git remote get-url origin)"/blob/
		;;
	*)
		printf 'Unsupported ACTION: "%s".\n\n' "$ACTION" >&2
		usage >&2
		exit 2
		;;
esac