aboutsummaryrefslogtreecommitdiff
path: root/bin/git-blob
blob: f7e3ef9cffc271158515dc76a52627ebc4e570e5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/sh
set -eu

# Print the short command synopsis on stdout, one invocation form per line.
# Callers that report misuse redirect this output to stderr themselves.
usage() {
	printf '%s\n' \
		'Usage:' \
		'  git blob init' \
		'  git blob -h'
}

# Print the long help text on stdout.  It is meant to be shown right after the
# synopsis and consists of three sections separated by two blank lines each:
# the option list, a description of the tool, and usage examples.
help() {
	# Section 1: options, framed by two blank lines before and after.
	printf '\n\n%s\n%s\n\n\n' \
		'Options:' \
		'  -h, --help    show this message'

	# Section 2: what the tool is and why it exists.
	cat <<-'DESCRIPTION'
		Helper to store binary data on Git.

		The trouble with adding large binary data to Git repositories is that
		as change of such data accumulates over time, all of its revisions are
		managed by Git, and as the repository can only grow it can take a big
		amount of space.  However that data is usually less valuable, as we
		often don't fiddle with it directly, and treat it as big opaque files,
		which we aren't so sure if we should keep all of its revisions forever.

		Instead "git-blob" allows one to manage these large artifacts
		out-of-band, and only keep pointer files versioned in Git.

		We use rsync to push and pull these artifacts automatically, relying
		on the existing SSH infrastructure in place.
	DESCRIPTION

	# Section 3: examples, again preceded by two blank lines.
	printf '\n\n'
	cat <<-'EXAMPLES'
		Examples:

		  Init "git-blob":

		    $ git blob init


		  Make "resources/*" be handled by "git-blob":

		    $ echo 'resources/* filter=blob diff=blob -text' >> .gitattributes
	EXAMPLES
}


# getopts only understands short options, so look for the long "--help"
# spelling by hand first.  The positional parameters are inspected without
# being consumed, and the scan stops at the "--" end-of-options marker.
for arg do
	if [ "$arg" = '--' ]; then
		break
	fi
	if [ "$arg" = '--help' ]; then
		usage
		help
		exit
	fi
done

# Short options: "-h" prints the full help on stdout and exits; anything else
# getopts reports is treated as misuse (synopsis on stderr, exit status 2).
while getopts 'h' opt; do
	if [ "$opt" = 'h' ]; then
		usage
		help
		exit
	fi
	usage >&2
	exit 2
done
# Discard the options that were parsed, leaving ACTION and its arguments.
shift "$((OPTIND - 1))"


# The first remaining positional argument selects the sub-command.  Default to
# the empty string so that "set -u" does not abort when it is absent.
ACTION="${1:-}"
# "assert-arg" is not defined in this file; whatever it prints is evaluated in
# the current shell.  Presumably it emits code that reports a missing ACTION
# and exits -- TODO confirm against the helper itself.
eval "$(assert-arg "$ACTION" 'ACTION')"
# Drop ACTION so that "$@" holds only the sub-command's own arguments.
shift


# Succeed only when "$1" has the shape of a sha512sum digest: exactly 128
# lowercase hexadecimal characters.  Checksums end up as file names and as
# part of an rsync source, so anything else (empty, multi-line, containing
# "/" or "..") must be rejected before it is used.
is_checksum() {
	[ "${#1}" -eq 128 ] || return 1
	case "$1" in
		*[!0123456789abcdef]*)
			return 1
			;;
	esac
}


# Git directory of the current repository; all local "git-blob" state is kept
# under "$DIR"/blob/.
DIR="$(git rev-parse --git-dir)"
case "$ACTION" in
	init)
		# Register the "blob" filter and diff driver in the repository
		# configuration; paths opt in through .gitattributes.
		git config filter.blob.clean  'git blob filter-clean'
		git config filter.blob.smudge 'git blob filter-smudge'
		git config diff.blob.binary true
		git config diff.blob.textconv 'git blob diff-textconv'
		# Local storage: tmp/ for in-flight data, objects/ for artifacts
		# named after their SHA-512 checksum.
		mkdir -p -- \
			"$DIR"/hooks    \
			"$DIR"/blob/tmp \
			"$DIR"/blob/objects
		# Install a pre-push hook that delegates to "git blob pre-push".
		# NOTE(review): this overwrites any pre-push hook already present.
		cat <<-'EOF' > "$DIR"/hooks/pre-push
			#!/bin/sh
			set -eu

			exec git blob pre-push "$@"
		EOF
		chmod +x "$DIR"/hooks/pre-push
		;;
	diff-textconv)
		# Show the files given as arguments as the pointer text that
		# "filter-clean" would produce for them.
		cat -- "$@" | git blob filter-clean
		;;
	filter-clean)
		# stdin: artifact content.  stdout: the pointer file to version.
		# The content is spooled to a temporary file while it is being
		# hashed, then moved into objects/ under its checksum.
		F="$DIR"/blob/tmp/"$(uuid)"
		trap 'rm -f -- "$F"' EXIT
		CHECKSUM="$(tee -- "$F" | sha512sum | cut -d' ' -f1)"
		# The pipeline's status is that of "cut" alone, so a missing or
		# failing sha512sum would go unnoticed and produce a pointer with
		# an empty checksum; verify the result explicitly.
		if ! is_checksum "$CHECKSUM"; then
			printf 'Failed to compute the SHA-512 checksum of the input.\n' >&2
			exit 1
		fi
		mv -- "$F" "$DIR"/blob/objects/"$CHECKSUM"
		cat <<-EOF
			git-blob v0
			sha512sum $CHECKSUM
		EOF
		;;
	filter-smudge)
		# stdin: a pointer file.  stdout: the artifact content, fetched
		# from the "origin" remote with rsync when not available locally.
		CHECKSUM="$(awk '/^sha512sum / { print $2 }')"
		# The pointer comes from repository content: refuse anything that
		# is not a plain digest, so that it can neither resolve to the
		# objects/ directory itself nor escape it through "../".
		if ! is_checksum "$CHECKSUM"; then
			printf 'Input is not a valid "git-blob" pointer file.\n' >&2
			exit 1
		fi
		F="$DIR"/blob/objects/"$CHECKSUM"
		if [ ! -e "$F" ]; then
			# rsync chatter goes to stderr; stdout carries the content.
			rsync -va -- "$(git remote get-url origin)"/blob/"$CHECKSUM" "$F" >&2
		fi
		cat -- "$F"
		;;
	pre-push)
		# Upload every local object next to the "origin" remote.
		# NOTE(review): the arguments forwarded by the hook are not used
		# here; the destination is always derived from "origin".
		rsync -va -- "$DIR"/blob/objects/ "$(git remote get-url origin)"/blob/
		;;
	*)
		printf 'Unsupported ACTION: "%s".\n\n' "$ACTION" >&2
		usage >&2
		exit 2
		;;
esac