1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "delta.h"
9 
10 /* maximum hash entry list for the same hash bucket */
11 #define HASH_LIMIT 64
12 
13 #define RABIN_SHIFT 23
14 #define RABIN_WINDOW 16
15 
16 static const unsigned int T[256] = {
17 	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
18 	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
19 	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
20 	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
21 	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
22 	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
23 	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
24 	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
25 	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
26 	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
27 	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
28 	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
29 	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
30 	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
31 	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
32 	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
33 	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
34 	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
35 	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
36 	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
37 	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
38 	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
39 	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
40 	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
41 	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
42 	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
43 	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
44 	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
45 	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
46 	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
47 	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
48 	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
49 	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
50 	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
51 	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
52 	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
53 	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
54 	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
55 	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
56 	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
57 	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
58 	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
59 	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
60 };
61 
62 static const unsigned int U[256] = {
63 	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
64 	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
65 	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
66 	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
67 	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
68 	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
69 	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
70 	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
71 	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
72 	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
73 	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
74 	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
75 	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
76 	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
77 	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
78 	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
79 	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
80 	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
81 	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
82 	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
83 	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
84 	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
85 	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
86 	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
87 	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
88 	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
89 	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
90 	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
91 	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
92 	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
93 	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
94 	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
95 	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
96 	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
97 	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
98 	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
99 	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
100 	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
101 	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
102 	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
103 	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
104 	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
105 	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
106 };
107 
108 struct index_entry {
109 	const unsigned char *ptr;
110 	unsigned int val;
111 	struct index_entry *next;
112 };
113 
114 struct git_delta_index {
115 	unsigned long memsize;
116 	const void *src_buf;
117 	size_t src_size;
118 	unsigned int hash_mask;
119 	struct index_entry *hash[GIT_FLEX_ARRAY];
120 };
121 
lookup_index_alloc(void ** out,unsigned long * out_len,size_t entries,size_t hash_count)122 static int lookup_index_alloc(
123 	void **out, unsigned long *out_len, size_t entries, size_t hash_count)
124 {
125 	size_t entries_len, hash_len, index_len;
126 
127 	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&entries_len, entries, sizeof(struct index_entry));
128 	GIT_ERROR_CHECK_ALLOC_MULTIPLY(&hash_len, hash_count, sizeof(struct index_entry *));
129 
130 	GIT_ERROR_CHECK_ALLOC_ADD(&index_len, sizeof(struct git_delta_index), entries_len);
131 	GIT_ERROR_CHECK_ALLOC_ADD(&index_len, index_len, hash_len);
132 
133 	if (!git__is_ulong(index_len)) {
134 		git_error_set(GIT_ERROR_NOMEMORY, "overly large delta");
135 		return -1;
136 	}
137 
138 	*out = git__malloc(index_len);
139 	GIT_ERROR_CHECK_ALLOC(*out);
140 
141 	*out_len = (unsigned long)index_len;
142 	return 0;
143 }
144 
git_delta_index_init(git_delta_index ** out,const void * buf,size_t bufsize)145 int git_delta_index_init(
146 	git_delta_index **out, const void *buf, size_t bufsize)
147 {
148 	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
149 	const unsigned char *data, *buffer = buf;
150 	struct git_delta_index *index;
151 	struct index_entry *entry, **hash;
152 	void *mem;
153 	unsigned long memsize;
154 
155 	*out = NULL;
156 
157 	if (!buf || !bufsize)
158 		return 0;
159 
160 	/* Determine index hash size.  Note that indexing skips the
161 	   first byte to allow for optimizing the rabin polynomial
162 	   initialization in create_delta(). */
163 	entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
164 	if (bufsize >= 0xffffffffUL) {
165 		/*
166 		 * Current delta format can't encode offsets into
167 		 * reference buffer with more than 32 bits.
168 		 */
169 		entries = 0xfffffffeU / RABIN_WINDOW;
170 	}
171 	hsize = entries / 4;
172 	for (i = 4; i < 31 && (1u << i) < hsize; i++);
173 	hsize = 1 << i;
174 	hmask = hsize - 1;
175 
176 	if (lookup_index_alloc(&mem, &memsize, entries, hsize) < 0)
177 		return -1;
178 
179 	index = mem;
180 	mem = index->hash;
181 	hash = mem;
182 	mem = hash + hsize;
183 	entry = mem;
184 
185 	index->memsize = memsize;
186 	index->src_buf = buf;
187 	index->src_size = bufsize;
188 	index->hash_mask = hmask;
189 	memset(hash, 0, hsize * sizeof(*hash));
190 
191 	/* allocate an array to count hash entries */
192 	hash_count = git__calloc(hsize, sizeof(*hash_count));
193 	if (!hash_count) {
194 		git__free(index);
195 		return -1;
196 	}
197 
198 	/* then populate the index */
199 	prev_val = ~0;
200 	for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
201 	     data >= buffer;
202 	     data -= RABIN_WINDOW) {
203 		unsigned int val = 0;
204 		for (i = 1; i <= RABIN_WINDOW; i++)
205 			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
206 		if (val == prev_val) {
207 			/* keep the lowest of consecutive identical blocks */
208 			entry[-1].ptr = data + RABIN_WINDOW;
209 		} else {
210 			prev_val = val;
211 			i = val & hmask;
212 			entry->ptr = data + RABIN_WINDOW;
213 			entry->val = val;
214 			entry->next = hash[i];
215 			hash[i] = entry++;
216 			hash_count[i]++;
217 		}
218 	}
219 
220 	/*
221 	 * Determine a limit on the number of entries in the same hash
222 	 * bucket.  This guard us against patological data sets causing
223 	 * really bad hash distribution with most entries in the same hash
224 	 * bucket that would bring us to O(m*n) computing costs (m and n
225 	 * corresponding to reference and target buffer sizes).
226 	 *
227 	 * Make sure none of the hash buckets has more entries than
228 	 * we're willing to test.  Otherwise we cull the entry list
229 	 * uniformly to still preserve a good repartition across
230 	 * the reference buffer.
231 	 */
232 	for (i = 0; i < hsize; i++) {
233 		if (hash_count[i] < HASH_LIMIT)
234 			continue;
235 
236 		entry = hash[i];
237 		do {
238 			struct index_entry *keep = entry;
239 			int skip = hash_count[i] / HASH_LIMIT / 2;
240 			do {
241 				entry = entry->next;
242 			} while(--skip && entry);
243 			keep->next = entry;
244 		} while (entry);
245 	}
246 	git__free(hash_count);
247 
248 	*out = index;
249 	return 0;
250 }
251 
git_delta_index_free(git_delta_index * index)252 void git_delta_index_free(git_delta_index *index)
253 {
254 	git__free(index);
255 }
256 
git_delta_index_size(git_delta_index * index)257 size_t git_delta_index_size(git_delta_index *index)
258 {
259 	assert(index);
260 
261 	return index->memsize;
262 }
263 
264 /*
265  * The maximum size for any opcode sequence, including the initial header
266  * plus rabin window plus biggest copy.
267  */
268 #define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
269 
git_delta_create_from_index(void ** out,size_t * out_len,const struct git_delta_index * index,const void * trg_buf,size_t trg_size,size_t max_size)270 int git_delta_create_from_index(
271 	void **out,
272 	size_t *out_len,
273 	const struct git_delta_index *index,
274 	const void *trg_buf,
275 	size_t trg_size,
276 	size_t max_size)
277 {
278 	unsigned int i, bufpos, bufsize, moff, msize, val;
279 	int inscnt;
280 	const unsigned char *ref_data, *ref_top, *data, *top;
281 	unsigned char *buf;
282 
283 	*out = NULL;
284 	*out_len = 0;
285 
286 	if (!trg_buf || !trg_size)
287 		return 0;
288 
289 	if (index->src_size > UINT_MAX ||
290 	    trg_size > UINT_MAX ||
291 	    max_size > (UINT_MAX - MAX_OP_SIZE - 1)) {
292 		git_error_set(GIT_ERROR_INVALID, "buffer sizes too large for delta processing");
293 		return -1;
294 	}
295 
296 	bufpos = 0;
297 	bufsize = 8192;
298 	if (max_size && bufsize >= max_size)
299 		bufsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
300 	buf = git__malloc(bufsize);
301 	GIT_ERROR_CHECK_ALLOC(buf);
302 
303 	/* store reference buffer size */
304 	i = (unsigned int)index->src_size;
305 	while (i >= 0x80) {
306 		buf[bufpos++] = i | 0x80;
307 		i >>= 7;
308 	}
309 	buf[bufpos++] = i;
310 
311 	/* store target buffer size */
312 	i = (unsigned int)trg_size;
313 	while (i >= 0x80) {
314 		buf[bufpos++] = i | 0x80;
315 		i >>= 7;
316 	}
317 	buf[bufpos++] = i;
318 
319 	ref_data = index->src_buf;
320 	ref_top = ref_data + index->src_size;
321 	data = trg_buf;
322 	top = (const unsigned char *) trg_buf + trg_size;
323 
324 	bufpos++;
325 	val = 0;
326 	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
327 		buf[bufpos++] = *data;
328 		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
329 	}
330 	inscnt = i;
331 
332 	moff = 0;
333 	msize = 0;
334 	while (data < top) {
335 		if (msize < 4096) {
336 			struct index_entry *entry;
337 			val ^= U[data[-RABIN_WINDOW]];
338 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
339 			i = val & index->hash_mask;
340 			for (entry = index->hash[i]; entry; entry = entry->next) {
341 				const unsigned char *ref = entry->ptr;
342 				const unsigned char *src = data;
343 				unsigned int ref_size = (unsigned int)(ref_top - ref);
344 				if (entry->val != val)
345 					continue;
346 				if (ref_size > (unsigned int)(top - src))
347 					ref_size = (unsigned int)(top - src);
348 				if (ref_size <= msize)
349 					break;
350 				while (ref_size-- && *src++ == *ref)
351 					ref++;
352 				if (msize < (unsigned int)(ref - entry->ptr)) {
353 					/* this is our best match so far */
354 					msize = (unsigned int)(ref - entry->ptr);
355 					moff = (unsigned int)(entry->ptr - ref_data);
356 					if (msize >= 4096) /* good enough */
357 						break;
358 				}
359 			}
360 		}
361 
362 		if (msize < 4) {
363 			if (!inscnt)
364 				bufpos++;
365 			buf[bufpos++] = *data++;
366 			inscnt++;
367 			if (inscnt == 0x7f) {
368 				buf[bufpos - inscnt - 1] = inscnt;
369 				inscnt = 0;
370 			}
371 			msize = 0;
372 		} else {
373 			unsigned int left;
374 			unsigned char *op;
375 
376 			if (inscnt) {
377 				while (moff && ref_data[moff-1] == data[-1]) {
378 					/* we can match one byte back */
379 					msize++;
380 					moff--;
381 					data--;
382 					bufpos--;
383 					if (--inscnt)
384 						continue;
385 					bufpos--;  /* remove count slot */
386 					inscnt--;  /* make it -1 */
387 					break;
388 				}
389 				buf[bufpos - inscnt - 1] = inscnt;
390 				inscnt = 0;
391 			}
392 
393 			/* A copy op is currently limited to 64KB (pack v2) */
394 			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
395 			msize -= left;
396 
397 			op = buf + bufpos++;
398 			i = 0x80;
399 
400 			if (moff & 0x000000ff)
401 				buf[bufpos++] = moff >> 0,  i |= 0x01;
402 			if (moff & 0x0000ff00)
403 				buf[bufpos++] = moff >> 8,  i |= 0x02;
404 			if (moff & 0x00ff0000)
405 				buf[bufpos++] = moff >> 16, i |= 0x04;
406 			if (moff & 0xff000000)
407 				buf[bufpos++] = moff >> 24, i |= 0x08;
408 
409 			if (msize & 0x00ff)
410 				buf[bufpos++] = msize >> 0, i |= 0x10;
411 			if (msize & 0xff00)
412 				buf[bufpos++] = msize >> 8, i |= 0x20;
413 
414 			*op = i;
415 
416 			data += msize;
417 			moff += msize;
418 			msize = left;
419 
420 			if (msize < 4096) {
421 				int j;
422 				val = 0;
423 				for (j = -RABIN_WINDOW; j < 0; j++)
424 					val = ((val << 8) | data[j])
425 					      ^ T[val >> RABIN_SHIFT];
426 			}
427 		}
428 
429 		if (bufpos >= bufsize - MAX_OP_SIZE) {
430 			void *tmp = buf;
431 			bufsize = bufsize * 3 / 2;
432 			if (max_size && bufsize >= max_size)
433 				bufsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
434 			if (max_size && bufpos > max_size)
435 				break;
436 			buf = git__realloc(buf, bufsize);
437 			if (!buf) {
438 				git__free(tmp);
439 				return -1;
440 			}
441 		}
442 	}
443 
444 	if (inscnt)
445 		buf[bufpos - inscnt - 1] = inscnt;
446 
447 	if (max_size && bufpos > max_size) {
448 		git_error_set(GIT_ERROR_NOMEMORY, "delta would be larger than maximum size");
449 		git__free(buf);
450 		return GIT_EBUFS;
451 	}
452 
453 	*out_len = bufpos;
454 	*out = buf;
455 	return 0;
456 }
457 
458 /*
459 * Delta application was heavily cribbed from BinaryDelta.java in JGit, which
460 * itself was heavily cribbed from <code>patch-delta.c</code> in the
461 * GIT project.	The original delta patching code was written by
462 * Nicolas Pitre <nico@cam.org>.
463 */
464 
hdr_sz(size_t * size,const unsigned char ** delta,const unsigned char * end)465 static int hdr_sz(
466 	size_t *size,
467 	const unsigned char **delta,
468 	const unsigned char *end)
469 {
470 	const unsigned char *d = *delta;
471 	size_t r = 0;
472 	unsigned int c, shift = 0;
473 
474 	do {
475 		if (d == end) {
476 			git_error_set(GIT_ERROR_INVALID, "truncated delta");
477 			return -1;
478 		}
479 
480 		c = *d++;
481 		r |= (c & 0x7f) << shift;
482 		shift += 7;
483 	} while (c & 0x80);
484 	*delta = d;
485 	*size = r;
486 	return 0;
487 }
488 
git_delta_read_header(size_t * base_out,size_t * result_out,const unsigned char * delta,size_t delta_len)489 int git_delta_read_header(
490 	size_t *base_out,
491 	size_t *result_out,
492 	const unsigned char *delta,
493 	size_t delta_len)
494 {
495 	const unsigned char *delta_end = delta + delta_len;
496 	if ((hdr_sz(base_out, &delta, delta_end) < 0) ||
497 		(hdr_sz(result_out, &delta, delta_end) < 0))
498 		return -1;
499 	return 0;
500 }
501 
502 #define DELTA_HEADER_BUFFER_LEN 16
git_delta_read_header_fromstream(size_t * base_sz,size_t * res_sz,git_packfile_stream * stream)503 int git_delta_read_header_fromstream(
504 	size_t *base_sz, size_t *res_sz, git_packfile_stream *stream)
505 {
506 	static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN;
507 	unsigned char buffer[DELTA_HEADER_BUFFER_LEN];
508 	const unsigned char *delta, *delta_end;
509 	size_t len;
510 	ssize_t read;
511 
512 	len = read = 0;
513 	while (len < buffer_len) {
514 		read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len);
515 
516 		if (read == 0)
517 			break;
518 
519 		if (read == GIT_EBUFS)
520 			continue;
521 
522 		len += read;
523 	}
524 
525 	delta = buffer;
526 	delta_end = delta + len;
527 	if ((hdr_sz(base_sz, &delta, delta_end) < 0) ||
528 		(hdr_sz(res_sz, &delta, delta_end) < 0))
529 		return -1;
530 
531 	return 0;
532 }
533 
git_delta_apply(void ** out,size_t * out_len,const unsigned char * base,size_t base_len,const unsigned char * delta,size_t delta_len)534 int git_delta_apply(
535 	void **out,
536 	size_t *out_len,
537 	const unsigned char *base,
538 	size_t base_len,
539 	const unsigned char *delta,
540 	size_t delta_len)
541 {
542 	const unsigned char *delta_end = delta + delta_len;
543 	size_t base_sz, res_sz, alloc_sz;
544 	unsigned char *res_dp;
545 
546 	*out = NULL;
547 	*out_len = 0;
548 
549 	/*
550 	 * Check that the base size matches the data we were given;
551 	 * if not we would underflow while accessing data from the
552 	 * base object, resulting in data corruption or segfault.
553 	 */
554 	if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) {
555 		git_error_set(GIT_ERROR_INVALID, "failed to apply delta: base size does not match given data");
556 		return -1;
557 	}
558 
559 	if (hdr_sz(&res_sz, &delta, delta_end) < 0) {
560 		git_error_set(GIT_ERROR_INVALID, "failed to apply delta: base size does not match given data");
561 		return -1;
562 	}
563 
564 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1);
565 	res_dp = git__malloc(alloc_sz);
566 	GIT_ERROR_CHECK_ALLOC(res_dp);
567 
568 	res_dp[res_sz] = '\0';
569 	*out = res_dp;
570 	*out_len = res_sz;
571 
572 	while (delta < delta_end) {
573 		unsigned char cmd = *delta++;
574 		if (cmd & 0x80) {
575 			/* cmd is a copy instruction; copy from the base. */
576 			size_t off = 0, len = 0, end;
577 
578 #define ADD_DELTA(o, shift) { if (delta < delta_end) (o) |= ((unsigned) *delta++ << shift); else goto fail; }
579 			if (cmd & 0x01) ADD_DELTA(off, 0UL);
580 			if (cmd & 0x02) ADD_DELTA(off, 8UL);
581 			if (cmd & 0x04) ADD_DELTA(off, 16UL);
582 			if (cmd & 0x08) ADD_DELTA(off, 24UL);
583 
584 			if (cmd & 0x10) ADD_DELTA(len, 0UL);
585 			if (cmd & 0x20) ADD_DELTA(len, 8UL);
586 			if (cmd & 0x40) ADD_DELTA(len, 16UL);
587 			if (!len)       len = 0x10000;
588 #undef ADD_DELTA
589 
590 			if (GIT_ADD_SIZET_OVERFLOW(&end, off, len) ||
591 			    base_len < end || res_sz < len)
592 				goto fail;
593 
594 			memcpy(res_dp, base + off, len);
595 			res_dp += len;
596 			res_sz -= len;
597 
598 		} else if (cmd) {
599 			/*
600 			 * cmd is a literal insert instruction; copy from
601 			 * the delta stream itself.
602 			 */
603 			if (delta_end - delta < cmd || res_sz < cmd)
604 				goto fail;
605 			memcpy(res_dp, delta, cmd);
606 			delta += cmd;
607 			res_dp += cmd;
608 			res_sz -= cmd;
609 
610 		} else {
611 			/* cmd == 0 is reserved for future encodings. */
612 			goto fail;
613 		}
614 	}
615 
616 	if (delta != delta_end || res_sz)
617 		goto fail;
618 	return 0;
619 
620 fail:
621 	git__free(*out);
622 
623 	*out = NULL;
624 	*out_len = 0;
625 
626 	git_error_set(GIT_ERROR_INVALID, "failed to apply delta");
627 	return -1;
628 }
629