1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "delta.h"
9 
10 /* maximum hash entry list for the same hash bucket */
11 #define HASH_LIMIT 64
12 
13 #define RABIN_SHIFT 23
14 #define RABIN_WINDOW 16
15 
16 static const unsigned int T[256] = {
17 	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
18 	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
19 	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
20 	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
21 	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
22 	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
23 	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
24 	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
25 	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
26 	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
27 	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
28 	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
29 	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
30 	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
31 	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
32 	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
33 	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
34 	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
35 	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
36 	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
37 	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
38 	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
39 	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
40 	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
41 	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
42 	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
43 	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
44 	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
45 	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
46 	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
47 	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
48 	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
49 	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
50 	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
51 	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
52 	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
53 	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
54 	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
55 	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
56 	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
57 	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
58 	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
59 	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
60 };
61 
62 static const unsigned int U[256] = {
63 	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
64 	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
65 	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
66 	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
67 	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
68 	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
69 	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
70 	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
71 	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
72 	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
73 	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
74 	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
75 	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
76 	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
77 	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
78 	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
79 	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
80 	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
81 	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
82 	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
83 	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
84 	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
85 	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
86 	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
87 	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
88 	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
89 	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
90 	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
91 	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
92 	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
93 	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
94 	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
95 	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
96 	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
97 	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
98 	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
99 	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
100 	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
101 	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
102 	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
103 	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
104 	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
105 	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
106 };
107 
108 struct index_entry {
109 	const unsigned char *ptr;
110 	unsigned int val;
111 	struct index_entry *next;
112 };
113 
114 struct git_delta_index {
115 	unsigned long memsize;
116 	const void *src_buf;
117 	size_t src_size;
118 	unsigned int hash_mask;
119 	struct index_entry *hash[GIT_FLEX_ARRAY];
120 };
121 
lookup_index_alloc(void ** out,unsigned long * out_len,size_t entries,size_t hash_count)122 static int lookup_index_alloc(
123 	void **out, unsigned long *out_len, size_t entries, size_t hash_count)
124 {
125 	size_t entries_len, hash_len, index_len;
126 
127 	GITERR_CHECK_ALLOC_MULTIPLY(&entries_len, entries, sizeof(struct index_entry));
128 	GITERR_CHECK_ALLOC_MULTIPLY(&hash_len, hash_count, sizeof(struct index_entry *));
129 
130 	GITERR_CHECK_ALLOC_ADD(&index_len, sizeof(struct git_delta_index), entries_len);
131 	GITERR_CHECK_ALLOC_ADD(&index_len, index_len, hash_len);
132 
133 	if (!git__is_ulong(index_len)) {
134 		giterr_set(GITERR_NOMEMORY, "overly large delta");
135 		return -1;
136 	}
137 
138 	*out = git__malloc(index_len);
139 	GITERR_CHECK_ALLOC(*out);
140 
141 	*out_len = index_len;
142 	return 0;
143 }
144 
git_delta_index_init(git_delta_index ** out,const void * buf,size_t bufsize)145 int git_delta_index_init(
146 	git_delta_index **out, const void *buf, size_t bufsize)
147 {
148 	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
149 	const unsigned char *data, *buffer = buf;
150 	struct git_delta_index *index;
151 	struct index_entry *entry, **hash;
152 	void *mem;
153 	unsigned long memsize;
154 
155 	*out = NULL;
156 
157 	if (!buf || !bufsize)
158 		return 0;
159 
160 	/* Determine index hash size.  Note that indexing skips the
161 	   first byte to allow for optimizing the rabin polynomial
162 	   initialization in create_delta(). */
163 	entries = (unsigned int)(bufsize - 1) / RABIN_WINDOW;
164 	if (bufsize >= 0xffffffffUL) {
165 		/*
166 		 * Current delta format can't encode offsets into
167 		 * reference buffer with more than 32 bits.
168 		 */
169 		entries = 0xfffffffeU / RABIN_WINDOW;
170 	}
171 	hsize = entries / 4;
172 	for (i = 4; i < 31 && (1u << i) < hsize; i++);
173 	hsize = 1 << i;
174 	hmask = hsize - 1;
175 
176 	if (lookup_index_alloc(&mem, &memsize, entries, hsize) < 0)
177 		return -1;
178 
179 	index = mem;
180 	mem = index->hash;
181 	hash = mem;
182 	mem = hash + hsize;
183 	entry = mem;
184 
185 	index->memsize = memsize;
186 	index->src_buf = buf;
187 	index->src_size = bufsize;
188 	index->hash_mask = hmask;
189 	memset(hash, 0, hsize * sizeof(*hash));
190 
191 	/* allocate an array to count hash entries */
192 	hash_count = git__calloc(hsize, sizeof(*hash_count));
193 	if (!hash_count) {
194 		git__free(index);
195 		return -1;
196 	}
197 
198 	/* then populate the index */
199 	prev_val = ~0;
200 	for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
201 	     data >= buffer;
202 	     data -= RABIN_WINDOW) {
203 		unsigned int val = 0;
204 		for (i = 1; i <= RABIN_WINDOW; i++)
205 			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
206 		if (val == prev_val) {
207 			/* keep the lowest of consecutive identical blocks */
208 			entry[-1].ptr = data + RABIN_WINDOW;
209 		} else {
210 			prev_val = val;
211 			i = val & hmask;
212 			entry->ptr = data + RABIN_WINDOW;
213 			entry->val = val;
214 			entry->next = hash[i];
215 			hash[i] = entry++;
216 			hash_count[i]++;
217 		}
218 	}
219 
220 	/*
221 	 * Determine a limit on the number of entries in the same hash
222 	 * bucket.  This guard us against patological data sets causing
223 	 * really bad hash distribution with most entries in the same hash
224 	 * bucket that would bring us to O(m*n) computing costs (m and n
225 	 * corresponding to reference and target buffer sizes).
226 	 *
227 	 * Make sure none of the hash buckets has more entries than
228 	 * we're willing to test.  Otherwise we cull the entry list
229 	 * uniformly to still preserve a good repartition across
230 	 * the reference buffer.
231 	 */
232 	for (i = 0; i < hsize; i++) {
233 		if (hash_count[i] < HASH_LIMIT)
234 			continue;
235 
236 		entry = hash[i];
237 		do {
238 			struct index_entry *keep = entry;
239 			int skip = hash_count[i] / HASH_LIMIT / 2;
240 			do {
241 				entry = entry->next;
242 			} while(--skip && entry);
243 			keep->next = entry;
244 		} while (entry);
245 	}
246 	git__free(hash_count);
247 
248 	*out = index;
249 	return 0;
250 }
251 
git_delta_index_free(git_delta_index * index)252 void git_delta_index_free(git_delta_index *index)
253 {
254 	git__free(index);
255 }
256 
git_delta_index_size(git_delta_index * index)257 size_t git_delta_index_size(git_delta_index *index)
258 {
259 	assert(index);
260 
261 	return index->memsize;
262 }
263 
264 /*
265  * The maximum size for any opcode sequence, including the initial header
266  * plus rabin window plus biggest copy.
267  */
268 #define MAX_OP_SIZE	(5 + 5 + 1 + RABIN_WINDOW + 7)
269 
git_delta_create_from_index(void ** out,size_t * out_len,const struct git_delta_index * index,const void * trg_buf,size_t trg_size,size_t max_size)270 int git_delta_create_from_index(
271 	void **out,
272 	size_t *out_len,
273 	const struct git_delta_index *index,
274 	const void *trg_buf,
275 	size_t trg_size,
276 	size_t max_size)
277 {
278 	unsigned int i, bufpos, bufsize, moff, msize, val;
279 	int inscnt;
280 	const unsigned char *ref_data, *ref_top, *data, *top;
281 	unsigned char *buf;
282 
283 	*out = NULL;
284 	*out_len = 0;
285 
286 	if (!trg_buf || !trg_size)
287 		return 0;
288 
289 	bufpos = 0;
290 	bufsize = 8192;
291 	if (max_size && bufsize >= max_size)
292 		bufsize = (unsigned int)(max_size + MAX_OP_SIZE + 1);
293 	buf = git__malloc(bufsize);
294 	GITERR_CHECK_ALLOC(buf);
295 
296 	/* store reference buffer size */
297 	i = index->src_size;
298 	while (i >= 0x80) {
299 		buf[bufpos++] = i | 0x80;
300 		i >>= 7;
301 	}
302 	buf[bufpos++] = i;
303 
304 	/* store target buffer size */
305 	i = trg_size;
306 	while (i >= 0x80) {
307 		buf[bufpos++] = i | 0x80;
308 		i >>= 7;
309 	}
310 	buf[bufpos++] = i;
311 
312 	ref_data = index->src_buf;
313 	ref_top = ref_data + index->src_size;
314 	data = trg_buf;
315 	top = (const unsigned char *) trg_buf + trg_size;
316 
317 	bufpos++;
318 	val = 0;
319 	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
320 		buf[bufpos++] = *data;
321 		val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
322 	}
323 	inscnt = i;
324 
325 	moff = 0;
326 	msize = 0;
327 	while (data < top) {
328 		if (msize < 4096) {
329 			struct index_entry *entry;
330 			val ^= U[data[-RABIN_WINDOW]];
331 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
332 			i = val & index->hash_mask;
333 			for (entry = index->hash[i]; entry; entry = entry->next) {
334 				const unsigned char *ref = entry->ptr;
335 				const unsigned char *src = data;
336 				unsigned int ref_size = (unsigned int)(ref_top - ref);
337 				if (entry->val != val)
338 					continue;
339 				if (ref_size > (unsigned int)(top - src))
340 					ref_size = (unsigned int)(top - src);
341 				if (ref_size <= msize)
342 					break;
343 				while (ref_size-- && *src++ == *ref)
344 					ref++;
345 				if (msize < (unsigned int)(ref - entry->ptr)) {
346 					/* this is our best match so far */
347 					msize = (unsigned int)(ref - entry->ptr);
348 					moff = (unsigned int)(entry->ptr - ref_data);
349 					if (msize >= 4096) /* good enough */
350 						break;
351 				}
352 			}
353 		}
354 
355 		if (msize < 4) {
356 			if (!inscnt)
357 				bufpos++;
358 			buf[bufpos++] = *data++;
359 			inscnt++;
360 			if (inscnt == 0x7f) {
361 				buf[bufpos - inscnt - 1] = inscnt;
362 				inscnt = 0;
363 			}
364 			msize = 0;
365 		} else {
366 			unsigned int left;
367 			unsigned char *op;
368 
369 			if (inscnt) {
370 				while (moff && ref_data[moff-1] == data[-1]) {
371 					/* we can match one byte back */
372 					msize++;
373 					moff--;
374 					data--;
375 					bufpos--;
376 					if (--inscnt)
377 						continue;
378 					bufpos--;  /* remove count slot */
379 					inscnt--;  /* make it -1 */
380 					break;
381 				}
382 				buf[bufpos - inscnt - 1] = inscnt;
383 				inscnt = 0;
384 			}
385 
386 			/* A copy op is currently limited to 64KB (pack v2) */
387 			left = (msize < 0x10000) ? 0 : (msize - 0x10000);
388 			msize -= left;
389 
390 			op = buf + bufpos++;
391 			i = 0x80;
392 
393 			if (moff & 0x000000ff)
394 				buf[bufpos++] = moff >> 0,  i |= 0x01;
395 			if (moff & 0x0000ff00)
396 				buf[bufpos++] = moff >> 8,  i |= 0x02;
397 			if (moff & 0x00ff0000)
398 				buf[bufpos++] = moff >> 16, i |= 0x04;
399 			if (moff & 0xff000000)
400 				buf[bufpos++] = moff >> 24, i |= 0x08;
401 
402 			if (msize & 0x00ff)
403 				buf[bufpos++] = msize >> 0, i |= 0x10;
404 			if (msize & 0xff00)
405 				buf[bufpos++] = msize >> 8, i |= 0x20;
406 
407 			*op = i;
408 
409 			data += msize;
410 			moff += msize;
411 			msize = left;
412 
413 			if (msize < 4096) {
414 				int j;
415 				val = 0;
416 				for (j = -RABIN_WINDOW; j < 0; j++)
417 					val = ((val << 8) | data[j])
418 					      ^ T[val >> RABIN_SHIFT];
419 			}
420 		}
421 
422 		if (bufpos >= bufsize - MAX_OP_SIZE) {
423 			void *tmp = buf;
424 			bufsize = bufsize * 3 / 2;
425 			if (max_size && bufsize >= max_size)
426 				bufsize = max_size + MAX_OP_SIZE + 1;
427 			if (max_size && bufpos > max_size)
428 				break;
429 			buf = git__realloc(buf, bufsize);
430 			if (!buf) {
431 				git__free(tmp);
432 				return -1;
433 			}
434 		}
435 	}
436 
437 	if (inscnt)
438 		buf[bufpos - inscnt - 1] = inscnt;
439 
440 	if (max_size && bufpos > max_size) {
441 		giterr_set(GITERR_NOMEMORY, "delta would be larger than maximum size");
442 		git__free(buf);
443 		return GIT_EBUFS;
444 	}
445 
446 	*out_len = bufpos;
447 	*out = buf;
448 	return 0;
449 }
450 
451 /*
452 * Delta application was heavily cribbed from BinaryDelta.java in JGit, which
453 * itself was heavily cribbed from <code>patch-delta.c</code> in the
454 * GIT project.	The original delta patching code was written by
455 * Nicolas Pitre <nico@cam.org>.
456 */
457 
hdr_sz(size_t * size,const unsigned char ** delta,const unsigned char * end)458 static int hdr_sz(
459 	size_t *size,
460 	const unsigned char **delta,
461 	const unsigned char *end)
462 {
463 	const unsigned char *d = *delta;
464 	size_t r = 0;
465 	unsigned int c, shift = 0;
466 
467 	do {
468 		if (d == end) {
469 			giterr_set(GITERR_INVALID, "truncated delta");
470 			return -1;
471 		}
472 
473 		c = *d++;
474 		r |= (c & 0x7f) << shift;
475 		shift += 7;
476 	} while (c & 0x80);
477 	*delta = d;
478 	*size = r;
479 	return 0;
480 }
481 
git_delta_read_header(size_t * base_out,size_t * result_out,const unsigned char * delta,size_t delta_len)482 int git_delta_read_header(
483 	size_t *base_out,
484 	size_t *result_out,
485 	const unsigned char *delta,
486 	size_t delta_len)
487 {
488 	const unsigned char *delta_end = delta + delta_len;
489 	if ((hdr_sz(base_out, &delta, delta_end) < 0) ||
490 		(hdr_sz(result_out, &delta, delta_end) < 0))
491 		return -1;
492 	return 0;
493 }
494 
495 #define DELTA_HEADER_BUFFER_LEN 16
git_delta_read_header_fromstream(size_t * base_sz,size_t * res_sz,git_packfile_stream * stream)496 int git_delta_read_header_fromstream(
497 	size_t *base_sz, size_t *res_sz, git_packfile_stream *stream)
498 {
499 	static const size_t buffer_len = DELTA_HEADER_BUFFER_LEN;
500 	unsigned char buffer[DELTA_HEADER_BUFFER_LEN];
501 	const unsigned char *delta, *delta_end;
502 	size_t len;
503 	ssize_t read;
504 
505 	len = read = 0;
506 	while (len < buffer_len) {
507 		read = git_packfile_stream_read(stream, &buffer[len], buffer_len - len);
508 
509 		if (read == 0)
510 			break;
511 
512 		if (read == GIT_EBUFS)
513 			continue;
514 
515 		len += read;
516 	}
517 
518 	delta = buffer;
519 	delta_end = delta + len;
520 	if ((hdr_sz(base_sz, &delta, delta_end) < 0) ||
521 		(hdr_sz(res_sz, &delta, delta_end) < 0))
522 		return -1;
523 
524 	return 0;
525 }
526 
git_delta_apply(void ** out,size_t * out_len,const unsigned char * base,size_t base_len,const unsigned char * delta,size_t delta_len)527 int git_delta_apply(
528 	void **out,
529 	size_t *out_len,
530 	const unsigned char *base,
531 	size_t base_len,
532 	const unsigned char *delta,
533 	size_t delta_len)
534 {
535 	const unsigned char *delta_end = delta + delta_len;
536 	size_t base_sz, res_sz, alloc_sz;
537 	unsigned char *res_dp;
538 
539 	*out = NULL;
540 	*out_len = 0;
541 
542 	/* Check that the base size matches the data we were given;
543 	* if not we would underflow while accessing data from the
544 	* base object, resulting in data corruption or segfault.
545 	*/
546 	if ((hdr_sz(&base_sz, &delta, delta_end) < 0) || (base_sz != base_len)) {
547 		giterr_set(GITERR_INVALID, "failed to apply delta: base size does not match given data");
548 		return -1;
549 	}
550 
551 	if (hdr_sz(&res_sz, &delta, delta_end) < 0) {
552 		giterr_set(GITERR_INVALID, "failed to apply delta: base size does not match given data");
553 		return -1;
554 	}
555 
556 	GITERR_CHECK_ALLOC_ADD(&alloc_sz, res_sz, 1);
557 	res_dp = git__malloc(alloc_sz);
558 	GITERR_CHECK_ALLOC(res_dp);
559 
560 	res_dp[res_sz] = '\0';
561 	*out = res_dp;
562 	*out_len = res_sz;
563 
564 	while (delta < delta_end) {
565 		unsigned char cmd = *delta++;
566 		if (cmd & 0x80) {
567 			/* cmd is a copy instruction; copy from the base.
568 			*/
569 			size_t off = 0, len = 0;
570 
571 			if (cmd & 0x01) off = *delta++;
572 			if (cmd & 0x02) off |= *delta++ << 8UL;
573 			if (cmd & 0x04) off |= *delta++ << 16UL;
574 			if (cmd & 0x08) off |= *delta++ << 24UL;
575 
576 			if (cmd & 0x10) len = *delta++;
577 			if (cmd & 0x20) len |= *delta++ << 8UL;
578 			if (cmd & 0x40) len |= *delta++ << 16UL;
579 			if (!len)		len = 0x10000;
580 
581 			if (base_len < off + len || res_sz < len)
582 				goto fail;
583 			memcpy(res_dp, base + off, len);
584 			res_dp += len;
585 			res_sz -= len;
586 
587 		}
588 		else if (cmd) {
589 			/* cmd is a literal insert instruction; copy from
590 			* the delta stream itself.
591 			*/
592 			if (delta_end - delta < cmd || res_sz < cmd)
593 				goto fail;
594 			memcpy(res_dp, delta, cmd);
595 			delta += cmd;
596 			res_dp += cmd;
597 			res_sz -= cmd;
598 
599 		}
600 		else {
601 			/* cmd == 0 is reserved for future encodings.
602 			*/
603 			goto fail;
604 		}
605 	}
606 
607 	if (delta != delta_end || res_sz)
608 		goto fail;
609 	return 0;
610 
611 fail:
612 	git__free(*out);
613 
614 	*out = NULL;
615 	*out_len = 0;
616 
617 	giterr_set(GITERR_INVALID, "failed to apply delta");
618 	return -1;
619 }
620