1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "pack.h"
9 
10 #include "delta.h"
11 #include "futils.h"
12 #include "mwindow.h"
13 #include "odb.h"
14 #include "oid.h"
15 #include "oidarray.h"
16 
/*
 * Global option to bypass checking for the existence of '.keep' files.
 * Initialized to false, i.e. the bypass is off unless something sets it.
 */
bool git_disable_pack_keep_file_checks = false;
19 
/* Forward declarations of static helpers that are used before they are defined. */
static int packfile_open_locked(struct git_pack_file *p);
static off64_t nth_packed_object_offset_locked(struct git_pack_file *p, uint32_t n);
static int packfile_unpack_compressed(
		git_rawobj *obj,
		struct git_pack_file *p,
		git_mwindow **w_curs,
		off64_t *curpos,
		size_t size,
		git_object_t type);

/*
 * Find the offset of an object given a prefix of its identifier.
 * Fails with GIT_EAMBIGUOUSOIDPREFIX if the short oid is ambiguous
 * within the pack.
 * This function assumes that len is between GIT_OID_MINPREFIXLEN
 * and GIT_OID_HEXSZ.
 */
static int pack_entry_find_offset(
		off64_t *offset_out,
		git_oid *found_oid,
		struct git_pack_file *p,
		const git_oid *short_oid,
		size_t len);
43 
packfile_error(const char * message)44 static int packfile_error(const char *message)
45 {
46 	git_error_set(GIT_ERROR_ODB, "invalid pack file - %s", message);
47 	return -1;
48 }
49 
50 /********************
51  * Delta base cache
52  ********************/
53 
new_cache_object(git_rawobj * source)54 static git_pack_cache_entry *new_cache_object(git_rawobj *source)
55 {
56 	git_pack_cache_entry *e = git__calloc(1, sizeof(git_pack_cache_entry));
57 	if (!e)
58 		return NULL;
59 
60 	git_atomic32_inc(&e->refcount);
61 	memcpy(&e->raw, source, sizeof(git_rawobj));
62 
63 	return e;
64 }
65 
free_cache_object(void * o)66 static void free_cache_object(void *o)
67 {
68 	git_pack_cache_entry *e = (git_pack_cache_entry *)o;
69 
70 	if (e != NULL) {
71 		git__free(e->raw.data);
72 		git__free(e);
73 	}
74 }
75 
/*
 * Free every entry stored in the cache and then the map itself.
 * Does nothing when the cache has no map; the map pointer is reset to
 * NULL afterwards.
 */
static void cache_free(git_pack_cache *cache)
{
	git_pack_cache_entry *current;

	if (cache->entries == NULL)
		return;

	git_offmap_foreach_value(cache->entries, current, {
		free_cache_object(current);
	});

	git_offmap_free(cache->entries);
	cache->entries = NULL;
}
89 
/*
 * Set up an empty delta base cache: create the offset map, apply the
 * default memory limit and initialize the cache mutex.
 *
 * Returns 0 on success and -1 on failure. When the mutex cannot be
 * initialized the freshly created map is released again and
 * cache->entries is reset to NULL.
 */
static int cache_init(git_pack_cache *cache)
{
	if (git_offmap_new(&cache->entries) < 0)
		return -1;

	cache->memory_limit = GIT_PACK_CACHE_MEMORY_LIMIT;

	if (git_mutex_init(&cache->lock)) {
		git_error_set(GIT_ERROR_OS, "failed to initialize pack cache mutex");

		/*
		 * Release the map through its own destructor, exactly as
		 * cache_free() does, rather than freeing the raw pointer.
		 */
		git_offmap_free(cache->entries);
		cache->entries = NULL;

		return -1;
	}

	return 0;
}
108 
cache_get(git_pack_cache * cache,off64_t offset)109 static git_pack_cache_entry *cache_get(git_pack_cache *cache, off64_t offset)
110 {
111 	git_pack_cache_entry *entry;
112 
113 	if (git_mutex_lock(&cache->lock) < 0)
114 		return NULL;
115 
116 	if ((entry = git_offmap_get(cache->entries, offset)) != NULL) {
117 		git_atomic32_inc(&entry->refcount);
118 		entry->last_usage = cache->use_ctr++;
119 	}
120 	git_mutex_unlock(&cache->lock);
121 
122 	return entry;
123 }
124 
125 /* Run with the cache lock held */
free_lowest_entry(git_pack_cache * cache)126 static void free_lowest_entry(git_pack_cache *cache)
127 {
128 	off64_t offset;
129 	git_pack_cache_entry *entry;
130 
131 	git_offmap_foreach(cache->entries, offset, entry, {
132 		if (entry && git_atomic32_get(&entry->refcount) == 0) {
133 			cache->memory_used -= entry->raw.len;
134 			git_offmap_delete(cache->entries, offset);
135 			free_cache_object(entry);
136 		}
137 	});
138 }
139 
/*
 * Try to insert `base` into the delta base cache under `offset`.
 *
 * Returns 0 only when the object really was inserted; in that case
 * *cached_out points at the new entry (whose refcount was bumped once by
 * new_cache_object()) and the entry now refers to base->data.
 *
 * Returns -1 whenever the object was NOT inserted: it is larger than
 * GIT_PACK_CACHE_SIZE_LIMIT, the entry could not be allocated, the cache
 * lock could not be taken, another entry already exists for `offset`, or
 * the map insertion failed. In all of those cases *cached_out is left
 * untouched and base->data still belongs to the caller.
 */
static int cache_add(
		git_pack_cache_entry **cached_out,
		git_pack_cache *cache,
		git_rawobj *base,
		off64_t offset)
{
	git_pack_cache_entry *entry;
	int exists, set_error = 0;

	if (base->len > GIT_PACK_CACHE_SIZE_LIMIT)
		return -1;

	/*
	 * An allocation failure must be reported as "not cached"; returning
	 * success here would make the caller believe the cache took over
	 * base->data and the buffer would never be freed.
	 */
	entry = new_cache_object(base);
	if (entry == NULL)
		return -1;

	if (git_mutex_lock(&cache->lock) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock cache");
		git__free(entry);
		return -1;
	}

	/* Add it to the cache if nobody else has */
	exists = git_offmap_exists(cache->entries, offset);
	if (!exists) {
		/*
		 * free_lowest_entry() only evicts entries with a zero
		 * refcount, so this loop keeps retrying while other users
		 * still hold references to everything in the cache.
		 */
		while (cache->memory_used + base->len > cache->memory_limit)
			free_lowest_entry(cache);

		/* only account for the entry once it is really in the map */
		set_error = git_offmap_set(cache->entries, offset, entry);
		if (set_error == 0) {
			cache->memory_used += entry->raw.len;
			*cached_out = entry;
		}
	}
	git_mutex_unlock(&cache->lock);

	/*
	 * Somebody beat us to adding it into the cache, or the insertion
	 * failed: drop the wrapper only, the data stays with the caller.
	 */
	if (exists || set_error < 0) {
		git__free(entry);
		return -1;
	}

	return 0;
}
180 
181 /***********************************************************
182  *
183  * PACK INDEX METHODS
184  *
185  ***********************************************************/
186 
pack_index_free(struct git_pack_file * p)187 static void pack_index_free(struct git_pack_file *p)
188 {
189 	if (p->oids) {
190 		git__free(p->oids);
191 		p->oids = NULL;
192 	}
193 	if (p->index_map.data) {
194 		git_futils_mmap_free(&p->index_map);
195 		p->index_map.data = NULL;
196 	}
197 }
198 
199 /* Run with the packfile lock held */
pack_index_check_locked(const char * path,struct git_pack_file * p)200 static int pack_index_check_locked(const char *path, struct git_pack_file *p)
201 {
202 	struct git_pack_idx_header *hdr;
203 	uint32_t version, nr, i, *index;
204 	void *idx_map;
205 	size_t idx_size;
206 	struct stat st;
207 	int error;
208 	/* TODO: properly open the file without access time using O_NOATIME */
209 	git_file fd = git_futils_open_ro(path);
210 	if (fd < 0)
211 		return fd;
212 
213 	if (p_fstat(fd, &st) < 0) {
214 		p_close(fd);
215 		git_error_set(GIT_ERROR_OS, "unable to stat pack index '%s'", path);
216 		return -1;
217 	}
218 
219 	if (!S_ISREG(st.st_mode) ||
220 		!git__is_sizet(st.st_size) ||
221 		(idx_size = (size_t)st.st_size) < 4 * 256 + 20 + 20)
222 	{
223 		p_close(fd);
224 		git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
225 		return -1;
226 	}
227 
228 	error = git_futils_mmap_ro(&p->index_map, fd, 0, idx_size);
229 
230 	p_close(fd);
231 
232 	if (error < 0)
233 		return error;
234 
235 	hdr = idx_map = p->index_map.data;
236 
237 	if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) {
238 		version = ntohl(hdr->idx_version);
239 
240 		if (version < 2 || version > 2) {
241 			git_futils_mmap_free(&p->index_map);
242 			return packfile_error("unsupported index version");
243 		}
244 
245 	} else
246 		version = 1;
247 
248 	nr = 0;
249 	index = idx_map;
250 
251 	if (version > 1)
252 		index += 2; /* skip index header */
253 
254 	for (i = 0; i < 256; i++) {
255 		uint32_t n = ntohl(index[i]);
256 		if (n < nr) {
257 			git_futils_mmap_free(&p->index_map);
258 			return packfile_error("index is non-monotonic");
259 		}
260 		nr = n;
261 	}
262 
263 	if (version == 1) {
264 		/*
265 		 * Total size:
266 		 * - 256 index entries 4 bytes each
267 		 * - 24-byte entries * nr (20-byte sha1 + 4-byte offset)
268 		 * - 20-byte SHA1 of the packfile
269 		 * - 20-byte SHA1 file checksum
270 		 */
271 		if (idx_size != 4*256 + nr * 24 + 20 + 20) {
272 			git_futils_mmap_free(&p->index_map);
273 			return packfile_error("index is corrupted");
274 		}
275 	} else if (version == 2) {
276 		/*
277 		 * Minimum size:
278 		 * - 8 bytes of header
279 		 * - 256 index entries 4 bytes each
280 		 * - 20-byte sha1 entry * nr
281 		 * - 4-byte crc entry * nr
282 		 * - 4-byte offset entry * nr
283 		 * - 20-byte SHA1 of the packfile
284 		 * - 20-byte SHA1 file checksum
285 		 * And after the 4-byte offset table might be a
286 		 * variable sized table containing 8-byte entries
287 		 * for offsets larger than 2^31.
288 		 */
289 		unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20;
290 		unsigned long max_size = min_size;
291 
292 		if (nr)
293 			max_size += (nr - 1)*8;
294 
295 		if (idx_size < min_size || idx_size > max_size) {
296 			git_futils_mmap_free(&p->index_map);
297 			return packfile_error("wrong index size");
298 		}
299 	}
300 
301 	p->num_objects = nr;
302 	p->index_version = version;
303 	return 0;
304 }
305 
306 /* Run with the packfile lock held */
pack_index_open_locked(struct git_pack_file * p)307 static int pack_index_open_locked(struct git_pack_file *p)
308 {
309 	int error = 0;
310 	size_t name_len;
311 	git_buf idx_name = GIT_BUF_INIT;
312 
313 	if (p->index_version > -1)
314 		goto cleanup;
315 
316 	/* checked by git_pack_file alloc */
317 	name_len = strlen(p->pack_name);
318 	GIT_ASSERT(name_len > strlen(".pack"));
319 
320 	if ((error = git_buf_init(&idx_name, name_len)) < 0)
321 		goto cleanup;
322 
323 	git_buf_put(&idx_name, p->pack_name, name_len - strlen(".pack"));
324 	git_buf_puts(&idx_name, ".idx");
325 	if (git_buf_oom(&idx_name)) {
326 		error = -1;
327 		goto cleanup;
328 	}
329 
330 	if (p->index_version == -1)
331 		error = pack_index_check_locked(idx_name.ptr, p);
332 
333 cleanup:
334 	git_buf_dispose(&idx_name);
335 
336 	return error;
337 }
338 
/*
 * Open a window into the packfile at `offset`, opening the packfile
 * first if it has no descriptor yet.
 *
 * Takes the packfile lock and then the mwindow-file lock for the
 * duration of the call. Returns a pointer to the data (with the number
 * of available bytes in *left, as reported by git_mwindow_open()), or
 * NULL when a lock cannot be taken, the packfile cannot be opened, or
 * the offset is out of range.
 */
static unsigned char *pack_window_open(
		struct git_pack_file *p,
		git_mwindow **w_cursor,
		off64_t offset,
		unsigned int *left)
{
	unsigned char *window = NULL;

	if (git_mutex_lock(&p->lock) < 0) {
		git_error_set(GIT_ERROR_THREAD, "unable to lock packfile");
		return NULL;
	}

	if (git_mutex_lock(&p->mwf.lock) < 0) {
		git_mutex_unlock(&p->lock);
		git_error_set(GIT_ERROR_THREAD, "unable to lock packfile");
		return NULL;
	}

	if (p->mwf.fd == -1 && packfile_open_locked(p) < 0)
		goto unlock;

	/*
	 * Packfiles end in a hash of their content, so an offset that
	 * points into those last 20 bytes is never useful and is refused.
	 * A negative offset means the value has wrapped around and is
	 * refused as well.
	 */
	if (offset >= 0 && offset <= (p->mwf.size - 20))
		window = git_mwindow_open(&p->mwf, w_cursor, offset, 20, left);

unlock:
	git_mutex_unlock(&p->mwf.lock);
	git_mutex_unlock(&p->lock);

	return window;
}
380 
381 /*
382  * The per-object header is a pretty dense thing, which is
383  *  - first byte: low four bits are "size",
384  *    then three bits of "type",
385  *    with the high bit being "size continues".
386  *  - each byte afterwards: low seven bits are size continuation,
387  *    with the high bit being "size continues"
388  */
git_packfile__object_header(size_t * out,unsigned char * hdr,size_t size,git_object_t type)389 int git_packfile__object_header(size_t *out, unsigned char *hdr, size_t size, git_object_t type)
390 {
391 	unsigned char *hdr_base;
392 	unsigned char c;
393 
394 	GIT_ASSERT_ARG(type >= GIT_OBJECT_COMMIT && type <= GIT_OBJECT_REF_DELTA);
395 
396 	/* TODO: add support for chunked objects; see git.git 6c0d19b1 */
397 
398 	c = (unsigned char)((type << 4) | (size & 15));
399 	size >>= 4;
400 	hdr_base = hdr;
401 
402 	while (size) {
403 		*hdr++ = c | 0x80;
404 		c = size & 0x7f;
405 		size >>= 7;
406 	}
407 	*hdr++ = c;
408 
409 	*out = (hdr - hdr_base);
410 	return 0;
411 }
412 
413 
/*
 * Decode a per-object pack header from `buf`, which holds `len` bytes.
 *
 * On success the object type goes to *type, the decoded size to *sizep
 * and the number of header bytes consumed to *usedp; 0 is returned.
 * Returns GIT_EBUFS when the header continues past `len`, and -1 (with
 * *usedp set to 0) when the size needs more bits than a long can hold.
 */
static int packfile_unpack_header1(
		unsigned long *usedp,
		size_t *sizep,
		git_object_t *type,
		const unsigned char *buf,
		unsigned long len)
{
	unsigned long byte, value;
	unsigned long pos = 0;
	unsigned bits;

	/* first byte: type in bits 4-6, low four size bits in bits 0-3 */
	byte = buf[pos++];
	*type = (byte >> 4) & 7;
	value = byte & 15;

	/* every continuation byte contributes seven more size bits */
	for (bits = 4; byte & 0x80; bits += 7) {
		if (len <= pos) {
			git_error_set(GIT_ERROR_ODB, "buffer too small");
			return GIT_EBUFS;
		}

		if (bitsizeof(long) <= bits) {
			*usedp = 0;
			git_error_set(GIT_ERROR_ODB, "packfile corrupted");
			return -1;
		}

		byte = buf[pos++];
		value += (byte & 0x7f) << bits;
	}

	*sizep = (size_t)value;
	*usedp = pos;
	return 0;
}
450 
/*
 * Read the object header located at *curpos in the packfile.
 *
 * The packfile is opened on demand. On success the object's size and
 * type are stored in *size_p and *type_p, *curpos is advanced past the
 * header and 0 is returned. Returns GIT_EBUFS when no window could be
 * opened or the header does not fit in the window, a lock/open error
 * unchanged, and the packfile_error() result for any other decoding
 * failure. The window in *w_curs is closed again before returning.
 */
int git_packfile_unpack_header(
		size_t *size_p,
		git_object_t *type_p,
		struct git_pack_file *p,
		git_mwindow **w_curs,
		off64_t *curpos)
{
	unsigned char *base = NULL;
	unsigned long consumed;
	unsigned int left;
	int error, open_error = 0;

	if ((error = git_mutex_lock(&p->lock)) < 0)
		return error;

	if ((error = git_mutex_lock(&p->mwf.lock)) < 0) {
		git_mutex_unlock(&p->lock);
		return error;
	}

	if (p->mwf.fd == -1)
		open_error = packfile_open_locked(p);

	/*
	 * The window is requested with 20 extra bytes (the hash size), so
	 * [base, base + 20) is available to look at. With this header
	 * encoding, 20 bytes could describe a size of up to 2^137, so a
	 * sane header never exceeds what we have been given.
	 */
	if (open_error >= 0)
		base = git_mwindow_open(&p->mwf, w_curs, *curpos, 20, &left);

	git_mutex_unlock(&p->lock);
	git_mutex_unlock(&p->mwf.lock);

	if (open_error < 0)
		return open_error;
	if (base == NULL)
		return GIT_EBUFS;

	error = packfile_unpack_header1(&consumed, size_p, type_p, base, left);
	git_mwindow_close(w_curs);

	if (error == GIT_EBUFS)
		return error;
	if (error < 0)
		return packfile_error("header length is zero");

	*curpos += consumed;
	return 0;
}
498 
/*
 * Work out the final size and type of the object stored at `offset`
 * without inflating its whole content.
 *
 * For a non-delta object the size comes straight from its header. For a
 * delta, the resulting size is read from the delta's own header via a
 * packfile stream, and the type is found by following the chain of
 * bases until a non-delta object is reached.
 *
 * Returns 0 on success with *size_p and *type_p filled in, or the
 * negative error of the step that failed.
 */
int git_packfile_resolve_header(
		size_t *size_p,
		git_object_t *type_p,
		struct git_pack_file *p,
		off64_t offset)
{
	git_mwindow *window = NULL;
	off64_t pos = offset;
	off64_t base_offset;
	git_object_t type;
	size_t size;
	int error;

	if ((error = git_mutex_lock(&p->lock)) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock packfile reader");
		return error;
	}

	if ((error = git_mutex_lock(&p->mwf.lock)) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock packfile reader");
		git_mutex_unlock(&p->lock);
		return error;
	}

	/* make sure the packfile is open before reading from it */
	if (p->mwf.fd == -1)
		error = packfile_open_locked(p);

	git_mutex_unlock(&p->mwf.lock);
	git_mutex_unlock(&p->lock);

	if (error < 0)
		return error;

	if ((error = git_packfile_unpack_header(&size, &type, p, &window, &pos)) < 0)
		return error;

	if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA) {
		*size_p = size;
		base_offset = 0;
	} else {
		git_packfile_stream stream;
		size_t base_size;

		error = get_delta_base(&base_offset, p, &window, &pos, type, offset);
		git_mwindow_close(&window);

		if (error < 0)
			return error;

		/* the delta data itself announces the size of its result */
		if ((error = git_packfile_stream_open(&stream, p, pos)) < 0)
			return error;

		error = git_delta_read_header_fromstream(&base_size, size_p, &stream);
		git_packfile_stream_dispose(&stream);

		if (error < 0)
			return error;
	}

	/* walk down the bases until one of them is not a delta */
	while (type == GIT_OBJECT_OFS_DELTA || type == GIT_OBJECT_REF_DELTA) {
		pos = base_offset;

		if ((error = git_packfile_unpack_header(&size, &type, p, &window, &pos)) < 0)
			return error;

		if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
			break;

		error = get_delta_base(&base_offset, p, &window, &pos, type, base_offset);
		git_mwindow_close(&window);

		if (error < 0)
			return error;
	}

	*type_p = type;

	return error;
}
575 
/* Number of chain elements that fit in the caller-provided small stack. */
#define SMALL_STACK_SIZE 64

/**
 * Generate the chain of dependencies which we need to get to the
 * object at `obj_offset`. `chain` is used as a stack; popping gives the
 * right order to apply deltas in. If an object is found in the pack's
 * base cache, we stop calculating there.
 *
 * Elements are first written to `small_stack`. Once SMALL_STACK_SIZE
 * elements have been collected they are copied into a heap-backed array
 * and every later element is appended there.
 *
 * On success *stack_sz is set to the number of chain entries (counting
 * the cached base, if the walk ended on one), *chain_out receives the
 * array (left empty when the small stack was enough) and, when a cached
 * base was found, *cached_out / *cached_off describe it.
 * On failure the array is cleared and the error code is returned.
 */
static int pack_dependency_chain(git_dependency_chain *chain_out,
				 git_pack_cache_entry **cached_out, off64_t *cached_off,
				 struct pack_chain_elem *small_stack, size_t *stack_sz,
				 struct git_pack_file *p, off64_t obj_offset)
{
	git_dependency_chain chain = GIT_ARRAY_INIT;
	git_mwindow *w_curs = NULL;
	off64_t curpos = obj_offset, base_offset;
	int error = 0, use_heap = 0;
	size_t size, elem_pos;
	git_object_t type;

	elem_pos = 0;
	while (true) {
		struct pack_chain_elem *elem;
		git_pack_cache_entry *cached = NULL;

		/* if we have a base cached, we can stop here instead */
		if ((cached = cache_get(&p->bases, obj_offset)) != NULL) {
			*cached_out = cached;
			*cached_off = obj_offset;
			break;
		}

		/* if we run out of space on the small stack, use the array */
		if (elem_pos == SMALL_STACK_SIZE) {
			git_array_init_to_size(chain, elem_pos);
			GIT_ERROR_CHECK_ARRAY(chain);
			/* carry over what was collected on the small stack so far */
			memcpy(chain.ptr, small_stack, elem_pos * sizeof(struct pack_chain_elem));
			chain.size = elem_pos;
			use_heap = 1;
		}

		curpos = obj_offset;
		if (!use_heap) {
			elem = &small_stack[elem_pos];
		} else {
			elem = git_array_alloc(chain);
			if (!elem) {
				error = -1;
				goto on_error;
			}
		}

		/* the key under which this object would be stored in the base cache */
		elem->base_key = obj_offset;

		error = git_packfile_unpack_header(&size, &type, p, &w_curs, &curpos);
		if (error < 0)
			goto on_error;

		/* curpos now points just past the object header */
		elem->offset = curpos;
		elem->size = size;
		elem->type = type;
		elem->base_key = obj_offset;

		/* a non-delta object is the bottom of the chain */
		if (type != GIT_OBJECT_OFS_DELTA && type != GIT_OBJECT_REF_DELTA)
			break;

		error = get_delta_base(&base_offset, p, &w_curs, &curpos, type, obj_offset);
		git_mwindow_close(&w_curs);

		if (error < 0)
			goto on_error;

		/* we need to pass the pos *after* the delta-base bit */
		elem->offset = curpos;

		/* go through the loop again, but with the new object */
		obj_offset = base_offset;
		elem_pos++;
	}


	/* elem_pos is the index of the last slot, so the count is one more */
	*stack_sz = elem_pos + 1;
	*chain_out = chain;
	return error;

on_error:
	git_array_clear(chain);
	return error;
}
665 
/*
 * Unpack the object whose header starts at *obj_offset into `obj`.
 *
 * The dependency chain of the object is computed first (stopping early
 * at a base found in the pack's base cache). The bottom of the chain is
 * then inflated, or taken from the cache, and every delta above it is
 * applied in turn. Intermediate bases are offered to the cache as they
 * are produced.
 *
 * On success 0 is returned and obj->data is a buffer owned by the
 * caller. On failure a negative value is returned, any buffer this
 * function still owns is freed and obj->data is left NULL.
 *
 * *obj_offset is overwritten by pack_dependency_chain() when a cached
 * base is found, and is set to the current read position at the end
 * whenever at least one chain element was read from the pack.
 */
int git_packfile_unpack(
	git_rawobj *obj,
	struct git_pack_file *p,
	off64_t *obj_offset)
{
	git_mwindow *w_curs = NULL;
	off64_t curpos = *obj_offset;
	int error, free_base = 0;
	git_dependency_chain chain = GIT_ARRAY_INIT;
	struct pack_chain_elem *elem = NULL, *stack;
	git_pack_cache_entry *cached = NULL;
	struct pack_chain_elem small_stack[SMALL_STACK_SIZE];
	size_t stack_size = 0, elem_pos, alloclen;
	git_object_t base_type;

	error = git_mutex_lock(&p->lock);
	if (error < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock packfile reader");
		return error;
	}
	error = git_mutex_lock(&p->mwf.lock);
	if (error < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock packfile reader");
		git_mutex_unlock(&p->lock);
		return error;
	}

	if (p->mwf.fd == -1)
		error = packfile_open_locked(p);
	git_mutex_unlock(&p->mwf.lock);
	git_mutex_unlock(&p->lock);
	if (error < 0)
		return error;

	/*
	 * TODO: optionally check the CRC on the packfile
	 */

	error = pack_dependency_chain(&chain, &cached, obj_offset, small_stack, &stack_size, p, *obj_offset);
	if (error < 0)
		return error;

	obj->data = NULL;
	obj->len = 0;
	obj->type = GIT_OBJECT_INVALID;

	/* let's point to the right stack */
	stack = chain.ptr ? chain.ptr : small_stack;

	elem_pos = stack_size;
	if (cached) {
		/* obj->data aliases the cache's buffer from here on */
		memcpy(obj, &cached->raw, sizeof(git_rawobj));
		base_type = obj->type;
		elem_pos--;	/* stack_size includes the base, which isn't actually there */
	} else {
		elem = &stack[--elem_pos];
		base_type = elem->type;
	}

	switch (base_type) {
	case GIT_OBJECT_COMMIT:
	case GIT_OBJECT_TREE:
	case GIT_OBJECT_BLOB:
	case GIT_OBJECT_TAG:
		if (!cached) {
			curpos = elem->offset;
			error = packfile_unpack_compressed(obj, p, &w_curs, &curpos, elem->size, elem->type);
			git_mwindow_close(&w_curs);
			base_type = elem->type;
		}
		if (error < 0)
			goto cleanup;
		break;
	case GIT_OBJECT_OFS_DELTA:
	case GIT_OBJECT_REF_DELTA:
		error = packfile_error("dependency chain ends in a delta");
		goto cleanup;
	default:
		error = packfile_error("invalid packfile type in header");
		goto cleanup;
	}

	/*
	 * Finding the object we want as a cached base element is
	 * problematic, as we need to make sure we don't accidentally
	 * give the caller the cached object, which it would then feel
	 * free to free, so we need to copy the data.
	 */
	if (cached && stack_size == 1) {
		void *data = obj->data;

		/*
		 * Drop the alias to the cache's buffer before anything can
		 * fail: the error path frees obj->data, and that must never
		 * be memory the cache still owns. Failures go through
		 * cleanup so the reference on the cached entry is released.
		 */
		obj->data = NULL;

		alloclen = obj->len + 1;
		if (alloclen == 0) {
			/* obj->len + 1 wrapped around */
			error = packfile_error("cached object is too large");
			goto cleanup;
		}

		obj->data = git__malloc(alloclen);
		if (obj->data == NULL) {
			error = -1;
			goto cleanup;
		}

		memcpy(obj->data, data, alloclen);
		git_atomic32_dec(&cached->refcount);
		goto cleanup;
	}

	/* we now apply each consecutive delta until we run out */
	while (elem_pos > 0 && !error) {
		git_rawobj base, delta;

		/*
		 * We can now try to add the base to the cache, as
		 * long as it's not already the cached one. Whenever no
		 * cache entry results from the attempt, the buffer is
		 * still ours and has to be freed once it has been used.
		 */
		if (!cached)
			free_base = cache_add(&cached, &p->bases, obj, elem->base_key) != 0 ||
				cached == NULL;

		elem = &stack[elem_pos - 1];
		curpos = elem->offset;
		error = packfile_unpack_compressed(&delta, p, &w_curs, &curpos, elem->size, elem->type);
		git_mwindow_close(&w_curs);

		if (error < 0) {
			/*
			 * If the base lives in the cache, the cache owns its
			 * buffer, so just drop our alias. Otherwise the
			 * buffer is still ours and cleanup will free it.
			 */
			if (!free_base)
				obj->data = NULL;
			break;
		}

		/* the current object becomes the new base, on which we apply the delta */
		base = *obj;
		obj->data = NULL;
		obj->len = 0;
		obj->type = GIT_OBJECT_INVALID;

		error = git_delta_apply(&obj->data, &obj->len, base.data, base.len, delta.data, delta.len);
		obj->type = base_type;

		/*
		 * We usually don't want to free the base at this
		 * point, as we put it into the cache in the previous
		 * iteration. free_base lets us know that we got the
		 * base object directly from the packfile, so we can free it.
		 */
		git__free(delta.data);
		if (free_base) {
			free_base = 0;
			git__free(base.data);
		}

		if (cached) {
			git_atomic32_dec(&cached->refcount);
			cached = NULL;
		}

		if (error < 0)
			break;

		elem_pos--;
	}

cleanup:
	if (error < 0) {
		git__free(obj->data);
		/* don't hand a dangling pointer back to the caller */
		obj->data = NULL;
		if (cached)
			git_atomic32_dec(&cached->refcount);
	}

	if (elem)
		*obj_offset = curpos;

	git_array_clear(chain);
	return error;
}
833 
/*
 * Prepare `obj` for streaming the compressed data that starts at
 * `curpos` in packfile `p`: the structure is zeroed, the position and
 * packfile are recorded and an inflating zstream is initialized.
 *
 * Returns 0 on success and -1 if the zstream cannot be initialized.
 */
int git_packfile_stream_open(git_packfile_stream *obj, struct git_pack_file *p, off64_t curpos)
{
	memset(obj, 0, sizeof(git_packfile_stream));

	obj->p = p;
	obj->curpos = curpos;

	if (git_zstream_init(&obj->zstream, GIT_ZSTREAM_INFLATE) >= 0)
		return 0;

	git_error_set(GIT_ERROR_ZLIB, "failed to init packfile stream");
	return -1;
}
847 
/*
 * Inflate up to `len` bytes from the stream into `buffer`.
 *
 * Returns the number of bytes written, 0 once the stream has been marked
 * as done, GIT_EBUFS when no window could be opened or when nothing was
 * produced although the stream has not ended, and -1 on a zlib error.
 * The stream position is advanced by the amount of input consumed.
 */
ssize_t git_packfile_stream_read(git_packfile_stream *obj, void *buffer, size_t len)
{
	unsigned char *window;
	unsigned int avail;
	int error;

	if (obj->done)
		return 0;

	window = pack_window_open(obj->p, &obj->mw, obj->curpos, &avail);
	if (window == NULL)
		return GIT_EBUFS;

	error = git_zstream_set_input(&obj->zstream, window, avail);
	if (error >= 0)
		error = git_zstream_get_output_chunk(buffer, &len, &obj->zstream);

	/* the window is no longer needed, whether inflating worked or not */
	git_mwindow_close(&obj->mw);

	if (error < 0) {
		git_error_set(GIT_ERROR_ZLIB, "error reading from the zlib stream");
		return -1;
	}

	/* advance by whatever part of the window was consumed */
	obj->curpos += avail - obj->zstream.in_len;

	if (git_zstream_eos(&obj->zstream))
		obj->done = 1;

	/* If we didn't write anything out but we're not done, we need more data */
	if (!len && !git_zstream_eos(&obj->zstream))
		return GIT_EBUFS;

	return len;
}
881 
/*
 * Hand the stream's zstream to git_zstream_free(). Nothing else in
 * `obj` is touched and `obj` itself is not freed here.
 */
void git_packfile_stream_dispose(git_packfile_stream *obj)
{
	git_zstream_free(&obj->zstream);
}
886 
/*
 * Inflate the compressed object data found at *position into a freshly
 * allocated buffer of `size` + 1 bytes (zero-initialized, so the result
 * is followed by a NUL byte).
 *
 * On success `obj` receives the buffer, `size` and `type`, *position has
 * been advanced past the consumed input and 0 is returned. On failure
 * the buffer is freed and a non-zero error is returned; the inflated
 * length not matching `size` counts as a failure.
 */
static int packfile_unpack_compressed(
	git_rawobj *obj,
	struct git_pack_file *p,
	git_mwindow **mwindow,
	off64_t *position,
	size_t size,
	git_object_t type)
{
	git_zstream zstream = GIT_ZSTREAM_INIT;
	size_t capacity, written = 0;
	char *inflated = NULL;
	int error;

	GIT_ERROR_CHECK_ALLOC_ADD(&capacity, size, 1);
	inflated = git__calloc(1, capacity);
	GIT_ERROR_CHECK_ALLOC(inflated);

	error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE);
	if (error < 0) {
		git_error_set(GIT_ERROR_ZLIB, "failed to init zlib stream on unpack");
		goto out;
	}

	for (;;) {
		size_t produced = capacity - written;
		unsigned int avail, consumed;
		unsigned char *window;

		window = pack_window_open(p, mwindow, *position, &avail);
		if (window == NULL) {
			error = -1;
			goto out;
		}

		error = git_zstream_set_input(&zstream, window, avail);
		if (error >= 0)
			error = git_zstream_get_output_chunk(inflated + written, &produced, &zstream);

		git_mwindow_close(mwindow);

		if (error < 0)
			goto out;

		consumed = avail - (unsigned int)zstream.in_len;

		/* neither input consumed nor output produced: no progress is possible */
		if (!produced && !consumed) {
			git_error_set(GIT_ERROR_ZLIB, "error inflating zlib stream");
			error = -1;
			goto out;
		}

		*position += consumed;
		written += produced;

		if (git_zstream_eos(&zstream))
			break;
	}

	if (written != size || !git_zstream_eos(&zstream)) {
		git_error_set(GIT_ERROR_ZLIB, "error inflating zlib stream");
		error = -1;
		goto out;
	}

	obj->type = type;
	obj->len = size;
	obj->data = inflated;

out:
	git_zstream_free(&zstream);
	if (error)
		git__free(inflated);

	return error;
}
956 
957 /*
958  * curpos is where the data starts, delta_obj_offset is the where the
959  * header starts
960  */
get_delta_base(off64_t * delta_base_out,struct git_pack_file * p,git_mwindow ** w_curs,off64_t * curpos,git_object_t type,off64_t delta_obj_offset)961 int get_delta_base(
962 		off64_t *delta_base_out,
963 		struct git_pack_file *p,
964 		git_mwindow **w_curs,
965 		off64_t *curpos,
966 		git_object_t type,
967 		off64_t delta_obj_offset)
968 {
969 	unsigned int left = 0;
970 	unsigned char *base_info;
971 	off64_t base_offset;
972 	git_oid unused;
973 
974 	GIT_ASSERT_ARG(delta_base_out);
975 
976 	base_info = pack_window_open(p, w_curs, *curpos, &left);
977 	/* Assumption: the only reason this would fail is because the file is too small */
978 	if (base_info == NULL)
979 		return GIT_EBUFS;
980 	/* pack_window_open() assured us we have [base_info, base_info + 20)
981 	 * as a range that we can look at without walking off the
982 	 * end of the mapped window. Its actually the hash size
983 	 * that is assured. An OFS_DELTA longer than the hash size
984 	 * is stupid, as then a REF_DELTA would be smaller to store.
985 	 */
986 	if (type == GIT_OBJECT_OFS_DELTA) {
987 		unsigned used = 0;
988 		unsigned char c = base_info[used++];
989 		size_t unsigned_base_offset = c & 127;
990 		while (c & 128) {
991 			if (left <= used)
992 				return GIT_EBUFS;
993 			unsigned_base_offset += 1;
994 			if (!unsigned_base_offset || MSB(unsigned_base_offset, 7))
995 				return packfile_error("overflow");
996 			c = base_info[used++];
997 			unsigned_base_offset = (unsigned_base_offset << 7) + (c & 127);
998 		}
999 		if (unsigned_base_offset == 0 || (size_t)delta_obj_offset <= unsigned_base_offset)
1000 			return packfile_error("out of bounds");
1001 		base_offset = delta_obj_offset - unsigned_base_offset;
1002 		*curpos += used;
1003 	} else if (type == GIT_OBJECT_REF_DELTA) {
1004 		/* If we have the cooperative cache, search in it first */
1005 		if (p->has_cache) {
1006 			struct git_pack_entry *entry;
1007 			git_oid oid;
1008 
1009 			git_oid_fromraw(&oid, base_info);
1010 			if ((entry = git_oidmap_get(p->idx_cache, &oid)) != NULL) {
1011 				if (entry->offset == 0)
1012 					return packfile_error("delta offset is zero");
1013 
1014 				*curpos += 20;
1015 				*delta_base_out = entry->offset;
1016 				return 0;
1017 			} else {
1018 				/* If we're building an index, don't try to find the pack
1019 				 * entry; we just haven't seen it yet.  We'll make
1020 				 * progress again in the next loop.
1021 				 */
1022 				return GIT_PASSTHROUGH;
1023 			}
1024 		}
1025 
1026 		/* The base entry _must_ be in the same pack */
1027 		if (pack_entry_find_offset(&base_offset, &unused, p, (git_oid *)base_info, GIT_OID_HEXSZ) < 0)
1028 			return packfile_error("base entry delta is not in the same pack");
1029 		*curpos += 20;
1030 	} else
1031 		return packfile_error("unknown object type");
1032 
1033 	if (base_offset == 0)
1034 		return packfile_error("delta offset is zero");
1035 
1036 	*delta_base_out = base_offset;
1037 	return 0;
1038 }
1039 
1040 /***********************************************************
1041  *
1042  * PACKFILE METHODS
1043  *
1044  ***********************************************************/
1045 
git_packfile_free(struct git_pack_file * p,bool unlink_packfile)1046 void git_packfile_free(struct git_pack_file *p, bool unlink_packfile)
1047 {
1048 	bool locked = true;
1049 
1050 	if (!p)
1051 		return;
1052 
1053 	cache_free(&p->bases);
1054 
1055 	if (git_mutex_lock(&p->lock) < 0) {
1056 		git_error_set(GIT_ERROR_OS, "failed to lock packfile");
1057 		locked = false;
1058 	}
1059 	if (p->mwf.fd >= 0) {
1060 		git_mwindow_free_all(&p->mwf);
1061 		p_close(p->mwf.fd);
1062 		p->mwf.fd = -1;
1063 	}
1064 	if (locked)
1065 		git_mutex_unlock(&p->lock);
1066 
1067 	if (unlink_packfile)
1068 		p_unlink(p->pack_name);
1069 
1070 	pack_index_free(p);
1071 
1072 	git__free(p->bad_object_sha1);
1073 
1074 	git_mutex_free(&p->bases.lock);
1075 	git_mutex_free(&p->mwf.lock);
1076 	git_mutex_free(&p->lock);
1077 	git__free(p);
1078 }
1079 
1080 /* Run with the packfile and mwf locks held */
packfile_open_locked(struct git_pack_file * p)1081 static int packfile_open_locked(struct git_pack_file *p)
1082 {
1083 	struct stat st;
1084 	struct git_pack_header hdr;
1085 	git_oid sha1;
1086 	unsigned char *idx_sha1;
1087 
1088 	if (pack_index_open_locked(p) < 0)
1089 		return git_odb__error_notfound("failed to open packfile", NULL, 0);
1090 
1091 	if (p->mwf.fd >= 0)
1092 		return 0;
1093 
1094 	/* TODO: open with noatime */
1095 	p->mwf.fd = git_futils_open_ro(p->pack_name);
1096 	if (p->mwf.fd < 0)
1097 		goto cleanup;
1098 
1099 	if (p_fstat(p->mwf.fd, &st) < 0) {
1100 		git_error_set(GIT_ERROR_OS, "could not stat packfile");
1101 		goto cleanup;
1102 	}
1103 
1104 	/* If we created the struct before we had the pack we lack size. */
1105 	if (!p->mwf.size) {
1106 		if (!S_ISREG(st.st_mode))
1107 			goto cleanup;
1108 		p->mwf.size = (off64_t)st.st_size;
1109 	} else if (p->mwf.size != st.st_size)
1110 		goto cleanup;
1111 
1112 #if 0
1113 	/* We leave these file descriptors open with sliding mmap;
1114 	 * there is no point keeping them open across exec(), though.
1115 	 */
1116 	fd_flag = fcntl(p->mwf.fd, F_GETFD, 0);
1117 	if (fd_flag < 0)
1118 		goto cleanup;
1119 
1120 	fd_flag |= FD_CLOEXEC;
1121 	if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
1122 		goto cleanup;
1123 #endif
1124 
1125 	/* Verify we recognize this pack file format. */
1126 	if (p_read(p->mwf.fd, &hdr, sizeof(hdr)) < 0 ||
1127 		hdr.hdr_signature != htonl(PACK_SIGNATURE) ||
1128 		!pack_version_ok(hdr.hdr_version))
1129 		goto cleanup;
1130 
1131 	/* Verify the pack matches its index. */
1132 	if (p->num_objects != ntohl(hdr.hdr_entries) ||
1133 		p_pread(p->mwf.fd, sha1.id, GIT_OID_RAWSZ, p->mwf.size - GIT_OID_RAWSZ) < 0)
1134 		goto cleanup;
1135 
1136 	idx_sha1 = ((unsigned char *)p->index_map.data) + p->index_map.len - 40;
1137 
1138 	if (git_oid__cmp(&sha1, (git_oid *)idx_sha1) != 0)
1139 		goto cleanup;
1140 
1141 	if (git_mwindow_file_register(&p->mwf) < 0)
1142 		goto cleanup;
1143 
1144 	return 0;
1145 
1146 cleanup:
1147 	git_error_set(GIT_ERROR_OS, "invalid packfile '%s'", p->pack_name);
1148 
1149 	if (p->mwf.fd >= 0)
1150 		p_close(p->mwf.fd);
1151 	p->mwf.fd = -1;
1152 
1153 	return -1;
1154 }
1155 
git_packfile__name(char ** out,const char * path)1156 int git_packfile__name(char **out, const char *path)
1157 {
1158 	size_t path_len;
1159 	git_buf buf = GIT_BUF_INIT;
1160 
1161 	path_len = strlen(path);
1162 
1163 	if (path_len < strlen(".idx"))
1164 		return git_odb__error_notfound("invalid packfile path", NULL, 0);
1165 
1166 	if (git_buf_printf(&buf, "%.*s.pack", (int)(path_len - strlen(".idx")), path) < 0)
1167 		return -1;
1168 
1169 	*out = git_buf_detach(&buf);
1170 	return 0;
1171 }
1172 
git_packfile_alloc(struct git_pack_file ** pack_out,const char * path)1173 int git_packfile_alloc(struct git_pack_file **pack_out, const char *path)
1174 {
1175 	struct stat st;
1176 	struct git_pack_file *p;
1177 	size_t path_len = path ? strlen(path) : 0, alloc_len;
1178 
1179 	*pack_out = NULL;
1180 
1181 	if (path_len < strlen(".idx"))
1182 		return git_odb__error_notfound("invalid packfile path", NULL, 0);
1183 
1184 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, sizeof(*p), path_len);
1185 	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);
1186 
1187 	p = git__calloc(1, alloc_len);
1188 	GIT_ERROR_CHECK_ALLOC(p);
1189 
1190 	memcpy(p->pack_name, path, path_len + 1);
1191 
1192 	/*
1193 	 * Make sure a corresponding .pack file exists and that
1194 	 * the index looks sane.
1195 	 */
1196 	if (git__suffixcmp(path, ".idx") == 0) {
1197 		size_t root_len = path_len - strlen(".idx");
1198 
1199 		if (!git_disable_pack_keep_file_checks) {
1200 			memcpy(p->pack_name + root_len, ".keep", sizeof(".keep"));
1201 			if (git_path_exists(p->pack_name) == true)
1202 				p->pack_keep = 1;
1203 		}
1204 
1205 		memcpy(p->pack_name + root_len, ".pack", sizeof(".pack"));
1206 	}
1207 
1208 	if (p_stat(p->pack_name, &st) < 0 || !S_ISREG(st.st_mode)) {
1209 		git__free(p);
1210 		return git_odb__error_notfound("packfile not found", NULL, 0);
1211 	}
1212 
1213 	/* ok, it looks sane as far as we can check without
1214 	 * actually mapping the pack file.
1215 	 */
1216 	p->mwf.fd = -1;
1217 	p->mwf.size = st.st_size;
1218 	p->pack_local = 1;
1219 	p->mtime = (git_time_t)st.st_mtime;
1220 	p->index_version = -1;
1221 
1222 	if (git_mutex_init(&p->lock) < 0) {
1223 		git_error_set(GIT_ERROR_OS, "failed to initialize packfile mutex");
1224 		git__free(p);
1225 		return -1;
1226 	}
1227 
1228 	if (git_mutex_init(&p->mwf.lock) < 0) {
1229 		git_error_set(GIT_ERROR_OS, "failed to initialize packfile window mutex");
1230 		git_mutex_free(&p->lock);
1231 		git__free(p);
1232 		return -1;
1233 	}
1234 
1235 	if (cache_init(&p->bases) < 0) {
1236 		git_mutex_free(&p->mwf.lock);
1237 		git_mutex_free(&p->lock);
1238 		git__free(p);
1239 		return -1;
1240 	}
1241 
1242 	*pack_out = p;
1243 
1244 	return 0;
1245 }
1246 
1247 /***********************************************************
1248  *
1249  * PACKFILE ENTRY SEARCH INTERNALS
1250  *
1251  ***********************************************************/
1252 
nth_packed_object_offset_locked(struct git_pack_file * p,uint32_t n)1253 static off64_t nth_packed_object_offset_locked(struct git_pack_file *p, uint32_t n)
1254 {
1255 	const unsigned char *index, *end;
1256 	uint32_t off32;
1257 
1258 	index = p->index_map.data;
1259 	end = index + p->index_map.len;
1260 	index += 4 * 256;
1261 	if (p->index_version == 1)
1262 		return ntohl(*((uint32_t *)(index + 24 * n)));
1263 
1264 	index += 8 + p->num_objects * (20 + 4);
1265 	off32 = ntohl(*((uint32_t *)(index + 4 * n)));
1266 	if (!(off32 & 0x80000000))
1267 		return off32;
1268 	index += p->num_objects * 4 + (off32 & 0x7fffffff) * 8;
1269 
1270 	/* Make sure we're not being sent out of bounds */
1271 	if (index >= end - 8)
1272 		return -1;
1273 
1274 	return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) |
1275 				ntohl(*((uint32_t *)(index + 4)));
1276 }
1277 
/*
 * Comparison callback that orders two buffers by their first four bytes,
 * with the same sign convention as memcmp().
 */
static int git__memcmp4(const void *a, const void *b)
{
	enum { CMP_WIDTH = 4 };

	return memcmp(a, b, CMP_WIDTH);
}
1281 
/*
 * Invoke `cb` once for every object id stored in the pack's index.
 *
 * On first use, a table of pointers to the ids inside the mapped index
 * is built, ordered by the raw 4-byte offset field of each entry, and
 * cached in p->oids.  On every call the ids are then copied into a
 * private array while the lock is held, the lock is released, and `cb`
 * is run on the copies -- so the callback executes without p->lock.
 *
 * p    - packfile to iterate.
 * cb   - callback receiving each id and `data`; a non-zero return stops
 *        the iteration and becomes this function's return value.
 * data - opaque pointer handed to `cb`.
 *
 * Returns 0 after a full iteration, the callback's non-zero value if it
 * stopped early, or a negative value if locking, opening the index, or
 * allocating memory fails.
 */
int git_pack_foreach_entry(
	struct git_pack_file *p,
	git_odb_foreach_cb cb,
	void *data)
{
	const unsigned char *index, *current;
	uint32_t i;
	int error = 0;
	git_array_oid_t oids = GIT_ARRAY_INIT;
	git_oid *oid;

	if (git_mutex_lock(&p->lock) < 0)
		return packfile_error("failed to get lock for git_pack_foreach_entry");

	if ((error = pack_index_open_locked(p)) < 0) {
		git_mutex_unlock(&p->lock);
		return error;
	}

	if (!p->index_map.data) {
		git_error_set(GIT_ERROR_INTERNAL, "internal error: p->index_map.data == NULL");
		git_mutex_unlock(&p->lock);
		return -1;
	}

	index = p->index_map.data;

	/* Newer index versions carry 8 extra bytes before the fanout table. */
	if (p->index_version > 1)
		index += 8;

	/* Skip the fanout table. */
	index += 4 * 256;

	if (p->oids == NULL) {
		/*
		 * Named differently from the outer `oids` array so that the
		 * two are never confused inside this scope.
		 */
		git_vector offsets, sorted_oids;

		if ((error = git_vector_init(&sorted_oids, p->num_objects, NULL))) {
			git_mutex_unlock(&p->lock);
			return error;
		}

		if ((error = git_vector_init(&offsets, p->num_objects, git__memcmp4))) {
			/* Do not leak the vector that was already set up. */
			git_vector_free(&sorted_oids);
			git_mutex_unlock(&p->lock);
			return error;
		}

		if (p->index_version > 1) {
			/* The 4-byte offsets sit after the ids and a 4-byte per-object table. */
			const unsigned char *off = index + 24 * p->num_objects;
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&off[4 * i]);
			git_vector_sort(&offsets);
			/*
			 * Offset entry k lives at off + 4k and its id at
			 * index + 20k, hence the factor of five.
			 */
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&sorted_oids, (void*)&index[5 * (current - off)]);
		} else {
			/* Each 24-byte entry is a 4-byte offset followed by the id. */
			for (i = 0; i < p->num_objects; i++)
				git_vector_insert(&offsets, (void*)&index[24 * i]);
			git_vector_sort(&offsets);
			git_vector_foreach(&offsets, i, current)
				git_vector_insert(&sorted_oids, (void*)&current[4]);
		}

		git_vector_free(&offsets);
		p->oids = (git_oid **)git_vector_detach(NULL, NULL, &sorted_oids);
	}

	/* We need to copy the OIDs to another array before we relinquish the lock to avoid races. */
	git_array_init_to_size(oids, p->num_objects);
	if (!oids.ptr) {
		git_mutex_unlock(&p->lock);
		git_array_clear(oids);
		GIT_ERROR_CHECK_ARRAY(oids);
	}
	for (i = 0; i < p->num_objects; i++) {
		oid = git_array_alloc(oids);
		if (!oid) {
			git_mutex_unlock(&p->lock);
			git_array_clear(oids);
			GIT_ERROR_CHECK_ALLOC(oid);
		}
		git_oid_cpy(oid, p->oids[i]);
	}

	git_mutex_unlock(&p->lock);

	git_array_foreach(oids, i, oid) {
		if ((error = cb(oid, data)) != 0) {
			git_error_set_after_callback(error);
			break;
		}
	}

	git_array_clear(oids);
	return error;
}
1375 
/*
 * Invoke `cb` with the id and pack offset of every entry in the index,
 * in the order the entries are stored.
 *
 * Unlike git_pack_foreach_entry(), the ids passed to `cb` point directly
 * into the mapped index and p->lock is held for the whole iteration,
 * including while `cb` runs.
 *
 * p    - packfile to iterate; its index is opened if not yet mapped.
 * cb   - callback receiving id, offset and `data`; a non-zero return
 *        stops the iteration.
 * data - opaque pointer handed to `cb`.
 *
 * Returns 0 after a full iteration, the value produced by
 * git_error_set_after_callback() if the callback stopped early, or a
 * negative value if locking or opening the index fails, the index map
 * is unexpectedly missing, or a large-offset reference is out of range.
 */
int git_pack_foreach_entry_offset(
	struct git_pack_file *p,
	git_pack_foreach_entry_offset_cb cb,
	void *data)
{
	const unsigned char *index;
	off64_t current_offset;
	const git_oid *current_oid;
	uint32_t i;
	int error = 0;

	if (git_mutex_lock(&p->lock) < 0)
		return packfile_error("failed to get lock for git_pack_foreach_entry_offset");

	index = p->index_map.data;
	if (index == NULL) {
		if ((error = pack_index_open_locked(p)) < 0)
			goto cleanup;

		if (!p->index_map.data) {
			git_error_set(GIT_ERROR_INTERNAL, "internal error: p->index_map.data == NULL");
			/* Report the failure instead of falling out with error == 0. */
			error = -1;
			goto cleanup;
		}

		index = p->index_map.data;
	}

	/* Newer index versions carry 8 extra bytes before the fanout table. */
	if (p->index_version > 1)
		index += 8;

	/* Skip the fanout table. */
	index += 4 * 256;

	/* all offsets should have been validated by pack_index_check_locked */
	if (p->index_version > 1) {
		/* Layout after the fanout: 20-byte ids, a 4-byte per-object
		 * table, 4-byte offsets, then the 8-byte large offsets. */
		const unsigned char *offsets = index + 24 * p->num_objects;
		const unsigned char *large_offset_ptr;
		const unsigned char *large_offsets = index + 28 * p->num_objects;
		const unsigned char *large_offsets_end = ((const unsigned char *)p->index_map.data) + p->index_map.len - 20;
		for (i = 0; i < p->num_objects; i++) {
			current_offset = ntohl(*(const uint32_t *)(offsets + 4 * i));
			if (current_offset & 0x80000000) {
				/* High bit set: the low 31 bits select a 64-bit entry. */
				large_offset_ptr = large_offsets + (current_offset & 0x7fffffff) * 8;
				if (large_offset_ptr >= large_offsets_end) {
					error = packfile_error("invalid large offset");
					goto cleanup;
				}
				current_offset = (((off64_t)ntohl(*((uint32_t *)(large_offset_ptr + 0)))) << 32) |
						ntohl(*((uint32_t *)(large_offset_ptr + 4)));
			}
			current_oid = (const git_oid *)(index + 20 * i);
			if ((error = cb(current_oid, current_offset, data)) != 0) {
				error = git_error_set_after_callback(error);
				goto cleanup;
			}
		}
	} else {
		/* Version 1: each 24-byte entry is a 4-byte offset followed by the id. */
		for (i = 0; i < p->num_objects; i++) {
			current_offset = ntohl(*(const uint32_t *)(index + 24 * i));
			current_oid = (const git_oid *)(index + 24 * i + 4);
			if ((error = cb(current_oid, current_offset, data)) != 0) {
				error = git_error_set_after_callback(error);
				goto cleanup;
			}
		}
	}

cleanup:
	git_mutex_unlock(&p->lock);
	return error;
}
1446 
/*
 * Binary-search a table of fixed-stride records for a hash.
 *
 * oid_lookup_table - address of the hash field of record 0.
 * stride           - distance in bytes between consecutive records.
 * lo, hi           - half-open range [lo, hi) of record indices to
 *                    search; the records are expected to be sorted
 *                    according to git_oid__hashcmp().
 * oid_prefix       - hash to look for.
 *
 * Returns the index of a matching record if one is found.  Otherwise
 * returns -(pos) - 1, where pos is the index at which the hash would
 * have to be inserted, so a negative result always means "not found".
 */
int git_pack__lookup_sha1(const void *oid_lookup_table, size_t stride, unsigned lo,
		unsigned hi, const unsigned char *oid_prefix)
{
	const unsigned char *base = oid_lookup_table;

	while (lo < hi) {
		/* Written this way so that lo + hi cannot overflow. */
		unsigned mi = lo + (hi - lo) / 2;
		int cmp = git_oid__hashcmp(base + mi * stride, oid_prefix);

		if (!cmp)
			return mi;

		if (cmp > 0)
			hi = mi;
		else
			lo = mi+1;
	}

	return -((int)lo)-1;
}
1467 
/*
 * Look up an object in the pack index by full id or by id prefix.
 *
 * offset_out - receives the object's offset in the pack; set to 0 on
 *              entry and left at 0 on every failure.
 * found_oid  - receives the full id of the matching entry on success.
 * p          - packfile to search; its index is opened if necessary.
 * short_oid  - id, or zero-padded prefix of an id, to look for.
 * len        - number of significant hex digits in `short_oid`.
 *
 * The fanout table narrows the search to entries sharing the first
 * byte, a binary search locates the exact id or its insertion point,
 * and for prefixes the following entry is checked to detect ambiguity.
 *
 * Returns 0 on success; the value of git_odb__error_notfound() if no
 * entry matches; the value of git_odb__error_ambiguous() if a prefix
 * matches more than one entry; -1 if the lock cannot be taken, the
 * index map is missing, or the index is corrupt.  p->lock is taken and
 * released internally.
 */
static int pack_entry_find_offset(
	off64_t *offset_out,
	git_oid *found_oid,
	struct git_pack_file *p,
	const git_oid *short_oid,
	size_t len)
{
	const uint32_t *level1_ofs;
	const unsigned char *index;
	unsigned hi, lo, stride;
	int pos, found = 0;
	off64_t offset;
	const unsigned char *current = NULL;
	int error = 0;

	*offset_out = 0;

	if (git_mutex_lock(&p->lock) < 0)
		return packfile_error("failed to get lock for pack_entry_find_offset");

	if ((error = pack_index_open_locked(p)) < 0)
		goto cleanup;

	if (!p->index_map.data) {
		git_error_set(GIT_ERROR_INTERNAL, "internal error: p->index_map.data == NULL");
		/*
		 * Without this the function would return 0 with
		 * *offset_out == 0 and found_oid never written.
		 */
		error = -1;
		goto cleanup;
	}

	index = p->index_map.data;
	level1_ofs = p->index_map.data;

	/* Newer index versions carry 8 extra bytes (two words) up front. */
	if (p->index_version > 1) {
		level1_ofs += 2;
		index += 8;
	}

	/* Skip the fanout table; entry b bounds the ids whose first byte is b. */
	index += 4 * 256;
	hi = ntohl(level1_ofs[(int)short_oid->id[0]]);
	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(level1_ofs[(int)short_oid->id[0] - 1]));

	if (p->index_version > 1) {
		stride = 20;
	} else {
		/* Version 1 entries are a 4-byte offset followed by the id. */
		stride = 24;
		index += 4;
	}

#ifdef INDEX_DEBUG_LOOKUP
	printf("%02x%02x%02x... lo %u hi %u nr %d\n",
		short_oid->id[0], short_oid->id[1], short_oid->id[2], lo, hi, p->num_objects);
#endif

	pos = git_pack__lookup_sha1(index, stride, lo, hi, short_oid->id);

	if (pos >= 0) {
		/* An object matching exactly the oid was found */
		found = 1;
		current = index + pos * stride;
	} else {
		/* No object was found */
		/* pos refers to the object with the "closest" oid to short_oid */
		pos = - 1 - pos;
		if (pos < (int)p->num_objects) {
			current = index + pos * stride;

			if (!git_oid_ncmp(short_oid, (const git_oid *)current, len))
				found = 1;
		}
	}

	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)p->num_objects) {
		/* Check for ambiguity */
		const unsigned char *next = current + stride;

		if (!git_oid_ncmp(short_oid, (const git_oid *)next, len)) {
			found = 2;
		}
	}

	if (!found) {
		error = git_odb__error_notfound("failed to find offset for pack entry", short_oid, len);
		goto cleanup;
	}
	if (found > 1) {
		error = git_odb__error_ambiguous("found multiple offsets for pack entry");
		goto cleanup;
	}

	if ((offset = nth_packed_object_offset_locked(p, pos)) < 0) {
		git_error_set(GIT_ERROR_ODB, "packfile index is corrupt");
		error = -1;
		goto cleanup;
	}

	*offset_out = offset;
	git_oid_fromraw(found_oid, current);

#ifdef INDEX_DEBUG_LOOKUP
	{
		unsigned char hex_sha1[GIT_OID_HEXSZ + 1];
		git_oid_fmt(hex_sha1, found_oid);
		hex_sha1[GIT_OID_HEXSZ] = '\0';
		printf("found lo=%d %s\n", lo, hex_sha1);
	}
#endif

cleanup:
	git_mutex_unlock(&p->lock);
	return error;
}
1578 
git_pack_entry_find(struct git_pack_entry * e,struct git_pack_file * p,const git_oid * short_oid,size_t len)1579 int git_pack_entry_find(
1580 		struct git_pack_entry *e,
1581 		struct git_pack_file *p,
1582 		const git_oid *short_oid,
1583 		size_t len)
1584 {
1585 	off64_t offset;
1586 	git_oid found_oid;
1587 	int error;
1588 
1589 	GIT_ASSERT_ARG(p);
1590 
1591 	if (len == GIT_OID_HEXSZ && p->num_bad_objects) {
1592 		unsigned i;
1593 		for (i = 0; i < p->num_bad_objects; i++)
1594 			if (git_oid__cmp(short_oid, &p->bad_object_sha1[i]) == 0)
1595 				return packfile_error("bad object found in packfile");
1596 	}
1597 
1598 	error = pack_entry_find_offset(&offset, &found_oid, p, short_oid, len);
1599 	if (error < 0)
1600 		return error;
1601 
1602 	error = git_mutex_lock(&p->lock);
1603 	if (error < 0) {
1604 		git_error_set(GIT_ERROR_OS, "failed to lock packfile reader");
1605 		return error;
1606 	}
1607 	error = git_mutex_lock(&p->mwf.lock);
1608 	if (error < 0) {
1609 		git_mutex_unlock(&p->lock);
1610 		git_error_set(GIT_ERROR_OS, "failed to lock packfile reader");
1611 		return error;
1612 	}
1613 
1614 	/* we found a unique entry in the index;
1615 	 * make sure the packfile backing the index
1616 	 * still exists on disk */
1617 	if (p->mwf.fd == -1)
1618 		error = packfile_open_locked(p);
1619 	git_mutex_unlock(&p->mwf.lock);
1620 	git_mutex_unlock(&p->lock);
1621 	if (error < 0)
1622 		return error;
1623 
1624 	e->offset = offset;
1625 	e->p = p;
1626 
1627 	git_oid_cpy(&e->sha1, &found_oid);
1628 	return 0;
1629 }
1630