1 #include "builtin.h"
2 #include "cache.h"
3 #include "repository.h"
4 #include "config.h"
5 #include "attr.h"
6 #include "object.h"
7 #include "blob.h"
8 #include "commit.h"
9 #include "tag.h"
10 #include "tree.h"
11 #include "delta.h"
12 #include "pack.h"
13 #include "pack-revindex.h"
14 #include "csum-file.h"
15 #include "tree-walk.h"
16 #include "diff.h"
17 #include "revision.h"
18 #include "list-objects.h"
19 #include "list-objects-filter.h"
20 #include "list-objects-filter-options.h"
21 #include "pack-objects.h"
22 #include "progress.h"
23 #include "refs.h"
24 #include "streaming.h"
25 #include "thread-utils.h"
26 #include "pack-bitmap.h"
27 #include "delta-islands.h"
28 #include "reachable.h"
29 #include "sha1-array.h"
30 #include "argv-array.h"
31 #include "list.h"
32 #include "packfile.h"
33 #include "object-store.h"
34 #include "dir.h"
35 #include "midx.h"
36 #include "trace2.h"
37 
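/*
 * Convenience wrappers around the oe_*() accessors; they all operate on
 * the global to_pack list defined below.
 */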
38 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
39 #define SIZE(obj) oe_size(&to_pack, obj)
40 #define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
41 #define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
42 #define DELTA(obj) oe_delta(&to_pack, obj)
43 #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
44 #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
45 #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
46 #define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
47 #define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
48 #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
49 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
50 
51 static const char *pack_usage[] = {
52 	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
53 	N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
54 	NULL
55 };
56 
57 /*
58  * Objects we are going to pack are collected in the `to_pack` structure.
59  * It contains an array (dynamically expanded) of the object data, and a map
60  * that can resolve SHA1s to their position in the array.
61  */
62 static struct packing_data to_pack;
63 
64 static struct pack_idx_entry **written_list;
65 static uint32_t nr_result, nr_written, nr_seen;
66 static struct bitmap_index *bitmap_git;
67 static uint32_t write_layer;
68 
69 static int non_empty;
70 static int reuse_delta = 1, reuse_object = 1;
71 static int keep_unreachable, unpack_unreachable, include_tag;
72 static timestamp_t unpack_unreachable_expiration;
73 static int pack_loose_unreachable;
74 static int local;
75 static int have_non_local_packs;
76 static int incremental;
77 static int ignore_packed_keep_on_disk;
78 static int ignore_packed_keep_in_core;
79 static int allow_ofs_delta;
80 static struct pack_idx_option pack_idx_opts;
81 static const char *base_name;
82 static int progress = 1;
83 static int window = 10;
84 static unsigned long pack_size_limit;
85 static int depth = 50;
86 static int delta_search_threads;
87 static int pack_to_stdout;
88 static int sparse;
89 static int thin;
90 static int num_preferred_base;
91 static struct progress *progress_state;
92 
93 static struct packed_git *reuse_packfile;
94 static uint32_t reuse_packfile_objects;
95 static off_t reuse_packfile_offset;
96 
97 static int use_bitmap_index_default = 1;
98 static int use_bitmap_index = -1;
99 static enum {
100 	WRITE_BITMAP_FALSE = 0,
101 	WRITE_BITMAP_QUIET,
102 	WRITE_BITMAP_TRUE,
103 } write_bitmap_index;
104 static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;
105 
106 static int exclude_promisor_objects;
107 
108 static int use_delta_islands;
109 
110 static unsigned long delta_cache_size = 0;
111 static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
112 static unsigned long cache_max_small_delta_size = 1000;
113 
114 static unsigned long window_memory_limit = 0;
115 
116 static struct list_objects_filter_options filter_options;
117 
118 enum missing_action {
119 	MA_ERROR = 0,      /* fail if any missing objects are encountered */
120 	MA_ALLOW_ANY,      /* silently allow ALL missing objects */
121 	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
122 };
123 static enum missing_action arg_missing_action;
124 static show_object_fn fn_show_object;
125 
126 /*
127  * stats
128  */
129 static uint32_t written, written_delta;
130 static uint32_t reused, reused_delta;
131 
132 /*
133  * Indexed commits
134  */
135 static struct commit **indexed_commits;
136 static unsigned int indexed_commits_nr;
137 static unsigned int indexed_commits_alloc;
138 
static void index_commit_for_bitmap(struct commit *commit)
140 {
141 	if (indexed_commits_nr >= indexed_commits_alloc) {
142 		indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
143 		REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
144 	}
145 
146 	indexed_commits[indexed_commits_nr++] = commit;
147 }
148 
static void *get_delta(struct object_entry *entry)
150 {
151 	unsigned long size, base_size, delta_size;
152 	void *buf, *base_buf, *delta_buf;
153 	enum object_type type;
154 
155 	buf = read_object_file(&entry->idx.oid, &type, &size);
156 	if (!buf)
157 		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
158 	base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
159 				    &base_size);
160 	if (!base_buf)
161 		die("unable to read %s",
162 		    oid_to_hex(&DELTA(entry)->idx.oid));
163 	delta_buf = diff_delta(base_buf, base_size,
164 			       buf, size, &delta_size, 0);
165 	/*
166 	 * We successfully computed this delta once but dropped it for
167 	 * memory reasons. Something is very wrong if this time we
168 	 * recompute and create a different delta.
169 	 */
170 	if (!delta_buf || delta_size != DELTA_SIZE(entry))
171 		BUG("delta size changed");
172 	free(buf);
173 	free(base_buf);
174 	return delta_buf;
175 }
176 
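/*
 * Deflate the buffer at *pptr: allocate a worst-case sized output buffer,
 * compress into it, point *pptr at the compressed copy, free the original
 * input, and return the number of compressed bytes.
 */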
static unsigned long do_compress(void **pptr, unsigned long size)
178 {
179 	git_zstream stream;
180 	void *in, *out;
181 	unsigned long maxsize;
182 
183 	git_deflate_init(&stream, pack_compression_level);
184 	maxsize = git_deflate_bound(&stream, size);
185 
186 	in = *pptr;
187 	out = xmalloc(maxsize);
188 	*pptr = out;
189 
190 	stream.next_in = in;
191 	stream.avail_in = size;
192 	stream.next_out = out;
193 	stream.avail_out = maxsize;
194 	while (git_deflate(&stream, Z_FINISH) == Z_OK)
195 		; /* nothing */
196 	git_deflate_end(&stream);
197 
198 	free(in);
199 	return stream.total_out;
200 }
201 
static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
203 					   const struct object_id *oid)
204 {
205 	git_zstream stream;
206 	unsigned char ibuf[1024 * 16];
207 	unsigned char obuf[1024 * 16];
208 	unsigned long olen = 0;
209 
210 	git_deflate_init(&stream, pack_compression_level);
211 
212 	for (;;) {
213 		ssize_t readlen;
214 		int zret = Z_OK;
215 		readlen = read_istream(st, ibuf, sizeof(ibuf));
216 		if (readlen == -1)
217 			die(_("unable to read %s"), oid_to_hex(oid));
218 
219 		stream.next_in = ibuf;
220 		stream.avail_in = readlen;
221 		while ((stream.avail_in || readlen == 0) &&
222 		       (zret == Z_OK || zret == Z_BUF_ERROR)) {
223 			stream.next_out = obuf;
224 			stream.avail_out = sizeof(obuf);
225 			zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
226 			hashwrite(f, obuf, stream.next_out - obuf);
227 			olen += stream.next_out - obuf;
228 		}
229 		if (stream.avail_in)
230 			die(_("deflate error (%d)"), zret);
231 		if (readlen == 0) {
232 			if (zret != Z_STREAM_END)
233 				die(_("deflate error (%d)"), zret);
234 			break;
235 		}
236 	}
237 	git_deflate_end(&stream);
238 	return olen;
239 }
240 
/*
 * We are going to reuse the existing object data as-is; make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
246 		struct pack_window **w_curs,
247 		off_t offset,
248 		off_t len,
249 		unsigned long expect)
250 {
251 	git_zstream stream;
252 	unsigned char fakebuf[4096], *in;
253 	int st;
254 
255 	memset(&stream, 0, sizeof(stream));
256 	git_inflate_init(&stream);
257 	do {
258 		in = use_pack(p, w_curs, offset, &stream.avail_in);
259 		stream.next_in = in;
260 		stream.next_out = fakebuf;
261 		stream.avail_out = sizeof(fakebuf);
262 		st = git_inflate(&stream, Z_FINISH);
263 		offset += stream.next_in - in;
264 	} while (st == Z_OK || st == Z_BUF_ERROR);
265 	git_inflate_end(&stream);
266 	return (st == Z_STREAM_END &&
267 		stream.total_out == expect &&
268 		stream.total_in == len) ? 0 : -1;
269 }
270 
static void copy_pack_data(struct hashfile *f,
272 		struct packed_git *p,
273 		struct pack_window **w_curs,
274 		off_t offset,
275 		off_t len)
276 {
277 	unsigned char *in;
278 	unsigned long avail;
279 
280 	while (len) {
281 		in = use_pack(p, w_curs, offset, &avail);
282 		if (avail > len)
283 			avail = (unsigned long)len;
284 		hashwrite(f, in, avail);
285 		offset += avail;
286 		len -= avail;
287 	}
288 }
289 
290 /* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
292 					   unsigned long limit, int usable_delta)
293 {
294 	unsigned long size, datalen;
295 	unsigned char header[MAX_PACK_OBJECT_HEADER],
296 		      dheader[MAX_PACK_OBJECT_HEADER];
297 	unsigned hdrlen;
298 	enum object_type type;
299 	void *buf;
300 	struct git_istream *st = NULL;
301 	const unsigned hashsz = the_hash_algo->rawsz;
302 
303 	if (!usable_delta) {
304 		if (oe_type(entry) == OBJ_BLOB &&
305 		    oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
306 		    (st = open_istream(&entry->idx.oid, &type, &size, NULL)) != NULL)
307 			buf = NULL;
308 		else {
309 			buf = read_object_file(&entry->idx.oid, &type, &size);
310 			if (!buf)
311 				die(_("unable to read %s"),
312 				    oid_to_hex(&entry->idx.oid));
313 		}
314 		/*
315 		 * make sure no cached delta data remains from a
316 		 * previous attempt before a pack split occurred.
317 		 */
318 		FREE_AND_NULL(entry->delta_data);
319 		entry->z_delta_size = 0;
320 	} else if (entry->delta_data) {
321 		size = DELTA_SIZE(entry);
322 		buf = entry->delta_data;
323 		entry->delta_data = NULL;
324 		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
325 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
326 	} else {
327 		buf = get_delta(entry);
328 		size = DELTA_SIZE(entry);
329 		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
330 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
331 	}
332 
333 	if (st)	/* large blob case, just assume we don't compress well */
334 		datalen = size;
335 	else if (entry->z_delta_size)
336 		datalen = entry->z_delta_size;
337 	else
338 		datalen = do_compress(&buf, size);
339 
340 	/*
341 	 * The object header is a byte of 'type' followed by zero or
342 	 * more bytes of length.
343 	 */
344 	hdrlen = encode_in_pack_object_header(header, sizeof(header),
345 					      type, size);
346 
347 	if (type == OBJ_OFS_DELTA) {
348 		/*
349 		 * Deltas with relative base contain an additional
350 		 * encoding of the relative offset for the delta
351 		 * base from this object's position in the pack.
352 		 */
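		/*
		 * The offset is written most-significant group first, 7 bits
		 * per byte, with the high bit set on every byte but the last;
		 * the "--ofs" below makes each continuation group one-based,
		 * so 0..127 fit in one byte, 128..16511 in two (16511 encodes
		 * as 0xff 0x7f), and so on.
		 */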
353 		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
354 		unsigned pos = sizeof(dheader) - 1;
355 		dheader[pos] = ofs & 127;
356 		while (ofs >>= 7)
357 			dheader[--pos] = 128 | (--ofs & 127);
358 		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
359 			if (st)
360 				close_istream(st);
361 			free(buf);
362 			return 0;
363 		}
364 		hashwrite(f, header, hdrlen);
365 		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
366 		hdrlen += sizeof(dheader) - pos;
367 	} else if (type == OBJ_REF_DELTA) {
368 		/*
369 		 * Deltas with a base reference contain
370 		 * additional bytes for the base object ID.
371 		 */
372 		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
373 			if (st)
374 				close_istream(st);
375 			free(buf);
376 			return 0;
377 		}
378 		hashwrite(f, header, hdrlen);
379 		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
380 		hdrlen += hashsz;
381 	} else {
382 		if (limit && hdrlen + datalen + hashsz >= limit) {
383 			if (st)
384 				close_istream(st);
385 			free(buf);
386 			return 0;
387 		}
388 		hashwrite(f, header, hdrlen);
389 	}
390 	if (st) {
391 		datalen = write_large_blob_data(st, f, &entry->idx.oid);
392 		close_istream(st);
393 	} else {
394 		hashwrite(f, buf, datalen);
395 		free(buf);
396 	}
397 
398 	return hdrlen + datalen;
399 }
400 
401 /* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
403 				unsigned long limit, int usable_delta)
404 {
405 	struct packed_git *p = IN_PACK(entry);
406 	struct pack_window *w_curs = NULL;
407 	struct revindex_entry *revidx;
408 	off_t offset;
409 	enum object_type type = oe_type(entry);
410 	off_t datalen;
411 	unsigned char header[MAX_PACK_OBJECT_HEADER],
412 		      dheader[MAX_PACK_OBJECT_HEADER];
413 	unsigned hdrlen;
414 	const unsigned hashsz = the_hash_algo->rawsz;
415 	unsigned long entry_size = SIZE(entry);
416 
417 	if (DELTA(entry))
418 		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
419 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
420 	hdrlen = encode_in_pack_object_header(header, sizeof(header),
421 					      type, entry_size);
422 
423 	offset = entry->in_pack_offset;
424 	revidx = find_pack_revindex(p, offset);
425 	datalen = revidx[1].offset - offset;
426 	if (!pack_to_stdout && p->index_version > 1 &&
427 	    check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) {
428 		error(_("bad packed object CRC for %s"),
429 		      oid_to_hex(&entry->idx.oid));
430 		unuse_pack(&w_curs);
431 		return write_no_reuse_object(f, entry, limit, usable_delta);
432 	}
433 
434 	offset += entry->in_pack_header_size;
435 	datalen -= entry->in_pack_header_size;
436 
437 	if (!pack_to_stdout && p->index_version == 1 &&
438 	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
439 		error(_("corrupt packed object for %s"),
440 		      oid_to_hex(&entry->idx.oid));
441 		unuse_pack(&w_curs);
442 		return write_no_reuse_object(f, entry, limit, usable_delta);
443 	}
444 
445 	if (type == OBJ_OFS_DELTA) {
446 		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
447 		unsigned pos = sizeof(dheader) - 1;
448 		dheader[pos] = ofs & 127;
449 		while (ofs >>= 7)
450 			dheader[--pos] = 128 | (--ofs & 127);
451 		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
452 			unuse_pack(&w_curs);
453 			return 0;
454 		}
455 		hashwrite(f, header, hdrlen);
456 		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
457 		hdrlen += sizeof(dheader) - pos;
458 		reused_delta++;
459 	} else if (type == OBJ_REF_DELTA) {
460 		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
461 			unuse_pack(&w_curs);
462 			return 0;
463 		}
464 		hashwrite(f, header, hdrlen);
465 		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
466 		hdrlen += hashsz;
467 		reused_delta++;
468 	} else {
469 		if (limit && hdrlen + datalen + hashsz >= limit) {
470 			unuse_pack(&w_curs);
471 			return 0;
472 		}
473 		hashwrite(f, header, hdrlen);
474 	}
475 	copy_pack_data(f, p, &w_curs, offset, datalen);
476 	unuse_pack(&w_curs);
477 	reused++;
478 	return hdrlen + datalen;
479 }
480 
481 /* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct hashfile *f,
483 			  struct object_entry *entry,
484 			  off_t write_offset)
485 {
486 	unsigned long limit;
487 	off_t len;
488 	int usable_delta, to_reuse;
489 
490 	if (!pack_to_stdout)
491 		crc32_begin(f);
492 
493 	/* apply size limit if limited packsize and not first object */
494 	if (!pack_size_limit || !nr_written)
495 		limit = 0;
496 	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this for unlimited (i.e. limit = 0).
		 */
501 		limit = 1;
502 	else
503 		limit = pack_size_limit - write_offset;
504 
505 	if (!DELTA(entry))
506 		usable_delta = 0;	/* no delta */
507 	else if (!pack_size_limit)
508 	       usable_delta = 1;	/* unlimited packfile */
509 	else if (DELTA(entry)->idx.offset == (off_t)-1)
510 		usable_delta = 0;	/* base was written to another pack */
511 	else if (DELTA(entry)->idx.offset)
512 		usable_delta = 1;	/* base already exists in this pack */
513 	else
514 		usable_delta = 0;	/* base could end up in another pack */
515 
516 	if (!reuse_object)
517 		to_reuse = 0;	/* explicit */
518 	else if (!IN_PACK(entry))
519 		to_reuse = 0;	/* can't reuse what we don't have */
520 	else if (oe_type(entry) == OBJ_REF_DELTA ||
521 		 oe_type(entry) == OBJ_OFS_DELTA)
522 				/* check_object() decided it for us ... */
523 		to_reuse = usable_delta;
524 				/* ... but pack split may override that */
525 	else if (oe_type(entry) != entry->in_pack_type)
526 		to_reuse = 0;	/* pack has delta which is unusable */
527 	else if (DELTA(entry))
528 		to_reuse = 0;	/* we want to pack afresh */
529 	else
530 		to_reuse = 1;	/* we have it in-pack undeltified,
531 				 * and we do not need to deltify it.
532 				 */
533 
534 	if (!to_reuse)
535 		len = write_no_reuse_object(f, entry, limit, usable_delta);
536 	else
537 		len = write_reuse_object(f, entry, limit, usable_delta);
538 	if (!len)
539 		return 0;
540 
541 	if (usable_delta)
542 		written_delta++;
543 	written++;
544 	if (!pack_to_stdout)
545 		entry->idx.crc32 = crc32_end(f);
546 	return len;
547 }
548 
549 enum write_one_status {
550 	WRITE_ONE_SKIP = -1, /* already written */
551 	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
552 	WRITE_ONE_WRITTEN = 1, /* normal */
553 	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
554 };
555 
static enum write_one_status write_one(struct hashfile *f,
557 				       struct object_entry *e,
558 				       off_t *offset)
559 {
560 	off_t size;
561 	int recursing;
562 
563 	/*
564 	 * we set offset to 1 (which is an impossible value) to mark
565 	 * the fact that this object is involved in "write its base
566 	 * first before writing a deltified object" recursion.
567 	 */
568 	recursing = (e->idx.offset == 1);
569 	if (recursing) {
570 		warning(_("recursive delta detected for object %s"),
571 			oid_to_hex(&e->idx.oid));
572 		return WRITE_ONE_RECURSIVE;
573 	} else if (e->idx.offset || e->preferred_base) {
574 		/* offset is non zero if object is written already. */
575 		return WRITE_ONE_SKIP;
576 	}
577 
578 	/* if we are deltified, write out base object first. */
579 	if (DELTA(e)) {
580 		e->idx.offset = 1; /* now recurse */
581 		switch (write_one(f, DELTA(e), offset)) {
582 		case WRITE_ONE_RECURSIVE:
583 			/* we cannot depend on this one */
584 			SET_DELTA(e, NULL);
585 			break;
586 		default:
587 			break;
588 		case WRITE_ONE_BREAK:
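			/*
			 * The base did not fit in the current pack; recursing
			 * is necessarily 0 here (we returned early otherwise),
			 * so this just clears the temporary marker and leaves
			 * the entry unwritten.
			 */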
589 			e->idx.offset = recursing;
590 			return WRITE_ONE_BREAK;
591 		}
592 	}
593 
594 	e->idx.offset = *offset;
595 	size = write_object(f, e, *offset);
596 	if (!size) {
597 		e->idx.offset = recursing;
598 		return WRITE_ONE_BREAK;
599 	}
600 	written_list[nr_written++] = &e->idx;
601 
602 	/* make sure off_t is sufficiently large not to wrap */
603 	if (signed_add_overflows(*offset, size))
604 		die(_("pack too large for current definition of off_t"));
605 	*offset += size;
606 	return WRITE_ONE_WRITTEN;
607 }
608 
static int mark_tagged(const char *path, const struct object_id *oid, int flag,
610 		       void *cb_data)
611 {
612 	struct object_id peeled;
613 	struct object_entry *entry = packlist_find(&to_pack, oid);
614 
615 	if (entry)
616 		entry->tagged = 1;
617 	if (!peel_ref(path, &peeled)) {
618 		entry = packlist_find(&to_pack, &peeled);
619 		if (entry)
620 			entry->tagged = 1;
621 	}
622 	return 0;
623 }
624 
static inline void add_to_write_order(struct object_entry **wo,
626 			       unsigned int *endp,
627 			       struct object_entry *e)
628 {
629 	if (e->filled || oe_layer(&to_pack, e) != write_layer)
630 		return;
631 	wo[(*endp)++] = e;
632 	e->filled = 1;
633 }
634 
static void add_descendants_to_write_order(struct object_entry **wo,
636 					   unsigned int *endp,
637 					   struct object_entry *e)
638 {
639 	int add_to_order = 1;
640 	while (e) {
641 		if (add_to_order) {
642 			struct object_entry *s;
643 			/* add this node... */
644 			add_to_write_order(wo, endp, e);
645 			/* all its siblings... */
646 			for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
647 				add_to_write_order(wo, endp, s);
648 			}
649 		}
650 		/* drop down a level to add left subtree nodes if possible */
651 		if (DELTA_CHILD(e)) {
652 			add_to_order = 1;
653 			e = DELTA_CHILD(e);
654 		} else {
655 			add_to_order = 0;
656 			/* our sibling might have some children, it is next */
657 			if (DELTA_SIBLING(e)) {
658 				e = DELTA_SIBLING(e);
659 				continue;
660 			}
661 			/* go back to our parent node */
662 			e = DELTA(e);
663 			while (e && !DELTA_SIBLING(e)) {
664 				/* we're on the right side of a subtree, keep
665 				 * going up until we can go right again */
666 				e = DELTA(e);
667 			}
668 			if (!e) {
669 				/* done- we hit our original root node */
670 				return;
671 			}
672 			/* pass it off to sibling at this level */
673 			e = DELTA_SIBLING(e);
674 		}
675 	};
676 }
677 
static void add_family_to_write_order(struct object_entry **wo,
679 				      unsigned int *endp,
680 				      struct object_entry *e)
681 {
682 	struct object_entry *root;
683 
684 	for (root = e; DELTA(root); root = DELTA(root))
685 		; /* nothing */
686 	add_descendants_to_write_order(wo, endp, root);
687 }
688 
static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
690 {
691 	unsigned int i, last_untagged;
692 	struct object_entry *objects = to_pack.objects;
693 
694 	for (i = 0; i < to_pack.nr_objects; i++) {
695 		if (objects[i].tagged)
696 			break;
697 		add_to_write_order(wo, wo_end, &objects[i]);
698 	}
699 	last_untagged = i;
700 
701 	/*
702 	 * Then fill all the tagged tips.
703 	 */
704 	for (; i < to_pack.nr_objects; i++) {
705 		if (objects[i].tagged)
706 			add_to_write_order(wo, wo_end, &objects[i]);
707 	}
708 
709 	/*
710 	 * And then all remaining commits and tags.
711 	 */
712 	for (i = last_untagged; i < to_pack.nr_objects; i++) {
713 		if (oe_type(&objects[i]) != OBJ_COMMIT &&
714 		    oe_type(&objects[i]) != OBJ_TAG)
715 			continue;
716 		add_to_write_order(wo, wo_end, &objects[i]);
717 	}
718 
719 	/*
720 	 * And then all the trees.
721 	 */
722 	for (i = last_untagged; i < to_pack.nr_objects; i++) {
723 		if (oe_type(&objects[i]) != OBJ_TREE)
724 			continue;
725 		add_to_write_order(wo, wo_end, &objects[i]);
726 	}
727 
728 	/*
729 	 * Finally all the rest in really tight order
730 	 */
731 	for (i = last_untagged; i < to_pack.nr_objects; i++) {
732 		if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
733 			add_family_to_write_order(wo, wo_end, &objects[i]);
734 	}
735 }
736 
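/*
 * Decide the order in which objects will be written: wire up the delta
 * child/sibling links, mark objects at the tips of tags, and then emit
 * the objects layer by layer via compute_layer_order() (there is more
 * than one layer only when delta islands are in use).
 */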
static struct object_entry **compute_write_order(void)
738 {
739 	uint32_t max_layers = 1;
740 	unsigned int i, wo_end;
741 
742 	struct object_entry **wo;
743 	struct object_entry *objects = to_pack.objects;
744 
745 	for (i = 0; i < to_pack.nr_objects; i++) {
746 		objects[i].tagged = 0;
747 		objects[i].filled = 0;
748 		SET_DELTA_CHILD(&objects[i], NULL);
749 		SET_DELTA_SIBLING(&objects[i], NULL);
750 	}
751 
752 	/*
753 	 * Fully connect delta_child/delta_sibling network.
754 	 * Make sure delta_sibling is sorted in the original
755 	 * recency order.
756 	 */
757 	for (i = to_pack.nr_objects; i > 0;) {
758 		struct object_entry *e = &objects[--i];
759 		if (!DELTA(e))
760 			continue;
761 		/* Mark me as the first child */
762 		e->delta_sibling_idx = DELTA(e)->delta_child_idx;
763 		SET_DELTA_CHILD(DELTA(e), e);
764 	}
765 
766 	/*
767 	 * Mark objects that are at the tip of tags.
768 	 */
769 	for_each_tag_ref(mark_tagged, NULL);
770 
771 	if (use_delta_islands)
772 		max_layers = compute_pack_layers(&to_pack);
773 
774 	ALLOC_ARRAY(wo, to_pack.nr_objects);
775 	wo_end = 0;
776 
777 	for (; write_layer < max_layers; ++write_layer)
778 		compute_layer_order(wo, &wo_end);
779 
780 	if (wo_end != to_pack.nr_objects)
781 		die(_("ordered %u objects, expected %"PRIu32),
782 		    wo_end, to_pack.nr_objects);
783 
784 	return wo;
785 }
786 
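/*
 * Copy the reusable leading portion of an existing packfile (everything
 * between its header and reuse_packfile_offset) verbatim into the output,
 * updating the progress meter as we go.
 */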
static off_t write_reused_pack(struct hashfile *f)
788 {
789 	unsigned char buffer[8192];
790 	off_t to_write, total;
791 	int fd;
792 
793 	if (!is_pack_valid(reuse_packfile))
794 		die(_("packfile is invalid: %s"), reuse_packfile->pack_name);
795 
796 	fd = git_open(reuse_packfile->pack_name);
797 	if (fd < 0)
798 		die_errno(_("unable to open packfile for reuse: %s"),
799 			  reuse_packfile->pack_name);
800 
801 	if (lseek(fd, sizeof(struct pack_header), SEEK_SET) == -1)
802 		die_errno(_("unable to seek in reused packfile"));
803 
804 	if (reuse_packfile_offset < 0)
805 		reuse_packfile_offset = reuse_packfile->pack_size - the_hash_algo->rawsz;
806 
807 	total = to_write = reuse_packfile_offset - sizeof(struct pack_header);
808 
809 	while (to_write) {
810 		int read_pack = xread(fd, buffer, sizeof(buffer));
811 
812 		if (read_pack <= 0)
813 			die_errno(_("unable to read from reused packfile"));
814 
815 		if (read_pack > to_write)
816 			read_pack = to_write;
817 
818 		hashwrite(f, buffer, read_pack);
819 		to_write -= read_pack;
820 
		/*
		 * We don't know the actual number of objects written, only
		 * how many bytes we have written, how many bytes total, and
		 * how many objects total. So we can fake it by pretending all
		 * objects we are writing are the same size. This gives us a
		 * smooth progress meter, and at the end it matches the true
		 * answer.
		 */
829 		written = reuse_packfile_objects *
830 				(((double)(total - to_write)) / total);
831 		display_progress(progress_state, written);
832 	}
833 
834 	close(fd);
835 	written = reuse_packfile_objects;
836 	display_progress(progress_state, written);
837 	return reuse_packfile_offset - sizeof(struct pack_header);
838 }
839 
840 static const char no_split_warning[] = N_(
841 "disabling bitmap writing, packs are split due to pack.packSizeLimit"
842 );
843 
static void write_pack_file(void)
845 {
846 	uint32_t i = 0, j;
847 	struct hashfile *f;
848 	off_t offset;
849 	uint32_t nr_remaining = nr_result;
850 	time_t last_mtime = 0;
851 	struct object_entry **write_order;
852 
853 	if (progress > pack_to_stdout)
854 		progress_state = start_progress(_("Writing objects"), nr_result);
855 	ALLOC_ARRAY(written_list, to_pack.nr_objects);
856 	write_order = compute_write_order();
857 
858 	do {
859 		struct object_id oid;
860 		char *pack_tmp_name = NULL;
861 
862 		if (pack_to_stdout)
863 			f = hashfd_throughput(1, "<stdout>", progress_state);
864 		else
865 			f = create_tmp_packfile(&pack_tmp_name);
866 
867 		offset = write_pack_header(f, nr_remaining);
868 
869 		if (reuse_packfile) {
870 			off_t packfile_size;
871 			assert(pack_to_stdout);
872 
873 			packfile_size = write_reused_pack(f);
874 			offset += packfile_size;
875 		}
876 
877 		nr_written = 0;
878 		for (; i < to_pack.nr_objects; i++) {
879 			struct object_entry *e = write_order[i];
880 			if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
881 				break;
882 			display_progress(progress_state, written);
883 		}
884 
		/*
		 * Did we write the wrong number of entries in the header?
		 * If so, rewrite it like in fast-import.
		 */
889 		if (pack_to_stdout) {
890 			finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
891 		} else if (nr_written == nr_remaining) {
892 			finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
893 		} else {
894 			int fd = finalize_hashfile(f, oid.hash, 0);
895 			fixup_pack_header_footer(fd, oid.hash, pack_tmp_name,
896 						 nr_written, oid.hash, offset);
897 			close(fd);
898 			if (write_bitmap_index) {
899 				if (write_bitmap_index != WRITE_BITMAP_QUIET)
900 					warning(_(no_split_warning));
901 				write_bitmap_index = 0;
902 			}
903 		}
904 
905 		if (!pack_to_stdout) {
906 			struct stat st;
907 			struct strbuf tmpname = STRBUF_INIT;
908 
			/*
			 * Packs are accessed at runtime in their mtime order,
			 * since newer packs are more likely to contain younger
			 * objects.  So if we are creating multiple packs then
			 * we should modify the mtime of later ones to preserve
			 * this property.
			 */
916 			if (stat(pack_tmp_name, &st) < 0) {
917 				warning_errno(_("failed to stat %s"), pack_tmp_name);
918 			} else if (!last_mtime) {
919 				last_mtime = st.st_mtime;
920 			} else {
921 				struct utimbuf utb;
922 				utb.actime = st.st_atime;
923 				utb.modtime = --last_mtime;
924 				if (utime(pack_tmp_name, &utb) < 0)
925 					warning_errno(_("failed utime() on %s"), pack_tmp_name);
926 			}
927 
928 			strbuf_addf(&tmpname, "%s-", base_name);
929 
930 			if (write_bitmap_index) {
931 				bitmap_writer_set_checksum(oid.hash);
932 				bitmap_writer_build_type_index(
933 					&to_pack, written_list, nr_written);
934 			}
935 
936 			finish_tmp_packfile(&tmpname, pack_tmp_name,
937 					    written_list, nr_written,
938 					    &pack_idx_opts, oid.hash);
939 
940 			if (write_bitmap_index) {
941 				strbuf_addf(&tmpname, "%s.bitmap", oid_to_hex(&oid));
942 
943 				stop_progress(&progress_state);
944 
945 				bitmap_writer_show_progress(progress);
946 				bitmap_writer_reuse_bitmaps(&to_pack);
947 				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
948 				bitmap_writer_build(&to_pack);
949 				bitmap_writer_finish(written_list, nr_written,
950 						     tmpname.buf, write_bitmap_options);
951 				write_bitmap_index = 0;
952 			}
953 
954 			strbuf_release(&tmpname);
955 			free(pack_tmp_name);
956 			puts(oid_to_hex(&oid));
957 		}
958 
959 		/* mark written objects as written to previous pack */
960 		for (j = 0; j < nr_written; j++) {
961 			written_list[j]->offset = (off_t)-1;
962 		}
963 		nr_remaining -= nr_written;
964 	} while (nr_remaining && i < to_pack.nr_objects);
965 
966 	free(written_list);
967 	free(write_order);
968 	stop_progress(&progress_state);
969 	if (written != nr_result)
970 		die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
971 		    written, nr_result);
972 	trace2_data_intmax("pack-objects", the_repository,
973 			   "write_pack_file/wrote", nr_result);
974 }
975 
static int no_try_delta(const char *path)
977 {
978 	static struct attr_check *check;
979 
980 	if (!check)
981 		check = attr_check_initl("delta", NULL);
982 	git_check_attr(the_repository->index, path, check);
983 	if (ATTR_FALSE(check->items[0].value))
984 		return 1;
985 	return 0;
986 }
987 
/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 *
 * As an optimization, we pass out the index position where we would have
 * found the item, since that saves us from having to look it up again a
 * few lines later when we want to add the new entry.
 */
static int have_duplicate_entry(const struct object_id *oid,
999 				int exclude)
1000 {
1001 	struct object_entry *entry;
1002 
1003 	entry = packlist_find(&to_pack, oid);
1004 	if (!entry)
1005 		return 0;
1006 
1007 	if (exclude) {
1008 		if (!entry->preferred_base)
1009 			nr_result--;
1010 		entry->preferred_base = 1;
1011 	}
1012 
1013 	return 1;
1014 }
1015 
static int want_found_object(int exclude, struct packed_git *p)
1017 {
1018 	if (exclude)
1019 		return 1;
1020 	if (incremental)
1021 		return 0;
1022 
1023 	/*
1024 	 * When asked to do --local (do not include an object that appears in a
1025 	 * pack we borrow from elsewhere) or --honor-pack-keep (do not include
1026 	 * an object that appears in a pack marked with .keep), finding a pack
1027 	 * that matches the criteria is sufficient for us to decide to omit it.
1028 	 * However, even if this pack does not satisfy the criteria, we need to
	 * make sure no copy of this object appears in _any_ pack that would
	 * make us omit the object, so we need to check all the packs.
	 *
	 * We can however first check whether these options can possibly matter;
	 * if they do not matter we know we want the object in the generated pack.
1034 	 * Otherwise, we signal "-1" at the end to tell the caller that we do
1035 	 * not know either way, and it needs to check more packs.
1036 	 */
1037 	if (!ignore_packed_keep_on_disk &&
1038 	    !ignore_packed_keep_in_core &&
1039 	    (!local || !have_non_local_packs))
1040 		return 1;
1041 
1042 	if (local && !p->pack_local)
1043 		return 0;
1044 	if (p->pack_local &&
1045 	    ((ignore_packed_keep_on_disk && p->pack_keep) ||
1046 	     (ignore_packed_keep_in_core && p->pack_keep_in_core)))
1047 		return 0;
1048 
1049 	/* we don't know yet; keep looking for more packs */
1050 	return -1;
1051 }
1052 
1053 /*
1054  * Check whether we want the object in the pack (e.g., we do not want
1055  * objects found in non-local stores if the "--local" option was used).
1056  *
1057  * If the caller already knows an existing pack it wants to take the object
1058  * from, that is passed in *found_pack and *found_offset; otherwise this
1059  * function finds if there is any pack that has the object and returns the pack
1060  * and its offset in these variables.
1061  */
static int want_object_in_pack(const struct object_id *oid,
1063 			       int exclude,
1064 			       struct packed_git **found_pack,
1065 			       off_t *found_offset)
1066 {
1067 	int want;
1068 	struct list_head *pos;
1069 	struct multi_pack_index *m;
1070 
1071 	if (!exclude && local && has_loose_object_nonlocal(oid))
1072 		return 0;
1073 
	/*
	 * If we already know the pack the object lives in, start checks from
	 * that pack - in the usual case when neither --local was given nor
	 * .keep files are present we will determine the answer right now.
	 */
1079 	if (*found_pack) {
1080 		want = want_found_object(exclude, *found_pack);
1081 		if (want != -1)
1082 			return want;
1083 	}
1084 
1085 	for (m = get_multi_pack_index(the_repository); m; m = m->next) {
1086 		struct pack_entry e;
1087 		if (fill_midx_entry(the_repository, oid, &e, m)) {
1088 			struct packed_git *p = e.p;
1089 			off_t offset;
1090 
1091 			if (p == *found_pack)
1092 				offset = *found_offset;
1093 			else
1094 				offset = find_pack_entry_one(oid->hash, p);
1095 
1096 			if (offset) {
1097 				if (!*found_pack) {
1098 					if (!is_pack_valid(p))
1099 						continue;
1100 					*found_offset = offset;
1101 					*found_pack = p;
1102 				}
1103 				want = want_found_object(exclude, p);
1104 				if (want != -1)
1105 					return want;
1106 			}
1107 		}
1108 	}
1109 
1110 	list_for_each(pos, get_packed_git_mru(the_repository)) {
1111 		struct packed_git *p = list_entry(pos, struct packed_git, mru);
1112 		off_t offset;
1113 
1114 		if (p == *found_pack)
1115 			offset = *found_offset;
1116 		else
1117 			offset = find_pack_entry_one(oid->hash, p);
1118 
1119 		if (offset) {
1120 			if (!*found_pack) {
1121 				if (!is_pack_valid(p))
1122 					continue;
1123 				*found_offset = offset;
1124 				*found_pack = p;
1125 			}
1126 			want = want_found_object(exclude, p);
1127 			if (!exclude && want > 0)
1128 				list_move(&p->mru,
1129 					  get_packed_git_mru(the_repository));
1130 			if (want != -1)
1131 				return want;
1132 		}
1133 	}
1134 
1135 	return 1;
1136 }
1137 
static void create_object_entry(const struct object_id *oid,
1139 				enum object_type type,
1140 				uint32_t hash,
1141 				int exclude,
1142 				int no_try_delta,
1143 				struct packed_git *found_pack,
1144 				off_t found_offset)
1145 {
1146 	struct object_entry *entry;
1147 
1148 	entry = packlist_alloc(&to_pack, oid);
1149 	entry->hash = hash;
1150 	oe_set_type(entry, type);
1151 	if (exclude)
1152 		entry->preferred_base = 1;
1153 	else
1154 		nr_result++;
1155 	if (found_pack) {
1156 		oe_set_in_pack(&to_pack, entry, found_pack);
1157 		entry->in_pack_offset = found_offset;
1158 	}
1159 
1160 	entry->no_try_delta = no_try_delta;
1161 }
1162 
1163 static const char no_closure_warning[] = N_(
1164 "disabling bitmap writing, as some objects are not being packed"
1165 );
1166 
static int add_object_entry(const struct object_id *oid, enum object_type type,
1168 			    const char *name, int exclude)
1169 {
1170 	struct packed_git *found_pack = NULL;
1171 	off_t found_offset = 0;
1172 
1173 	display_progress(progress_state, ++nr_seen);
1174 
1175 	if (have_duplicate_entry(oid, exclude))
1176 		return 0;
1177 
1178 	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
1179 		/* The pack is missing an object, so it will not have closure */
1180 		if (write_bitmap_index) {
1181 			if (write_bitmap_index != WRITE_BITMAP_QUIET)
1182 				warning(_(no_closure_warning));
1183 			write_bitmap_index = 0;
1184 		}
1185 		return 0;
1186 	}
1187 
1188 	create_object_entry(oid, type, pack_name_hash(name),
1189 			    exclude, name && no_try_delta(name),
1190 			    found_pack, found_offset);
1191 	return 1;
1192 }
1193 
static int add_object_entry_from_bitmap(const struct object_id *oid,
1195 					enum object_type type,
1196 					int flags, uint32_t name_hash,
1197 					struct packed_git *pack, off_t offset)
1198 {
1199 	display_progress(progress_state, ++nr_seen);
1200 
1201 	if (have_duplicate_entry(oid, 0))
1202 		return 0;
1203 
1204 	if (!want_object_in_pack(oid, 0, &pack, &offset))
1205 		return 0;
1206 
1207 	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
1208 	return 1;
1209 }
1210 
1211 struct pbase_tree_cache {
1212 	struct object_id oid;
1213 	int ref;
1214 	int temporary;
1215 	void *tree_data;
1216 	unsigned long tree_size;
1217 };
1218 
1219 static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const struct object_id *oid)
1221 {
1222 	return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
1223 }
static int pbase_tree_cache_ix_incr(int ix)
1225 {
1226 	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
1227 }
1228 
1229 static struct pbase_tree {
1230 	struct pbase_tree *next;
1231 	/* This is a phony "cache" entry; we are not
1232 	 * going to evict it or find it through _get()
1233 	 * mechanism -- this is for the toplevel node that
1234 	 * would almost always change with any commit.
1235 	 */
1236 	struct pbase_tree_cache pcache;
1237 } *pbase_tree;
1238 
static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
1240 {
1241 	struct pbase_tree_cache *ent, *nent;
1242 	void *data;
1243 	unsigned long size;
1244 	enum object_type type;
1245 	int neigh;
1246 	int my_ix = pbase_tree_cache_ix(oid);
1247 	int available_ix = -1;
1248 
	/* pbase_tree_cache acts as a limited hash table: a cached tree
	 * is found at its index or within the next few slots after it
	 * (lookups probe at most 8 consecutive slots).
	 */
1253 	for (neigh = 0; neigh < 8; neigh++) {
1254 		ent = pbase_tree_cache[my_ix];
1255 		if (ent && oideq(&ent->oid, oid)) {
1256 			ent->ref++;
1257 			return ent;
1258 		}
1259 		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
1260 			 ((0 <= available_ix) &&
1261 			  (!ent && pbase_tree_cache[available_ix])))
1262 			available_ix = my_ix;
1263 		if (!ent)
1264 			break;
1265 		my_ix = pbase_tree_cache_ix_incr(my_ix);
1266 	}
1267 
1268 	/* Did not find one.  Either we got a bogus request or
1269 	 * we need to read and perhaps cache.
1270 	 */
1271 	data = read_object_file(oid, &type, &size);
1272 	if (!data)
1273 		return NULL;
1274 	if (type != OBJ_TREE) {
1275 		free(data);
1276 		return NULL;
1277 	}
1278 
1279 	/* We need to either cache or return a throwaway copy */
1280 
1281 	if (available_ix < 0)
1282 		ent = NULL;
1283 	else {
1284 		ent = pbase_tree_cache[available_ix];
1285 		my_ix = available_ix;
1286 	}
1287 
1288 	if (!ent) {
1289 		nent = xmalloc(sizeof(*nent));
1290 		nent->temporary = (available_ix < 0);
1291 	}
1292 	else {
1293 		/* evict and reuse */
1294 		free(ent->tree_data);
1295 		nent = ent;
1296 	}
1297 	oidcpy(&nent->oid, oid);
1298 	nent->tree_data = data;
1299 	nent->tree_size = size;
1300 	nent->ref = 1;
1301 	if (!nent->temporary)
1302 		pbase_tree_cache[my_ix] = nent;
1303 	return nent;
1304 }
1305 
static void pbase_tree_put(struct pbase_tree_cache *cache)
1307 {
1308 	if (!cache->temporary) {
1309 		cache->ref--;
1310 		return;
1311 	}
1312 	free(cache->tree_data);
1313 	free(cache);
1314 }
1315 
static int name_cmp_len(const char *name)
1317 {
1318 	int i;
1319 	for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
1320 		;
1321 	return i;
1322 }
1323 
static void add_pbase_object(struct tree_desc *tree,
1325 			     const char *name,
1326 			     int cmplen,
1327 			     const char *fullname)
1328 {
1329 	struct name_entry entry;
1330 	int cmp;
1331 
1332 	while (tree_entry(tree,&entry)) {
1333 		if (S_ISGITLINK(entry.mode))
1334 			continue;
1335 		cmp = tree_entry_len(&entry) != cmplen ? 1 :
1336 		      memcmp(name, entry.path, cmplen);
1337 		if (cmp > 0)
1338 			continue;
1339 		if (cmp < 0)
1340 			return;
1341 		if (name[cmplen] != '/') {
1342 			add_object_entry(&entry.oid,
1343 					 object_type(entry.mode),
1344 					 fullname, 1);
1345 			return;
1346 		}
1347 		if (S_ISDIR(entry.mode)) {
1348 			struct tree_desc sub;
1349 			struct pbase_tree_cache *tree;
1350 			const char *down = name+cmplen+1;
1351 			int downlen = name_cmp_len(down);
1352 
1353 			tree = pbase_tree_get(&entry.oid);
1354 			if (!tree)
1355 				return;
1356 			init_tree_desc(&sub, tree->tree_data, tree->tree_size);
1357 
1358 			add_pbase_object(&sub, down, downlen, fullname);
1359 			pbase_tree_put(tree);
1360 		}
1361 	}
1362 }
1363 
1364 static unsigned *done_pbase_paths;
1365 static int done_pbase_paths_num;
1366 static int done_pbase_paths_alloc;
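/*
 * Binary-search the sorted (descending) array of path-name hashes;
 * return the index of the hash if present, or -(insertion point)-1
 * if it is not.
 */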
static int done_pbase_path_pos(unsigned hash)
1368 {
1369 	int lo = 0;
1370 	int hi = done_pbase_paths_num;
1371 	while (lo < hi) {
1372 		int mi = lo + (hi - lo) / 2;
1373 		if (done_pbase_paths[mi] == hash)
1374 			return mi;
1375 		if (done_pbase_paths[mi] < hash)
1376 			hi = mi;
1377 		else
1378 			lo = mi + 1;
1379 	}
1380 	return -lo-1;
1381 }
1382 
static int check_pbase_path(unsigned hash)
1384 {
1385 	int pos = done_pbase_path_pos(hash);
1386 	if (0 <= pos)
1387 		return 1;
1388 	pos = -pos - 1;
1389 	ALLOC_GROW(done_pbase_paths,
1390 		   done_pbase_paths_num + 1,
1391 		   done_pbase_paths_alloc);
1392 	done_pbase_paths_num++;
1393 	if (pos < done_pbase_paths_num)
1394 		MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
1395 			   done_pbase_paths_num - pos - 1);
1396 	done_pbase_paths[pos] = hash;
1397 	return 0;
1398 }
1399 
static void add_preferred_base_object(const char *name)
1401 {
1402 	struct pbase_tree *it;
1403 	int cmplen;
1404 	unsigned hash = pack_name_hash(name);
1405 
1406 	if (!num_preferred_base || check_pbase_path(hash))
1407 		return;
1408 
1409 	cmplen = name_cmp_len(name);
1410 	for (it = pbase_tree; it; it = it->next) {
1411 		if (cmplen == 0) {
1412 			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
1413 		}
1414 		else {
1415 			struct tree_desc tree;
1416 			init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
1417 			add_pbase_object(&tree, name, cmplen, name);
1418 		}
1419 	}
1420 }
1421 
static void add_preferred_base(struct object_id *oid)
1423 {
1424 	struct pbase_tree *it;
1425 	void *data;
1426 	unsigned long size;
1427 	struct object_id tree_oid;
1428 
1429 	if (window <= num_preferred_base++)
1430 		return;
1431 
1432 	data = read_object_with_reference(the_repository, oid,
1433 					  tree_type, &size, &tree_oid);
1434 	if (!data)
1435 		return;
1436 
1437 	for (it = pbase_tree; it; it = it->next) {
1438 		if (oideq(&it->pcache.oid, &tree_oid)) {
1439 			free(data);
1440 			return;
1441 		}
1442 	}
1443 
1444 	it = xcalloc(1, sizeof(*it));
1445 	it->next = pbase_tree;
1446 	pbase_tree = it;
1447 
1448 	oidcpy(&it->pcache.oid, &tree_oid);
1449 	it->pcache.tree_data = data;
1450 	it->pcache.tree_size = size;
1451 }
1452 
static void cleanup_preferred_base(void)
1454 {
1455 	struct pbase_tree *it;
1456 	unsigned i;
1457 
1458 	it = pbase_tree;
1459 	pbase_tree = NULL;
1460 	while (it) {
1461 		struct pbase_tree *tmp = it;
1462 		it = tmp->next;
1463 		free(tmp->pcache.tree_data);
1464 		free(tmp);
1465 	}
1466 
1467 	for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
1468 		if (!pbase_tree_cache[i])
1469 			continue;
1470 		free(pbase_tree_cache[i]->tree_data);
1471 		FREE_AND_NULL(pbase_tree_cache[i]);
1472 	}
1473 
1474 	FREE_AND_NULL(done_pbase_paths);
1475 	done_pbase_paths_num = done_pbase_paths_alloc = 0;
1476 }
1477 
1478 /*
1479  * Return 1 iff the object specified by "delta" can be sent
1480  * literally as a delta against the base in "base_sha1". If
1481  * so, then *base_out will point to the entry in our packing
1482  * list, or NULL if we must use the external-base list.
1483  *
1484  * Depth value does not matter - find_deltas() will
1485  * never consider reused delta as the base object to
1486  * deltify other objects against, in order to avoid
1487  * circular deltas.
1488  */
static int can_reuse_delta(const unsigned char *base_sha1,
1490 			   struct object_entry *delta,
1491 			   struct object_entry **base_out)
1492 {
1493 	struct object_entry *base;
1494 	struct object_id base_oid;
1495 
1496 	if (!base_sha1)
1497 		return 0;
1498 
1499 	oidread(&base_oid, base_sha1);
1500 
1501 	/*
1502 	 * First see if we're already sending the base (or it's explicitly in
1503 	 * our "excluded" list).
1504 	 */
1505 	base = packlist_find(&to_pack, &base_oid);
1506 	if (base) {
1507 		if (!in_same_island(&delta->idx.oid, &base->idx.oid))
1508 			return 0;
1509 		*base_out = base;
1510 		return 1;
1511 	}
1512 
1513 	/*
1514 	 * Otherwise, reachability bitmaps may tell us if the receiver has it,
1515 	 * even if it was buried too deep in history to make it into the
1516 	 * packing list.
1517 	 */
1518 	if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, &base_oid)) {
1519 		if (use_delta_islands) {
1520 			if (!in_same_island(&delta->idx.oid, &base_oid))
1521 				return 0;
1522 		}
1523 		*base_out = NULL;
1524 		return 1;
1525 	}
1526 
1527 	return 0;
1528 }
1529 
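/*
 * Decide how this entry will be represented in the output pack: if the
 * object already lives in a pack, record its in-pack type and size and,
 * when allowed, arrange to reuse its delta; otherwise fall back to
 * oid_object_info() to learn the canonical type and size.
 */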
static void check_object(struct object_entry *entry)
1531 {
1532 	unsigned long canonical_size;
1533 
1534 	if (IN_PACK(entry)) {
1535 		struct packed_git *p = IN_PACK(entry);
1536 		struct pack_window *w_curs = NULL;
1537 		const unsigned char *base_ref = NULL;
1538 		struct object_entry *base_entry;
1539 		unsigned long used, used_0;
1540 		unsigned long avail;
1541 		off_t ofs;
1542 		unsigned char *buf, c;
1543 		enum object_type type;
1544 		unsigned long in_pack_size;
1545 
1546 		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);
1547 
1548 		/*
1549 		 * We want in_pack_type even if we do not reuse delta
1550 		 * since non-delta representations could still be reused.
1551 		 */
1552 		used = unpack_object_header_buffer(buf, avail,
1553 						   &type,
1554 						   &in_pack_size);
1555 		if (used == 0)
1556 			goto give_up;
1557 
1558 		if (type < 0)
1559 			BUG("invalid type %d", type);
1560 		entry->in_pack_type = type;
1561 
1562 		/*
1563 		 * Determine if this is a delta and if so whether we can
1564 		 * reuse it or not.  Otherwise let's find out as cheaply as
1565 		 * possible what the actual type and size for this object is.
1566 		 */
1567 		switch (entry->in_pack_type) {
1568 		default:
1569 			/* Not a delta hence we've already got all we need. */
1570 			oe_set_type(entry, entry->in_pack_type);
1571 			SET_SIZE(entry, in_pack_size);
1572 			entry->in_pack_header_size = used;
1573 			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
1574 				goto give_up;
1575 			unuse_pack(&w_curs);
1576 			return;
1577 		case OBJ_REF_DELTA:
1578 			if (reuse_delta && !entry->preferred_base)
1579 				base_ref = use_pack(p, &w_curs,
1580 						entry->in_pack_offset + used, NULL);
1581 			entry->in_pack_header_size = used + the_hash_algo->rawsz;
1582 			break;
1583 		case OBJ_OFS_DELTA:
1584 			buf = use_pack(p, &w_curs,
1585 				       entry->in_pack_offset + used, NULL);
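			/*
			 * Decode the delta base offset: the inverse of the
			 * encoding used when writing OBJ_OFS_DELTA entries,
			 * big-endian with 7 bits per byte and an implicit +1
			 * per continuation byte; the MSB() check guards
			 * against offsets that would overflow.
			 */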
1586 			used_0 = 0;
1587 			c = buf[used_0++];
1588 			ofs = c & 127;
1589 			while (c & 128) {
1590 				ofs += 1;
1591 				if (!ofs || MSB(ofs, 7)) {
1592 					error(_("delta base offset overflow in pack for %s"),
1593 					      oid_to_hex(&entry->idx.oid));
1594 					goto give_up;
1595 				}
1596 				c = buf[used_0++];
1597 				ofs = (ofs << 7) + (c & 127);
1598 			}
1599 			ofs = entry->in_pack_offset - ofs;
1600 			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
1601 				error(_("delta base offset out of bound for %s"),
1602 				      oid_to_hex(&entry->idx.oid));
1603 				goto give_up;
1604 			}
1605 			if (reuse_delta && !entry->preferred_base) {
1606 				struct revindex_entry *revidx;
1607 				revidx = find_pack_revindex(p, ofs);
1608 				if (!revidx)
1609 					goto give_up;
1610 				base_ref = nth_packed_object_sha1(p, revidx->nr);
1611 			}
1612 			entry->in_pack_header_size = used + used_0;
1613 			break;
1614 		}
1615 
1616 		if (can_reuse_delta(base_ref, entry, &base_entry)) {
1617 			oe_set_type(entry, entry->in_pack_type);
1618 			SET_SIZE(entry, in_pack_size); /* delta size */
1619 			SET_DELTA_SIZE(entry, in_pack_size);
1620 
1621 			if (base_entry) {
1622 				SET_DELTA(entry, base_entry);
1623 				entry->delta_sibling_idx = base_entry->delta_child_idx;
1624 				SET_DELTA_CHILD(base_entry, entry);
1625 			} else {
1626 				SET_DELTA_EXT(entry, base_ref);
1627 			}
1628 
1629 			unuse_pack(&w_curs);
1630 			return;
1631 		}
1632 
1633 		if (oe_type(entry)) {
1634 			off_t delta_pos;
1635 
1636 			/*
1637 			 * This must be a delta and we already know what the
1638 			 * final object type is.  Let's extract the actual
1639 			 * object size from the delta header.
1640 			 */
1641 			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
1642 			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
1643 			if (canonical_size == 0)
1644 				goto give_up;
1645 			SET_SIZE(entry, canonical_size);
1646 			unuse_pack(&w_curs);
1647 			return;
1648 		}
1649 
		/*
		 * No choice but to fall back to the recursive delta walk
		 * with oid_object_info() to find out the object type
		 * at this point...
		 */
1655 		give_up:
1656 		unuse_pack(&w_curs);
1657 	}
1658 
1659 	oe_set_type(entry,
1660 		    oid_object_info(the_repository, &entry->idx.oid, &canonical_size));
1661 	if (entry->type_valid) {
1662 		SET_SIZE(entry, canonical_size);
1663 	} else {
1664 		/*
1665 		 * Bad object type is checked in prepare_pack().  This is
1666 		 * to permit a missing preferred base object to be ignored
1667 		 * as a preferred base.  Doing so can result in a larger
1668 		 * pack file, but the transfer will still take place.
1669 		 */
1670 	}
1671 }
1672 
static int pack_offset_sort(const void *_a, const void *_b)
1674 {
1675 	const struct object_entry *a = *(struct object_entry **)_a;
1676 	const struct object_entry *b = *(struct object_entry **)_b;
1677 	const struct packed_git *a_in_pack = IN_PACK(a);
1678 	const struct packed_git *b_in_pack = IN_PACK(b);
1679 
	/* avoid filesystem thrashing with loose objects */
1681 	if (!a_in_pack && !b_in_pack)
1682 		return oidcmp(&a->idx.oid, &b->idx.oid);
1683 
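	/* otherwise, group objects by pack and sort by offset within each pack */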
1684 	if (a_in_pack < b_in_pack)
1685 		return -1;
1686 	if (a_in_pack > b_in_pack)
1687 		return 1;
1688 	return a->in_pack_offset < b->in_pack_offset ? -1 :
1689 			(a->in_pack_offset > b->in_pack_offset);
1690 }
1691 
1692 /*
1693  * Drop an on-disk delta we were planning to reuse. Naively, this would
1694  * just involve blanking out the "delta" field, but we have to deal
1695  * with some extra book-keeping:
1696  *
1697  *   1. Removing ourselves from the delta_sibling linked list.
1698  *
1699  *   2. Updating our size/type to the non-delta representation. These were
1700  *      either not recorded initially (size) or overwritten with the delta type
1701  *      (type) when check_object() decided to reuse the delta.
1702  *
1703  *   3. Resetting our delta depth, as we are now a base object.
1704  */
static void drop_reused_delta(struct object_entry *entry)
1706 {
1707 	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
1708 	struct object_info oi = OBJECT_INFO_INIT;
1709 	enum object_type type;
1710 	unsigned long size;
1711 
1712 	while (*idx) {
1713 		struct object_entry *oe = &to_pack.objects[*idx - 1];
1714 
1715 		if (oe == entry)
1716 			*idx = oe->delta_sibling_idx;
1717 		else
1718 			idx = &oe->delta_sibling_idx;
1719 	}
1720 	SET_DELTA(entry, NULL);
1721 	entry->depth = 0;
1722 
1723 	oi.sizep = &size;
1724 	oi.typep = &type;
1725 	if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
1726 		/*
1727 		 * We failed to get the info from this pack for some reason;
1728 		 * fall back to oid_object_info, which may find another copy.
1729 		 * And if that fails, the error will be recorded in oe_type(entry)
1730 		 * and dealt with in prepare_pack().
1731 		 */
1732 		oe_set_type(entry,
1733 			    oid_object_info(the_repository, &entry->idx.oid, &size));
1734 	} else {
1735 		oe_set_type(entry, type);
1736 	}
1737 	SET_SIZE(entry, size);
1738 }
1739 
1740 /*
1741  * Follow the chain of deltas from this entry onward, throwing away any links
1742  * that cause us to hit a cycle (as determined by the DFS state flags in
1743  * the entries).
1744  *
1745  * We also detect too-long reused chains that would violate our --depth
1746  * limit.
1747  */
static void break_delta_chains(struct object_entry *entry)
1749 {
1750 	/*
1751 	 * The actual depth of each object we will write is stored as an int,
1752 	 * as it cannot exceed our int "depth" limit. But before we break
1753 	 * chains based on that limit, we may potentially go as deep as the
1754 	 * number of objects, which is elsewhere bounded to a uint32_t.
1755 	 */
1756 	uint32_t total_depth;
1757 	struct object_entry *cur, *next;
1758 
1759 	for (cur = entry, total_depth = 0;
1760 	     cur;
1761 	     cur = DELTA(cur), total_depth++) {
1762 		if (cur->dfs_state == DFS_DONE) {
1763 			/*
1764 			 * We've already seen this object and know it isn't
1765 			 * part of a cycle. We do need to append its depth
1766 			 * to our count.
1767 			 */
1768 			total_depth += cur->depth;
1769 			break;
1770 		}
1771 
1772 		/*
1773 		 * We break cycles before looping, so an ACTIVE state (or any
1774 		 * other cruft which made its way into the state variable)
1775 		 * is a bug.
1776 		 */
1777 		if (cur->dfs_state != DFS_NONE)
1778 			BUG("confusing delta dfs state in first pass: %d",
1779 			    cur->dfs_state);
1780 
1781 		/*
1782 		 * Now we know this is the first time we've seen the object. If
1783 		 * it's not a delta, we're done traversing, but we'll mark it
1784 		 * done to save time on future traversals.
1785 		 */
1786 		if (!DELTA(cur)) {
1787 			cur->dfs_state = DFS_DONE;
1788 			break;
1789 		}
1790 
1791 		/*
1792 		 * Mark ourselves as active and see if the next step causes
1793 		 * us to cycle to another active object. It's important to do
1794 		 * this _before_ we loop, because it impacts where we make the
1795 		 * cut, and thus how our total_depth counter works.
1796 		 * E.g., We may see a partial loop like:
1797 		 *
1798 		 *   A -> B -> C -> D -> B
1799 		 *
1800 		 * Cutting B->C breaks the cycle. But now the depth of A is
1801 		 * only 1, and our total_depth counter is at 3. The size of the
1802 		 * error is always one less than the size of the cycle we
1803 		 * broke. Commits C and D were "lost" from A's chain.
1804 		 *
1805 		 * If we instead cut D->B, then the depth of A is correct at 3.
1806 		 * We keep all commits in the chain that we examined.
1807 		 */
1808 		cur->dfs_state = DFS_ACTIVE;
1809 		if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
1810 			drop_reused_delta(cur);
1811 			cur->dfs_state = DFS_DONE;
1812 			break;
1813 		}
1814 	}
1815 
1816 	/*
1817 	 * And now that we've gone all the way to the bottom of the chain, we
1818 	 * need to clear the active flags and set the depth fields as
1819 	 * appropriate. Unlike the loop above, which can quit when it drops a
1820 	 * delta, we need to keep going to look for more depth cuts. So we need
1821 	 * an extra "next" pointer to keep going after we reset cur->delta.
1822 	 */
1823 	for (cur = entry; cur; cur = next) {
1824 		next = DELTA(cur);
1825 
1826 		/*
1827 		 * We should have a chain of zero or more ACTIVE states down to
1828 		 * a final DONE. We can quit after the DONE, because either it
1829 		 * has no bases, or we've already handled them in a previous
1830 		 * call.
1831 		 */
1832 		if (cur->dfs_state == DFS_DONE)
1833 			break;
1834 		else if (cur->dfs_state != DFS_ACTIVE)
1835 			BUG("confusing delta dfs state in second pass: %d",
1836 			    cur->dfs_state);
1837 
1838 		/*
1839 		 * If the total_depth is more than depth, then we need to snip
1840 		 * the chain into two or more smaller chains that don't exceed
1841 		 * the maximum depth. Most of the resulting chains will contain
1842 		 * (depth + 1) entries (i.e., depth deltas plus one base), and
1843 		 * the last chain (i.e., the one containing entry) will contain
1844 		 * whatever entries are left over, namely
1845 		 * (total_depth % (depth + 1)) of them.
1846 		 *
1847 		 * Since we are iterating towards decreasing depth, we need to
1848 		 * decrement total_depth as we go, and we need to write to the
1849 		 * entry what its final depth will be after all of the
1850 		 * snipping. Since we're snipping into chains of length (depth
1851 		 * + 1) entries, the final depth of an entry will be its
1852 		 * original depth modulo (depth + 1). Any time we encounter an
1853 		 * entry whose final depth is supposed to be zero, we snip it
1854 		 * from its delta base, thereby making it so.
1855 		 */
1856 		cur->depth = (total_depth--) % (depth + 1);
1857 		if (!cur->depth)
1858 			drop_reused_delta(cur);
1859 
1860 		cur->dfs_state = DFS_DONE;
1861 	}
1862 }
1863 
1864 static void get_object_details(void)
1865 {
1866 	uint32_t i;
1867 	struct object_entry **sorted_by_offset;
1868 
1869 	if (progress)
1870 		progress_state = start_progress(_("Counting objects"),
1871 						to_pack.nr_objects);
1872 
1873 	sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *));
1874 	for (i = 0; i < to_pack.nr_objects; i++)
1875 		sorted_by_offset[i] = to_pack.objects + i;
1876 	QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);
1877 
1878 	for (i = 0; i < to_pack.nr_objects; i++) {
1879 		struct object_entry *entry = sorted_by_offset[i];
1880 		check_object(entry);
1881 		if (entry->type_valid &&
1882 		    oe_size_greater_than(&to_pack, entry, big_file_threshold))
1883 			entry->no_try_delta = 1;
1884 		display_progress(progress_state, i + 1);
1885 	}
1886 	stop_progress(&progress_state);
1887 
1888 	/*
1889 	 * This must happen in a second pass, since we rely on the delta
1890 	 * information for the whole list being completed.
1891 	 */
1892 	for (i = 0; i < to_pack.nr_objects; i++)
1893 		break_delta_chains(&to_pack.objects[i]);
1894 
1895 	free(sorted_by_offset);
1896 }
1897 
1898 /*
1899  * We search for deltas in a list sorted by type, by filename hash, and then
1900  * by size, so that we see progressively smaller and smaller files.
1901  * That's because we prefer deltas to be from the bigger file
1902  * to the smaller -- deletes are potentially cheaper, but perhaps
1903  * more importantly, the bigger file is likely the more recent
1904  * one.  The deepest deltas are therefore the oldest objects which are
1905  * less likely to be accessed often.
1906  */
1907 static int type_size_sort(const void *_a, const void *_b)
1908 {
1909 	const struct object_entry *a = *(struct object_entry **)_a;
1910 	const struct object_entry *b = *(struct object_entry **)_b;
1911 	const enum object_type a_type = oe_type(a);
1912 	const enum object_type b_type = oe_type(b);
1913 	const unsigned long a_size = SIZE(a);
1914 	const unsigned long b_size = SIZE(b);
1915 
1916 	if (a_type > b_type)
1917 		return -1;
1918 	if (a_type < b_type)
1919 		return 1;
1920 	if (a->hash > b->hash)
1921 		return -1;
1922 	if (a->hash < b->hash)
1923 		return 1;
1924 	if (a->preferred_base > b->preferred_base)
1925 		return -1;
1926 	if (a->preferred_base < b->preferred_base)
1927 		return 1;
1928 	if (use_delta_islands) {
1929 		const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
1930 		if (island_cmp)
1931 			return island_cmp;
1932 	}
1933 	if (a_size > b_size)
1934 		return -1;
1935 	if (a_size < b_size)
1936 		return 1;
1937 	return a < b ? -1 : (a > b);  /* newest first */
1938 }
1939 
1940 struct unpacked {
1941 	struct object_entry *entry;
1942 	void *data;
1943 	struct delta_index *index;
1944 	unsigned depth;
1945 };
1946 
1947 static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
1948 			   unsigned long delta_size)
1949 {
1950 	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
1951 		return 0;
1952 
1953 	if (delta_size < cache_max_small_delta_size)
1954 		return 1;
1955 
1956 	/* cache delta, if objects are large enough compared to delta size */
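	/*
	 * i.e. keep the delta only if it is smaller than roughly
	 * src_size/1024 + trg_size/2048.
	 */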
1957 	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
1958 		return 1;
1959 
1960 	return 0;
1961 }
1962 
1963 /* Protect delta_cache_size */
1964 static pthread_mutex_t cache_mutex;
1965 #define cache_lock()		pthread_mutex_lock(&cache_mutex)
1966 #define cache_unlock()		pthread_mutex_unlock(&cache_mutex)
1967 
1968 /*
1969  * Protect object list partitioning (e.g. struct thread_param) and
1970  * progress_state
1971  */
1972 static pthread_mutex_t progress_mutex;
1973 #define progress_lock()		pthread_mutex_lock(&progress_mutex)
1974 #define progress_unlock()	pthread_mutex_unlock(&progress_mutex)
1975 
1976 /*
1977  * Access to struct object_entry is unprotected since each thread owns
1978  * a portion of the main object list. Just don't access object entries
1979  * ahead in the list because they can be stolen and would need
1980  * progress_mutex for protection.
1981  */
1982 
1983 /*
1984  * Return the size of the object without doing any delta
1985  * reconstruction (so non-deltas are true object sizes, but deltas
1986  * return the size of the delta data).
1987  */
1988 unsigned long oe_get_size_slow(struct packing_data *pack,
1989 			       const struct object_entry *e)
1990 {
1991 	struct packed_git *p;
1992 	struct pack_window *w_curs;
1993 	unsigned char *buf;
1994 	enum object_type type;
1995 	unsigned long used, avail, size;
1996 
1997 	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
1998 		packing_data_lock(&to_pack);
1999 		if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
2000 			die(_("unable to get size of %s"),
2001 			    oid_to_hex(&e->idx.oid));
2002 		packing_data_unlock(&to_pack);
2003 		return size;
2004 	}
2005 
2006 	p = oe_in_pack(pack, e);
2007 	if (!p)
2008 		BUG("when e->type is a delta, it must belong to a pack");
2009 
2010 	packing_data_lock(&to_pack);
2011 	w_curs = NULL;
2012 	buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
2013 	used = unpack_object_header_buffer(buf, avail, &type, &size);
2014 	if (used == 0)
2015 		die(_("unable to parse object header of %s"),
2016 		    oid_to_hex(&e->idx.oid));
2017 
2018 	unuse_pack(&w_curs);
2019 	packing_data_unlock(&to_pack);
2020 	return size;
2021 }
2022 
2023 static int try_delta(struct unpacked *trg, struct unpacked *src,
2024 		     unsigned max_depth, unsigned long *mem_usage)
2025 {
2026 	struct object_entry *trg_entry = trg->entry;
2027 	struct object_entry *src_entry = src->entry;
2028 	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
2029 	unsigned ref_depth;
2030 	enum object_type type;
2031 	void *delta_buf;
2032 
2033 	/* Don't bother doing diffs between different types */
2034 	if (oe_type(trg_entry) != oe_type(src_entry))
2035 		return -1;
2036 
2037 	/*
2038 	 * We do not bother to try a delta that we discarded on an
2039 	 * earlier try, but only when reusing delta data.  Note that
2040 	 * src_entry that is marked as the preferred_base should always
2041 	 * be considered, as even if we produce a suboptimal delta against
2042 	 * it, we will still save the transfer cost, as we already know
2043 	 * the other side has it and we won't send src_entry at all.
2044 	 */
2045 	if (reuse_delta && IN_PACK(trg_entry) &&
2046 	    IN_PACK(trg_entry) == IN_PACK(src_entry) &&
2047 	    !src_entry->preferred_base &&
2048 	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
2049 	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
2050 		return 0;
2051 
2052 	/* Let's not bust the allowed depth. */
2053 	if (src->depth >= max_depth)
2054 		return 0;
2055 
2056 	/* Now some size filtering heuristics. */
2057 	trg_size = SIZE(trg_entry);
2058 	if (!DELTA(trg_entry)) {
2059 		max_size = trg_size/2 - the_hash_algo->rawsz;
2060 		ref_depth = 1;
2061 	} else {
2062 		max_size = DELTA_SIZE(trg_entry);
2063 		ref_depth = trg->depth;
2064 	}
2065 	max_size = (uint64_t)max_size * (max_depth - src->depth) /
2066 						(max_depth - ref_depth + 1);
2067 	if (max_size == 0)
2068 		return 0;
2069 	src_size = SIZE(src_entry);
2070 	sizediff = src_size < trg_size ? trg_size - src_size : 0;
2071 	if (sizediff >= max_size)
2072 		return 0;
2073 	if (trg_size < src_size / 32)
2074 		return 0;
2075 
2076 	if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
2077 		return 0;
2078 
2079 	/* Load data if not already done */
2080 	if (!trg->data) {
2081 		packing_data_lock(&to_pack);
2082 		trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz);
2083 		packing_data_unlock(&to_pack);
2084 		if (!trg->data)
2085 			die(_("object %s cannot be read"),
2086 			    oid_to_hex(&trg_entry->idx.oid));
2087 		if (sz != trg_size)
2088 			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2089 			    oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
2090 			    (uintmax_t)trg_size);
2091 		*mem_usage += sz;
2092 	}
2093 	if (!src->data) {
2094 		packing_data_lock(&to_pack);
2095 		src->data = read_object_file(&src_entry->idx.oid, &type, &sz);
2096 		packing_data_unlock(&to_pack);
2097 		if (!src->data) {
2098 			if (src_entry->preferred_base) {
2099 				static int warned = 0;
2100 				if (!warned++)
2101 					warning(_("object %s cannot be read"),
2102 						oid_to_hex(&src_entry->idx.oid));
2103 				/*
2104 				 * Those objects are not included in the
2105 				 * resulting pack.  Be resilient and ignore
2106 				 * them if they can't be read, in case the
2107 				 * pack could be created nevertheless.
2108 				 */
2109 				return 0;
2110 			}
2111 			die(_("object %s cannot be read"),
2112 			    oid_to_hex(&src_entry->idx.oid));
2113 		}
2114 		if (sz != src_size)
2115 			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
2116 			    oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
2117 			    (uintmax_t)src_size);
2118 		*mem_usage += sz;
2119 	}
2120 	if (!src->index) {
2121 		src->index = create_delta_index(src->data, src_size);
2122 		if (!src->index) {
2123 			static int warned = 0;
2124 			if (!warned++)
2125 				warning(_("suboptimal pack - out of memory"));
2126 			return 0;
2127 		}
2128 		*mem_usage += sizeof_delta_index(src->index);
2129 	}
2130 
2131 	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
2132 	if (!delta_buf)
2133 		return 0;
2134 
2135 	if (DELTA(trg_entry)) {
2136 		/* Prefer only shallower same-sized deltas. */
2137 		if (delta_size == DELTA_SIZE(trg_entry) &&
2138 		    src->depth + 1 >= trg->depth) {
2139 			free(delta_buf);
2140 			return 0;
2141 		}
2142 	}
2143 
2144 	/*
2145 	 * Handle memory allocation outside of the cache
2146 	 * accounting lock.  Compiler will optimize the strangeness
2147 	 * away when NO_PTHREADS is defined.
2148 	 */
2149 	free(trg_entry->delta_data);
2150 	cache_lock();
2151 	if (trg_entry->delta_data) {
2152 		delta_cache_size -= DELTA_SIZE(trg_entry);
2153 		trg_entry->delta_data = NULL;
2154 	}
2155 	if (delta_cacheable(src_size, trg_size, delta_size)) {
2156 		delta_cache_size += delta_size;
2157 		cache_unlock();
2158 		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
2159 	} else {
2160 		cache_unlock();
2161 		free(delta_buf);
2162 	}
2163 
2164 	SET_DELTA(trg_entry, src_entry);
2165 	SET_DELTA_SIZE(trg_entry, delta_size);
2166 	trg->depth = src->depth + 1;
2167 
2168 	return 1;
2169 }
2170 
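/*
 * Return the depth of the deepest delta chain hanging off of "me",
 * counting "me" itself as depth "n".  find_deltas() uses this to shrink
 * the depth budget of an object that already has delta children, so
 * that deltifying it cannot push those children past --depth.
 */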
2171 static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
2172 {
2173 	struct object_entry *child = DELTA_CHILD(me);
2174 	unsigned int m = n;
2175 	while (child) {
2176 		const unsigned int c = check_delta_limit(child, n + 1);
2177 		if (m < c)
2178 			m = c;
2179 		child = DELTA_SIBLING(child);
2180 	}
2181 	return m;
2182 }
2183 
2184 static unsigned long free_unpacked(struct unpacked *n)
2185 {
2186 	unsigned long freed_mem = sizeof_delta_index(n->index);
2187 	free_delta_index(n->index);
2188 	n->index = NULL;
2189 	if (n->data) {
2190 		freed_mem += SIZE(n->entry);
2191 		FREE_AND_NULL(n->data);
2192 	}
2193 	n->entry = NULL;
2194 	n->depth = 0;
2195 	return freed_mem;
2196 }
2197 
2198 static void find_deltas(struct object_entry **list, unsigned *list_size,
2199 			int window, int depth, unsigned *processed)
2200 {
2201 	uint32_t i, idx = 0, count = 0;
2202 	struct unpacked *array;
2203 	unsigned long mem_usage = 0;
2204 
2205 	array = xcalloc(window, sizeof(struct unpacked));
2206 
2207 	for (;;) {
2208 		struct object_entry *entry;
2209 		struct unpacked *n = array + idx;
2210 		int j, max_depth, best_base = -1;
2211 
2212 		progress_lock();
2213 		if (!*list_size) {
2214 			progress_unlock();
2215 			break;
2216 		}
2217 		entry = *list++;
2218 		(*list_size)--;
2219 		if (!entry->preferred_base) {
2220 			(*processed)++;
2221 			display_progress(progress_state, *processed);
2222 		}
2223 		progress_unlock();
2224 
2225 		mem_usage -= free_unpacked(n);
2226 		n->entry = entry;
2227 
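		/*
		 * Stay under the window memory limit by evicting the
		 * oldest entries from the delta window.
		 */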
2228 		while (window_memory_limit &&
2229 		       mem_usage > window_memory_limit &&
2230 		       count > 1) {
2231 			const uint32_t tail = (idx + window - count) % window;
2232 			mem_usage -= free_unpacked(array + tail);
2233 			count--;
2234 		}
2235 
2236 		/* We do not compute delta to *create* objects we are not
2237 		 * going to pack.
2238 		 */
2239 		if (entry->preferred_base)
2240 			goto next;
2241 
2242 		/*
2243 		 * If the current object is at a pack edge, take the depth of
2244 		 * the objects that depend on it into account; otherwise they
2245 		 * would become too deep.
2246 		 */
2247 		max_depth = depth;
2248 		if (DELTA_CHILD(entry)) {
2249 			max_depth -= check_delta_limit(entry, 0);
2250 			if (max_depth <= 0)
2251 				goto next;
2252 		}
2253 
2254 		j = window;
2255 		while (--j > 0) {
2256 			int ret;
2257 			uint32_t other_idx = idx + j;
2258 			struct unpacked *m;
2259 			if (other_idx >= window)
2260 				other_idx -= window;
2261 			m = array + other_idx;
2262 			if (!m->entry)
2263 				break;
2264 			ret = try_delta(n, m, max_depth, &mem_usage);
2265 			if (ret < 0)
2266 				break;
2267 			else if (ret > 0)
2268 				best_base = other_idx;
2269 		}
2270 
2271 		/*
2272 		 * If we decided to cache the delta data, then it is best
2273 		 * to compress it right away.  First because we have to do
2274 		 * it anyway, and doing it here while we're threaded will
2275 		 * save a lot of time in the non threaded write phase,
2276 		 * as well as allow for caching more deltas within
2277 		 * the same cache size limit.
2278 		 * ...
2279 		 * But only if not writing to stdout, since in that case
2280 		 * the network is most likely throttling writes anyway,
2281 		 * and therefore it is best to go to the write phase ASAP
2282 		 * instead, as we can afford spending more time compressing
2283 		 * between writes at that moment.
2284 		 */
2285 		if (entry->delta_data && !pack_to_stdout) {
2286 			unsigned long size;
2287 
2288 			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
2289 			if (size < (1U << OE_Z_DELTA_BITS)) {
2290 				entry->z_delta_size = size;
2291 				cache_lock();
2292 				delta_cache_size -= DELTA_SIZE(entry);
2293 				delta_cache_size += entry->z_delta_size;
2294 				cache_unlock();
2295 			} else {
2296 				FREE_AND_NULL(entry->delta_data);
2297 				entry->z_delta_size = 0;
2298 			}
2299 		}
2300 
2301 		/* if we made n a delta, and if n is already at max
2302 		 * depth, leaving it in the window is pointless.  we
2303 		 * should evict it first.
2304 		 */
2305 		if (DELTA(entry) && max_depth <= n->depth)
2306 			continue;
2307 
2308 		/*
2309 		 * Move the best delta base up in the window, after the
2310 		 * currently deltified object, to keep it longer.  It will
2311 		 * be the first base object to be attempted next.
2312 		 */
2313 		if (DELTA(entry)) {
2314 			struct unpacked swap = array[best_base];
2315 			int dist = (window + idx - best_base) % window;
2316 			int dst = best_base;
2317 			while (dist--) {
2318 				int src = (dst + 1) % window;
2319 				array[dst] = array[src];
2320 				dst = src;
2321 			}
2322 			array[dst] = swap;
2323 		}
2324 
2325 		next:
2326 		idx++;
2327 		if (count + 1 < window)
2328 			count++;
2329 		if (idx >= window)
2330 			idx = 0;
2331 	}
2332 
2333 	for (i = 0; i < window; ++i) {
2334 		free_delta_index(array[i].index);
2335 		free(array[i].data);
2336 	}
2337 	free(array);
2338 }
2339 
2340 /*
2341  * The main object list is split into smaller lists, each is handed to
2342  * one worker.
2343  *
2344  * The main thread waits on the condition that (at least) one of the workers
2345  * has stopped working (which is indicated in the .working member of
2346  * struct thread_params).
2347  *
2348  * When a work thread has completed its work, it sets .working to 0 and
2349  * signals the main thread and waits on the condition that .data_ready
2350  * becomes 1.
2351  *
2352  * The main thread steals half of the work from the worker that has
2353  * most work left to hand it to the idle worker.
2354  */
2355 
2356 struct thread_params {
2357 	pthread_t thread;
2358 	struct object_entry **list;
2359 	unsigned list_size;
2360 	unsigned remaining;
2361 	int window;
2362 	int depth;
2363 	int working;
2364 	int data_ready;
2365 	pthread_mutex_t mutex;
2366 	pthread_cond_t cond;
2367 	unsigned *processed;
2368 };
2369 
2370 static pthread_cond_t progress_cond;
2371 
2372 /*
2373  * Mutex and condition variable can't be statically initialized on Windows.
2374  */
2375 static void init_threaded_search(void)
2376 {
2377 	pthread_mutex_init(&cache_mutex, NULL);
2378 	pthread_mutex_init(&progress_mutex, NULL);
2379 	pthread_cond_init(&progress_cond, NULL);
2380 }
2381 
2382 static void cleanup_threaded_search(void)
2383 {
2384 	pthread_cond_destroy(&progress_cond);
2385 	pthread_mutex_destroy(&cache_mutex);
2386 	pthread_mutex_destroy(&progress_mutex);
2387 }
2388 
2389 static void *threaded_find_deltas(void *arg)
2390 {
2391 	struct thread_params *me = arg;
2392 
2393 	progress_lock();
2394 	while (me->remaining) {
2395 		progress_unlock();
2396 
2397 		find_deltas(me->list, &me->remaining,
2398 			    me->window, me->depth, me->processed);
2399 
2400 		progress_lock();
2401 		me->working = 0;
2402 		pthread_cond_signal(&progress_cond);
2403 		progress_unlock();
2404 
2405 		/*
2406 		 * We must not set ->data_ready before we wait on the
2407 		 * condition because the main thread may have set it to 1
2408 		 * before we get here. In order to be sure that new
2409 		 * work is available if we see 1 in ->data_ready, it
2410 		 * was initialized to 0 before this thread was spawned
2411 		 * and we reset it to 0 right away.
2412 		 */
2413 		pthread_mutex_lock(&me->mutex);
2414 		while (!me->data_ready)
2415 			pthread_cond_wait(&me->cond, &me->mutex);
2416 		me->data_ready = 0;
2417 		pthread_mutex_unlock(&me->mutex);
2418 
2419 		progress_lock();
2420 	}
2421 	progress_unlock();
2422 	/* leave ->working 1 so that this doesn't get more work assigned */
2423 	return NULL;
2424 }
2425 
2426 static void ll_find_deltas(struct object_entry **list, unsigned list_size,
2427 			   int window, int depth, unsigned *processed)
2428 {
2429 	struct thread_params *p;
2430 	int i, ret, active_threads = 0;
2431 
2432 	init_threaded_search();
2433 
2434 	if (delta_search_threads <= 1) {
2435 		find_deltas(list, &list_size, window, depth, processed);
2436 		cleanup_threaded_search();
2437 		return;
2438 	}
2439 	if (progress > pack_to_stdout)
2440 		fprintf_ln(stderr, _("Delta compression using up to %d threads"),
2441 			   delta_search_threads);
2442 	p = xcalloc(delta_search_threads, sizeof(*p));
2443 
2444 	/* Partition the work amongst work threads. */
2445 	for (i = 0; i < delta_search_threads; i++) {
2446 		unsigned sub_size = list_size / (delta_search_threads - i);
2447 
2448 		/* don't use too small segments or no deltas will be found */
2449 		if (sub_size < 2*window && i+1 < delta_search_threads)
2450 			sub_size = 0;
2451 
2452 		p[i].window = window;
2453 		p[i].depth = depth;
2454 		p[i].processed = processed;
2455 		p[i].working = 1;
2456 		p[i].data_ready = 0;
2457 
2458 		/* try to split chunks on "path" boundaries */
2459 		while (sub_size && sub_size < list_size &&
2460 		       list[sub_size]->hash &&
2461 		       list[sub_size]->hash == list[sub_size-1]->hash)
2462 			sub_size++;
2463 
2464 		p[i].list = list;
2465 		p[i].list_size = sub_size;
2466 		p[i].remaining = sub_size;
2467 
2468 		list += sub_size;
2469 		list_size -= sub_size;
2470 	}
2471 
2472 	/* Start work threads. */
2473 	for (i = 0; i < delta_search_threads; i++) {
2474 		if (!p[i].list_size)
2475 			continue;
2476 		pthread_mutex_init(&p[i].mutex, NULL);
2477 		pthread_cond_init(&p[i].cond, NULL);
2478 		ret = pthread_create(&p[i].thread, NULL,
2479 				     threaded_find_deltas, &p[i]);
2480 		if (ret)
2481 			die(_("unable to create thread: %s"), strerror(ret));
2482 		active_threads++;
2483 	}
2484 
2485 	/*
2486 	 * Now let's wait for work completion.  Each time a thread is done
2487 	 * with its work, we steal half of the remaining work from the
2488 	 * thread with the largest number of unprocessed objects and give
2489 	 * it to that newly idle thread.  This ensures good load balancing
2490 	 * until the remaining object list segments are simply too short
2491 	 * to be worth splitting anymore.
2492 	 */
2493 	while (active_threads) {
2494 		struct thread_params *target = NULL;
2495 		struct thread_params *victim = NULL;
2496 		unsigned sub_size = 0;
2497 
2498 		progress_lock();
2499 		for (;;) {
2500 			for (i = 0; !target && i < delta_search_threads; i++)
2501 				if (!p[i].working)
2502 					target = &p[i];
2503 			if (target)
2504 				break;
2505 			pthread_cond_wait(&progress_cond, &progress_mutex);
2506 		}
2507 
2508 		for (i = 0; i < delta_search_threads; i++)
2509 			if (p[i].remaining > 2*window &&
2510 			    (!victim || victim->remaining < p[i].remaining))
2511 				victim = &p[i];
2512 		if (victim) {
2513 			sub_size = victim->remaining / 2;
2514 			list = victim->list + victim->list_size - sub_size;
2515 			while (sub_size && list[0]->hash &&
2516 			       list[0]->hash == list[-1]->hash) {
2517 				list++;
2518 				sub_size--;
2519 			}
2520 			if (!sub_size) {
2521 				/*
2522 				 * It is possible for some "paths" to have
2523 				 * so many objects that no hash boundary
2524 				 * might be found.  Let's just steal the
2525 				 * exact half in that case.
2526 				 */
2527 				sub_size = victim->remaining / 2;
2528 				list -= sub_size;
2529 			}
2530 			target->list = list;
2531 			victim->list_size -= sub_size;
2532 			victim->remaining -= sub_size;
2533 		}
2534 		target->list_size = sub_size;
2535 		target->remaining = sub_size;
2536 		target->working = 1;
2537 		progress_unlock();
2538 
2539 		pthread_mutex_lock(&target->mutex);
2540 		target->data_ready = 1;
2541 		pthread_cond_signal(&target->cond);
2542 		pthread_mutex_unlock(&target->mutex);
2543 
2544 		if (!sub_size) {
2545 			pthread_join(target->thread, NULL);
2546 			pthread_cond_destroy(&target->cond);
2547 			pthread_mutex_destroy(&target->mutex);
2548 			active_threads--;
2549 		}
2550 	}
2551 	cleanup_threaded_search();
2552 	free(p);
2553 }
2554 
2555 static void add_tag_chain(const struct object_id *oid)
2556 {
2557 	struct tag *tag;
2558 
2559 	/*
2560 	 * We catch duplicates already in add_object_entry(), but we'd
2561 	 * prefer to do this extra check to avoid having to parse the
2562 	 * tag at all if we already know that it's being packed (e.g., if
2563 	 * it was included via bitmaps, we would not have parsed it
2564 	 * previously).
2565 	 */
2566 	if (packlist_find(&to_pack, oid))
2567 		return;
2568 
2569 	tag = lookup_tag(the_repository, oid);
2570 	while (1) {
2571 		if (!tag || parse_tag(tag) || !tag->tagged)
2572 			die(_("unable to pack objects reachable from tag %s"),
2573 			    oid_to_hex(oid));
2574 
2575 		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);
2576 
2577 		if (tag->tagged->type != OBJ_TAG)
2578 			return;
2579 
2580 		tag = (struct tag *)tag->tagged;
2581 	}
2582 }
2583 
2584 static int add_ref_tag(const char *path, const struct object_id *oid, int flag, void *cb_data)
2585 {
2586 	struct object_id peeled;
2587 
2588 	if (starts_with(path, "refs/tags/") && /* is a tag? */
2589 	    !peel_ref(path, &peeled)    && /* peelable? */
2590 	    packlist_find(&to_pack, &peeled))      /* object packed? */
2591 		add_tag_chain(oid);
2592 	return 0;
2593 }
2594 
2595 static void prepare_pack(int window, int depth)
2596 {
2597 	struct object_entry **delta_list;
2598 	uint32_t i, nr_deltas;
2599 	unsigned n;
2600 
2601 	if (use_delta_islands)
2602 		resolve_tree_islands(the_repository, progress, &to_pack);
2603 
2604 	get_object_details();
2605 
2606 	/*
2607 	 * If we're locally repacking then we need to be doubly careful
2608 	 * from now on in order to make sure no stealth corruption gets
2609 	 * propagated to the new pack.  Clients receiving streamed packs
2610 	 * should validate everything they get anyway so no need to incur
2611 	 * the additional cost here in that case.
2612 	 */
2613 	if (!pack_to_stdout)
2614 		do_check_packed_object_crc = 1;
2615 
2616 	if (!to_pack.nr_objects || !window || !depth)
2617 		return;
2618 
2619 	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
2620 	nr_deltas = n = 0;
2621 
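	/*
	 * Collect the delta-search candidates: skip objects that already
	 * reuse an on-disk delta, objects whose type or size could not be
	 * determined (or that are too small to bother with), and anything
	 * marked no_try_delta.
	 */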
2622 	for (i = 0; i < to_pack.nr_objects; i++) {
2623 		struct object_entry *entry = to_pack.objects + i;
2624 
2625 		if (DELTA(entry))
2626 			/* This happens if we decided to reuse existing
2627 			 * delta from a pack.  "reuse_delta &&" is implied.
2628 			 */
2629 			continue;
2630 
2631 		if (!entry->type_valid ||
2632 		    oe_size_less_than(&to_pack, entry, 50))
2633 			continue;
2634 
2635 		if (entry->no_try_delta)
2636 			continue;
2637 
2638 		if (!entry->preferred_base) {
2639 			nr_deltas++;
2640 			if (oe_type(entry) < 0)
2641 				die(_("unable to get type of object %s"),
2642 				    oid_to_hex(&entry->idx.oid));
2643 		} else {
2644 			if (oe_type(entry) < 0) {
2645 				/*
2646 				 * This object is not found, but we
2647 				 * don't have to include it anyway.
2648 				 */
2649 				continue;
2650 			}
2651 		}
2652 
2653 		delta_list[n++] = entry;
2654 	}
2655 
2656 	if (nr_deltas && n > 1) {
2657 		unsigned nr_done = 0;
2658 		if (progress)
2659 			progress_state = start_progress(_("Compressing objects"),
2660 							nr_deltas);
2661 		QSORT(delta_list, n, type_size_sort);
2662 		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
2663 		stop_progress(&progress_state);
2664 		if (nr_done != nr_deltas)
2665 			die(_("inconsistency with delta count"));
2666 	}
2667 	free(delta_list);
2668 }
2669 
2670 static int git_pack_config(const char *k, const char *v, void *cb)
2671 {
2672 	if (!strcmp(k, "pack.window")) {
2673 		window = git_config_int(k, v);
2674 		return 0;
2675 	}
2676 	if (!strcmp(k, "pack.windowmemory")) {
2677 		window_memory_limit = git_config_ulong(k, v);
2678 		return 0;
2679 	}
2680 	if (!strcmp(k, "pack.depth")) {
2681 		depth = git_config_int(k, v);
2682 		return 0;
2683 	}
2684 	if (!strcmp(k, "pack.deltacachesize")) {
2685 		max_delta_cache_size = git_config_int(k, v);
2686 		return 0;
2687 	}
2688 	if (!strcmp(k, "pack.deltacachelimit")) {
2689 		cache_max_small_delta_size = git_config_int(k, v);
2690 		return 0;
2691 	}
2692 	if (!strcmp(k, "pack.writebitmaphashcache")) {
2693 		if (git_config_bool(k, v))
2694 			write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
2695 		else
2696 			write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
2697 	}
2698 	if (!strcmp(k, "pack.usebitmaps")) {
2699 		use_bitmap_index_default = git_config_bool(k, v);
2700 		return 0;
2701 	}
2702 	if (!strcmp(k, "pack.threads")) {
2703 		delta_search_threads = git_config_int(k, v);
2704 		if (delta_search_threads < 0)
2705 			die(_("invalid number of threads specified (%d)"),
2706 			    delta_search_threads);
2707 		if (!HAVE_THREADS && delta_search_threads != 1) {
2708 			warning(_("no threads support, ignoring %s"), k);
2709 			delta_search_threads = 0;
2710 		}
2711 		return 0;
2712 	}
2713 	if (!strcmp(k, "pack.indexversion")) {
2714 		pack_idx_opts.version = git_config_int(k, v);
2715 		if (pack_idx_opts.version > 2)
2716 			die(_("bad pack.indexversion=%"PRIu32),
2717 			    pack_idx_opts.version);
2718 		return 0;
2719 	}
2720 	return git_default_config(k, v, cb);
2721 }
2722 
2723 static void read_object_list_from_stdin(void)
2724 {
2725 	char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
2726 	struct object_id oid;
2727 	const char *p;
2728 
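	/*
	 * Each input line is either "<oid> <name>" for an object to pack,
	 * or "-<oid>" for an edge object to use only as a preferred base.
	 */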
2729 	for (;;) {
2730 		if (!fgets(line, sizeof(line), stdin)) {
2731 			if (feof(stdin))
2732 				break;
2733 			if (!ferror(stdin))
2734 				die("BUG: fgets returned NULL, not EOF, not error!");
2735 			if (errno != EINTR)
2736 				die_errno("fgets");
2737 			clearerr(stdin);
2738 			continue;
2739 		}
2740 		if (line[0] == '-') {
2741 			if (get_oid_hex(line+1, &oid))
2742 				die(_("expected edge object ID, got garbage:\n %s"),
2743 				    line);
2744 			add_preferred_base(&oid);
2745 			continue;
2746 		}
2747 		if (parse_oid_hex(line, &oid, &p))
2748 			die(_("expected object ID, got garbage:\n %s"), line);
2749 
2750 		add_preferred_base_object(p + 1);
2751 		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
2752 	}
2753 }
2754 
2755 /* Remember to update object flag allocation in object.h */
2756 #define OBJECT_ADDED (1u<<20)
2757 
2758 static void show_commit(struct commit *commit, void *data)
2759 {
2760 	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);
2761 	commit->object.flags |= OBJECT_ADDED;
2762 
2763 	if (write_bitmap_index)
2764 		index_commit_for_bitmap(commit);
2765 
2766 	if (use_delta_islands)
2767 		propagate_island_marks(commit);
2768 }
2769 
2770 static void show_object(struct object *obj, const char *name, void *data)
2771 {
2772 	add_preferred_base_object(name);
2773 	add_object_entry(&obj->oid, obj->type, name, 0);
2774 	obj->flags |= OBJECT_ADDED;
2775 
2776 	if (use_delta_islands) {
2777 		const char *p;
2778 		unsigned depth;
2779 		struct object_entry *ent;
2780 
2781 		/* the empty string is a root tree, which is depth 0 */
2782 		depth = *name ? 1 : 0;
2783 		for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
2784 			depth++;
2785 
2786 		ent = packlist_find(&to_pack, &obj->oid);
2787 		if (ent && depth > oe_tree_depth(&to_pack, ent))
2788 			oe_set_tree_depth(&to_pack, ent, depth);
2789 	}
2790 }
2791 
2792 static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
2793 {
2794 	assert(arg_missing_action == MA_ALLOW_ANY);
2795 
2796 	/*
2797 	 * Quietly ignore ALL missing objects.  This avoids problems with
2798 	 * staging them now and getting an odd error later.
2799 	 */
2800 	if (!has_object_file(&obj->oid))
2801 		return;
2802 
2803 	show_object(obj, name, data);
2804 }
2805 
2806 static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
2807 {
2808 	assert(arg_missing_action == MA_ALLOW_PROMISOR);
2809 
2810 	/*
2811 	 * Quietly ignore EXPECTED missing objects.  This avoids problems with
2812 	 * staging them now and getting an odd error later.
2813 	 */
2814 	if (!has_object_file(&obj->oid) && is_promisor_object(&obj->oid))
2815 		return;
2816 
2817 	show_object(obj, name, data);
2818 }
2819 
2820 static int option_parse_missing_action(const struct option *opt,
2821 				       const char *arg, int unset)
2822 {
2823 	assert(arg);
2824 	assert(!unset);
2825 
2826 	if (!strcmp(arg, "error")) {
2827 		arg_missing_action = MA_ERROR;
2828 		fn_show_object = show_object;
2829 		return 0;
2830 	}
2831 
2832 	if (!strcmp(arg, "allow-any")) {
2833 		arg_missing_action = MA_ALLOW_ANY;
2834 		fetch_if_missing = 0;
2835 		fn_show_object = show_object__ma_allow_any;
2836 		return 0;
2837 	}
2838 
2839 	if (!strcmp(arg, "allow-promisor")) {
2840 		arg_missing_action = MA_ALLOW_PROMISOR;
2841 		fetch_if_missing = 0;
2842 		fn_show_object = show_object__ma_allow_promisor;
2843 		return 0;
2844 	}
2845 
2846 	die(_("invalid value for --missing"));
2847 	return 0;
2848 }
2849 
2850 static void show_edge(struct commit *commit)
2851 {
2852 	add_preferred_base(&commit->object.oid);
2853 }
2854 
2855 struct in_pack_object {
2856 	off_t offset;
2857 	struct object *object;
2858 };
2859 
2860 struct in_pack {
2861 	unsigned int alloc;
2862 	unsigned int nr;
2863 	struct in_pack_object *array;
2864 };
2865 
2866 static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
2867 {
2868 	in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
2869 	in_pack->array[in_pack->nr].object = object;
2870 	in_pack->nr++;
2871 }
2872 
2873 /*
2874  * Compare the objects in offset order, to emulate the
2875  * "git rev-list --objects" output that produced the pack originally.
2876  */
2877 static int ofscmp(const void *a_, const void *b_)
2878 {
2879 	struct in_pack_object *a = (struct in_pack_object *)a_;
2880 	struct in_pack_object *b = (struct in_pack_object *)b_;
2881 
2882 	if (a->offset < b->offset)
2883 		return -1;
2884 	else if (a->offset > b->offset)
2885 		return 1;
2886 	else
2887 		return oidcmp(&a->object->oid, &b->object->oid);
2888 }
2889 
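/*
 * With --keep-unreachable, also pack every object from local, non-kept
 * packs that the traversal did not already add, in the order the objects
 * appear in their original packs.
 */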
2890 static void add_objects_in_unpacked_packs(void)
2891 {
2892 	struct packed_git *p;
2893 	struct in_pack in_pack;
2894 	uint32_t i;
2895 
2896 	memset(&in_pack, 0, sizeof(in_pack));
2897 
2898 	for (p = get_all_packs(the_repository); p; p = p->next) {
2899 		struct object_id oid;
2900 		struct object *o;
2901 
2902 		if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
2903 			continue;
2904 		if (open_pack_index(p))
2905 			die(_("cannot open pack index"));
2906 
2907 		ALLOC_GROW(in_pack.array,
2908 			   in_pack.nr + p->num_objects,
2909 			   in_pack.alloc);
2910 
2911 		for (i = 0; i < p->num_objects; i++) {
2912 			nth_packed_object_oid(&oid, p, i);
2913 			o = lookup_unknown_object(&oid);
2914 			if (!(o->flags & OBJECT_ADDED))
2915 				mark_in_pack_object(o, p, &in_pack);
2916 			o->flags |= OBJECT_ADDED;
2917 		}
2918 	}
2919 
2920 	if (in_pack.nr) {
2921 		QSORT(in_pack.array, in_pack.nr, ofscmp);
2922 		for (i = 0; i < in_pack.nr; i++) {
2923 			struct object *o = in_pack.array[i].object;
2924 			add_object_entry(&o->oid, o->type, "", 0);
2925 		}
2926 	}
2927 	free(in_pack.array);
2928 }
2929 
2930 static int add_loose_object(const struct object_id *oid, const char *path,
2931 			    void *data)
2932 {
2933 	enum object_type type = oid_object_info(the_repository, oid, NULL);
2934 
2935 	if (type < 0) {
2936 		warning(_("loose object at %s could not be examined"), path);
2937 		return 0;
2938 	}
2939 
2940 	add_object_entry(oid, type, "", 0);
2941 	return 0;
2942 }
2943 
2944 /*
2945  * We actually don't even have to worry about reachability here.
2946  * add_object_entry will weed out duplicates, so we just add every
2947  * loose object we find.
2948  */
2949 static void add_unreachable_loose_objects(void)
2950 {
2951 	for_each_loose_file_in_objdir(get_object_directory(),
2952 				      add_loose_object,
2953 				      NULL, NULL, NULL);
2954 }
2955 
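/*
 * Check whether the object is available in a pack that we are not going
 * to repack (a non-local or kept pack).  "last_found" remembers the pack
 * that matched most recently so it is tried first; (void *)1 is the
 * "nothing cached yet" sentinel.
 */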
2956 static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
2957 {
2958 	static struct packed_git *last_found = (void *)1;
2959 	struct packed_git *p;
2960 
2961 	p = (last_found != (void *)1) ? last_found :
2962 					get_all_packs(the_repository);
2963 
2964 	while (p) {
2965 		if ((!p->pack_local || p->pack_keep ||
2966 				p->pack_keep_in_core) &&
2967 			find_pack_entry_one(oid->hash, p)) {
2968 			last_found = p;
2969 			return 1;
2970 		}
2971 		if (p == last_found)
2972 			p = get_all_packs(the_repository);
2973 		else
2974 			p = p->next;
2975 		if (p == last_found)
2976 			p = p->next;
2977 	}
2978 	return 0;
2979 }
2980 
2981 /*
2982  * Store a list of sha1s that should not be discarded
2983  * because they are either written too recently, or are
2984  * reachable from another object that was.
2985  *
2986  * This is filled by get_object_list.
2987  */
2988 static struct oid_array recent_objects;
2989 
2990 static int loosened_object_can_be_discarded(const struct object_id *oid,
2991 					    timestamp_t mtime)
2992 {
2993 	if (!unpack_unreachable_expiration)
2994 		return 0;
2995 	if (mtime > unpack_unreachable_expiration)
2996 		return 0;
2997 	if (oid_array_lookup(&recent_objects, oid) >= 0)
2998 		return 0;
2999 	return 1;
3000 }
3001 
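/*
 * Turn packed objects from local, non-kept packs back into loose objects
 * if they did not make it into the new pack and may not simply be
 * discarded as expired.
 */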
3002 static void loosen_unused_packed_objects(void)
3003 {
3004 	struct packed_git *p;
3005 	uint32_t i;
3006 	struct object_id oid;
3007 
3008 	for (p = get_all_packs(the_repository); p; p = p->next) {
3009 		if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
3010 			continue;
3011 
3012 		if (open_pack_index(p))
3013 			die(_("cannot open pack index"));
3014 
3015 		for (i = 0; i < p->num_objects; i++) {
3016 			nth_packed_object_oid(&oid, p, i);
3017 			if (!packlist_find(&to_pack, &oid) &&
3018 			    !has_sha1_pack_kept_or_nonlocal(&oid) &&
3019 			    !loosened_object_can_be_discarded(&oid, p->mtime))
3020 				if (force_object_loose(&oid, p->mtime))
3021 					die(_("unable to force loose object"));
3022 		}
3023 	}
3024 }
3025 
3026 /*
3027  * This tracks any options which pack-reuse code expects to be on, or which a
3028  * reader of the pack might not understand, and which would therefore prevent
3029  * blind reuse of what we have on disk.
3030  */
3031 static int pack_options_allow_reuse(void)
3032 {
3033 	return pack_to_stdout &&
3034 	       allow_ofs_delta &&
3035 	       !ignore_packed_keep_on_disk &&
3036 	       !ignore_packed_keep_in_core &&
3037 	       (!local || !have_non_local_packs) &&
3038 	       !incremental;
3039 }
3040 
3041 static int get_object_list_from_bitmap(struct rev_info *revs)
3042 {
3043 	if (!(bitmap_git = prepare_bitmap_walk(revs)))
3044 		return -1;
3045 
3046 	if (pack_options_allow_reuse() &&
3047 	    !reuse_partial_packfile_from_bitmap(
3048 			bitmap_git,
3049 			&reuse_packfile,
3050 			&reuse_packfile_objects,
3051 			&reuse_packfile_offset)) {
3052 		assert(reuse_packfile_objects);
3053 		nr_result += reuse_packfile_objects;
3054 		display_progress(progress_state, nr_result);
3055 	}
3056 
3057 	traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
3058 	return 0;
3059 }
3060 
3061 static void record_recent_object(struct object *obj,
3062 				 const char *name,
3063 				 void *data)
3064 {
3065 	oid_array_append(&recent_objects, &obj->oid);
3066 }
3067 
3068 static void record_recent_commit(struct commit *commit, void *data)
3069 {
3070 	oid_array_append(&recent_objects, &commit->object.oid);
3071 }
3072 
3073 static void get_object_list(int ac, const char **av)
3074 {
3075 	struct rev_info revs;
3076 	struct setup_revision_opt s_r_opt = {
3077 		.allow_exclude_promisor_objects = 1,
3078 	};
3079 	char line[1000];
3080 	int flags = 0;
3081 	int save_warning;
3082 
3083 	repo_init_revisions(the_repository, &revs, NULL);
3084 	save_commit_buffer = 0;
3085 	setup_revisions(ac, av, &revs, &s_r_opt);
3086 
3087 	/* make sure shallows are read */
3088 	is_repository_shallow(the_repository);
3089 
3090 	save_warning = warn_on_object_refname_ambiguity;
3091 	warn_on_object_refname_ambiguity = 0;
3092 
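	/*
	 * Read revision arguments from stdin until a blank line: "--not"
	 * flips the UNINTERESTING flag, "--shallow <oid>" registers a
	 * shallow boundary, and anything else is a normal revision
	 * argument.
	 */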
3093 	while (fgets(line, sizeof(line), stdin) != NULL) {
3094 		int len = strlen(line);
3095 		if (len && line[len - 1] == '\n')
3096 			line[--len] = 0;
3097 		if (!len)
3098 			break;
3099 		if (*line == '-') {
3100 			if (!strcmp(line, "--not")) {
3101 				flags ^= UNINTERESTING;
3102 				write_bitmap_index = 0;
3103 				continue;
3104 			}
3105 			if (starts_with(line, "--shallow ")) {
3106 				struct object_id oid;
3107 				if (get_oid_hex(line + 10, &oid))
3108 					die("not an SHA-1 '%s'", line + 10);
3109 				register_shallow(the_repository, &oid);
3110 				use_bitmap_index = 0;
3111 				continue;
3112 			}
3113 			die(_("not a rev '%s'"), line);
3114 		}
3115 		if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
3116 			die(_("bad revision '%s'"), line);
3117 	}
3118 
3119 	warn_on_object_refname_ambiguity = save_warning;
3120 
3121 	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
3122 		return;
3123 
3124 	if (use_delta_islands)
3125 		load_delta_islands(the_repository, progress);
3126 
3127 	if (prepare_revision_walk(&revs))
3128 		die(_("revision walk setup failed"));
3129 	mark_edges_uninteresting(&revs, show_edge, sparse);
3130 
3131 	if (!fn_show_object)
3132 		fn_show_object = show_object;
3133 	traverse_commit_list_filtered(&filter_options, &revs,
3134 				      show_commit, fn_show_object, NULL,
3135 				      NULL);
3136 
3137 	if (unpack_unreachable_expiration) {
3138 		revs.ignore_missing_links = 1;
3139 		if (add_unseen_recent_objects_to_traversal(&revs,
3140 				unpack_unreachable_expiration))
3141 			die(_("unable to add recent objects"));
3142 		if (prepare_revision_walk(&revs))
3143 			die(_("revision walk setup failed"));
3144 		traverse_commit_list(&revs, record_recent_commit,
3145 				     record_recent_object, NULL);
3146 	}
3147 
3148 	if (keep_unreachable)
3149 		add_objects_in_unpacked_packs();
3150 	if (pack_loose_unreachable)
3151 		add_unreachable_loose_objects();
3152 	if (unpack_unreachable)
3153 		loosen_unused_packed_objects();
3154 
3155 	oid_array_clear(&recent_objects);
3156 }
3157 
3158 static void add_extra_kept_packs(const struct string_list *names)
3159 {
3160 	struct packed_git *p;
3161 
3162 	if (!names->nr)
3163 		return;
3164 
3165 	for (p = get_all_packs(the_repository); p; p = p->next) {
3166 		const char *name = basename(p->pack_name);
3167 		int i;
3168 
3169 		if (!p->pack_local)
3170 			continue;
3171 
3172 		for (i = 0; i < names->nr; i++)
3173 			if (!fspathcmp(name, names->items[i].string))
3174 				break;
3175 
3176 		if (i < names->nr) {
3177 			p->pack_keep_in_core = 1;
3178 			ignore_packed_keep_in_core = 1;
3179 			continue;
3180 		}
3181 	}
3182 }
3183 
3184 static int option_parse_index_version(const struct option *opt,
3185 				      const char *arg, int unset)
3186 {
3187 	char *c;
3188 	const char *val = arg;
3189 
3190 	BUG_ON_OPT_NEG(unset);
3191 
3192 	pack_idx_opts.version = strtoul(val, &c, 10);
3193 	if (pack_idx_opts.version > 2)
3194 		die(_("unsupported index version %s"), val);
3195 	if (*c == ',' && c[1])
3196 		pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
3197 	if (*c || pack_idx_opts.off32_limit & 0x80000000)
3198 		die(_("bad index version '%s'"), val);
3199 	return 0;
3200 }
3201 
3202 static int option_parse_unpack_unreachable(const struct option *opt,
3203 					   const char *arg, int unset)
3204 {
3205 	if (unset) {
3206 		unpack_unreachable = 0;
3207 		unpack_unreachable_expiration = 0;
3208 	}
3209 	else {
3210 		unpack_unreachable = 1;
3211 		if (arg)
3212 			unpack_unreachable_expiration = approxidate(arg);
3213 	}
3214 	return 0;
3215 }
3216 
3217 int cmd_pack_objects(int argc, const char **argv, const char *prefix)
3218 {
3219 	int use_internal_rev_list = 0;
3220 	int shallow = 0;
3221 	int all_progress_implied = 0;
3222 	struct argv_array rp = ARGV_ARRAY_INIT;
3223 	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
3224 	int rev_list_index = 0;
3225 	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
3226 	struct option pack_objects_options[] = {
3227 		OPT_SET_INT('q', "quiet", &progress,
3228 			    N_("do not show progress meter"), 0),
3229 		OPT_SET_INT(0, "progress", &progress,
3230 			    N_("show progress meter"), 1),
3231 		OPT_SET_INT(0, "all-progress", &progress,
3232 			    N_("show progress meter during object writing phase"), 2),
3233 		OPT_BOOL(0, "all-progress-implied",
3234 			 &all_progress_implied,
3235 			 N_("similar to --all-progress when progress meter is shown")),
3236 		{ OPTION_CALLBACK, 0, "index-version", NULL, N_("<version>[,<offset>]"),
3237 		  N_("write the pack index file in the specified idx format version"),
3238 		  PARSE_OPT_NONEG, option_parse_index_version },
3239 		OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
3240 			      N_("maximum size of each output pack file")),
3241 		OPT_BOOL(0, "local", &local,
3242 			 N_("ignore borrowed objects from alternate object store")),
3243 		OPT_BOOL(0, "incremental", &incremental,
3244 			 N_("ignore packed objects")),
3245 		OPT_INTEGER(0, "window", &window,
3246 			    N_("limit pack window by objects")),
3247 		OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
3248 			      N_("limit pack window by memory in addition to object limit")),
3249 		OPT_INTEGER(0, "depth", &depth,
3250 			    N_("maximum length of delta chain allowed in the resulting pack")),
3251 		OPT_BOOL(0, "reuse-delta", &reuse_delta,
3252 			 N_("reuse existing deltas")),
3253 		OPT_BOOL(0, "reuse-object", &reuse_object,
3254 			 N_("reuse existing objects")),
3255 		OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
3256 			 N_("use OFS_DELTA objects")),
3257 		OPT_INTEGER(0, "threads", &delta_search_threads,
3258 			    N_("use threads when searching for best delta matches")),
3259 		OPT_BOOL(0, "non-empty", &non_empty,
3260 			 N_("do not create an empty pack output")),
3261 		OPT_BOOL(0, "revs", &use_internal_rev_list,
3262 			 N_("read revision arguments from standard input")),
3263 		OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
3264 			      N_("limit the objects to those that are not yet packed"),
3265 			      1, PARSE_OPT_NONEG),
3266 		OPT_SET_INT_F(0, "all", &rev_list_all,
3267 			      N_("include objects reachable from any reference"),
3268 			      1, PARSE_OPT_NONEG),
3269 		OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
3270 			      N_("include objects referred by reflog entries"),
3271 			      1, PARSE_OPT_NONEG),
3272 		OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
3273 			      N_("include objects referred to by the index"),
3274 			      1, PARSE_OPT_NONEG),
3275 		OPT_BOOL(0, "stdout", &pack_to_stdout,
3276 			 N_("output pack to stdout")),
3277 		OPT_BOOL(0, "include-tag", &include_tag,
3278 			 N_("include tag objects that refer to objects to be packed")),
3279 		OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
3280 			 N_("keep unreachable objects")),
3281 		OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
3282 			 N_("pack loose unreachable objects")),
3283 		{ OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
3284 		  N_("unpack unreachable objects newer than <time>"),
3285 		  PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
3286 		OPT_BOOL(0, "sparse", &sparse,
3287 			 N_("use the sparse reachability algorithm")),
3288 		OPT_BOOL(0, "thin", &thin,
3289 			 N_("create thin packs")),
3290 		OPT_BOOL(0, "shallow", &shallow,
3291 			 N_("create packs suitable for shallow fetches")),
3292 		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
3293 			 N_("ignore packs that have companion .keep file")),
3294 		OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
3295 				N_("ignore this pack")),
3296 		OPT_INTEGER(0, "compression", &pack_compression_level,
3297 			    N_("pack compression level")),
3298 		OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
3299 			    N_("do not hide commits by grafts"), 0),
3300 		OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
3301 			 N_("use a bitmap index if available to speed up counting objects")),
3302 		OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
3303 			    N_("write a bitmap index together with the pack index"),
3304 			    WRITE_BITMAP_TRUE),
3305 		OPT_SET_INT_F(0, "write-bitmap-index-quiet",
3306 			      &write_bitmap_index,
3307 			      N_("write a bitmap index if possible"),
3308 			      WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
3309 		OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
3310 		{ OPTION_CALLBACK, 0, "missing", NULL, N_("action"),
3311 		  N_("handling for missing objects"), PARSE_OPT_NONEG,
3312 		  option_parse_missing_action },
3313 		OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
3314 			 N_("do not pack objects in promisor packfiles")),
3315 		OPT_BOOL(0, "delta-islands", &use_delta_islands,
3316 			 N_("respect islands during delta compression")),
3317 		OPT_END(),
3318 	};
3319 
3320 	if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
3321 		BUG("too many dfs states, increase OE_DFS_STATE_BITS");
3322 
3323 	read_replace_refs = 0;
3324 
3325 	sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
3326 	prepare_repo_settings(the_repository);
3327 	if (!sparse && the_repository->settings.pack_use_sparse != -1)
3328 		sparse = the_repository->settings.pack_use_sparse;
3329 
3330 	reset_pack_idx_option(&pack_idx_opts);
3331 	git_config(git_pack_config, NULL);
3332 
3333 	progress = isatty(2);
3334 	argc = parse_options(argc, argv, prefix, pack_objects_options,
3335 			     pack_usage, 0);
3336 
3337 	if (argc) {
3338 		base_name = argv[0];
3339 		argc--;
3340 	}
3341 	if (pack_to_stdout != !base_name || argc)
3342 		usage_with_options(pack_usage, pack_objects_options);
3343 
3344 	if (depth >= (1 << OE_DEPTH_BITS)) {
3345 		warning(_("delta chain depth %d is too deep, forcing %d"),
3346 			depth, (1 << OE_DEPTH_BITS) - 1);
3347 		depth = (1 << OE_DEPTH_BITS) - 1;
3348 	}
3349 	if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
3350 		warning(_("pack.deltaCacheLimit is too high, forcing %d"),
3351 			(1U << OE_Z_DELTA_BITS) - 1);
3352 		cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
3353 	}
3354 
3355 	argv_array_push(&rp, "pack-objects");
3356 	if (thin) {
3357 		use_internal_rev_list = 1;
3358 		argv_array_push(&rp, shallow
3359 				? "--objects-edge-aggressive"
3360 				: "--objects-edge");
3361 	} else
3362 		argv_array_push(&rp, "--objects");
3363 
3364 	if (rev_list_all) {
3365 		use_internal_rev_list = 1;
3366 		argv_array_push(&rp, "--all");
3367 	}
3368 	if (rev_list_reflog) {
3369 		use_internal_rev_list = 1;
3370 		argv_array_push(&rp, "--reflog");
3371 	}
3372 	if (rev_list_index) {
3373 		use_internal_rev_list = 1;
3374 		argv_array_push(&rp, "--indexed-objects");
3375 	}
3376 	if (rev_list_unpacked) {
3377 		use_internal_rev_list = 1;
3378 		argv_array_push(&rp, "--unpacked");
3379 	}
3380 
3381 	if (exclude_promisor_objects) {
3382 		use_internal_rev_list = 1;
3383 		fetch_if_missing = 0;
3384 		argv_array_push(&rp, "--exclude-promisor-objects");
3385 	}
3386 	if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
3387 		use_internal_rev_list = 1;
3388 
	if (!reuse_object)
		reuse_delta = 0;
	if (pack_compression_level == -1)
		pack_compression_level = Z_DEFAULT_COMPRESSION;
	else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
		die(_("bad pack compression level %d"), pack_compression_level);

	if (!delta_search_threads)	/* --threads=0 means autodetect */
		delta_search_threads = online_cpus();

	if (!HAVE_THREADS && delta_search_threads != 1)
		warning(_("no threads support, ignoring --threads"));
	if (!pack_to_stdout && !pack_size_limit)
		pack_size_limit = pack_size_limit_cfg;
	if (pack_to_stdout && pack_size_limit)
		die(_("--max-pack-size cannot be used to build a pack for transfer"));
	if (pack_size_limit && pack_size_limit < 1024*1024) {
		warning(_("minimum pack size limit is 1 MiB"));
		pack_size_limit = 1024*1024;
	}

	if (!pack_to_stdout && thin)
		die(_("--thin cannot be used to build an indexable pack"));

	if (keep_unreachable && unpack_unreachable)
		die(_("--keep-unreachable and --unpack-unreachable are incompatible"));
	if (!rev_list_all || !rev_list_reflog || !rev_list_index)
		unpack_unreachable_expiration = 0;

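	/*
	 * An object filter produces a deliberately incomplete pack, which
	 * only makes sense when streaming to the caller; the bitmap walker
	 * does not know how to apply filters, so disable bitmap reuse.
	 */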
	if (filter_options.choice) {
		if (!pack_to_stdout)
			die(_("cannot use --filter without --stdout"));
		use_bitmap_index = 0;
	}

	/*
	 * "soft" reasons not to use bitmaps - for an on-disk repack we by
	 * default want:
	 *
	 * - to produce a good pack (with a bitmap index, not-yet-packed
	 *   objects are packed in suboptimal order).
	 *
	 * - to use the more robust pack-generation codepath (avoiding
	 *   possible bugs in the bitmap code and possible bitmap index
	 *   corruption).
	 */
	if (!pack_to_stdout)
		use_bitmap_index_default = 0;

	if (use_bitmap_index < 0)
		use_bitmap_index = use_bitmap_index_default;

	/* "hard" reasons not to use bitmaps; these just won't work at all */
	if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
		use_bitmap_index = 0;

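	/* a bitmap index can only describe an on-disk pack of all refs */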
	if (pack_to_stdout || !rev_list_all)
		write_bitmap_index = 0;

	if (use_delta_islands)
		argv_array_push(&rp, "--topo-order");

	if (progress && all_progress_implied)
		progress = 2;

	add_extra_kept_packs(&keep_pack_list);
	if (ignore_packed_keep_on_disk) {
		struct packed_git *p;
		for (p = get_all_packs(the_repository); p; p = p->next)
			if (p->pack_local && p->pack_keep)
				break;
		if (!p) /* no keep-able packs found */
			ignore_packed_keep_on_disk = 0;
	}
	if (local) {
		/*
		 * unlike ignore_packed_keep_on_disk above, we do not
		 * want to unset "local" based on looking at packs, as
		 * it also covers non-local objects
		 */
		struct packed_git *p;
		for (p = get_all_packs(the_repository); p; p = p->next) {
			if (!p->pack_local) {
				have_non_local_packs = 1;
				break;
			}
		}
	}

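	/*
	 * Object enumeration: decide which objects go into the pack,
	 * either from the list on stdin or via the internal revision
	 * walk set up above.
	 */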
	trace2_region_enter("pack-objects", "enumerate-objects",
			    the_repository);
	prepare_packing_data(the_repository, &to_pack);

	if (progress)
		progress_state = start_progress(_("Enumerating objects"), 0);
	if (!use_internal_rev_list)
		read_object_list_from_stdin();
	else {
		get_object_list(rp.argc, rp.argv);
		argv_array_clear(&rp);
	}
	cleanup_preferred_base();
	if (include_tag && nr_result)
		for_each_ref(add_ref_tag, NULL);
	stop_progress(&progress_state);
	trace2_region_leave("pack-objects", "enumerate-objects",
			    the_repository);

	if (non_empty && !nr_result)
		return 0;
	if (nr_result) {
		trace2_region_enter("pack-objects", "prepare-pack",
				    the_repository);
		prepare_pack(window, depth);
		trace2_region_leave("pack-objects", "prepare-pack",
				    the_repository);
	}

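	/*
	 * Write out the resulting pack (and its index, unless we are
	 * streaming to stdout); pack_size_limit may split the output
	 * into several packs.
	 */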
	trace2_region_enter("pack-objects", "write-pack-file", the_repository);
	write_pack_file();
	trace2_region_leave("pack-objects", "write-pack-file", the_repository);

	if (progress)
		fprintf_ln(stderr,
			   _("Total %"PRIu32" (delta %"PRIu32"),"
			     " reused %"PRIu32" (delta %"PRIu32")"),
			   written, written_delta, reused, reused_delta);
	return 0;
}