#include "builtin.h"
#include "cache.h"
#include "repository.h"
#include "config.h"
#include "attr.h"
#include "object.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
#include "delta.h"
#include "pack.h"
#include "pack-revindex.h"
#include "csum-file.h"
#include "tree-walk.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "pack-objects.h"
#include "progress.h"
#include "refs.h"
#include "streaming.h"
#include "thread-utils.h"
#include "pack-bitmap.h"
#include "delta-islands.h"
#include "reachable.h"
#include "oid-array.h"
#include "strvec.h"
#include "list.h"
#include "packfile.h"
#include "object-store.h"
#include "dir.h"
#include "midx.h"
#include "trace2.h"
#include "shallow.h"
#include "promisor-remote.h"

/*
 * Objects we are going to pack are collected in the `to_pack` structure.
 * It contains an array (dynamically expanded) of the object data, and a map
 * that can resolve SHA1s to their position in the array.
 */
static struct packing_data to_pack;

static inline struct object_entry *oe_delta(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (!e->delta_idx)
		return NULL;
	if (e->ext_base)
		return &pack->ext_bases[e->delta_idx - 1];
	else
		return &pack->objects[e->delta_idx - 1];
}

static inline unsigned long oe_delta_size(struct packing_data *pack,
					  const struct object_entry *e)
{
	if (e->delta_size_valid)
		return e->delta_size_;

	/*
	 * pack->delta_size[] can't be NULL because oe_set_delta_size()
	 * must have been called when a new delta is saved with
	 * oe_set_delta().
	 * If oe_delta() returns NULL (i.e. default state, which means
	 * delta_size_valid is also false), then the caller must never
	 * call oe_delta_size().
	 */
	return pack->delta_size[e - pack->objects];
}

unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e);

static inline unsigned long oe_size(struct packing_data *pack,
				    const struct object_entry *e)
{
	if (e->size_valid)
		return e->size_;

	return oe_get_size_slow(pack, e);
}

static inline void oe_set_delta(struct packing_data *pack,
				struct object_entry *e,
				struct object_entry *delta)
{
	if (delta)
		e->delta_idx = (delta - pack->objects) + 1;
	else
		e->delta_idx = 0;
}

static inline struct object_entry *oe_delta_sibling(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_sibling_idx)
		return &pack->objects[e->delta_sibling_idx - 1];
	return NULL;
}

static inline struct object_entry *oe_delta_child(
		const struct packing_data *pack,
		const struct object_entry *e)
{
	if (e->delta_child_idx)
		return &pack->objects[e->delta_child_idx - 1];
	return NULL;
}

static inline void oe_set_delta_child(struct packing_data *pack,
				      struct object_entry *e,
				      struct object_entry *delta)
{
	if (delta)
		e->delta_child_idx = (delta - pack->objects) + 1;
	else
		e->delta_child_idx = 0;
}

static inline void oe_set_delta_sibling(struct packing_data *pack,
					struct object_entry *e,
					struct object_entry *delta)
{
	if (delta)
		e->delta_sibling_idx = (delta - pack->objects) + 1;
	else
		e->delta_sibling_idx = 0;
}

static inline void oe_set_size(struct packing_data *pack,
			       struct object_entry *e,
			       unsigned long size)
{
	if (size < pack->oe_size_limit) {
		e->size_ = size;
		e->size_valid = 1;
	} else {
		e->size_valid = 0;
		if (oe_get_size_slow(pack, e) != size)
			BUG("'size' is supposed to be the object size!");
	}
}

static inline void oe_set_delta_size(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned long size)
{
	if (size < pack->oe_delta_size_limit) {
		e->delta_size_ = size;
		e->delta_size_valid = 1;
	} else {
		packing_data_lock(pack);
		if (!pack->delta_size)
			ALLOC_ARRAY(pack->delta_size, pack->nr_alloc);
		packing_data_unlock(pack);

		pack->delta_size[e - pack->objects] = size;
		e->delta_size_valid = 0;
	}
}

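/*
 * Convenience macros binding the oe_*() accessors to the file-scope
 * "to_pack" structure, so call sites need not pass it explicitly.
 */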
#define IN_PACK(obj) oe_in_pack(&to_pack, obj)
#define SIZE(obj) oe_size(&to_pack, obj)
#define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
#define DELTA(obj) oe_delta(&to_pack, obj)
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)

static const char *pack_usage[] = {
	N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
	N_("git pack-objects [<options>...] <base-name> [< <ref-list> | < <object-list>]"),
	NULL
};

static struct pack_idx_entry **written_list;
static uint32_t nr_result, nr_written, nr_seen;
static struct bitmap_index *bitmap_git;
static uint32_t write_layer;

static int non_empty;
static int reuse_delta = 1, reuse_object = 1;
static int keep_unreachable, unpack_unreachable, include_tag;
static timestamp_t unpack_unreachable_expiration;
static int pack_loose_unreachable;
static int local;
static int have_non_local_packs;
static int incremental;
static int ignore_packed_keep_on_disk;
static int ignore_packed_keep_in_core;
static int allow_ofs_delta;
static struct pack_idx_option pack_idx_opts;
static const char *base_name;
static int progress = 1;
static int window = 10;
static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int num_preferred_base;
static struct progress *progress_state;

static struct packed_git *reuse_packfile;
static uint32_t reuse_packfile_objects;
static struct bitmap *reuse_packfile_bitmap;

static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
static int allow_pack_reuse = 1;
static enum {
	WRITE_BITMAP_FALSE = 0,
	WRITE_BITMAP_QUIET,
	WRITE_BITMAP_TRUE,
} write_bitmap_index;
static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE;

static int exclude_promisor_objects;

static int use_delta_islands;

static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static unsigned long cache_max_small_delta_size = 1000;

static unsigned long window_memory_limit = 0;

static struct list_objects_filter_options filter_options;

static struct string_list uri_protocols = STRING_LIST_INIT_NODUP;

enum missing_action {
	MA_ERROR = 0,      /* fail if any missing objects are encountered */
	MA_ALLOW_ANY,      /* silently allow ALL missing objects */
	MA_ALLOW_PROMISOR, /* silently allow all missing PROMISOR objects */
};
static enum missing_action arg_missing_action;
static show_object_fn fn_show_object;

struct configured_exclusion {
	struct oidmap_entry e;
	char *pack_hash_hex;
	char *uri;
};
static struct oidmap configured_exclusions;

static struct oidset excluded_by_config;

/*
 * stats
 */
static uint32_t written, written_delta;
static uint32_t reused, reused_delta;

/*
 * Indexed commits
 */
static struct commit **indexed_commits;
static unsigned int indexed_commits_nr;
static unsigned int indexed_commits_alloc;

static void index_commit_for_bitmap(struct commit *commit)
{
	if (indexed_commits_nr >= indexed_commits_alloc) {
		indexed_commits_alloc = (indexed_commits_alloc + 32) * 2;
		REALLOC_ARRAY(indexed_commits, indexed_commits_alloc);
	}

	indexed_commits[indexed_commits_nr++] = commit;
}

static void *get_delta(struct object_entry *entry)
{
	unsigned long size, base_size, delta_size;
	void *buf, *base_buf, *delta_buf;
	enum object_type type;

	buf = read_object_file(&entry->idx.oid, &type, &size);
	if (!buf)
		die(_("unable to read %s"), oid_to_hex(&entry->idx.oid));
	base_buf = read_object_file(&DELTA(entry)->idx.oid, &type,
				    &base_size);
	if (!base_buf)
		die("unable to read %s",
		    oid_to_hex(&DELTA(entry)->idx.oid));
	delta_buf = diff_delta(base_buf, base_size,
			       buf, size, &delta_size, 0);
	/*
	 * We successfully computed this delta once but dropped it for
	 * memory reasons. Something is very wrong if this time we
	 * recompute and create a different delta.
	 */
	if (!delta_buf || delta_size != DELTA_SIZE(entry))
		BUG("delta size changed");
	free(buf);
	free(base_buf);
	return delta_buf;
}

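/*
 * Deflate the whole buffer pointed to by *pptr in one go, replacing
 * it with a newly allocated buffer that holds the compressed bytes
 * and freeing the input buffer. Returns the compressed length; the
 * call site in write_no_reuse_object() follows the pattern
 *
 *	datalen = do_compress(&buf, size);
 *	...
 *	hashwrite(f, buf, datalen);
 *	free(buf);
 */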
static unsigned long do_compress(void **pptr, unsigned long size)
{
	git_zstream stream;
	void *in, *out;
	unsigned long maxsize;

	git_deflate_init(&stream, pack_compression_level);
	maxsize = git_deflate_bound(&stream, size);

	in = *pptr;
	out = xmalloc(maxsize);
	*pptr = out;

	stream.next_in = in;
	stream.avail_in = size;
	stream.next_out = out;
	stream.avail_out = maxsize;
	while (git_deflate(&stream, Z_FINISH) == Z_OK)
		; /* nothing */
	git_deflate_end(&stream);

	free(in);
	return stream.total_out;
}

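/*
 * Stream a blob that is too big to hold in memory: read it from the
 * istream in 16 KiB chunks, deflate the chunks as a single zlib
 * stream (issuing Z_FINISH once the input is exhausted), and return
 * the number of compressed bytes written to the pack.
 */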
static unsigned long write_large_blob_data(struct git_istream *st, struct hashfile *f,
					   const struct object_id *oid)
{
	git_zstream stream;
	unsigned char ibuf[1024 * 16];
	unsigned char obuf[1024 * 16];
	unsigned long olen = 0;

	git_deflate_init(&stream, pack_compression_level);

	for (;;) {
		ssize_t readlen;
		int zret = Z_OK;
		readlen = read_istream(st, ibuf, sizeof(ibuf));
		if (readlen == -1)
			die(_("unable to read %s"), oid_to_hex(oid));

		stream.next_in = ibuf;
		stream.avail_in = readlen;
		while ((stream.avail_in || readlen == 0) &&
		       (zret == Z_OK || zret == Z_BUF_ERROR)) {
			stream.next_out = obuf;
			stream.avail_out = sizeof(obuf);
			zret = git_deflate(&stream, readlen ? 0 : Z_FINISH);
			hashwrite(f, obuf, stream.next_out - obuf);
			olen += stream.next_out - obuf;
		}
		if (stream.avail_in)
			die(_("deflate error (%d)"), zret);
		if (readlen == 0) {
			if (zret != Z_STREAM_END)
				die(_("deflate error (%d)"), zret);
			break;
		}
	}
	git_deflate_end(&stream);
	return olen;
}

/*
 * we are going to reuse the existing object data as is.  make
 * sure it is not corrupt.
 */
static int check_pack_inflate(struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len,
		unsigned long expect)
{
	git_zstream stream;
	unsigned char fakebuf[4096], *in;
	int st;

	memset(&stream, 0, sizeof(stream));
	git_inflate_init(&stream);
	do {
		in = use_pack(p, w_curs, offset, &stream.avail_in);
		stream.next_in = in;
		stream.next_out = fakebuf;
		stream.avail_out = sizeof(fakebuf);
		st = git_inflate(&stream, Z_FINISH);
		offset += stream.next_in - in;
	} while (st == Z_OK || st == Z_BUF_ERROR);
	git_inflate_end(&stream);
	return (st == Z_STREAM_END &&
		stream.total_out == expect &&
		stream.total_in == len) ? 0 : -1;
}

static void copy_pack_data(struct hashfile *f,
		struct packed_git *p,
		struct pack_window **w_curs,
		off_t offset,
		off_t len)
{
	unsigned char *in;
	unsigned long avail;

	while (len) {
		in = use_pack(p, w_curs, offset, &avail);
		if (avail > len)
			avail = (unsigned long)len;
		hashwrite(f, in, avail);
		offset += avail;
		len -= avail;
	}
}

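/*
 * Compare an entry's size against "rhs" without taking the slow path
 * when it can be avoided: an invalid cached size means the real size
 * is at least oe_size_limit, so any smaller "rhs" must compare below
 * it.
 */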
static inline int oe_size_greater_than(struct packing_data *pack,
				       const struct object_entry *lhs,
				       unsigned long rhs)
{
	if (lhs->size_valid)
		return lhs->size_ > rhs;
	if (rhs < pack->oe_size_limit) /* rhs < 2^x <= lhs ? */
		return 1;
	return oe_get_size_slow(pack, lhs) > rhs;
}

/* Return 0 if we will bust the pack-size limit */
static unsigned long write_no_reuse_object(struct hashfile *f, struct object_entry *entry,
					   unsigned long limit, int usable_delta)
{
	unsigned long size, datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	enum object_type type;
	void *buf;
	struct git_istream *st = NULL;
	const unsigned hashsz = the_hash_algo->rawsz;

	if (!usable_delta) {
		if (oe_type(entry) == OBJ_BLOB &&
		    oe_size_greater_than(&to_pack, entry, big_file_threshold) &&
		    (st = open_istream(the_repository, &entry->idx.oid, &type,
				       &size, NULL)) != NULL)
			buf = NULL;
		else {
			buf = read_object_file(&entry->idx.oid, &type, &size);
			if (!buf)
				die(_("unable to read %s"),
				    oid_to_hex(&entry->idx.oid));
		}
		/*
		 * make sure no cached delta data remains from a
		 * previous attempt before a pack split occurred.
		 */
		FREE_AND_NULL(entry->delta_data);
		entry->z_delta_size = 0;
	} else if (entry->delta_data) {
		size = DELTA_SIZE(entry);
		buf = entry->delta_data;
		entry->delta_data = NULL;
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	} else {
		buf = get_delta(entry);
		size = DELTA_SIZE(entry);
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	}

	if (st)	/* large blob case, just assume we don't compress well */
		datalen = size;
	else if (entry->z_delta_size)
		datalen = entry->z_delta_size;
	else
		datalen = do_compress(&buf, size);

	/*
	 * The object header is a byte of 'type' followed by zero or
	 * more bytes of length.
	 */
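	/*
	 * Concretely, the first byte carries the type in bits 4-6 and
	 * the low four bits of the size; the MSB of each byte says
	 * whether another length byte follows, with further size bits
	 * stored seven at a time, least significant group first.
	 */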
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, size);

	if (type == OBJ_OFS_DELTA) {
		/*
		 * Deltas with relative base contain an additional
		 * encoding of the relative offset for the delta
		 * base from this object's position in the pack.
		 */
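		/*
		 * The offset is emitted most significant byte first, with
		 * bit 7 of each byte acting as a continuation flag and an
		 * off-by-one bias applied to every continuation byte; e.g.
		 * an offset of 128 encodes as { 0x80, 0x00 }.
		 */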
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
	} else if (type == OBJ_REF_DELTA) {
		/*
		 * Deltas with a base reference contain
		 * additional bytes for the base object ID.
		 */
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			if (st)
				close_istream(st);
			free(buf);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	if (st) {
		datalen = write_large_blob_data(st, f, &entry->idx.oid);
		close_istream(st);
	} else {
		hashwrite(f, buf, datalen);
		free(buf);
	}

	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
				unsigned long limit, int usable_delta)
{
	struct packed_git *p = IN_PACK(entry);
	struct pack_window *w_curs = NULL;
	uint32_t pos;
	off_t offset;
	enum object_type type = oe_type(entry);
	off_t datalen;
	unsigned char header[MAX_PACK_OBJECT_HEADER],
		      dheader[MAX_PACK_OBJECT_HEADER];
	unsigned hdrlen;
	const unsigned hashsz = the_hash_algo->rawsz;
	unsigned long entry_size = SIZE(entry);

	if (DELTA(entry))
		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
			OBJ_OFS_DELTA : OBJ_REF_DELTA;
	hdrlen = encode_in_pack_object_header(header, sizeof(header),
					      type, entry_size);

	offset = entry->in_pack_offset;
	if (offset_to_pack_pos(p, offset, &pos) < 0)
		die(_("write_reuse_object: could not locate %s, expected at "
		      "offset %"PRIuMAX" in pack %s"),
		    oid_to_hex(&entry->idx.oid), (uintmax_t)offset,
		    p->pack_name);
	datalen = pack_pos_to_offset(p, pos + 1) - offset;
	if (!pack_to_stdout && p->index_version > 1 &&
	    check_pack_crc(p, &w_curs, offset, datalen,
			   pack_pos_to_index(p, pos))) {
		error(_("bad packed object CRC for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	offset += entry->in_pack_header_size;
	datalen -= entry->in_pack_header_size;

	if (!pack_to_stdout && p->index_version == 1 &&
	    check_pack_inflate(p, &w_curs, offset, datalen, entry_size)) {
		error(_("corrupt packed object for %s"),
		      oid_to_hex(&entry->idx.oid));
		unuse_pack(&w_curs);
		return write_no_reuse_object(f, entry, limit, usable_delta);
	}

	if (type == OBJ_OFS_DELTA) {
		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
		unsigned pos = sizeof(dheader) - 1;
		dheader[pos] = ofs & 127;
		while (ofs >>= 7)
			dheader[--pos] = 128 | (--ofs & 127);
		if (limit && hdrlen + sizeof(dheader) - pos + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
		hdrlen += sizeof(dheader) - pos;
		reused_delta++;
	} else if (type == OBJ_REF_DELTA) {
		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
		hdrlen += hashsz;
		reused_delta++;
	} else {
		if (limit && hdrlen + datalen + hashsz >= limit) {
			unuse_pack(&w_curs);
			return 0;
		}
		hashwrite(f, header, hdrlen);
	}
	copy_pack_data(f, p, &w_curs, offset, datalen);
	unuse_pack(&w_curs);
	reused++;
	return hdrlen + datalen;
}

/* Return 0 if we will bust the pack-size limit */
static off_t write_object(struct hashfile *f,
			  struct object_entry *entry,
			  off_t write_offset)
{
	unsigned long limit;
	off_t len;
	int usable_delta, to_reuse;

	if (!pack_to_stdout)
		crc32_begin(f);

	/* apply size limit if limited packsize and not first object */
	if (!pack_size_limit || !nr_written)
		limit = 0;
	else if (pack_size_limit <= write_offset)
		/*
		 * the earlier object did not fit the limit; avoid
		 * mistaking this with unlimited (i.e. limit = 0).
		 */
		limit = 1;
	else
		limit = pack_size_limit - write_offset;

	if (!DELTA(entry))
		usable_delta = 0;	/* no delta */
	else if (!pack_size_limit)
		usable_delta = 1;	/* unlimited packfile */
	else if (DELTA(entry)->idx.offset == (off_t)-1)
		usable_delta = 0;	/* base was written to another pack */
	else if (DELTA(entry)->idx.offset)
		usable_delta = 1;	/* base already exists in this pack */
	else
		usable_delta = 0;	/* base could end up in another pack */

	if (!reuse_object)
		to_reuse = 0;	/* explicit */
	else if (!IN_PACK(entry))
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (oe_type(entry) == OBJ_REF_DELTA ||
		 oe_type(entry) == OBJ_OFS_DELTA)
				/* check_object() decided it for us ... */
		to_reuse = usable_delta;
				/* ... but pack split may override that */
	else if (oe_type(entry) != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (DELTA(entry))
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (!to_reuse)
		len = write_no_reuse_object(f, entry, limit, usable_delta);
	else
		len = write_reuse_object(f, entry, limit, usable_delta);
	if (!len)
		return 0;

	if (usable_delta)
		written_delta++;
	written++;
	if (!pack_to_stdout)
		entry->idx.crc32 = crc32_end(f);
	return len;
}

enum write_one_status {
	WRITE_ONE_SKIP = -1, /* already written */
	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
	WRITE_ONE_WRITTEN = 1, /* normal */
	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
};

static enum write_one_status write_one(struct hashfile *f,
				       struct object_entry *e,
				       off_t *offset)
{
	off_t size;
	int recursing;

	/*
	 * we set offset to 1 (which is an impossible value) to mark
	 * the fact that this object is involved in "write its base
	 * first before writing a deltified object" recursion.
	 */
	recursing = (e->idx.offset == 1);
	if (recursing) {
		warning(_("recursive delta detected for object %s"),
			oid_to_hex(&e->idx.oid));
		return WRITE_ONE_RECURSIVE;
	} else if (e->idx.offset || e->preferred_base) {
		/* offset is non zero if object is written already. */
		return WRITE_ONE_SKIP;
	}

	/* if we are deltified, write out base object first. */
	if (DELTA(e)) {
		e->idx.offset = 1; /* now recurse */
		switch (write_one(f, DELTA(e), offset)) {
		case WRITE_ONE_RECURSIVE:
			/* we cannot depend on this one */
			SET_DELTA(e, NULL);
			break;
		default:
			break;
		case WRITE_ONE_BREAK:
			e->idx.offset = recursing;
			return WRITE_ONE_BREAK;
		}
	}

	e->idx.offset = *offset;
	size = write_object(f, e, *offset);
	if (!size) {
		e->idx.offset = recursing;
		return WRITE_ONE_BREAK;
	}
	written_list[nr_written++] = &e->idx;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(*offset, size))
		die(_("pack too large for current definition of off_t"));
	*offset += size;
	return WRITE_ONE_WRITTEN;
}

static int mark_tagged(const char *path, const struct object_id *oid, int flag,
		       void *cb_data)
{
	struct object_id peeled;
	struct object_entry *entry = packlist_find(&to_pack, oid);

	if (entry)
		entry->tagged = 1;
	if (!peel_iterated_oid(oid, &peeled)) {
		entry = packlist_find(&to_pack, &peeled);
		if (entry)
			entry->tagged = 1;
	}
	return 0;
}

static inline unsigned char oe_layer(struct packing_data *pack,
				     struct object_entry *e)
{
	if (!pack->layer)
		return 0;
	return pack->layer[e - pack->objects];
}

static inline void add_to_write_order(struct object_entry **wo,
			       unsigned int *endp,
			       struct object_entry *e)
{
	if (e->filled || oe_layer(&to_pack, e) != write_layer)
		return;
	wo[(*endp)++] = e;
	e->filled = 1;
}

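/*
 * Iterative preorder walk over one delta family: the delta_child and
 * delta_sibling links form a left-child, right-sibling tree, so every
 * descendant can be visited without recursion.
 */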
static void add_descendants_to_write_order(struct object_entry **wo,
					   unsigned int *endp,
					   struct object_entry *e)
{
	int add_to_order = 1;
	while (e) {
		if (add_to_order) {
			struct object_entry *s;
			/* add this node... */
			add_to_write_order(wo, endp, e);
			/* all its siblings... */
			for (s = DELTA_SIBLING(e); s; s = DELTA_SIBLING(s)) {
				add_to_write_order(wo, endp, s);
			}
		}
		/* drop down a level to add left subtree nodes if possible */
		if (DELTA_CHILD(e)) {
			add_to_order = 1;
			e = DELTA_CHILD(e);
		} else {
			add_to_order = 0;
			/* our sibling might have some children, it is next */
			if (DELTA_SIBLING(e)) {
				e = DELTA_SIBLING(e);
				continue;
			}
			/* go back to our parent node */
			e = DELTA(e);
			while (e && !DELTA_SIBLING(e)) {
				/* we're on the right side of a subtree, keep
				 * going up until we can go right again */
				e = DELTA(e);
			}
			if (!e) {
				/* done- we hit our original root node */
				return;
			}
			/* pass it off to sibling at this level */
			e = DELTA_SIBLING(e);
		}
	}
}

static void add_family_to_write_order(struct object_entry **wo,
				      unsigned int *endp,
				      struct object_entry *e)
{
	struct object_entry *root;

	for (root = e; DELTA(root); root = DELTA(root))
		; /* nothing */
	add_descendants_to_write_order(wo, endp, root);
}

static void compute_layer_order(struct object_entry **wo, unsigned int *wo_end)
{
	unsigned int i, last_untagged;
	struct object_entry *objects = to_pack.objects;

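	/*
	 * First add the objects in the original recency order, up to
	 * the first tagged tip.
	 */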
	for (i = 0; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			break;
		add_to_write_order(wo, wo_end, &objects[i]);
	}
	last_untagged = i;

	/*
	 * Then fill all the tagged tips.
	 */
	for (; i < to_pack.nr_objects; i++) {
		if (objects[i].tagged)
			add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all remaining commits and tags.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_COMMIT &&
		    oe_type(&objects[i]) != OBJ_TAG)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * And then all the trees.
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (oe_type(&objects[i]) != OBJ_TREE)
			continue;
		add_to_write_order(wo, wo_end, &objects[i]);
	}

	/*
	 * Finally all the rest in really tight order
	 */
	for (i = last_untagged; i < to_pack.nr_objects; i++) {
		if (!objects[i].filled && oe_layer(&to_pack, &objects[i]) == write_layer)
			add_family_to_write_order(wo, wo_end, &objects[i]);
	}
}

static struct object_entry **compute_write_order(void)
{
	uint32_t max_layers = 1;
	unsigned int i, wo_end;

	struct object_entry **wo;
	struct object_entry *objects = to_pack.objects;

	for (i = 0; i < to_pack.nr_objects; i++) {
		objects[i].tagged = 0;
		objects[i].filled = 0;
		SET_DELTA_CHILD(&objects[i], NULL);
		SET_DELTA_SIBLING(&objects[i], NULL);
	}

	/*
	 * Fully connect delta_child/delta_sibling network.
	 * Make sure delta_sibling is sorted in the original
	 * recency order.
	 */
	for (i = to_pack.nr_objects; i > 0;) {
		struct object_entry *e = &objects[--i];
		if (!DELTA(e))
			continue;
		/* Mark me as the first child */
		e->delta_sibling_idx = DELTA(e)->delta_child_idx;
		SET_DELTA_CHILD(DELTA(e), e);
	}

	/*
	 * Mark objects that are at the tip of tags.
	 */
	for_each_tag_ref(mark_tagged, NULL);

	if (use_delta_islands)
		max_layers = compute_pack_layers(&to_pack);

	ALLOC_ARRAY(wo, to_pack.nr_objects);
	wo_end = 0;

	for (; write_layer < max_layers; ++write_layer)
		compute_layer_order(wo, &wo_end);

	if (wo_end != to_pack.nr_objects)
		die(_("ordered %u objects, expected %"PRIu32),
		    wo_end, to_pack.nr_objects);

	return wo;
}


/*
 * A reused set of objects. All objects in a chunk have the same
 * relative position in the original packfile and the generated
 * packfile.
 */

static struct reused_chunk {
	/* The offset of the first object of this chunk in the original
	 * packfile. */
	off_t original;
	/* The difference for "original" minus the offset of the first object of
	 * this chunk in the generated packfile. */
	off_t difference;
} *reused_chunks;
static int reused_chunks_nr;
static int reused_chunks_alloc;

static void record_reused_object(off_t where, off_t offset)
{
	if (reused_chunks_nr && reused_chunks[reused_chunks_nr-1].difference == offset)
		return;

	ALLOC_GROW(reused_chunks, reused_chunks_nr + 1,
		   reused_chunks_alloc);
	reused_chunks[reused_chunks_nr].original = where;
	reused_chunks[reused_chunks_nr].difference = offset;
	reused_chunks_nr++;
}

/*
 * Binary search to find the chunk that "where" is in. Note
 * that we're not looking for an exact match, just the first
 * chunk that contains it (which implicitly ends at the start
 * of the next chunk).
 */
static off_t find_reused_offset(off_t where)
{
	int lo = 0, hi = reused_chunks_nr;
	while (lo < hi) {
		int mi = lo + ((hi - lo) / 2);
		if (where == reused_chunks[mi].original)
			return reused_chunks[mi].difference;
		if (where < reused_chunks[mi].original)
			hi = mi;
		else
			lo = mi + 1;
	}

	/*
	 * The first chunk starts at zero, so we can't have gone below
	 * there.
	 */
	assert(lo);
	return reused_chunks[lo-1].difference;
}

static void write_reused_pack_one(size_t pos, struct hashfile *out,
				  struct pack_window **w_curs)
{
	off_t offset, next, cur;
	enum object_type type;
	unsigned long size;

	offset = pack_pos_to_offset(reuse_packfile, pos);
	next = pack_pos_to_offset(reuse_packfile, pos + 1);

	record_reused_object(offset, offset - hashfile_total(out));

	cur = offset;
	type = unpack_object_header(reuse_packfile, w_curs, &cur, &size);
	assert(type >= 0);

	if (type == OBJ_OFS_DELTA) {
		off_t base_offset;
		off_t fixup;

		unsigned char header[MAX_PACK_OBJECT_HEADER];
		unsigned len;

		base_offset = get_delta_base(reuse_packfile, w_curs, &cur, type, offset);
		assert(base_offset != 0);

		/* Convert to REF_DELTA if we must... */
		if (!allow_ofs_delta) {
			uint32_t base_pos;
			struct object_id base_oid;

			if (offset_to_pack_pos(reuse_packfile, base_offset, &base_pos) < 0)
				die(_("expected object at offset %"PRIuMAX" "
				      "in pack %s"),
				    (uintmax_t)base_offset,
				    reuse_packfile->pack_name);

			nth_packed_object_id(&base_oid, reuse_packfile,
					     pack_pos_to_index(reuse_packfile, base_pos));

			len = encode_in_pack_object_header(header, sizeof(header),
							   OBJ_REF_DELTA, size);
			hashwrite(out, header, len);
			hashwrite(out, base_oid.hash, the_hash_algo->rawsz);
			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
			return;
		}

		/* Otherwise see if we need to rewrite the offset... */
		fixup = find_reused_offset(offset) -
			find_reused_offset(base_offset);
		if (fixup) {
			unsigned char ofs_header[10];
			unsigned i, ofs_len;
			off_t ofs = offset - base_offset - fixup;

			len = encode_in_pack_object_header(header, sizeof(header),
							   OBJ_OFS_DELTA, size);

			i = sizeof(ofs_header) - 1;
			ofs_header[i] = ofs & 127;
			while (ofs >>= 7)
				ofs_header[--i] = 128 | (--ofs & 127);

			ofs_len = sizeof(ofs_header) - i;

			hashwrite(out, header, len);
			hashwrite(out, ofs_header + sizeof(ofs_header) - ofs_len, ofs_len);
			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
			return;
		}

		/* ...otherwise we have no fixup, and can write it verbatim */
	}

	copy_pack_data(out, reuse_packfile, w_curs, offset, next - offset);
}

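/*
 * Copy the longest all-wanted prefix of the reused pack verbatim:
 * scan whole eword_t words of the reuse bitmap that have every bit
 * set and stream the corresponding bytes, minus the pack header, in
 * one copy_pack_data() call. Returns the number of words consumed.
 */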
static size_t write_reused_pack_verbatim(struct hashfile *out,
					 struct pack_window **w_curs)
{
	size_t pos = 0;

	while (pos < reuse_packfile_bitmap->word_alloc &&
			reuse_packfile_bitmap->words[pos] == (eword_t)~0)
		pos++;

	if (pos) {
		off_t to_write;

		written = (pos * BITS_IN_EWORD);
		to_write = pack_pos_to_offset(reuse_packfile, written)
			- sizeof(struct pack_header);

		/* We're recording one chunk, not one object. */
		record_reused_object(sizeof(struct pack_header), 0);
		hashflush(out);
		copy_pack_data(out, reuse_packfile, w_curs,
			sizeof(struct pack_header), to_write);

		display_progress(progress_state, written);
	}
	return pos;
}

static void write_reused_pack(struct hashfile *f)
{
	size_t i = 0;
	uint32_t offset;
	struct pack_window *w_curs = NULL;

	if (allow_ofs_delta)
		i = write_reused_pack_verbatim(f, &w_curs);

	for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
		eword_t word = reuse_packfile_bitmap->words[i];
		size_t pos = (i * BITS_IN_EWORD);

		for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
			if ((word >> offset) == 0)
				break;

			offset += ewah_bit_ctz64(word >> offset);
			/*
			 * Can use bit positions directly, even for MIDX
			 * bitmaps. See comment in try_partial_reuse()
			 * for why.
			 */
			write_reused_pack_one(pos + offset, f, &w_curs);
			display_progress(progress_state, ++written);
		}
	}

	unuse_pack(&w_curs);
}

static void write_excluded_by_configs(void)
{
	struct oidset_iter iter;
	const struct object_id *oid;

	oidset_iter_init(&excluded_by_config, &iter);
	while ((oid = oidset_iter_next(&iter))) {
		struct configured_exclusion *ex =
			oidmap_get(&configured_exclusions, oid);

		if (!ex)
			BUG("configured exclusion wasn't configured");
		write_in_full(1, ex->pack_hash_hex, strlen(ex->pack_hash_hex));
		write_in_full(1, " ", 1);
		write_in_full(1, ex->uri, strlen(ex->uri));
		write_in_full(1, "\n", 1);
	}
}

static const char no_split_warning[] = N_(
"disabling bitmap writing, packs are split due to pack.packSizeLimit"
);

static void write_pack_file(void)
{
	uint32_t i = 0, j;
	struct hashfile *f;
	off_t offset;
	uint32_t nr_remaining = nr_result;
	time_t last_mtime = 0;
	struct object_entry **write_order;

	if (progress > pack_to_stdout)
		progress_state = start_progress(_("Writing objects"), nr_result);
	ALLOC_ARRAY(written_list, to_pack.nr_objects);
	write_order = compute_write_order();

	do {
		unsigned char hash[GIT_MAX_RAWSZ];
		char *pack_tmp_name = NULL;

		if (pack_to_stdout)
			f = hashfd_throughput(1, "<stdout>", progress_state);
		else
			f = create_tmp_packfile(&pack_tmp_name);

		offset = write_pack_header(f, nr_remaining);

		if (reuse_packfile) {
			assert(pack_to_stdout);
			write_reused_pack(f);
			offset = hashfile_total(f);
		}

		nr_written = 0;
		for (; i < to_pack.nr_objects; i++) {
			struct object_entry *e = write_order[i];
			if (write_one(f, e, &offset) == WRITE_ONE_BREAK)
				break;
			display_progress(progress_state, written);
		}

		/*
		 * Did we write the wrong # entries in the header?
		 * If so, rewrite it like in fast-import
		 */
		if (pack_to_stdout) {
			finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
		} else if (nr_written == nr_remaining) {
			finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
		} else {
			int fd = finalize_hashfile(f, hash, 0);
			fixup_pack_header_footer(fd, hash, pack_tmp_name,
						 nr_written, hash, offset);
			close(fd);
			if (write_bitmap_index) {
				if (write_bitmap_index != WRITE_BITMAP_QUIET)
					warning(_(no_split_warning));
				write_bitmap_index = 0;
			}
		}

		if (!pack_to_stdout) {
			struct stat st;
			struct strbuf tmpname = STRBUF_INIT;
			char *idx_tmp_name = NULL;

			/*
			 * Packs are runtime accessed in their mtime
			 * order since newer packs are more likely to contain
			 * younger objects.  So if we are creating multiple
			 * packs then we should modify the mtime of later ones
			 * to preserve this property.
			 */
			if (stat(pack_tmp_name, &st) < 0) {
				warning_errno(_("failed to stat %s"), pack_tmp_name);
			} else if (!last_mtime) {
				last_mtime = st.st_mtime;
			} else {
				struct utimbuf utb;
				utb.actime = st.st_atime;
				utb.modtime = --last_mtime;
				if (utime(pack_tmp_name, &utb) < 0)
					warning_errno(_("failed utime() on %s"), pack_tmp_name);
			}

			strbuf_addf(&tmpname, "%s-%s.", base_name,
				    hash_to_hex(hash));

			if (write_bitmap_index) {
				bitmap_writer_set_checksum(hash);
				bitmap_writer_build_type_index(
					&to_pack, written_list, nr_written);
			}

			stage_tmp_packfiles(&tmpname, pack_tmp_name,
					    written_list, nr_written,
					    &pack_idx_opts, hash, &idx_tmp_name);

			if (write_bitmap_index) {
				size_t tmpname_len = tmpname.len;

				strbuf_addstr(&tmpname, "bitmap");
				stop_progress(&progress_state);

				bitmap_writer_show_progress(progress);
				bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1);
				if (bitmap_writer_build(&to_pack) < 0)
					die(_("failed to write bitmap index"));
				bitmap_writer_finish(written_list, nr_written,
						     tmpname.buf, write_bitmap_options);
				write_bitmap_index = 0;
				strbuf_setlen(&tmpname, tmpname_len);
			}

			rename_tmp_packfile_idx(&tmpname, &idx_tmp_name);

			free(idx_tmp_name);
			strbuf_release(&tmpname);
			free(pack_tmp_name);
			puts(hash_to_hex(hash));
		}

		/* mark written objects as written to previous pack */
		for (j = 0; j < nr_written; j++) {
			written_list[j]->offset = (off_t)-1;
		}
		nr_remaining -= nr_written;
	} while (nr_remaining && i < to_pack.nr_objects);

	free(written_list);
	free(write_order);
	stop_progress(&progress_state);
	if (written != nr_result)
		die(_("wrote %"PRIu32" objects while expecting %"PRIu32),
		    written, nr_result);
	trace2_data_intmax("pack-objects", the_repository,
			   "write_pack_file/wrote", nr_result);
}

static int no_try_delta(const char *path)
{
	static struct attr_check *check;

	if (!check)
		check = attr_check_initl("delta", NULL);
	git_check_attr(the_repository->index, path, check);
	if (ATTR_FALSE(check->items[0].value))
		return 1;
	return 0;
}

/*
 * When adding an object, check whether we have already added it
 * to our packing list. If so, we can skip. However, if we are
 * being asked to exclude it, but the previous mention was to include
 * it, make sure to adjust its flags and tweak our numbers accordingly.
 */
static int have_duplicate_entry(const struct object_id *oid,
				int exclude)
{
	struct object_entry *entry;

	if (reuse_packfile_bitmap &&
	    bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid))
		return 1;

	entry = packlist_find(&to_pack, oid);
	if (!entry)
		return 0;

	if (exclude) {
		if (!entry->preferred_base)
			nr_result--;
		entry->preferred_base = 1;
	}

	return 1;
}

static int want_found_object(const struct object_id *oid, int exclude,
			     struct packed_git *p)
{
	if (exclude)
		return 1;
	if (incremental)
		return 0;

	/*
	 * When asked to do --local (do not include an object that appears in a
	 * pack we borrow from elsewhere) or --honor-pack-keep (do not include
	 * an object that appears in a pack marked with .keep), finding a pack
	 * that matches the criteria is sufficient for us to decide to omit it.
	 * However, even if this pack does not satisfy the criteria, we need to
	 * make sure no copy of this object appears in _any_ pack that would
	 * make us omit the object, so we need to check all the packs.
	 *
	 * We can however first check whether these options can possibly matter;
	 * if they do not matter we know we want the object in the generated pack.
	 * Otherwise, we signal "-1" at the end to tell the caller that we do
	 * not know either way, and it needs to check more packs.
	 */

	/*
	 * Objects in packs borrowed from elsewhere are discarded regardless of
	 * whether they appear in other packs that weren't borrowed.
	 */
	if (local && !p->pack_local)
		return 0;

	/*
	 * Then handle .keep first, as we have a fast(er) path there.
	 */
	if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
		/*
		 * Set the flags for the kept-pack cache to be the ones we want
		 * to ignore.
		 *
		 * That is, if we are ignoring objects in on-disk keep packs,
		 * then we want to search through the on-disk keep and ignore
		 * the in-core ones.
		 */
		unsigned flags = 0;
		if (ignore_packed_keep_on_disk)
			flags |= ON_DISK_KEEP_PACKS;
		if (ignore_packed_keep_in_core)
			flags |= IN_CORE_KEEP_PACKS;

		if (ignore_packed_keep_on_disk && p->pack_keep)
			return 0;
		if (ignore_packed_keep_in_core && p->pack_keep_in_core)
			return 0;
		if (has_object_kept_pack(oid, flags))
			return 0;
	}

	/*
	 * At this point we know definitively that either we don't care about
	 * keep-packs, or the object is not in one. Keep checking other
	 * conditions...
	 */
	if (!local || !have_non_local_packs)
		return 1;

	/* we don't know yet; keep looking for more packs */
	return -1;
}

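/*
 * Check a single pack for "oid": returns the want_found_object()
 * verdict when the object is found there, or -1 when this pack cannot
 * decide (the object is absent, or the pack could not be opened).
 */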
static int want_object_in_pack_one(struct packed_git *p,
				   const struct object_id *oid,
				   int exclude,
				   struct packed_git **found_pack,
				   off_t *found_offset)
{
	off_t offset;

	if (p == *found_pack)
		offset = *found_offset;
	else
		offset = find_pack_entry_one(oid->hash, p);

	if (offset) {
		if (!*found_pack) {
			if (!is_pack_valid(p))
				return -1;
			*found_offset = offset;
			*found_pack = p;
		}
		return want_found_object(oid, exclude, p);
	}
	return -1;
}

/*
 * Check whether we want the object in the pack (e.g., we do not want
 * objects found in non-local stores if the "--local" option was used).
 *
 * If the caller already knows an existing pack it wants to take the object
 * from, that is passed in *found_pack and *found_offset; otherwise this
 * function finds if there is any pack that has the object and returns the pack
 * and its offset in these variables.
 */
static int want_object_in_pack(const struct object_id *oid,
			       int exclude,
			       struct packed_git **found_pack,
			       off_t *found_offset)
{
	int want;
	struct list_head *pos;
	struct multi_pack_index *m;

	if (!exclude && local && has_loose_object_nonlocal(oid))
		return 0;

	/*
	 * If we already know the pack object lives in, start checks from that
	 * pack - in the usual case when neither --local was given nor .keep files
	 * are present we will determine the answer right now.
	 */
	if (*found_pack) {
		want = want_found_object(oid, exclude, *found_pack);
		if (want != -1)
			return want;
	}

	for (m = get_multi_pack_index(the_repository); m; m = m->next) {
		struct pack_entry e;
		if (fill_midx_entry(the_repository, oid, &e, m)) {
			want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
			if (want != -1)
				return want;
		}
	}

	list_for_each(pos, get_packed_git_mru(the_repository)) {
		struct packed_git *p = list_entry(pos, struct packed_git, mru);
		want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
		if (!exclude && want > 0)
			list_move(&p->mru,
				  get_packed_git_mru(the_repository));
		if (want != -1)
			return want;
	}

	if (uri_protocols.nr) {
		struct configured_exclusion *ex =
			oidmap_get(&configured_exclusions, oid);
		int i;
		const char *p;

		if (ex) {
			for (i = 0; i < uri_protocols.nr; i++) {
				if (skip_prefix(ex->uri,
						uri_protocols.items[i].string,
						&p) &&
				    *p == ':') {
					oidset_insert(&excluded_by_config, oid);
					return 0;
				}
			}
		}
	}

	return 1;
}

static void create_object_entry(const struct object_id *oid,
				enum object_type type,
				uint32_t hash,
				int exclude,
				int no_try_delta,
				struct packed_git *found_pack,
				off_t found_offset)
{
	struct object_entry *entry;

	entry = packlist_alloc(&to_pack, oid);
	entry->hash = hash;
	oe_set_type(entry, type);
	if (exclude)
		entry->preferred_base = 1;
	else
		nr_result++;
	if (found_pack) {
		oe_set_in_pack(&to_pack, entry, found_pack);
		entry->in_pack_offset = found_offset;
	}

	entry->no_try_delta = no_try_delta;
}

static const char no_closure_warning[] = N_(
"disabling bitmap writing, as some objects are not being packed"
);

static int add_object_entry(const struct object_id *oid, enum object_type type,
			    const char *name, int exclude)
{
	struct packed_git *found_pack = NULL;
	off_t found_offset = 0;

	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, exclude))
		return 0;

	if (!want_object_in_pack(oid, exclude, &found_pack, &found_offset)) {
		/* The pack is missing an object, so it will not have closure */
		if (write_bitmap_index) {
			if (write_bitmap_index != WRITE_BITMAP_QUIET)
				warning(_(no_closure_warning));
			write_bitmap_index = 0;
		}
		return 0;
	}

	create_object_entry(oid, type, pack_name_hash(name),
			    exclude, name && no_try_delta(name),
			    found_pack, found_offset);
	return 1;
}

static int add_object_entry_from_bitmap(const struct object_id *oid,
					enum object_type type,
					int flags, uint32_t name_hash,
					struct packed_git *pack, off_t offset)
{
	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, 0))
		return 0;

	if (!want_object_in_pack(oid, 0, &pack, &offset))
		return 0;

	create_object_entry(oid, type, name_hash, 0, 0, pack, offset);
	return 1;
}

struct pbase_tree_cache {
	struct object_id oid;
	int ref;
	int temporary;
	void *tree_data;
	unsigned long tree_size;
};

static struct pbase_tree_cache *(pbase_tree_cache[256]);
static int pbase_tree_cache_ix(const struct object_id *oid)
{
	return oid->hash[0] % ARRAY_SIZE(pbase_tree_cache);
}
static int pbase_tree_cache_ix_incr(int ix)
{
	return (ix+1) % ARRAY_SIZE(pbase_tree_cache);
}

static struct pbase_tree {
	struct pbase_tree *next;
	/* This is a phony "cache" entry; we are not
	 * going to evict it or find it through _get()
	 * mechanism -- this is for the toplevel node that
	 * would almost always change with any commit.
	 */
	struct pbase_tree_cache pcache;
} *pbase_tree;

static struct pbase_tree_cache *pbase_tree_get(const struct object_id *oid)
{
	struct pbase_tree_cache *ent, *nent;
	void *data;
	unsigned long size;
	enum object_type type;
	int neigh;
	int my_ix = pbase_tree_cache_ix(oid);
	int available_ix = -1;

	/* pbase-tree-cache acts as a limited hashtable.
	 * your object will be found at your index or within a few
	 * slots after that slot if it is cached.
	 */
	for (neigh = 0; neigh < 8; neigh++) {
		ent = pbase_tree_cache[my_ix];
		if (ent && oideq(&ent->oid, oid)) {
			ent->ref++;
			return ent;
		}
		else if (((available_ix < 0) && (!ent || !ent->ref)) ||
			 ((0 <= available_ix) &&
			  (!ent && pbase_tree_cache[available_ix])))
			available_ix = my_ix;
		if (!ent)
			break;
		my_ix = pbase_tree_cache_ix_incr(my_ix);
	}

	/* Did not find one.  Either we got a bogus request or
	 * we need to read and perhaps cache.
	 */
	data = read_object_file(oid, &type, &size);
	if (!data)
		return NULL;
	if (type != OBJ_TREE) {
		free(data);
		return NULL;
	}

	/* We need to either cache or return a throwaway copy */

	if (available_ix < 0)
		ent = NULL;
	else {
		ent = pbase_tree_cache[available_ix];
		my_ix = available_ix;
	}

	if (!ent) {
		nent = xmalloc(sizeof(*nent));
		nent->temporary = (available_ix < 0);
	}
	else {
		/* evict and reuse */
		free(ent->tree_data);
		nent = ent;
	}
	oidcpy(&nent->oid, oid);
	nent->tree_data = data;
	nent->tree_size = size;
	nent->ref = 1;
	if (!nent->temporary)
		pbase_tree_cache[my_ix] = nent;
	return nent;
}

static void pbase_tree_put(struct pbase_tree_cache *cache)
{
	if (!cache->temporary) {
		cache->ref--;
		return;
	}
	free(cache->tree_data);
	free(cache);
}

static int name_cmp_len(const char *name)
{
	int i;
	for (i = 0; name[i] && name[i] != '\n' && name[i] != '/'; i++)
		;
	return i;
}

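/*
 * Scan one tree level for the entry that matches the first path
 * component of "name" (tree entries are sorted, so the scan can stop
 * early) and recurse into the subtree for any remaining components.
 */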
static void add_pbase_object(struct tree_desc *tree,
			     const char *name,
			     int cmplen,
			     const char *fullname)
{
	struct name_entry entry;
	int cmp;

	while (tree_entry(tree, &entry)) {
		if (S_ISGITLINK(entry.mode))
			continue;
		cmp = tree_entry_len(&entry) != cmplen ? 1 :
		      memcmp(name, entry.path, cmplen);
		if (cmp > 0)
			continue;
		if (cmp < 0)
			return;
		if (name[cmplen] != '/') {
			add_object_entry(&entry.oid,
					 object_type(entry.mode),
					 fullname, 1);
			return;
		}
		if (S_ISDIR(entry.mode)) {
			struct tree_desc sub;
			struct pbase_tree_cache *tree;
			const char *down = name+cmplen+1;
			int downlen = name_cmp_len(down);

			tree = pbase_tree_get(&entry.oid);
			if (!tree)
				return;
			init_tree_desc(&sub, tree->tree_data, tree->tree_size);

			add_pbase_object(&sub, down, downlen, fullname);
			pbase_tree_put(tree);
		}
	}
}

static unsigned *done_pbase_paths;
static int done_pbase_paths_num;
static int done_pbase_paths_alloc;
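/*
 * done_pbase_paths[] is kept sorted in descending hash order.  Return
 * the index of "hash" if it is present; otherwise return -pos-1 where
 * "pos" is the insertion point, mirroring the usual bsearch convention.
 */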
static int done_pbase_path_pos(unsigned hash)
{
	int lo = 0;
	int hi = done_pbase_paths_num;
	while (lo < hi) {
		int mi = lo + (hi - lo) / 2;
		if (done_pbase_paths[mi] == hash)
			return mi;
		if (done_pbase_paths[mi] < hash)
			hi = mi;
		else
			lo = mi + 1;
	}
	return -lo-1;
}

static int check_pbase_path(unsigned hash)
{
	int pos = done_pbase_path_pos(hash);
	if (0 <= pos)
		return 1;
	pos = -pos - 1;
	ALLOC_GROW(done_pbase_paths,
		   done_pbase_paths_num + 1,
		   done_pbase_paths_alloc);
	done_pbase_paths_num++;
	if (pos < done_pbase_paths_num)
		MOVE_ARRAY(done_pbase_paths + pos + 1, done_pbase_paths + pos,
			   done_pbase_paths_num - pos - 1);
	done_pbase_paths[pos] = hash;
	return 0;
}

static void add_preferred_base_object(const char *name)
{
	struct pbase_tree *it;
	int cmplen;
	unsigned hash = pack_name_hash(name);

	if (!num_preferred_base || check_pbase_path(hash))
		return;

	cmplen = name_cmp_len(name);
	for (it = pbase_tree; it; it = it->next) {
		if (cmplen == 0) {
			add_object_entry(&it->pcache.oid, OBJ_TREE, NULL, 1);
		}
		else {
			struct tree_desc tree;
			init_tree_desc(&tree, it->pcache.tree_data, it->pcache.tree_size);
			add_pbase_object(&tree, name, cmplen, name);
		}
	}
}

static void add_preferred_base(struct object_id *oid)
{
	struct pbase_tree *it;
	void *data;
	unsigned long size;
	struct object_id tree_oid;

	if (window <= num_preferred_base++)
		return;

	data = read_object_with_reference(the_repository, oid,
					  tree_type, &size, &tree_oid);
	if (!data)
		return;

	for (it = pbase_tree; it; it = it->next) {
		if (oideq(&it->pcache.oid, &tree_oid)) {
			free(data);
			return;
		}
	}

	CALLOC_ARRAY(it, 1);
	it->next = pbase_tree;
	pbase_tree = it;

	oidcpy(&it->pcache.oid, &tree_oid);
	it->pcache.tree_data = data;
	it->pcache.tree_size = size;
}

static void cleanup_preferred_base(void)
{
	struct pbase_tree *it;
	unsigned i;

	it = pbase_tree;
	pbase_tree = NULL;
	while (it) {
		struct pbase_tree *tmp = it;
		it = tmp->next;
		free(tmp->pcache.tree_data);
		free(tmp);
	}

	for (i = 0; i < ARRAY_SIZE(pbase_tree_cache); i++) {
		if (!pbase_tree_cache[i])
			continue;
		free(pbase_tree_cache[i]->tree_data);
		FREE_AND_NULL(pbase_tree_cache[i]);
	}

	FREE_AND_NULL(done_pbase_paths);
	done_pbase_paths_num = done_pbase_paths_alloc = 0;
}

/*
 * Return 1 iff the object specified by "delta" can be sent
 * literally as a delta against the base in "base_oid". If
 * so, then *base_out will point to the entry in our packing
 * list, or NULL if we must use the external-base list.
 *
 * Depth value does not matter - find_deltas() will
 * never consider a reused delta as the base object to
 * deltify other objects against, in order to avoid
 * circular deltas.
 */
static int can_reuse_delta(const struct object_id *base_oid,
			   struct object_entry *delta,
			   struct object_entry **base_out)
{
	struct object_entry *base;

	/*
	 * First see if we're already sending the base (or it's explicitly in
	 * our "excluded" list).
	 */
	base = packlist_find(&to_pack, base_oid);
	if (base) {
		if (!in_same_island(&delta->idx.oid, &base->idx.oid))
			return 0;
		*base_out = base;
		return 1;
	}

	/*
	 * Otherwise, reachability bitmaps may tell us if the receiver has it,
	 * even if it was buried too deep in history to make it into the
	 * packing list.
	 */
	if (thin && bitmap_has_oid_in_uninteresting(bitmap_git, base_oid)) {
		if (use_delta_islands) {
			if (!in_same_island(&delta->idx.oid, base_oid))
				return 0;
		}
		*base_out = NULL;
		return 1;
	}

	return 0;
}

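/*
 * Batch-fetch from the promisor remote all objects from
 * object_index_start onward that are missing locally, so that the
 * check_object() pass does not trigger one lazy fetch per object;
 * OBJECT_INFO_FOR_PREFETCH makes the presence probe itself skip
 * fetching.
 */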
static void prefetch_to_pack(uint32_t object_index_start)
{
	struct oid_array to_fetch = OID_ARRAY_INIT;
	uint32_t i;

	for (i = object_index_start; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (!oid_object_info_extended(the_repository,
					      &entry->idx.oid,
					      NULL,
					      OBJECT_INFO_FOR_PREFETCH))
			continue;
		oid_array_append(&to_fetch, &entry->idx.oid);
	}
	promisor_remote_get_direct(the_repository,
				   to_fetch.oid, to_fetch.nr);
	oid_array_clear(&to_fetch);
}

static void check_object(struct object_entry *entry, uint32_t object_index)
{
	unsigned long canonical_size;
	enum object_type type;
	struct object_info oi = {.typep = &type, .sizep = &canonical_size};

	if (IN_PACK(entry)) {
		struct packed_git *p = IN_PACK(entry);
		struct pack_window *w_curs = NULL;
		int have_base = 0;
		struct object_id base_ref;
		struct object_entry *base_entry;
		unsigned long used, used_0;
		unsigned long avail;
		off_t ofs;
		unsigned char *buf, c;
		enum object_type type;
		unsigned long in_pack_size;

		buf = use_pack(p, &w_curs, entry->in_pack_offset, &avail);

		/*
		 * We want in_pack_type even if we do not reuse delta
		 * since non-delta representations could still be reused.
		 */
		used = unpack_object_header_buffer(buf, avail,
						   &type,
						   &in_pack_size);
		if (used == 0)
			goto give_up;

		if (type < 0)
			BUG("invalid type %d", type);
		entry->in_pack_type = type;

		/*
		 * Determine if this is a delta and if so whether we can
		 * reuse it or not.  Otherwise let's find out as cheaply as
		 * possible what the actual type and size for this object is.
		 */
		switch (entry->in_pack_type) {
		default:
			/* Not a delta hence we've already got all we need. */
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size);
			entry->in_pack_header_size = used;
			if (oe_type(entry) < OBJ_COMMIT || oe_type(entry) > OBJ_BLOB)
				goto give_up;
			unuse_pack(&w_curs);
			return;
		case OBJ_REF_DELTA:
			if (reuse_delta && !entry->preferred_base) {
				oidread(&base_ref,
					use_pack(p, &w_curs,
						 entry->in_pack_offset + used,
						 NULL));
				have_base = 1;
			}
			entry->in_pack_header_size = used + the_hash_algo->rawsz;
			break;
		case OBJ_OFS_DELTA:
			buf = use_pack(p, &w_curs,
				       entry->in_pack_offset + used, NULL);
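			/*
			 * The base offset is stored as a big-endian
			 * base-128 varint; the "+1" added on each
			 * continuation byte biases the encoding so that
			 * every offset has exactly one representation.
			 */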
			used_0 = 0;
			c = buf[used_0++];
			ofs = c & 127;
			while (c & 128) {
				ofs += 1;
				if (!ofs || MSB(ofs, 7)) {
					error(_("delta base offset overflow in pack for %s"),
					      oid_to_hex(&entry->idx.oid));
					goto give_up;
				}
				c = buf[used_0++];
				ofs = (ofs << 7) + (c & 127);
			}
			ofs = entry->in_pack_offset - ofs;
			if (ofs <= 0 || ofs >= entry->in_pack_offset) {
				error(_("delta base offset out of bound for %s"),
				      oid_to_hex(&entry->idx.oid));
				goto give_up;
			}
			if (reuse_delta && !entry->preferred_base) {
				uint32_t pos;
				if (offset_to_pack_pos(p, ofs, &pos) < 0)
					goto give_up;
				if (!nth_packed_object_id(&base_ref, p,
							  pack_pos_to_index(p, pos)))
					have_base = 1;
			}
			entry->in_pack_header_size = used + used_0;
			break;
		}

		if (have_base &&
		    can_reuse_delta(&base_ref, entry, &base_entry)) {
			oe_set_type(entry, entry->in_pack_type);
			SET_SIZE(entry, in_pack_size); /* delta size */
			SET_DELTA_SIZE(entry, in_pack_size);

			if (base_entry) {
				SET_DELTA(entry, base_entry);
				entry->delta_sibling_idx = base_entry->delta_child_idx;
				SET_DELTA_CHILD(base_entry, entry);
			} else {
				SET_DELTA_EXT(entry, &base_ref);
			}

			unuse_pack(&w_curs);
			return;
		}

		if (oe_type(entry)) {
			off_t delta_pos;

			/*
			 * This must be a delta and we already know what the
			 * final object type is.  Let's extract the actual
			 * object size from the delta header.
			 */
			delta_pos = entry->in_pack_offset + entry->in_pack_header_size;
			canonical_size = get_size_from_delta(p, &w_curs, delta_pos);
			if (canonical_size == 0)
				goto give_up;
			SET_SIZE(entry, canonical_size);
			unuse_pack(&w_curs);
			return;
		}

		/*
		 * No choice but to fall back to the recursive delta walk
		 * with oid_object_info() to find out about the object type
		 * at this point...
		 */
		give_up:
		unuse_pack(&w_curs);
	}

	if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
				     OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0) {
		if (has_promisor_remote()) {
			prefetch_to_pack(object_index);
			if (oid_object_info_extended(the_repository, &entry->idx.oid, &oi,
						     OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_LOOKUP_REPLACE) < 0)
				type = -1;
		} else {
			type = -1;
		}
	}
	oe_set_type(entry, type);
	if (entry->type_valid) {
		SET_SIZE(entry, canonical_size);
	} else {
		/*
		 * Bad object type is checked in prepare_pack().  This is
		 * to permit a missing preferred base object to be ignored
		 * as a preferred base.  Doing so can result in a larger
		 * pack file, but the transfer will still take place.
		 */
	}
}

static int pack_offset_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;
	const struct packed_git *a_in_pack = IN_PACK(a);
	const struct packed_git *b_in_pack = IN_PACK(b);

	/* avoid filesystem thrashing with loose objects */
	if (!a_in_pack && !b_in_pack)
		return oidcmp(&a->idx.oid, &b->idx.oid);

	if (a_in_pack < b_in_pack)
		return -1;
	if (a_in_pack > b_in_pack)
		return 1;
	return a->in_pack_offset < b->in_pack_offset ? -1 :
			(a->in_pack_offset > b->in_pack_offset);
}

/*
 * Drop an on-disk delta we were planning to reuse. Naively, this would
 * just involve blanking out the "delta" field, but we have to deal
 * with some extra book-keeping:
 *
 *   1. Removing ourselves from the delta_sibling linked list.
 *
 *   2. Updating our size/type to the non-delta representation. These were
 *      either not recorded initially (size) or overwritten with the delta type
 *      (type) when check_object() decided to reuse the delta.
 *
 *   3. Resetting our delta depth, as we are now a base object.
 */
static void drop_reused_delta(struct object_entry *entry)
{
	unsigned *idx = &to_pack.objects[entry->delta_idx - 1].delta_child_idx;
	struct object_info oi = OBJECT_INFO_INIT;
	enum object_type type;
	unsigned long size;

	while (*idx) {
		struct object_entry *oe = &to_pack.objects[*idx - 1];

		if (oe == entry)
			*idx = oe->delta_sibling_idx;
		else
			idx = &oe->delta_sibling_idx;
	}
	SET_DELTA(entry, NULL);
	entry->depth = 0;

	oi.sizep = &size;
	oi.typep = &type;
	if (packed_object_info(the_repository, IN_PACK(entry), entry->in_pack_offset, &oi) < 0) {
		/*
		 * We failed to get the info from this pack for some reason;
		 * fall back to oid_object_info, which may find another copy.
		 * And if that fails, the error will be recorded in oe_type(entry)
		 * and dealt with in prepare_pack().
		 */
		oe_set_type(entry,
			    oid_object_info(the_repository, &entry->idx.oid, &size));
	} else {
		oe_set_type(entry, type);
	}
	SET_SIZE(entry, size);
}

/*
 * Follow the chain of deltas from this entry onward, throwing away any links
 * that cause us to hit a cycle (as determined by the DFS state flags in
 * the entries).
 *
 * We also detect too-long reused chains that would violate our --depth
 * limit.
 */
static void break_delta_chains(struct object_entry *entry)
{
	/*
	 * The actual depth of each object we will write is stored as an int,
	 * as it cannot exceed our int "depth" limit. But before we break
	 * chains based on that limit, we may potentially go as deep as the
	 * number of objects, which is elsewhere bounded to a uint32_t.
	 */
	uint32_t total_depth;
	struct object_entry *cur, *next;

	for (cur = entry, total_depth = 0;
	     cur;
	     cur = DELTA(cur), total_depth++) {
		if (cur->dfs_state == DFS_DONE) {
			/*
			 * We've already seen this object and know it isn't
			 * part of a cycle. We do need to append its depth
			 * to our count.
			 */
			total_depth += cur->depth;
			break;
		}

		/*
		 * We break cycles before looping, so an ACTIVE state (or any
		 * other cruft which made its way into the state variable)
		 * is a bug.
		 */
		if (cur->dfs_state != DFS_NONE)
			BUG("confusing delta dfs state in first pass: %d",
			    cur->dfs_state);

		/*
		 * Now we know this is the first time we've seen the object. If
		 * it's not a delta, we're done traversing, but we'll mark it
		 * done to save time on future traversals.
		 */
		if (!DELTA(cur)) {
			cur->dfs_state = DFS_DONE;
			break;
		}

		/*
		 * Mark ourselves as active and see if the next step causes
		 * us to cycle to another active object. It's important to do
		 * this _before_ we loop, because it impacts where we make the
		 * cut, and thus how our total_depth counter works.
		 * E.g., We may see a partial loop like:
		 *
		 *   A -> B -> C -> D -> B
		 *
		 * Cutting B->C breaks the cycle. But now the depth of A is
		 * only 1, and our total_depth counter is at 3. The size of the
		 * error is always one less than the size of the cycle we
		 * broke. Commits C and D were "lost" from A's chain.
		 *
		 * If we instead cut D->B, then the depth of A is correct at 3.
		 * We keep all commits in the chain that we examined.
		 */
		cur->dfs_state = DFS_ACTIVE;
		if (DELTA(cur)->dfs_state == DFS_ACTIVE) {
			drop_reused_delta(cur);
			cur->dfs_state = DFS_DONE;
			break;
		}
	}

	/*
	 * And now that we've gone all the way to the bottom of the chain, we
	 * need to clear the active flags and set the depth fields as
	 * appropriate. Unlike the loop above, which can quit when it drops a
	 * delta, we need to keep going to look for more depth cuts. So we need
	 * an extra "next" pointer to keep going after we reset cur->delta.
	 */
	for (cur = entry; cur; cur = next) {
		next = DELTA(cur);

		/*
		 * We should have a chain of zero or more ACTIVE states down to
		 * a final DONE. We can quit after the DONE, because either it
		 * has no bases, or we've already handled them in a previous
		 * call.
		 */
		if (cur->dfs_state == DFS_DONE)
			break;
		else if (cur->dfs_state != DFS_ACTIVE)
			BUG("confusing delta dfs state in second pass: %d",
			    cur->dfs_state);

		/*
		 * If the total_depth is more than depth, then we need to snip
		 * the chain into two or more smaller chains that don't exceed
		 * the maximum depth. Most of the resulting chains will contain
		 * (depth + 1) entries (i.e., depth deltas plus one base), and
		 * the last chain (i.e., the one containing entry) will contain
		 * whatever entries are left over, namely
		 * (total_depth % (depth + 1)) of them.
		 *
		 * Since we are iterating towards decreasing depth, we need to
		 * decrement total_depth as we go, and we need to write to the
		 * entry what its final depth will be after all of the
		 * snipping. Since we're snipping into chains of length (depth
		 * + 1) entries, the final depth of an entry will be its
		 * original depth modulo (depth + 1). Any time we encounter an
		 * entry whose final depth is supposed to be zero, we snip it
		 * from its delta base, thereby making it so.
		 */
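		/*
		 * For example, with --depth=3 (so depth + 1 == 4) and
		 * total_depth == 7, successive entries get final depths
		 * 7%4=3, 6%4=2, 5%4=1, 4%4=0 (snip), then 3, 2, 1,
		 * 0 (snip): one over-long chain becomes two of depth 3.
		 */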
		cur->depth = (total_depth--) % (depth + 1);
		if (!cur->depth)
			drop_reused_delta(cur);

		cur->dfs_state = DFS_DONE;
	}
}

static void get_object_details(void)
{
	uint32_t i;
	struct object_entry **sorted_by_offset;

	if (progress)
		progress_state = start_progress(_("Counting objects"),
						to_pack.nr_objects);

	CALLOC_ARRAY(sorted_by_offset, to_pack.nr_objects);
	for (i = 0; i < to_pack.nr_objects; i++)
		sorted_by_offset[i] = to_pack.objects + i;
	QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort);

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = sorted_by_offset[i];
		check_object(entry, i);
		if (entry->type_valid &&
		    oe_size_greater_than(&to_pack, entry, big_file_threshold))
			entry->no_try_delta = 1;
		display_progress(progress_state, i + 1);
	}
	stop_progress(&progress_state);

	/*
	 * This must happen in a second pass, since we rely on the delta
	 * information for the whole list being completed.
	 */
	for (i = 0; i < to_pack.nr_objects; i++)
		break_delta_chains(&to_pack.objects[i]);

	free(sorted_by_offset);
}

/*
 * We search for deltas in a list sorted by type, by filename hash, and then
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller -- deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.  The deepest deltas are therefore the oldest objects, which are
 * the least likely to be accessed often.
 */
static int type_size_sort(const void *_a, const void *_b)
{
	const struct object_entry *a = *(struct object_entry **)_a;
	const struct object_entry *b = *(struct object_entry **)_b;
	const enum object_type a_type = oe_type(a);
	const enum object_type b_type = oe_type(b);
	const unsigned long a_size = SIZE(a);
	const unsigned long b_size = SIZE(b);

	if (a_type > b_type)
		return -1;
	if (a_type < b_type)
		return 1;
	if (a->hash > b->hash)
		return -1;
	if (a->hash < b->hash)
		return 1;
	if (a->preferred_base > b->preferred_base)
		return -1;
	if (a->preferred_base < b->preferred_base)
		return 1;
	if (use_delta_islands) {
		const int island_cmp = island_delta_cmp(&a->idx.oid, &b->idx.oid);
		if (island_cmp)
			return island_cmp;
	}
	if (a_size > b_size)
		return -1;
	if (a_size < b_size)
		return 1;
	return a < b ? -1 : (a > b);  /* newest first */
}

struct unpacked {
	struct object_entry *entry;
	void *data;
	struct delta_index *index;
	unsigned depth;
};

static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
			   unsigned long delta_size)
{
	if (max_delta_cache_size && delta_cache_size + delta_size > max_delta_cache_size)
		return 0;

	if (delta_size < cache_max_small_delta_size)
		return 1;

	/* cache delta, if objects are large enough compared to delta size */
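	/*
	 * The shifts scale both sides: this accepts the delta roughly
	 * when delta_size < src_size/1024 + trg_size/2048, i.e. only
	 * when the delta is tiny compared to the objects it connects.
	 */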
	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
		return 1;

	return 0;
}

/* Protect delta_cache_size */
static pthread_mutex_t cache_mutex;
#define cache_lock()		pthread_mutex_lock(&cache_mutex)
#define cache_unlock()		pthread_mutex_unlock(&cache_mutex)

/*
 * Protect object list partitioning (e.g. struct thread_params) and
 * progress_state
 */
static pthread_mutex_t progress_mutex;
#define progress_lock()		pthread_mutex_lock(&progress_mutex)
#define progress_unlock()	pthread_mutex_unlock(&progress_mutex)

/*
 * Access to struct object_entry is unprotected since each thread owns
 * a portion of the main object list. Just don't access object entries
 * ahead in the list because they can be stolen and would need
 * progress_mutex for protection.
 */

static inline int oe_size_less_than(struct packing_data *pack,
				    const struct object_entry *lhs,
				    unsigned long rhs)
{
	if (lhs->size_valid)
		return lhs->size_ < rhs;
	if (rhs < pack->oe_size_limit) /* !size_valid implies lhs >= limit, so rhs < 2^x <= lhs */
		return 0;
	return oe_get_size_slow(pack, lhs) < rhs;
}

static inline void oe_set_tree_depth(struct packing_data *pack,
				     struct object_entry *e,
				     unsigned int tree_depth)
{
	if (!pack->tree_depth)
		CALLOC_ARRAY(pack->tree_depth, pack->nr_alloc);
	pack->tree_depth[e - pack->objects] = tree_depth;
}

/*
 * Return the size of the object without doing any delta
 * reconstruction (so non-deltas are true object sizes, but deltas
 * return the size of the delta data).
 */
unsigned long oe_get_size_slow(struct packing_data *pack,
			       const struct object_entry *e)
{
	struct packed_git *p;
	struct pack_window *w_curs;
	unsigned char *buf;
	enum object_type type;
	unsigned long used, avail, size;

	if (e->type_ != OBJ_OFS_DELTA && e->type_ != OBJ_REF_DELTA) {
		packing_data_lock(&to_pack);
		if (oid_object_info(the_repository, &e->idx.oid, &size) < 0)
			die(_("unable to get size of %s"),
			    oid_to_hex(&e->idx.oid));
		packing_data_unlock(&to_pack);
		return size;
	}

	p = oe_in_pack(pack, e);
	if (!p)
		BUG("when e->type is a delta, it must belong to a pack");

	packing_data_lock(&to_pack);
	w_curs = NULL;
	buf = use_pack(p, &w_curs, e->in_pack_offset, &avail);
	used = unpack_object_header_buffer(buf, avail, &type, &size);
	if (used == 0)
		die(_("unable to parse object header of %s"),
		    oid_to_hex(&e->idx.oid));

	unuse_pack(&w_curs);
	packing_data_unlock(&to_pack);
	return size;
}

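/*
 * Try to deltify "trg" against the candidate base "src".  Returns 1 if
 * a new delta was recorded, 0 if this pair was skipped, and -1 if the
 * types differ, which tells find_deltas() it can stop scanning the
 * window because the list is sorted by type.
 */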
static int try_delta(struct unpacked *trg, struct unpacked *src,
		     unsigned max_depth, unsigned long *mem_usage)
{
	struct object_entry *trg_entry = trg->entry;
	struct object_entry *src_entry = src->entry;
	unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz;
	unsigned ref_depth;
	enum object_type type;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (oe_type(trg_entry) != oe_type(src_entry))
		return -1;

	/*
	 * We do not bother to try a delta that we discarded on an
	 * earlier try, but only when reusing delta data.  Note that
	 * src_entry that is marked as the preferred_base should always
	 * be considered, as even if we produce a suboptimal delta against
	 * it, we will still save the transfer cost, as we already know
	 * the other side has it and we won't send src_entry at all.
	 */
	if (reuse_delta && IN_PACK(trg_entry) &&
	    IN_PACK(trg_entry) == IN_PACK(src_entry) &&
	    !src_entry->preferred_base &&
	    trg_entry->in_pack_type != OBJ_REF_DELTA &&
	    trg_entry->in_pack_type != OBJ_OFS_DELTA)
		return 0;

	/* Let's not bust the allowed depth. */
	if (src->depth >= max_depth)
		return 0;

	/* Now some size filtering heuristics. */
	trg_size = SIZE(trg_entry);
	if (!DELTA(trg_entry)) {
		max_size = trg_size/2 - the_hash_algo->rawsz;
		ref_depth = 1;
	} else {
		max_size = DELTA_SIZE(trg_entry);
		ref_depth = trg->depth;
	}
	max_size = (uint64_t)max_size * (max_depth - src->depth) /
						(max_depth - ref_depth + 1);
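	/*
	 * The scaling above shrinks the size budget as the candidate
	 * base sits deeper in a chain: a delta against a deeper base
	 * must be proportionally smaller to be worth the extra depth
	 * it adds.
	 */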
	if (max_size == 0)
		return 0;
	src_size = SIZE(src_entry);
	sizediff = src_size < trg_size ? trg_size - src_size : 0;
	if (sizediff >= max_size)
		return 0;
	if (trg_size < src_size / 32)
		return 0;

	if (!in_same_island(&trg->entry->idx.oid, &src->entry->idx.oid))
		return 0;

	/* Load data if not already done */
	if (!trg->data) {
		packing_data_lock(&to_pack);
		trg->data = read_object_file(&trg_entry->idx.oid, &type, &sz);
		packing_data_unlock(&to_pack);
		if (!trg->data)
			die(_("object %s cannot be read"),
			    oid_to_hex(&trg_entry->idx.oid));
		if (sz != trg_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&trg_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)trg_size);
		*mem_usage += sz;
	}
	if (!src->data) {
		packing_data_lock(&to_pack);
		src->data = read_object_file(&src_entry->idx.oid, &type, &sz);
		packing_data_unlock(&to_pack);
		if (!src->data) {
			if (src_entry->preferred_base) {
				static int warned = 0;
				if (!warned++)
					warning(_("object %s cannot be read"),
						oid_to_hex(&src_entry->idx.oid));
				/*
				 * Those objects are not included in the
				 * resulting pack.  Be resilient and ignore
				 * them if they can't be read, in case the
				 * pack could be created nevertheless.
				 */
				return 0;
			}
			die(_("object %s cannot be read"),
			    oid_to_hex(&src_entry->idx.oid));
		}
		if (sz != src_size)
			die(_("object %s inconsistent object length (%"PRIuMAX" vs %"PRIuMAX")"),
			    oid_to_hex(&src_entry->idx.oid), (uintmax_t)sz,
			    (uintmax_t)src_size);
		*mem_usage += sz;
	}
	if (!src->index) {
		src->index = create_delta_index(src->data, src_size);
		if (!src->index) {
			static int warned = 0;
			if (!warned++)
				warning(_("suboptimal pack - out of memory"));
			return 0;
		}
		*mem_usage += sizeof_delta_index(src->index);
	}

	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
	if (!delta_buf)
		return 0;

	if (DELTA(trg_entry)) {
		/* Prefer only shallower same-sized deltas. */
		if (delta_size == DELTA_SIZE(trg_entry) &&
		    src->depth + 1 >= trg->depth) {
			free(delta_buf);
			return 0;
		}
	}

	/*
	 * Handle memory allocation outside of the cache
	 * accounting lock.  Compiler will optimize the strangeness
	 * away when NO_PTHREADS is defined.
	 */
	free(trg_entry->delta_data);
	cache_lock();
	if (trg_entry->delta_data) {
		delta_cache_size -= DELTA_SIZE(trg_entry);
		trg_entry->delta_data = NULL;
	}
	if (delta_cacheable(src_size, trg_size, delta_size)) {
		delta_cache_size += delta_size;
		cache_unlock();
		trg_entry->delta_data = xrealloc(delta_buf, delta_size);
	} else {
		cache_unlock();
		free(delta_buf);
	}

	SET_DELTA(trg_entry, src_entry);
	SET_DELTA_SIZE(trg_entry, delta_size);
	trg->depth = src->depth + 1;

	return 1;
}

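/*
 * Return the deepest delta depth reachable from "me" through its delta
 * children, counting from "n"; find_deltas() uses this to keep the
 * chains already hanging off an entry within the --depth limit.
 */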
static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = DELTA_CHILD(me);
	unsigned int m = n;
	while (child) {
		const unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = DELTA_SIBLING(child);
	}
	return m;
}

static unsigned long free_unpacked(struct unpacked *n)
{
	unsigned long freed_mem = sizeof_delta_index(n->index);
	free_delta_index(n->index);
	n->index = NULL;
	if (n->data) {
		freed_mem += SIZE(n->entry);
		FREE_AND_NULL(n->data);
	}
	n->entry = NULL;
	n->depth = 0;
	return freed_mem;
}

static void find_deltas(struct object_entry **list, unsigned *list_size,
			int window, int depth, unsigned *processed)
{
	uint32_t i, idx = 0, count = 0;
	struct unpacked *array;
	unsigned long mem_usage = 0;

	CALLOC_ARRAY(array, window);

	for (;;) {
		struct object_entry *entry;
		struct unpacked *n = array + idx;
		int j, max_depth, best_base = -1;

		progress_lock();
		if (!*list_size) {
			progress_unlock();
			break;
		}
		entry = *list++;
		(*list_size)--;
		if (!entry->preferred_base) {
			(*processed)++;
			display_progress(progress_state, *processed);
		}
		progress_unlock();

		mem_usage -= free_unpacked(n);
		n->entry = entry;

		while (window_memory_limit &&
		       mem_usage > window_memory_limit &&
		       count > 1) {
			const uint32_t tail = (idx + window - count) % window;
			mem_usage -= free_unpacked(array + tail);
			count--;
		}

		/* We do not compute delta to *create* objects we are not
		 * going to pack.
		 */
		if (entry->preferred_base)
			goto next;

		/*
		 * If the current object is at the pack edge, take the depth of
		 * the objects that depend on the current object into account;
		 * otherwise they would become too deep.
		 */
		max_depth = depth;
		if (DELTA_CHILD(entry)) {
			max_depth -= check_delta_limit(entry, 0);
			if (max_depth <= 0)
				goto next;
		}

		j = window;
		while (--j > 0) {
			int ret;
			uint32_t other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			ret = try_delta(n, m, max_depth, &mem_usage);
			if (ret < 0)
				break;
			else if (ret > 0)
				best_base = other_idx;
		}

		/*
		 * If we decided to cache the delta data, then it is best
		 * to compress it right away.  First because we have to do
		 * it anyway, and doing it here while we're threaded will
		 * save a lot of time in the non threaded write phase,
		 * as well as allow for caching more deltas within
		 * the same cache size limit.
		 * ...
		 * But only if not writing to stdout, since in that case
		 * the network is most likely throttling writes anyway,
		 * and therefore it is best to go to the write phase ASAP
		 * instead, as we can afford spending more time compressing
		 * between writes at that moment.
		 */
		if (entry->delta_data && !pack_to_stdout) {
			unsigned long size;

			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
			if (size < (1U << OE_Z_DELTA_BITS)) {
				entry->z_delta_size = size;
				cache_lock();
				delta_cache_size -= DELTA_SIZE(entry);
				delta_cache_size += entry->z_delta_size;
				cache_unlock();
			} else {
				FREE_AND_NULL(entry->delta_data);
				entry->z_delta_size = 0;
			}
		}

		/* if we made n a delta, and if n is already at max
		 * depth, leaving it in the window is pointless.  we
		 * should evict it first.
		 */
		if (DELTA(entry) && max_depth <= n->depth)
			continue;

		/*
		 * Move the best delta base up in the window, after the
		 * currently deltified object, to keep it longer.  It will
		 * be the first base object to be attempted next.
		 */
		if (DELTA(entry)) {
			struct unpacked swap = array[best_base];
			int dist = (window + idx - best_base) % window;
			int dst = best_base;
			while (dist--) {
				int src = (dst + 1) % window;
				array[dst] = array[src];
				dst = src;
			}
			array[dst] = swap;
		}

		next:
		idx++;
		if (count + 1 < window)
			count++;
		if (idx >= window)
			idx = 0;
	}

	for (i = 0; i < window; ++i) {
		free_delta_index(array[i].index);
		free(array[i].data);
	}
	free(array);
}

/*
 * The main object list is split into smaller lists, each of which is
 * handed to one worker.
 *
 * The main thread waits on the condition that (at least) one of the workers
 * has stopped working (which is indicated in the .working member of
 * struct thread_params).
 *
 * When a work thread has completed its work, it sets .working to 0 and
 * signals the main thread and waits on the condition that .data_ready
 * becomes 1.
 *
 * The main thread steals half of the work from the worker that has
 * most work left to hand it to the idle worker.
 */

struct thread_params {
	pthread_t thread;
	struct object_entry **list;
	unsigned list_size;
	unsigned remaining;
	int window;
	int depth;
	int working;
	int data_ready;
	pthread_mutex_t mutex;
	pthread_cond_t cond;
	unsigned *processed;
};

static pthread_cond_t progress_cond;

/*
 * Mutex and condition variable can't be statically-initialized on Windows.
 */
static void init_threaded_search(void)
{
	pthread_mutex_init(&cache_mutex, NULL);
	pthread_mutex_init(&progress_mutex, NULL);
	pthread_cond_init(&progress_cond, NULL);
}

static void cleanup_threaded_search(void)
{
	pthread_cond_destroy(&progress_cond);
	pthread_mutex_destroy(&cache_mutex);
	pthread_mutex_destroy(&progress_mutex);
}

static void *threaded_find_deltas(void *arg)
{
	struct thread_params *me = arg;

	progress_lock();
	while (me->remaining) {
		progress_unlock();

		find_deltas(me->list, &me->remaining,
			    me->window, me->depth, me->processed);

		progress_lock();
		me->working = 0;
		pthread_cond_signal(&progress_cond);
		progress_unlock();

		/*
		 * We must not set ->data_ready before we wait on the
		 * condition because the main thread may have set it to 1
		 * before we get here. In order to be sure that new
		 * work is available if we see 1 in ->data_ready, it
		 * was initialized to 0 before this thread was spawned
		 * and we reset it to 0 right away.
		 */
		pthread_mutex_lock(&me->mutex);
		while (!me->data_ready)
			pthread_cond_wait(&me->cond, &me->mutex);
		me->data_ready = 0;
		pthread_mutex_unlock(&me->mutex);

		progress_lock();
	}
	progress_unlock();
	/* leave ->working 1 so that this doesn't get more work assigned */
	return NULL;
}

static void ll_find_deltas(struct object_entry **list, unsigned list_size,
			   int window, int depth, unsigned *processed)
{
	struct thread_params *p;
	int i, ret, active_threads = 0;

	init_threaded_search();

	if (delta_search_threads <= 1) {
		find_deltas(list, &list_size, window, depth, processed);
		cleanup_threaded_search();
		return;
	}
	if (progress > pack_to_stdout)
		fprintf_ln(stderr, _("Delta compression using up to %d threads"),
			   delta_search_threads);
	CALLOC_ARRAY(p, delta_search_threads);

	/* Partition the work amongst work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		unsigned sub_size = list_size / (delta_search_threads - i);

		/* don't use too small segments or no deltas will be found */
		if (sub_size < 2*window && i+1 < delta_search_threads)
			sub_size = 0;

		p[i].window = window;
		p[i].depth = depth;
		p[i].processed = processed;
		p[i].working = 1;
		p[i].data_ready = 0;

		/* try to split chunks on "path" boundaries */
		while (sub_size && sub_size < list_size &&
		       list[sub_size]->hash &&
		       list[sub_size]->hash == list[sub_size-1]->hash)
			sub_size++;

		p[i].list = list;
		p[i].list_size = sub_size;
		p[i].remaining = sub_size;

		list += sub_size;
		list_size -= sub_size;
	}

	/* Start work threads. */
	for (i = 0; i < delta_search_threads; i++) {
		if (!p[i].list_size)
			continue;
		pthread_mutex_init(&p[i].mutex, NULL);
		pthread_cond_init(&p[i].cond, NULL);
		ret = pthread_create(&p[i].thread, NULL,
				     threaded_find_deltas, &p[i]);
		if (ret)
			die(_("unable to create thread: %s"), strerror(ret));
		active_threads++;
	}

	/*
	 * Now let's wait for work completion.  Each time a thread is done
	 * with its work, we steal half of the remaining work from the
	 * thread with the largest number of unprocessed objects and give
	 * it to that newly idle thread.  This ensures good load balancing
	 * until the remaining object list segments are simply too short
	 * to be worth splitting anymore.
	 */
	while (active_threads) {
		struct thread_params *target = NULL;
		struct thread_params *victim = NULL;
		unsigned sub_size = 0;

		progress_lock();
		for (;;) {
			for (i = 0; !target && i < delta_search_threads; i++)
				if (!p[i].working)
					target = &p[i];
			if (target)
				break;
			pthread_cond_wait(&progress_cond, &progress_mutex);
		}

		for (i = 0; i < delta_search_threads; i++)
			if (p[i].remaining > 2*window &&
			    (!victim || victim->remaining < p[i].remaining))
				victim = &p[i];
		if (victim) {
			sub_size = victim->remaining / 2;
			list = victim->list + victim->list_size - sub_size;
			while (sub_size && list[0]->hash &&
			       list[0]->hash == list[-1]->hash) {
				list++;
				sub_size--;
			}
			if (!sub_size) {
				/*
				 * It is possible for some "paths" to have
				 * so many objects that no hash boundary
				 * might be found.  Let's just steal the
				 * exact half in that case.
				 */
				sub_size = victim->remaining / 2;
				list -= sub_size;
			}
			target->list = list;
			victim->list_size -= sub_size;
			victim->remaining -= sub_size;
		}
		target->list_size = sub_size;
		target->remaining = sub_size;
		target->working = 1;
		progress_unlock();

		pthread_mutex_lock(&target->mutex);
		target->data_ready = 1;
		pthread_cond_signal(&target->cond);
		pthread_mutex_unlock(&target->mutex);

		if (!sub_size) {
			pthread_join(target->thread, NULL);
			pthread_cond_destroy(&target->cond);
			pthread_mutex_destroy(&target->mutex);
			active_threads--;
		}
	}
	cleanup_threaded_search();
	free(p);
}

static int obj_is_packed(const struct object_id *oid)
{
	return packlist_find(&to_pack, oid) ||
		(reuse_packfile_bitmap &&
		 bitmap_walk_contains(bitmap_git, reuse_packfile_bitmap, oid));
}

static void add_tag_chain(const struct object_id *oid)
{
	struct tag *tag;

	/*
	 * We catch duplicates already in add_object_entry(), but we'd
	 * prefer to do this extra check to avoid having to parse the
	 * tag at all if we already know that it's being packed (e.g., if
	 * it was included via bitmaps, we would not have parsed it
	 * previously).
	 */
	if (obj_is_packed(oid))
		return;

	tag = lookup_tag(the_repository, oid);
	while (1) {
		if (!tag || parse_tag(tag) || !tag->tagged)
			die(_("unable to pack objects reachable from tag %s"),
			    oid_to_hex(oid));

		add_object_entry(&tag->object.oid, OBJ_TAG, NULL, 0);

		if (tag->tagged->type != OBJ_TAG)
			return;

		tag = (struct tag *)tag->tagged;
	}
}

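/*
 * Ref iteration callback: if a tag ref peels to an object that is
 * already being packed, pull the whole chain of tag objects into the
 * pack too; this backs the --include-tag option.
 */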
static int add_ref_tag(const char *tag, const struct object_id *oid, int flag, void *cb_data)
{
	struct object_id peeled;

	if (!peel_iterated_oid(oid, &peeled) && obj_is_packed(&peeled))
		add_tag_chain(oid);
	return 0;
}

static void prepare_pack(int window, int depth)
{
	struct object_entry **delta_list;
	uint32_t i, nr_deltas;
	unsigned n;

	if (use_delta_islands)
		resolve_tree_islands(the_repository, progress, &to_pack);

	get_object_details();

	/*
	 * If we're locally repacking then we need to be doubly careful
	 * from now on in order to make sure no stealth corruption gets
	 * propagated to the new pack.  Clients receiving streamed packs
	 * should validate everything they get anyway so no need to incur
	 * the additional cost here in that case.
	 */
	if (!pack_to_stdout)
		do_check_packed_object_crc = 1;

	if (!to_pack.nr_objects || !window || !depth)
		return;

	ALLOC_ARRAY(delta_list, to_pack.nr_objects);
	nr_deltas = n = 0;

	for (i = 0; i < to_pack.nr_objects; i++) {
		struct object_entry *entry = to_pack.objects + i;

		if (DELTA(entry))
			/* This happens if we decided to reuse existing
			 * delta from a pack.  "reuse_delta &&" is implied.
			 */
			continue;

		if (!entry->type_valid ||
		    oe_size_less_than(&to_pack, entry, 50))
			continue;

		if (entry->no_try_delta)
			continue;

		if (!entry->preferred_base) {
			nr_deltas++;
			if (oe_type(entry) < 0)
				die(_("unable to get type of object %s"),
				    oid_to_hex(&entry->idx.oid));
		} else {
			if (oe_type(entry) < 0) {
				/*
				 * This object is not found, but we
				 * don't have to include it anyway.
				 */
				continue;
			}
		}

		delta_list[n++] = entry;
	}

	if (nr_deltas && n > 1) {
		unsigned nr_done = 0;

		if (progress)
			progress_state = start_progress(_("Compressing objects"),
							nr_deltas);
		QSORT(delta_list, n, type_size_sort);
		ll_find_deltas(delta_list, n, window+1, depth, &nr_done);
		stop_progress(&progress_state);
		if (nr_done != nr_deltas)
			die(_("inconsistency with delta count"));
	}
	free(delta_list);
}

static int git_pack_config(const char *k, const char *v, void *cb)
{
	if (!strcmp(k, "pack.window")) {
		window = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.windowmemory")) {
		window_memory_limit = git_config_ulong(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.depth")) {
		depth = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.deltacachesize")) {
		max_delta_cache_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.deltacachelimit")) {
		cache_max_small_delta_size = git_config_int(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.writebitmaphashcache")) {
		if (git_config_bool(k, v))
			write_bitmap_options |= BITMAP_OPT_HASH_CACHE;
		else
			write_bitmap_options &= ~BITMAP_OPT_HASH_CACHE;
	}
	if (!strcmp(k, "pack.usebitmaps")) {
		use_bitmap_index_default = git_config_bool(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.allowpackreuse")) {
		allow_pack_reuse = git_config_bool(k, v);
		return 0;
	}
	if (!strcmp(k, "pack.threads")) {
		delta_search_threads = git_config_int(k, v);
		if (delta_search_threads < 0)
			die(_("invalid number of threads specified (%d)"),
			    delta_search_threads);
		if (!HAVE_THREADS && delta_search_threads != 1) {
			warning(_("no threads support, ignoring %s"), k);
			delta_search_threads = 0;
		}
		return 0;
	}
	if (!strcmp(k, "pack.indexversion")) {
		pack_idx_opts.version = git_config_int(k, v);
		if (pack_idx_opts.version > 2)
			die(_("bad pack.indexversion=%"PRIu32),
			    pack_idx_opts.version);
		return 0;
	}
	if (!strcmp(k, "pack.writereverseindex")) {
		if (git_config_bool(k, v))
			pack_idx_opts.flags |= WRITE_REV;
		else
			pack_idx_opts.flags &= ~WRITE_REV;
		return 0;
	}
	if (!strcmp(k, "uploadpack.blobpackfileuri")) {
		struct configured_exclusion *ex = xmalloc(sizeof(*ex));
		const char *oid_end, *pack_end;
		/*
		 * Stores the pack hash. This is not a true object ID, but is
		 * of the same form.
		 */
		struct object_id pack_hash;

		if (parse_oid_hex(v, &ex->e.oid, &oid_end) ||
		    *oid_end != ' ' ||
		    parse_oid_hex(oid_end + 1, &pack_hash, &pack_end) ||
		    *pack_end != ' ')
			die(_("value of uploadpack.blobpackfileuri must be "
			      "of the form '<object-hash> <pack-hash> <uri>' (got '%s')"), v);
		if (oidmap_get(&configured_exclusions, &ex->e.oid))
			die(_("object already configured in another "
			      "uploadpack.blobpackfileuri (got '%s')"), v);
		ex->pack_hash_hex = xcalloc(1, pack_end - oid_end);
		memcpy(ex->pack_hash_hex, oid_end + 1, pack_end - oid_end - 1);
		ex->uri = xstrdup(pack_end + 1);
		oidmap_put(&configured_exclusions, ex);
	}
	return git_default_config(k, v, cb);
}

/* Counters for trace2 output when in --stdin-packs mode. */
static int stdin_packs_found_nr;
static int stdin_packs_hints_nr;

static int add_object_entry_from_pack(const struct object_id *oid,
				      struct packed_git *p,
				      uint32_t pos,
				      void *_data)
{
	struct rev_info *revs = _data;
	struct object_info oi = OBJECT_INFO_INIT;
	off_t ofs;
	enum object_type type;

	display_progress(progress_state, ++nr_seen);

	if (have_duplicate_entry(oid, 0))
		return 0;

	ofs = nth_packed_object_offset(p, pos);
	if (!want_object_in_pack(oid, 0, &p, &ofs))
		return 0;

	oi.typep = &type;
	if (packed_object_info(the_repository, p, ofs, &oi) < 0)
		die(_("could not get type of object %s in pack %s"),
		    oid_to_hex(oid), p->pack_name);
	else if (type == OBJ_COMMIT) {
		/*
		 * commits in included packs are used as starting points for the
		 * subsequent revision walk
		 */
		add_pending_oid(revs, NULL, oid, 0);
	}

	stdin_packs_found_nr++;

	create_object_entry(oid, type, 0, 0, 0, p, ofs);

	return 0;
}

static void show_commit_pack_hint(struct commit *commit, void *_data)
{
	/* nothing to do; commits don't have a namehash */
}

static void show_object_pack_hint(struct object *object, const char *name,
				  void *_data)
{
	struct object_entry *oe = packlist_find(&to_pack, &object->oid);
	if (!oe)
		return;

	/*
	 * Our 'to_pack' list was constructed by iterating all objects packed in
	 * included packs, and so doesn't have a non-zero hash field that you
	 * would typically pick up during a reachability traversal.
	 *
	 * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
	 * here using the name now in order to perhaps improve the delta
	 * selection process.
	 */
	oe->hash = pack_name_hash(name);
	oe->no_try_delta = name && no_try_delta(name);

	stdin_packs_hints_nr++;
}

static int pack_mtime_cmp(const void *_a, const void *_b)
{
	struct packed_git *a = ((const struct string_list_item*)_a)->util;
	struct packed_git *b = ((const struct string_list_item*)_b)->util;

	/*
	 * order packs by descending mtime so that objects are laid out
	 * roughly as newest-to-oldest
	 */
	if (a->mtime < b->mtime)
		return 1;
	else if (b->mtime < a->mtime)
		return -1;
	else
		return 0;
}

static void read_packs_list_from_stdin(void)
{
	struct strbuf buf = STRBUF_INIT;
	struct string_list include_packs = STRING_LIST_INIT_DUP;
	struct string_list exclude_packs = STRING_LIST_INIT_DUP;
	struct string_list_item *item = NULL;

	struct packed_git *p;
	struct rev_info revs;

	repo_init_revisions(the_repository, &revs, NULL);
	/*
	 * Use a revision walk to fill in the namehash of objects in the include
	 * packs. To save time, we'll avoid traversing through objects that are
	 * in excluded packs.
	 *
	 * That may cause us to avoid populating all of the namehash fields of
	 * all included objects, but our goal is best-effort, since this is only
	 * an optimization during delta selection.
	 */
	revs.no_kept_objects = 1;
	revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
	revs.blob_objects = 1;
	revs.tree_objects = 1;
	revs.tag_objects = 1;
	revs.ignore_missing_links = 1;

	while (strbuf_getline(&buf, stdin) != EOF) {
		if (!buf.len)
			continue;

		if (*buf.buf == '^')
			string_list_append(&exclude_packs, buf.buf + 1);
		else
			string_list_append(&include_packs, buf.buf);

		strbuf_reset(&buf);
	}

	string_list_sort(&include_packs);
	string_list_sort(&exclude_packs);

	for (p = get_all_packs(the_repository); p; p = p->next) {
		const char *pack_name = pack_basename(p);

		item = string_list_lookup(&include_packs, pack_name);
		if (!item)
			item = string_list_lookup(&exclude_packs, pack_name);

		if (item)
			item->util = p;
	}

	/*
	 * Arguments we got on stdin may not even be packs. First
	 * check that to avoid segfaulting later on in
	 * e.g. pack_mtime_cmp(); excluded packs are handled below.
	 *
	 * Since we first parsed our STDIN and then sorted the input
	 * lines, the pack we error on will be whatever line happens to
	 * sort first. This is lazy; it's enough that we report one
	 * bad case here, we don't need to report the first/last one,
	 * or all of them.
	 */
	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
	}

	/*
	 * Then, handle all of the excluded packs, marking them as
	 * kept in-core so that later calls to add_object_entry()
	 * discard any objects that are also found in excluded packs.
	 */
	for_each_string_list_item(item, &exclude_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		p->pack_keep_in_core = 1;
	}

	/*
	 * Order packs by descending mtime (as pack_mtime_cmp() does); use
	 * QSORT directly to access the string_list_item's ->util pointer,
	 * which string_list_sort() does not provide.
	 */
	QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);

	for_each_string_list_item(item, &include_packs) {
		struct packed_git *p = item->util;
		if (!p)
			die(_("could not find pack '%s'"), item->string);
		for_each_object_in_pack(p,
					add_object_entry_from_pack,
					&revs,
					FOR_EACH_OBJECT_PACK_ORDER);
	}

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	traverse_commit_list(&revs,
			     show_commit_pack_hint,
			     show_object_pack_hint,
			     NULL);

	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
			   stdin_packs_found_nr);
	trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
			   stdin_packs_hints_nr);

	strbuf_release(&buf);
	string_list_clear(&include_packs, 0);
	string_list_clear(&exclude_packs, 0);
}

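/*
 * Read "<oid> <name>" lines from stdin and queue each object for
 * packing; a line starting with '-' names an edge object whose tree
 * becomes a preferred base for thin-pack deltas.
 */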
static void read_object_list_from_stdin(void)
{
	char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
	struct object_id oid;
	const char *p;

	for (;;) {
		if (!fgets(line, sizeof(line), stdin)) {
			if (feof(stdin))
				break;
			if (!ferror(stdin))
				die("BUG: fgets returned NULL, not EOF, not error!");
			if (errno != EINTR)
				die_errno("fgets");
			/* interrupted by a signal; clear the error and retry */
			clearerr(stdin);
			continue;
		}
		if (line[0] == '-') {
			if (get_oid_hex(line+1, &oid))
				die(_("expected edge object ID, got garbage:\n %s"),
				    line);
			add_preferred_base(&oid);
			continue;
		}
		if (parse_oid_hex(line, &oid, &p))
			die(_("expected object ID, got garbage:\n %s"), line);

		add_preferred_base_object(p + 1);
		add_object_entry(&oid, OBJ_NONE, p + 1, 0);
	}
}

static void show_commit(struct commit *commit, void *data)
{
	add_object_entry(&commit->object.oid, OBJ_COMMIT, NULL, 0);

	if (write_bitmap_index)
		index_commit_for_bitmap(commit);

	if (use_delta_islands)
		propagate_island_marks(commit);
}

static void show_object(struct object *obj, const char *name, void *data)
{
	add_preferred_base_object(name);
	add_object_entry(&obj->oid, obj->type, name, 0);

	if (use_delta_islands) {
		const char *p;
		unsigned depth;
		struct object_entry *ent;

		/* the empty string is a root tree, which is depth 0 */
		depth = *name ? 1 : 0;
		for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
			depth++;

		ent = packlist_find(&to_pack, &obj->oid);
		if (ent && depth > oe_tree_depth(&to_pack, ent))
			oe_set_tree_depth(&to_pack, ent, depth);
	}
}

static void show_object__ma_allow_any(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_ANY);

	/*
	 * Quietly ignore ALL missing objects.  This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!has_object(the_repository, &obj->oid, 0))
		return;

	show_object(obj, name, data);
}

static void show_object__ma_allow_promisor(struct object *obj, const char *name, void *data)
{
	assert(arg_missing_action == MA_ALLOW_PROMISOR);

	/*
	 * Quietly ignore EXPECTED missing objects.  This avoids problems with
	 * staging them now and getting an odd error later.
	 */
	if (!has_object(the_repository, &obj->oid, 0) && is_promisor_object(&obj->oid))
		return;

	show_object(obj, name, data);
}

static int option_parse_missing_action(const struct option *opt,
				       const char *arg, int unset)
{
	assert(arg);
	assert(!unset);

	if (!strcmp(arg, "error")) {
		arg_missing_action = MA_ERROR;
		fn_show_object = show_object;
		return 0;
	}

	if (!strcmp(arg, "allow-any")) {
		arg_missing_action = MA_ALLOW_ANY;
		fetch_if_missing = 0;
		fn_show_object = show_object__ma_allow_any;
		return 0;
	}

	if (!strcmp(arg, "allow-promisor")) {
		arg_missing_action = MA_ALLOW_PROMISOR;
		fetch_if_missing = 0;
		fn_show_object = show_object__ma_allow_promisor;
		return 0;
	}

	die(_("invalid value for --missing"));
	return 0;
}

static void show_edge(struct commit *commit)
{
	add_preferred_base(&commit->object.oid);
}

static int add_object_in_unpacked_pack(const struct object_id *oid,
				       struct packed_git *pack,
				       uint32_t pos,
				       void *_data)
{
	add_object_entry(oid, OBJ_NONE, "", 0);
	return 0;
}

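/*
 * With --keep-unreachable we pull in every object from local packs
 * that are not marked kept (on disk or in-core). Reachability does
 * not matter here: add_object_entry() drops duplicates for us.
 */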
static void add_objects_in_unpacked_packs(void)
{
	if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
				   FOR_EACH_OBJECT_PACK_ORDER |
				   FOR_EACH_OBJECT_LOCAL_ONLY |
				   FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
				   FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
		die(_("cannot open pack index"));
}

static int add_loose_object(const struct object_id *oid, const char *path,
			    void *data)
{
	enum object_type type = oid_object_info(the_repository, oid, NULL);

	if (type < 0) {
		warning(_("loose object at %s could not be examined"), path);
		return 0;
	}

	add_object_entry(oid, type, "", 0);
	return 0;
}

/*
 * We actually don't even have to worry about reachability here.
 * add_object_entry will weed out duplicates, so we just add every
 * loose object we find.
 */
static void add_unreachable_loose_objects(void)
{
	for_each_loose_file_in_objdir(get_object_directory(),
				      add_loose_object,
				      NULL, NULL, NULL);
}

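/*
 * Check whether a pack we must not rewrite (non-local, or marked
 * kept on disk or in-core) already contains this object. The scan
 * starts from the pack that matched last time ("last_found") and
 * wraps around, since consecutive lookups tend to hit the same pack.
 */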
static int has_sha1_pack_kept_or_nonlocal(const struct object_id *oid)
{
	/* (void *)1 is a sentinel meaning "no previous hit" */
	static struct packed_git *last_found = (void *)1;
	struct packed_git *p;

	p = (last_found != (void *)1) ? last_found :
					get_all_packs(the_repository);

	while (p) {
		if ((!p->pack_local || p->pack_keep ||
				p->pack_keep_in_core) &&
			find_pack_entry_one(oid->hash, p)) {
			last_found = p;
			return 1;
		}
		if (p == last_found)
			p = get_all_packs(the_repository);
		else
			p = p->next;
		if (p == last_found)
			p = p->next;
	}
	return 0;
}

/*
 * Store a list of object IDs that should not be discarded because
 * they were either written too recently, or are reachable from
 * another object that was.
 *
 * This is filled by get_object_list.
 */
static struct oid_array recent_objects;

static int loosened_object_can_be_discarded(const struct object_id *oid,
					    timestamp_t mtime)
{
	if (!unpack_unreachable_expiration)
		return 0;
	if (mtime > unpack_unreachable_expiration)
		return 0;
	if (oid_array_lookup(&recent_objects, oid) >= 0)
		return 0;
	return 1;
}

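/*
 * Write back as loose objects any packed objects that did not make
 * it into the new pack and cannot simply be discarded, so that they
 * survive the removal of their old pack.
 */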
static void loosen_unused_packed_objects(void)
{
	struct packed_git *p;
	uint32_t i;
	uint32_t loosened_objects_nr = 0;
	struct object_id oid;

	for (p = get_all_packs(the_repository); p; p = p->next) {
		if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
			continue;

		if (open_pack_index(p))
			die(_("cannot open pack index"));

		for (i = 0; i < p->num_objects; i++) {
			nth_packed_object_id(&oid, p, i);
			if (!packlist_find(&to_pack, &oid) &&
			    !has_sha1_pack_kept_or_nonlocal(&oid) &&
			    !loosened_object_can_be_discarded(&oid, p->mtime)) {
				if (force_object_loose(&oid, p->mtime))
					die(_("unable to force loose object"));
				loosened_objects_nr++;
			}
		}
	}

	trace2_data_intmax("pack-objects", the_repository,
			   "loosen_unused_packed_objects/loosened", loosened_objects_nr);
}

/*
 * This tracks any options which pack-reuse code expects to be on, or which a
 * reader of the pack might not understand, and which would therefore prevent
 * blind reuse of what we have on disk.
 */
static int pack_options_allow_reuse(void)
{
	return allow_pack_reuse &&
	       pack_to_stdout &&
	       !ignore_packed_keep_on_disk &&
	       !ignore_packed_keep_in_core &&
	       (!local || !have_non_local_packs) &&
	       !incremental;
}

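/*
 * Try to enumerate the result set via the bitmap index. On success
 * the regular revision walk is skipped entirely and, when the
 * options above permit it, part of an existing packfile can be
 * reused verbatim.
 */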
static int get_object_list_from_bitmap(struct rev_info *revs)
{
	if (!(bitmap_git = prepare_bitmap_walk(revs, &filter_options, 0)))
		return -1;

	if (pack_options_allow_reuse() &&
	    !reuse_partial_packfile_from_bitmap(
			bitmap_git,
			&reuse_packfile,
			&reuse_packfile_objects,
			&reuse_packfile_bitmap)) {
		assert(reuse_packfile_objects);
		nr_result += reuse_packfile_objects;
		nr_seen += reuse_packfile_objects;
		display_progress(progress_state, nr_seen);
	}

	traverse_bitmap_commit_list(bitmap_git, revs,
				    &add_object_entry_from_bitmap);
	return 0;
}

static void record_recent_object(struct object *obj,
				 const char *name,
				 void *data)
{
	oid_array_append(&recent_objects, &obj->oid);
}

static void record_recent_commit(struct commit *commit, void *data)
{
	oid_array_append(&recent_objects, &commit->object.oid);
}

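/*
 * Flag the commit at each configured preferred tip with NEEDS_BITMAP
 * so that the bitmap writer prefers it when selecting which commits
 * get bitmaps of their own.
 */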
static int mark_bitmap_preferred_tip(const char *refname,
				     const struct object_id *oid, int flags,
				     void *_data)
{
	struct object_id peeled;
	struct object *object;

	if (!peel_iterated_oid(oid, &peeled))
		oid = &peeled;

	object = parse_object_or_die(oid, refname);
	if (object->type == OBJ_COMMIT)
		object->flags |= NEEDS_BITMAP;

	return 0;
}

static void mark_bitmap_preferred_tips(void)
{
	struct string_list_item *item;
	const struct string_list *preferred_tips;

	preferred_tips = bitmap_preferred_tips(the_repository);
	if (!preferred_tips)
		return;

	for_each_string_list_item(item, preferred_tips) {
		for_each_ref_in(item->string, mark_bitmap_preferred_tip, NULL);
	}
}

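/*
 * Run an internal rev-list traversal to enumerate the objects to
 * pack. Revision arguments are read from stdin, one per line, up to
 * an empty line, e.g. (illustrative):
 *
 *   HEAD
 *   --not
 *   v1.0
 */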
static void get_object_list(int ac, const char **av)
{
	struct rev_info revs;
	struct setup_revision_opt s_r_opt = {
		.allow_exclude_promisor_objects = 1,
	};
	char line[1000];
	int flags = 0;
	int save_warning;

	repo_init_revisions(the_repository, &revs, NULL);
	save_commit_buffer = 0;
	setup_revisions(ac, av, &revs, &s_r_opt);

	/* make sure shallows are read */
	is_repository_shallow(the_repository);

	save_warning = warn_on_object_refname_ambiguity;
	warn_on_object_refname_ambiguity = 0;

	while (fgets(line, sizeof(line), stdin) != NULL) {
		int len = strlen(line);
		if (len && line[len - 1] == '\n')
			line[--len] = 0;
		if (!len)
			break;
		if (*line == '-') {
			if (!strcmp(line, "--not")) {
				flags ^= UNINTERESTING;
				write_bitmap_index = 0;
				continue;
			}
			if (starts_with(line, "--shallow ")) {
				struct object_id oid;
				if (get_oid_hex(line + 10, &oid))
					die("not an object name '%s'", line + 10);
				register_shallow(the_repository, &oid);
				use_bitmap_index = 0;
				continue;
			}
			die(_("not a rev '%s'"), line);
		}
		if (handle_revision_arg(line, &revs, flags, REVARG_CANNOT_BE_FILENAME))
			die(_("bad revision '%s'"), line);
	}

	warn_on_object_refname_ambiguity = save_warning;

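	/*
	 * If the bitmap walk succeeds it has already enumerated (and
	 * possibly arranged to reuse) every object we need; the full
	 * traversal below is only the fallback path.
	 */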
	if (use_bitmap_index && !get_object_list_from_bitmap(&revs))
		return;

	if (use_delta_islands)
		load_delta_islands(the_repository, progress);

	if (write_bitmap_index)
		mark_bitmap_preferred_tips();

	if (prepare_revision_walk(&revs))
		die(_("revision walk setup failed"));
	mark_edges_uninteresting(&revs, show_edge, sparse);

	if (!fn_show_object)
		fn_show_object = show_object;
	traverse_commit_list_filtered(&filter_options, &revs,
				      show_commit, fn_show_object, NULL,
				      NULL);

	if (unpack_unreachable_expiration) {
		revs.ignore_missing_links = 1;
		if (add_unseen_recent_objects_to_traversal(&revs,
				unpack_unreachable_expiration))
			die(_("unable to add recent objects"));
		if (prepare_revision_walk(&revs))
			die(_("revision walk setup failed"));
		traverse_commit_list(&revs, record_recent_commit,
				     record_recent_object, NULL);
	}

	if (keep_unreachable)
		add_objects_in_unpacked_packs();
	if (pack_loose_unreachable)
		add_unreachable_loose_objects();
	if (unpack_unreachable)
		loosen_unused_packed_objects();

	oid_array_clear(&recent_objects);
}

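/*
 * Mark every local pack named with --keep-pack as kept in-core, so
 * that its objects are left out of the pack we are about to write.
 */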
static void add_extra_kept_packs(const struct string_list *names)
{
	struct packed_git *p;

	if (!names->nr)
		return;

	for (p = get_all_packs(the_repository); p; p = p->next) {
		const char *name = basename(p->pack_name);
		int i;

		if (!p->pack_local)
			continue;

		for (i = 0; i < names->nr; i++)
			if (!fspathcmp(name, names->items[i].string))
				break;

		if (i < names->nr) {
			p->pack_keep_in_core = 1;
			ignore_packed_keep_in_core = 1;
			continue;
		}
	}
}

static int option_parse_index_version(const struct option *opt,
				      const char *arg, int unset)
{
	char *c;
	const char *val = arg;

	BUG_ON_OPT_NEG(unset);

	pack_idx_opts.version = strtoul(val, &c, 10);
	if (pack_idx_opts.version > 2)
		die(_("unsupported index version %s"), val);
	if (*c == ',' && c[1])
		pack_idx_opts.off32_limit = strtoul(c+1, &c, 0);
	if (*c || pack_idx_opts.off32_limit & 0x80000000)
		die(_("bad index version '%s'"), val);
	return 0;
}

static int option_parse_unpack_unreachable(const struct option *opt,
					   const char *arg, int unset)
{
	if (unset) {
		unpack_unreachable = 0;
		unpack_unreachable_expiration = 0;
	} else {
		unpack_unreachable = 1;
		if (arg)
			unpack_unreachable_expiration = approxidate(arg);
	}
	return 0;
}

int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{
	int use_internal_rev_list = 0;
	int shallow = 0;
	int all_progress_implied = 0;
	struct strvec rp = STRVEC_INIT;
	int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
	int rev_list_index = 0;
	int stdin_packs = 0;
	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
	struct option pack_objects_options[] = {
		OPT_SET_INT('q', "quiet", &progress,
			    N_("do not show progress meter"), 0),
		OPT_SET_INT(0, "progress", &progress,
			    N_("show progress meter"), 1),
		OPT_SET_INT(0, "all-progress", &progress,
			    N_("show progress meter during object writing phase"), 2),
		OPT_BOOL(0, "all-progress-implied",
			 &all_progress_implied,
			 N_("similar to --all-progress when progress meter is shown")),
		OPT_CALLBACK_F(0, "index-version", NULL, N_("<version>[,<offset>]"),
		  N_("write the pack index file in the specified idx format version"),
		  PARSE_OPT_NONEG, option_parse_index_version),
		OPT_MAGNITUDE(0, "max-pack-size", &pack_size_limit,
			      N_("maximum size of each output pack file")),
		OPT_BOOL(0, "local", &local,
			 N_("ignore borrowed objects from alternate object store")),
		OPT_BOOL(0, "incremental", &incremental,
			 N_("ignore packed objects")),
		OPT_INTEGER(0, "window", &window,
			    N_("limit pack window by objects")),
		OPT_MAGNITUDE(0, "window-memory", &window_memory_limit,
			      N_("limit pack window by memory in addition to object limit")),
		OPT_INTEGER(0, "depth", &depth,
			    N_("maximum length of delta chain allowed in the resulting pack")),
		OPT_BOOL(0, "reuse-delta", &reuse_delta,
			 N_("reuse existing deltas")),
		OPT_BOOL(0, "reuse-object", &reuse_object,
			 N_("reuse existing objects")),
		OPT_BOOL(0, "delta-base-offset", &allow_ofs_delta,
			 N_("use OFS_DELTA objects")),
		OPT_INTEGER(0, "threads", &delta_search_threads,
			    N_("use threads when searching for best delta matches")),
		OPT_BOOL(0, "non-empty", &non_empty,
			 N_("do not create an empty pack output")),
		OPT_BOOL(0, "revs", &use_internal_rev_list,
			 N_("read revision arguments from standard input")),
		OPT_SET_INT_F(0, "unpacked", &rev_list_unpacked,
			      N_("limit the objects to those that are not yet packed"),
			      1, PARSE_OPT_NONEG),
		OPT_SET_INT_F(0, "all", &rev_list_all,
			      N_("include objects reachable from any reference"),
			      1, PARSE_OPT_NONEG),
		OPT_SET_INT_F(0, "reflog", &rev_list_reflog,
			      N_("include objects referred by reflog entries"),
			      1, PARSE_OPT_NONEG),
		OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
			      N_("include objects referred to by the index"),
			      1, PARSE_OPT_NONEG),
		OPT_BOOL(0, "stdin-packs", &stdin_packs,
			 N_("read packs from stdin")),
		OPT_BOOL(0, "stdout", &pack_to_stdout,
			 N_("output pack to stdout")),
		OPT_BOOL(0, "include-tag", &include_tag,
			 N_("include tag objects that refer to objects to be packed")),
		OPT_BOOL(0, "keep-unreachable", &keep_unreachable,
			 N_("keep unreachable objects")),
		OPT_BOOL(0, "pack-loose-unreachable", &pack_loose_unreachable,
			 N_("pack loose unreachable objects")),
		OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"),
		  N_("unpack unreachable objects newer than <time>"),
		  PARSE_OPT_OPTARG, option_parse_unpack_unreachable),
		OPT_BOOL(0, "sparse", &sparse,
			 N_("use the sparse reachability algorithm")),
		OPT_BOOL(0, "thin", &thin,
			 N_("create thin packs")),
		OPT_BOOL(0, "shallow", &shallow,
			 N_("create packs suitable for shallow fetches")),
		OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
			 N_("ignore packs that have companion .keep file")),
		OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
				N_("ignore this pack")),
		OPT_INTEGER(0, "compression", &pack_compression_level,
			    N_("pack compression level")),
		OPT_SET_INT(0, "keep-true-parents", &grafts_replace_parents,
			    N_("do not hide commits by grafts"), 0),
		OPT_BOOL(0, "use-bitmap-index", &use_bitmap_index,
			 N_("use a bitmap index if available to speed up counting objects")),
		OPT_SET_INT(0, "write-bitmap-index", &write_bitmap_index,
			    N_("write a bitmap index together with the pack index"),
			    WRITE_BITMAP_TRUE),
		OPT_SET_INT_F(0, "write-bitmap-index-quiet",
			      &write_bitmap_index,
			      N_("write a bitmap index if possible"),
			      WRITE_BITMAP_QUIET, PARSE_OPT_HIDDEN),
		OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
		OPT_CALLBACK_F(0, "missing", NULL, N_("action"),
		  N_("handling for missing objects"), PARSE_OPT_NONEG,
		  option_parse_missing_action),
		OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects,
			 N_("do not pack objects in promisor packfiles")),
		OPT_BOOL(0, "delta-islands", &use_delta_islands,
			 N_("respect islands during delta compression")),
		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
				N_("protocol"),
				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
		OPT_END(),
	};

	if (DFS_NUM_STATES > (1 << OE_DFS_STATE_BITS))
		BUG("too many dfs states, increase OE_DFS_STATE_BITS");

	read_replace_refs = 0;

	sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
	prepare_repo_settings(the_repository);
	if (sparse < 0)
		sparse = the_repository->settings.pack_use_sparse;

	reset_pack_idx_option(&pack_idx_opts);
	git_config(git_pack_config, NULL);
	if (git_env_bool(GIT_TEST_WRITE_REV_INDEX, 0))
		pack_idx_opts.flags |= WRITE_REV;

	progress = isatty(2);
	argc = parse_options(argc, argv, prefix, pack_objects_options,
			     pack_usage, 0);

	if (argc) {
		base_name = argv[0];
		argc--;
	}
	if (pack_to_stdout != !base_name || argc)
		usage_with_options(pack_usage, pack_objects_options);

	if (depth < 0)
		depth = 0;
	if (depth >= (1 << OE_DEPTH_BITS)) {
		warning(_("delta chain depth %d is too deep, forcing %d"),
			depth, (1 << OE_DEPTH_BITS) - 1);
		depth = (1 << OE_DEPTH_BITS) - 1;
	}
	if (cache_max_small_delta_size >= (1U << OE_Z_DELTA_BITS)) {
		warning(_("pack.deltaCacheLimit is too high, forcing %d"),
			(1U << OE_Z_DELTA_BITS) - 1);
		cache_max_small_delta_size = (1U << OE_Z_DELTA_BITS) - 1;
	}
	if (window < 0)
		window = 0;

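	/*
	 * Build the argument vector for the internal rev-list walk;
	 * the traversal options accepted above are translated into
	 * their rev-list equivalents here.
	 */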
	strvec_push(&rp, "pack-objects");
	if (thin) {
		use_internal_rev_list = 1;
		strvec_push(&rp, shallow
				? "--objects-edge-aggressive"
				: "--objects-edge");
	} else
		strvec_push(&rp, "--objects");

	if (rev_list_all) {
		use_internal_rev_list = 1;
		strvec_push(&rp, "--all");
	}
	if (rev_list_reflog) {
		use_internal_rev_list = 1;
		strvec_push(&rp, "--reflog");
	}
	if (rev_list_index) {
		use_internal_rev_list = 1;
		strvec_push(&rp, "--indexed-objects");
	}
	if (rev_list_unpacked && !stdin_packs) {
		use_internal_rev_list = 1;
		strvec_push(&rp, "--unpacked");
	}

	if (exclude_promisor_objects) {
		use_internal_rev_list = 1;
		fetch_if_missing = 0;
		strvec_push(&rp, "--exclude-promisor-objects");
	}
	if (unpack_unreachable || keep_unreachable || pack_loose_unreachable)
		use_internal_rev_list = 1;

	if (!reuse_object)
		reuse_delta = 0;
	if (pack_compression_level == -1)
		pack_compression_level = Z_DEFAULT_COMPRESSION;
	else if (pack_compression_level < 0 || pack_compression_level > Z_BEST_COMPRESSION)
		die(_("bad pack compression level %d"), pack_compression_level);

	if (!delta_search_threads)	/* --threads=0 means autodetect */
		delta_search_threads = online_cpus();

	if (!HAVE_THREADS && delta_search_threads != 1)
		warning(_("no threads support, ignoring --threads"));
	if (!pack_to_stdout && !pack_size_limit)
		pack_size_limit = pack_size_limit_cfg;
	if (pack_to_stdout && pack_size_limit)
		die(_("--max-pack-size cannot be used to build a pack for transfer"));
	if (pack_size_limit && pack_size_limit < 1024*1024) {
		warning(_("minimum pack size limit is 1 MiB"));
		pack_size_limit = 1024*1024;
	}

	if (!pack_to_stdout && thin)
		die(_("--thin cannot be used to build an indexable pack"));

	if (keep_unreachable && unpack_unreachable)
		die(_("--keep-unreachable and --unpack-unreachable are incompatible"));
	if (!rev_list_all || !rev_list_reflog || !rev_list_index)
		unpack_unreachable_expiration = 0;

	if (filter_options.choice) {
		if (!pack_to_stdout)
			die(_("cannot use --filter without --stdout"));
		if (stdin_packs)
			die(_("cannot use --filter with --stdin-packs"));
	}

	if (stdin_packs && use_internal_rev_list)
		die(_("cannot use internal rev list with --stdin-packs"));

	/*
	 * "Soft" reasons not to use bitmaps: for an on-disk repack we
	 * default to not using them because we want
	 *
	 * - to produce a good pack (with a bitmap index, not-yet-packed
	 *   objects are packed in a suboptimal order), and
	 *
	 * - to use the more robust pack-generation codepath (avoiding
	 *   possible bugs in the bitmap code and possible bitmap index
	 *   corruption).
	 */
	if (!pack_to_stdout)
		use_bitmap_index_default = 0;

	if (use_bitmap_index < 0)
		use_bitmap_index = use_bitmap_index_default;

	/* "hard" reasons not to use bitmaps; these just won't work at all */
	if (!use_internal_rev_list || (!pack_to_stdout && write_bitmap_index) || is_repository_shallow(the_repository))
		use_bitmap_index = 0;

	if (pack_to_stdout || !rev_list_all)
		write_bitmap_index = 0;

	if (use_delta_islands)
		strvec_push(&rp, "--topo-order");

	if (progress && all_progress_implied)
		progress = 2;

	add_extra_kept_packs(&keep_pack_list);
	if (ignore_packed_keep_on_disk) {
		struct packed_git *p;
		for (p = get_all_packs(the_repository); p; p = p->next)
			if (p->pack_local && p->pack_keep)
				break;
		if (!p) /* no keep-able packs found */
			ignore_packed_keep_on_disk = 0;
	}
	if (local) {
		/*
		 * unlike ignore_packed_keep_on_disk above, we do not
		 * want to unset "local" based on looking at packs, as
		 * it also covers non-local objects
		 */
		struct packed_git *p;
		for (p = get_all_packs(the_repository); p; p = p->next) {
			if (!p->pack_local) {
				have_non_local_packs = 1;
				break;
			}
		}
	}

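	/*
	 * The remaining work happens in three traced phases: enumerate
	 * the objects to pack, find deltas for them ("prepare-pack"),
	 * and write out the packfile itself.
	 */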
	trace2_region_enter("pack-objects", "enumerate-objects",
			    the_repository);
	prepare_packing_data(the_repository, &to_pack);

	if (progress)
		progress_state = start_progress(_("Enumerating objects"), 0);
	if (stdin_packs) {
		/* avoids adding objects in excluded packs */
		ignore_packed_keep_in_core = 1;
		read_packs_list_from_stdin();
		if (rev_list_unpacked)
			add_unreachable_loose_objects();
	} else if (!use_internal_rev_list)
		read_object_list_from_stdin();
	else {
		get_object_list(rp.nr, rp.v);
		strvec_clear(&rp);
	}
	cleanup_preferred_base();
	if (include_tag && nr_result)
		for_each_tag_ref(add_ref_tag, NULL);
	stop_progress(&progress_state);
	trace2_region_leave("pack-objects", "enumerate-objects",
			    the_repository);

	if (non_empty && !nr_result)
		return 0;
	if (nr_result) {
		trace2_region_enter("pack-objects", "prepare-pack",
				    the_repository);
		prepare_pack(window, depth);
		trace2_region_leave("pack-objects", "prepare-pack",
				    the_repository);
	}

	trace2_region_enter("pack-objects", "write-pack-file", the_repository);
	write_excluded_by_configs();
	write_pack_file();
	trace2_region_leave("pack-objects", "write-pack-file", the_repository);

	if (progress)
		fprintf_ln(stderr,
			   _("Total %"PRIu32" (delta %"PRIu32"),"
			     " reused %"PRIu32" (delta %"PRIu32"),"
			     " pack-reused %"PRIu32),
			   written, written_delta, reused, reused_delta,
			   reuse_packfile_objects);
	return 0;
}