1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "pack-objects.h"
9 
10 #include "zstream.h"
11 #include "delta.h"
12 #include "iterator.h"
13 #include "netops.h"
14 #include "pack.h"
15 #include "thread.h"
16 #include "tree.h"
17 #include "util.h"
18 #include "revwalk.h"
19 #include "commit_list.h"
20 
21 #include "git2/pack.h"
22 #include "git2/commit.h"
23 #include "git2/tag.h"
24 #include "git2/indexer.h"
25 #include "git2/config.h"
26 
27 struct unpacked {
28 	git_pobject *object;
29 	void *data;
30 	struct git_delta_index *index;
31 	size_t depth;
32 };
33 
34 struct tree_walk_context {
35 	git_packbuilder *pb;
36 	git_buf buf;
37 };
38 
39 struct pack_write_context {
40 	git_indexer *indexer;
41 	git_indexer_progress *stats;
42 };
43 
44 struct walk_object {
45 	git_oid id;
46 	unsigned int uninteresting:1,
47 		seen:1;
48 };
49 
50 #ifdef GIT_THREADS
51 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git_mutex_##op(&(pb)->mtx)
52 #else
53 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git__noop()
54 #endif
55 
56 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
57 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
58 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
59 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
60 
61 /* The minimal interval between progress updates (in seconds). */
62 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
63 
64 /* Size of the buffer to feed to zlib */
65 #define COMPRESS_BUFLEN (1024 * 1024)
66 
name_hash(const char * name)67 static unsigned name_hash(const char *name)
68 {
69 	unsigned c, hash = 0;
70 
71 	if (!name)
72 		return 0;
73 
74 	/*
75 	 * This effectively just creates a sortable number from the
76 	 * last sixteen non-whitespace characters. Last characters
77 	 * count "most", so things that end in ".c" sort together.
78 	 */
79 	while ((c = *name++) != 0) {
80 		if (git__isspace(c))
81 			continue;
82 		hash = (hash >> 2) + (c << 24);
83 	}
84 	return hash;
85 }
86 
packbuilder_config(git_packbuilder * pb)87 static int packbuilder_config(git_packbuilder *pb)
88 {
89 	git_config *config;
90 	int ret = 0;
91 	int64_t val;
92 
93 	if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
94 		return ret;
95 
96 #define config_get(KEY,DST,DFLT) do { \
97 	ret = git_config_get_int64(&val, config, KEY); \
98 	if (!ret) { \
99 		if (!git__is_sizet(val)) { \
100 			git_error_set(GIT_ERROR_CONFIG, \
101 				"configuration value '%s' is too large", KEY); \
102 			ret = -1; \
103 			goto out; \
104 		} \
105 		(DST) = (size_t)val; \
106 	} else if (ret == GIT_ENOTFOUND) { \
107 	    (DST) = (DFLT); \
108 	    ret = 0; \
109 	} else if (ret < 0) goto out; } while (0)
110 
111 	config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
112 		   GIT_PACK_DELTA_CACHE_SIZE);
113 	config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
114 		   GIT_PACK_DELTA_CACHE_LIMIT);
115 	config_get("pack.deltaCacheSize", pb->big_file_threshold,
116 		   GIT_PACK_BIG_FILE_THRESHOLD);
117 	config_get("pack.windowMemory", pb->window_memory_limit, 0);
118 
119 #undef config_get
120 
121 out:
122 	git_config_free(config);
123 
124 	return ret;
125 }
126 
git_packbuilder_new(git_packbuilder ** out,git_repository * repo)127 int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
128 {
129 	git_packbuilder *pb;
130 
131 	*out = NULL;
132 
133 	pb = git__calloc(1, sizeof(*pb));
134 	GIT_ERROR_CHECK_ALLOC(pb);
135 
136 	if (git_oidmap_new(&pb->object_ix) < 0 ||
137 	    git_oidmap_new(&pb->walk_objects) < 0 ||
138 	    git_pool_init(&pb->object_pool, sizeof(struct walk_object)) < 0)
139 		goto on_error;
140 
141 	pb->repo = repo;
142 	pb->nr_threads = 1; /* do not spawn any thread by default */
143 
144 	if (git_hash_ctx_init(&pb->ctx) < 0 ||
145 		git_zstream_init(&pb->zstream, GIT_ZSTREAM_DEFLATE) < 0 ||
146 		git_repository_odb(&pb->odb, repo) < 0 ||
147 		packbuilder_config(pb) < 0)
148 		goto on_error;
149 
150 #ifdef GIT_THREADS
151 
152 	if (git_mutex_init(&pb->cache_mutex) ||
153 		git_mutex_init(&pb->progress_mutex) ||
154 		git_cond_init(&pb->progress_cond))
155 	{
156 		git_error_set(GIT_ERROR_OS, "failed to initialize packbuilder mutex");
157 		goto on_error;
158 	}
159 
160 #endif
161 
162 	*out = pb;
163 	return 0;
164 
165 on_error:
166 	git_packbuilder_free(pb);
167 	return -1;
168 }
169 
git_packbuilder_set_threads(git_packbuilder * pb,unsigned int n)170 unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
171 {
172 	GIT_ASSERT_ARG(pb);
173 
174 #ifdef GIT_THREADS
175 	pb->nr_threads = n;
176 #else
177 	GIT_UNUSED(n);
178 	GIT_ASSERT(pb->nr_threads == 1);
179 #endif
180 
181 	return pb->nr_threads;
182 }
183 
rehash(git_packbuilder * pb)184 static int rehash(git_packbuilder *pb)
185 {
186 	git_pobject *po;
187 	size_t i;
188 
189 	git_oidmap_clear(pb->object_ix);
190 
191 	for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
192 		if (git_oidmap_set(pb->object_ix, &po->id, po) < 0)
193 			return -1;
194 	}
195 
196 	return 0;
197 }
198 
git_packbuilder_insert(git_packbuilder * pb,const git_oid * oid,const char * name)199 int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
200 			   const char *name)
201 {
202 	git_pobject *po;
203 	size_t newsize;
204 	int ret;
205 
206 	GIT_ASSERT_ARG(pb);
207 	GIT_ASSERT_ARG(oid);
208 
209 	/* If the object already exists in the hash table, then we don't
210 	 * have any work to do */
211 	if (git_oidmap_exists(pb->object_ix, oid))
212 		return 0;
213 
214 	if (pb->nr_objects >= pb->nr_alloc) {
215 		GIT_ERROR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
216 		GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize, newsize / 2, 3);
217 
218 		if (!git__is_uint32(newsize)) {
219 			git_error_set(GIT_ERROR_NOMEMORY, "packfile too large to fit in memory.");
220 			return -1;
221 		}
222 
223 		pb->nr_alloc = newsize;
224 
225 		pb->object_list = git__reallocarray(pb->object_list,
226 			pb->nr_alloc, sizeof(*po));
227 		GIT_ERROR_CHECK_ALLOC(pb->object_list);
228 
229 		if (rehash(pb) < 0)
230 			return -1;
231 	}
232 
233 	po = pb->object_list + pb->nr_objects;
234 	memset(po, 0x0, sizeof(*po));
235 
236 	if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
237 		return ret;
238 
239 	pb->nr_objects++;
240 	git_oid_cpy(&po->id, oid);
241 	po->hash = name_hash(name);
242 
243 	if (git_oidmap_set(pb->object_ix, &po->id, po) < 0) {
244 		git_error_set_oom();
245 		return -1;
246 	}
247 
248 	pb->done = false;
249 
250 	if (pb->progress_cb) {
251 		double current_time = git__timer();
252 		double elapsed = current_time - pb->last_progress_report_time;
253 
254 		if (elapsed < 0 || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
255 			pb->last_progress_report_time = current_time;
256 
257 			ret = pb->progress_cb(
258 				GIT_PACKBUILDER_ADDING_OBJECTS,
259 				pb->nr_objects, 0, pb->progress_cb_payload);
260 
261 			if (ret)
262 				return git_error_set_after_callback(ret);
263 		}
264 	}
265 
266 	return 0;
267 }
268 
get_delta(void ** out,git_odb * odb,git_pobject * po)269 static int get_delta(void **out, git_odb *odb, git_pobject *po)
270 {
271 	git_odb_object *src = NULL, *trg = NULL;
272 	size_t delta_size;
273 	void *delta_buf;
274 	int error;
275 
276 	*out = NULL;
277 
278 	if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
279 	    git_odb_read(&trg, odb, &po->id) < 0)
280 		goto on_error;
281 
282 	error = git_delta(&delta_buf, &delta_size,
283 		git_odb_object_data(src), git_odb_object_size(src),
284 		git_odb_object_data(trg), git_odb_object_size(trg),
285 		0);
286 
287 	if (error < 0 && error != GIT_EBUFS)
288 		goto on_error;
289 
290 	if (error == GIT_EBUFS || delta_size != po->delta_size) {
291 		git_error_set(GIT_ERROR_INVALID, "delta size changed");
292 		goto on_error;
293 	}
294 
295 	*out = delta_buf;
296 
297 	git_odb_object_free(src);
298 	git_odb_object_free(trg);
299 	return 0;
300 
301 on_error:
302 	git_odb_object_free(src);
303 	git_odb_object_free(trg);
304 	return -1;
305 }
306 
write_object(git_packbuilder * pb,git_pobject * po,int (* write_cb)(void * buf,size_t size,void * cb_data),void * cb_data)307 static int write_object(
308 	git_packbuilder *pb,
309 	git_pobject *po,
310 	int (*write_cb)(void *buf, size_t size, void *cb_data),
311 	void *cb_data)
312 {
313 	git_odb_object *obj = NULL;
314 	git_object_t type;
315 	unsigned char hdr[10], *zbuf = NULL;
316 	void *data = NULL;
317 	size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
318 	int error;
319 
320 	/*
321 	 * If we have a delta base, let's use the delta to save space.
322 	 * Otherwise load the whole object. 'data' ends up pointing to
323 	 * whatever data we want to put into the packfile.
324 	 */
325 	if (po->delta) {
326 		if (po->delta_data)
327 			data = po->delta_data;
328 		else if ((error = get_delta(&data, pb->odb, po)) < 0)
329 				goto done;
330 
331 		data_len = po->delta_size;
332 		type = GIT_OBJECT_REF_DELTA;
333 	} else {
334 		if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
335 			goto done;
336 
337 		data = (void *)git_odb_object_data(obj);
338 		data_len = git_odb_object_size(obj);
339 		type = git_odb_object_type(obj);
340 	}
341 
342 	/* Write header */
343 	if ((error = git_packfile__object_header(&hdr_len, hdr, data_len, type)) < 0 ||
344 	    (error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
345 	    (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
346 		goto done;
347 
348 	if (type == GIT_OBJECT_REF_DELTA) {
349 		if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
350 			(error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
351 			goto done;
352 	}
353 
354 	/* Write data */
355 	if (po->z_delta_size) {
356 		data_len = po->z_delta_size;
357 
358 		if ((error = write_cb(data, data_len, cb_data)) < 0 ||
359 			(error = git_hash_update(&pb->ctx, data, data_len)) < 0)
360 			goto done;
361 	} else {
362 		zbuf = git__malloc(zbuf_len);
363 		GIT_ERROR_CHECK_ALLOC(zbuf);
364 
365 		git_zstream_reset(&pb->zstream);
366 
367 		if ((error = git_zstream_set_input(&pb->zstream, data, data_len)) < 0)
368 			goto done;
369 
370 		while (!git_zstream_done(&pb->zstream)) {
371 			if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
372 				(error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
373 				(error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
374 				goto done;
375 
376 			zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
377 		}
378 	}
379 
380 	/*
381 	 * If po->delta is true, data is a delta and it is our
382 	 * responsibility to free it (otherwise it's a git_object's
383 	 * data). We set po->delta_data to NULL in case we got the
384 	 * data from there instead of get_delta(). If we didn't,
385 	 * there's no harm.
386 	 */
387 	if (po->delta) {
388 		git__free(data);
389 		po->delta_data = NULL;
390 	}
391 
392 	pb->nr_written++;
393 
394 done:
395 	git__free(zbuf);
396 	git_odb_object_free(obj);
397 	return error;
398 }
399 
400 enum write_one_status {
401 	WRITE_ONE_SKIP = -1, /* already written */
402 	WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
403 	WRITE_ONE_WRITTEN = 1, /* normal */
404 	WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
405 };
406 
write_one(enum write_one_status * status,git_packbuilder * pb,git_pobject * po,int (* write_cb)(void * buf,size_t size,void * cb_data),void * cb_data)407 static int write_one(
408 	enum write_one_status *status,
409 	git_packbuilder *pb,
410 	git_pobject *po,
411 	int (*write_cb)(void *buf, size_t size, void *cb_data),
412 	void *cb_data)
413 {
414 	int error;
415 
416 	if (po->recursing) {
417 		*status = WRITE_ONE_RECURSIVE;
418 		return 0;
419 	} else if (po->written) {
420 		*status = WRITE_ONE_SKIP;
421 		return 0;
422 	}
423 
424 	if (po->delta) {
425 		po->recursing = 1;
426 
427 		if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
428 			return error;
429 
430 		/* we cannot depend on this one */
431 		if (*status == WRITE_ONE_RECURSIVE)
432 			po->delta = NULL;
433 	}
434 
435 	*status = WRITE_ONE_WRITTEN;
436 	po->written = 1;
437 	po->recursing = 0;
438 
439 	return write_object(pb, po, write_cb, cb_data);
440 }
441 
add_to_write_order(git_pobject ** wo,size_t * endp,git_pobject * po)442 GIT_INLINE(void) add_to_write_order(git_pobject **wo, size_t *endp,
443 	git_pobject *po)
444 {
445 	if (po->filled)
446 		return;
447 	wo[(*endp)++] = po;
448 	po->filled = 1;
449 }
450 
add_descendants_to_write_order(git_pobject ** wo,size_t * endp,git_pobject * po)451 static void add_descendants_to_write_order(git_pobject **wo, size_t *endp,
452 	git_pobject *po)
453 {
454 	int add_to_order = 1;
455 	while (po) {
456 		if (add_to_order) {
457 			git_pobject *s;
458 			/* add this node... */
459 			add_to_write_order(wo, endp, po);
460 			/* all its siblings... */
461 			for (s = po->delta_sibling; s; s = s->delta_sibling) {
462 				add_to_write_order(wo, endp, s);
463 			}
464 		}
465 		/* drop down a level to add left subtree nodes if possible */
466 		if (po->delta_child) {
467 			add_to_order = 1;
468 			po = po->delta_child;
469 		} else {
470 			add_to_order = 0;
471 			/* our sibling might have some children, it is next */
472 			if (po->delta_sibling) {
473 				po = po->delta_sibling;
474 				continue;
475 			}
476 			/* go back to our parent node */
477 			po = po->delta;
478 			while (po && !po->delta_sibling) {
479 				/* we're on the right side of a subtree, keep
480 				 * going up until we can go right again */
481 				po = po->delta;
482 			}
483 			if (!po) {
484 				/* done- we hit our original root node */
485 				return;
486 			}
487 			/* pass it off to sibling at this level */
488 			po = po->delta_sibling;
489 		}
490 	};
491 }
492 
add_family_to_write_order(git_pobject ** wo,size_t * endp,git_pobject * po)493 static void add_family_to_write_order(git_pobject **wo, size_t *endp,
494 	git_pobject *po)
495 {
496 	git_pobject *root;
497 
498 	for (root = po; root->delta; root = root->delta)
499 		; /* nothing */
500 	add_descendants_to_write_order(wo, endp, root);
501 }
502 
cb_tag_foreach(const char * name,git_oid * oid,void * data)503 static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
504 {
505 	git_packbuilder *pb = data;
506 	git_pobject *po;
507 
508 	GIT_UNUSED(name);
509 
510 	if ((po = git_oidmap_get(pb->object_ix, oid)) == NULL)
511 		return 0;
512 
513 	po->tagged = 1;
514 
515 	/* TODO: peel objects */
516 
517 	return 0;
518 }
519 
compute_write_order(git_pobject *** out,git_packbuilder * pb)520 static int compute_write_order(git_pobject ***out, git_packbuilder *pb)
521 {
522 	size_t i, wo_end, last_untagged;
523 	git_pobject **wo;
524 
525 	*out = NULL;
526 
527 	if (!pb->nr_objects)
528 		return 0;
529 
530 	if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
531 		return -1;
532 
533 	for (i = 0; i < pb->nr_objects; i++) {
534 		git_pobject *po = pb->object_list + i;
535 		po->tagged = 0;
536 		po->filled = 0;
537 		po->delta_child = NULL;
538 		po->delta_sibling = NULL;
539 	}
540 
541 	/*
542 	 * Fully connect delta_child/delta_sibling network.
543 	 * Make sure delta_sibling is sorted in the original
544 	 * recency order.
545 	 */
546 	for (i = pb->nr_objects; i > 0;) {
547 		git_pobject *po = &pb->object_list[--i];
548 		if (!po->delta)
549 			continue;
550 		/* Mark me as the first child */
551 		po->delta_sibling = po->delta->delta_child;
552 		po->delta->delta_child = po;
553 	}
554 
555 	/*
556 	 * Mark objects that are at the tip of tags.
557 	 */
558 	if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
559 		git__free(wo);
560 		return -1;
561 	}
562 
563 	/*
564 	 * Give the objects in the original recency order until
565 	 * we see a tagged tip.
566 	 */
567 	for (i = wo_end = 0; i < pb->nr_objects; i++) {
568 		git_pobject *po = pb->object_list + i;
569 		if (po->tagged)
570 			break;
571 		add_to_write_order(wo, &wo_end, po);
572 	}
573 	last_untagged = i;
574 
575 	/*
576 	 * Then fill all the tagged tips.
577 	 */
578 	for (; i < pb->nr_objects; i++) {
579 		git_pobject *po = pb->object_list + i;
580 		if (po->tagged)
581 			add_to_write_order(wo, &wo_end, po);
582 	}
583 
584 	/*
585 	 * And then all remaining commits and tags.
586 	 */
587 	for (i = last_untagged; i < pb->nr_objects; i++) {
588 		git_pobject *po = pb->object_list + i;
589 		if (po->type != GIT_OBJECT_COMMIT &&
590 		    po->type != GIT_OBJECT_TAG)
591 			continue;
592 		add_to_write_order(wo, &wo_end, po);
593 	}
594 
595 	/*
596 	 * And then all the trees.
597 	 */
598 	for (i = last_untagged; i < pb->nr_objects; i++) {
599 		git_pobject *po = pb->object_list + i;
600 		if (po->type != GIT_OBJECT_TREE)
601 			continue;
602 		add_to_write_order(wo, &wo_end, po);
603 	}
604 
605 	/*
606 	 * Finally all the rest in really tight order
607 	 */
608 	for (i = last_untagged; i < pb->nr_objects; i++) {
609 		git_pobject *po = pb->object_list + i;
610 		if (!po->filled)
611 			add_family_to_write_order(wo, &wo_end, po);
612 	}
613 
614 	if (wo_end != pb->nr_objects) {
615 		git__free(wo);
616 		git_error_set(GIT_ERROR_INVALID, "invalid write order");
617 		return -1;
618 	}
619 
620 	*out = wo;
621 	return 0;
622 }
623 
write_pack(git_packbuilder * pb,int (* write_cb)(void * buf,size_t size,void * cb_data),void * cb_data)624 static int write_pack(git_packbuilder *pb,
625 	int (*write_cb)(void *buf, size_t size, void *cb_data),
626 	void *cb_data)
627 {
628 	git_pobject **write_order;
629 	git_pobject *po;
630 	enum write_one_status status;
631 	struct git_pack_header ph;
632 	git_oid entry_oid;
633 	size_t i = 0;
634 	int error;
635 
636 	if ((error = compute_write_order(&write_order, pb)) < 0)
637 		return error;
638 
639 	if (!git__is_uint32(pb->nr_objects)) {
640 		git_error_set(GIT_ERROR_INVALID, "too many objects");
641 		error = -1;
642 		goto done;
643 	}
644 
645 	/* Write pack header */
646 	ph.hdr_signature = htonl(PACK_SIGNATURE);
647 	ph.hdr_version = htonl(PACK_VERSION);
648 	ph.hdr_entries = htonl(pb->nr_objects);
649 
650 	if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
651 		(error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
652 		goto done;
653 
654 	pb->nr_remaining = pb->nr_objects;
655 	do {
656 		pb->nr_written = 0;
657 		for ( ; i < pb->nr_objects; ++i) {
658 			po = write_order[i];
659 
660 			if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
661 				goto done;
662 		}
663 
664 		pb->nr_remaining -= pb->nr_written;
665 	} while (pb->nr_remaining && i < pb->nr_objects);
666 
667 	if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0)
668 		goto done;
669 
670 	error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
671 
672 done:
673 	/* if callback cancelled writing, we must still free delta_data */
674 	for ( ; i < pb->nr_objects; ++i) {
675 		po = write_order[i];
676 		if (po->delta_data) {
677 			git__free(po->delta_data);
678 			po->delta_data = NULL;
679 		}
680 	}
681 
682 	git__free(write_order);
683 	return error;
684 }
685 
write_pack_buf(void * buf,size_t size,void * data)686 static int write_pack_buf(void *buf, size_t size, void *data)
687 {
688 	git_buf *b = (git_buf *)data;
689 	return git_buf_put(b, buf, size);
690 }
691 
type_size_sort(const void * _a,const void * _b)692 static int type_size_sort(const void *_a, const void *_b)
693 {
694 	const git_pobject *a = (git_pobject *)_a;
695 	const git_pobject *b = (git_pobject *)_b;
696 
697 	if (a->type > b->type)
698 		return -1;
699 	if (a->type < b->type)
700 		return 1;
701 	if (a->hash > b->hash)
702 		return -1;
703 	if (a->hash < b->hash)
704 		return 1;
705 	/*
706 	 * TODO
707 	 *
708 	if (a->preferred_base > b->preferred_base)
709 		return -1;
710 	if (a->preferred_base < b->preferred_base)
711 		return 1;
712 	*/
713 	if (a->size > b->size)
714 		return -1;
715 	if (a->size < b->size)
716 		return 1;
717 	return a < b ? -1 : (a > b); /* newest first */
718 }
719 
delta_cacheable(git_packbuilder * pb,size_t src_size,size_t trg_size,size_t delta_size)720 static int delta_cacheable(
721 	git_packbuilder *pb,
722 	size_t src_size,
723 	size_t trg_size,
724 	size_t delta_size)
725 {
726 	size_t new_size;
727 
728 	if (git__add_sizet_overflow(&new_size, pb->delta_cache_size, delta_size))
729 		return 0;
730 
731 	if (pb->max_delta_cache_size && new_size > pb->max_delta_cache_size)
732 		return 0;
733 
734 	if (delta_size < pb->cache_max_small_delta_size)
735 		return 1;
736 
737 	/* cache delta, if objects are large enough compared to delta size */
738 	if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
739 		return 1;
740 
741 	return 0;
742 }
743 
try_delta(git_packbuilder * pb,struct unpacked * trg,struct unpacked * src,size_t max_depth,size_t * mem_usage,int * ret)744 static int try_delta(git_packbuilder *pb, struct unpacked *trg,
745 		     struct unpacked *src, size_t max_depth,
746 			 size_t *mem_usage, int *ret)
747 {
748 	git_pobject *trg_object = trg->object;
749 	git_pobject *src_object = src->object;
750 	git_odb_object *obj;
751 	size_t trg_size, src_size, delta_size, sizediff, max_size, sz;
752 	size_t ref_depth;
753 	void *delta_buf;
754 
755 	/* Don't bother doing diffs between different types */
756 	if (trg_object->type != src_object->type) {
757 		*ret = -1;
758 		return 0;
759 	}
760 
761 	*ret = 0;
762 
763 	/* TODO: support reuse-delta */
764 
765 	/* Let's not bust the allowed depth. */
766 	if (src->depth >= max_depth)
767 		return 0;
768 
769 	/* Now some size filtering heuristics. */
770 	trg_size = trg_object->size;
771 	if (!trg_object->delta) {
772 		max_size = trg_size/2 - 20;
773 		ref_depth = 1;
774 	} else {
775 		max_size = trg_object->delta_size;
776 		ref_depth = trg->depth;
777 	}
778 
779 	max_size = (uint64_t)max_size * (max_depth - src->depth) /
780 					(max_depth - ref_depth + 1);
781 	if (max_size == 0)
782 		return 0;
783 
784 	src_size = src_object->size;
785 	sizediff = src_size < trg_size ? trg_size - src_size : 0;
786 	if (sizediff >= max_size)
787 		return 0;
788 	if (trg_size < src_size / 32)
789 		return 0;
790 
791 	/* Load data if not already done */
792 	if (!trg->data) {
793 		if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
794 			return -1;
795 
796 		sz = git_odb_object_size(obj);
797 		trg->data = git__malloc(sz);
798 		GIT_ERROR_CHECK_ALLOC(trg->data);
799 		memcpy(trg->data, git_odb_object_data(obj), sz);
800 
801 		git_odb_object_free(obj);
802 
803 		if (sz != trg_size) {
804 			git_error_set(GIT_ERROR_INVALID,
805 				   "inconsistent target object length");
806 			return -1;
807 		}
808 
809 		*mem_usage += sz;
810 	}
811 	if (!src->data) {
812 		size_t obj_sz;
813 
814 		if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
815 			!git__is_ulong(obj_sz = git_odb_object_size(obj)))
816 			return -1;
817 
818 		sz = obj_sz;
819 		src->data = git__malloc(sz);
820 		GIT_ERROR_CHECK_ALLOC(src->data);
821 		memcpy(src->data, git_odb_object_data(obj), sz);
822 
823 		git_odb_object_free(obj);
824 
825 		if (sz != src_size) {
826 			git_error_set(GIT_ERROR_INVALID,
827 				   "inconsistent source object length");
828 			return -1;
829 		}
830 
831 		*mem_usage += sz;
832 	}
833 	if (!src->index) {
834 		if (git_delta_index_init(&src->index, src->data, src_size) < 0)
835 			return 0; /* suboptimal pack - out of memory */
836 
837 		*mem_usage += git_delta_index_size(src->index);
838 	}
839 
840 	if (git_delta_create_from_index(&delta_buf, &delta_size, src->index, trg->data, trg_size,
841 		max_size) < 0)
842 		return 0;
843 
844 	if (trg_object->delta) {
845 		/* Prefer only shallower same-sized deltas. */
846 		if (delta_size == trg_object->delta_size &&
847 		    src->depth + 1 >= trg->depth) {
848 			git__free(delta_buf);
849 			return 0;
850 		}
851 	}
852 
853 	GIT_ASSERT(git_packbuilder__cache_lock(pb) == 0);
854 
855 	if (trg_object->delta_data) {
856 		git__free(trg_object->delta_data);
857 		GIT_ASSERT(pb->delta_cache_size >= trg_object->delta_size);
858 		pb->delta_cache_size -= trg_object->delta_size;
859 		trg_object->delta_data = NULL;
860 	}
861 	if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
862 		bool overflow = git__add_sizet_overflow(
863 			&pb->delta_cache_size, pb->delta_cache_size, delta_size);
864 
865 		GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
866 
867 		if (overflow) {
868 			git__free(delta_buf);
869 			return -1;
870 		}
871 
872 		trg_object->delta_data = git__realloc(delta_buf, delta_size);
873 		GIT_ERROR_CHECK_ALLOC(trg_object->delta_data);
874 	} else {
875 		/* create delta when writing the pack */
876 		GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
877 		git__free(delta_buf);
878 	}
879 
880 	trg_object->delta = src_object;
881 	trg_object->delta_size = delta_size;
882 	trg->depth = src->depth + 1;
883 
884 	*ret = 1;
885 	return 0;
886 }
887 
check_delta_limit(git_pobject * me,size_t n)888 static size_t check_delta_limit(git_pobject *me, size_t n)
889 {
890 	git_pobject *child = me->delta_child;
891 	size_t m = n;
892 
893 	while (child) {
894 		size_t c = check_delta_limit(child, n + 1);
895 		if (m < c)
896 			m = c;
897 		child = child->delta_sibling;
898 	}
899 	return m;
900 }
901 
free_unpacked(struct unpacked * n)902 static size_t free_unpacked(struct unpacked *n)
903 {
904 	size_t freed_mem = 0;
905 
906 	if (n->index) {
907 		freed_mem += git_delta_index_size(n->index);
908 		git_delta_index_free(n->index);
909 	}
910 	n->index = NULL;
911 
912 	if (n->data) {
913 		freed_mem += n->object->size;
914 		git__free(n->data);
915 		n->data = NULL;
916 	}
917 	n->object = NULL;
918 	n->depth = 0;
919 	return freed_mem;
920 }
921 
report_delta_progress(git_packbuilder * pb,uint32_t count,bool force)922 static int report_delta_progress(
923 	git_packbuilder *pb, uint32_t count, bool force)
924 {
925 	int ret;
926 
927 	if (pb->progress_cb) {
928 		double current_time = git__timer();
929 		double elapsed = current_time - pb->last_progress_report_time;
930 
931 		if (force || elapsed < 0 || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
932 			pb->last_progress_report_time = current_time;
933 
934 			ret = pb->progress_cb(
935 				GIT_PACKBUILDER_DELTAFICATION,
936 				count, pb->nr_objects, pb->progress_cb_payload);
937 
938 			if (ret)
939 				return git_error_set_after_callback(ret);
940 		}
941 	}
942 
943 	return 0;
944 }
945 
find_deltas(git_packbuilder * pb,git_pobject ** list,size_t * list_size,size_t window,size_t depth)946 static int find_deltas(git_packbuilder *pb, git_pobject **list,
947 	size_t *list_size, size_t window, size_t depth)
948 {
949 	git_pobject *po;
950 	git_buf zbuf = GIT_BUF_INIT;
951 	struct unpacked *array;
952 	size_t idx = 0, count = 0;
953 	size_t mem_usage = 0;
954 	size_t i;
955 	int error = -1;
956 
957 	array = git__calloc(window, sizeof(struct unpacked));
958 	GIT_ERROR_CHECK_ALLOC(array);
959 
960 	for (;;) {
961 		struct unpacked *n = array + idx;
962 		size_t max_depth, j, best_base = SIZE_MAX;
963 
964 		GIT_ASSERT(git_packbuilder__progress_lock(pb) == 0);
965 		if (!*list_size) {
966 			GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
967 			break;
968 		}
969 
970 		pb->nr_deltified += 1;
971 		report_delta_progress(pb, pb->nr_deltified, false);
972 
973 		po = *list++;
974 		(*list_size)--;
975 		GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
976 
977 		mem_usage -= free_unpacked(n);
978 		n->object = po;
979 
980 		while (pb->window_memory_limit &&
981 		       mem_usage > pb->window_memory_limit &&
982 		       count > 1) {
983 			size_t tail = (idx + window - count) % window;
984 			mem_usage -= free_unpacked(array + tail);
985 			count--;
986 		}
987 
988 		/*
989 		 * If the current object is at pack edge, take the depth the
990 		 * objects that depend on the current object into account
991 		 * otherwise they would become too deep.
992 		 */
993 		max_depth = depth;
994 		if (po->delta_child) {
995 			size_t delta_limit = check_delta_limit(po, 0);
996 
997 			if (delta_limit > max_depth)
998 				goto next;
999 
1000 			max_depth -= delta_limit;
1001 		}
1002 
1003 		j = window;
1004 		while (--j > 0) {
1005 			int ret;
1006 			size_t other_idx = idx + j;
1007 			struct unpacked *m;
1008 
1009 			if (other_idx >= window)
1010 				other_idx -= window;
1011 
1012 			m = array + other_idx;
1013 			if (!m->object)
1014 				break;
1015 
1016 			if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
1017 				goto on_error;
1018 			if (ret < 0)
1019 				break;
1020 			else if (ret > 0)
1021 				best_base = other_idx;
1022 		}
1023 
1024 		/*
1025 		 * If we decided to cache the delta data, then it is best
1026 		 * to compress it right away.  First because we have to do
1027 		 * it anyway, and doing it here while we're threaded will
1028 		 * save a lot of time in the non threaded write phase,
1029 		 * as well as allow for caching more deltas within
1030 		 * the same cache size limit.
1031 		 * ...
1032 		 * But only if not writing to stdout, since in that case
1033 		 * the network is most likely throttling writes anyway,
1034 		 * and therefore it is best to go to the write phase ASAP
1035 		 * instead, as we can afford spending more time compressing
1036 		 * between writes at that moment.
1037 		 */
1038 		if (po->delta_data) {
1039 			if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
1040 				goto on_error;
1041 
1042 			git__free(po->delta_data);
1043 			po->delta_data = git__malloc(zbuf.size);
1044 			GIT_ERROR_CHECK_ALLOC(po->delta_data);
1045 
1046 			memcpy(po->delta_data, zbuf.ptr, zbuf.size);
1047 			po->z_delta_size = zbuf.size;
1048 			git_buf_clear(&zbuf);
1049 
1050 			GIT_ASSERT(git_packbuilder__cache_lock(pb) == 0);
1051 			pb->delta_cache_size -= po->delta_size;
1052 			pb->delta_cache_size += po->z_delta_size;
1053 			GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
1054 		}
1055 
1056 		/*
1057 		 * If we made n a delta, and if n is already at max
1058 		 * depth, leaving it in the window is pointless.  we
1059 		 * should evict it first.
1060 		 */
1061 		if (po->delta && max_depth <= n->depth)
1062 			continue;
1063 
1064 		/*
1065 		 * Move the best delta base up in the window, after the
1066 		 * currently deltified object, to keep it longer.  It will
1067 		 * be the first base object to be attempted next.
1068 		 */
1069 		if (po->delta) {
1070 			struct unpacked swap = array[best_base];
1071 			size_t dist = (window + idx - best_base) % window;
1072 			size_t dst = best_base;
1073 			while (dist--) {
1074 				size_t src = (dst + 1) % window;
1075 				array[dst] = array[src];
1076 				dst = src;
1077 			}
1078 			array[dst] = swap;
1079 		}
1080 
1081 		next:
1082 		idx++;
1083 		if (count + 1 < window)
1084 			count++;
1085 		if (idx >= window)
1086 			idx = 0;
1087 	}
1088 	error = 0;
1089 
1090 on_error:
1091 	for (i = 0; i < window; ++i) {
1092 		git__free(array[i].index);
1093 		git__free(array[i].data);
1094 	}
1095 	git__free(array);
1096 	git_buf_dispose(&zbuf);
1097 
1098 	return error;
1099 }
1100 
1101 #ifdef GIT_THREADS
1102 
1103 struct thread_params {
1104 	git_thread thread;
1105 	git_packbuilder *pb;
1106 
1107 	git_pobject **list;
1108 
1109 	git_cond cond;
1110 	git_mutex mutex;
1111 
1112 	size_t list_size;
1113 	size_t remaining;
1114 
1115 	size_t window;
1116 	size_t depth;
1117 	size_t working;
1118 	size_t data_ready;
1119 };
1120 
threaded_find_deltas(void * arg)1121 static void *threaded_find_deltas(void *arg)
1122 {
1123 	struct thread_params *me = arg;
1124 
1125 	while (me->remaining) {
1126 		if (find_deltas(me->pb, me->list, &me->remaining,
1127 				me->window, me->depth) < 0) {
1128 			; /* TODO */
1129 		}
1130 
1131 		GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_lock(me->pb) == 0, NULL);
1132 		me->working = 0;
1133 		git_cond_signal(&me->pb->progress_cond);
1134 		GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_unlock(me->pb) == 0, NULL);
1135 
1136 		if (git_mutex_lock(&me->mutex)) {
1137 			git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1138 			return NULL;
1139 		}
1140 
1141 		while (!me->data_ready)
1142 			git_cond_wait(&me->cond, &me->mutex);
1143 
1144 		/*
1145 		 * We must not set ->data_ready before we wait on the
1146 		 * condition because the main thread may have set it to 1
1147 		 * before we get here. In order to be sure that new
1148 		 * work is available if we see 1 in ->data_ready, it
1149 		 * was initialized to 0 before this thread was spawned
1150 		 * and we reset it to 0 right away.
1151 		 */
1152 		me->data_ready = 0;
1153 		git_mutex_unlock(&me->mutex);
1154 	}
1155 	/* leave ->working 1 so that this doesn't get more work assigned */
1156 	return NULL;
1157 }
1158 
ll_find_deltas(git_packbuilder * pb,git_pobject ** list,size_t list_size,size_t window,size_t depth)1159 static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
1160 			  size_t list_size, size_t window, size_t depth)
1161 {
1162 	struct thread_params *p;
1163 	size_t i;
1164 	int ret, active_threads = 0;
1165 
1166 	if (!pb->nr_threads)
1167 		pb->nr_threads = git__online_cpus();
1168 
1169 	if (pb->nr_threads <= 1) {
1170 		find_deltas(pb, list, &list_size, window, depth);
1171 		return 0;
1172 	}
1173 
1174 	p = git__mallocarray(pb->nr_threads, sizeof(*p));
1175 	GIT_ERROR_CHECK_ALLOC(p);
1176 
1177 	/* Partition the work among the threads */
1178 	for (i = 0; i < pb->nr_threads; ++i) {
1179 		size_t sub_size = list_size / (pb->nr_threads - i);
1180 
1181 		/* don't use too small segments or no deltas will be found */
1182 		if (sub_size < 2*window && i+1 < pb->nr_threads)
1183 			sub_size = 0;
1184 
1185 		p[i].pb = pb;
1186 		p[i].window = window;
1187 		p[i].depth = depth;
1188 		p[i].working = 1;
1189 		p[i].data_ready = 0;
1190 
1191 		/* try to split chunks on "path" boundaries */
1192 		while (sub_size && sub_size < list_size &&
1193 		       list[sub_size]->hash &&
1194 		       list[sub_size]->hash == list[sub_size-1]->hash)
1195 			sub_size++;
1196 
1197 		p[i].list = list;
1198 		p[i].list_size = sub_size;
1199 		p[i].remaining = sub_size;
1200 
1201 		list += sub_size;
1202 		list_size -= sub_size;
1203 	}
1204 
1205 	/* Start work threads */
1206 	for (i = 0; i < pb->nr_threads; ++i) {
1207 		if (!p[i].list_size)
1208 			continue;
1209 
1210 		git_mutex_init(&p[i].mutex);
1211 		git_cond_init(&p[i].cond);
1212 
1213 		ret = git_thread_create(&p[i].thread,
1214 					threaded_find_deltas, &p[i]);
1215 		if (ret) {
1216 			git_error_set(GIT_ERROR_THREAD, "unable to create thread");
1217 			return -1;
1218 		}
1219 		active_threads++;
1220 	}
1221 
1222 	/*
1223 	 * Now let's wait for work completion.  Each time a thread is done
1224 	 * with its work, we steal half of the remaining work from the
1225 	 * thread with the largest number of unprocessed objects and give
1226 	 * it to that newly idle thread.  This ensure good load balancing
1227 	 * until the remaining object list segments are simply too short
1228 	 * to be worth splitting anymore.
1229 	 */
1230 	while (active_threads) {
1231 		struct thread_params *target = NULL;
1232 		struct thread_params *victim = NULL;
1233 		size_t sub_size = 0;
1234 
1235 		/* Start by locating a thread that has transitioned its
1236 		 * 'working' flag from 1 -> 0. This indicates that it is
1237 		 * ready to receive more work using our work-stealing
1238 		 * algorithm. */
1239 		GIT_ASSERT(git_packbuilder__progress_lock(pb) == 0);
1240 		for (;;) {
1241 			for (i = 0; !target && i < pb->nr_threads; i++)
1242 				if (!p[i].working)
1243 					target = &p[i];
1244 			if (target)
1245 				break;
1246 			git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
1247 		}
1248 
1249 		/* At this point we hold the progress lock and have located
1250 		 * a thread to receive more work. We still need to locate a
1251 		 * thread from which to steal work (the victim). */
1252 		for (i = 0; i < pb->nr_threads; i++)
1253 			if (p[i].remaining > 2*window &&
1254 			    (!victim || victim->remaining < p[i].remaining))
1255 				victim = &p[i];
1256 
1257 		if (victim) {
1258 			sub_size = victim->remaining / 2;
1259 			list = victim->list + victim->list_size - sub_size;
1260 			while (sub_size && list[0]->hash &&
1261 			       list[0]->hash == list[-1]->hash) {
1262 				list++;
1263 				sub_size--;
1264 			}
1265 			if (!sub_size) {
1266 				/*
1267 				 * It is possible for some "paths" to have
1268 				 * so many objects that no hash boundary
1269 				 * might be found.  Let's just steal the
1270 				 * exact half in that case.
1271 				 */
1272 				sub_size = victim->remaining / 2;
1273 				list -= sub_size;
1274 			}
1275 			target->list = list;
1276 			victim->list_size -= sub_size;
1277 			victim->remaining -= sub_size;
1278 		}
1279 		target->list_size = sub_size;
1280 		target->remaining = sub_size;
1281 		target->working = 1;
1282 		GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
1283 
1284 		if (git_mutex_lock(&target->mutex)) {
1285 			git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1286 			git__free(p);
1287 			return -1;
1288 		}
1289 
1290 		target->data_ready = 1;
1291 		git_cond_signal(&target->cond);
1292 		git_mutex_unlock(&target->mutex);
1293 
1294 		if (!sub_size) {
1295 			git_thread_join(&target->thread, NULL);
1296 			git_cond_free(&target->cond);
1297 			git_mutex_free(&target->mutex);
1298 			active_threads--;
1299 		}
1300 	}
1301 
1302 	git__free(p);
1303 	return 0;
1304 }
1305 
1306 #else
1307 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1308 #endif
1309 
prepare_pack(git_packbuilder * pb)1310 static int prepare_pack(git_packbuilder *pb)
1311 {
1312 	git_pobject **delta_list;
1313 	size_t i, n = 0;
1314 
1315 	if (pb->nr_objects == 0 || pb->done)
1316 		return 0; /* nothing to do */
1317 
1318 	/*
1319 	 * Although we do not report progress during deltafication, we
1320 	 * at least report that we are in the deltafication stage
1321 	 */
1322 	if (pb->progress_cb)
1323 			pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1324 
1325 	delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
1326 	GIT_ERROR_CHECK_ALLOC(delta_list);
1327 
1328 	for (i = 0; i < pb->nr_objects; ++i) {
1329 		git_pobject *po = pb->object_list + i;
1330 
1331 		/* Make sure the item is within our size limits */
1332 		if (po->size < 50 || po->size > pb->big_file_threshold)
1333 			continue;
1334 
1335 		delta_list[n++] = po;
1336 	}
1337 
1338 	if (n > 1) {
1339 		git__tsort((void **)delta_list, n, type_size_sort);
1340 		if (ll_find_deltas(pb, delta_list, n,
1341 				   GIT_PACK_WINDOW + 1,
1342 				   GIT_PACK_DEPTH) < 0) {
1343 			git__free(delta_list);
1344 			return -1;
1345 		}
1346 	}
1347 
1348 	report_delta_progress(pb, pb->nr_objects, true);
1349 
1350 	pb->done = true;
1351 	git__free(delta_list);
1352 	return 0;
1353 }
1354 
1355 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1356 
git_packbuilder_foreach(git_packbuilder * pb,int (* cb)(void * buf,size_t size,void * payload),void * payload)1357 int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1358 {
1359 	PREPARE_PACK;
1360 	return write_pack(pb, cb, payload);
1361 }
1362 
git_packbuilder_write_buf(git_buf * buf,git_packbuilder * pb)1363 int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1364 {
1365 	int error;
1366 
1367 	if ((error = git_buf_sanitize(buf)) < 0)
1368 		return error;
1369 
1370 	PREPARE_PACK;
1371 
1372 	return write_pack(pb, &write_pack_buf, buf);
1373 }
1374 
write_cb(void * buf,size_t len,void * payload)1375 static int write_cb(void *buf, size_t len, void *payload)
1376 {
1377 	struct pack_write_context *ctx = payload;
1378 	return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
1379 }
1380 
git_packbuilder_write(git_packbuilder * pb,const char * path,unsigned int mode,git_indexer_progress_cb progress_cb,void * progress_cb_payload)1381 int git_packbuilder_write(
1382 	git_packbuilder *pb,
1383 	const char *path,
1384 	unsigned int mode,
1385 	git_indexer_progress_cb progress_cb,
1386 	void *progress_cb_payload)
1387 {
1388 	int error = -1;
1389 	git_buf object_path = GIT_BUF_INIT;
1390 	git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
1391 	git_indexer *indexer = NULL;
1392 	git_indexer_progress stats;
1393 	struct pack_write_context ctx;
1394 	int t;
1395 
1396 	PREPARE_PACK;
1397 
1398 	if (path == NULL) {
1399 		if ((error = git_repository_item_path(&object_path, pb->repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0)
1400 			goto cleanup;
1401 		if ((error = git_buf_joinpath(&object_path, git_buf_cstr(&object_path), "pack")) < 0)
1402 			goto cleanup;
1403 		path = git_buf_cstr(&object_path);
1404 	}
1405 
1406 	opts.progress_cb = progress_cb;
1407 	opts.progress_cb_payload = progress_cb_payload;
1408 
1409 	if ((error = git_indexer_new(&indexer, path, mode, pb->odb, &opts)) < 0)
1410 		goto cleanup;
1411 
1412 	if (!git_repository__configmap_lookup(&t, pb->repo, GIT_CONFIGMAP_FSYNCOBJECTFILES) && t)
1413 		git_indexer__set_fsync(indexer, 1);
1414 
1415 	ctx.indexer = indexer;
1416 	ctx.stats = &stats;
1417 
1418 	if ((error = git_packbuilder_foreach(pb, write_cb, &ctx)) < 0)
1419 		goto cleanup;
1420 
1421 	if ((error = git_indexer_commit(indexer, &stats)) < 0)
1422 		goto cleanup;
1423 
1424 	git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1425 
1426 cleanup:
1427 	git_indexer_free(indexer);
1428 	git_buf_dispose(&object_path);
1429 	return error;
1430 }
1431 
1432 #undef PREPARE_PACK
1433 
git_packbuilder_hash(git_packbuilder * pb)1434 const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1435 {
1436 	return &pb->pack_oid;
1437 }
1438 
1439 
cb_tree_walk(const char * root,const git_tree_entry * entry,void * payload)1440 static int cb_tree_walk(
1441 	const char *root, const git_tree_entry *entry, void *payload)
1442 {
1443 	int error;
1444 	struct tree_walk_context *ctx = payload;
1445 
1446 	/* A commit inside a tree represents a submodule commit and should be skipped. */
1447 	if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT)
1448 		return 0;
1449 
1450 	if (!(error = git_buf_sets(&ctx->buf, root)) &&
1451 		!(error = git_buf_puts(&ctx->buf, git_tree_entry_name(entry))))
1452 		error = git_packbuilder_insert(
1453 			ctx->pb, git_tree_entry_id(entry), git_buf_cstr(&ctx->buf));
1454 
1455 	return error;
1456 }
1457 
git_packbuilder_insert_commit(git_packbuilder * pb,const git_oid * oid)1458 int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1459 {
1460 	git_commit *commit;
1461 
1462 	if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1463 		git_packbuilder_insert(pb, oid, NULL) < 0)
1464 		return -1;
1465 
1466 	if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1467 		return -1;
1468 
1469 	git_commit_free(commit);
1470 	return 0;
1471 }
1472 
git_packbuilder_insert_tree(git_packbuilder * pb,const git_oid * oid)1473 int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1474 {
1475 	int error;
1476 	git_tree *tree = NULL;
1477 	struct tree_walk_context context = { pb, GIT_BUF_INIT };
1478 
1479 	if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1480 	    !(error = git_packbuilder_insert(pb, oid, NULL)))
1481 		error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
1482 
1483 	git_tree_free(tree);
1484 	git_buf_dispose(&context.buf);
1485 	return error;
1486 }
1487 
git_packbuilder_insert_recur(git_packbuilder * pb,const git_oid * id,const char * name)1488 int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1489 {
1490 	git_object *obj;
1491 	int error;
1492 
1493 	GIT_ASSERT_ARG(pb);
1494 	GIT_ASSERT_ARG(id);
1495 
1496 	if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJECT_ANY)) < 0)
1497 		return error;
1498 
1499 	switch (git_object_type(obj)) {
1500 	case GIT_OBJECT_BLOB:
1501 		error = git_packbuilder_insert(pb, id, name);
1502 		break;
1503 	case GIT_OBJECT_TREE:
1504 		error = git_packbuilder_insert_tree(pb, id);
1505 		break;
1506 	case GIT_OBJECT_COMMIT:
1507 		error = git_packbuilder_insert_commit(pb, id);
1508 		break;
1509 	case GIT_OBJECT_TAG:
1510 		if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1511 			goto cleanup;
1512 		error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1513 		break;
1514 
1515 	default:
1516 		git_error_set(GIT_ERROR_INVALID, "unknown object type");
1517 		error = -1;
1518 	}
1519 
1520 cleanup:
1521 	git_object_free(obj);
1522 	return error;
1523 }
1524 
git_packbuilder_object_count(git_packbuilder * pb)1525 size_t git_packbuilder_object_count(git_packbuilder *pb)
1526 {
1527 	return pb->nr_objects;
1528 }
1529 
git_packbuilder_written(git_packbuilder * pb)1530 size_t git_packbuilder_written(git_packbuilder *pb)
1531 {
1532 	return pb->nr_written;
1533 }
1534 
lookup_walk_object(struct walk_object ** out,git_packbuilder * pb,const git_oid * id)1535 static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1536 {
1537 	struct walk_object *obj;
1538 
1539 	obj = git_pool_mallocz(&pb->object_pool, 1);
1540 	if (!obj) {
1541 		git_error_set_oom();
1542 		return -1;
1543 	}
1544 
1545 	git_oid_cpy(&obj->id, id);
1546 
1547 	*out = obj;
1548 	return 0;
1549 }
1550 
retrieve_object(struct walk_object ** out,git_packbuilder * pb,const git_oid * id)1551 static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1552 {
1553 	struct walk_object *obj;
1554 	int error;
1555 
1556 	if ((obj = git_oidmap_get(pb->walk_objects, id)) == NULL) {
1557 		if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1558 			return error;
1559 
1560 		if ((error = git_oidmap_set(pb->walk_objects, &obj->id, obj)) < 0)
1561 			return error;
1562 	}
1563 
1564 	*out = obj;
1565 	return 0;
1566 }
1567 
mark_blob_uninteresting(git_packbuilder * pb,const git_oid * id)1568 static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1569 {
1570 	int error;
1571 	struct walk_object *obj;
1572 
1573 	if ((error = retrieve_object(&obj, pb, id)) < 0)
1574 		return error;
1575 
1576 	obj->uninteresting = 1;
1577 
1578 	return 0;
1579 }
1580 
mark_tree_uninteresting(git_packbuilder * pb,const git_oid * id)1581 static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1582 {
1583 	struct walk_object *obj;
1584 	git_tree *tree;
1585 	int error;
1586 	size_t i;
1587 
1588 	if ((error = retrieve_object(&obj, pb, id)) < 0)
1589 		return error;
1590 
1591 	if (obj->uninteresting)
1592 		return 0;
1593 
1594 	obj->uninteresting = 1;
1595 
1596 	if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1597 		return error;
1598 
1599 	for (i = 0; i < git_tree_entrycount(tree); i++) {
1600 		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1601 		const git_oid *entry_id = git_tree_entry_id(entry);
1602 		switch (git_tree_entry_type(entry)) {
1603 		case GIT_OBJECT_TREE:
1604 			if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1605 				goto cleanup;
1606 			break;
1607 		case GIT_OBJECT_BLOB:
1608 			if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1609 				goto cleanup;
1610 			break;
1611 		default:
1612 			/* it's a submodule or something unknown, we don't want it */
1613 			;
1614 		}
1615 	}
1616 
1617 cleanup:
1618 	git_tree_free(tree);
1619 	return error;
1620 }
1621 
1622 /*
1623  * Mark the edges of the graph uninteresting. Since we start from a
1624  * git_revwalk, the commits are already uninteresting, but we need to
1625  * mark the trees and blobs.
1626  */
mark_edges_uninteresting(git_packbuilder * pb,git_commit_list * commits)1627 static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1628 {
1629 	int error;
1630 	git_commit_list *list;
1631 	git_commit *commit;
1632 
1633 	for (list = commits; list; list = list->next) {
1634 		if (!list->item->uninteresting)
1635 			continue;
1636 
1637 		if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1638 			return error;
1639 
1640 		error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1641 		git_commit_free(commit);
1642 
1643 		if (error < 0)
1644 			return error;
1645 	}
1646 
1647 	return 0;
1648 }
1649 
pack_objects_insert_tree(git_packbuilder * pb,git_tree * tree)1650 static int pack_objects_insert_tree(git_packbuilder *pb, git_tree *tree)
1651 {
1652 	size_t i;
1653 	int error;
1654 	git_tree *subtree;
1655 	struct walk_object *obj;
1656 	const char *name;
1657 
1658 	if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1659 		return error;
1660 
1661 	if (obj->seen || obj->uninteresting)
1662 		return 0;
1663 
1664 	obj->seen = 1;
1665 
1666 	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1667 		return error;
1668 
1669 	for (i = 0; i < git_tree_entrycount(tree); i++) {
1670 		const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1671 		const git_oid *entry_id = git_tree_entry_id(entry);
1672 		switch (git_tree_entry_type(entry)) {
1673 		case GIT_OBJECT_TREE:
1674 			if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1675 				return error;
1676 
1677 			error = pack_objects_insert_tree(pb, subtree);
1678 			git_tree_free(subtree);
1679 
1680 			if (error < 0)
1681 				return error;
1682 
1683 			break;
1684 		case GIT_OBJECT_BLOB:
1685 			if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
1686 				return error;
1687 			if (obj->uninteresting)
1688 				continue;
1689 			name = git_tree_entry_name(entry);
1690 			if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1691 				return error;
1692 			break;
1693 		default:
1694 			/* it's a submodule or something unknown, we don't want it */
1695 			;
1696 		}
1697 	}
1698 
1699 
1700 	return error;
1701 }
1702 
pack_objects_insert_commit(git_packbuilder * pb,struct walk_object * obj)1703 static int pack_objects_insert_commit(git_packbuilder *pb, struct walk_object *obj)
1704 {
1705 	int error;
1706 	git_commit *commit = NULL;
1707 	git_tree *tree = NULL;
1708 
1709 	obj->seen = 1;
1710 
1711 	if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1712 		return error;
1713 
1714 	if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1715 		return error;
1716 
1717 	if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1718 		goto cleanup;
1719 
1720 	if ((error = pack_objects_insert_tree(pb, tree)) < 0)
1721 		goto cleanup;
1722 
1723 cleanup:
1724 	git_commit_free(commit);
1725 	git_tree_free(tree);
1726 	return error;
1727 }
1728 
git_packbuilder_insert_walk(git_packbuilder * pb,git_revwalk * walk)1729 int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1730 {
1731 	int error;
1732 	git_oid id;
1733 	struct walk_object *obj;
1734 
1735 	GIT_ASSERT_ARG(pb);
1736 	GIT_ASSERT_ARG(walk);
1737 
1738 	if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1739 		return error;
1740 
1741 	/*
1742 	 * TODO: git marks the parents of the edges
1743 	 * uninteresting. This may provide a speed advantage, but does
1744 	 * seem to assume the remote does not have a single-commit
1745 	 * history on the other end.
1746 	 */
1747 
1748 	/* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1749 	while ((error = git_revwalk_next(&id, walk)) == 0) {
1750 		if ((error = retrieve_object(&obj, pb, &id)) < 0)
1751 			return error;
1752 
1753 		if (obj->seen || obj->uninteresting)
1754 			continue;
1755 
1756 		if ((error = pack_objects_insert_commit(pb, obj)) < 0)
1757 			return error;
1758 	}
1759 
1760 	if (error == GIT_ITEROVER)
1761 		error = 0;
1762 
1763 	return error;
1764 }
1765 
git_packbuilder_set_callbacks(git_packbuilder * pb,git_packbuilder_progress progress_cb,void * progress_cb_payload)1766 int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1767 {
1768 	if (!pb)
1769 		return -1;
1770 
1771 	pb->progress_cb = progress_cb;
1772 	pb->progress_cb_payload = progress_cb_payload;
1773 
1774 	return 0;
1775 }
1776 
git_packbuilder_free(git_packbuilder * pb)1777 void git_packbuilder_free(git_packbuilder *pb)
1778 {
1779 	if (pb == NULL)
1780 		return;
1781 
1782 #ifdef GIT_THREADS
1783 
1784 	git_mutex_free(&pb->cache_mutex);
1785 	git_mutex_free(&pb->progress_mutex);
1786 	git_cond_free(&pb->progress_cond);
1787 
1788 #endif
1789 
1790 	if (pb->odb)
1791 		git_odb_free(pb->odb);
1792 
1793 	if (pb->object_ix)
1794 		git_oidmap_free(pb->object_ix);
1795 
1796 	if (pb->object_list)
1797 		git__free(pb->object_list);
1798 
1799 	git_oidmap_free(pb->walk_objects);
1800 	git_pool_clear(&pb->object_pool);
1801 
1802 	git_hash_ctx_cleanup(&pb->ctx);
1803 	git_zstream_free(&pb->zstream);
1804 
1805 	git__free(pb);
1806 }
1807