1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "pack-objects.h"
9
10 #include "zstream.h"
11 #include "delta.h"
12 #include "iterator.h"
13 #include "netops.h"
14 #include "pack.h"
15 #include "thread.h"
16 #include "tree.h"
17 #include "util.h"
18 #include "revwalk.h"
19 #include "commit_list.h"
20
21 #include "git2/pack.h"
22 #include "git2/commit.h"
23 #include "git2/tag.h"
24 #include "git2/indexer.h"
25 #include "git2/config.h"
26
27 struct unpacked {
28 git_pobject *object;
29 void *data;
30 struct git_delta_index *index;
31 size_t depth;
32 };
33
34 struct tree_walk_context {
35 git_packbuilder *pb;
36 git_buf buf;
37 };
38
39 struct pack_write_context {
40 git_indexer *indexer;
41 git_indexer_progress *stats;
42 };
43
44 struct walk_object {
45 git_oid id;
46 unsigned int uninteresting:1,
47 seen:1;
48 };
49
50 #ifdef GIT_THREADS
51 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git_mutex_##op(&(pb)->mtx)
52 #else
53 # define GIT_PACKBUILDER__MUTEX_OP(pb, mtx, op) git__noop()
54 #endif
55
56 #define git_packbuilder__cache_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, lock)
57 #define git_packbuilder__cache_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, cache_mutex, unlock)
58 #define git_packbuilder__progress_lock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, lock)
59 #define git_packbuilder__progress_unlock(pb) GIT_PACKBUILDER__MUTEX_OP(pb, progress_mutex, unlock)
60
61 /* The minimal interval between progress updates (in seconds). */
62 #define MIN_PROGRESS_UPDATE_INTERVAL 0.5
63
64 /* Size of the buffer to feed to zlib */
65 #define COMPRESS_BUFLEN (1024 * 1024)
66
name_hash(const char * name)67 static unsigned name_hash(const char *name)
68 {
69 unsigned c, hash = 0;
70
71 if (!name)
72 return 0;
73
74 /*
75 * This effectively just creates a sortable number from the
76 * last sixteen non-whitespace characters. Last characters
77 * count "most", so things that end in ".c" sort together.
78 */
79 while ((c = *name++) != 0) {
80 if (git__isspace(c))
81 continue;
82 hash = (hash >> 2) + (c << 24);
83 }
84 return hash;
85 }
86
packbuilder_config(git_packbuilder * pb)87 static int packbuilder_config(git_packbuilder *pb)
88 {
89 git_config *config;
90 int ret = 0;
91 int64_t val;
92
93 if ((ret = git_repository_config_snapshot(&config, pb->repo)) < 0)
94 return ret;
95
96 #define config_get(KEY,DST,DFLT) do { \
97 ret = git_config_get_int64(&val, config, KEY); \
98 if (!ret) { \
99 if (!git__is_sizet(val)) { \
100 git_error_set(GIT_ERROR_CONFIG, \
101 "configuration value '%s' is too large", KEY); \
102 ret = -1; \
103 goto out; \
104 } \
105 (DST) = (size_t)val; \
106 } else if (ret == GIT_ENOTFOUND) { \
107 (DST) = (DFLT); \
108 ret = 0; \
109 } else if (ret < 0) goto out; } while (0)
110
111 config_get("pack.deltaCacheSize", pb->max_delta_cache_size,
112 GIT_PACK_DELTA_CACHE_SIZE);
113 config_get("pack.deltaCacheLimit", pb->cache_max_small_delta_size,
114 GIT_PACK_DELTA_CACHE_LIMIT);
115 config_get("pack.deltaCacheSize", pb->big_file_threshold,
116 GIT_PACK_BIG_FILE_THRESHOLD);
117 config_get("pack.windowMemory", pb->window_memory_limit, 0);
118
119 #undef config_get
120
121 out:
122 git_config_free(config);
123
124 return ret;
125 }
126
git_packbuilder_new(git_packbuilder ** out,git_repository * repo)127 int git_packbuilder_new(git_packbuilder **out, git_repository *repo)
128 {
129 git_packbuilder *pb;
130
131 *out = NULL;
132
133 pb = git__calloc(1, sizeof(*pb));
134 GIT_ERROR_CHECK_ALLOC(pb);
135
136 if (git_oidmap_new(&pb->object_ix) < 0 ||
137 git_oidmap_new(&pb->walk_objects) < 0 ||
138 git_pool_init(&pb->object_pool, sizeof(struct walk_object)) < 0)
139 goto on_error;
140
141 pb->repo = repo;
142 pb->nr_threads = 1; /* do not spawn any thread by default */
143
144 if (git_hash_ctx_init(&pb->ctx) < 0 ||
145 git_zstream_init(&pb->zstream, GIT_ZSTREAM_DEFLATE) < 0 ||
146 git_repository_odb(&pb->odb, repo) < 0 ||
147 packbuilder_config(pb) < 0)
148 goto on_error;
149
150 #ifdef GIT_THREADS
151
152 if (git_mutex_init(&pb->cache_mutex) ||
153 git_mutex_init(&pb->progress_mutex) ||
154 git_cond_init(&pb->progress_cond))
155 {
156 git_error_set(GIT_ERROR_OS, "failed to initialize packbuilder mutex");
157 goto on_error;
158 }
159
160 #endif
161
162 *out = pb;
163 return 0;
164
165 on_error:
166 git_packbuilder_free(pb);
167 return -1;
168 }
169
git_packbuilder_set_threads(git_packbuilder * pb,unsigned int n)170 unsigned int git_packbuilder_set_threads(git_packbuilder *pb, unsigned int n)
171 {
172 GIT_ASSERT_ARG(pb);
173
174 #ifdef GIT_THREADS
175 pb->nr_threads = n;
176 #else
177 GIT_UNUSED(n);
178 GIT_ASSERT(pb->nr_threads == 1);
179 #endif
180
181 return pb->nr_threads;
182 }
183
rehash(git_packbuilder * pb)184 static int rehash(git_packbuilder *pb)
185 {
186 git_pobject *po;
187 size_t i;
188
189 git_oidmap_clear(pb->object_ix);
190
191 for (i = 0, po = pb->object_list; i < pb->nr_objects; i++, po++) {
192 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0)
193 return -1;
194 }
195
196 return 0;
197 }
198
git_packbuilder_insert(git_packbuilder * pb,const git_oid * oid,const char * name)199 int git_packbuilder_insert(git_packbuilder *pb, const git_oid *oid,
200 const char *name)
201 {
202 git_pobject *po;
203 size_t newsize;
204 int ret;
205
206 GIT_ASSERT_ARG(pb);
207 GIT_ASSERT_ARG(oid);
208
209 /* If the object already exists in the hash table, then we don't
210 * have any work to do */
211 if (git_oidmap_exists(pb->object_ix, oid))
212 return 0;
213
214 if (pb->nr_objects >= pb->nr_alloc) {
215 GIT_ERROR_CHECK_ALLOC_ADD(&newsize, pb->nr_alloc, 1024);
216 GIT_ERROR_CHECK_ALLOC_MULTIPLY(&newsize, newsize / 2, 3);
217
218 if (!git__is_uint32(newsize)) {
219 git_error_set(GIT_ERROR_NOMEMORY, "packfile too large to fit in memory.");
220 return -1;
221 }
222
223 pb->nr_alloc = newsize;
224
225 pb->object_list = git__reallocarray(pb->object_list,
226 pb->nr_alloc, sizeof(*po));
227 GIT_ERROR_CHECK_ALLOC(pb->object_list);
228
229 if (rehash(pb) < 0)
230 return -1;
231 }
232
233 po = pb->object_list + pb->nr_objects;
234 memset(po, 0x0, sizeof(*po));
235
236 if ((ret = git_odb_read_header(&po->size, &po->type, pb->odb, oid)) < 0)
237 return ret;
238
239 pb->nr_objects++;
240 git_oid_cpy(&po->id, oid);
241 po->hash = name_hash(name);
242
243 if (git_oidmap_set(pb->object_ix, &po->id, po) < 0) {
244 git_error_set_oom();
245 return -1;
246 }
247
248 pb->done = false;
249
250 if (pb->progress_cb) {
251 double current_time = git__timer();
252 double elapsed = current_time - pb->last_progress_report_time;
253
254 if (elapsed < 0 || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
255 pb->last_progress_report_time = current_time;
256
257 ret = pb->progress_cb(
258 GIT_PACKBUILDER_ADDING_OBJECTS,
259 pb->nr_objects, 0, pb->progress_cb_payload);
260
261 if (ret)
262 return git_error_set_after_callback(ret);
263 }
264 }
265
266 return 0;
267 }
268
get_delta(void ** out,git_odb * odb,git_pobject * po)269 static int get_delta(void **out, git_odb *odb, git_pobject *po)
270 {
271 git_odb_object *src = NULL, *trg = NULL;
272 size_t delta_size;
273 void *delta_buf;
274 int error;
275
276 *out = NULL;
277
278 if (git_odb_read(&src, odb, &po->delta->id) < 0 ||
279 git_odb_read(&trg, odb, &po->id) < 0)
280 goto on_error;
281
282 error = git_delta(&delta_buf, &delta_size,
283 git_odb_object_data(src), git_odb_object_size(src),
284 git_odb_object_data(trg), git_odb_object_size(trg),
285 0);
286
287 if (error < 0 && error != GIT_EBUFS)
288 goto on_error;
289
290 if (error == GIT_EBUFS || delta_size != po->delta_size) {
291 git_error_set(GIT_ERROR_INVALID, "delta size changed");
292 goto on_error;
293 }
294
295 *out = delta_buf;
296
297 git_odb_object_free(src);
298 git_odb_object_free(trg);
299 return 0;
300
301 on_error:
302 git_odb_object_free(src);
303 git_odb_object_free(trg);
304 return -1;
305 }
306
write_object(git_packbuilder * pb,git_pobject * po,int (* write_cb)(void * buf,size_t size,void * cb_data),void * cb_data)307 static int write_object(
308 git_packbuilder *pb,
309 git_pobject *po,
310 int (*write_cb)(void *buf, size_t size, void *cb_data),
311 void *cb_data)
312 {
313 git_odb_object *obj = NULL;
314 git_object_t type;
315 unsigned char hdr[10], *zbuf = NULL;
316 void *data = NULL;
317 size_t hdr_len, zbuf_len = COMPRESS_BUFLEN, data_len;
318 int error;
319
320 /*
321 * If we have a delta base, let's use the delta to save space.
322 * Otherwise load the whole object. 'data' ends up pointing to
323 * whatever data we want to put into the packfile.
324 */
325 if (po->delta) {
326 if (po->delta_data)
327 data = po->delta_data;
328 else if ((error = get_delta(&data, pb->odb, po)) < 0)
329 goto done;
330
331 data_len = po->delta_size;
332 type = GIT_OBJECT_REF_DELTA;
333 } else {
334 if ((error = git_odb_read(&obj, pb->odb, &po->id)) < 0)
335 goto done;
336
337 data = (void *)git_odb_object_data(obj);
338 data_len = git_odb_object_size(obj);
339 type = git_odb_object_type(obj);
340 }
341
342 /* Write header */
343 if ((error = git_packfile__object_header(&hdr_len, hdr, data_len, type)) < 0 ||
344 (error = write_cb(hdr, hdr_len, cb_data)) < 0 ||
345 (error = git_hash_update(&pb->ctx, hdr, hdr_len)) < 0)
346 goto done;
347
348 if (type == GIT_OBJECT_REF_DELTA) {
349 if ((error = write_cb(po->delta->id.id, GIT_OID_RAWSZ, cb_data)) < 0 ||
350 (error = git_hash_update(&pb->ctx, po->delta->id.id, GIT_OID_RAWSZ)) < 0)
351 goto done;
352 }
353
354 /* Write data */
355 if (po->z_delta_size) {
356 data_len = po->z_delta_size;
357
358 if ((error = write_cb(data, data_len, cb_data)) < 0 ||
359 (error = git_hash_update(&pb->ctx, data, data_len)) < 0)
360 goto done;
361 } else {
362 zbuf = git__malloc(zbuf_len);
363 GIT_ERROR_CHECK_ALLOC(zbuf);
364
365 git_zstream_reset(&pb->zstream);
366
367 if ((error = git_zstream_set_input(&pb->zstream, data, data_len)) < 0)
368 goto done;
369
370 while (!git_zstream_done(&pb->zstream)) {
371 if ((error = git_zstream_get_output(zbuf, &zbuf_len, &pb->zstream)) < 0 ||
372 (error = write_cb(zbuf, zbuf_len, cb_data)) < 0 ||
373 (error = git_hash_update(&pb->ctx, zbuf, zbuf_len)) < 0)
374 goto done;
375
376 zbuf_len = COMPRESS_BUFLEN; /* reuse buffer */
377 }
378 }
379
380 /*
381 * If po->delta is true, data is a delta and it is our
382 * responsibility to free it (otherwise it's a git_object's
383 * data). We set po->delta_data to NULL in case we got the
384 * data from there instead of get_delta(). If we didn't,
385 * there's no harm.
386 */
387 if (po->delta) {
388 git__free(data);
389 po->delta_data = NULL;
390 }
391
392 pb->nr_written++;
393
394 done:
395 git__free(zbuf);
396 git_odb_object_free(obj);
397 return error;
398 }
399
400 enum write_one_status {
401 WRITE_ONE_SKIP = -1, /* already written */
402 WRITE_ONE_BREAK = 0, /* writing this will bust the limit; not written */
403 WRITE_ONE_WRITTEN = 1, /* normal */
404 WRITE_ONE_RECURSIVE = 2 /* already scheduled to be written */
405 };
406
write_one(enum write_one_status * status,git_packbuilder * pb,git_pobject * po,int (* write_cb)(void * buf,size_t size,void * cb_data),void * cb_data)407 static int write_one(
408 enum write_one_status *status,
409 git_packbuilder *pb,
410 git_pobject *po,
411 int (*write_cb)(void *buf, size_t size, void *cb_data),
412 void *cb_data)
413 {
414 int error;
415
416 if (po->recursing) {
417 *status = WRITE_ONE_RECURSIVE;
418 return 0;
419 } else if (po->written) {
420 *status = WRITE_ONE_SKIP;
421 return 0;
422 }
423
424 if (po->delta) {
425 po->recursing = 1;
426
427 if ((error = write_one(status, pb, po->delta, write_cb, cb_data)) < 0)
428 return error;
429
430 /* we cannot depend on this one */
431 if (*status == WRITE_ONE_RECURSIVE)
432 po->delta = NULL;
433 }
434
435 *status = WRITE_ONE_WRITTEN;
436 po->written = 1;
437 po->recursing = 0;
438
439 return write_object(pb, po, write_cb, cb_data);
440 }
441
add_to_write_order(git_pobject ** wo,size_t * endp,git_pobject * po)442 GIT_INLINE(void) add_to_write_order(git_pobject **wo, size_t *endp,
443 git_pobject *po)
444 {
445 if (po->filled)
446 return;
447 wo[(*endp)++] = po;
448 po->filled = 1;
449 }
450
add_descendants_to_write_order(git_pobject ** wo,size_t * endp,git_pobject * po)451 static void add_descendants_to_write_order(git_pobject **wo, size_t *endp,
452 git_pobject *po)
453 {
454 int add_to_order = 1;
455 while (po) {
456 if (add_to_order) {
457 git_pobject *s;
458 /* add this node... */
459 add_to_write_order(wo, endp, po);
460 /* all its siblings... */
461 for (s = po->delta_sibling; s; s = s->delta_sibling) {
462 add_to_write_order(wo, endp, s);
463 }
464 }
465 /* drop down a level to add left subtree nodes if possible */
466 if (po->delta_child) {
467 add_to_order = 1;
468 po = po->delta_child;
469 } else {
470 add_to_order = 0;
471 /* our sibling might have some children, it is next */
472 if (po->delta_sibling) {
473 po = po->delta_sibling;
474 continue;
475 }
476 /* go back to our parent node */
477 po = po->delta;
478 while (po && !po->delta_sibling) {
479 /* we're on the right side of a subtree, keep
480 * going up until we can go right again */
481 po = po->delta;
482 }
483 if (!po) {
484 /* done- we hit our original root node */
485 return;
486 }
487 /* pass it off to sibling at this level */
488 po = po->delta_sibling;
489 }
490 };
491 }
492
add_family_to_write_order(git_pobject ** wo,size_t * endp,git_pobject * po)493 static void add_family_to_write_order(git_pobject **wo, size_t *endp,
494 git_pobject *po)
495 {
496 git_pobject *root;
497
498 for (root = po; root->delta; root = root->delta)
499 ; /* nothing */
500 add_descendants_to_write_order(wo, endp, root);
501 }
502
cb_tag_foreach(const char * name,git_oid * oid,void * data)503 static int cb_tag_foreach(const char *name, git_oid *oid, void *data)
504 {
505 git_packbuilder *pb = data;
506 git_pobject *po;
507
508 GIT_UNUSED(name);
509
510 if ((po = git_oidmap_get(pb->object_ix, oid)) == NULL)
511 return 0;
512
513 po->tagged = 1;
514
515 /* TODO: peel objects */
516
517 return 0;
518 }
519
compute_write_order(git_pobject *** out,git_packbuilder * pb)520 static int compute_write_order(git_pobject ***out, git_packbuilder *pb)
521 {
522 size_t i, wo_end, last_untagged;
523 git_pobject **wo;
524
525 *out = NULL;
526
527 if (!pb->nr_objects)
528 return 0;
529
530 if ((wo = git__mallocarray(pb->nr_objects, sizeof(*wo))) == NULL)
531 return -1;
532
533 for (i = 0; i < pb->nr_objects; i++) {
534 git_pobject *po = pb->object_list + i;
535 po->tagged = 0;
536 po->filled = 0;
537 po->delta_child = NULL;
538 po->delta_sibling = NULL;
539 }
540
541 /*
542 * Fully connect delta_child/delta_sibling network.
543 * Make sure delta_sibling is sorted in the original
544 * recency order.
545 */
546 for (i = pb->nr_objects; i > 0;) {
547 git_pobject *po = &pb->object_list[--i];
548 if (!po->delta)
549 continue;
550 /* Mark me as the first child */
551 po->delta_sibling = po->delta->delta_child;
552 po->delta->delta_child = po;
553 }
554
555 /*
556 * Mark objects that are at the tip of tags.
557 */
558 if (git_tag_foreach(pb->repo, &cb_tag_foreach, pb) < 0) {
559 git__free(wo);
560 return -1;
561 }
562
563 /*
564 * Give the objects in the original recency order until
565 * we see a tagged tip.
566 */
567 for (i = wo_end = 0; i < pb->nr_objects; i++) {
568 git_pobject *po = pb->object_list + i;
569 if (po->tagged)
570 break;
571 add_to_write_order(wo, &wo_end, po);
572 }
573 last_untagged = i;
574
575 /*
576 * Then fill all the tagged tips.
577 */
578 for (; i < pb->nr_objects; i++) {
579 git_pobject *po = pb->object_list + i;
580 if (po->tagged)
581 add_to_write_order(wo, &wo_end, po);
582 }
583
584 /*
585 * And then all remaining commits and tags.
586 */
587 for (i = last_untagged; i < pb->nr_objects; i++) {
588 git_pobject *po = pb->object_list + i;
589 if (po->type != GIT_OBJECT_COMMIT &&
590 po->type != GIT_OBJECT_TAG)
591 continue;
592 add_to_write_order(wo, &wo_end, po);
593 }
594
595 /*
596 * And then all the trees.
597 */
598 for (i = last_untagged; i < pb->nr_objects; i++) {
599 git_pobject *po = pb->object_list + i;
600 if (po->type != GIT_OBJECT_TREE)
601 continue;
602 add_to_write_order(wo, &wo_end, po);
603 }
604
605 /*
606 * Finally all the rest in really tight order
607 */
608 for (i = last_untagged; i < pb->nr_objects; i++) {
609 git_pobject *po = pb->object_list + i;
610 if (!po->filled)
611 add_family_to_write_order(wo, &wo_end, po);
612 }
613
614 if (wo_end != pb->nr_objects) {
615 git__free(wo);
616 git_error_set(GIT_ERROR_INVALID, "invalid write order");
617 return -1;
618 }
619
620 *out = wo;
621 return 0;
622 }
623
write_pack(git_packbuilder * pb,int (* write_cb)(void * buf,size_t size,void * cb_data),void * cb_data)624 static int write_pack(git_packbuilder *pb,
625 int (*write_cb)(void *buf, size_t size, void *cb_data),
626 void *cb_data)
627 {
628 git_pobject **write_order;
629 git_pobject *po;
630 enum write_one_status status;
631 struct git_pack_header ph;
632 git_oid entry_oid;
633 size_t i = 0;
634 int error;
635
636 if ((error = compute_write_order(&write_order, pb)) < 0)
637 return error;
638
639 if (!git__is_uint32(pb->nr_objects)) {
640 git_error_set(GIT_ERROR_INVALID, "too many objects");
641 error = -1;
642 goto done;
643 }
644
645 /* Write pack header */
646 ph.hdr_signature = htonl(PACK_SIGNATURE);
647 ph.hdr_version = htonl(PACK_VERSION);
648 ph.hdr_entries = htonl(pb->nr_objects);
649
650 if ((error = write_cb(&ph, sizeof(ph), cb_data)) < 0 ||
651 (error = git_hash_update(&pb->ctx, &ph, sizeof(ph))) < 0)
652 goto done;
653
654 pb->nr_remaining = pb->nr_objects;
655 do {
656 pb->nr_written = 0;
657 for ( ; i < pb->nr_objects; ++i) {
658 po = write_order[i];
659
660 if ((error = write_one(&status, pb, po, write_cb, cb_data)) < 0)
661 goto done;
662 }
663
664 pb->nr_remaining -= pb->nr_written;
665 } while (pb->nr_remaining && i < pb->nr_objects);
666
667 if ((error = git_hash_final(&entry_oid, &pb->ctx)) < 0)
668 goto done;
669
670 error = write_cb(entry_oid.id, GIT_OID_RAWSZ, cb_data);
671
672 done:
673 /* if callback cancelled writing, we must still free delta_data */
674 for ( ; i < pb->nr_objects; ++i) {
675 po = write_order[i];
676 if (po->delta_data) {
677 git__free(po->delta_data);
678 po->delta_data = NULL;
679 }
680 }
681
682 git__free(write_order);
683 return error;
684 }
685
write_pack_buf(void * buf,size_t size,void * data)686 static int write_pack_buf(void *buf, size_t size, void *data)
687 {
688 git_buf *b = (git_buf *)data;
689 return git_buf_put(b, buf, size);
690 }
691
type_size_sort(const void * _a,const void * _b)692 static int type_size_sort(const void *_a, const void *_b)
693 {
694 const git_pobject *a = (git_pobject *)_a;
695 const git_pobject *b = (git_pobject *)_b;
696
697 if (a->type > b->type)
698 return -1;
699 if (a->type < b->type)
700 return 1;
701 if (a->hash > b->hash)
702 return -1;
703 if (a->hash < b->hash)
704 return 1;
705 /*
706 * TODO
707 *
708 if (a->preferred_base > b->preferred_base)
709 return -1;
710 if (a->preferred_base < b->preferred_base)
711 return 1;
712 */
713 if (a->size > b->size)
714 return -1;
715 if (a->size < b->size)
716 return 1;
717 return a < b ? -1 : (a > b); /* newest first */
718 }
719
delta_cacheable(git_packbuilder * pb,size_t src_size,size_t trg_size,size_t delta_size)720 static int delta_cacheable(
721 git_packbuilder *pb,
722 size_t src_size,
723 size_t trg_size,
724 size_t delta_size)
725 {
726 size_t new_size;
727
728 if (git__add_sizet_overflow(&new_size, pb->delta_cache_size, delta_size))
729 return 0;
730
731 if (pb->max_delta_cache_size && new_size > pb->max_delta_cache_size)
732 return 0;
733
734 if (delta_size < pb->cache_max_small_delta_size)
735 return 1;
736
737 /* cache delta, if objects are large enough compared to delta size */
738 if ((src_size >> 20) + (trg_size >> 21) > (delta_size >> 10))
739 return 1;
740
741 return 0;
742 }
743
try_delta(git_packbuilder * pb,struct unpacked * trg,struct unpacked * src,size_t max_depth,size_t * mem_usage,int * ret)744 static int try_delta(git_packbuilder *pb, struct unpacked *trg,
745 struct unpacked *src, size_t max_depth,
746 size_t *mem_usage, int *ret)
747 {
748 git_pobject *trg_object = trg->object;
749 git_pobject *src_object = src->object;
750 git_odb_object *obj;
751 size_t trg_size, src_size, delta_size, sizediff, max_size, sz;
752 size_t ref_depth;
753 void *delta_buf;
754
755 /* Don't bother doing diffs between different types */
756 if (trg_object->type != src_object->type) {
757 *ret = -1;
758 return 0;
759 }
760
761 *ret = 0;
762
763 /* TODO: support reuse-delta */
764
765 /* Let's not bust the allowed depth. */
766 if (src->depth >= max_depth)
767 return 0;
768
769 /* Now some size filtering heuristics. */
770 trg_size = trg_object->size;
771 if (!trg_object->delta) {
772 max_size = trg_size/2 - 20;
773 ref_depth = 1;
774 } else {
775 max_size = trg_object->delta_size;
776 ref_depth = trg->depth;
777 }
778
779 max_size = (uint64_t)max_size * (max_depth - src->depth) /
780 (max_depth - ref_depth + 1);
781 if (max_size == 0)
782 return 0;
783
784 src_size = src_object->size;
785 sizediff = src_size < trg_size ? trg_size - src_size : 0;
786 if (sizediff >= max_size)
787 return 0;
788 if (trg_size < src_size / 32)
789 return 0;
790
791 /* Load data if not already done */
792 if (!trg->data) {
793 if (git_odb_read(&obj, pb->odb, &trg_object->id) < 0)
794 return -1;
795
796 sz = git_odb_object_size(obj);
797 trg->data = git__malloc(sz);
798 GIT_ERROR_CHECK_ALLOC(trg->data);
799 memcpy(trg->data, git_odb_object_data(obj), sz);
800
801 git_odb_object_free(obj);
802
803 if (sz != trg_size) {
804 git_error_set(GIT_ERROR_INVALID,
805 "inconsistent target object length");
806 return -1;
807 }
808
809 *mem_usage += sz;
810 }
811 if (!src->data) {
812 size_t obj_sz;
813
814 if (git_odb_read(&obj, pb->odb, &src_object->id) < 0 ||
815 !git__is_ulong(obj_sz = git_odb_object_size(obj)))
816 return -1;
817
818 sz = obj_sz;
819 src->data = git__malloc(sz);
820 GIT_ERROR_CHECK_ALLOC(src->data);
821 memcpy(src->data, git_odb_object_data(obj), sz);
822
823 git_odb_object_free(obj);
824
825 if (sz != src_size) {
826 git_error_set(GIT_ERROR_INVALID,
827 "inconsistent source object length");
828 return -1;
829 }
830
831 *mem_usage += sz;
832 }
833 if (!src->index) {
834 if (git_delta_index_init(&src->index, src->data, src_size) < 0)
835 return 0; /* suboptimal pack - out of memory */
836
837 *mem_usage += git_delta_index_size(src->index);
838 }
839
840 if (git_delta_create_from_index(&delta_buf, &delta_size, src->index, trg->data, trg_size,
841 max_size) < 0)
842 return 0;
843
844 if (trg_object->delta) {
845 /* Prefer only shallower same-sized deltas. */
846 if (delta_size == trg_object->delta_size &&
847 src->depth + 1 >= trg->depth) {
848 git__free(delta_buf);
849 return 0;
850 }
851 }
852
853 GIT_ASSERT(git_packbuilder__cache_lock(pb) == 0);
854
855 if (trg_object->delta_data) {
856 git__free(trg_object->delta_data);
857 GIT_ASSERT(pb->delta_cache_size >= trg_object->delta_size);
858 pb->delta_cache_size -= trg_object->delta_size;
859 trg_object->delta_data = NULL;
860 }
861 if (delta_cacheable(pb, src_size, trg_size, delta_size)) {
862 bool overflow = git__add_sizet_overflow(
863 &pb->delta_cache_size, pb->delta_cache_size, delta_size);
864
865 GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
866
867 if (overflow) {
868 git__free(delta_buf);
869 return -1;
870 }
871
872 trg_object->delta_data = git__realloc(delta_buf, delta_size);
873 GIT_ERROR_CHECK_ALLOC(trg_object->delta_data);
874 } else {
875 /* create delta when writing the pack */
876 GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
877 git__free(delta_buf);
878 }
879
880 trg_object->delta = src_object;
881 trg_object->delta_size = delta_size;
882 trg->depth = src->depth + 1;
883
884 *ret = 1;
885 return 0;
886 }
887
check_delta_limit(git_pobject * me,size_t n)888 static size_t check_delta_limit(git_pobject *me, size_t n)
889 {
890 git_pobject *child = me->delta_child;
891 size_t m = n;
892
893 while (child) {
894 size_t c = check_delta_limit(child, n + 1);
895 if (m < c)
896 m = c;
897 child = child->delta_sibling;
898 }
899 return m;
900 }
901
free_unpacked(struct unpacked * n)902 static size_t free_unpacked(struct unpacked *n)
903 {
904 size_t freed_mem = 0;
905
906 if (n->index) {
907 freed_mem += git_delta_index_size(n->index);
908 git_delta_index_free(n->index);
909 }
910 n->index = NULL;
911
912 if (n->data) {
913 freed_mem += n->object->size;
914 git__free(n->data);
915 n->data = NULL;
916 }
917 n->object = NULL;
918 n->depth = 0;
919 return freed_mem;
920 }
921
report_delta_progress(git_packbuilder * pb,uint32_t count,bool force)922 static int report_delta_progress(
923 git_packbuilder *pb, uint32_t count, bool force)
924 {
925 int ret;
926
927 if (pb->progress_cb) {
928 double current_time = git__timer();
929 double elapsed = current_time - pb->last_progress_report_time;
930
931 if (force || elapsed < 0 || elapsed >= MIN_PROGRESS_UPDATE_INTERVAL) {
932 pb->last_progress_report_time = current_time;
933
934 ret = pb->progress_cb(
935 GIT_PACKBUILDER_DELTAFICATION,
936 count, pb->nr_objects, pb->progress_cb_payload);
937
938 if (ret)
939 return git_error_set_after_callback(ret);
940 }
941 }
942
943 return 0;
944 }
945
find_deltas(git_packbuilder * pb,git_pobject ** list,size_t * list_size,size_t window,size_t depth)946 static int find_deltas(git_packbuilder *pb, git_pobject **list,
947 size_t *list_size, size_t window, size_t depth)
948 {
949 git_pobject *po;
950 git_buf zbuf = GIT_BUF_INIT;
951 struct unpacked *array;
952 size_t idx = 0, count = 0;
953 size_t mem_usage = 0;
954 size_t i;
955 int error = -1;
956
957 array = git__calloc(window, sizeof(struct unpacked));
958 GIT_ERROR_CHECK_ALLOC(array);
959
960 for (;;) {
961 struct unpacked *n = array + idx;
962 size_t max_depth, j, best_base = SIZE_MAX;
963
964 GIT_ASSERT(git_packbuilder__progress_lock(pb) == 0);
965 if (!*list_size) {
966 GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
967 break;
968 }
969
970 pb->nr_deltified += 1;
971 report_delta_progress(pb, pb->nr_deltified, false);
972
973 po = *list++;
974 (*list_size)--;
975 GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
976
977 mem_usage -= free_unpacked(n);
978 n->object = po;
979
980 while (pb->window_memory_limit &&
981 mem_usage > pb->window_memory_limit &&
982 count > 1) {
983 size_t tail = (idx + window - count) % window;
984 mem_usage -= free_unpacked(array + tail);
985 count--;
986 }
987
988 /*
989 * If the current object is at pack edge, take the depth the
990 * objects that depend on the current object into account
991 * otherwise they would become too deep.
992 */
993 max_depth = depth;
994 if (po->delta_child) {
995 size_t delta_limit = check_delta_limit(po, 0);
996
997 if (delta_limit > max_depth)
998 goto next;
999
1000 max_depth -= delta_limit;
1001 }
1002
1003 j = window;
1004 while (--j > 0) {
1005 int ret;
1006 size_t other_idx = idx + j;
1007 struct unpacked *m;
1008
1009 if (other_idx >= window)
1010 other_idx -= window;
1011
1012 m = array + other_idx;
1013 if (!m->object)
1014 break;
1015
1016 if (try_delta(pb, n, m, max_depth, &mem_usage, &ret) < 0)
1017 goto on_error;
1018 if (ret < 0)
1019 break;
1020 else if (ret > 0)
1021 best_base = other_idx;
1022 }
1023
1024 /*
1025 * If we decided to cache the delta data, then it is best
1026 * to compress it right away. First because we have to do
1027 * it anyway, and doing it here while we're threaded will
1028 * save a lot of time in the non threaded write phase,
1029 * as well as allow for caching more deltas within
1030 * the same cache size limit.
1031 * ...
1032 * But only if not writing to stdout, since in that case
1033 * the network is most likely throttling writes anyway,
1034 * and therefore it is best to go to the write phase ASAP
1035 * instead, as we can afford spending more time compressing
1036 * between writes at that moment.
1037 */
1038 if (po->delta_data) {
1039 if (git_zstream_deflatebuf(&zbuf, po->delta_data, po->delta_size) < 0)
1040 goto on_error;
1041
1042 git__free(po->delta_data);
1043 po->delta_data = git__malloc(zbuf.size);
1044 GIT_ERROR_CHECK_ALLOC(po->delta_data);
1045
1046 memcpy(po->delta_data, zbuf.ptr, zbuf.size);
1047 po->z_delta_size = zbuf.size;
1048 git_buf_clear(&zbuf);
1049
1050 GIT_ASSERT(git_packbuilder__cache_lock(pb) == 0);
1051 pb->delta_cache_size -= po->delta_size;
1052 pb->delta_cache_size += po->z_delta_size;
1053 GIT_ASSERT(git_packbuilder__cache_unlock(pb) == 0);
1054 }
1055
1056 /*
1057 * If we made n a delta, and if n is already at max
1058 * depth, leaving it in the window is pointless. we
1059 * should evict it first.
1060 */
1061 if (po->delta && max_depth <= n->depth)
1062 continue;
1063
1064 /*
1065 * Move the best delta base up in the window, after the
1066 * currently deltified object, to keep it longer. It will
1067 * be the first base object to be attempted next.
1068 */
1069 if (po->delta) {
1070 struct unpacked swap = array[best_base];
1071 size_t dist = (window + idx - best_base) % window;
1072 size_t dst = best_base;
1073 while (dist--) {
1074 size_t src = (dst + 1) % window;
1075 array[dst] = array[src];
1076 dst = src;
1077 }
1078 array[dst] = swap;
1079 }
1080
1081 next:
1082 idx++;
1083 if (count + 1 < window)
1084 count++;
1085 if (idx >= window)
1086 idx = 0;
1087 }
1088 error = 0;
1089
1090 on_error:
1091 for (i = 0; i < window; ++i) {
1092 git__free(array[i].index);
1093 git__free(array[i].data);
1094 }
1095 git__free(array);
1096 git_buf_dispose(&zbuf);
1097
1098 return error;
1099 }
1100
1101 #ifdef GIT_THREADS
1102
1103 struct thread_params {
1104 git_thread thread;
1105 git_packbuilder *pb;
1106
1107 git_pobject **list;
1108
1109 git_cond cond;
1110 git_mutex mutex;
1111
1112 size_t list_size;
1113 size_t remaining;
1114
1115 size_t window;
1116 size_t depth;
1117 size_t working;
1118 size_t data_ready;
1119 };
1120
threaded_find_deltas(void * arg)1121 static void *threaded_find_deltas(void *arg)
1122 {
1123 struct thread_params *me = arg;
1124
1125 while (me->remaining) {
1126 if (find_deltas(me->pb, me->list, &me->remaining,
1127 me->window, me->depth) < 0) {
1128 ; /* TODO */
1129 }
1130
1131 GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_lock(me->pb) == 0, NULL);
1132 me->working = 0;
1133 git_cond_signal(&me->pb->progress_cond);
1134 GIT_ASSERT_WITH_RETVAL(git_packbuilder__progress_unlock(me->pb) == 0, NULL);
1135
1136 if (git_mutex_lock(&me->mutex)) {
1137 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1138 return NULL;
1139 }
1140
1141 while (!me->data_ready)
1142 git_cond_wait(&me->cond, &me->mutex);
1143
1144 /*
1145 * We must not set ->data_ready before we wait on the
1146 * condition because the main thread may have set it to 1
1147 * before we get here. In order to be sure that new
1148 * work is available if we see 1 in ->data_ready, it
1149 * was initialized to 0 before this thread was spawned
1150 * and we reset it to 0 right away.
1151 */
1152 me->data_ready = 0;
1153 git_mutex_unlock(&me->mutex);
1154 }
1155 /* leave ->working 1 so that this doesn't get more work assigned */
1156 return NULL;
1157 }
1158
ll_find_deltas(git_packbuilder * pb,git_pobject ** list,size_t list_size,size_t window,size_t depth)1159 static int ll_find_deltas(git_packbuilder *pb, git_pobject **list,
1160 size_t list_size, size_t window, size_t depth)
1161 {
1162 struct thread_params *p;
1163 size_t i;
1164 int ret, active_threads = 0;
1165
1166 if (!pb->nr_threads)
1167 pb->nr_threads = git__online_cpus();
1168
1169 if (pb->nr_threads <= 1) {
1170 find_deltas(pb, list, &list_size, window, depth);
1171 return 0;
1172 }
1173
1174 p = git__mallocarray(pb->nr_threads, sizeof(*p));
1175 GIT_ERROR_CHECK_ALLOC(p);
1176
1177 /* Partition the work among the threads */
1178 for (i = 0; i < pb->nr_threads; ++i) {
1179 size_t sub_size = list_size / (pb->nr_threads - i);
1180
1181 /* don't use too small segments or no deltas will be found */
1182 if (sub_size < 2*window && i+1 < pb->nr_threads)
1183 sub_size = 0;
1184
1185 p[i].pb = pb;
1186 p[i].window = window;
1187 p[i].depth = depth;
1188 p[i].working = 1;
1189 p[i].data_ready = 0;
1190
1191 /* try to split chunks on "path" boundaries */
1192 while (sub_size && sub_size < list_size &&
1193 list[sub_size]->hash &&
1194 list[sub_size]->hash == list[sub_size-1]->hash)
1195 sub_size++;
1196
1197 p[i].list = list;
1198 p[i].list_size = sub_size;
1199 p[i].remaining = sub_size;
1200
1201 list += sub_size;
1202 list_size -= sub_size;
1203 }
1204
1205 /* Start work threads */
1206 for (i = 0; i < pb->nr_threads; ++i) {
1207 if (!p[i].list_size)
1208 continue;
1209
1210 git_mutex_init(&p[i].mutex);
1211 git_cond_init(&p[i].cond);
1212
1213 ret = git_thread_create(&p[i].thread,
1214 threaded_find_deltas, &p[i]);
1215 if (ret) {
1216 git_error_set(GIT_ERROR_THREAD, "unable to create thread");
1217 return -1;
1218 }
1219 active_threads++;
1220 }
1221
1222 /*
1223 * Now let's wait for work completion. Each time a thread is done
1224 * with its work, we steal half of the remaining work from the
1225 * thread with the largest number of unprocessed objects and give
1226 * it to that newly idle thread. This ensure good load balancing
1227 * until the remaining object list segments are simply too short
1228 * to be worth splitting anymore.
1229 */
1230 while (active_threads) {
1231 struct thread_params *target = NULL;
1232 struct thread_params *victim = NULL;
1233 size_t sub_size = 0;
1234
1235 /* Start by locating a thread that has transitioned its
1236 * 'working' flag from 1 -> 0. This indicates that it is
1237 * ready to receive more work using our work-stealing
1238 * algorithm. */
1239 GIT_ASSERT(git_packbuilder__progress_lock(pb) == 0);
1240 for (;;) {
1241 for (i = 0; !target && i < pb->nr_threads; i++)
1242 if (!p[i].working)
1243 target = &p[i];
1244 if (target)
1245 break;
1246 git_cond_wait(&pb->progress_cond, &pb->progress_mutex);
1247 }
1248
1249 /* At this point we hold the progress lock and have located
1250 * a thread to receive more work. We still need to locate a
1251 * thread from which to steal work (the victim). */
1252 for (i = 0; i < pb->nr_threads; i++)
1253 if (p[i].remaining > 2*window &&
1254 (!victim || victim->remaining < p[i].remaining))
1255 victim = &p[i];
1256
1257 if (victim) {
1258 sub_size = victim->remaining / 2;
1259 list = victim->list + victim->list_size - sub_size;
1260 while (sub_size && list[0]->hash &&
1261 list[0]->hash == list[-1]->hash) {
1262 list++;
1263 sub_size--;
1264 }
1265 if (!sub_size) {
1266 /*
1267 * It is possible for some "paths" to have
1268 * so many objects that no hash boundary
1269 * might be found. Let's just steal the
1270 * exact half in that case.
1271 */
1272 sub_size = victim->remaining / 2;
1273 list -= sub_size;
1274 }
1275 target->list = list;
1276 victim->list_size -= sub_size;
1277 victim->remaining -= sub_size;
1278 }
1279 target->list_size = sub_size;
1280 target->remaining = sub_size;
1281 target->working = 1;
1282 GIT_ASSERT(git_packbuilder__progress_unlock(pb) == 0);
1283
1284 if (git_mutex_lock(&target->mutex)) {
1285 git_error_set(GIT_ERROR_THREAD, "unable to lock packfile condition mutex");
1286 git__free(p);
1287 return -1;
1288 }
1289
1290 target->data_ready = 1;
1291 git_cond_signal(&target->cond);
1292 git_mutex_unlock(&target->mutex);
1293
1294 if (!sub_size) {
1295 git_thread_join(&target->thread, NULL);
1296 git_cond_free(&target->cond);
1297 git_mutex_free(&target->mutex);
1298 active_threads--;
1299 }
1300 }
1301
1302 git__free(p);
1303 return 0;
1304 }
1305
1306 #else
1307 #define ll_find_deltas(pb, l, ls, w, d) find_deltas(pb, l, &ls, w, d)
1308 #endif
1309
prepare_pack(git_packbuilder * pb)1310 static int prepare_pack(git_packbuilder *pb)
1311 {
1312 git_pobject **delta_list;
1313 size_t i, n = 0;
1314
1315 if (pb->nr_objects == 0 || pb->done)
1316 return 0; /* nothing to do */
1317
1318 /*
1319 * Although we do not report progress during deltafication, we
1320 * at least report that we are in the deltafication stage
1321 */
1322 if (pb->progress_cb)
1323 pb->progress_cb(GIT_PACKBUILDER_DELTAFICATION, 0, pb->nr_objects, pb->progress_cb_payload);
1324
1325 delta_list = git__mallocarray(pb->nr_objects, sizeof(*delta_list));
1326 GIT_ERROR_CHECK_ALLOC(delta_list);
1327
1328 for (i = 0; i < pb->nr_objects; ++i) {
1329 git_pobject *po = pb->object_list + i;
1330
1331 /* Make sure the item is within our size limits */
1332 if (po->size < 50 || po->size > pb->big_file_threshold)
1333 continue;
1334
1335 delta_list[n++] = po;
1336 }
1337
1338 if (n > 1) {
1339 git__tsort((void **)delta_list, n, type_size_sort);
1340 if (ll_find_deltas(pb, delta_list, n,
1341 GIT_PACK_WINDOW + 1,
1342 GIT_PACK_DEPTH) < 0) {
1343 git__free(delta_list);
1344 return -1;
1345 }
1346 }
1347
1348 report_delta_progress(pb, pb->nr_objects, true);
1349
1350 pb->done = true;
1351 git__free(delta_list);
1352 return 0;
1353 }
1354
1355 #define PREPARE_PACK if (prepare_pack(pb) < 0) { return -1; }
1356
git_packbuilder_foreach(git_packbuilder * pb,int (* cb)(void * buf,size_t size,void * payload),void * payload)1357 int git_packbuilder_foreach(git_packbuilder *pb, int (*cb)(void *buf, size_t size, void *payload), void *payload)
1358 {
1359 PREPARE_PACK;
1360 return write_pack(pb, cb, payload);
1361 }
1362
git_packbuilder_write_buf(git_buf * buf,git_packbuilder * pb)1363 int git_packbuilder_write_buf(git_buf *buf, git_packbuilder *pb)
1364 {
1365 int error;
1366
1367 if ((error = git_buf_sanitize(buf)) < 0)
1368 return error;
1369
1370 PREPARE_PACK;
1371
1372 return write_pack(pb, &write_pack_buf, buf);
1373 }
1374
write_cb(void * buf,size_t len,void * payload)1375 static int write_cb(void *buf, size_t len, void *payload)
1376 {
1377 struct pack_write_context *ctx = payload;
1378 return git_indexer_append(ctx->indexer, buf, len, ctx->stats);
1379 }
1380
git_packbuilder_write(git_packbuilder * pb,const char * path,unsigned int mode,git_indexer_progress_cb progress_cb,void * progress_cb_payload)1381 int git_packbuilder_write(
1382 git_packbuilder *pb,
1383 const char *path,
1384 unsigned int mode,
1385 git_indexer_progress_cb progress_cb,
1386 void *progress_cb_payload)
1387 {
1388 int error = -1;
1389 git_buf object_path = GIT_BUF_INIT;
1390 git_indexer_options opts = GIT_INDEXER_OPTIONS_INIT;
1391 git_indexer *indexer = NULL;
1392 git_indexer_progress stats;
1393 struct pack_write_context ctx;
1394 int t;
1395
1396 PREPARE_PACK;
1397
1398 if (path == NULL) {
1399 if ((error = git_repository_item_path(&object_path, pb->repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0)
1400 goto cleanup;
1401 if ((error = git_buf_joinpath(&object_path, git_buf_cstr(&object_path), "pack")) < 0)
1402 goto cleanup;
1403 path = git_buf_cstr(&object_path);
1404 }
1405
1406 opts.progress_cb = progress_cb;
1407 opts.progress_cb_payload = progress_cb_payload;
1408
1409 if ((error = git_indexer_new(&indexer, path, mode, pb->odb, &opts)) < 0)
1410 goto cleanup;
1411
1412 if (!git_repository__configmap_lookup(&t, pb->repo, GIT_CONFIGMAP_FSYNCOBJECTFILES) && t)
1413 git_indexer__set_fsync(indexer, 1);
1414
1415 ctx.indexer = indexer;
1416 ctx.stats = &stats;
1417
1418 if ((error = git_packbuilder_foreach(pb, write_cb, &ctx)) < 0)
1419 goto cleanup;
1420
1421 if ((error = git_indexer_commit(indexer, &stats)) < 0)
1422 goto cleanup;
1423
1424 git_oid_cpy(&pb->pack_oid, git_indexer_hash(indexer));
1425
1426 cleanup:
1427 git_indexer_free(indexer);
1428 git_buf_dispose(&object_path);
1429 return error;
1430 }
1431
1432 #undef PREPARE_PACK
1433
git_packbuilder_hash(git_packbuilder * pb)1434 const git_oid *git_packbuilder_hash(git_packbuilder *pb)
1435 {
1436 return &pb->pack_oid;
1437 }
1438
1439
cb_tree_walk(const char * root,const git_tree_entry * entry,void * payload)1440 static int cb_tree_walk(
1441 const char *root, const git_tree_entry *entry, void *payload)
1442 {
1443 int error;
1444 struct tree_walk_context *ctx = payload;
1445
1446 /* A commit inside a tree represents a submodule commit and should be skipped. */
1447 if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT)
1448 return 0;
1449
1450 if (!(error = git_buf_sets(&ctx->buf, root)) &&
1451 !(error = git_buf_puts(&ctx->buf, git_tree_entry_name(entry))))
1452 error = git_packbuilder_insert(
1453 ctx->pb, git_tree_entry_id(entry), git_buf_cstr(&ctx->buf));
1454
1455 return error;
1456 }
1457
git_packbuilder_insert_commit(git_packbuilder * pb,const git_oid * oid)1458 int git_packbuilder_insert_commit(git_packbuilder *pb, const git_oid *oid)
1459 {
1460 git_commit *commit;
1461
1462 if (git_commit_lookup(&commit, pb->repo, oid) < 0 ||
1463 git_packbuilder_insert(pb, oid, NULL) < 0)
1464 return -1;
1465
1466 if (git_packbuilder_insert_tree(pb, git_commit_tree_id(commit)) < 0)
1467 return -1;
1468
1469 git_commit_free(commit);
1470 return 0;
1471 }
1472
git_packbuilder_insert_tree(git_packbuilder * pb,const git_oid * oid)1473 int git_packbuilder_insert_tree(git_packbuilder *pb, const git_oid *oid)
1474 {
1475 int error;
1476 git_tree *tree = NULL;
1477 struct tree_walk_context context = { pb, GIT_BUF_INIT };
1478
1479 if (!(error = git_tree_lookup(&tree, pb->repo, oid)) &&
1480 !(error = git_packbuilder_insert(pb, oid, NULL)))
1481 error = git_tree_walk(tree, GIT_TREEWALK_PRE, cb_tree_walk, &context);
1482
1483 git_tree_free(tree);
1484 git_buf_dispose(&context.buf);
1485 return error;
1486 }
1487
git_packbuilder_insert_recur(git_packbuilder * pb,const git_oid * id,const char * name)1488 int git_packbuilder_insert_recur(git_packbuilder *pb, const git_oid *id, const char *name)
1489 {
1490 git_object *obj;
1491 int error;
1492
1493 GIT_ASSERT_ARG(pb);
1494 GIT_ASSERT_ARG(id);
1495
1496 if ((error = git_object_lookup(&obj, pb->repo, id, GIT_OBJECT_ANY)) < 0)
1497 return error;
1498
1499 switch (git_object_type(obj)) {
1500 case GIT_OBJECT_BLOB:
1501 error = git_packbuilder_insert(pb, id, name);
1502 break;
1503 case GIT_OBJECT_TREE:
1504 error = git_packbuilder_insert_tree(pb, id);
1505 break;
1506 case GIT_OBJECT_COMMIT:
1507 error = git_packbuilder_insert_commit(pb, id);
1508 break;
1509 case GIT_OBJECT_TAG:
1510 if ((error = git_packbuilder_insert(pb, id, name)) < 0)
1511 goto cleanup;
1512 error = git_packbuilder_insert_recur(pb, git_tag_target_id((git_tag *) obj), NULL);
1513 break;
1514
1515 default:
1516 git_error_set(GIT_ERROR_INVALID, "unknown object type");
1517 error = -1;
1518 }
1519
1520 cleanup:
1521 git_object_free(obj);
1522 return error;
1523 }
1524
git_packbuilder_object_count(git_packbuilder * pb)1525 size_t git_packbuilder_object_count(git_packbuilder *pb)
1526 {
1527 return pb->nr_objects;
1528 }
1529
git_packbuilder_written(git_packbuilder * pb)1530 size_t git_packbuilder_written(git_packbuilder *pb)
1531 {
1532 return pb->nr_written;
1533 }
1534
lookup_walk_object(struct walk_object ** out,git_packbuilder * pb,const git_oid * id)1535 static int lookup_walk_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1536 {
1537 struct walk_object *obj;
1538
1539 obj = git_pool_mallocz(&pb->object_pool, 1);
1540 if (!obj) {
1541 git_error_set_oom();
1542 return -1;
1543 }
1544
1545 git_oid_cpy(&obj->id, id);
1546
1547 *out = obj;
1548 return 0;
1549 }
1550
retrieve_object(struct walk_object ** out,git_packbuilder * pb,const git_oid * id)1551 static int retrieve_object(struct walk_object **out, git_packbuilder *pb, const git_oid *id)
1552 {
1553 struct walk_object *obj;
1554 int error;
1555
1556 if ((obj = git_oidmap_get(pb->walk_objects, id)) == NULL) {
1557 if ((error = lookup_walk_object(&obj, pb, id)) < 0)
1558 return error;
1559
1560 if ((error = git_oidmap_set(pb->walk_objects, &obj->id, obj)) < 0)
1561 return error;
1562 }
1563
1564 *out = obj;
1565 return 0;
1566 }
1567
mark_blob_uninteresting(git_packbuilder * pb,const git_oid * id)1568 static int mark_blob_uninteresting(git_packbuilder *pb, const git_oid *id)
1569 {
1570 int error;
1571 struct walk_object *obj;
1572
1573 if ((error = retrieve_object(&obj, pb, id)) < 0)
1574 return error;
1575
1576 obj->uninteresting = 1;
1577
1578 return 0;
1579 }
1580
mark_tree_uninteresting(git_packbuilder * pb,const git_oid * id)1581 static int mark_tree_uninteresting(git_packbuilder *pb, const git_oid *id)
1582 {
1583 struct walk_object *obj;
1584 git_tree *tree;
1585 int error;
1586 size_t i;
1587
1588 if ((error = retrieve_object(&obj, pb, id)) < 0)
1589 return error;
1590
1591 if (obj->uninteresting)
1592 return 0;
1593
1594 obj->uninteresting = 1;
1595
1596 if ((error = git_tree_lookup(&tree, pb->repo, id)) < 0)
1597 return error;
1598
1599 for (i = 0; i < git_tree_entrycount(tree); i++) {
1600 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1601 const git_oid *entry_id = git_tree_entry_id(entry);
1602 switch (git_tree_entry_type(entry)) {
1603 case GIT_OBJECT_TREE:
1604 if ((error = mark_tree_uninteresting(pb, entry_id)) < 0)
1605 goto cleanup;
1606 break;
1607 case GIT_OBJECT_BLOB:
1608 if ((error = mark_blob_uninteresting(pb, entry_id)) < 0)
1609 goto cleanup;
1610 break;
1611 default:
1612 /* it's a submodule or something unknown, we don't want it */
1613 ;
1614 }
1615 }
1616
1617 cleanup:
1618 git_tree_free(tree);
1619 return error;
1620 }
1621
1622 /*
1623 * Mark the edges of the graph uninteresting. Since we start from a
1624 * git_revwalk, the commits are already uninteresting, but we need to
1625 * mark the trees and blobs.
1626 */
mark_edges_uninteresting(git_packbuilder * pb,git_commit_list * commits)1627 static int mark_edges_uninteresting(git_packbuilder *pb, git_commit_list *commits)
1628 {
1629 int error;
1630 git_commit_list *list;
1631 git_commit *commit;
1632
1633 for (list = commits; list; list = list->next) {
1634 if (!list->item->uninteresting)
1635 continue;
1636
1637 if ((error = git_commit_lookup(&commit, pb->repo, &list->item->oid)) < 0)
1638 return error;
1639
1640 error = mark_tree_uninteresting(pb, git_commit_tree_id(commit));
1641 git_commit_free(commit);
1642
1643 if (error < 0)
1644 return error;
1645 }
1646
1647 return 0;
1648 }
1649
pack_objects_insert_tree(git_packbuilder * pb,git_tree * tree)1650 static int pack_objects_insert_tree(git_packbuilder *pb, git_tree *tree)
1651 {
1652 size_t i;
1653 int error;
1654 git_tree *subtree;
1655 struct walk_object *obj;
1656 const char *name;
1657
1658 if ((error = retrieve_object(&obj, pb, git_tree_id(tree))) < 0)
1659 return error;
1660
1661 if (obj->seen || obj->uninteresting)
1662 return 0;
1663
1664 obj->seen = 1;
1665
1666 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)))
1667 return error;
1668
1669 for (i = 0; i < git_tree_entrycount(tree); i++) {
1670 const git_tree_entry *entry = git_tree_entry_byindex(tree, i);
1671 const git_oid *entry_id = git_tree_entry_id(entry);
1672 switch (git_tree_entry_type(entry)) {
1673 case GIT_OBJECT_TREE:
1674 if ((error = git_tree_lookup(&subtree, pb->repo, entry_id)) < 0)
1675 return error;
1676
1677 error = pack_objects_insert_tree(pb, subtree);
1678 git_tree_free(subtree);
1679
1680 if (error < 0)
1681 return error;
1682
1683 break;
1684 case GIT_OBJECT_BLOB:
1685 if ((error = retrieve_object(&obj, pb, entry_id)) < 0)
1686 return error;
1687 if (obj->uninteresting)
1688 continue;
1689 name = git_tree_entry_name(entry);
1690 if ((error = git_packbuilder_insert(pb, entry_id, name)) < 0)
1691 return error;
1692 break;
1693 default:
1694 /* it's a submodule or something unknown, we don't want it */
1695 ;
1696 }
1697 }
1698
1699
1700 return error;
1701 }
1702
pack_objects_insert_commit(git_packbuilder * pb,struct walk_object * obj)1703 static int pack_objects_insert_commit(git_packbuilder *pb, struct walk_object *obj)
1704 {
1705 int error;
1706 git_commit *commit = NULL;
1707 git_tree *tree = NULL;
1708
1709 obj->seen = 1;
1710
1711 if ((error = git_packbuilder_insert(pb, &obj->id, NULL)) < 0)
1712 return error;
1713
1714 if ((error = git_commit_lookup(&commit, pb->repo, &obj->id)) < 0)
1715 return error;
1716
1717 if ((error = git_tree_lookup(&tree, pb->repo, git_commit_tree_id(commit))) < 0)
1718 goto cleanup;
1719
1720 if ((error = pack_objects_insert_tree(pb, tree)) < 0)
1721 goto cleanup;
1722
1723 cleanup:
1724 git_commit_free(commit);
1725 git_tree_free(tree);
1726 return error;
1727 }
1728
git_packbuilder_insert_walk(git_packbuilder * pb,git_revwalk * walk)1729 int git_packbuilder_insert_walk(git_packbuilder *pb, git_revwalk *walk)
1730 {
1731 int error;
1732 git_oid id;
1733 struct walk_object *obj;
1734
1735 GIT_ASSERT_ARG(pb);
1736 GIT_ASSERT_ARG(walk);
1737
1738 if ((error = mark_edges_uninteresting(pb, walk->user_input)) < 0)
1739 return error;
1740
1741 /*
1742 * TODO: git marks the parents of the edges
1743 * uninteresting. This may provide a speed advantage, but does
1744 * seem to assume the remote does not have a single-commit
1745 * history on the other end.
1746 */
1747
1748 /* walk down each tree up to the blobs and insert them, stopping when uninteresting */
1749 while ((error = git_revwalk_next(&id, walk)) == 0) {
1750 if ((error = retrieve_object(&obj, pb, &id)) < 0)
1751 return error;
1752
1753 if (obj->seen || obj->uninteresting)
1754 continue;
1755
1756 if ((error = pack_objects_insert_commit(pb, obj)) < 0)
1757 return error;
1758 }
1759
1760 if (error == GIT_ITEROVER)
1761 error = 0;
1762
1763 return error;
1764 }
1765
git_packbuilder_set_callbacks(git_packbuilder * pb,git_packbuilder_progress progress_cb,void * progress_cb_payload)1766 int git_packbuilder_set_callbacks(git_packbuilder *pb, git_packbuilder_progress progress_cb, void *progress_cb_payload)
1767 {
1768 if (!pb)
1769 return -1;
1770
1771 pb->progress_cb = progress_cb;
1772 pb->progress_cb_payload = progress_cb_payload;
1773
1774 return 0;
1775 }
1776
git_packbuilder_free(git_packbuilder * pb)1777 void git_packbuilder_free(git_packbuilder *pb)
1778 {
1779 if (pb == NULL)
1780 return;
1781
1782 #ifdef GIT_THREADS
1783
1784 git_mutex_free(&pb->cache_mutex);
1785 git_mutex_free(&pb->progress_mutex);
1786 git_cond_free(&pb->progress_cond);
1787
1788 #endif
1789
1790 if (pb->odb)
1791 git_odb_free(pb->odb);
1792
1793 if (pb->object_ix)
1794 git_oidmap_free(pb->object_ix);
1795
1796 if (pb->object_list)
1797 git__free(pb->object_list);
1798
1799 git_oidmap_free(pb->walk_objects);
1800 git_pool_clear(&pb->object_pool);
1801
1802 git_hash_ctx_cleanup(&pb->ctx);
1803 git_zstream_free(&pb->zstream);
1804
1805 git__free(pb);
1806 }
1807