1 #include "git-compat-util.h"
2 #include "config.h"
3 #include "lockfile.h"
4 #include "pack.h"
5 #include "packfile.h"
6 #include "commit.h"
7 #include "object.h"
8 #include "refs.h"
9 #include "revision.h"
10 #include "hash-lookup.h"
11 #include "commit-graph.h"
12 #include "object-store.h"
13 #include "alloc.h"
14 #include "hashmap.h"
15 #include "replace-object.h"
16 #include "progress.h"
17 #include "bloom.h"
18 #include "commit-slab.h"
19 #include "shallow.h"
20 #include "json-writer.h"
21 #include "trace2.h"
22 #include "chunk-format.h"
23 
git_test_write_commit_graph_or_die(void)24 void git_test_write_commit_graph_or_die(void)
25 {
26 	int flags = 0;
27 	if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
28 		return;
29 
30 	if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
31 		flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
32 
33 	if (write_commit_graph_reachable(the_repository->objects->odb,
34 					 flags, NULL))
35 		die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
36 }
37 
38 #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
39 #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
40 #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
41 #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
42 #define GRAPH_CHUNKID_GENERATION_DATA 0x47444154 /* "GDAT" */
43 #define GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW 0x47444f56 /* "GDOV" */
44 #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
45 #define GRAPH_CHUNKID_BLOOMINDEXES 0x42494458 /* "BIDX" */
46 #define GRAPH_CHUNKID_BLOOMDATA 0x42444154 /* "BDAT" */
47 #define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
48 
49 #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
50 
51 #define GRAPH_VERSION_1 0x1
52 #define GRAPH_VERSION GRAPH_VERSION_1
53 
54 #define GRAPH_EXTRA_EDGES_NEEDED 0x80000000
55 #define GRAPH_EDGE_LAST_MASK 0x7fffffff
56 #define GRAPH_PARENT_NONE 0x70000000
57 
58 #define GRAPH_LAST_EDGE 0x80000000
59 
60 #define GRAPH_HEADER_SIZE 8
61 #define GRAPH_FANOUT_SIZE (4 * 256)
62 #define GRAPH_MIN_SIZE (GRAPH_HEADER_SIZE + 4 * CHUNK_TOC_ENTRY_SIZE \
63 			+ GRAPH_FANOUT_SIZE + the_hash_algo->rawsz)
64 
65 #define CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW (1ULL << 31)
66 
67 /* Remember to update object flag allocation in object.h */
68 #define REACHABLE       (1u<<15)
69 
70 define_commit_slab(topo_level_slab, uint32_t);
71 
72 /* Keep track of the order in which commits are added to our list. */
73 define_commit_slab(commit_pos, int);
74 static struct commit_pos commit_pos = COMMIT_SLAB_INIT(1, commit_pos);
75 
set_commit_pos(struct repository * r,const struct object_id * oid)76 static void set_commit_pos(struct repository *r, const struct object_id *oid)
77 {
78 	static int32_t max_pos;
79 	struct commit *commit = lookup_commit(r, oid);
80 
81 	if (!commit)
82 		return; /* should never happen, but be lenient */
83 
84 	*commit_pos_at(&commit_pos, commit) = max_pos++;
85 }
86 
commit_pos_cmp(const void * va,const void * vb)87 static int commit_pos_cmp(const void *va, const void *vb)
88 {
89 	const struct commit *a = *(const struct commit **)va;
90 	const struct commit *b = *(const struct commit **)vb;
91 	return commit_pos_at(&commit_pos, a) -
92 	       commit_pos_at(&commit_pos, b);
93 }
94 
95 define_commit_slab(commit_graph_data_slab, struct commit_graph_data);
96 static struct commit_graph_data_slab commit_graph_data_slab =
97 	COMMIT_SLAB_INIT(1, commit_graph_data_slab);
98 
get_configured_generation_version(struct repository * r)99 static int get_configured_generation_version(struct repository *r)
100 {
101 	int version = 2;
102 	repo_config_get_int(r, "commitgraph.generationversion", &version);
103 	return version;
104 }
105 
commit_graph_position(const struct commit * c)106 uint32_t commit_graph_position(const struct commit *c)
107 {
108 	struct commit_graph_data *data =
109 		commit_graph_data_slab_peek(&commit_graph_data_slab, c);
110 
111 	return data ? data->graph_pos : COMMIT_NOT_FROM_GRAPH;
112 }
113 
commit_graph_generation(const struct commit * c)114 timestamp_t commit_graph_generation(const struct commit *c)
115 {
116 	struct commit_graph_data *data =
117 		commit_graph_data_slab_peek(&commit_graph_data_slab, c);
118 
119 	if (!data)
120 		return GENERATION_NUMBER_INFINITY;
121 	else if (data->graph_pos == COMMIT_NOT_FROM_GRAPH)
122 		return GENERATION_NUMBER_INFINITY;
123 
124 	return data->generation;
125 }
126 
commit_graph_data_at(const struct commit * c)127 static struct commit_graph_data *commit_graph_data_at(const struct commit *c)
128 {
129 	unsigned int i, nth_slab;
130 	struct commit_graph_data *data =
131 		commit_graph_data_slab_peek(&commit_graph_data_slab, c);
132 
133 	if (data)
134 		return data;
135 
136 	nth_slab = c->index / commit_graph_data_slab.slab_size;
137 	data = commit_graph_data_slab_at(&commit_graph_data_slab, c);
138 
139 	/*
140 	 * commit-slab initializes elements with zero, overwrite this with
141 	 * COMMIT_NOT_FROM_GRAPH for graph_pos.
142 	 *
143 	 * We avoid initializing generation with checking if graph position
144 	 * is not COMMIT_NOT_FROM_GRAPH.
145 	 */
146 	for (i = 0; i < commit_graph_data_slab.slab_size; i++) {
147 		commit_graph_data_slab.slab[nth_slab][i].graph_pos =
148 			COMMIT_NOT_FROM_GRAPH;
149 	}
150 
151 	return data;
152 }
153 
154 /*
155  * Should be used only while writing commit-graph as it compares
156  * generation value of commits by directly accessing commit-slab.
157  */
commit_gen_cmp(const void * va,const void * vb)158 static int commit_gen_cmp(const void *va, const void *vb)
159 {
160 	const struct commit *a = *(const struct commit **)va;
161 	const struct commit *b = *(const struct commit **)vb;
162 
163 	const timestamp_t generation_a = commit_graph_data_at(a)->generation;
164 	const timestamp_t generation_b = commit_graph_data_at(b)->generation;
165 	/* lower generation commits first */
166 	if (generation_a < generation_b)
167 		return -1;
168 	else if (generation_a > generation_b)
169 		return 1;
170 
171 	/* use date as a heuristic when generations are equal */
172 	if (a->date < b->date)
173 		return -1;
174 	else if (a->date > b->date)
175 		return 1;
176 	return 0;
177 }
178 
get_commit_graph_filename(struct object_directory * obj_dir)179 char *get_commit_graph_filename(struct object_directory *obj_dir)
180 {
181 	return xstrfmt("%s/info/commit-graph", obj_dir->path);
182 }
183 
get_split_graph_filename(struct object_directory * odb,const char * oid_hex)184 static char *get_split_graph_filename(struct object_directory *odb,
185 				      const char *oid_hex)
186 {
187 	return xstrfmt("%s/info/commit-graphs/graph-%s.graph", odb->path,
188 		       oid_hex);
189 }
190 
get_commit_graph_chain_filename(struct object_directory * odb)191 char *get_commit_graph_chain_filename(struct object_directory *odb)
192 {
193 	return xstrfmt("%s/info/commit-graphs/commit-graph-chain", odb->path);
194 }
195 
oid_version(void)196 static uint8_t oid_version(void)
197 {
198 	switch (hash_algo_by_ptr(the_hash_algo)) {
199 	case GIT_HASH_SHA1:
200 		return 1;
201 	case GIT_HASH_SHA256:
202 		return 2;
203 	default:
204 		die(_("invalid hash version"));
205 	}
206 }
207 
alloc_commit_graph(void)208 static struct commit_graph *alloc_commit_graph(void)
209 {
210 	struct commit_graph *g = xcalloc(1, sizeof(*g));
211 
212 	return g;
213 }
214 
215 extern int read_replace_refs;
216 
commit_graph_compatible(struct repository * r)217 static int commit_graph_compatible(struct repository *r)
218 {
219 	if (!r->gitdir)
220 		return 0;
221 
222 	if (read_replace_refs) {
223 		prepare_replace_object(r);
224 		if (hashmap_get_size(&r->objects->replace_map->map))
225 			return 0;
226 	}
227 
228 	prepare_commit_graft(r);
229 	if (r->parsed_objects &&
230 	    (r->parsed_objects->grafts_nr || r->parsed_objects->substituted_parent))
231 		return 0;
232 	if (is_repository_shallow(r))
233 		return 0;
234 
235 	return 1;
236 }
237 
open_commit_graph(const char * graph_file,int * fd,struct stat * st)238 int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
239 {
240 	*fd = git_open(graph_file);
241 	if (*fd < 0)
242 		return 0;
243 	if (fstat(*fd, st)) {
244 		close(*fd);
245 		return 0;
246 	}
247 	return 1;
248 }
249 
load_commit_graph_one_fd_st(struct repository * r,int fd,struct stat * st,struct object_directory * odb)250 struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
251 						 int fd, struct stat *st,
252 						 struct object_directory *odb)
253 {
254 	void *graph_map;
255 	size_t graph_size;
256 	struct commit_graph *ret;
257 
258 	graph_size = xsize_t(st->st_size);
259 
260 	if (graph_size < GRAPH_MIN_SIZE) {
261 		close(fd);
262 		error(_("commit-graph file is too small"));
263 		return NULL;
264 	}
265 	graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
266 	close(fd);
267 	ret = parse_commit_graph(r, graph_map, graph_size);
268 
269 	if (ret)
270 		ret->odb = odb;
271 	else
272 		munmap(graph_map, graph_size);
273 
274 	return ret;
275 }
276 
verify_commit_graph_lite(struct commit_graph * g)277 static int verify_commit_graph_lite(struct commit_graph *g)
278 {
279 	/*
280 	 * Basic validation shared between parse_commit_graph()
281 	 * which'll be called every time the graph is used, and the
282 	 * much more expensive verify_commit_graph() used by
283 	 * "commit-graph verify".
284 	 *
285 	 * There should only be very basic checks here to ensure that
286 	 * we don't e.g. segfault in fill_commit_in_graph(), but
287 	 * because this is a very hot codepath nothing that e.g. loops
288 	 * over g->num_commits, or runs a checksum on the commit-graph
289 	 * itself.
290 	 */
291 	if (!g->chunk_oid_fanout) {
292 		error("commit-graph is missing the OID Fanout chunk");
293 		return 1;
294 	}
295 	if (!g->chunk_oid_lookup) {
296 		error("commit-graph is missing the OID Lookup chunk");
297 		return 1;
298 	}
299 	if (!g->chunk_commit_data) {
300 		error("commit-graph is missing the Commit Data chunk");
301 		return 1;
302 	}
303 
304 	return 0;
305 }
306 
graph_read_oid_lookup(const unsigned char * chunk_start,size_t chunk_size,void * data)307 static int graph_read_oid_lookup(const unsigned char *chunk_start,
308 				 size_t chunk_size, void *data)
309 {
310 	struct commit_graph *g = data;
311 	g->chunk_oid_lookup = chunk_start;
312 	g->num_commits = chunk_size / g->hash_len;
313 	return 0;
314 }
315 
graph_read_bloom_data(const unsigned char * chunk_start,size_t chunk_size,void * data)316 static int graph_read_bloom_data(const unsigned char *chunk_start,
317 				  size_t chunk_size, void *data)
318 {
319 	struct commit_graph *g = data;
320 	uint32_t hash_version;
321 	g->chunk_bloom_data = chunk_start;
322 	hash_version = get_be32(chunk_start);
323 
324 	if (hash_version != 1)
325 		return 0;
326 
327 	g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
328 	g->bloom_filter_settings->hash_version = hash_version;
329 	g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
330 	g->bloom_filter_settings->bits_per_entry = get_be32(chunk_start + 8);
331 	g->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
332 
333 	return 0;
334 }
335 
parse_commit_graph(struct repository * r,void * graph_map,size_t graph_size)336 struct commit_graph *parse_commit_graph(struct repository *r,
337 					void *graph_map, size_t graph_size)
338 {
339 	const unsigned char *data;
340 	struct commit_graph *graph;
341 	uint32_t graph_signature;
342 	unsigned char graph_version, hash_version;
343 	struct chunkfile *cf = NULL;
344 
345 	if (!graph_map)
346 		return NULL;
347 
348 	if (graph_size < GRAPH_MIN_SIZE)
349 		return NULL;
350 
351 	data = (const unsigned char *)graph_map;
352 
353 	graph_signature = get_be32(data);
354 	if (graph_signature != GRAPH_SIGNATURE) {
355 		error(_("commit-graph signature %X does not match signature %X"),
356 		      graph_signature, GRAPH_SIGNATURE);
357 		return NULL;
358 	}
359 
360 	graph_version = *(unsigned char*)(data + 4);
361 	if (graph_version != GRAPH_VERSION) {
362 		error(_("commit-graph version %X does not match version %X"),
363 		      graph_version, GRAPH_VERSION);
364 		return NULL;
365 	}
366 
367 	hash_version = *(unsigned char*)(data + 5);
368 	if (hash_version != oid_version()) {
369 		error(_("commit-graph hash version %X does not match version %X"),
370 		      hash_version, oid_version());
371 		return NULL;
372 	}
373 
374 	prepare_repo_settings(r);
375 
376 	graph = alloc_commit_graph();
377 
378 	graph->hash_len = the_hash_algo->rawsz;
379 	graph->num_chunks = *(unsigned char*)(data + 6);
380 	graph->data = graph_map;
381 	graph->data_len = graph_size;
382 
383 	if (graph_size < GRAPH_HEADER_SIZE +
384 			 (graph->num_chunks + 1) * CHUNK_TOC_ENTRY_SIZE +
385 			 GRAPH_FANOUT_SIZE + the_hash_algo->rawsz) {
386 		error(_("commit-graph file is too small to hold %u chunks"),
387 		      graph->num_chunks);
388 		free(graph);
389 		return NULL;
390 	}
391 
392 	cf = init_chunkfile(NULL);
393 
394 	if (read_table_of_contents(cf, graph->data, graph_size,
395 				   GRAPH_HEADER_SIZE, graph->num_chunks))
396 		goto free_and_return;
397 
398 	pair_chunk(cf, GRAPH_CHUNKID_OIDFANOUT,
399 		   (const unsigned char **)&graph->chunk_oid_fanout);
400 	read_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, graph_read_oid_lookup, graph);
401 	pair_chunk(cf, GRAPH_CHUNKID_DATA, &graph->chunk_commit_data);
402 	pair_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES, &graph->chunk_extra_edges);
403 	pair_chunk(cf, GRAPH_CHUNKID_BASE, &graph->chunk_base_graphs);
404 
405 	if (get_configured_generation_version(r) >= 2) {
406 		pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
407 			&graph->chunk_generation_data);
408 		pair_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
409 			&graph->chunk_generation_data_overflow);
410 	}
411 
412 	if (r->settings.commit_graph_read_changed_paths) {
413 		pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
414 			   &graph->chunk_bloom_indexes);
415 		read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
416 			   graph_read_bloom_data, graph);
417 	}
418 
419 	if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
420 		init_bloom_filters();
421 	} else {
422 		/* We need both the bloom chunks to exist together. Else ignore the data */
423 		graph->chunk_bloom_indexes = NULL;
424 		graph->chunk_bloom_data = NULL;
425 		FREE_AND_NULL(graph->bloom_filter_settings);
426 	}
427 
428 	oidread(&graph->oid, graph->data + graph->data_len - graph->hash_len);
429 
430 	if (verify_commit_graph_lite(graph))
431 		goto free_and_return;
432 
433 	free_chunkfile(cf);
434 	return graph;
435 
436 free_and_return:
437 	free_chunkfile(cf);
438 	free(graph->bloom_filter_settings);
439 	free(graph);
440 	return NULL;
441 }
442 
load_commit_graph_one(struct repository * r,const char * graph_file,struct object_directory * odb)443 static struct commit_graph *load_commit_graph_one(struct repository *r,
444 						  const char *graph_file,
445 						  struct object_directory *odb)
446 {
447 
448 	struct stat st;
449 	int fd;
450 	struct commit_graph *g;
451 	int open_ok = open_commit_graph(graph_file, &fd, &st);
452 
453 	if (!open_ok)
454 		return NULL;
455 
456 	g = load_commit_graph_one_fd_st(r, fd, &st, odb);
457 
458 	if (g)
459 		g->filename = xstrdup(graph_file);
460 
461 	return g;
462 }
463 
load_commit_graph_v1(struct repository * r,struct object_directory * odb)464 static struct commit_graph *load_commit_graph_v1(struct repository *r,
465 						 struct object_directory *odb)
466 {
467 	char *graph_name = get_commit_graph_filename(odb);
468 	struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
469 	free(graph_name);
470 
471 	return g;
472 }
473 
add_graph_to_chain(struct commit_graph * g,struct commit_graph * chain,struct object_id * oids,int n)474 static int add_graph_to_chain(struct commit_graph *g,
475 			      struct commit_graph *chain,
476 			      struct object_id *oids,
477 			      int n)
478 {
479 	struct commit_graph *cur_g = chain;
480 
481 	if (n && !g->chunk_base_graphs) {
482 		warning(_("commit-graph has no base graphs chunk"));
483 		return 0;
484 	}
485 
486 	while (n) {
487 		n--;
488 
489 		if (!cur_g ||
490 		    !oideq(&oids[n], &cur_g->oid) ||
491 		    !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) {
492 			warning(_("commit-graph chain does not match"));
493 			return 0;
494 		}
495 
496 		cur_g = cur_g->base_graph;
497 	}
498 
499 	g->base_graph = chain;
500 
501 	if (chain)
502 		g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base;
503 
504 	return 1;
505 }
506 
load_commit_graph_chain(struct repository * r,struct object_directory * odb)507 static struct commit_graph *load_commit_graph_chain(struct repository *r,
508 						    struct object_directory *odb)
509 {
510 	struct commit_graph *graph_chain = NULL;
511 	struct strbuf line = STRBUF_INIT;
512 	struct stat st;
513 	struct object_id *oids;
514 	int i = 0, valid = 1, count;
515 	char *chain_name = get_commit_graph_chain_filename(odb);
516 	FILE *fp;
517 	int stat_res;
518 
519 	fp = fopen(chain_name, "r");
520 	stat_res = stat(chain_name, &st);
521 	free(chain_name);
522 
523 	if (!fp ||
524 	    stat_res ||
525 	    st.st_size <= the_hash_algo->hexsz)
526 		return NULL;
527 
528 	count = st.st_size / (the_hash_algo->hexsz + 1);
529 	CALLOC_ARRAY(oids, count);
530 
531 	prepare_alt_odb(r);
532 
533 	for (i = 0; i < count; i++) {
534 		struct object_directory *odb;
535 
536 		if (strbuf_getline_lf(&line, fp) == EOF)
537 			break;
538 
539 		if (get_oid_hex(line.buf, &oids[i])) {
540 			warning(_("invalid commit-graph chain: line '%s' not a hash"),
541 				line.buf);
542 			valid = 0;
543 			break;
544 		}
545 
546 		valid = 0;
547 		for (odb = r->objects->odb; odb; odb = odb->next) {
548 			char *graph_name = get_split_graph_filename(odb, line.buf);
549 			struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
550 
551 			free(graph_name);
552 
553 			if (g) {
554 				if (add_graph_to_chain(g, graph_chain, oids, i)) {
555 					graph_chain = g;
556 					valid = 1;
557 				}
558 
559 				break;
560 			}
561 		}
562 
563 		if (!valid) {
564 			warning(_("unable to find all commit-graph files"));
565 			break;
566 		}
567 	}
568 
569 	free(oids);
570 	fclose(fp);
571 	strbuf_release(&line);
572 
573 	return graph_chain;
574 }
575 
576 /*
577  * returns 1 if and only if all graphs in the chain have
578  * corrected commit dates stored in the generation_data chunk.
579  */
validate_mixed_generation_chain(struct commit_graph * g)580 static int validate_mixed_generation_chain(struct commit_graph *g)
581 {
582 	int read_generation_data = 1;
583 	struct commit_graph *p = g;
584 
585 	while (read_generation_data && p) {
586 		read_generation_data = p->read_generation_data;
587 		p = p->base_graph;
588 	}
589 
590 	if (read_generation_data)
591 		return 1;
592 
593 	while (g) {
594 		g->read_generation_data = 0;
595 		g = g->base_graph;
596 	}
597 
598 	return 0;
599 }
600 
read_commit_graph_one(struct repository * r,struct object_directory * odb)601 struct commit_graph *read_commit_graph_one(struct repository *r,
602 					   struct object_directory *odb)
603 {
604 	struct commit_graph *g = load_commit_graph_v1(r, odb);
605 
606 	if (!g)
607 		g = load_commit_graph_chain(r, odb);
608 
609 	validate_mixed_generation_chain(g);
610 
611 	return g;
612 }
613 
prepare_commit_graph_one(struct repository * r,struct object_directory * odb)614 static void prepare_commit_graph_one(struct repository *r,
615 				     struct object_directory *odb)
616 {
617 
618 	if (r->objects->commit_graph)
619 		return;
620 
621 	r->objects->commit_graph = read_commit_graph_one(r, odb);
622 }
623 
624 /*
625  * Return 1 if commit_graph is non-NULL, and 0 otherwise.
626  *
627  * On the first invocation, this function attempts to load the commit
628  * graph if the_repository is configured to have one.
629  */
prepare_commit_graph(struct repository * r)630 static int prepare_commit_graph(struct repository *r)
631 {
632 	struct object_directory *odb;
633 
634 	/*
635 	 * This must come before the "already attempted?" check below, because
636 	 * we want to disable even an already-loaded graph file.
637 	 */
638 	if (r->commit_graph_disabled)
639 		return 0;
640 
641 	if (r->objects->commit_graph_attempted)
642 		return !!r->objects->commit_graph;
643 	r->objects->commit_graph_attempted = 1;
644 
645 	prepare_repo_settings(r);
646 
647 	if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
648 	    r->settings.core_commit_graph != 1)
649 		/*
650 		 * This repository is not configured to use commit graphs, so
651 		 * do not load one. (But report commit_graph_attempted anyway
652 		 * so that commit graph loading is not attempted again for this
653 		 * repository.)
654 		 */
655 		return 0;
656 
657 	if (!commit_graph_compatible(r))
658 		return 0;
659 
660 	prepare_alt_odb(r);
661 	for (odb = r->objects->odb;
662 	     !r->objects->commit_graph && odb;
663 	     odb = odb->next)
664 		prepare_commit_graph_one(r, odb);
665 	return !!r->objects->commit_graph;
666 }
667 
generation_numbers_enabled(struct repository * r)668 int generation_numbers_enabled(struct repository *r)
669 {
670 	uint32_t first_generation;
671 	struct commit_graph *g;
672 	if (!prepare_commit_graph(r))
673 	       return 0;
674 
675 	g = r->objects->commit_graph;
676 
677 	if (!g->num_commits)
678 		return 0;
679 
680 	first_generation = get_be32(g->chunk_commit_data +
681 				    g->hash_len + 8) >> 2;
682 
683 	return !!first_generation;
684 }
685 
corrected_commit_dates_enabled(struct repository * r)686 int corrected_commit_dates_enabled(struct repository *r)
687 {
688 	struct commit_graph *g;
689 	if (!prepare_commit_graph(r))
690 		return 0;
691 
692 	g = r->objects->commit_graph;
693 
694 	if (!g->num_commits)
695 		return 0;
696 
697 	return g->read_generation_data;
698 }
699 
get_bloom_filter_settings(struct repository * r)700 struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
701 {
702 	struct commit_graph *g = r->objects->commit_graph;
703 	while (g) {
704 		if (g->bloom_filter_settings)
705 			return g->bloom_filter_settings;
706 		g = g->base_graph;
707 	}
708 	return NULL;
709 }
710 
close_commit_graph_one(struct commit_graph * g)711 static void close_commit_graph_one(struct commit_graph *g)
712 {
713 	if (!g)
714 		return;
715 
716 	clear_commit_graph_data_slab(&commit_graph_data_slab);
717 	close_commit_graph_one(g->base_graph);
718 	free_commit_graph(g);
719 }
720 
close_commit_graph(struct raw_object_store * o)721 void close_commit_graph(struct raw_object_store *o)
722 {
723 	close_commit_graph_one(o->commit_graph);
724 	o->commit_graph = NULL;
725 }
726 
bsearch_graph(struct commit_graph * g,const struct object_id * oid,uint32_t * pos)727 static int bsearch_graph(struct commit_graph *g, const struct object_id *oid, uint32_t *pos)
728 {
729 	return bsearch_hash(oid->hash, g->chunk_oid_fanout,
730 			    g->chunk_oid_lookup, g->hash_len, pos);
731 }
732 
load_oid_from_graph(struct commit_graph * g,uint32_t pos,struct object_id * oid)733 static void load_oid_from_graph(struct commit_graph *g,
734 				uint32_t pos,
735 				struct object_id *oid)
736 {
737 	uint32_t lex_index;
738 
739 	while (g && pos < g->num_commits_in_base)
740 		g = g->base_graph;
741 
742 	if (!g)
743 		BUG("NULL commit-graph");
744 
745 	if (pos >= g->num_commits + g->num_commits_in_base)
746 		die(_("invalid commit position. commit-graph is likely corrupt"));
747 
748 	lex_index = pos - g->num_commits_in_base;
749 
750 	oidread(oid, g->chunk_oid_lookup + g->hash_len * lex_index);
751 }
752 
insert_parent_or_die(struct repository * r,struct commit_graph * g,uint32_t pos,struct commit_list ** pptr)753 static struct commit_list **insert_parent_or_die(struct repository *r,
754 						 struct commit_graph *g,
755 						 uint32_t pos,
756 						 struct commit_list **pptr)
757 {
758 	struct commit *c;
759 	struct object_id oid;
760 
761 	if (pos >= g->num_commits + g->num_commits_in_base)
762 		die("invalid parent position %"PRIu32, pos);
763 
764 	load_oid_from_graph(g, pos, &oid);
765 	c = lookup_commit(r, &oid);
766 	if (!c)
767 		die(_("could not find commit %s"), oid_to_hex(&oid));
768 	commit_graph_data_at(c)->graph_pos = pos;
769 	return &commit_list_insert(c, pptr)->next;
770 }
771 
fill_commit_graph_info(struct commit * item,struct commit_graph * g,uint32_t pos)772 static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos)
773 {
774 	const unsigned char *commit_data;
775 	struct commit_graph_data *graph_data;
776 	uint32_t lex_index, offset_pos;
777 	uint64_t date_high, date_low, offset;
778 
779 	while (pos < g->num_commits_in_base)
780 		g = g->base_graph;
781 
782 	if (pos >= g->num_commits + g->num_commits_in_base)
783 		die(_("invalid commit position. commit-graph is likely corrupt"));
784 
785 	lex_index = pos - g->num_commits_in_base;
786 	commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index;
787 
788 	graph_data = commit_graph_data_at(item);
789 	graph_data->graph_pos = pos;
790 
791 	date_high = get_be32(commit_data + g->hash_len + 8) & 0x3;
792 	date_low = get_be32(commit_data + g->hash_len + 12);
793 	item->date = (timestamp_t)((date_high << 32) | date_low);
794 
795 	if (g->read_generation_data) {
796 		offset = (timestamp_t)get_be32(g->chunk_generation_data + sizeof(uint32_t) * lex_index);
797 
798 		if (offset & CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW) {
799 			if (!g->chunk_generation_data_overflow)
800 				die(_("commit-graph requires overflow generation data but has none"));
801 
802 			offset_pos = offset ^ CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW;
803 			graph_data->generation = get_be64(g->chunk_generation_data_overflow + 8 * offset_pos);
804 		} else
805 			graph_data->generation = item->date + offset;
806 	} else
807 		graph_data->generation = get_be32(commit_data + g->hash_len + 8) >> 2;
808 
809 	if (g->topo_levels)
810 		*topo_level_slab_at(g->topo_levels, item) = get_be32(commit_data + g->hash_len + 8) >> 2;
811 }
812 
set_commit_tree(struct commit * c,struct tree * t)813 static inline void set_commit_tree(struct commit *c, struct tree *t)
814 {
815 	c->maybe_tree = t;
816 }
817 
fill_commit_in_graph(struct repository * r,struct commit * item,struct commit_graph * g,uint32_t pos)818 static int fill_commit_in_graph(struct repository *r,
819 				struct commit *item,
820 				struct commit_graph *g, uint32_t pos)
821 {
822 	uint32_t edge_value;
823 	uint32_t *parent_data_ptr;
824 	struct commit_list **pptr;
825 	const unsigned char *commit_data;
826 	uint32_t lex_index;
827 
828 	while (pos < g->num_commits_in_base)
829 		g = g->base_graph;
830 
831 	fill_commit_graph_info(item, g, pos);
832 
833 	lex_index = pos - g->num_commits_in_base;
834 	commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index;
835 
836 	item->object.parsed = 1;
837 
838 	set_commit_tree(item, NULL);
839 
840 	pptr = &item->parents;
841 
842 	edge_value = get_be32(commit_data + g->hash_len);
843 	if (edge_value == GRAPH_PARENT_NONE)
844 		return 1;
845 	pptr = insert_parent_or_die(r, g, edge_value, pptr);
846 
847 	edge_value = get_be32(commit_data + g->hash_len + 4);
848 	if (edge_value == GRAPH_PARENT_NONE)
849 		return 1;
850 	if (!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) {
851 		pptr = insert_parent_or_die(r, g, edge_value, pptr);
852 		return 1;
853 	}
854 
855 	parent_data_ptr = (uint32_t*)(g->chunk_extra_edges +
856 			  4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK));
857 	do {
858 		edge_value = get_be32(parent_data_ptr);
859 		pptr = insert_parent_or_die(r, g,
860 					    edge_value & GRAPH_EDGE_LAST_MASK,
861 					    pptr);
862 		parent_data_ptr++;
863 	} while (!(edge_value & GRAPH_LAST_EDGE));
864 
865 	return 1;
866 }
867 
search_commit_pos_in_graph(const struct object_id * id,struct commit_graph * g,uint32_t * pos)868 static int search_commit_pos_in_graph(const struct object_id *id, struct commit_graph *g, uint32_t *pos)
869 {
870 	struct commit_graph *cur_g = g;
871 	uint32_t lex_index;
872 
873 	while (cur_g && !bsearch_graph(cur_g, id, &lex_index))
874 		cur_g = cur_g->base_graph;
875 
876 	if (cur_g) {
877 		*pos = lex_index + cur_g->num_commits_in_base;
878 		return 1;
879 	}
880 
881 	return 0;
882 }
883 
find_commit_pos_in_graph(struct commit * item,struct commit_graph * g,uint32_t * pos)884 static int find_commit_pos_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos)
885 {
886 	uint32_t graph_pos = commit_graph_position(item);
887 	if (graph_pos != COMMIT_NOT_FROM_GRAPH) {
888 		*pos = graph_pos;
889 		return 1;
890 	} else {
891 		return search_commit_pos_in_graph(&item->object.oid, g, pos);
892 	}
893 }
894 
lookup_commit_in_graph(struct repository * repo,const struct object_id * id)895 struct commit *lookup_commit_in_graph(struct repository *repo, const struct object_id *id)
896 {
897 	struct commit *commit;
898 	uint32_t pos;
899 
900 	if (!repo->objects->commit_graph)
901 		return NULL;
902 	if (!search_commit_pos_in_graph(id, repo->objects->commit_graph, &pos))
903 		return NULL;
904 	if (!repo_has_object_file(repo, id))
905 		return NULL;
906 
907 	commit = lookup_commit(repo, id);
908 	if (!commit)
909 		return NULL;
910 	if (commit->object.parsed)
911 		return commit;
912 
913 	if (!fill_commit_in_graph(repo, commit, repo->objects->commit_graph, pos))
914 		return NULL;
915 
916 	return commit;
917 }
918 
parse_commit_in_graph_one(struct repository * r,struct commit_graph * g,struct commit * item)919 static int parse_commit_in_graph_one(struct repository *r,
920 				     struct commit_graph *g,
921 				     struct commit *item)
922 {
923 	uint32_t pos;
924 
925 	if (item->object.parsed)
926 		return 1;
927 
928 	if (find_commit_pos_in_graph(item, g, &pos))
929 		return fill_commit_in_graph(r, item, g, pos);
930 
931 	return 0;
932 }
933 
parse_commit_in_graph(struct repository * r,struct commit * item)934 int parse_commit_in_graph(struct repository *r, struct commit *item)
935 {
936 	static int checked_env = 0;
937 
938 	if (!checked_env &&
939 	    git_env_bool(GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE, 0))
940 		die("dying as requested by the '%s' variable on commit-graph parse!",
941 		    GIT_TEST_COMMIT_GRAPH_DIE_ON_PARSE);
942 	checked_env = 1;
943 
944 	if (!prepare_commit_graph(r))
945 		return 0;
946 	return parse_commit_in_graph_one(r, r->objects->commit_graph, item);
947 }
948 
load_commit_graph_info(struct repository * r,struct commit * item)949 void load_commit_graph_info(struct repository *r, struct commit *item)
950 {
951 	uint32_t pos;
952 	if (!prepare_commit_graph(r))
953 		return;
954 	if (find_commit_pos_in_graph(item, r->objects->commit_graph, &pos))
955 		fill_commit_graph_info(item, r->objects->commit_graph, pos);
956 }
957 
load_tree_for_commit(struct repository * r,struct commit_graph * g,struct commit * c)958 static struct tree *load_tree_for_commit(struct repository *r,
959 					 struct commit_graph *g,
960 					 struct commit *c)
961 {
962 	struct object_id oid;
963 	const unsigned char *commit_data;
964 	uint32_t graph_pos = commit_graph_position(c);
965 
966 	while (graph_pos < g->num_commits_in_base)
967 		g = g->base_graph;
968 
969 	commit_data = g->chunk_commit_data +
970 			GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base);
971 
972 	oidread(&oid, commit_data);
973 	set_commit_tree(c, lookup_tree(r, &oid));
974 
975 	return c->maybe_tree;
976 }
977 
get_commit_tree_in_graph_one(struct repository * r,struct commit_graph * g,const struct commit * c)978 static struct tree *get_commit_tree_in_graph_one(struct repository *r,
979 						 struct commit_graph *g,
980 						 const struct commit *c)
981 {
982 	if (c->maybe_tree)
983 		return c->maybe_tree;
984 	if (commit_graph_position(c) == COMMIT_NOT_FROM_GRAPH)
985 		BUG("get_commit_tree_in_graph_one called from non-commit-graph commit");
986 
987 	return load_tree_for_commit(r, g, (struct commit *)c);
988 }
989 
get_commit_tree_in_graph(struct repository * r,const struct commit * c)990 struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c)
991 {
992 	return get_commit_tree_in_graph_one(r, r->objects->commit_graph, c);
993 }
994 
995 struct packed_commit_list {
996 	struct commit **list;
997 	size_t nr;
998 	size_t alloc;
999 };
1000 
1001 struct write_commit_graph_context {
1002 	struct repository *r;
1003 	struct object_directory *odb;
1004 	char *graph_name;
1005 	struct oid_array oids;
1006 	struct packed_commit_list commits;
1007 	int num_extra_edges;
1008 	int num_generation_data_overflows;
1009 	unsigned long approx_nr_objects;
1010 	struct progress *progress;
1011 	int progress_done;
1012 	uint64_t progress_cnt;
1013 
1014 	char *base_graph_name;
1015 	int num_commit_graphs_before;
1016 	int num_commit_graphs_after;
1017 	char **commit_graph_filenames_before;
1018 	char **commit_graph_filenames_after;
1019 	char **commit_graph_hash_after;
1020 	uint32_t new_num_commits_in_base;
1021 	struct commit_graph *new_base_graph;
1022 
1023 	unsigned append:1,
1024 		 report_progress:1,
1025 		 split:1,
1026 		 changed_paths:1,
1027 		 order_by_pack:1,
1028 		 write_generation_data:1,
1029 		 trust_generation_numbers:1;
1030 
1031 	struct topo_level_slab *topo_levels;
1032 	const struct commit_graph_opts *opts;
1033 	size_t total_bloom_filter_data_size;
1034 	const struct bloom_filter_settings *bloom_settings;
1035 
1036 	int count_bloom_filter_computed;
1037 	int count_bloom_filter_not_computed;
1038 	int count_bloom_filter_trunc_empty;
1039 	int count_bloom_filter_trunc_large;
1040 };
1041 
write_graph_chunk_fanout(struct hashfile * f,void * data)1042 static int write_graph_chunk_fanout(struct hashfile *f,
1043 				    void *data)
1044 {
1045 	struct write_commit_graph_context *ctx = data;
1046 	int i, count = 0;
1047 	struct commit **list = ctx->commits.list;
1048 
1049 	/*
1050 	 * Write the first-level table (the list is sorted,
1051 	 * but we use a 256-entry lookup to be able to avoid
1052 	 * having to do eight extra binary search iterations).
1053 	 */
1054 	for (i = 0; i < 256; i++) {
1055 		while (count < ctx->commits.nr) {
1056 			if ((*list)->object.oid.hash[0] != i)
1057 				break;
1058 			display_progress(ctx->progress, ++ctx->progress_cnt);
1059 			count++;
1060 			list++;
1061 		}
1062 
1063 		hashwrite_be32(f, count);
1064 	}
1065 
1066 	return 0;
1067 }
1068 
write_graph_chunk_oids(struct hashfile * f,void * data)1069 static int write_graph_chunk_oids(struct hashfile *f,
1070 				  void *data)
1071 {
1072 	struct write_commit_graph_context *ctx = data;
1073 	struct commit **list = ctx->commits.list;
1074 	int count;
1075 	for (count = 0; count < ctx->commits.nr; count++, list++) {
1076 		display_progress(ctx->progress, ++ctx->progress_cnt);
1077 		hashwrite(f, (*list)->object.oid.hash, the_hash_algo->rawsz);
1078 	}
1079 
1080 	return 0;
1081 }
1082 
commit_to_oid(size_t index,const void * table)1083 static const struct object_id *commit_to_oid(size_t index, const void *table)
1084 {
1085 	const struct commit * const *commits = table;
1086 	return &commits[index]->object.oid;
1087 }
1088 
write_graph_chunk_data(struct hashfile * f,void * data)1089 static int write_graph_chunk_data(struct hashfile *f,
1090 				  void *data)
1091 {
1092 	struct write_commit_graph_context *ctx = data;
1093 	struct commit **list = ctx->commits.list;
1094 	struct commit **last = ctx->commits.list + ctx->commits.nr;
1095 	uint32_t num_extra_edges = 0;
1096 
1097 	while (list < last) {
1098 		struct commit_list *parent;
1099 		struct object_id *tree;
1100 		int edge_value;
1101 		uint32_t packedDate[2];
1102 		display_progress(ctx->progress, ++ctx->progress_cnt);
1103 
1104 		if (repo_parse_commit_no_graph(ctx->r, *list))
1105 			die(_("unable to parse commit %s"),
1106 				oid_to_hex(&(*list)->object.oid));
1107 		tree = get_commit_tree_oid(*list);
1108 		hashwrite(f, tree->hash, the_hash_algo->rawsz);
1109 
1110 		parent = (*list)->parents;
1111 
1112 		if (!parent)
1113 			edge_value = GRAPH_PARENT_NONE;
1114 		else {
1115 			edge_value = oid_pos(&parent->item->object.oid,
1116 					     ctx->commits.list,
1117 					     ctx->commits.nr,
1118 					     commit_to_oid);
1119 
1120 			if (edge_value >= 0)
1121 				edge_value += ctx->new_num_commits_in_base;
1122 			else if (ctx->new_base_graph) {
1123 				uint32_t pos;
1124 				if (find_commit_pos_in_graph(parent->item,
1125 							     ctx->new_base_graph,
1126 							     &pos))
1127 					edge_value = pos;
1128 			}
1129 
1130 			if (edge_value < 0)
1131 				BUG("missing parent %s for commit %s",
1132 				    oid_to_hex(&parent->item->object.oid),
1133 				    oid_to_hex(&(*list)->object.oid));
1134 		}
1135 
1136 		hashwrite_be32(f, edge_value);
1137 
1138 		if (parent)
1139 			parent = parent->next;
1140 
1141 		if (!parent)
1142 			edge_value = GRAPH_PARENT_NONE;
1143 		else if (parent->next)
1144 			edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges;
1145 		else {
1146 			edge_value = oid_pos(&parent->item->object.oid,
1147 					     ctx->commits.list,
1148 					     ctx->commits.nr,
1149 					     commit_to_oid);
1150 
1151 			if (edge_value >= 0)
1152 				edge_value += ctx->new_num_commits_in_base;
1153 			else if (ctx->new_base_graph) {
1154 				uint32_t pos;
1155 				if (find_commit_pos_in_graph(parent->item,
1156 							     ctx->new_base_graph,
1157 							     &pos))
1158 					edge_value = pos;
1159 			}
1160 
1161 			if (edge_value < 0)
1162 				BUG("missing parent %s for commit %s",
1163 				    oid_to_hex(&parent->item->object.oid),
1164 				    oid_to_hex(&(*list)->object.oid));
1165 		}
1166 
1167 		hashwrite_be32(f, edge_value);
1168 
1169 		if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) {
1170 			do {
1171 				num_extra_edges++;
1172 				parent = parent->next;
1173 			} while (parent);
1174 		}
1175 
1176 		if (sizeof((*list)->date) > 4)
1177 			packedDate[0] = htonl(((*list)->date >> 32) & 0x3);
1178 		else
1179 			packedDate[0] = 0;
1180 
1181 		packedDate[0] |= htonl(*topo_level_slab_at(ctx->topo_levels, *list) << 2);
1182 
1183 		packedDate[1] = htonl((*list)->date);
1184 		hashwrite(f, packedDate, 8);
1185 
1186 		list++;
1187 	}
1188 
1189 	return 0;
1190 }
1191 
write_graph_chunk_generation_data(struct hashfile * f,void * data)1192 static int write_graph_chunk_generation_data(struct hashfile *f,
1193 					     void *data)
1194 {
1195 	struct write_commit_graph_context *ctx = data;
1196 	int i, num_generation_data_overflows = 0;
1197 
1198 	for (i = 0; i < ctx->commits.nr; i++) {
1199 		struct commit *c = ctx->commits.list[i];
1200 		timestamp_t offset;
1201 		repo_parse_commit(ctx->r, c);
1202 		offset = commit_graph_data_at(c)->generation - c->date;
1203 		display_progress(ctx->progress, ++ctx->progress_cnt);
1204 
1205 		if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1206 			offset = CORRECTED_COMMIT_DATE_OFFSET_OVERFLOW | num_generation_data_overflows;
1207 			num_generation_data_overflows++;
1208 		}
1209 
1210 		hashwrite_be32(f, offset);
1211 	}
1212 
1213 	return 0;
1214 }
1215 
write_graph_chunk_generation_data_overflow(struct hashfile * f,void * data)1216 static int write_graph_chunk_generation_data_overflow(struct hashfile *f,
1217 						      void *data)
1218 {
1219 	struct write_commit_graph_context *ctx = data;
1220 	int i;
1221 	for (i = 0; i < ctx->commits.nr; i++) {
1222 		struct commit *c = ctx->commits.list[i];
1223 		timestamp_t offset = commit_graph_data_at(c)->generation - c->date;
1224 		display_progress(ctx->progress, ++ctx->progress_cnt);
1225 
1226 		if (offset > GENERATION_NUMBER_V2_OFFSET_MAX) {
1227 			hashwrite_be32(f, offset >> 32);
1228 			hashwrite_be32(f, (uint32_t) offset);
1229 		}
1230 	}
1231 
1232 	return 0;
1233 }
1234 
write_graph_chunk_extra_edges(struct hashfile * f,void * data)1235 static int write_graph_chunk_extra_edges(struct hashfile *f,
1236 					 void *data)
1237 {
1238 	struct write_commit_graph_context *ctx = data;
1239 	struct commit **list = ctx->commits.list;
1240 	struct commit **last = ctx->commits.list + ctx->commits.nr;
1241 	struct commit_list *parent;
1242 
1243 	while (list < last) {
1244 		int num_parents = 0;
1245 
1246 		display_progress(ctx->progress, ++ctx->progress_cnt);
1247 
1248 		for (parent = (*list)->parents; num_parents < 3 && parent;
1249 		     parent = parent->next)
1250 			num_parents++;
1251 
1252 		if (num_parents <= 2) {
1253 			list++;
1254 			continue;
1255 		}
1256 
1257 		/* Since num_parents > 2, this initializer is safe. */
1258 		for (parent = (*list)->parents->next; parent; parent = parent->next) {
1259 			int edge_value = oid_pos(&parent->item->object.oid,
1260 						 ctx->commits.list,
1261 						 ctx->commits.nr,
1262 						 commit_to_oid);
1263 
1264 			if (edge_value >= 0)
1265 				edge_value += ctx->new_num_commits_in_base;
1266 			else if (ctx->new_base_graph) {
1267 				uint32_t pos;
1268 				if (find_commit_pos_in_graph(parent->item,
1269 							     ctx->new_base_graph,
1270 							     &pos))
1271 					edge_value = pos;
1272 			}
1273 
1274 			if (edge_value < 0)
1275 				BUG("missing parent %s for commit %s",
1276 				    oid_to_hex(&parent->item->object.oid),
1277 				    oid_to_hex(&(*list)->object.oid));
1278 			else if (!parent->next)
1279 				edge_value |= GRAPH_LAST_EDGE;
1280 
1281 			hashwrite_be32(f, edge_value);
1282 		}
1283 
1284 		list++;
1285 	}
1286 
1287 	return 0;
1288 }
1289 
write_graph_chunk_bloom_indexes(struct hashfile * f,void * data)1290 static int write_graph_chunk_bloom_indexes(struct hashfile *f,
1291 					   void *data)
1292 {
1293 	struct write_commit_graph_context *ctx = data;
1294 	struct commit **list = ctx->commits.list;
1295 	struct commit **last = ctx->commits.list + ctx->commits.nr;
1296 	uint32_t cur_pos = 0;
1297 
1298 	while (list < last) {
1299 		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1300 		size_t len = filter ? filter->len : 0;
1301 		cur_pos += len;
1302 		display_progress(ctx->progress, ++ctx->progress_cnt);
1303 		hashwrite_be32(f, cur_pos);
1304 		list++;
1305 	}
1306 
1307 	return 0;
1308 }
1309 
trace2_bloom_filter_settings(struct write_commit_graph_context * ctx)1310 static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
1311 {
1312 	struct json_writer jw = JSON_WRITER_INIT;
1313 
1314 	jw_object_begin(&jw, 0);
1315 	jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
1316 	jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
1317 	jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
1318 	jw_object_intmax(&jw, "max_changed_paths", ctx->bloom_settings->max_changed_paths);
1319 	jw_end(&jw);
1320 
1321 	trace2_data_json("bloom", ctx->r, "settings", &jw);
1322 
1323 	jw_release(&jw);
1324 }
1325 
write_graph_chunk_bloom_data(struct hashfile * f,void * data)1326 static int write_graph_chunk_bloom_data(struct hashfile *f,
1327 					void *data)
1328 {
1329 	struct write_commit_graph_context *ctx = data;
1330 	struct commit **list = ctx->commits.list;
1331 	struct commit **last = ctx->commits.list + ctx->commits.nr;
1332 
1333 	trace2_bloom_filter_settings(ctx);
1334 
1335 	hashwrite_be32(f, ctx->bloom_settings->hash_version);
1336 	hashwrite_be32(f, ctx->bloom_settings->num_hashes);
1337 	hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
1338 
1339 	while (list < last) {
1340 		struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
1341 		size_t len = filter ? filter->len : 0;
1342 
1343 		display_progress(ctx->progress, ++ctx->progress_cnt);
1344 		if (len)
1345 			hashwrite(f, filter->data, len * sizeof(unsigned char));
1346 		list++;
1347 	}
1348 
1349 	return 0;
1350 }
1351 
add_packed_commits(const struct object_id * oid,struct packed_git * pack,uint32_t pos,void * data)1352 static int add_packed_commits(const struct object_id *oid,
1353 			      struct packed_git *pack,
1354 			      uint32_t pos,
1355 			      void *data)
1356 {
1357 	struct write_commit_graph_context *ctx = (struct write_commit_graph_context*)data;
1358 	enum object_type type;
1359 	off_t offset = nth_packed_object_offset(pack, pos);
1360 	struct object_info oi = OBJECT_INFO_INIT;
1361 
1362 	if (ctx->progress)
1363 		display_progress(ctx->progress, ++ctx->progress_done);
1364 
1365 	oi.typep = &type;
1366 	if (packed_object_info(ctx->r, pack, offset, &oi) < 0)
1367 		die(_("unable to get type of object %s"), oid_to_hex(oid));
1368 
1369 	if (type != OBJ_COMMIT)
1370 		return 0;
1371 
1372 	oid_array_append(&ctx->oids, oid);
1373 	set_commit_pos(ctx->r, oid);
1374 
1375 	return 0;
1376 }
1377 
add_missing_parents(struct write_commit_graph_context * ctx,struct commit * commit)1378 static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
1379 {
1380 	struct commit_list *parent;
1381 	for (parent = commit->parents; parent; parent = parent->next) {
1382 		if (!(parent->item->object.flags & REACHABLE)) {
1383 			oid_array_append(&ctx->oids, &parent->item->object.oid);
1384 			parent->item->object.flags |= REACHABLE;
1385 		}
1386 	}
1387 }
1388 
close_reachable(struct write_commit_graph_context * ctx)1389 static void close_reachable(struct write_commit_graph_context *ctx)
1390 {
1391 	int i;
1392 	struct commit *commit;
1393 	enum commit_graph_split_flags flags = ctx->opts ?
1394 		ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1395 
1396 	if (ctx->report_progress)
1397 		ctx->progress = start_delayed_progress(
1398 					_("Loading known commits in commit graph"),
1399 					ctx->oids.nr);
1400 	for (i = 0; i < ctx->oids.nr; i++) {
1401 		display_progress(ctx->progress, i + 1);
1402 		commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1403 		if (commit)
1404 			commit->object.flags |= REACHABLE;
1405 	}
1406 	stop_progress(&ctx->progress);
1407 
1408 	/*
1409 	 * As this loop runs, ctx->oids.nr may grow, but not more
1410 	 * than the number of missing commits in the reachable
1411 	 * closure.
1412 	 */
1413 	if (ctx->report_progress)
1414 		ctx->progress = start_delayed_progress(
1415 					_("Expanding reachable commits in commit graph"),
1416 					0);
1417 	for (i = 0; i < ctx->oids.nr; i++) {
1418 		display_progress(ctx->progress, i + 1);
1419 		commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1420 
1421 		if (!commit)
1422 			continue;
1423 		if (ctx->split) {
1424 			if ((!repo_parse_commit(ctx->r, commit) &&
1425 			     commit_graph_position(commit) == COMMIT_NOT_FROM_GRAPH) ||
1426 			    flags == COMMIT_GRAPH_SPLIT_REPLACE)
1427 				add_missing_parents(ctx, commit);
1428 		} else if (!repo_parse_commit_no_graph(ctx->r, commit))
1429 			add_missing_parents(ctx, commit);
1430 	}
1431 	stop_progress(&ctx->progress);
1432 
1433 	if (ctx->report_progress)
1434 		ctx->progress = start_delayed_progress(
1435 					_("Clearing commit marks in commit graph"),
1436 					ctx->oids.nr);
1437 	for (i = 0; i < ctx->oids.nr; i++) {
1438 		display_progress(ctx->progress, i + 1);
1439 		commit = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1440 
1441 		if (commit)
1442 			commit->object.flags &= ~REACHABLE;
1443 	}
1444 	stop_progress(&ctx->progress);
1445 }
1446 
compute_topological_levels(struct write_commit_graph_context * ctx)1447 static void compute_topological_levels(struct write_commit_graph_context *ctx)
1448 {
1449 	int i;
1450 	struct commit_list *list = NULL;
1451 
1452 	if (ctx->report_progress)
1453 		ctx->progress = start_delayed_progress(
1454 					_("Computing commit graph topological levels"),
1455 					ctx->commits.nr);
1456 	for (i = 0; i < ctx->commits.nr; i++) {
1457 		struct commit *c = ctx->commits.list[i];
1458 		uint32_t level;
1459 
1460 		repo_parse_commit(ctx->r, c);
1461 		level = *topo_level_slab_at(ctx->topo_levels, c);
1462 
1463 		display_progress(ctx->progress, i + 1);
1464 		if (level != GENERATION_NUMBER_ZERO)
1465 			continue;
1466 
1467 		commit_list_insert(c, &list);
1468 		while (list) {
1469 			struct commit *current = list->item;
1470 			struct commit_list *parent;
1471 			int all_parents_computed = 1;
1472 			uint32_t max_level = 0;
1473 
1474 			for (parent = current->parents; parent; parent = parent->next) {
1475 				repo_parse_commit(ctx->r, parent->item);
1476 				level = *topo_level_slab_at(ctx->topo_levels, parent->item);
1477 
1478 				if (level == GENERATION_NUMBER_ZERO) {
1479 					all_parents_computed = 0;
1480 					commit_list_insert(parent->item, &list);
1481 					break;
1482 				}
1483 
1484 				if (level > max_level)
1485 					max_level = level;
1486 			}
1487 
1488 			if (all_parents_computed) {
1489 				pop_commit(&list);
1490 
1491 				if (max_level > GENERATION_NUMBER_V1_MAX - 1)
1492 					max_level = GENERATION_NUMBER_V1_MAX - 1;
1493 				*topo_level_slab_at(ctx->topo_levels, current) = max_level + 1;
1494 			}
1495 		}
1496 	}
1497 	stop_progress(&ctx->progress);
1498 }
1499 
compute_generation_numbers(struct write_commit_graph_context * ctx)1500 static void compute_generation_numbers(struct write_commit_graph_context *ctx)
1501 {
1502 	int i;
1503 	struct commit_list *list = NULL;
1504 
1505 	if (ctx->report_progress)
1506 		ctx->progress = start_delayed_progress(
1507 					_("Computing commit graph generation numbers"),
1508 					ctx->commits.nr);
1509 
1510 	if (!ctx->trust_generation_numbers) {
1511 		for (i = 0; i < ctx->commits.nr; i++) {
1512 			struct commit *c = ctx->commits.list[i];
1513 			repo_parse_commit(ctx->r, c);
1514 			commit_graph_data_at(c)->generation = GENERATION_NUMBER_ZERO;
1515 		}
1516 	}
1517 
1518 	for (i = 0; i < ctx->commits.nr; i++) {
1519 		struct commit *c = ctx->commits.list[i];
1520 		timestamp_t corrected_commit_date;
1521 
1522 		repo_parse_commit(ctx->r, c);
1523 		corrected_commit_date = commit_graph_data_at(c)->generation;
1524 
1525 		display_progress(ctx->progress, i + 1);
1526 		if (corrected_commit_date != GENERATION_NUMBER_ZERO)
1527 			continue;
1528 
1529 		commit_list_insert(c, &list);
1530 		while (list) {
1531 			struct commit *current = list->item;
1532 			struct commit_list *parent;
1533 			int all_parents_computed = 1;
1534 			timestamp_t max_corrected_commit_date = 0;
1535 
1536 			for (parent = current->parents; parent; parent = parent->next) {
1537 				repo_parse_commit(ctx->r, parent->item);
1538 				corrected_commit_date = commit_graph_data_at(parent->item)->generation;
1539 
1540 				if (corrected_commit_date == GENERATION_NUMBER_ZERO) {
1541 					all_parents_computed = 0;
1542 					commit_list_insert(parent->item, &list);
1543 					break;
1544 				}
1545 
1546 				if (corrected_commit_date > max_corrected_commit_date)
1547 					max_corrected_commit_date = corrected_commit_date;
1548 			}
1549 
1550 			if (all_parents_computed) {
1551 				pop_commit(&list);
1552 
1553 				if (current->date && current->date > max_corrected_commit_date)
1554 					max_corrected_commit_date = current->date - 1;
1555 				commit_graph_data_at(current)->generation = max_corrected_commit_date + 1;
1556 
1557 				if (commit_graph_data_at(current)->generation - current->date > GENERATION_NUMBER_V2_OFFSET_MAX)
1558 					ctx->num_generation_data_overflows++;
1559 			}
1560 		}
1561 	}
1562 	stop_progress(&ctx->progress);
1563 }
1564 
trace2_bloom_filter_write_statistics(struct write_commit_graph_context * ctx)1565 static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx)
1566 {
1567 	trace2_data_intmax("commit-graph", ctx->r, "filter-computed",
1568 			   ctx->count_bloom_filter_computed);
1569 	trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed",
1570 			   ctx->count_bloom_filter_not_computed);
1571 	trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-empty",
1572 			   ctx->count_bloom_filter_trunc_empty);
1573 	trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
1574 			   ctx->count_bloom_filter_trunc_large);
1575 }
1576 
compute_bloom_filters(struct write_commit_graph_context * ctx)1577 static void compute_bloom_filters(struct write_commit_graph_context *ctx)
1578 {
1579 	int i;
1580 	struct progress *progress = NULL;
1581 	struct commit **sorted_commits;
1582 	int max_new_filters;
1583 
1584 	init_bloom_filters();
1585 
1586 	if (ctx->report_progress)
1587 		progress = start_delayed_progress(
1588 			_("Computing commit changed paths Bloom filters"),
1589 			ctx->commits.nr);
1590 
1591 	ALLOC_ARRAY(sorted_commits, ctx->commits.nr);
1592 	COPY_ARRAY(sorted_commits, ctx->commits.list, ctx->commits.nr);
1593 
1594 	if (ctx->order_by_pack)
1595 		QSORT(sorted_commits, ctx->commits.nr, commit_pos_cmp);
1596 	else
1597 		QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
1598 
1599 	max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
1600 		ctx->opts->max_new_filters : ctx->commits.nr;
1601 
1602 	for (i = 0; i < ctx->commits.nr; i++) {
1603 		enum bloom_filter_computed computed = 0;
1604 		struct commit *c = sorted_commits[i];
1605 		struct bloom_filter *filter = get_or_compute_bloom_filter(
1606 			ctx->r,
1607 			c,
1608 			ctx->count_bloom_filter_computed < max_new_filters,
1609 			ctx->bloom_settings,
1610 			&computed);
1611 		if (computed & BLOOM_COMPUTED) {
1612 			ctx->count_bloom_filter_computed++;
1613 			if (computed & BLOOM_TRUNC_EMPTY)
1614 				ctx->count_bloom_filter_trunc_empty++;
1615 			if (computed & BLOOM_TRUNC_LARGE)
1616 				ctx->count_bloom_filter_trunc_large++;
1617 		} else if (computed & BLOOM_NOT_COMPUTED)
1618 			ctx->count_bloom_filter_not_computed++;
1619 		ctx->total_bloom_filter_data_size += filter
1620 			? sizeof(unsigned char) * filter->len : 0;
1621 		display_progress(progress, i + 1);
1622 	}
1623 
1624 	if (trace2_is_enabled())
1625 		trace2_bloom_filter_write_statistics(ctx);
1626 
1627 	free(sorted_commits);
1628 	stop_progress(&progress);
1629 }
1630 
1631 struct refs_cb_data {
1632 	struct oidset *commits;
1633 	struct progress *progress;
1634 };
1635 
add_ref_to_set(const char * refname,const struct object_id * oid,int flags,void * cb_data)1636 static int add_ref_to_set(const char *refname,
1637 			  const struct object_id *oid,
1638 			  int flags, void *cb_data)
1639 {
1640 	struct object_id peeled;
1641 	struct refs_cb_data *data = (struct refs_cb_data *)cb_data;
1642 
1643 	if (!peel_iterated_oid(oid, &peeled))
1644 		oid = &peeled;
1645 	if (oid_object_info(the_repository, oid, NULL) == OBJ_COMMIT)
1646 		oidset_insert(data->commits, oid);
1647 
1648 	display_progress(data->progress, oidset_size(data->commits));
1649 
1650 	return 0;
1651 }
1652 
write_commit_graph_reachable(struct object_directory * odb,enum commit_graph_write_flags flags,const struct commit_graph_opts * opts)1653 int write_commit_graph_reachable(struct object_directory *odb,
1654 				 enum commit_graph_write_flags flags,
1655 				 const struct commit_graph_opts *opts)
1656 {
1657 	struct oidset commits = OIDSET_INIT;
1658 	struct refs_cb_data data;
1659 	int result;
1660 
1661 	memset(&data, 0, sizeof(data));
1662 	data.commits = &commits;
1663 	if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
1664 		data.progress = start_delayed_progress(
1665 			_("Collecting referenced commits"), 0);
1666 
1667 	for_each_ref(add_ref_to_set, &data);
1668 
1669 	stop_progress(&data.progress);
1670 
1671 	result = write_commit_graph(odb, NULL, &commits,
1672 				    flags, opts);
1673 
1674 	oidset_clear(&commits);
1675 	return result;
1676 }
1677 
fill_oids_from_packs(struct write_commit_graph_context * ctx,struct string_list * pack_indexes)1678 static int fill_oids_from_packs(struct write_commit_graph_context *ctx,
1679 				struct string_list *pack_indexes)
1680 {
1681 	uint32_t i;
1682 	struct strbuf progress_title = STRBUF_INIT;
1683 	struct strbuf packname = STRBUF_INIT;
1684 	int dirlen;
1685 
1686 	strbuf_addf(&packname, "%s/pack/", ctx->odb->path);
1687 	dirlen = packname.len;
1688 	if (ctx->report_progress) {
1689 		strbuf_addf(&progress_title,
1690 			    Q_("Finding commits for commit graph in %d pack",
1691 			       "Finding commits for commit graph in %d packs",
1692 			       pack_indexes->nr),
1693 			    pack_indexes->nr);
1694 		ctx->progress = start_delayed_progress(progress_title.buf, 0);
1695 		ctx->progress_done = 0;
1696 	}
1697 	for (i = 0; i < pack_indexes->nr; i++) {
1698 		struct packed_git *p;
1699 		strbuf_setlen(&packname, dirlen);
1700 		strbuf_addstr(&packname, pack_indexes->items[i].string);
1701 		p = add_packed_git(packname.buf, packname.len, 1);
1702 		if (!p) {
1703 			error(_("error adding pack %s"), packname.buf);
1704 			return -1;
1705 		}
1706 		if (open_pack_index(p)) {
1707 			error(_("error opening index for %s"), packname.buf);
1708 			return -1;
1709 		}
1710 		for_each_object_in_pack(p, add_packed_commits, ctx,
1711 					FOR_EACH_OBJECT_PACK_ORDER);
1712 		close_pack(p);
1713 		free(p);
1714 	}
1715 
1716 	stop_progress(&ctx->progress);
1717 	strbuf_release(&progress_title);
1718 	strbuf_release(&packname);
1719 
1720 	return 0;
1721 }
1722 
fill_oids_from_commits(struct write_commit_graph_context * ctx,struct oidset * commits)1723 static int fill_oids_from_commits(struct write_commit_graph_context *ctx,
1724 				  struct oidset *commits)
1725 {
1726 	struct oidset_iter iter;
1727 	struct object_id *oid;
1728 
1729 	if (!oidset_size(commits))
1730 		return 0;
1731 
1732 	oidset_iter_init(commits, &iter);
1733 	while ((oid = oidset_iter_next(&iter))) {
1734 		oid_array_append(&ctx->oids, oid);
1735 	}
1736 
1737 	return 0;
1738 }
1739 
fill_oids_from_all_packs(struct write_commit_graph_context * ctx)1740 static void fill_oids_from_all_packs(struct write_commit_graph_context *ctx)
1741 {
1742 	if (ctx->report_progress)
1743 		ctx->progress = start_delayed_progress(
1744 			_("Finding commits for commit graph among packed objects"),
1745 			ctx->approx_nr_objects);
1746 	for_each_packed_object(add_packed_commits, ctx,
1747 			       FOR_EACH_OBJECT_PACK_ORDER);
1748 	if (ctx->progress_done < ctx->approx_nr_objects)
1749 		display_progress(ctx->progress, ctx->approx_nr_objects);
1750 	stop_progress(&ctx->progress);
1751 }
1752 
copy_oids_to_commits(struct write_commit_graph_context * ctx)1753 static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
1754 {
1755 	uint32_t i;
1756 	enum commit_graph_split_flags flags = ctx->opts ?
1757 		ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
1758 
1759 	ctx->num_extra_edges = 0;
1760 	if (ctx->report_progress)
1761 		ctx->progress = start_delayed_progress(
1762 			_("Finding extra edges in commit graph"),
1763 			ctx->oids.nr);
1764 	oid_array_sort(&ctx->oids);
1765 	for (i = 0; i < ctx->oids.nr; i = oid_array_next_unique(&ctx->oids, i)) {
1766 		unsigned int num_parents;
1767 
1768 		display_progress(ctx->progress, i + 1);
1769 
1770 		ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc);
1771 		ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.oid[i]);
1772 
1773 		if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE &&
1774 		    commit_graph_position(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH)
1775 			continue;
1776 
1777 		if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE)
1778 			repo_parse_commit(ctx->r, ctx->commits.list[ctx->commits.nr]);
1779 		else
1780 			repo_parse_commit_no_graph(ctx->r, ctx->commits.list[ctx->commits.nr]);
1781 
1782 		num_parents = commit_list_count(ctx->commits.list[ctx->commits.nr]->parents);
1783 		if (num_parents > 2)
1784 			ctx->num_extra_edges += num_parents - 1;
1785 
1786 		ctx->commits.nr++;
1787 	}
1788 	stop_progress(&ctx->progress);
1789 }
1790 
write_graph_chunk_base_1(struct hashfile * f,struct commit_graph * g)1791 static int write_graph_chunk_base_1(struct hashfile *f,
1792 				    struct commit_graph *g)
1793 {
1794 	int num = 0;
1795 
1796 	if (!g)
1797 		return 0;
1798 
1799 	num = write_graph_chunk_base_1(f, g->base_graph);
1800 	hashwrite(f, g->oid.hash, the_hash_algo->rawsz);
1801 	return num + 1;
1802 }
1803 
write_graph_chunk_base(struct hashfile * f,void * data)1804 static int write_graph_chunk_base(struct hashfile *f,
1805 				    void *data)
1806 {
1807 	struct write_commit_graph_context *ctx = data;
1808 	int num = write_graph_chunk_base_1(f, ctx->new_base_graph);
1809 
1810 	if (num != ctx->num_commit_graphs_after - 1) {
1811 		error(_("failed to write correct number of base graph ids"));
1812 		return -1;
1813 	}
1814 
1815 	return 0;
1816 }
1817 
write_commit_graph_file(struct write_commit_graph_context * ctx)1818 static int write_commit_graph_file(struct write_commit_graph_context *ctx)
1819 {
1820 	uint32_t i;
1821 	int fd;
1822 	struct hashfile *f;
1823 	struct lock_file lk = LOCK_INIT;
1824 	const unsigned hashsz = the_hash_algo->rawsz;
1825 	struct strbuf progress_title = STRBUF_INIT;
1826 	struct chunkfile *cf;
1827 	unsigned char file_hash[GIT_MAX_RAWSZ];
1828 
1829 	if (ctx->split) {
1830 		struct strbuf tmp_file = STRBUF_INIT;
1831 
1832 		strbuf_addf(&tmp_file,
1833 			    "%s/info/commit-graphs/tmp_graph_XXXXXX",
1834 			    ctx->odb->path);
1835 		ctx->graph_name = strbuf_detach(&tmp_file, NULL);
1836 	} else {
1837 		ctx->graph_name = get_commit_graph_filename(ctx->odb);
1838 	}
1839 
1840 	if (safe_create_leading_directories(ctx->graph_name)) {
1841 		UNLEAK(ctx->graph_name);
1842 		error(_("unable to create leading directories of %s"),
1843 			ctx->graph_name);
1844 		return -1;
1845 	}
1846 
1847 	if (ctx->split) {
1848 		char *lock_name = get_commit_graph_chain_filename(ctx->odb);
1849 
1850 		hold_lock_file_for_update_mode(&lk, lock_name,
1851 					       LOCK_DIE_ON_ERROR, 0444);
1852 
1853 		fd = git_mkstemp_mode(ctx->graph_name, 0444);
1854 		if (fd < 0) {
1855 			error(_("unable to create temporary graph layer"));
1856 			return -1;
1857 		}
1858 
1859 		if (adjust_shared_perm(ctx->graph_name)) {
1860 			error(_("unable to adjust shared permissions for '%s'"),
1861 			      ctx->graph_name);
1862 			return -1;
1863 		}
1864 
1865 		f = hashfd(fd, ctx->graph_name);
1866 	} else {
1867 		hold_lock_file_for_update_mode(&lk, ctx->graph_name,
1868 					       LOCK_DIE_ON_ERROR, 0444);
1869 		fd = get_lock_file_fd(&lk);
1870 		f = hashfd(fd, get_lock_file_path(&lk));
1871 	}
1872 
1873 	cf = init_chunkfile(f);
1874 
1875 	add_chunk(cf, GRAPH_CHUNKID_OIDFANOUT, GRAPH_FANOUT_SIZE,
1876 		  write_graph_chunk_fanout);
1877 	add_chunk(cf, GRAPH_CHUNKID_OIDLOOKUP, hashsz * ctx->commits.nr,
1878 		  write_graph_chunk_oids);
1879 	add_chunk(cf, GRAPH_CHUNKID_DATA, (hashsz + 16) * ctx->commits.nr,
1880 		  write_graph_chunk_data);
1881 
1882 	if (ctx->write_generation_data)
1883 		add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA,
1884 			  sizeof(uint32_t) * ctx->commits.nr,
1885 			  write_graph_chunk_generation_data);
1886 	if (ctx->num_generation_data_overflows)
1887 		add_chunk(cf, GRAPH_CHUNKID_GENERATION_DATA_OVERFLOW,
1888 			  sizeof(timestamp_t) * ctx->num_generation_data_overflows,
1889 			  write_graph_chunk_generation_data_overflow);
1890 	if (ctx->num_extra_edges)
1891 		add_chunk(cf, GRAPH_CHUNKID_EXTRAEDGES,
1892 			  4 * ctx->num_extra_edges,
1893 			  write_graph_chunk_extra_edges);
1894 	if (ctx->changed_paths) {
1895 		add_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
1896 			  sizeof(uint32_t) * ctx->commits.nr,
1897 			  write_graph_chunk_bloom_indexes);
1898 		add_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
1899 			  sizeof(uint32_t) * 3
1900 				+ ctx->total_bloom_filter_data_size,
1901 			  write_graph_chunk_bloom_data);
1902 	}
1903 	if (ctx->num_commit_graphs_after > 1)
1904 		add_chunk(cf, GRAPH_CHUNKID_BASE,
1905 			  hashsz * (ctx->num_commit_graphs_after - 1),
1906 			  write_graph_chunk_base);
1907 
1908 	hashwrite_be32(f, GRAPH_SIGNATURE);
1909 
1910 	hashwrite_u8(f, GRAPH_VERSION);
1911 	hashwrite_u8(f, oid_version());
1912 	hashwrite_u8(f, get_num_chunks(cf));
1913 	hashwrite_u8(f, ctx->num_commit_graphs_after - 1);
1914 
1915 	if (ctx->report_progress) {
1916 		strbuf_addf(&progress_title,
1917 			    Q_("Writing out commit graph in %d pass",
1918 			       "Writing out commit graph in %d passes",
1919 			       get_num_chunks(cf)),
1920 			    get_num_chunks(cf));
1921 		ctx->progress = start_delayed_progress(
1922 			progress_title.buf,
1923 			get_num_chunks(cf) * ctx->commits.nr);
1924 	}
1925 
1926 	write_chunkfile(cf, ctx);
1927 
1928 	stop_progress(&ctx->progress);
1929 	strbuf_release(&progress_title);
1930 
1931 	if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) {
1932 		char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid));
1933 		char *new_base_name = get_split_graph_filename(ctx->new_base_graph->odb, new_base_hash);
1934 
1935 		free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]);
1936 		free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]);
1937 		ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name;
1938 		ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash;
1939 	}
1940 
1941 	close_commit_graph(ctx->r->objects);
1942 	finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
1943 	free_chunkfile(cf);
1944 
1945 	if (ctx->split) {
1946 		FILE *chainf = fdopen_lock_file(&lk, "w");
1947 		char *final_graph_name;
1948 		int result;
1949 
1950 		close(fd);
1951 
1952 		if (!chainf) {
1953 			error(_("unable to open commit-graph chain file"));
1954 			return -1;
1955 		}
1956 
1957 		if (ctx->base_graph_name) {
1958 			const char *dest;
1959 			int idx = ctx->num_commit_graphs_after - 1;
1960 			if (ctx->num_commit_graphs_after > 1)
1961 				idx--;
1962 
1963 			dest = ctx->commit_graph_filenames_after[idx];
1964 
1965 			if (strcmp(ctx->base_graph_name, dest)) {
1966 				result = rename(ctx->base_graph_name, dest);
1967 
1968 				if (result) {
1969 					error(_("failed to rename base commit-graph file"));
1970 					return -1;
1971 				}
1972 			}
1973 		} else {
1974 			char *graph_name = get_commit_graph_filename(ctx->odb);
1975 			unlink(graph_name);
1976 		}
1977 
1978 		ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(hash_to_hex(file_hash));
1979 		final_graph_name = get_split_graph_filename(ctx->odb,
1980 					ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]);
1981 		ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name;
1982 
1983 		result = rename(ctx->graph_name, final_graph_name);
1984 
1985 		for (i = 0; i < ctx->num_commit_graphs_after; i++)
1986 			fprintf(get_lock_file_fp(&lk), "%s\n", ctx->commit_graph_hash_after[i]);
1987 
1988 		if (result) {
1989 			error(_("failed to rename temporary commit-graph file"));
1990 			return -1;
1991 		}
1992 	}
1993 
1994 	commit_lock_file(&lk);
1995 
1996 	return 0;
1997 }
1998 
split_graph_merge_strategy(struct write_commit_graph_context * ctx)1999 static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
2000 {
2001 	struct commit_graph *g;
2002 	uint32_t num_commits;
2003 	enum commit_graph_split_flags flags = COMMIT_GRAPH_SPLIT_UNSPECIFIED;
2004 	uint32_t i;
2005 
2006 	int max_commits = 0;
2007 	int size_mult = 2;
2008 
2009 	if (ctx->opts) {
2010 		max_commits = ctx->opts->max_commits;
2011 
2012 		if (ctx->opts->size_multiple)
2013 			size_mult = ctx->opts->size_multiple;
2014 
2015 		flags = ctx->opts->split_flags;
2016 	}
2017 
2018 	g = ctx->r->objects->commit_graph;
2019 	num_commits = ctx->commits.nr;
2020 	if (flags == COMMIT_GRAPH_SPLIT_REPLACE)
2021 		ctx->num_commit_graphs_after = 1;
2022 	else
2023 		ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
2024 
2025 	if (flags != COMMIT_GRAPH_SPLIT_MERGE_PROHIBITED &&
2026 	    flags != COMMIT_GRAPH_SPLIT_REPLACE) {
2027 		while (g && (g->num_commits <= size_mult * num_commits ||
2028 			    (max_commits && num_commits > max_commits))) {
2029 			if (g->odb != ctx->odb)
2030 				break;
2031 
2032 			num_commits += g->num_commits;
2033 			g = g->base_graph;
2034 
2035 			ctx->num_commit_graphs_after--;
2036 		}
2037 	}
2038 
2039 	if (flags != COMMIT_GRAPH_SPLIT_REPLACE)
2040 		ctx->new_base_graph = g;
2041 	else if (ctx->num_commit_graphs_after != 1)
2042 		BUG("split_graph_merge_strategy: num_commit_graphs_after "
2043 		    "should be 1 with --split=replace");
2044 
2045 	if (ctx->num_commit_graphs_after == 2) {
2046 		char *old_graph_name = get_commit_graph_filename(g->odb);
2047 
2048 		if (!strcmp(g->filename, old_graph_name) &&
2049 		    g->odb != ctx->odb) {
2050 			ctx->num_commit_graphs_after = 1;
2051 			ctx->new_base_graph = NULL;
2052 		}
2053 
2054 		free(old_graph_name);
2055 	}
2056 
2057 	CALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
2058 	CALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
2059 
2060 	for (i = 0; i < ctx->num_commit_graphs_after &&
2061 		    i < ctx->num_commit_graphs_before; i++)
2062 		ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
2063 
2064 	i = ctx->num_commit_graphs_before - 1;
2065 	g = ctx->r->objects->commit_graph;
2066 
2067 	while (g) {
2068 		if (i < ctx->num_commit_graphs_after)
2069 			ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
2070 
2071 		/*
2072 		 * If the topmost remaining layer has generation data chunk, the
2073 		 * resultant layer also has generation data chunk.
2074 		 */
2075 		if (i == ctx->num_commit_graphs_after - 2)
2076 			ctx->write_generation_data = !!g->chunk_generation_data;
2077 
2078 		i--;
2079 		g = g->base_graph;
2080 	}
2081 }
2082 
merge_commit_graph(struct write_commit_graph_context * ctx,struct commit_graph * g)2083 static void merge_commit_graph(struct write_commit_graph_context *ctx,
2084 			       struct commit_graph *g)
2085 {
2086 	uint32_t i;
2087 	uint32_t offset = g->num_commits_in_base;
2088 
2089 	ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc);
2090 
2091 	for (i = 0; i < g->num_commits; i++) {
2092 		struct object_id oid;
2093 		struct commit *result;
2094 
2095 		display_progress(ctx->progress, i + 1);
2096 
2097 		load_oid_from_graph(g, i + offset, &oid);
2098 
2099 		/* only add commits if they still exist in the repo */
2100 		result = lookup_commit_reference_gently(ctx->r, &oid, 1);
2101 
2102 		if (result) {
2103 			ctx->commits.list[ctx->commits.nr] = result;
2104 			ctx->commits.nr++;
2105 		}
2106 	}
2107 }
2108 
commit_compare(const void * _a,const void * _b)2109 static int commit_compare(const void *_a, const void *_b)
2110 {
2111 	const struct commit *a = *(const struct commit **)_a;
2112 	const struct commit *b = *(const struct commit **)_b;
2113 	return oidcmp(&a->object.oid, &b->object.oid);
2114 }
2115 
sort_and_scan_merged_commits(struct write_commit_graph_context * ctx)2116 static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
2117 {
2118 	uint32_t i, dedup_i = 0;
2119 
2120 	if (ctx->report_progress)
2121 		ctx->progress = start_delayed_progress(
2122 					_("Scanning merged commits"),
2123 					ctx->commits.nr);
2124 
2125 	QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
2126 
2127 	ctx->num_extra_edges = 0;
2128 	for (i = 0; i < ctx->commits.nr; i++) {
2129 		display_progress(ctx->progress, i + 1);
2130 
2131 		if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
2132 			  &ctx->commits.list[i]->object.oid)) {
2133 			/*
2134 			 * Silently ignore duplicates. These were likely
2135 			 * created due to a commit appearing in multiple
2136 			 * layers of the chain, which is unexpected but
2137 			 * not invalid. We should make sure there is a
2138 			 * unique copy in the new layer.
2139 			 */
2140 		} else {
2141 			unsigned int num_parents;
2142 
2143 			ctx->commits.list[dedup_i] = ctx->commits.list[i];
2144 			dedup_i++;
2145 
2146 			num_parents = commit_list_count(ctx->commits.list[i]->parents);
2147 			if (num_parents > 2)
2148 				ctx->num_extra_edges += num_parents - 1;
2149 		}
2150 	}
2151 
2152 	ctx->commits.nr = dedup_i;
2153 
2154 	stop_progress(&ctx->progress);
2155 }
2156 
merge_commit_graphs(struct write_commit_graph_context * ctx)2157 static void merge_commit_graphs(struct write_commit_graph_context *ctx)
2158 {
2159 	struct commit_graph *g = ctx->r->objects->commit_graph;
2160 	uint32_t current_graph_number = ctx->num_commit_graphs_before;
2161 
2162 	while (g && current_graph_number >= ctx->num_commit_graphs_after) {
2163 		current_graph_number--;
2164 
2165 		if (ctx->report_progress)
2166 			ctx->progress = start_delayed_progress(_("Merging commit-graph"), 0);
2167 
2168 		merge_commit_graph(ctx, g);
2169 		stop_progress(&ctx->progress);
2170 
2171 		g = g->base_graph;
2172 	}
2173 
2174 	if (g) {
2175 		ctx->new_base_graph = g;
2176 		ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
2177 	}
2178 
2179 	if (ctx->new_base_graph)
2180 		ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename);
2181 
2182 	sort_and_scan_merged_commits(ctx);
2183 }
2184 
mark_commit_graphs(struct write_commit_graph_context * ctx)2185 static void mark_commit_graphs(struct write_commit_graph_context *ctx)
2186 {
2187 	uint32_t i;
2188 	time_t now = time(NULL);
2189 
2190 	for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) {
2191 		struct stat st;
2192 		struct utimbuf updated_time;
2193 
2194 		stat(ctx->commit_graph_filenames_before[i], &st);
2195 
2196 		updated_time.actime = st.st_atime;
2197 		updated_time.modtime = now;
2198 		utime(ctx->commit_graph_filenames_before[i], &updated_time);
2199 	}
2200 }
2201 
expire_commit_graphs(struct write_commit_graph_context * ctx)2202 static void expire_commit_graphs(struct write_commit_graph_context *ctx)
2203 {
2204 	struct strbuf path = STRBUF_INIT;
2205 	DIR *dir;
2206 	struct dirent *de;
2207 	size_t dirnamelen;
2208 	timestamp_t expire_time = time(NULL);
2209 
2210 	if (ctx->opts && ctx->opts->expire_time)
2211 		expire_time = ctx->opts->expire_time;
2212 	if (!ctx->split) {
2213 		char *chain_file_name = get_commit_graph_chain_filename(ctx->odb);
2214 		unlink(chain_file_name);
2215 		free(chain_file_name);
2216 		ctx->num_commit_graphs_after = 0;
2217 	}
2218 
2219 	strbuf_addstr(&path, ctx->odb->path);
2220 	strbuf_addstr(&path, "/info/commit-graphs");
2221 	dir = opendir(path.buf);
2222 
2223 	if (!dir)
2224 		goto out;
2225 
2226 	strbuf_addch(&path, '/');
2227 	dirnamelen = path.len;
2228 	while ((de = readdir(dir)) != NULL) {
2229 		struct stat st;
2230 		uint32_t i, found = 0;
2231 
2232 		strbuf_setlen(&path, dirnamelen);
2233 		strbuf_addstr(&path, de->d_name);
2234 
2235 		stat(path.buf, &st);
2236 
2237 		if (st.st_mtime > expire_time)
2238 			continue;
2239 		if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph"))
2240 			continue;
2241 
2242 		for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2243 			if (!strcmp(ctx->commit_graph_filenames_after[i],
2244 				    path.buf)) {
2245 				found = 1;
2246 				break;
2247 			}
2248 		}
2249 
2250 		if (!found)
2251 			unlink(path.buf);
2252 	}
2253 
2254 out:
2255 	strbuf_release(&path);
2256 }
2257 
write_commit_graph(struct object_directory * odb,struct string_list * pack_indexes,struct oidset * commits,enum commit_graph_write_flags flags,const struct commit_graph_opts * opts)2258 int write_commit_graph(struct object_directory *odb,
2259 		       struct string_list *pack_indexes,
2260 		       struct oidset *commits,
2261 		       enum commit_graph_write_flags flags,
2262 		       const struct commit_graph_opts *opts)
2263 {
2264 	struct repository *r = the_repository;
2265 	struct write_commit_graph_context *ctx;
2266 	uint32_t i;
2267 	int res = 0;
2268 	int replace = 0;
2269 	struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
2270 	struct topo_level_slab topo_levels;
2271 
2272 	prepare_repo_settings(r);
2273 	if (!r->settings.core_commit_graph) {
2274 		warning(_("attempting to write a commit-graph, but 'core.commitGraph' is disabled"));
2275 		return 0;
2276 	}
2277 	if (!commit_graph_compatible(r))
2278 		return 0;
2279 
2280 	CALLOC_ARRAY(ctx, 1);
2281 	ctx->r = r;
2282 	ctx->odb = odb;
2283 	ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
2284 	ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
2285 	ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
2286 	ctx->opts = opts;
2287 	ctx->total_bloom_filter_data_size = 0;
2288 	ctx->write_generation_data = (get_configured_generation_version(r) == 2);
2289 	ctx->num_generation_data_overflows = 0;
2290 
2291 	bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
2292 						      bloom_settings.bits_per_entry);
2293 	bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
2294 						  bloom_settings.num_hashes);
2295 	bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
2296 							 bloom_settings.max_changed_paths);
2297 	ctx->bloom_settings = &bloom_settings;
2298 
2299 	init_topo_level_slab(&topo_levels);
2300 	ctx->topo_levels = &topo_levels;
2301 
2302 	prepare_commit_graph(ctx->r);
2303 	if (ctx->r->objects->commit_graph) {
2304 		struct commit_graph *g = ctx->r->objects->commit_graph;
2305 
2306 		while (g) {
2307 			g->topo_levels = &topo_levels;
2308 			g = g->base_graph;
2309 		}
2310 	}
2311 
2312 	if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
2313 		ctx->changed_paths = 1;
2314 	if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
2315 		struct commit_graph *g;
2316 
2317 		g = ctx->r->objects->commit_graph;
2318 
2319 		/* We have changed-paths already. Keep them in the next graph */
2320 		if (g && g->chunk_bloom_data) {
2321 			ctx->changed_paths = 1;
2322 			ctx->bloom_settings = g->bloom_filter_settings;
2323 		}
2324 	}
2325 
2326 	if (ctx->split) {
2327 		struct commit_graph *g = ctx->r->objects->commit_graph;
2328 
2329 		while (g) {
2330 			ctx->num_commit_graphs_before++;
2331 			g = g->base_graph;
2332 		}
2333 
2334 		if (ctx->num_commit_graphs_before) {
2335 			ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before);
2336 			i = ctx->num_commit_graphs_before;
2337 			g = ctx->r->objects->commit_graph;
2338 
2339 			while (g) {
2340 				ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename);
2341 				g = g->base_graph;
2342 			}
2343 		}
2344 
2345 		if (ctx->opts)
2346 			replace = ctx->opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
2347 	}
2348 
2349 	ctx->approx_nr_objects = approximate_object_count();
2350 
2351 	if (ctx->append && ctx->r->objects->commit_graph) {
2352 		struct commit_graph *g = ctx->r->objects->commit_graph;
2353 		for (i = 0; i < g->num_commits; i++) {
2354 			struct object_id oid;
2355 			oidread(&oid, g->chunk_oid_lookup + g->hash_len * i);
2356 			oid_array_append(&ctx->oids, &oid);
2357 		}
2358 	}
2359 
2360 	if (pack_indexes) {
2361 		ctx->order_by_pack = 1;
2362 		if ((res = fill_oids_from_packs(ctx, pack_indexes)))
2363 			goto cleanup;
2364 	}
2365 
2366 	if (commits) {
2367 		if ((res = fill_oids_from_commits(ctx, commits)))
2368 			goto cleanup;
2369 	}
2370 
2371 	if (!pack_indexes && !commits) {
2372 		ctx->order_by_pack = 1;
2373 		fill_oids_from_all_packs(ctx);
2374 	}
2375 
2376 	close_reachable(ctx);
2377 
2378 	copy_oids_to_commits(ctx);
2379 
2380 	if (ctx->commits.nr >= GRAPH_EDGE_LAST_MASK) {
2381 		error(_("too many commits to write graph"));
2382 		res = -1;
2383 		goto cleanup;
2384 	}
2385 
2386 	if (!ctx->commits.nr && !replace)
2387 		goto cleanup;
2388 
2389 	if (ctx->split) {
2390 		split_graph_merge_strategy(ctx);
2391 
2392 		if (!replace)
2393 			merge_commit_graphs(ctx);
2394 	} else
2395 		ctx->num_commit_graphs_after = 1;
2396 
2397 	ctx->trust_generation_numbers = validate_mixed_generation_chain(ctx->r->objects->commit_graph);
2398 
2399 	compute_topological_levels(ctx);
2400 	if (ctx->write_generation_data)
2401 		compute_generation_numbers(ctx);
2402 
2403 	if (ctx->changed_paths)
2404 		compute_bloom_filters(ctx);
2405 
2406 	res = write_commit_graph_file(ctx);
2407 
2408 	if (ctx->split)
2409 		mark_commit_graphs(ctx);
2410 
2411 	expire_commit_graphs(ctx);
2412 
2413 cleanup:
2414 	free(ctx->graph_name);
2415 	free(ctx->commits.list);
2416 	oid_array_clear(&ctx->oids);
2417 	clear_topo_level_slab(&topo_levels);
2418 
2419 	if (ctx->commit_graph_filenames_after) {
2420 		for (i = 0; i < ctx->num_commit_graphs_after; i++) {
2421 			free(ctx->commit_graph_filenames_after[i]);
2422 			free(ctx->commit_graph_hash_after[i]);
2423 		}
2424 
2425 		for (i = 0; i < ctx->num_commit_graphs_before; i++)
2426 			free(ctx->commit_graph_filenames_before[i]);
2427 
2428 		free(ctx->commit_graph_filenames_after);
2429 		free(ctx->commit_graph_filenames_before);
2430 		free(ctx->commit_graph_hash_after);
2431 	}
2432 
2433 	free(ctx);
2434 
2435 	return res;
2436 }
2437 
2438 #define VERIFY_COMMIT_GRAPH_ERROR_HASH 2
2439 static int verify_commit_graph_error;
2440 
2441 __attribute__((format (printf, 1, 2)))
graph_report(const char * fmt,...)2442 static void graph_report(const char *fmt, ...)
2443 {
2444 	va_list ap;
2445 
2446 	verify_commit_graph_error = 1;
2447 	va_start(ap, fmt);
2448 	vfprintf(stderr, fmt, ap);
2449 	fprintf(stderr, "\n");
2450 	va_end(ap);
2451 }
2452 
2453 #define GENERATION_ZERO_EXISTS 1
2454 #define GENERATION_NUMBER_EXISTS 2
2455 
commit_graph_checksum_valid(struct commit_graph * g)2456 static int commit_graph_checksum_valid(struct commit_graph *g)
2457 {
2458 	return hashfile_checksum_valid(g->data, g->data_len);
2459 }
2460 
verify_commit_graph(struct repository * r,struct commit_graph * g,int flags)2461 int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags)
2462 {
2463 	uint32_t i, cur_fanout_pos = 0;
2464 	struct object_id prev_oid, cur_oid;
2465 	int generation_zero = 0;
2466 	struct progress *progress = NULL;
2467 	int local_error = 0;
2468 
2469 	if (!g) {
2470 		graph_report("no commit-graph file loaded");
2471 		return 1;
2472 	}
2473 
2474 	verify_commit_graph_error = verify_commit_graph_lite(g);
2475 	if (verify_commit_graph_error)
2476 		return verify_commit_graph_error;
2477 
2478 	if (!commit_graph_checksum_valid(g)) {
2479 		graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt"));
2480 		verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH;
2481 	}
2482 
2483 	for (i = 0; i < g->num_commits; i++) {
2484 		struct commit *graph_commit;
2485 
2486 		oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i);
2487 
2488 		if (i && oidcmp(&prev_oid, &cur_oid) >= 0)
2489 			graph_report(_("commit-graph has incorrect OID order: %s then %s"),
2490 				     oid_to_hex(&prev_oid),
2491 				     oid_to_hex(&cur_oid));
2492 
2493 		oidcpy(&prev_oid, &cur_oid);
2494 
2495 		while (cur_oid.hash[0] > cur_fanout_pos) {
2496 			uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2497 
2498 			if (i != fanout_value)
2499 				graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2500 					     cur_fanout_pos, fanout_value, i);
2501 			cur_fanout_pos++;
2502 		}
2503 
2504 		graph_commit = lookup_commit(r, &cur_oid);
2505 		if (!parse_commit_in_graph_one(r, g, graph_commit))
2506 			graph_report(_("failed to parse commit %s from commit-graph"),
2507 				     oid_to_hex(&cur_oid));
2508 	}
2509 
2510 	while (cur_fanout_pos < 256) {
2511 		uint32_t fanout_value = get_be32(g->chunk_oid_fanout + cur_fanout_pos);
2512 
2513 		if (g->num_commits != fanout_value)
2514 			graph_report(_("commit-graph has incorrect fanout value: fanout[%d] = %u != %u"),
2515 				     cur_fanout_pos, fanout_value, i);
2516 
2517 		cur_fanout_pos++;
2518 	}
2519 
2520 	if (verify_commit_graph_error & ~VERIFY_COMMIT_GRAPH_ERROR_HASH)
2521 		return verify_commit_graph_error;
2522 
2523 	if (flags & COMMIT_GRAPH_WRITE_PROGRESS)
2524 		progress = start_progress(_("Verifying commits in commit graph"),
2525 					g->num_commits);
2526 
2527 	for (i = 0; i < g->num_commits; i++) {
2528 		struct commit *graph_commit, *odb_commit;
2529 		struct commit_list *graph_parents, *odb_parents;
2530 		timestamp_t max_generation = 0;
2531 		timestamp_t generation;
2532 
2533 		display_progress(progress, i + 1);
2534 		oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i);
2535 
2536 		graph_commit = lookup_commit(r, &cur_oid);
2537 		odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r));
2538 		if (parse_commit_internal(odb_commit, 0, 0)) {
2539 			graph_report(_("failed to parse commit %s from object database for commit-graph"),
2540 				     oid_to_hex(&cur_oid));
2541 			continue;
2542 		}
2543 
2544 		if (!oideq(&get_commit_tree_in_graph_one(r, g, graph_commit)->object.oid,
2545 			   get_commit_tree_oid(odb_commit)))
2546 			graph_report(_("root tree OID for commit %s in commit-graph is %s != %s"),
2547 				     oid_to_hex(&cur_oid),
2548 				     oid_to_hex(get_commit_tree_oid(graph_commit)),
2549 				     oid_to_hex(get_commit_tree_oid(odb_commit)));
2550 
2551 		graph_parents = graph_commit->parents;
2552 		odb_parents = odb_commit->parents;
2553 
2554 		while (graph_parents) {
2555 			if (odb_parents == NULL) {
2556 				graph_report(_("commit-graph parent list for commit %s is too long"),
2557 					     oid_to_hex(&cur_oid));
2558 				break;
2559 			}
2560 
2561 			/* parse parent in case it is in a base graph */
2562 			parse_commit_in_graph_one(r, g, graph_parents->item);
2563 
2564 			if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid))
2565 				graph_report(_("commit-graph parent for %s is %s != %s"),
2566 					     oid_to_hex(&cur_oid),
2567 					     oid_to_hex(&graph_parents->item->object.oid),
2568 					     oid_to_hex(&odb_parents->item->object.oid));
2569 
2570 			generation = commit_graph_generation(graph_parents->item);
2571 			if (generation > max_generation)
2572 				max_generation = generation;
2573 
2574 			graph_parents = graph_parents->next;
2575 			odb_parents = odb_parents->next;
2576 		}
2577 
2578 		if (odb_parents != NULL)
2579 			graph_report(_("commit-graph parent list for commit %s terminates early"),
2580 				     oid_to_hex(&cur_oid));
2581 
2582 		if (!commit_graph_generation(graph_commit)) {
2583 			if (generation_zero == GENERATION_NUMBER_EXISTS)
2584 				graph_report(_("commit-graph has generation number zero for commit %s, but non-zero elsewhere"),
2585 					     oid_to_hex(&cur_oid));
2586 			generation_zero = GENERATION_ZERO_EXISTS;
2587 		} else if (generation_zero == GENERATION_ZERO_EXISTS)
2588 			graph_report(_("commit-graph has non-zero generation number for commit %s, but zero elsewhere"),
2589 				     oid_to_hex(&cur_oid));
2590 
2591 		if (generation_zero == GENERATION_ZERO_EXISTS)
2592 			continue;
2593 
2594 		/*
2595 		 * If we are using topological level and one of our parents has
2596 		 * generation GENERATION_NUMBER_V1_MAX, then our generation is
2597 		 * also GENERATION_NUMBER_V1_MAX. Decrement to avoid extra logic
2598 		 * in the following condition.
2599 		 */
2600 		if (!g->read_generation_data && max_generation == GENERATION_NUMBER_V1_MAX)
2601 			max_generation--;
2602 
2603 		generation = commit_graph_generation(graph_commit);
2604 		if (generation < max_generation + 1)
2605 			graph_report(_("commit-graph generation for commit %s is %"PRItime" < %"PRItime),
2606 				     oid_to_hex(&cur_oid),
2607 				     generation,
2608 				     max_generation + 1);
2609 
2610 		if (graph_commit->date != odb_commit->date)
2611 			graph_report(_("commit date for commit %s in commit-graph is %"PRItime" != %"PRItime),
2612 				     oid_to_hex(&cur_oid),
2613 				     graph_commit->date,
2614 				     odb_commit->date);
2615 	}
2616 	stop_progress(&progress);
2617 
2618 	local_error = verify_commit_graph_error;
2619 
2620 	if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph)
2621 		local_error |= verify_commit_graph(r, g->base_graph, flags);
2622 
2623 	return local_error;
2624 }
2625 
free_commit_graph(struct commit_graph * g)2626 void free_commit_graph(struct commit_graph *g)
2627 {
2628 	if (!g)
2629 		return;
2630 	if (g->data) {
2631 		munmap((void *)g->data, g->data_len);
2632 		g->data = NULL;
2633 	}
2634 	free(g->filename);
2635 	free(g->bloom_filter_settings);
2636 	free(g);
2637 }
2638 
disable_commit_graph(struct repository * r)2639 void disable_commit_graph(struct repository *r)
2640 {
2641 	r->commit_graph_disabled = 1;
2642 }
2643