1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "commit_graph.h"
9
10 #include "array.h"
11 #include "filebuf.h"
12 #include "futils.h"
13 #include "hash.h"
14 #include "oidarray.h"
15 #include "oidmap.h"
16 #include "pack.h"
17 #include "repository.h"
18 #include "revwalk.h"
19
20 #define GIT_COMMIT_GRAPH_MISSING_PARENT 0x70000000
21 #define GIT_COMMIT_GRAPH_GENERATION_NUMBER_MAX 0x3FFFFFFF
22 #define GIT_COMMIT_GRAPH_GENERATION_NUMBER_INFINITY 0xFFFFFFFF
23
24 #define COMMIT_GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
25 #define COMMIT_GRAPH_VERSION 1
26 #define COMMIT_GRAPH_OBJECT_ID_VERSION 1
27 struct git_commit_graph_header {
28 uint32_t signature;
29 uint8_t version;
30 uint8_t object_id_version;
31 uint8_t chunks;
32 uint8_t base_graph_files;
33 };
34
35 #define COMMIT_GRAPH_OID_FANOUT_ID 0x4f494446 /* "OIDF" */
36 #define COMMIT_GRAPH_OID_LOOKUP_ID 0x4f49444c /* "OIDL" */
37 #define COMMIT_GRAPH_COMMIT_DATA_ID 0x43444154 /* "CDAT" */
38 #define COMMIT_GRAPH_EXTRA_EDGE_LIST_ID 0x45444745 /* "EDGE" */
39 #define COMMIT_GRAPH_BLOOM_FILTER_INDEX_ID 0x42494458 /* "BIDX" */
40 #define COMMIT_GRAPH_BLOOM_FILTER_DATA_ID 0x42444154 /* "BDAT" */
41
42 struct git_commit_graph_chunk {
43 off64_t offset;
44 size_t length;
45 };
46
47 typedef git_array_t(size_t) parent_index_array_t;
48
49 struct packed_commit {
50 size_t index;
51 git_oid sha1;
52 git_oid tree_oid;
53 uint32_t generation;
54 git_time_t commit_time;
55 git_array_oid_t parents;
56 parent_index_array_t parent_indices;
57 };
58
packed_commit_free(struct packed_commit * p)59 static void packed_commit_free(struct packed_commit *p)
60 {
61 if (!p)
62 return;
63
64 git_array_clear(p->parents);
65 git_array_clear(p->parent_indices);
66 git__free(p);
67 }
68
packed_commit_new(git_commit * commit)69 static struct packed_commit *packed_commit_new(git_commit *commit)
70 {
71 unsigned int i, parentcount = git_commit_parentcount(commit);
72 struct packed_commit *p = git__calloc(1, sizeof(struct packed_commit));
73 if (!p)
74 goto cleanup;
75
76 git_array_init_to_size(p->parents, parentcount);
77 if (parentcount && !p->parents.ptr)
78 goto cleanup;
79
80 if (git_oid_cpy(&p->sha1, git_commit_id(commit)) < 0)
81 goto cleanup;
82 if (git_oid_cpy(&p->tree_oid, git_commit_tree_id(commit)) < 0)
83 goto cleanup;
84 p->commit_time = git_commit_time(commit);
85
86 for (i = 0; i < parentcount; ++i) {
87 git_oid *parent_id = git_array_alloc(p->parents);
88 if (!parent_id)
89 goto cleanup;
90 if (git_oid_cpy(parent_id, git_commit_parent_id(commit, i)) < 0)
91 goto cleanup;
92 }
93
94 return p;
95
96 cleanup:
97 packed_commit_free(p);
98 return NULL;
99 }
100
101 typedef int (*commit_graph_write_cb)(const char *buf, size_t size, void *cb_data);
102
commit_graph_error(const char * message)103 static int commit_graph_error(const char *message)
104 {
105 git_error_set(GIT_ERROR_ODB, "invalid commit-graph file - %s", message);
106 return -1;
107 }
108
commit_graph_parse_oid_fanout(git_commit_graph_file * file,const unsigned char * data,struct git_commit_graph_chunk * chunk_oid_fanout)109 static int commit_graph_parse_oid_fanout(
110 git_commit_graph_file *file,
111 const unsigned char *data,
112 struct git_commit_graph_chunk *chunk_oid_fanout)
113 {
114 uint32_t i, nr;
115 if (chunk_oid_fanout->offset == 0)
116 return commit_graph_error("missing OID Fanout chunk");
117 if (chunk_oid_fanout->length == 0)
118 return commit_graph_error("empty OID Fanout chunk");
119 if (chunk_oid_fanout->length != 256 * 4)
120 return commit_graph_error("OID Fanout chunk has wrong length");
121
122 file->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset);
123 nr = 0;
124 for (i = 0; i < 256; ++i) {
125 uint32_t n = ntohl(file->oid_fanout[i]);
126 if (n < nr)
127 return commit_graph_error("index is non-monotonic");
128 nr = n;
129 }
130 file->num_commits = nr;
131 return 0;
132 }
133
commit_graph_parse_oid_lookup(git_commit_graph_file * file,const unsigned char * data,struct git_commit_graph_chunk * chunk_oid_lookup)134 static int commit_graph_parse_oid_lookup(
135 git_commit_graph_file *file,
136 const unsigned char *data,
137 struct git_commit_graph_chunk *chunk_oid_lookup)
138 {
139 uint32_t i;
140 git_oid *oid, *prev_oid, zero_oid = {{0}};
141
142 if (chunk_oid_lookup->offset == 0)
143 return commit_graph_error("missing OID Lookup chunk");
144 if (chunk_oid_lookup->length == 0)
145 return commit_graph_error("empty OID Lookup chunk");
146 if (chunk_oid_lookup->length != file->num_commits * GIT_OID_RAWSZ)
147 return commit_graph_error("OID Lookup chunk has wrong length");
148
149 file->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset);
150 prev_oid = &zero_oid;
151 for (i = 0; i < file->num_commits; ++i, ++oid) {
152 if (git_oid_cmp(prev_oid, oid) >= 0)
153 return commit_graph_error("OID Lookup index is non-monotonic");
154 prev_oid = oid;
155 }
156
157 return 0;
158 }
159
commit_graph_parse_commit_data(git_commit_graph_file * file,const unsigned char * data,struct git_commit_graph_chunk * chunk_commit_data)160 static int commit_graph_parse_commit_data(
161 git_commit_graph_file *file,
162 const unsigned char *data,
163 struct git_commit_graph_chunk *chunk_commit_data)
164 {
165 if (chunk_commit_data->offset == 0)
166 return commit_graph_error("missing Commit Data chunk");
167 if (chunk_commit_data->length == 0)
168 return commit_graph_error("empty Commit Data chunk");
169 if (chunk_commit_data->length != file->num_commits * (GIT_OID_RAWSZ + 16))
170 return commit_graph_error("Commit Data chunk has wrong length");
171
172 file->commit_data = data + chunk_commit_data->offset;
173
174 return 0;
175 }
176
commit_graph_parse_extra_edge_list(git_commit_graph_file * file,const unsigned char * data,struct git_commit_graph_chunk * chunk_extra_edge_list)177 static int commit_graph_parse_extra_edge_list(
178 git_commit_graph_file *file,
179 const unsigned char *data,
180 struct git_commit_graph_chunk *chunk_extra_edge_list)
181 {
182 if (chunk_extra_edge_list->length == 0)
183 return 0;
184 if (chunk_extra_edge_list->length % 4 != 0)
185 return commit_graph_error("malformed Extra Edge List chunk");
186
187 file->extra_edge_list = data + chunk_extra_edge_list->offset;
188 file->num_extra_edge_list = chunk_extra_edge_list->length / 4;
189
190 return 0;
191 }
192
git_commit_graph_file_parse(git_commit_graph_file * file,const unsigned char * data,size_t size)193 int git_commit_graph_file_parse(
194 git_commit_graph_file *file,
195 const unsigned char *data,
196 size_t size)
197 {
198 struct git_commit_graph_header *hdr;
199 const unsigned char *chunk_hdr;
200 struct git_commit_graph_chunk *last_chunk;
201 uint32_t i;
202 off64_t last_chunk_offset, chunk_offset, trailer_offset;
203 git_oid cgraph_checksum = {{0}};
204 int error;
205 struct git_commit_graph_chunk chunk_oid_fanout = {0}, chunk_oid_lookup = {0},
206 chunk_commit_data = {0}, chunk_extra_edge_list = {0},
207 chunk_unsupported = {0};
208
209 GIT_ASSERT_ARG(file);
210
211 if (size < sizeof(struct git_commit_graph_header) + GIT_OID_RAWSZ)
212 return commit_graph_error("commit-graph is too short");
213
214 hdr = ((struct git_commit_graph_header *)data);
215
216 if (hdr->signature != htonl(COMMIT_GRAPH_SIGNATURE) || hdr->version != COMMIT_GRAPH_VERSION
217 || hdr->object_id_version != COMMIT_GRAPH_OBJECT_ID_VERSION) {
218 return commit_graph_error("unsupported commit-graph version");
219 }
220 if (hdr->chunks == 0)
221 return commit_graph_error("no chunks in commit-graph");
222
223 /*
224 * The very first chunk's offset should be after the header, all the chunk
225 * headers, and a special zero chunk.
226 */
227 last_chunk_offset = sizeof(struct git_commit_graph_header) + (1 + hdr->chunks) * 12;
228 trailer_offset = size - GIT_OID_RAWSZ;
229 if (trailer_offset < last_chunk_offset)
230 return commit_graph_error("wrong commit-graph size");
231 git_oid_cpy(&file->checksum, (git_oid *)(data + trailer_offset));
232
233 if (git_hash_buf(&cgraph_checksum, data, (size_t)trailer_offset) < 0)
234 return commit_graph_error("could not calculate signature");
235 if (!git_oid_equal(&cgraph_checksum, &file->checksum))
236 return commit_graph_error("index signature mismatch");
237
238 chunk_hdr = data + sizeof(struct git_commit_graph_header);
239 last_chunk = NULL;
240 for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) {
241 chunk_offset = ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 4)))) << 32
242 | ((off64_t)ntohl(*((uint32_t *)(chunk_hdr + 8))));
243 if (chunk_offset < last_chunk_offset)
244 return commit_graph_error("chunks are non-monotonic");
245 if (chunk_offset >= trailer_offset)
246 return commit_graph_error("chunks extend beyond the trailer");
247 if (last_chunk != NULL)
248 last_chunk->length = (size_t)(chunk_offset - last_chunk_offset);
249 last_chunk_offset = chunk_offset;
250
251 switch (ntohl(*((uint32_t *)(chunk_hdr + 0)))) {
252 case COMMIT_GRAPH_OID_FANOUT_ID:
253 chunk_oid_fanout.offset = last_chunk_offset;
254 last_chunk = &chunk_oid_fanout;
255 break;
256
257 case COMMIT_GRAPH_OID_LOOKUP_ID:
258 chunk_oid_lookup.offset = last_chunk_offset;
259 last_chunk = &chunk_oid_lookup;
260 break;
261
262 case COMMIT_GRAPH_COMMIT_DATA_ID:
263 chunk_commit_data.offset = last_chunk_offset;
264 last_chunk = &chunk_commit_data;
265 break;
266
267 case COMMIT_GRAPH_EXTRA_EDGE_LIST_ID:
268 chunk_extra_edge_list.offset = last_chunk_offset;
269 last_chunk = &chunk_extra_edge_list;
270 break;
271
272 case COMMIT_GRAPH_BLOOM_FILTER_INDEX_ID:
273 case COMMIT_GRAPH_BLOOM_FILTER_DATA_ID:
274 chunk_unsupported.offset = last_chunk_offset;
275 last_chunk = &chunk_unsupported;
276 break;
277
278 default:
279 return commit_graph_error("unrecognized chunk ID");
280 }
281 }
282 last_chunk->length = (size_t)(trailer_offset - last_chunk_offset);
283
284 error = commit_graph_parse_oid_fanout(file, data, &chunk_oid_fanout);
285 if (error < 0)
286 return error;
287 error = commit_graph_parse_oid_lookup(file, data, &chunk_oid_lookup);
288 if (error < 0)
289 return error;
290 error = commit_graph_parse_commit_data(file, data, &chunk_commit_data);
291 if (error < 0)
292 return error;
293 error = commit_graph_parse_extra_edge_list(file, data, &chunk_extra_edge_list);
294 if (error < 0)
295 return error;
296
297 return 0;
298 }
299
git_commit_graph_new(git_commit_graph ** cgraph_out,const char * objects_dir,bool open_file)300 int git_commit_graph_new(git_commit_graph **cgraph_out, const char *objects_dir, bool open_file)
301 {
302 git_commit_graph *cgraph = NULL;
303 int error = 0;
304
305 GIT_ASSERT_ARG(cgraph_out);
306 GIT_ASSERT_ARG(objects_dir);
307
308 cgraph = git__calloc(1, sizeof(git_commit_graph));
309 GIT_ERROR_CHECK_ALLOC(cgraph);
310
311 error = git_buf_joinpath(&cgraph->filename, objects_dir, "info/commit-graph");
312 if (error < 0)
313 goto error;
314
315 if (open_file) {
316 error = git_commit_graph_file_open(&cgraph->file, git_buf_cstr(&cgraph->filename));
317 if (error < 0)
318 goto error;
319 cgraph->checked = 1;
320 }
321
322 *cgraph_out = cgraph;
323 return 0;
324
325 error:
326 git_commit_graph_free(cgraph);
327 return error;
328 }
329
git_commit_graph_open(git_commit_graph ** cgraph_out,const char * objects_dir)330 int git_commit_graph_open(git_commit_graph **cgraph_out, const char *objects_dir)
331 {
332 return git_commit_graph_new(cgraph_out, objects_dir, true);
333 }
334
git_commit_graph_file_open(git_commit_graph_file ** file_out,const char * path)335 int git_commit_graph_file_open(git_commit_graph_file **file_out, const char *path)
336 {
337 git_commit_graph_file *file;
338 git_file fd = -1;
339 size_t cgraph_size;
340 struct stat st;
341 int error;
342
343 /* TODO: properly open the file without access time using O_NOATIME */
344 fd = git_futils_open_ro(path);
345 if (fd < 0)
346 return fd;
347
348 if (p_fstat(fd, &st) < 0) {
349 p_close(fd);
350 git_error_set(GIT_ERROR_ODB, "commit-graph file not found - '%s'", path);
351 return GIT_ENOTFOUND;
352 }
353
354 if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)) {
355 p_close(fd);
356 git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
357 return GIT_ENOTFOUND;
358 }
359 cgraph_size = (size_t)st.st_size;
360
361 file = git__calloc(1, sizeof(git_commit_graph_file));
362 GIT_ERROR_CHECK_ALLOC(file);
363
364 error = git_futils_mmap_ro(&file->graph_map, fd, 0, cgraph_size);
365 p_close(fd);
366 if (error < 0) {
367 git_commit_graph_file_free(file);
368 return error;
369 }
370
371 if ((error = git_commit_graph_file_parse(file, file->graph_map.data, cgraph_size)) < 0) {
372 git_commit_graph_file_free(file);
373 return error;
374 }
375
376 *file_out = file;
377 return 0;
378 }
379
git_commit_graph_get_file(git_commit_graph_file ** file_out,git_commit_graph * cgraph)380 int git_commit_graph_get_file(git_commit_graph_file **file_out, git_commit_graph *cgraph)
381 {
382 if (!cgraph->checked) {
383 int error = 0;
384 git_commit_graph_file *result = NULL;
385
386 /* We only check once, no matter the result. */
387 cgraph->checked = 1;
388
389 /* Best effort */
390 error = git_commit_graph_file_open(&result, git_buf_cstr(&cgraph->filename));
391
392 if (error < 0)
393 return error;
394
395 cgraph->file = result;
396 }
397 if (!cgraph->file)
398 return GIT_ENOTFOUND;
399
400 *file_out = cgraph->file;
401 return 0;
402 }
403
git_commit_graph_refresh(git_commit_graph * cgraph)404 void git_commit_graph_refresh(git_commit_graph *cgraph)
405 {
406 if (!cgraph->checked)
407 return;
408
409 if (cgraph->file
410 && git_commit_graph_file_needs_refresh(cgraph->file, git_buf_cstr(&cgraph->filename))) {
411 /* We just free the commit graph. The next time it is requested, it will be
412 * re-loaded. */
413 git_commit_graph_file_free(cgraph->file);
414 cgraph->file = NULL;
415 }
416 /* Force a lazy re-check next time it is needed. */
417 cgraph->checked = 0;
418 }
419
git_commit_graph_entry_get_byindex(git_commit_graph_entry * e,const git_commit_graph_file * file,size_t pos)420 static int git_commit_graph_entry_get_byindex(
421 git_commit_graph_entry *e,
422 const git_commit_graph_file *file,
423 size_t pos)
424 {
425 const unsigned char *commit_data;
426
427 GIT_ASSERT_ARG(e);
428 GIT_ASSERT_ARG(file);
429
430 if (pos >= file->num_commits) {
431 git_error_set(GIT_ERROR_INVALID, "commit index %zu does not exist", pos);
432 return GIT_ENOTFOUND;
433 }
434
435 commit_data = file->commit_data + pos * (GIT_OID_RAWSZ + 4 * sizeof(uint32_t));
436 git_oid_cpy(&e->tree_oid, (const git_oid *)commit_data);
437 e->parent_indices[0] = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ)));
438 e->parent_indices[1] = ntohl(
439 *((uint32_t *)(commit_data + GIT_OID_RAWSZ + sizeof(uint32_t))));
440 e->parent_count = (e->parent_indices[0] != GIT_COMMIT_GRAPH_MISSING_PARENT)
441 + (e->parent_indices[1] != GIT_COMMIT_GRAPH_MISSING_PARENT);
442 e->generation = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ + 2 * sizeof(uint32_t))));
443 e->commit_time = ntohl(*((uint32_t *)(commit_data + GIT_OID_RAWSZ + 3 * sizeof(uint32_t))));
444
445 e->commit_time |= (e->generation & UINT64_C(0x3)) << UINT64_C(32);
446 e->generation >>= 2u;
447 if (e->parent_indices[1] & 0x80000000u) {
448 uint32_t extra_edge_list_pos = e->parent_indices[1] & 0x7fffffff;
449
450 /* Make sure we're not being sent out of bounds */
451 if (extra_edge_list_pos >= file->num_extra_edge_list) {
452 git_error_set(GIT_ERROR_INVALID,
453 "commit %u does not exist",
454 extra_edge_list_pos);
455 return GIT_ENOTFOUND;
456 }
457
458 e->extra_parents_index = extra_edge_list_pos;
459 while (extra_edge_list_pos < file->num_extra_edge_list
460 && (ntohl(*(
461 (uint32_t *)(file->extra_edge_list
462 + extra_edge_list_pos * sizeof(uint32_t))))
463 & 0x80000000u)
464 == 0) {
465 extra_edge_list_pos++;
466 e->parent_count++;
467 }
468 }
469 git_oid_cpy(&e->sha1, &file->oid_lookup[pos]);
470 return 0;
471 }
472
git_commit_graph_file_needs_refresh(const git_commit_graph_file * file,const char * path)473 bool git_commit_graph_file_needs_refresh(const git_commit_graph_file *file, const char *path)
474 {
475 git_file fd = -1;
476 struct stat st;
477 ssize_t bytes_read;
478 git_oid cgraph_checksum = {{0}};
479
480 /* TODO: properly open the file without access time using O_NOATIME */
481 fd = git_futils_open_ro(path);
482 if (fd < 0)
483 return true;
484
485 if (p_fstat(fd, &st) < 0) {
486 p_close(fd);
487 return true;
488 }
489
490 if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)
491 || (size_t)st.st_size != file->graph_map.len) {
492 p_close(fd);
493 return true;
494 }
495
496 bytes_read = p_pread(fd, cgraph_checksum.id, GIT_OID_RAWSZ, st.st_size - GIT_OID_RAWSZ);
497 p_close(fd);
498 if (bytes_read != GIT_OID_RAWSZ)
499 return true;
500
501 return !git_oid_equal(&cgraph_checksum, &file->checksum);
502 }
503
git_commit_graph_entry_find(git_commit_graph_entry * e,const git_commit_graph_file * file,const git_oid * short_oid,size_t len)504 int git_commit_graph_entry_find(
505 git_commit_graph_entry *e,
506 const git_commit_graph_file *file,
507 const git_oid *short_oid,
508 size_t len)
509 {
510 int pos, found = 0;
511 uint32_t hi, lo;
512 const git_oid *current = NULL;
513
514 GIT_ASSERT_ARG(e);
515 GIT_ASSERT_ARG(file);
516 GIT_ASSERT_ARG(short_oid);
517
518 hi = ntohl(file->oid_fanout[(int)short_oid->id[0]]);
519 lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(file->oid_fanout[(int)short_oid->id[0] - 1]));
520
521 pos = git_pack__lookup_sha1(file->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id);
522
523 if (pos >= 0) {
524 /* An object matching exactly the oid was found */
525 found = 1;
526 current = file->oid_lookup + pos;
527 } else {
528 /* No object was found */
529 /* pos refers to the object with the "closest" oid to short_oid */
530 pos = -1 - pos;
531 if (pos < (int)file->num_commits) {
532 current = file->oid_lookup + pos;
533
534 if (!git_oid_ncmp(short_oid, current, len))
535 found = 1;
536 }
537 }
538
539 if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)file->num_commits) {
540 /* Check for ambiguousity */
541 const git_oid *next = current + 1;
542
543 if (!git_oid_ncmp(short_oid, next, len)) {
544 found = 2;
545 }
546 }
547
548 if (!found)
549 return git_odb__error_notfound(
550 "failed to find offset for commit-graph index entry", short_oid, len);
551 if (found > 1)
552 return git_odb__error_ambiguous(
553 "found multiple offsets for commit-graph index entry");
554
555 return git_commit_graph_entry_get_byindex(e, file, pos);
556 }
557
git_commit_graph_entry_parent(git_commit_graph_entry * parent,const git_commit_graph_file * file,const git_commit_graph_entry * entry,size_t n)558 int git_commit_graph_entry_parent(
559 git_commit_graph_entry *parent,
560 const git_commit_graph_file *file,
561 const git_commit_graph_entry *entry,
562 size_t n)
563 {
564 GIT_ASSERT_ARG(parent);
565 GIT_ASSERT_ARG(file);
566
567 if (n >= entry->parent_count) {
568 git_error_set(GIT_ERROR_INVALID, "parent index %zu does not exist", n);
569 return GIT_ENOTFOUND;
570 }
571
572 if (n == 0 || (n == 1 && entry->parent_count == 2))
573 return git_commit_graph_entry_get_byindex(parent, file, entry->parent_indices[n]);
574
575 return git_commit_graph_entry_get_byindex(
576 parent,
577 file,
578 ntohl(
579 *(uint32_t *)(file->extra_edge_list
580 + (entry->extra_parents_index + n - 1)
581 * sizeof(uint32_t)))
582 & 0x7fffffff);
583 }
584
git_commit_graph_file_close(git_commit_graph_file * file)585 int git_commit_graph_file_close(git_commit_graph_file *file)
586 {
587 GIT_ASSERT_ARG(file);
588
589 if (file->graph_map.data)
590 git_futils_mmap_free(&file->graph_map);
591
592 return 0;
593 }
594
git_commit_graph_free(git_commit_graph * cgraph)595 void git_commit_graph_free(git_commit_graph *cgraph)
596 {
597 if (!cgraph)
598 return;
599
600 git_buf_dispose(&cgraph->filename);
601 git_commit_graph_file_free(cgraph->file);
602 git__free(cgraph);
603 }
604
git_commit_graph_file_free(git_commit_graph_file * file)605 void git_commit_graph_file_free(git_commit_graph_file *file)
606 {
607 if (!file)
608 return;
609
610 git_commit_graph_file_close(file);
611 git__free(file);
612 }
613
packed_commit__cmp(const void * a_,const void * b_)614 static int packed_commit__cmp(const void *a_, const void *b_)
615 {
616 const struct packed_commit *a = a_;
617 const struct packed_commit *b = b_;
618 return git_oid_cmp(&a->sha1, &b->sha1);
619 }
620
git_commit_graph_writer_new(git_commit_graph_writer ** out,const char * objects_info_dir)621 int git_commit_graph_writer_new(git_commit_graph_writer **out, const char *objects_info_dir)
622 {
623 git_commit_graph_writer *w = git__calloc(1, sizeof(git_commit_graph_writer));
624 GIT_ERROR_CHECK_ALLOC(w);
625
626 if (git_buf_sets(&w->objects_info_dir, objects_info_dir) < 0) {
627 git__free(w);
628 return -1;
629 }
630
631 if (git_vector_init(&w->commits, 0, packed_commit__cmp) < 0) {
632 git_buf_dispose(&w->objects_info_dir);
633 git__free(w);
634 return -1;
635 }
636
637 *out = w;
638 return 0;
639 }
640
git_commit_graph_writer_free(git_commit_graph_writer * w)641 void git_commit_graph_writer_free(git_commit_graph_writer *w)
642 {
643 struct packed_commit *packed_commit;
644 size_t i;
645
646 if (!w)
647 return;
648
649 git_vector_foreach (&w->commits, i, packed_commit)
650 packed_commit_free(packed_commit);
651 git_vector_free(&w->commits);
652 git_buf_dispose(&w->objects_info_dir);
653 git__free(w);
654 }
655
656 struct object_entry_cb_state {
657 git_repository *repo;
658 git_odb *db;
659 git_vector *commits;
660 };
661
object_entry__cb(const git_oid * id,void * data)662 static int object_entry__cb(const git_oid *id, void *data)
663 {
664 struct object_entry_cb_state *state = (struct object_entry_cb_state *)data;
665 git_commit *commit = NULL;
666 struct packed_commit *packed_commit = NULL;
667 size_t header_len;
668 git_object_t header_type;
669 int error = 0;
670
671 error = git_odb_read_header(&header_len, &header_type, state->db, id);
672 if (error < 0)
673 return error;
674
675 if (header_type != GIT_OBJECT_COMMIT)
676 return 0;
677
678 error = git_commit_lookup(&commit, state->repo, id);
679 if (error < 0)
680 return error;
681
682 packed_commit = packed_commit_new(commit);
683 git_commit_free(commit);
684 GIT_ERROR_CHECK_ALLOC(packed_commit);
685
686 error = git_vector_insert(state->commits, packed_commit);
687 if (error < 0) {
688 packed_commit_free(packed_commit);
689 return error;
690 }
691
692 return 0;
693 }
694
git_commit_graph_writer_add_index_file(git_commit_graph_writer * w,git_repository * repo,const char * idx_path)695 int git_commit_graph_writer_add_index_file(
696 git_commit_graph_writer *w,
697 git_repository *repo,
698 const char *idx_path)
699 {
700 int error;
701 struct git_pack_file *p = NULL;
702 struct object_entry_cb_state state = {0};
703 state.repo = repo;
704 state.commits = &w->commits;
705
706 error = git_repository_odb(&state.db, repo);
707 if (error < 0)
708 goto cleanup;
709
710 error = git_mwindow_get_pack(&p, idx_path);
711 if (error < 0)
712 goto cleanup;
713
714 error = git_pack_foreach_entry(p, object_entry__cb, &state);
715 if (error < 0)
716 goto cleanup;
717
718 cleanup:
719 if (p)
720 git_mwindow_put_pack(p);
721 git_odb_free(state.db);
722 return error;
723 }
724
git_commit_graph_writer_add_revwalk(git_commit_graph_writer * w,git_revwalk * walk)725 int git_commit_graph_writer_add_revwalk(git_commit_graph_writer *w, git_revwalk *walk)
726 {
727 int error;
728 git_oid id;
729 git_repository *repo = git_revwalk_repository(walk);
730 git_commit *commit;
731 struct packed_commit *packed_commit;
732
733 while ((git_revwalk_next(&id, walk)) == 0) {
734 error = git_commit_lookup(&commit, repo, &id);
735 if (error < 0)
736 return error;
737
738 packed_commit = packed_commit_new(commit);
739 git_commit_free(commit);
740 GIT_ERROR_CHECK_ALLOC(packed_commit);
741
742 error = git_vector_insert(&w->commits, packed_commit);
743 if (error < 0) {
744 packed_commit_free(packed_commit);
745 return error;
746 }
747 }
748
749 return 0;
750 }
751
752 enum generation_number_commit_state {
753 GENERATION_NUMBER_COMMIT_STATE_UNVISITED = 0,
754 GENERATION_NUMBER_COMMIT_STATE_ADDED = 1,
755 GENERATION_NUMBER_COMMIT_STATE_EXPANDED = 2,
756 GENERATION_NUMBER_COMMIT_STATE_VISITED = 3,
757 };
758
compute_generation_numbers(git_vector * commits)759 static int compute_generation_numbers(git_vector *commits)
760 {
761 git_array_t(size_t) index_stack = GIT_ARRAY_INIT;
762 size_t i, j;
763 size_t *parent_idx;
764 enum generation_number_commit_state *commit_states = NULL;
765 struct packed_commit *child_packed_commit;
766 git_oidmap *packed_commit_map = NULL;
767 int error = 0;
768
769 /* First populate the parent indices fields */
770 error = git_oidmap_new(&packed_commit_map);
771 if (error < 0)
772 goto cleanup;
773 git_vector_foreach (commits, i, child_packed_commit) {
774 child_packed_commit->index = i;
775 error = git_oidmap_set(
776 packed_commit_map, &child_packed_commit->sha1, child_packed_commit);
777 if (error < 0)
778 goto cleanup;
779 }
780
781 git_vector_foreach (commits, i, child_packed_commit) {
782 size_t parent_i, *parent_idx_ptr;
783 struct packed_commit *parent_packed_commit;
784 git_oid *parent_id;
785 git_array_init_to_size(
786 child_packed_commit->parent_indices,
787 git_array_size(child_packed_commit->parents));
788 if (git_array_size(child_packed_commit->parents)
789 && !child_packed_commit->parent_indices.ptr) {
790 error = -1;
791 goto cleanup;
792 }
793 git_array_foreach (child_packed_commit->parents, parent_i, parent_id) {
794 parent_packed_commit = git_oidmap_get(packed_commit_map, parent_id);
795 if (!parent_packed_commit) {
796 git_error_set(GIT_ERROR_ODB,
797 "parent commit %s not found in commit graph",
798 git_oid_tostr_s(parent_id));
799 error = GIT_ENOTFOUND;
800 goto cleanup;
801 }
802 parent_idx_ptr = git_array_alloc(child_packed_commit->parent_indices);
803 if (!parent_idx_ptr) {
804 error = -1;
805 goto cleanup;
806 }
807 *parent_idx_ptr = parent_packed_commit->index;
808 }
809 }
810
811 /*
812 * We copy all the commits to the stack and then during visitation,
813 * each node can be added up to two times to the stack.
814 */
815 git_array_init_to_size(index_stack, 3 * git_vector_length(commits));
816 if (!index_stack.ptr) {
817 error = -1;
818 goto cleanup;
819 }
820
821 commit_states = (enum generation_number_commit_state *)git__calloc(
822 git_vector_length(commits), sizeof(enum generation_number_commit_state));
823 if (!commit_states) {
824 error = -1;
825 goto cleanup;
826 }
827
828 /*
829 * Perform a Post-Order traversal so that all parent nodes are fully
830 * visited before the child node.
831 */
832 git_vector_foreach (commits, i, child_packed_commit)
833 *(size_t *)git_array_alloc(index_stack) = i;
834
835 while (git_array_size(index_stack)) {
836 size_t *index_ptr = git_array_pop(index_stack);
837 i = *index_ptr;
838 child_packed_commit = git_vector_get(commits, i);
839
840 if (commit_states[i] == GENERATION_NUMBER_COMMIT_STATE_VISITED) {
841 /* This commit has already been fully visited. */
842 continue;
843 }
844 if (commit_states[i] == GENERATION_NUMBER_COMMIT_STATE_EXPANDED) {
845 /* All of the commits parents have been visited. */
846 child_packed_commit->generation = 0;
847 git_array_foreach (child_packed_commit->parent_indices, j, parent_idx) {
848 struct packed_commit *parent = git_vector_get(commits, *parent_idx);
849 if (child_packed_commit->generation < parent->generation)
850 child_packed_commit->generation = parent->generation;
851 }
852 if (child_packed_commit->generation
853 < GIT_COMMIT_GRAPH_GENERATION_NUMBER_MAX) {
854 ++child_packed_commit->generation;
855 }
856 commit_states[i] = GENERATION_NUMBER_COMMIT_STATE_VISITED;
857 continue;
858 }
859
860 /*
861 * This is the first time we see this commit. We need
862 * to visit all its parents before we can fully visit
863 * it.
864 */
865 if (git_array_size(child_packed_commit->parent_indices) == 0) {
866 /*
867 * Special case: if the commit has no parents, there's
868 * no need to add it to the stack just to immediately
869 * remove it.
870 */
871 commit_states[i] = GENERATION_NUMBER_COMMIT_STATE_VISITED;
872 child_packed_commit->generation = 1;
873 continue;
874 }
875
876 /*
877 * Add this current commit again so that it is visited
878 * again once all its children have been visited.
879 */
880 *(size_t *)git_array_alloc(index_stack) = i;
881 git_array_foreach (child_packed_commit->parent_indices, j, parent_idx) {
882 if (commit_states[*parent_idx]
883 != GENERATION_NUMBER_COMMIT_STATE_UNVISITED) {
884 /* This commit has already been considered. */
885 continue;
886 }
887
888 commit_states[*parent_idx] = GENERATION_NUMBER_COMMIT_STATE_ADDED;
889 *(size_t *)git_array_alloc(index_stack) = *parent_idx;
890 }
891 commit_states[i] = GENERATION_NUMBER_COMMIT_STATE_EXPANDED;
892 }
893
894 cleanup:
895 git_oidmap_free(packed_commit_map);
896 git__free(commit_states);
897 git_array_clear(index_stack);
898
899 return error;
900 }
901
write_offset(off64_t offset,commit_graph_write_cb write_cb,void * cb_data)902 static int write_offset(off64_t offset, commit_graph_write_cb write_cb, void *cb_data)
903 {
904 int error;
905 uint32_t word;
906
907 word = htonl((uint32_t)((offset >> 32) & 0xffffffffu));
908 error = write_cb((const char *)&word, sizeof(word), cb_data);
909 if (error < 0)
910 return error;
911 word = htonl((uint32_t)((offset >> 0) & 0xffffffffu));
912 error = write_cb((const char *)&word, sizeof(word), cb_data);
913 if (error < 0)
914 return error;
915
916 return 0;
917 }
918
write_chunk_header(int chunk_id,off64_t offset,commit_graph_write_cb write_cb,void * cb_data)919 static int write_chunk_header(
920 int chunk_id,
921 off64_t offset,
922 commit_graph_write_cb write_cb,
923 void *cb_data)
924 {
925 uint32_t word = htonl(chunk_id);
926 int error = write_cb((const char *)&word, sizeof(word), cb_data);
927 if (error < 0)
928 return error;
929 return write_offset(offset, write_cb, cb_data);
930 }
931
commit_graph_write_buf(const char * buf,size_t size,void * data)932 static int commit_graph_write_buf(const char *buf, size_t size, void *data)
933 {
934 git_buf *b = (git_buf *)data;
935 return git_buf_put(b, buf, size);
936 }
937
938 struct commit_graph_write_hash_context {
939 commit_graph_write_cb write_cb;
940 void *cb_data;
941 git_hash_ctx *ctx;
942 };
943
commit_graph_write_hash(const char * buf,size_t size,void * data)944 static int commit_graph_write_hash(const char *buf, size_t size, void *data)
945 {
946 struct commit_graph_write_hash_context *ctx = data;
947 int error;
948
949 error = git_hash_update(ctx->ctx, buf, size);
950 if (error < 0)
951 return error;
952
953 return ctx->write_cb(buf, size, ctx->cb_data);
954 }
955
packed_commit_free_dup(void * packed_commit)956 static void packed_commit_free_dup(void *packed_commit)
957 {
958 packed_commit_free(packed_commit);
959 }
960
commit_graph_write(git_commit_graph_writer * w,commit_graph_write_cb write_cb,void * cb_data)961 static int commit_graph_write(
962 git_commit_graph_writer *w,
963 commit_graph_write_cb write_cb,
964 void *cb_data)
965 {
966 int error = 0;
967 size_t i;
968 struct packed_commit *packed_commit;
969 struct git_commit_graph_header hdr = {0};
970 uint32_t oid_fanout_count;
971 uint32_t extra_edge_list_count;
972 uint32_t oid_fanout[256];
973 off64_t offset;
974 git_buf oid_lookup = GIT_BUF_INIT, commit_data = GIT_BUF_INIT,
975 extra_edge_list = GIT_BUF_INIT;
976 git_oid cgraph_checksum = {{0}};
977 git_hash_ctx ctx;
978 struct commit_graph_write_hash_context hash_cb_data = {0};
979
980 hdr.signature = htonl(COMMIT_GRAPH_SIGNATURE);
981 hdr.version = COMMIT_GRAPH_VERSION;
982 hdr.object_id_version = COMMIT_GRAPH_OBJECT_ID_VERSION;
983 hdr.chunks = 0;
984 hdr.base_graph_files = 0;
985 hash_cb_data.write_cb = write_cb;
986 hash_cb_data.cb_data = cb_data;
987 hash_cb_data.ctx = &ctx;
988
989 error = git_hash_ctx_init(&ctx);
990 if (error < 0)
991 return error;
992 cb_data = &hash_cb_data;
993 write_cb = commit_graph_write_hash;
994
995 /* Sort the commits. */
996 git_vector_sort(&w->commits);
997 git_vector_uniq(&w->commits, packed_commit_free_dup);
998 error = compute_generation_numbers(&w->commits);
999 if (error < 0)
1000 goto cleanup;
1001
1002 /* Fill the OID Fanout table. */
1003 oid_fanout_count = 0;
1004 for (i = 0; i < 256; i++) {
1005 while (oid_fanout_count < git_vector_length(&w->commits) &&
1006 (packed_commit = (struct packed_commit *)git_vector_get(&w->commits, oid_fanout_count)) &&
1007 packed_commit->sha1.id[0] <= i)
1008 ++oid_fanout_count;
1009 oid_fanout[i] = htonl(oid_fanout_count);
1010 }
1011
1012 /* Fill the OID Lookup table. */
1013 git_vector_foreach (&w->commits, i, packed_commit) {
1014 error = git_buf_put(&oid_lookup,
1015 (const char *)&packed_commit->sha1, sizeof(git_oid));
1016 if (error < 0)
1017 goto cleanup;
1018 }
1019
1020 /* Fill the Commit Data and Extra Edge List tables. */
1021 extra_edge_list_count = 0;
1022 git_vector_foreach (&w->commits, i, packed_commit) {
1023 uint64_t commit_time;
1024 uint32_t generation;
1025 uint32_t word;
1026 size_t *packed_index;
1027 unsigned int parentcount = (unsigned int)git_array_size(packed_commit->parents);
1028
1029 error = git_buf_put(&commit_data,
1030 (const char *)&packed_commit->tree_oid,
1031 sizeof(git_oid));
1032 if (error < 0)
1033 goto cleanup;
1034
1035 if (parentcount == 0) {
1036 word = htonl(GIT_COMMIT_GRAPH_MISSING_PARENT);
1037 } else {
1038 packed_index = git_array_get(packed_commit->parent_indices, 0);
1039 word = htonl((uint32_t)*packed_index);
1040 }
1041 error = git_buf_put(&commit_data, (const char *)&word, sizeof(word));
1042 if (error < 0)
1043 goto cleanup;
1044
1045 if (parentcount < 2) {
1046 word = htonl(GIT_COMMIT_GRAPH_MISSING_PARENT);
1047 } else if (parentcount == 2) {
1048 packed_index = git_array_get(packed_commit->parent_indices, 1);
1049 word = htonl((uint32_t)*packed_index);
1050 } else {
1051 word = htonl(0x80000000u | extra_edge_list_count);
1052 }
1053 error = git_buf_put(&commit_data, (const char *)&word, sizeof(word));
1054 if (error < 0)
1055 goto cleanup;
1056
1057 if (parentcount > 2) {
1058 unsigned int parent_i;
1059 for (parent_i = 1; parent_i < parentcount; ++parent_i) {
1060 packed_index = git_array_get(
1061 packed_commit->parent_indices, parent_i);
1062 word = htonl((uint32_t)(*packed_index | (parent_i + 1 == parentcount ? 0x80000000u : 0)));
1063
1064 error = git_buf_put(&extra_edge_list,
1065 (const char *)&word,
1066 sizeof(word));
1067 if (error < 0)
1068 goto cleanup;
1069 }
1070 extra_edge_list_count += parentcount - 1;
1071 }
1072
1073 generation = packed_commit->generation;
1074 commit_time = (uint64_t)packed_commit->commit_time;
1075 if (generation > GIT_COMMIT_GRAPH_GENERATION_NUMBER_MAX)
1076 generation = GIT_COMMIT_GRAPH_GENERATION_NUMBER_MAX;
1077 word = ntohl((uint32_t)((generation << 2) | ((commit_time >> 32ull) & 0x3ull)));
1078 error = git_buf_put(&commit_data, (const char *)&word, sizeof(word));
1079 if (error < 0)
1080 goto cleanup;
1081 word = ntohl((uint32_t)(commit_time & 0xffffffffull));
1082 error = git_buf_put(&commit_data, (const char *)&word, sizeof(word));
1083 if (error < 0)
1084 goto cleanup;
1085 }
1086
1087 /* Write the header. */
1088 hdr.chunks = 3;
1089 if (git_buf_len(&extra_edge_list) > 0)
1090 hdr.chunks++;
1091 error = write_cb((const char *)&hdr, sizeof(hdr), cb_data);
1092 if (error < 0)
1093 goto cleanup;
1094
1095 /* Write the chunk headers. */
1096 offset = sizeof(hdr) + (hdr.chunks + 1) * 12;
1097 error = write_chunk_header(COMMIT_GRAPH_OID_FANOUT_ID, offset, write_cb, cb_data);
1098 if (error < 0)
1099 goto cleanup;
1100 offset += sizeof(oid_fanout);
1101 error = write_chunk_header(COMMIT_GRAPH_OID_LOOKUP_ID, offset, write_cb, cb_data);
1102 if (error < 0)
1103 goto cleanup;
1104 offset += git_buf_len(&oid_lookup);
1105 error = write_chunk_header(COMMIT_GRAPH_COMMIT_DATA_ID, offset, write_cb, cb_data);
1106 if (error < 0)
1107 goto cleanup;
1108 offset += git_buf_len(&commit_data);
1109 if (git_buf_len(&extra_edge_list) > 0) {
1110 error = write_chunk_header(
1111 COMMIT_GRAPH_EXTRA_EDGE_LIST_ID, offset, write_cb, cb_data);
1112 if (error < 0)
1113 goto cleanup;
1114 offset += git_buf_len(&extra_edge_list);
1115 }
1116 error = write_chunk_header(0, offset, write_cb, cb_data);
1117 if (error < 0)
1118 goto cleanup;
1119
1120 /* Write all the chunks. */
1121 error = write_cb((const char *)oid_fanout, sizeof(oid_fanout), cb_data);
1122 if (error < 0)
1123 goto cleanup;
1124 error = write_cb(git_buf_cstr(&oid_lookup), git_buf_len(&oid_lookup), cb_data);
1125 if (error < 0)
1126 goto cleanup;
1127 error = write_cb(git_buf_cstr(&commit_data), git_buf_len(&commit_data), cb_data);
1128 if (error < 0)
1129 goto cleanup;
1130 error = write_cb(git_buf_cstr(&extra_edge_list), git_buf_len(&extra_edge_list), cb_data);
1131 if (error < 0)
1132 goto cleanup;
1133
1134 /* Finalize the checksum and write the trailer. */
1135 error = git_hash_final(&cgraph_checksum, &ctx);
1136 if (error < 0)
1137 goto cleanup;
1138 error = write_cb((const char *)&cgraph_checksum, sizeof(cgraph_checksum), cb_data);
1139 if (error < 0)
1140 goto cleanup;
1141
1142 cleanup:
1143 git_buf_dispose(&oid_lookup);
1144 git_buf_dispose(&commit_data);
1145 git_buf_dispose(&extra_edge_list);
1146 git_hash_ctx_cleanup(&ctx);
1147 return error;
1148 }
1149
commit_graph_write_filebuf(const char * buf,size_t size,void * data)1150 static int commit_graph_write_filebuf(const char *buf, size_t size, void *data)
1151 {
1152 git_filebuf *f = (git_filebuf *)data;
1153 return git_filebuf_write(f, buf, size);
1154 }
1155
git_commit_graph_writer_options_init(git_commit_graph_writer_options * opts,unsigned int version)1156 int git_commit_graph_writer_options_init(
1157 git_commit_graph_writer_options *opts,
1158 unsigned int version)
1159 {
1160 GIT_INIT_STRUCTURE_FROM_TEMPLATE(
1161 opts,
1162 version,
1163 git_commit_graph_writer_options,
1164 GIT_COMMIT_GRAPH_WRITER_OPTIONS_INIT);
1165 return 0;
1166 }
1167
git_commit_graph_writer_commit(git_commit_graph_writer * w,git_commit_graph_writer_options * opts)1168 int git_commit_graph_writer_commit(
1169 git_commit_graph_writer *w,
1170 git_commit_graph_writer_options *opts)
1171 {
1172 int error;
1173 int filebuf_flags = GIT_FILEBUF_DO_NOT_BUFFER;
1174 git_buf commit_graph_path = GIT_BUF_INIT;
1175 git_filebuf output = GIT_FILEBUF_INIT;
1176
1177 /* TODO: support options and fill in defaults. */
1178 GIT_UNUSED(opts);
1179
1180 error = git_buf_joinpath(
1181 &commit_graph_path, git_buf_cstr(&w->objects_info_dir), "commit-graph");
1182 if (error < 0)
1183 return error;
1184
1185 if (git_repository__fsync_gitdir)
1186 filebuf_flags |= GIT_FILEBUF_FSYNC;
1187 error = git_filebuf_open(&output, git_buf_cstr(&commit_graph_path), filebuf_flags, 0644);
1188 git_buf_dispose(&commit_graph_path);
1189 if (error < 0)
1190 return error;
1191
1192 error = commit_graph_write(w, commit_graph_write_filebuf, &output);
1193 if (error < 0) {
1194 git_filebuf_cleanup(&output);
1195 return error;
1196 }
1197
1198 return git_filebuf_commit(&output);
1199 }
1200
git_commit_graph_writer_dump(git_buf * cgraph,git_commit_graph_writer * w,git_commit_graph_writer_options * opts)1201 int git_commit_graph_writer_dump(
1202 git_buf *cgraph,
1203 git_commit_graph_writer *w,
1204 git_commit_graph_writer_options *opts)
1205 {
1206 /* TODO: support options. */
1207 GIT_UNUSED(opts);
1208 return commit_graph_write(w, commit_graph_write_buf, cgraph);
1209 }
1210