1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "midx.h"
9 
10 #include "array.h"
11 #include "buffer.h"
12 #include "filebuf.h"
13 #include "futils.h"
14 #include "hash.h"
15 #include "odb.h"
16 #include "pack.h"
17 #include "path.h"
18 #include "repository.h"
19 
/*
 * Values the parser requires in the fixed header: the signature is compared
 * (after htonl) against the header's first word, and the two version bytes
 * must match exactly or the file is rejected as unsupported.
 */
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
#define MIDX_VERSION 1
#define MIDX_OBJECT_ID_VERSION 1
/*
 * Fixed-size header found at the very start of a multi-pack-index file.
 * The parser overlays this struct directly on the file data and the writer
 * emits it verbatim, so the field order and widths must not change.
 */
struct git_midx_header {
	uint32_t signature;        /* MIDX_SIGNATURE, stored in network byte order */
	uint8_t version;           /* must equal MIDX_VERSION */
	uint8_t object_id_version; /* must equal MIDX_OBJECT_ID_VERSION */
	uint8_t chunks;            /* number of chunk table entries; 0 is rejected */
	uint8_t base_midx_files;   /* written as 0; not inspected by the parser */
	uint32_t packfiles;        /* number of packfile names, network byte order */
};
31 
/*
 * Four-byte chunk identifiers recognised in the chunk table. Any other
 * identifier makes git_midx_parse() fail with "unrecognized chunk ID".
 */
#define MIDX_PACKFILE_NAMES_ID 0x504e414d	   /* "PNAM" */
#define MIDX_OID_FANOUT_ID 0x4f494446	   /* "OIDF" */
#define MIDX_OID_LOOKUP_ID 0x4f49444c	   /* "OIDL" */
#define MIDX_OBJECT_OFFSETS_ID 0x4f4f4646	   /* "OOFF" */
#define MIDX_OBJECT_LARGE_OFFSETS_ID 0x4c4f4646 /* "LOFF" */
37 
/*
 * Location of one chunk inside the mapped file, as gathered from the chunk
 * table by git_midx_parse().
 */
struct git_midx_chunk {
	off64_t offset; /* byte offset from the start of the file data; 0 means "not present" */
	size_t length;  /* distance to the next chunk's offset, or to the trailer for the last chunk */
};
42 
/*
 * Output sink used by the writer: receives `size` bytes from `buf` plus the
 * caller-supplied `cb_data`. A negative return value is treated as an error
 * and aborts the write.
 */
typedef int (*midx_write_cb)(const char *buf, size_t size, void *cb_data);
44 
midx_error(const char * message)45 static int midx_error(const char *message)
46 {
47 	git_error_set(GIT_ERROR_ODB, "invalid multi-pack-index file - %s", message);
48 	return -1;
49 }
50 
/*
 * Validate the Packfile Names chunk and record a pointer to each name in
 * idx->packfile_names.
 *
 * The chunk is a sequence of NUL-terminated strings. Exactly `packfiles`
 * names are consumed; each must be non-empty, terminated inside the chunk,
 * strictly greater than the previous one, end in ".idx" (with at least one
 * character before the suffix) and contain no directory separator.
 *
 * The stored pointers reference `data` directly; nothing is copied.
 *
 * Returns 0 on success, or a negative value with the error message set.
 */
static int midx_parse_packfile_names(
		git_midx_file *idx,
		const unsigned char *data,
		uint32_t packfiles,
		struct git_midx_chunk *chunk)
{
	int error;
	uint32_t i;
	char *packfile_name = (char *)(data + chunk->offset);
	size_t chunk_size = chunk->length, len;

	if (chunk->offset == 0)
		return midx_error("missing Packfile Names chunk");
	if (chunk->length == 0)
		return midx_error("empty Packfile Names chunk");
	if ((error = git_vector_init(&idx->packfile_names, packfiles, git__strcmp_cb)) < 0)
		return error;

	for (i = 0; i < packfiles; ++i) {
		/* Bounded scan: never look past the bytes that remain in the chunk. */
		len = p_strnlen(packfile_name, chunk_size);
		if (len == 0)
			return midx_error("empty packfile name");
		if (len + 1 > chunk_size)
			return midx_error("unterminated packfile name");

		/*
		 * A failed insert must stop the parse: the ordering check below
		 * reads back slot i - 1 and relies on every earlier insert
		 * having succeeded.
		 */
		if ((error = git_vector_insert(&idx->packfile_names, packfile_name)) < 0)
			return error;

		if (i && strcmp(git_vector_get(&idx->packfile_names, i - 1), packfile_name) >= 0)
			return midx_error("packfile names are not sorted");

		/* The name is NUL-terminated within the chunk, so `len` is its strlen. */
		if (len <= strlen(".idx") || git__suffixcmp(packfile_name, ".idx") != 0)
			return midx_error("non-.idx packfile name");
		if (strchr(packfile_name, '/') != NULL || strchr(packfile_name, '\\') != NULL)
			return midx_error("non-local packfile");

		/* Step over the name and its terminator. */
		packfile_name += len + 1;
		chunk_size -= len + 1;
	}

	return 0;
}
85 
/*
 * Validate the OID Fanout chunk: it must be present, exactly 256 big-endian
 * 32-bit counters long, and the counters must never decrease.
 *
 * On success idx->oid_fanout points into `data` and idx->num_objects is set
 * to the final (largest) counter.
 */
static int midx_parse_oid_fanout(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_oid_fanout)
{
	uint32_t bucket;
	uint32_t previous = 0;

	if (chunk_oid_fanout->offset == 0)
		return midx_error("missing OID Fanout chunk");
	if (chunk_oid_fanout->length == 0)
		return midx_error("empty OID Fanout chunk");
	if (chunk_oid_fanout->length != 256 * 4)
		return midx_error("OID Fanout chunk has wrong length");

	idx->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset);

	for (bucket = 0; bucket < 256; ++bucket) {
		uint32_t cumulative = ntohl(idx->oid_fanout[bucket]);

		if (cumulative < previous)
			return midx_error("index is non-monotonic");
		previous = cumulative;
	}

	/* The last fanout slot is the total object count. */
	idx->num_objects = previous;
	return 0;
}
110 
/*
 * Validate the OID Lookup chunk: it must be present, hold exactly
 * idx->num_objects raw object ids, and those ids must be strictly
 * increasing (the first one must also be greater than the all-zero id).
 *
 * On success idx->oid_lookup points into `data`.
 */
static int midx_parse_oid_lookup(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_oid_lookup)
{
	uint32_t i;
	git_oid *oid, *prev_oid, zero_oid = {{0}};

	if (chunk_oid_lookup->offset == 0)
		return midx_error("missing OID Lookup chunk");
	if (chunk_oid_lookup->length == 0)
		return midx_error("empty OID Lookup chunk");

	/*
	 * Compute the expected size in 64 bits. The object count comes from the
	 * (untrusted) fanout table; a narrower multiplication could wrap and let
	 * an undersized chunk pass this check, after which the loop below would
	 * read past the end of the chunk.
	 */
	if ((uint64_t)chunk_oid_lookup->length != (uint64_t)idx->num_objects * GIT_OID_RAWSZ)
		return midx_error("OID Lookup chunk has wrong length");

	idx->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset);
	prev_oid = &zero_oid;
	for (i = 0; i < idx->num_objects; ++i, ++oid) {
		if (git_oid_cmp(prev_oid, oid) >= 0)
			return midx_error("OID Lookup index is non-monotonic");
		prev_oid = oid;
	}

	return 0;
}
136 
/*
 * Validate the Object Offsets chunk: it must be present and contain exactly
 * one 8-byte record per object.
 *
 * On success idx->object_offsets points into `data`.
 */
static int midx_parse_object_offsets(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_object_offsets)
{
	if (chunk_object_offsets->offset == 0)
		return midx_error("missing Object Offsets chunk");
	if (chunk_object_offsets->length == 0)
		return midx_error("empty Object Offsets chunk");

	/*
	 * Widen before multiplying so that a huge object count cannot wrap
	 * around and make an undersized chunk look correct.
	 */
	if ((uint64_t)chunk_object_offsets->length != (uint64_t)idx->num_objects * 8)
		return midx_error("Object Offsets chunk has wrong length");

	idx->object_offsets = data + chunk_object_offsets->offset;

	return 0;
}
153 
/*
 * Record the optional Object Large Offsets chunk. A zero-length chunk is
 * accepted and leaves `idx` untouched; otherwise the length must be a whole
 * number of 8-byte entries.
 */
static int midx_parse_object_large_offsets(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_object_large_offsets)
{
	const size_t byte_len = chunk_object_large_offsets->length;

	/* Nothing to record when the chunk is absent or empty. */
	if (byte_len == 0)
		return 0;

	if ((byte_len % 8) != 0)
		return midx_error("malformed Object Large Offsets chunk");

	idx->num_object_large_offsets = byte_len / 8;
	idx->object_large_offsets = data + chunk_object_large_offsets->offset;

	return 0;
}
169 
/*
 * Parse an in-memory multi-pack-index of `size` bytes located at `data`
 * and fill in `idx` with pointers into that memory.
 *
 * The steps are: check the fixed header, verify the trailing checksum over
 * everything that precedes it, walk the chunk table to locate each chunk,
 * then hand every chunk to its dedicated validator.
 *
 * Returns 0 on success or a negative value with the error message set.
 */
int git_midx_parse(
		git_midx_file *idx,
		const unsigned char *data,
		size_t size)
{
	struct git_midx_header *hdr;
	const unsigned char *table_entry;
	struct git_midx_chunk *prev_chunk = NULL, *target;
	uint32_t n, chunk_id, offset_hi, offset_lo;
	off64_t prev_offset, cur_offset, trailer_offset;
	git_oid computed = {{0}};
	int error;
	struct git_midx_chunk chunk_packfile_names = {0},
					 chunk_oid_fanout = {0},
					 chunk_oid_lookup = {0},
					 chunk_object_offsets = {0},
					 chunk_object_large_offsets = {0};

	GIT_ASSERT_ARG(idx);

	if (size < sizeof(struct git_midx_header) + GIT_OID_RAWSZ)
		return midx_error("multi-pack index is too short");

	hdr = (struct git_midx_header *)data;

	if (hdr->signature != htonl(MIDX_SIGNATURE) ||
	    hdr->version != MIDX_VERSION ||
	    hdr->object_id_version != MIDX_OBJECT_ID_VERSION)
		return midx_error("unsupported multi-pack index version");

	if (hdr->chunks == 0)
		return midx_error("no chunks in multi-pack index");

	/*
	 * No chunk may start before the end of the chunk table, which holds
	 * one 12-byte entry per chunk plus a terminating zero entry.
	 */
	prev_offset =
			sizeof(struct git_midx_header) +
			(1 + hdr->chunks) * 12;
	trailer_offset = size - GIT_OID_RAWSZ;
	if (trailer_offset < prev_offset)
		return midx_error("wrong index size");
	git_oid_cpy(&idx->checksum, (git_oid *)(data + trailer_offset));

	/* The trailer must be the hash of every byte that comes before it. */
	if (git_hash_buf(&computed, data, (size_t)trailer_offset) < 0)
		return midx_error("could not calculate signature");
	if (!git_oid_equal(&computed, &idx->checksum))
		return midx_error("index signature mismatch");

	table_entry = data + sizeof(struct git_midx_header);
	for (n = 0; n < hdr->chunks; ++n, table_entry += 12) {
		/* Entry layout: 4-byte id followed by a 64-bit big-endian offset. */
		chunk_id = ntohl(*((uint32_t *)(table_entry + 0)));
		offset_hi = ntohl(*((uint32_t *)(table_entry + 4)));
		offset_lo = ntohl(*((uint32_t *)(table_entry + 8)));
		cur_offset = ((off64_t)offset_hi) << 32 | ((off64_t)offset_lo);

		if (cur_offset < prev_offset)
			return midx_error("chunks are non-monotonic");
		if (cur_offset >= trailer_offset)
			return midx_error("chunks extend beyond the trailer");

		/* A chunk ends where the next one begins. */
		if (prev_chunk != NULL)
			prev_chunk->length = (size_t)(cur_offset - prev_offset);
		prev_offset = cur_offset;

		switch (chunk_id) {
		case MIDX_PACKFILE_NAMES_ID:
			target = &chunk_packfile_names;
			break;
		case MIDX_OID_FANOUT_ID:
			target = &chunk_oid_fanout;
			break;
		case MIDX_OID_LOOKUP_ID:
			target = &chunk_oid_lookup;
			break;
		case MIDX_OBJECT_OFFSETS_ID:
			target = &chunk_object_offsets;
			break;
		case MIDX_OBJECT_LARGE_OFFSETS_ID:
			target = &chunk_object_large_offsets;
			break;
		default:
			return midx_error("unrecognized chunk ID");
		}

		target->offset = prev_offset;
		prev_chunk = target;
	}
	/* The final chunk runs up to the trailer. */
	prev_chunk->length = (size_t)(trailer_offset - prev_offset);

	if ((error = midx_parse_packfile_names(
			idx, data, ntohl(hdr->packfiles), &chunk_packfile_names)) < 0)
		return error;
	if ((error = midx_parse_oid_fanout(idx, data, &chunk_oid_fanout)) < 0)
		return error;
	if ((error = midx_parse_oid_lookup(idx, data, &chunk_oid_lookup)) < 0)
		return error;
	if ((error = midx_parse_object_offsets(idx, data, &chunk_object_offsets)) < 0)
		return error;
	if ((error = midx_parse_object_large_offsets(idx, data, &chunk_object_large_offsets)) < 0)
		return error;

	return 0;
}
284 
/*
 * Open, memory-map and parse the multi-pack-index file at `path`.
 *
 * On success `*idx_out` receives a newly allocated git_midx_file that must
 * be released with git_midx_free(). On failure a negative value is returned,
 * `*idx_out` is left untouched, and every resource acquired along the way
 * (file descriptor, allocation, mapping) is released.
 */
int git_midx_open(
		git_midx_file **idx_out,
		const char *path)
{
	git_midx_file *idx;
	git_file fd = -1;
	size_t idx_size;
	struct stat st;
	int error;

	/* TODO: properly open the file without access time using O_NOATIME */
	fd = git_futils_open_ro(path);
	if (fd < 0)
		return fd;

	if (p_fstat(fd, &st) < 0) {
		p_close(fd);
		git_error_set(GIT_ERROR_ODB, "multi-pack-index file not found - '%s'", path);
		return -1;
	}

	if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)) {
		p_close(fd);
		git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
		return -1;
	}
	idx_size = (size_t)st.st_size;

	idx = git__calloc(1, sizeof(git_midx_file));
	if (idx == NULL) {
		/* Do not leak the descriptor when the allocation fails. */
		p_close(fd);
		return -1;
	}

	error = git_buf_sets(&idx->filename, path);
	if (error < 0) {
		/* Release both the descriptor and the half-built index. */
		p_close(fd);
		git_midx_free(idx);
		return error;
	}

	/* The descriptor is not needed once the mapping attempt has been made. */
	error = git_futils_mmap_ro(&idx->index_map, fd, 0, idx_size);
	p_close(fd);
	if (error < 0) {
		git_midx_free(idx);
		return error;
	}

	if ((error = git_midx_parse(idx, idx->index_map.data, idx_size)) < 0) {
		git_midx_free(idx);
		return error;
	}

	*idx_out = idx;
	return 0;
}
335 
git_midx_needs_refresh(const git_midx_file * idx,const char * path)336 bool git_midx_needs_refresh(
337 		const git_midx_file *idx,
338 		const char *path)
339 {
340 	git_file fd = -1;
341 	struct stat st;
342 	ssize_t bytes_read;
343 	git_oid idx_checksum = {{0}};
344 
345 	/* TODO: properly open the file without access time using O_NOATIME */
346 	fd = git_futils_open_ro(path);
347 	if (fd < 0)
348 		return true;
349 
350 	if (p_fstat(fd, &st) < 0) {
351 		p_close(fd);
352 		return true;
353 	}
354 
355 	if (!S_ISREG(st.st_mode) ||
356 	    !git__is_sizet(st.st_size) ||
357 	    (size_t)st.st_size != idx->index_map.len) {
358 		p_close(fd);
359 		return true;
360 	}
361 
362 	bytes_read = p_pread(fd, &idx_checksum, GIT_OID_RAWSZ, st.st_size - GIT_OID_RAWSZ);
363 	p_close(fd);
364 
365 	if (bytes_read != GIT_OID_RAWSZ)
366 		return true;
367 
368 	return !git_oid_equal(&idx_checksum, &idx->checksum);
369 }
370 
/*
 * Look up the object whose id starts with the first `len` hex digits of
 * `short_oid` and, when exactly one object matches, describe it in `e`
 * (full id, index into the packfile names table, offset inside that pack).
 *
 * Returns 0 on success. When nothing matches, or when the record refers to
 * a large-offset slot that does not exist, the result of
 * git_odb__error_notfound() is returned; when more than one object shares
 * the prefix, the result of git_odb__error_ambiguous() is returned; a pack
 * index outside the packfile names table yields -1.
 */
int git_midx_entry_find(
		git_midx_entry *e,
		git_midx_file *idx,
		const git_oid *short_oid,
		size_t len)
{
	int pos, found = 0;
	size_t pack_index;
	uint32_t hi, lo;
	const git_oid *current = NULL;
	const unsigned char *object_offset;
	off64_t offset;

	GIT_ASSERT_ARG(idx);

	/* The fanout table narrows the search to ids sharing the first byte. */
	hi = ntohl(idx->oid_fanout[(int)short_oid->id[0]]);
	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(idx->oid_fanout[(int)short_oid->id[0] - 1]));

	pos = git_pack__lookup_sha1(idx->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id);

	if (pos >= 0) {
		/* An object matching exactly the oid was found */
		found = 1;
		current = idx->oid_lookup + pos;
	} else {
		/* No object was found */
		/* pos refers to the object with the "closest" oid to short_oid */
		pos = -1 - pos;
		if (pos < (int)idx->num_objects) {
			current = idx->oid_lookup + pos;

			if (!git_oid_ncmp(short_oid, current, len))
				found = 1;
		}
	}

	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)idx->num_objects) {
		/* Check for ambiguity: ids are sorted, so only the neighbour matters */
		const git_oid *next = current + 1;

		if (!git_oid_ncmp(short_oid, next, len)) {
			found = 2;
		}
	}

	if (!found)
		return git_odb__error_notfound("failed to find offset for multi-pack index entry", short_oid, len);
	if (found > 1)
		return git_odb__error_ambiguous("found multiple offsets for multi-pack index entry");

	/*
	 * Each record is 8 bytes: pack index, then offset. Scale the position
	 * as size_t so that a large index cannot overflow signed int arithmetic.
	 */
	object_offset = idx->object_offsets + (size_t)pos * 8;
	offset = ntohl(*((uint32_t *)(object_offset + 4)));
	if (offset & 0x80000000) {
		/* High bit set: the low 31 bits index the large offsets table. */
		uint32_t object_large_offsets_pos = offset & 0x7fffffff;
		const unsigned char *object_large_offsets_index = idx->object_large_offsets;

		/* Make sure we're not being sent out of bounds */
		if (object_large_offsets_pos >= idx->num_object_large_offsets)
			return git_odb__error_notfound("invalid index into the object large offsets table", short_oid, len);

		object_large_offsets_index += 8 * object_large_offsets_pos;

		offset = (((uint64_t)ntohl(*((uint32_t *)(object_large_offsets_index + 0)))) << 32) |
				ntohl(*((uint32_t *)(object_large_offsets_index + 4)));
	}
	pack_index = ntohl(*((uint32_t *)(object_offset + 0)));
	if (pack_index >= git_vector_length(&idx->packfile_names))
		return midx_error("invalid index into the packfile names table");
	e->pack_index = pack_index;
	e->offset = offset;
	git_oid_cpy(&e->sha1, current);
	return 0;
}
444 
/*
 * Invoke `cb` once for every object id in the index, in stored order,
 * passing `data` through unchanged.
 *
 * Iteration stops at the first non-zero callback result, which is returned
 * after passing through git_error_set_after_callback(). Returns 0 when all
 * callbacks succeed, including when the index holds no objects.
 */
int git_midx_foreach_entry(
		git_midx_file *idx,
		git_odb_foreach_cb cb,
		void *data)
{
	size_t i;
	int error = 0; /* stays 0 if the loop body never runs */

	GIT_ASSERT_ARG(idx);

	for (i = 0; i < idx->num_objects; ++i) {
		if ((error = cb(&idx->oid_lookup[i], data)) != 0)
			return git_error_set_after_callback(error);
	}

	return 0;
}
462 
/*
 * Release the file mapping (if one exists) and the packfile names vector.
 * The git_midx_file structure itself is not freed. Returns 0.
 */
int git_midx_close(git_midx_file *idx)
{
	GIT_ASSERT_ARG(idx);

	if (idx->index_map.data != NULL)
		git_futils_mmap_free(&idx->index_map);
	git_vector_free(&idx->packfile_names);

	return 0;
}
474 
/*
 * Dispose of a git_midx_file: its filename buffer, everything released by
 * git_midx_close(), and the structure itself. NULL is accepted and ignored.
 */
void git_midx_free(git_midx_file *idx)
{
	if (idx != NULL) {
		git_buf_dispose(&idx->filename);
		git_midx_close(idx);
		git__free(idx);
	}
}
484 
packfile__cmp(const void * a_,const void * b_)485 static int packfile__cmp(const void *a_, const void *b_)
486 {
487 	const struct git_pack_file *a = a_;
488 	const struct git_pack_file *b = b_;
489 
490 	return strcmp(a->pack_name, b->pack_name);
491 }
492 
/*
 * Allocate a writer for the pack directory `pack_dir`. The directory path
 * is copied and has its slashes squashed; the pack list starts out empty.
 *
 * Returns 0 and stores the writer in `*out`, or -1 on failure (in which
 * case nothing is left allocated and `*out` is not written).
 */
int git_midx_writer_new(
		git_midx_writer **out,
		const char *pack_dir)
{
	git_midx_writer *writer;

	writer = git__calloc(1, sizeof(git_midx_writer));
	GIT_ERROR_CHECK_ALLOC(writer);

	if (git_buf_sets(&writer->pack_dir, pack_dir) < 0)
		goto on_error;
	git_path_squash_slashes(&writer->pack_dir);

	if (git_vector_init(&writer->packs, 0, packfile__cmp) < 0) {
		git_buf_dispose(&writer->pack_dir);
		goto on_error;
	}

	*out = writer;
	return 0;

on_error:
	git__free(writer);
	return -1;
}
515 
/*
 * Destroy a writer: hand every registered pack back via
 * git_mwindow_put_pack(), then free the pack list, the directory buffer and
 * the writer itself. NULL is accepted and ignored.
 */
void git_midx_writer_free(git_midx_writer *w)
{
	size_t n;
	struct git_pack_file *pack;

	if (w == NULL)
		return;

	git_vector_foreach (&w->packs, n, pack) {
		git_mwindow_put_pack(pack);
	}

	git_vector_free(&w->packs);
	git_buf_dispose(&w->pack_dir);
	git__free(w);
}
530 
/*
 * Register the pack identified by `idx_path` with the writer. The path is
 * resolved against the writer's pack directory, the pack is obtained with
 * git_mwindow_get_pack(), and it is appended to the writer's pack list.
 *
 * Returns 0 on success or a negative error; if the pack cannot be stored
 * it is handed back with git_mwindow_put_pack() before returning.
 */
int git_midx_writer_add(
		git_midx_writer *w,
		const char *idx_path)
{
	struct git_pack_file *pack;
	git_buf resolved = GIT_BUF_INIT;
	int error;

	if ((error = git_path_prettify(&resolved, idx_path, git_buf_cstr(&w->pack_dir))) < 0)
		return error;

	/* The resolved path is only needed for the lookup itself. */
	error = git_mwindow_get_pack(&pack, git_buf_cstr(&resolved));
	git_buf_dispose(&resolved);
	if (error < 0)
		return error;

	if ((error = git_vector_insert(&w->packs, pack)) < 0) {
		git_mwindow_put_pack(pack);
		return error;
	}

	return 0;
}
556 
/* Growable array of entries collected from every pack before sorting. */
typedef git_array_t(git_midx_entry) object_entry_array_t;

/* State handed to object_entry__cb() while enumerating one pack's objects. */
struct object_entry_cb_state {
	uint32_t pack_index;                        /* position of the pack being enumerated in the writer's pack list */
	object_entry_array_t *object_entries_array; /* array that receives one entry per reported object */
};
563 
/*
 * Per-object callback: append a new entry describing `oid` at `offset` in
 * the pack identified by the state's pack_index. `data` must point to a
 * struct object_entry_cb_state. Returns 0, or -1 if the array cannot grow.
 */
static int object_entry__cb(const git_oid *oid, off64_t offset, void *data)
{
	struct object_entry_cb_state *state = data;
	git_midx_entry *slot;

	slot = git_array_alloc(*state->object_entries_array);
	GIT_ERROR_CHECK_ALLOC(slot);

	slot->pack_index = state->pack_index;
	slot->offset = offset;
	git_oid_cpy(&slot->sha1, oid);

	return 0;
}
577 
object_entry__cmp(const void * a_,const void * b_)578 static int object_entry__cmp(const void *a_, const void *b_)
579 {
580 	const git_midx_entry *a = (const git_midx_entry *)a_;
581 	const git_midx_entry *b = (const git_midx_entry *)b_;
582 
583 	return git_oid_cmp(&a->sha1, &b->sha1);
584 }
585 
/*
 * Emit `offset` as two big-endian 32-bit words, most significant first,
 * through `write_cb`. Stops at, and returns, the first negative callback
 * result; returns 0 otherwise.
 */
static int write_offset(off64_t offset, midx_write_cb write_cb, void *cb_data)
{
	uint32_t words[2];
	size_t k;
	int error;

	words[0] = htonl((uint32_t)((offset >> 32) & 0xffffffffu));
	words[1] = htonl((uint32_t)((offset >> 0) & 0xffffffffu));

	for (k = 0; k < 2; ++k) {
		error = write_cb((const char *)&words[k], sizeof(words[k]), cb_data);
		if (error < 0)
			return error;
	}

	return 0;
}
602 
/*
 * Emit one 12-byte chunk table entry: the chunk id as a big-endian 32-bit
 * word followed by the 64-bit big-endian `offset`.
 *
 * Returns the first negative result reported by `write_cb`, or the result
 * of write_offset() otherwise.
 */
static int write_chunk_header(int chunk_id, off64_t offset, midx_write_cb write_cb, void *cb_data)
{
	uint32_t word = htonl(chunk_id);
	int error = write_cb((const char *)&word, sizeof(word), cb_data);

	if (error < 0)
		return error;

	return write_offset(offset, write_cb, cb_data);
}
613 
midx_write_buf(const char * buf,size_t size,void * data)614 static int midx_write_buf(const char *buf, size_t size, void *data)
615 {
616 	git_buf *b = (git_buf *)data;
617 	return git_buf_put(b, buf, size);
618 }
619 
/*
 * State for midx_write_hash(): the real sink that bytes are forwarded to,
 * plus the hash context that is updated with every chunk of bytes first.
 */
struct midx_write_hash_context {
	midx_write_cb write_cb; /* sink that ultimately receives the bytes */
	void *cb_data;          /* opaque argument passed on to write_cb */
	git_hash_ctx *ctx;      /* running hash fed with everything written */
};
625 
midx_write_hash(const char * buf,size_t size,void * data)626 static int midx_write_hash(const char *buf, size_t size, void *data)
627 {
628 	struct midx_write_hash_context *ctx = (struct midx_write_hash_context *)data;
629 	int error;
630 
631 	error = git_hash_update(ctx->ctx, buf, size);
632 	if (error < 0)
633 		return error;
634 
635 	return ctx->write_cb(buf, size, ctx->cb_data);
636 }
637 
/*
 * Serialize a multi-pack-index covering every pack registered in `w` and
 * push the bytes through `write_cb` (with `cb_data`).
 *
 * Layout produced, in order: fixed header, chunk table (one entry per
 * chunk plus a terminating zero entry), Packfile Names, OID Fanout,
 * OID Lookup, Object Offsets, the optional Object Large Offsets chunk, and
 * finally the checksum trailer. All output is routed through
 * midx_write_hash() so the trailer can be computed while writing.
 *
 * Returns 0 on success or a negative error; all temporary buffers are
 * released either way.
 */
static int midx_write(
		git_midx_writer *w,
		midx_write_cb write_cb,
		void *cb_data)
{
	int error = 0;
	size_t i;
	struct git_pack_file *p;
	struct git_midx_header hdr = {0};
	uint32_t oid_fanout_count;
	uint32_t object_large_offsets_count;
	uint32_t oid_fanout[256];
	off64_t offset;
	git_buf packfile_names = GIT_BUF_INIT,
		oid_lookup = GIT_BUF_INIT,
		object_offsets = GIT_BUF_INIT,
		object_large_offsets = GIT_BUF_INIT;
	git_oid idx_checksum = {{0}};
	git_midx_entry *entry;
	object_entry_array_t object_entries_array = GIT_ARRAY_INIT;
	git_vector object_entries = GIT_VECTOR_INIT;
	git_hash_ctx ctx;
	struct midx_write_hash_context hash_cb_data = {0};

	hdr.signature = htonl(MIDX_SIGNATURE);
	hdr.version = MIDX_VERSION;
	hdr.object_id_version = MIDX_OBJECT_ID_VERSION;
	hdr.base_midx_files = 0;

	hash_cb_data.write_cb = write_cb;
	hash_cb_data.cb_data = cb_data;
	hash_cb_data.ctx = &ctx;

	error = git_hash_ctx_init(&ctx);
	if (error < 0)
		return error;

	/* From here on every write goes through the hashing wrapper. */
	cb_data = &hash_cb_data;
	write_cb = midx_write_hash;

	/* Pack order defines the pack_index stored with every object. */
	git_vector_sort(&w->packs);
	git_vector_foreach (&w->packs, i, p) {
		git_buf relative_index = GIT_BUF_INIT;
		struct object_entry_cb_state state = {0};
		size_t path_len;

		state.pack_index = (uint32_t)i;
		state.object_entries_array = &object_entries_array;

		error = git_buf_sets(&relative_index, p->pack_name);
		if (error < 0) {
			git_buf_dispose(&relative_index);
			goto cleanup;
		}
		error = git_path_make_relative(&relative_index, git_buf_cstr(&w->pack_dir));
		if (error < 0) {
			git_buf_dispose(&relative_index);
			goto cleanup;
		}
		path_len = git_buf_len(&relative_index);
		if (path_len <= strlen(".pack") || git__suffixcmp(git_buf_cstr(&relative_index), ".pack") != 0) {
			git_buf_dispose(&relative_index);
			git_error_set(GIT_ERROR_INVALID, "invalid packfile name: '%s'", p->pack_name);
			error = -1;
			goto cleanup;
		}
		path_len -= strlen(".pack");

		/*
		 * Store "<name>.idx\0". A failed append must abort the write:
		 * carrying on would emit a names chunk that does not match the
		 * pack count in the header.
		 */
		if ((error = git_buf_put(&packfile_names, git_buf_cstr(&relative_index), path_len)) < 0 ||
		    (error = git_buf_puts(&packfile_names, ".idx")) < 0 ||
		    (error = git_buf_putc(&packfile_names, '\0')) < 0) {
			git_buf_dispose(&relative_index);
			goto cleanup;
		}
		git_buf_dispose(&relative_index);

		error = git_pack_foreach_entry_offset(p, object_entry__cb, &state);
		if (error < 0)
			goto cleanup;
	}

	/* Sort the object entries. */
	error = git_vector_init(&object_entries, git_array_size(object_entries_array), object_entry__cmp);
	if (error < 0)
		goto cleanup;
	git_array_foreach (object_entries_array, i, entry) {
		if ((error = git_vector_set(NULL, &object_entries, i, entry)) < 0)
			goto cleanup;
	}
	git_vector_set_sorted(&object_entries, 0);
	git_vector_sort(&object_entries);
	/* Entries comparing equal (same object id) are collapsed to one. */
	git_vector_uniq(&object_entries, NULL);

	/* Pad the packfile names so it is a multiple of four. */
	while (git_buf_len(&packfile_names) & 3) {
		if ((error = git_buf_putc(&packfile_names, '\0')) < 0)
			goto cleanup;
	}

	/* Fill the OID Fanout table. */
	oid_fanout_count = 0;
	for (i = 0; i < 256; i++) {
		/* Slot i holds the number of objects whose first id byte is <= i. */
		while (oid_fanout_count < git_vector_length(&object_entries) &&
		       ((const git_midx_entry *)git_vector_get(&object_entries, oid_fanout_count))->sha1.id[0] <= i)
			++oid_fanout_count;
		oid_fanout[i] = htonl(oid_fanout_count);
	}

	/* Fill the OID Lookup table. */
	git_vector_foreach (&object_entries, i, entry) {
		error = git_buf_put(&oid_lookup, (const char *)&entry->sha1, sizeof(entry->sha1));
		if (error < 0)
			goto cleanup;
	}

	/* Fill the Object Offsets and Object Large Offsets tables. */
	object_large_offsets_count = 0;
	git_vector_foreach (&object_entries, i, entry) {
		uint32_t word;

		word = htonl((uint32_t)entry->pack_index);
		error = git_buf_put(&object_offsets, (const char *)&word, sizeof(word));
		if (error < 0)
			goto cleanup;
		if (entry->offset >= 0x80000000l) {
			/*
			 * Too large for 31 bits: store the slot number with the
			 * high bit set and put the real offset in the large table.
			 */
			word = htonl(0x80000000u | object_large_offsets_count++);
			if ((error = write_offset(entry->offset, midx_write_buf, &object_large_offsets)) < 0)
				goto cleanup;
		} else {
			word = htonl((uint32_t)entry->offset & 0x7fffffffu);
		}

		error = git_buf_put(&object_offsets, (const char *)&word, sizeof(word));
		if (error < 0)
			goto cleanup;
	}

	/* Write the header. */
	hdr.packfiles = htonl((uint32_t)git_vector_length(&w->packs));
	hdr.chunks = 4;
	if (git_buf_len(&object_large_offsets) > 0)
		hdr.chunks++;
	error = write_cb((const char *)&hdr, sizeof(hdr), cb_data);
	if (error < 0)
		goto cleanup;

	/* Write the chunk headers. */
	offset = sizeof(hdr) + (hdr.chunks + 1) * 12;
	error = write_chunk_header(MIDX_PACKFILE_NAMES_ID, offset, write_cb, cb_data);
	if (error < 0)
		goto cleanup;
	offset += git_buf_len(&packfile_names);
	error = write_chunk_header(MIDX_OID_FANOUT_ID, offset, write_cb, cb_data);
	if (error < 0)
		goto cleanup;
	offset += sizeof(oid_fanout);
	error = write_chunk_header(MIDX_OID_LOOKUP_ID, offset, write_cb, cb_data);
	if (error < 0)
		goto cleanup;
	offset += git_buf_len(&oid_lookup);
	error = write_chunk_header(MIDX_OBJECT_OFFSETS_ID, offset, write_cb, cb_data);
	if (error < 0)
		goto cleanup;
	offset += git_buf_len(&object_offsets);
	if (git_buf_len(&object_large_offsets) > 0) {
		error = write_chunk_header(MIDX_OBJECT_LARGE_OFFSETS_ID, offset, write_cb, cb_data);
		if (error < 0)
			goto cleanup;
		offset += git_buf_len(&object_large_offsets);
	}
	/* Terminating entry: id 0 with the offset where the chunks end. */
	error = write_chunk_header(0, offset, write_cb, cb_data);
	if (error < 0)
		goto cleanup;

	/* Write all the chunks. */
	error = write_cb(git_buf_cstr(&packfile_names), git_buf_len(&packfile_names), cb_data);
	if (error < 0)
		goto cleanup;
	error = write_cb((const char *)oid_fanout, sizeof(oid_fanout), cb_data);
	if (error < 0)
		goto cleanup;
	error = write_cb(git_buf_cstr(&oid_lookup), git_buf_len(&oid_lookup), cb_data);
	if (error < 0)
		goto cleanup;
	error = write_cb(git_buf_cstr(&object_offsets), git_buf_len(&object_offsets), cb_data);
	if (error < 0)
		goto cleanup;
	error = write_cb(git_buf_cstr(&object_large_offsets), git_buf_len(&object_large_offsets), cb_data);
	if (error < 0)
		goto cleanup;

	/* Finalize the checksum and write the trailer. */
	error = git_hash_final(&idx_checksum, &ctx);
	if (error < 0)
		goto cleanup;
	error = write_cb((const char *)&idx_checksum, sizeof(idx_checksum), cb_data);
	if (error < 0)
		goto cleanup;

cleanup:
	git_array_clear(object_entries_array);
	git_vector_free(&object_entries);
	git_buf_dispose(&packfile_names);
	git_buf_dispose(&oid_lookup);
	git_buf_dispose(&object_offsets);
	git_buf_dispose(&object_large_offsets);
	git_hash_ctx_cleanup(&ctx);
	return error;
}
839 
midx_write_filebuf(const char * buf,size_t size,void * data)840 static int midx_write_filebuf(const char *buf, size_t size, void *data)
841 {
842 	git_filebuf *f = (git_filebuf *)data;
843 	return git_filebuf_write(f, buf, size);
844 }
845 
/*
 * Write the multi-pack-index for `w` to "<pack_dir>/multi-pack-index".
 *
 * The data goes through a git_filebuf opened unbuffered (with fsync when
 * git_repository__fsync_gitdir is set). If serialization fails the filebuf
 * is cleaned up instead of being committed.
 *
 * Returns the result of git_filebuf_commit() on success, or the first
 * negative error encountered.
 */
int git_midx_writer_commit(
		git_midx_writer *w)
{
	git_filebuf output = GIT_FILEBUF_INIT;
	git_buf midx_path = GIT_BUF_INIT;
	int flags;
	int error;

	if ((error = git_buf_joinpath(&midx_path, git_buf_cstr(&w->pack_dir), "multi-pack-index")) < 0)
		return error;

	flags = git_repository__fsync_gitdir
		? (GIT_FILEBUF_DO_NOT_BUFFER | GIT_FILEBUF_FSYNC)
		: GIT_FILEBUF_DO_NOT_BUFFER;

	/* The path buffer is only needed to open the output. */
	error = git_filebuf_open(&output, git_buf_cstr(&midx_path), flags, 0644);
	git_buf_dispose(&midx_path);
	if (error < 0)
		return error;

	if ((error = midx_write(w, midx_write_filebuf, &output)) < 0) {
		git_filebuf_cleanup(&output);
		return error;
	}

	return git_filebuf_commit(&output);
}
873 
/*
 * Serialize the multi-pack-index for `w` into the buffer `midx` instead of
 * a file. Returns whatever midx_write() returns.
 */
int git_midx_writer_dump(
		git_buf *midx,
		git_midx_writer *w)
{
	int error = midx_write(w, midx_write_buf, midx);

	return error;
}
880