1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "midx.h"
9 
10 #include "buffer.h"
11 #include "futils.h"
12 #include "hash.h"
13 #include "odb.h"
14 #include "pack.h"
15 
/* Magic number and format versions accepted by this reader. */
#define MIDX_SIGNATURE         0x4d494458 /* "MIDX" */
#define MIDX_VERSION           1
#define MIDX_OBJECT_ID_VERSION 1

/*
 * Fixed-size header found at the very start of a multi-pack-index file.
 * The parser overlays this struct directly on the file bytes, so the field
 * order and widths must not change. The 32-bit fields are stored in network
 * byte order (the parser applies htonl/ntohl when reading them).
 */
struct git_midx_header {
	uint32_t signature;         /* compared against MIDX_SIGNATURE */
	uint8_t  version;           /* compared against MIDX_VERSION */
	uint8_t  object_id_version; /* compared against MIDX_OBJECT_ID_VERSION */
	uint8_t  chunks;            /* number of entries in the chunk table */
	uint8_t  base_midx_files;   /* not consulted by this file */
	uint32_t packfiles;         /* number of names in the Packfile Names chunk */
};
27 
28 #define MIDX_PACKFILE_NAMES_ID 0x504e414d	   /* "PNAM" */
29 #define MIDX_OID_FANOUT_ID 0x4f494446	   /* "OIDF" */
30 #define MIDX_OID_LOOKUP_ID 0x4f49444c	   /* "OIDL" */
31 #define MIDX_OBJECT_OFFSETS_ID 0x4f4f4646	   /* "OOFF" */
32 #define MIDX_OBJECT_LARGE_OFFSETS_ID 0x4c4f4646 /* "LOFF" */
33 
34 struct git_midx_chunk {
35 	off64_t offset;
36 	size_t length;
37 };
38 
midx_error(const char * message)39 static int midx_error(const char *message)
40 {
41 	git_error_set(GIT_ERROR_ODB, "invalid multi-pack-index file - %s", message);
42 	return -1;
43 }
44 
/*
 * Validate the Packfile Names chunk and collect the names in
 * idx->packfile_names.
 *
 * The chunk is a sequence of `packfiles` NUL-terminated strings. Each name
 * must be non-empty, terminated inside the chunk, strictly greater (strcmp)
 * than the previous one, end in ".idx" with at least one character before
 * the suffix, and contain neither '/' nor '\\'.
 *
 * The vector stores pointers into `data`; nothing is copied.
 *
 * Returns 0 on success or a negative value on error. On error the vector may
 * be partially filled; it is not released here.
 */
static int midx_parse_packfile_names(
		git_midx_file *idx,
		const unsigned char *data,
		uint32_t packfiles,
		struct git_midx_chunk *chunk)
{
	int error;
	uint32_t i;
	char *packfile_name = (char *)(data + chunk->offset);
	size_t chunk_size = chunk->length, len;

	if (chunk->offset == 0)
		return midx_error("missing Packfile Names chunk");
	if (chunk->length == 0)
		return midx_error("empty Packfile Names chunk");
	if ((error = git_vector_init(&idx->packfile_names, packfiles, git__strcmp_cb)) < 0)
		return error;

	for (i = 0; i < packfiles; ++i) {
		/* Bounded scan: never look past the end of the chunk. */
		len = p_strnlen(packfile_name, chunk_size);
		if (len == 0)
			return midx_error("empty packfile name");
		if (len + 1 > chunk_size)
			return midx_error("unterminated packfile name");

		/* Propagate insertion failures instead of silently dropping the name. */
		if ((error = git_vector_insert(&idx->packfile_names, packfile_name)) < 0)
			return error;

		if (i && strcmp(git_vector_get(&idx->packfile_names, i - 1), packfile_name) >= 0)
			return midx_error("packfile names are not sorted");

		/* The terminator was found within the chunk, so len == strlen(name). */
		if (len <= strlen(".idx") || git__suffixcmp(packfile_name, ".idx") != 0)
			return midx_error("non-.idx packfile name");
		if (strchr(packfile_name, '/') != NULL || strchr(packfile_name, '\\') != NULL)
			return midx_error("non-local packfile");

		/* Step over the name and its NUL terminator. */
		packfile_name += len + 1;
		chunk_size -= len + 1;
	}

	return 0;
}
79 
/*
 * Validate the OID Fanout chunk and derive the object count from it.
 *
 * The chunk must hold exactly 256 big-endian 32-bit counters that never
 * decrease. The last counter is stored as idx->num_objects, and
 * idx->oid_fanout is pointed at the table inside `data`.
 *
 * Returns 0 on success, -1 (with an error set) otherwise.
 */
static int midx_parse_oid_fanout(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_oid_fanout)
{
	uint32_t bucket, previous = 0;

	if (chunk_oid_fanout->offset == 0)
		return midx_error("missing OID Fanout chunk");
	if (chunk_oid_fanout->length == 0)
		return midx_error("empty OID Fanout chunk");
	if (chunk_oid_fanout->length != 256 * 4)
		return midx_error("OID Fanout chunk has wrong length");

	idx->oid_fanout = (const uint32_t *)(data + chunk_oid_fanout->offset);

	for (bucket = 0; bucket < 256; ++bucket) {
		uint32_t cumulative = ntohl(idx->oid_fanout[bucket]);

		if (cumulative < previous)
			return midx_error("index is non-monotonic");

		previous = cumulative;
	}

	/* The final fanout entry is the total number of objects. */
	idx->num_objects = previous;

	return 0;
}
104 
/*
 * Validate the OID Lookup chunk and point idx->oid_lookup at it.
 *
 * The chunk must contain exactly idx->num_objects raw object ids
 * (GIT_OID_RAWSZ bytes each) in strictly increasing order. The comparison
 * starts from an all-zero id, so an all-zero first entry is rejected too.
 *
 * Returns 0 on success, -1 (with an error set) otherwise.
 */
static int midx_parse_oid_lookup(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_oid_lookup)
{
	uint32_t i;
	git_oid *oid, *prev_oid, zero_oid = {{0}};

	if (chunk_oid_lookup->offset == 0)
		return midx_error("missing OID Lookup chunk");
	if (chunk_oid_lookup->length == 0)
		return midx_error("empty OID Lookup chunk");

	/*
	 * Compare by division rather than multiplying: num_objects comes from a
	 * 32-bit fanout value, and num_objects * GIT_OID_RAWSZ can wrap around,
	 * which would let an undersized chunk pass and the loop below read out
	 * of bounds.
	 */
	if (chunk_oid_lookup->length % GIT_OID_RAWSZ != 0 ||
	    chunk_oid_lookup->length / GIT_OID_RAWSZ != idx->num_objects)
		return midx_error("OID Lookup chunk has wrong length");

	idx->oid_lookup = oid = (git_oid *)(data + chunk_oid_lookup->offset);
	prev_oid = &zero_oid;
	for (i = 0; i < idx->num_objects; ++i, ++oid) {
		if (git_oid_cmp(prev_oid, oid) >= 0)
			return midx_error("OID Lookup index is non-monotonic");
		prev_oid = oid;
	}

	return 0;
}
130 
/*
 * Validate the Object Offsets chunk and point idx->object_offsets at it.
 *
 * The chunk must contain exactly one 8-byte record per object
 * (idx->num_objects records in total).
 *
 * Returns 0 on success, -1 (with an error set) otherwise.
 */
static int midx_parse_object_offsets(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_object_offsets)
{
	if (chunk_object_offsets->offset == 0)
		return midx_error("missing Object Offsets chunk");
	if (chunk_object_offsets->length == 0)
		return midx_error("empty Object Offsets chunk");

	/*
	 * Compare by division rather than multiplying: num_objects comes from a
	 * 32-bit fanout value, and num_objects * 8 can wrap around, which would
	 * let an undersized chunk pass this check.
	 */
	if (chunk_object_offsets->length % 8 != 0 ||
	    chunk_object_offsets->length / 8 != idx->num_objects)
		return midx_error("Object Offsets chunk has wrong length");

	idx->object_offsets = data + chunk_object_offsets->offset;

	return 0;
}
147 
/*
 * Record the optional Object Large Offsets chunk.
 *
 * A zero-length chunk is accepted and leaves `idx` untouched. Otherwise the
 * length must be a multiple of 8 bytes; the table pointer and its entry
 * count are then stored in `idx`.
 *
 * Returns 0 on success, -1 (with an error set) otherwise.
 */
static int midx_parse_object_large_offsets(
		git_midx_file *idx,
		const unsigned char *data,
		struct git_midx_chunk *chunk_object_large_offsets)
{
	const size_t length = chunk_object_large_offsets->length;

	if (length != 0) {
		if ((length % 8) != 0)
			return midx_error("malformed Object Large Offsets chunk");

		idx->object_large_offsets = data + chunk_object_large_offsets->offset;
		idx->num_object_large_offsets = length / 8;
	}

	return 0;
}
163 
/*
 * Parse and validate an in-memory multi-pack-index.
 *
 * Checks the header, verifies the trailing checksum over everything before
 * it, walks the chunk table to find where each known chunk starts and ends,
 * and then hands each chunk to its dedicated parser. Pointers stored in
 * `idx` refer into `data`, which is not copied here.
 *
 * @param idx  structure to fill in; must not be NULL
 * @param data start of the multi-pack-index contents
 * @param size number of bytes available at `data`
 * @return 0 on success, a negative value (with an error set) otherwise
 */
int git_midx_parse(
		git_midx_file *idx,
		const unsigned char *data,
		size_t size)
{
	struct git_midx_header *hdr;
	const unsigned char *chunk_hdr;
	struct git_midx_chunk *last_chunk;
	uint32_t i;
	off64_t last_chunk_offset, chunk_offset, trailer_offset;
	git_oid idx_checksum = {{0}};
	int error;
	struct git_midx_chunk chunk_packfile_names = {0},
					 chunk_oid_fanout = {0},
					 chunk_oid_lookup = {0},
					 chunk_object_offsets = {0},
					 chunk_object_large_offsets = {0};

	GIT_ASSERT_ARG(idx);

	if (size < sizeof(struct git_midx_header) + GIT_OID_RAWSZ)
		return midx_error("multi-pack index is too short");

	hdr = ((struct git_midx_header *)data);

	if (hdr->signature != htonl(MIDX_SIGNATURE) ||
	    hdr->version != MIDX_VERSION ||
	    hdr->object_id_version != MIDX_OBJECT_ID_VERSION) {
		return midx_error("unsupported multi-pack index version");
	}
	if (hdr->chunks == 0)
		return midx_error("no chunks in multi-pack index");

	/*
	 * The very first chunk's offset should be after the header, all the chunk
	 * headers, and a special zero chunk.
	 */
	last_chunk_offset =
			sizeof(struct git_midx_header) +
			(1 + hdr->chunks) * 12;
	trailer_offset = size - GIT_OID_RAWSZ;
	if (trailer_offset < last_chunk_offset)
		return midx_error("wrong index size");
	git_oid_cpy(&idx->checksum, (git_oid *)(data + trailer_offset));

	/* The trailer is a hash of every byte that precedes it. */
	if (git_hash_buf(&idx_checksum, data, (size_t)trailer_offset) < 0)
		return midx_error("could not calculate signature");
	if (!git_oid_equal(&idx_checksum, &idx->checksum))
		return midx_error("index signature mismatch");

	/* Each chunk table entry is 12 bytes: 4-byte id + 8-byte offset. */
	chunk_hdr = data + sizeof(struct git_midx_header);
	last_chunk = NULL;
	for (i = 0; i < hdr->chunks; ++i, chunk_hdr += 12) {
		uint32_t chunk_id = ntohl(*((uint32_t *)(chunk_hdr + 0)));
		uint64_t high_offset = ntohl(*((uint32_t *)(chunk_hdr + 4)));
		uint64_t low_offset = ntohl(*((uint32_t *)(chunk_hdr + 8)));

		/*
		 * Assemble the offset in unsigned arithmetic and refuse anything
		 * that does not fit in a signed 64-bit offset; shifting a signed
		 * value into the sign bit would be undefined behaviour.
		 */
		if (high_offset > INT32_MAX)
			return midx_error("chunk offset out of range");
		chunk_offset = (off64_t)(high_offset << 32 | low_offset);

		if (chunk_offset < last_chunk_offset)
			return midx_error("chunks are non-monotonic");
		if (chunk_offset >= trailer_offset)
			return midx_error("chunks extend beyond the trailer");

		/* A chunk ends where the next one begins. */
		if (last_chunk != NULL)
			last_chunk->length = (size_t)(chunk_offset - last_chunk_offset);
		last_chunk_offset = chunk_offset;

		switch (chunk_id) {
		case MIDX_PACKFILE_NAMES_ID:
			chunk_packfile_names.offset = last_chunk_offset;
			last_chunk = &chunk_packfile_names;
			break;

		case MIDX_OID_FANOUT_ID:
			chunk_oid_fanout.offset = last_chunk_offset;
			last_chunk = &chunk_oid_fanout;
			break;

		case MIDX_OID_LOOKUP_ID:
			chunk_oid_lookup.offset = last_chunk_offset;
			last_chunk = &chunk_oid_lookup;
			break;

		case MIDX_OBJECT_OFFSETS_ID:
			chunk_object_offsets.offset = last_chunk_offset;
			last_chunk = &chunk_object_offsets;
			break;

		case MIDX_OBJECT_LARGE_OFFSETS_ID:
			chunk_object_large_offsets.offset = last_chunk_offset;
			last_chunk = &chunk_object_large_offsets;
			break;

		default:
			return midx_error("unrecognized chunk ID");
		}
	}

	/*
	 * hdr->chunks is non-zero and unknown ids bail out above, so last_chunk
	 * is always set here. The final chunk runs up to the trailer.
	 */
	last_chunk->length = (size_t)(trailer_offset - last_chunk_offset);

	error = midx_parse_packfile_names(
			idx, data, ntohl(hdr->packfiles), &chunk_packfile_names);
	if (error < 0)
		return error;
	error = midx_parse_oid_fanout(idx, data, &chunk_oid_fanout);
	if (error < 0)
		return error;
	error = midx_parse_oid_lookup(idx, data, &chunk_oid_lookup);
	if (error < 0)
		return error;
	error = midx_parse_object_offsets(idx, data, &chunk_object_offsets);
	if (error < 0)
		return error;
	error = midx_parse_object_large_offsets(idx, data, &chunk_object_large_offsets);
	if (error < 0)
		return error;

	return 0;
}
278 
/*
 * Open, map and parse the multi-pack-index file at `path`.
 *
 * On success `*idx_out` receives a newly allocated git_midx_file that the
 * caller releases with git_midx_free(). On failure nothing is stored in
 * `*idx_out`, and the file descriptor and any partially built structure
 * are released before returning.
 *
 * @param idx_out where to store the new multi-pack-index
 * @param path    filesystem path of the multi-pack-index file
 * @return 0 on success, a negative value (with an error set) otherwise
 */
int git_midx_open(
		git_midx_file **idx_out,
		const char *path)
{
	git_midx_file *idx;
	git_file fd = -1;
	size_t idx_size;
	struct stat st;
	int error;

	/* TODO: properly open the file without access time using O_NOATIME */
	fd = git_futils_open_ro(path);
	if (fd < 0)
		return fd;

	if (p_fstat(fd, &st) < 0) {
		p_close(fd);
		git_error_set(GIT_ERROR_ODB, "multi-pack-index file not found - '%s'", path);
		return -1;
	}

	if (!S_ISREG(st.st_mode) || !git__is_sizet(st.st_size)) {
		p_close(fd);
		git_error_set(GIT_ERROR_ODB, "invalid pack index '%s'", path);
		return -1;
	}
	idx_size = (size_t)st.st_size;

	idx = git__calloc(1, sizeof(git_midx_file));
	if (idx == NULL) {
		/* Do not leak the descriptor when the allocation fails. */
		p_close(fd);
		return -1;
	}

	error = git_buf_sets(&idx->filename, path);
	if (error < 0) {
		/* Release both the descriptor and the half-built structure. */
		p_close(fd);
		git_midx_free(idx);
		return error;
	}

	/* The descriptor is closed as soon as the mapping attempt is done. */
	error = git_futils_mmap_ro(&idx->index_map, fd, 0, idx_size);
	p_close(fd);
	if (error < 0) {
		git_midx_free(idx);
		return error;
	}

	if ((error = git_midx_parse(idx, idx->index_map.data, idx_size)) < 0) {
		git_midx_free(idx);
		return error;
	}

	*idx_out = idx;
	return 0;
}
329 
git_midx_needs_refresh(const git_midx_file * idx,const char * path)330 bool git_midx_needs_refresh(
331 		const git_midx_file *idx,
332 		const char *path)
333 {
334 	git_file fd = -1;
335 	struct stat st;
336 	ssize_t bytes_read;
337 	git_oid idx_checksum = {{0}};
338 
339 	/* TODO: properly open the file without access time using O_NOATIME */
340 	fd = git_futils_open_ro(path);
341 	if (fd < 0)
342 		return true;
343 
344 	if (p_fstat(fd, &st) < 0) {
345 		p_close(fd);
346 		return true;
347 	}
348 
349 	if (!S_ISREG(st.st_mode) ||
350 	    !git__is_sizet(st.st_size) ||
351 	    (size_t)st.st_size != idx->index_map.len) {
352 		p_close(fd);
353 		return true;
354 	}
355 
356 	if (p_lseek(fd, -GIT_OID_RAWSZ, SEEK_END) < 0) {
357 		p_close(fd);
358 		return true;
359 	}
360 
361 	bytes_read = p_read(fd, &idx_checksum, GIT_OID_RAWSZ);
362 	p_close(fd);
363 
364 	if (bytes_read != GIT_OID_RAWSZ)
365 		return true;
366 
367 	return git_oid_cmp(&idx_checksum, &idx->checksum) == 0;
368 }
369 
/*
 * Look up an object by (possibly abbreviated) id in the multi-pack-index.
 *
 * The fanout table narrows the search to ids sharing the first byte of
 * `short_oid`, then git_pack__lookup_sha1() searches within that range.
 * When `len` is shorter than GIT_OID_HEXSZ, the entry that follows the
 * match is also checked so that ambiguous prefixes are reported.
 *
 * @param e         receives the pack index, offset and full id on success
 * @param idx       parsed multi-pack-index; must not be NULL
 * @param short_oid id (or prefix) to look for
 * @param len       number of significant hex digits in `short_oid`
 * @return 0 on success; the result of git_odb__error_notfound() or
 *         git_odb__error_ambiguous() when no unique match exists; -1 when
 *         the entry refers to a packfile that is not in the names table
 */
int git_midx_entry_find(
		git_midx_entry *e,
		git_midx_file *idx,
		const git_oid *short_oid,
		size_t len)
{
	int pos, found = 0;
	size_t pack_index;
	uint32_t hi, lo;
	const git_oid *current = NULL;
	const unsigned char *object_offset;
	off64_t offset;

	GIT_ASSERT_ARG(idx);

	/* fanout[b] counts ids whose first byte is <= b, so [lo, hi) is the bucket. */
	hi = ntohl(idx->oid_fanout[(int)short_oid->id[0]]);
	lo = ((short_oid->id[0] == 0x0) ? 0 : ntohl(idx->oid_fanout[(int)short_oid->id[0] - 1]));

	pos = git_pack__lookup_sha1(idx->oid_lookup, GIT_OID_RAWSZ, lo, hi, short_oid->id);

	if (pos >= 0) {
		/* An object matching exactly the oid was found */
		found = 1;
		current = idx->oid_lookup + pos;
	} else {
		/* No object was found */
		/* pos refers to the object with the "closest" oid to short_oid */
		pos = -1 - pos;
		if (pos < (int)idx->num_objects) {
			current = idx->oid_lookup + pos;

			if (!git_oid_ncmp(short_oid, current, len))
				found = 1;
		}
	}

	if (found && len != GIT_OID_HEXSZ && pos + 1 < (int)idx->num_objects) {
		/* Check for ambiguity */
		const git_oid *next = current + 1;

		if (!git_oid_ncmp(short_oid, next, len)) {
			found = 2;
		}
	}

	if (!found)
		return git_odb__error_notfound("failed to find offset for multi-pack index entry", short_oid, len);
	if (found > 1)
		return git_odb__error_ambiguous("found multiple offsets for multi-pack index entry");

	/*
	 * Each record is 8 bytes: pack index followed by a 32-bit offset.
	 * pos is non-negative here; widen it before scaling so the byte offset
	 * cannot overflow int on very large indexes.
	 */
	object_offset = idx->object_offsets + (size_t)pos * 8;
	offset = ntohl(*((uint32_t *)(object_offset + 4)));
	if (offset & 0x80000000) {
		/* High bit set: the low 31 bits index the large offsets table. */
		uint32_t object_large_offsets_pos = offset & 0x7fffffff;
		const unsigned char *object_large_offsets_index = idx->object_large_offsets;

		/* Make sure we're not being sent out of bounds */
		if (object_large_offsets_pos >= idx->num_object_large_offsets)
			return git_odb__error_notfound("invalid index into the object large offsets table", short_oid, len);

		/* Widened for the same reason as above. */
		object_large_offsets_index += (size_t)object_large_offsets_pos * 8;

		offset = (((uint64_t)ntohl(*((uint32_t *)(object_large_offsets_index + 0)))) << 32) |
				ntohl(*((uint32_t *)(object_large_offsets_index + 4)));
	}
	pack_index = ntohl(*((uint32_t *)(object_offset + 0)));
	if (pack_index >= git_vector_length(&idx->packfile_names))
		return midx_error("invalid index into the packfile names table");
	e->pack_index = pack_index;
	e->offset = offset;
	git_oid_cpy(&e->sha1, current);
	return 0;
}
443 
/*
 * Invoke `cb` once for every object id in the multi-pack-index, in lookup
 * table order.
 *
 * Iteration stops at the first callback that returns non-zero; that value
 * is passed through git_error_set_after_callback() and returned.
 *
 * @param idx  parsed multi-pack-index; must not be NULL
 * @param cb   callback receiving each object id and `data`
 * @param data opaque pointer forwarded to `cb`
 * @return 0 when every callback returned 0 (including when there are no
 *         objects), otherwise the callback's error
 */
int git_midx_foreach_entry(
		git_midx_file *idx,
		git_odb_foreach_cb cb,
		void *data)
{
	size_t i;
	int error;

	GIT_ASSERT_ARG(idx);

	for (i = 0; i < idx->num_objects; ++i) {
		if ((error = cb(&idx->oid_lookup[i], data)) != 0)
			return git_error_set_after_callback(error);
	}

	/*
	 * Reaching this point means no callback failed. Return a literal 0 so
	 * the result is well-defined even when the loop never ran.
	 */
	return 0;
}
461 
/*
 * Release the resources attached to a multi-pack-index without freeing the
 * structure itself: the file mapping (when one exists) and the packfile
 * names vector.
 *
 * Always returns 0 once the argument check has passed.
 */
int git_midx_close(git_midx_file *idx)
{
	GIT_ASSERT_ARG(idx);

	if (idx->index_map.data != NULL)
		git_futils_mmap_free(&idx->index_map);

	git_vector_free(&idx->packfile_names);

	return 0;
}
473 
/*
 * Dispose of a multi-pack-index: its filename buffer, everything released
 * by git_midx_close(), and finally the structure itself.
 *
 * Passing NULL is a no-op.
 */
void git_midx_free(git_midx_file *idx)
{
	if (idx != NULL) {
		git_buf_dispose(&idx->filename);
		git_midx_close(idx);
		git__free(idx);
	}
}
483