1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "common.h"
9 
10 #include <zlib.h>
11 #include "git2/object.h"
12 #include "git2/sys/odb_backend.h"
13 #include "futils.h"
14 #include "hash.h"
15 #include "odb.h"
16 #include "delta.h"
17 #include "filebuf.h"
18 #include "object.h"
19 #include "zstream.h"
20 
21 #include "git2/odb_backend.h"
22 #include "git2/types.h"
23 
24 /* maximum possible header length */
25 #define MAX_HEADER_LEN 64
26 
/*
 * Parsed loose object header: the object type together with the size
 * that the header declares for the object's contents.
 */
typedef struct { /* object header data */
	git_object_t type; /* object type */
	size_t	size; /* object size */
} obj_hdr;
31 
/*
 * Write stream for a loose object. The generic stream is the first
 * member; the stream callbacks in this file cast the `git_odb_stream`
 * pointer they receive back to this structure.
 */
typedef struct {
	git_odb_stream stream;
	git_filebuf fbuf; /* file buffer receiving the header and object data */
} loose_writestream;
36 
/*
 * Read stream for a loose object. The generic stream is the first
 * member; the stream callbacks in this file cast the `git_odb_stream`
 * pointer they receive back to this structure.
 */
typedef struct {
	git_odb_stream stream;
	git_map map; /* read-only mapping of the object file */
	char start[MAX_HEADER_LEN]; /* body bytes inflated together with the header */
	size_t start_len; /* number of valid bytes in `start` */
	size_t start_read; /* bytes of `start` already handed to the reader */
	git_zstream zstream; /* inflate state for the rest of the object */
} loose_readstream;
45 
/*
 * Loose object backend. `parent` is the first member; the backend
 * callbacks in this file cast the `git_odb_backend` pointer they
 * receive back to this structure.
 */
typedef struct loose_backend {
	git_odb_backend parent;

	int object_zlib_level; /** loose object zlib compression level. */
	int fsync_object_files; /** loose object file fsync flag. */
	mode_t object_file_mode; /* mode passed when opening new object files */
	mode_t object_dir_mode; /* mode passed when creating object directories */

	size_t objects_dirlen; /* length of `objects_dir` */
	char objects_dir[GIT_FLEX_ARRAY]; /* path of the objects directory */
} loose_backend;
57 
/* State structure for exploring directories,
 * in order to locate objects matching a short oid.
 */
typedef struct {
	size_t dir_len;				/* length of the path up to and
						 * including the "xx/" directory
						 * being explored */
	unsigned char short_oid[GIT_OID_HEXSZ]; /* hex formatted oid to match */
	size_t short_oid_len;			/* number of significant hex
						 * characters in short_oid */
	int found;				/* number of matching
						 * objects already found */
	unsigned char res_oid[GIT_OID_HEXSZ];	/* hex formatted oid of
						 * the object found */
} loose_locate_object_state;
70 
71 
72 /***********************************************************
73  *
74  * MISCELLANEOUS HELPER FUNCTIONS
75  *
76  ***********************************************************/
77 
/*
 * Write the path of the loose object `id` into `name`: the backend's
 * objects directory followed by the oid formatted as a path.
 *
 * Returns 0 on success, -1 if the required length overflows or the
 * buffer cannot be grown.
 */
static int object_file_name(
	git_buf *name, const loose_backend *be, const git_oid *id)
{
	size_t needed;
	char *tail;

	/* object root + 40 hex sha1 chars + 2 * '/' + '\0' */
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, be->objects_dirlen, GIT_OID_HEXSZ);
	GIT_ERROR_CHECK_ALLOC_ADD(&needed, needed, 3);

	if (git_buf_grow(name, needed) < 0)
		return -1;

	git_buf_set(name, be->objects_dir, be->objects_dirlen);
	git_path_to_dir(name);

	/*
	 * format "aa/aaa..." (41 bytes) straight into the space reserved
	 * above, then account for it and terminate the string by hand.
	 */
	tail = name->ptr + name->size;
	git_oid_pathfmt(tail, id);

	name->size += GIT_OID_HEXSZ + 1;
	name->ptr[name->size] = '\0';

	return 0;
}
99 
object_mkdir(const git_buf * name,const loose_backend * be)100 static int object_mkdir(const git_buf *name, const loose_backend *be)
101 {
102 	return git_futils_mkdir_relative(
103 		name->ptr + be->objects_dirlen, be->objects_dir, be->object_dir_mode,
104 		GIT_MKDIR_PATH | GIT_MKDIR_SKIP_LAST | GIT_MKDIR_VERIFY_DIR, NULL);
105 }
106 
/*
 * Parse the binary header of a "pack-like" loose object.
 *
 * The first byte holds the object type in bits 4-6 and the low four
 * bits of the size; while the high bit of a byte is set, another byte
 * follows carrying seven more bits of the size.
 *
 * out:     receives the parsed type and size
 * out_len: if non-NULL, receives the number of header bytes consumed
 * data:    start of the object data
 * len:     number of bytes available at `data`
 *
 * Returns 0 on success, or -1 (with an error set) if the data ends
 * before the header does or the size does not fit in a size_t.
 */
static int parse_header_packlike(
	obj_hdr *out, size_t *out_len, const unsigned char *data, size_t len)
{
	unsigned long c;
	size_t shift, size, used = 0;

	if (len == 0)
		goto on_error;

	c = data[used++];
	out->type = (c >> 4) & 7;

	size = c & 15;
	shift = 4;
	while (c & 0x80) {
		if (len <= used)
			goto on_error;

		if (sizeof(size_t) * 8 <= shift)
			goto on_error;

		c = data[used++];

		/*
		 * widen before shifting: `shift` is bounded by the width of
		 * size_t, which may exceed the width of unsigned long.
		 */
		size += (size_t)(c & 0x7f) << shift;
		shift += 7;
	}

	out->size = size;

	if (out_len)
		*out_len = used;

	return 0;

on_error:
	git_error_set(GIT_ERROR_OBJECT, "failed to parse loose object: invalid header");
	return -1;
}
144 
/*
 * Parse the textual header of a standard loose object, which has the
 * form "<type> <size>\0".
 *
 * out:      receives the parsed type and size
 * out_len:  receives the header length including the NUL terminator
 *           (set to 0 on entry, so it is 0 when parsing fails)
 * _data:    inflated data beginning with the header
 * data_len: number of bytes available at `_data`
 *
 * Returns 0 on success, -1 with an error set otherwise.
 */
static int parse_header(
	obj_hdr *out,
	size_t *out_len,
	const unsigned char *_data,
	size_t data_len)
{
	const char *text = (char *)_data;
	size_t space_pos = 0, digits_at, nul_pos;
	int64_t parsed;

	*out_len = 0;

	/* the type name runs up to the first space */
	while (space_pos < data_len && text[space_pos] != ' ')
		space_pos++;

	if (space_pos == data_len)
		goto on_error;

	out->type = git_object_stringn2type(text, space_pos);

	/* the size runs from after the space up to the NUL terminator */
	digits_at = space_pos + 1;
	nul_pos = digits_at;

	while (nul_pos < data_len && text[nul_pos] != '\0')
		nul_pos++;

	if (nul_pos == data_len)
		goto on_error;

	if (git__strntol64(&parsed, &text[digits_at], nul_pos - digits_at, NULL, 10) < 0 ||
		parsed < 0)
		goto on_error;

	if ((uint64_t)parsed > SIZE_MAX) {
		git_error_set(GIT_ERROR_OBJECT, "object is larger than available memory");
		return -1;
	}

	out->size = (size_t)parsed;

	/* the header length counts the terminating NUL as well */
	if (GIT_ADD_SIZET_OVERFLOW(out_len, nul_pos, 1))
		goto on_error;

	return 0;

on_error:
	git_error_set(GIT_ERROR_OBJECT, "failed to parse loose object: invalid header");
	return -1;
}
197 
/*
 * Check whether `data` begins with what looks like a zlib stream
 * header: the first byte must have its low nibble equal to 8 and its
 * top bit clear, and the first two bytes, read as a big-endian 16-bit
 * value, must be a multiple of 31.
 *
 * Returns 1 when the header matches, 0 otherwise (including when
 * fewer than two bytes are available).
 */
static int is_zlib_compressed_data(unsigned char *data, size_t data_len)
{
	unsigned int first, second;

	if (data_len < 2)
		return 0;

	first = data[0];
	second = data[1];

	if ((first & 0x8F) != 0x08)
		return 0;

	return ((first << 8) + second) % 31 == 0;
}
208 
209 /***********************************************************
210  *
211  * ODB OBJECT READING & WRITING
212  *
213  * Backend for the public API; read headers and full objects
214  * from the ODB. Write raw data to the ODB.
215  *
216  ***********************************************************/
217 
218 
219 /*
220  * At one point, there was a loose object format that was intended to
221  * mimic the format used in pack-files. This was to allow easy copying
222  * of loose object data into packs. This format is no longer used, but
223  * we must still read it.
224  */
read_loose_packlike(git_rawobj * out,git_buf * obj)225 static int read_loose_packlike(git_rawobj *out, git_buf *obj)
226 {
227 	git_buf body = GIT_BUF_INIT;
228 	const unsigned char *obj_data;
229 	obj_hdr hdr;
230 	size_t obj_len, head_len, alloc_size;
231 	int error;
232 
233 	obj_data = (unsigned char *)obj->ptr;
234 	obj_len = obj->size;
235 
236 	/*
237 	 * read the object header, which is an (uncompressed)
238 	 * binary encoding of the object type and size.
239 	 */
240 	if ((error = parse_header_packlike(&hdr, &head_len, obj_data, obj_len)) < 0)
241 		goto done;
242 
243 	if (!git_object_typeisloose(hdr.type) || head_len > obj_len) {
244 		git_error_set(GIT_ERROR_ODB, "failed to inflate loose object");
245 		error = -1;
246 		goto done;
247 	}
248 
249 	obj_data += head_len;
250 	obj_len -= head_len;
251 
252 	/*
253 	 * allocate a buffer and inflate the data into it
254 	 */
255 	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
256 		git_buf_init(&body, alloc_size) < 0) {
257 		error = -1;
258 		goto done;
259 	}
260 
261 	if ((error = git_zstream_inflatebuf(&body, obj_data, obj_len)) < 0)
262 		goto done;
263 
264 	out->len = hdr.size;
265 	out->type = hdr.type;
266 	out->data = git_buf_detach(&body);
267 
268 done:
269 	git_buf_dispose(&body);
270 	return error;
271 }
272 
/*
 * Read a standard loose object: a single zlib stream that inflates to
 * "<type> <size>\0" followed by the object contents.
 *
 * `obj` holds the raw (deflated) file contents. On success `out`
 * receives the type, the size declared by the header and a newly
 * allocated, NUL-terminated copy of the contents.
 *
 * Returns 0 on success, a negative value otherwise; nothing is left
 * allocated on failure.
 */
static int read_loose_standard(git_rawobj *out, git_buf *obj)
{
	git_zstream zstream = GIT_ZSTREAM_INIT;
	unsigned char head[MAX_HEADER_LEN], *body = NULL;
	size_t decompressed, head_len, body_len, alloc_size;
	obj_hdr hdr;
	int error;

	if ((error = git_zstream_init(&zstream, GIT_ZSTREAM_INFLATE)) < 0 ||
		(error = git_zstream_set_input(&zstream, git_buf_cstr(obj), git_buf_len(obj))) < 0)
		goto done;

	decompressed = sizeof(head);

	/*
	 * inflate the initial part of the compressed buffer in order to
	 * parse the header; read the largest header possible, then push the
	 * remainder into the body buffer.
	 */
	if ((error = git_zstream_get_output(head, &decompressed, &zstream)) < 0 ||
		(error = parse_header(&hdr, &head_len, head, decompressed)) < 0)
		goto done;

	if (!git_object_typeisloose(hdr.type)) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate disk object");
		error = -1;
		goto done;
	}

	/*
	 * whatever was inflated past the header already belongs to the
	 * body. It must fit in the size the header declares, otherwise
	 * copying it would overrun the allocation made below.
	 */
	if (decompressed < head_len ||
		(body_len = decompressed - head_len) > hdr.size) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate disk object: data exceeds declared size");
		error = -1;
		goto done;
	}

	/*
	 * allocate a buffer and inflate the object data into it
	 * (including the initial sequence in the head buffer).
	 */
	if (GIT_ADD_SIZET_OVERFLOW(&alloc_size, hdr.size, 1) ||
		(body = git__calloc(1, alloc_size)) == NULL) {
		error = -1;
		goto done;
	}

	if (body_len)
		memcpy(body, head + head_len, body_len);

	decompressed = hdr.size - body_len;
	if ((error = git_zstream_get_output(body + body_len, &decompressed, &zstream)) < 0)
		goto done;

	if (!git_zstream_done(&zstream)) {
		git_error_set(GIT_ERROR_ZLIB, "failed to finish zlib inflation: stream aborted prematurely");
		error = -1;
		goto done;
	}

	body[hdr.size] = '\0';

	out->data = body;
	out->len = hdr.size;
	out->type = hdr.type;

done:
	if (error < 0)
		git__free(body);

	git_zstream_free(&zstream);
	return error;
}
341 
/*
 * Read the loose object stored in the file at `loc` into `out`,
 * handling both the standard (zlib) and the pack-like formats.
 *
 * `out` is reset before the file is read, so it holds no data when
 * the read fails. Returns 0 on success, a negative value otherwise.
 */
static int read_loose(git_rawobj *out, git_buf *loc)
{
	git_buf contents = GIT_BUF_INIT;
	int error;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(loc);

	if (git_buf_oom(loc))
		return -1;

	out->type = GIT_OBJECT_INVALID;
	out->len = 0;
	out->data = NULL;

	error = git_futils_readbuffer(&contents, loc->ptr);

	if (error >= 0) {
		/* a file not starting with a zlib header is a pack-like object */
		if (is_zlib_compressed_data((unsigned char *)contents.ptr, contents.size))
			error = read_loose_standard(out, &contents);
		else
			error = read_loose_packlike(out, &contents);
	}

	git_buf_dispose(&contents);
	return error;
}
369 
/*
 * Read the type and size of a pack-like loose object from the first
 * `len` bytes of its file. Only `out->len` and `out->type` are set.
 *
 * Returns 0 on success, a negative value if the header is invalid.
 */
static int read_header_loose_packlike(
	git_rawobj *out, const unsigned char *data, size_t len)
{
	obj_hdr parsed;
	size_t consumed;
	int error = parse_header_packlike(&parsed, &consumed, data, len);

	if (error < 0)
		return error;

	out->type = parsed.type;
	out->len = parsed.size;

	return error;
}
385 
/*
 * Read the type and size of a standard loose object from the first
 * `len` bytes of its file: inflate one chunk of at most
 * MAX_HEADER_LEN bytes and parse the textual header from it. Only
 * `out->len` and `out->type` are set.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int read_header_loose_standard(
	git_rawobj *out, const unsigned char *data, size_t len)
{
	git_zstream inflater = GIT_ZSTREAM_INIT;
	unsigned char plain[MAX_HEADER_LEN] = {0};
	obj_hdr parsed = {0};
	size_t plain_len = sizeof(plain), consumed;
	int error;

	error = git_zstream_init(&inflater, GIT_ZSTREAM_INFLATE);

	if (error >= 0)
		error = git_zstream_set_input(&inflater, data, len);

	if (error >= 0)
		error = git_zstream_get_output_chunk(plain, &plain_len, &inflater);

	if (error >= 0)
		error = parse_header(&parsed, &consumed, plain, plain_len);

	if (error >= 0) {
		out->type = parsed.type;
		out->len = parsed.size;
	}

	git_zstream_free(&inflater);
	return error;
}
408 
/*
 * Read only the header of the loose object stored at `loc`, filling
 * in `out->len` and `out->type`; `out->data` is set to NULL.
 *
 * At most the first 1024 bytes of the file are read, which is then
 * parsed as either a standard or a pack-like header.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int read_header_loose(git_rawobj *out, git_buf *loc)
{
	unsigned char chunk[1024];
	ssize_t nread;
	int fd, error;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(loc);

	if (git_buf_oom(loc))
		return -1;

	out->data = NULL;

	fd = git_futils_open_ro(loc->ptr);
	if (fd < 0)
		return fd;

	nread = p_read(fd, chunk, sizeof(chunk));
	if (nread < 0) {
		error = (int)nread;
		goto done;
	}

	if (is_zlib_compressed_data(chunk, (size_t)nread))
		error = read_header_loose_standard(out, chunk, (size_t)nread);
	else
		error = read_header_loose_packlike(out, chunk, (size_t)nread);

	/* a header that parses but names a non-loose type is rejected */
	if (error == 0 && !git_object_typeisloose(out->type)) {
		git_error_set(GIT_ERROR_ZLIB, "failed to read loose object header");
		error = -1;
	}

done:
	p_close(fd);
	return error;
}
447 
/*
 * Compute the path of the loose object `oid` into `object_location`
 * and check that it exists.
 *
 * Returns 0 if the path exists, GIT_ENOTFOUND if it does not, or the
 * error from building the path.
 */
static int locate_object(
	git_buf *object_location,
	loose_backend *backend,
	const git_oid *oid)
{
	int error = object_file_name(object_location, backend, oid);

	if (error)
		return error;

	if (!git_path_exists(object_location->ptr))
		return GIT_ENOTFOUND;

	return 0;
}
460 
461 /* Explore an entry of a directory and see if it matches a short oid */
fn_locate_object_short_oid(void * state,git_buf * pathbuf)462 static int fn_locate_object_short_oid(void *state, git_buf *pathbuf) {
463 	loose_locate_object_state *sstate = (loose_locate_object_state *)state;
464 
465 	if (git_buf_len(pathbuf) - sstate->dir_len != GIT_OID_HEXSZ - 2) {
466 		/* Entry cannot be an object. Continue to next entry */
467 		return 0;
468 	}
469 
470 	if (git_path_isdir(pathbuf->ptr) == false) {
471 		/* We are already in the directory matching the 2 first hex characters,
472 		 * compare the first ncmp characters of the oids */
473 		if (!memcmp(sstate->short_oid + 2,
474 			(unsigned char *)pathbuf->ptr + sstate->dir_len,
475 			sstate->short_oid_len - 2)) {
476 
477 			if (!sstate->found) {
478 				sstate->res_oid[0] = sstate->short_oid[0];
479 				sstate->res_oid[1] = sstate->short_oid[1];
480 				memcpy(sstate->res_oid+2, pathbuf->ptr+sstate->dir_len, GIT_OID_HEXSZ-2);
481 			}
482 			sstate->found++;
483 		}
484 	}
485 
486 	if (sstate->found > 1)
487 		return GIT_EAMBIGUOUS;
488 
489 	return 0;
490 }
491 
/*
 * Locate an object matching a given short oid.
 *
 * object_location: receives the path of the object that was found
 * res_oid:         receives the full oid of the object that was found
 * backend:         backend whose objects directory is searched
 * short_oid:       oid holding the prefix to look for
 * len:             number of significant hex characters in short_oid
 *
 * Returns 0 when exactly one object matches. Otherwise returns the
 * result of git_odb__error_notfound() (no match, or the "xx/" directory
 * does not exist), the result of git_odb__error_ambiguous() (several
 * matches), -1 on allocation failure, or the error reported while
 * iterating the directory or parsing the resulting oid.
 */
static int locate_object_short_oid(
	git_buf *object_location,
	git_oid *res_oid,
	loose_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	char *objects_dir = backend->objects_dir;
	size_t dir_len = strlen(objects_dir), alloc_len;
	loose_locate_object_state state;
	int error;

	/* prealloc memory for OBJ_DIR/xx/xx..38x..xx */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, dir_len, GIT_OID_HEXSZ);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 3);
	if (git_buf_grow(object_location, alloc_len) < 0)
		return -1;

	git_buf_set(object_location, objects_dir, dir_len);
	git_path_to_dir(object_location);

	/* save adjusted position at end of dir so it can be restored later */
	dir_len = git_buf_len(object_location);

	/* Convert raw oid to hex formatted oid */
	git_oid_fmt((char *)state.short_oid, short_oid);

	/* Explore OBJ_DIR/xx/ where xx is the beginning of hex formatted short oid */
	/* three characters are appended; the third is then replaced by '/' */
	if (git_buf_put(object_location, (char *)state.short_oid, 3) < 0)
		return -1;
	object_location->ptr[object_location->size - 1] = '/';

	/* Check that directory exists */
	if (git_path_isdir(object_location->ptr) == false)
		return git_odb__error_notfound("no matching loose object for prefix",
			short_oid, len);

	state.dir_len = git_buf_len(object_location);
	state.short_oid_len = len;
	state.found = 0;

	/* Explore directory to find a unique object matching short_oid */
	error = git_path_direach(
		object_location, 0, fn_locate_object_short_oid, &state);
	/* GIT_EAMBIGUOUS is reported below, through state.found */
	if (error < 0 && error != GIT_EAMBIGUOUS)
		return error;

	if (!state.found)
		return git_odb__error_notfound("no matching loose object for prefix",
			short_oid, len);

	if (state.found > 1)
		return git_odb__error_ambiguous("multiple matches in loose objects");

	/* Convert obtained hex formatted oid to raw */
	error = git_oid_fromstr(res_oid, (char *)state.res_oid);
	if (error)
		return error;

	/* Update the location according to the oid obtained */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, dir_len, GIT_OID_HEXSZ);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloc_len, alloc_len, 2);

	/* drop the "xx/" suffix; the full "xx/xx..." path is formatted in its place */
	git_buf_truncate(object_location, dir_len);
	if (git_buf_grow(object_location, alloc_len) < 0)
		return -1;

	git_oid_pathfmt(object_location->ptr + dir_len, res_oid);

	object_location->size += GIT_OID_HEXSZ + 1;
	object_location->ptr[object_location->size] = '\0';

	return 0;
}
567 
568 
569 
570 
571 
572 
573 
574 
575 
576 /***********************************************************
577  *
578  * LOOSE BACKEND PUBLIC API
579  *
580  * Implement the git_odb_backend API calls
581  *
582  ***********************************************************/
583 
/*
 * Backend `read_header` callback: report the size and type of the
 * loose object `oid` without reading its contents.
 *
 * `len_p` and `type_p` are written only on success. Any failure to
 * locate the object is reported as "not found".
 */
static int loose_backend__read_header(size_t *len_p, git_object_t *type_p, git_odb_backend *backend, const git_oid *oid)
{
	git_buf path = GIT_BUF_INIT;
	git_rawobj header;
	int error;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(oid);

	header.type = GIT_OBJECT_INVALID;
	header.len = 0;

	if (locate_object(&path, (loose_backend *)backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
	} else {
		error = read_header_loose(&header, &path);

		if (error == 0) {
			*type_p = header.type;
			*len_p = header.len;
		}
	}

	git_buf_dispose(&path);

	return error;
}
608 
/*
 * Backend `read` callback: read the loose object `oid`.
 *
 * On success `buffer_p`, `len_p` and `type_p` receive the contents,
 * size and type of the object; they are left untouched on failure.
 * Any failure to locate the object is reported as "not found".
 */
static int loose_backend__read(void **buffer_p, size_t *len_p, git_object_t *type_p, git_odb_backend *backend, const git_oid *oid)
{
	git_buf path = GIT_BUF_INIT;
	git_rawobj object;
	int error = 0;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(oid);

	if (locate_object(&path, (loose_backend *)backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
	} else {
		error = read_loose(&object, &path);

		if (error == 0) {
			*type_p = object.type;
			*len_p = object.len;
			*buffer_p = object.data;
		}
	}

	git_buf_dispose(&path);

	return error;
}
631 
/*
 * Backend `read_prefix` callback: read the loose object whose oid
 * starts with the first `len` hex characters of `short_oid`.
 *
 * On success `out_oid` receives the full oid and `buffer_p`, `len_p`
 * and `type_p` the contents, size and type of the object.
 */
static int loose_backend__read_prefix(
	git_oid *out_oid,
	void **buffer_p,
	size_t *len_p,
	git_object_t *type_p,
	git_odb_backend *backend,
	const git_oid *short_oid,
	size_t len)
{
	git_buf path = GIT_BUF_INIT;
	git_rawobj object;
	int error = 0;

	GIT_ASSERT_ARG(len >= GIT_OID_MINPREFIXLEN && len <= GIT_OID_HEXSZ);

	/* a full-length "prefix" is an ordinary read */
	if (len == GIT_OID_HEXSZ) {
		error = loose_backend__read(buffer_p, len_p, type_p, backend, short_oid);

		if (!error)
			git_oid_cpy(out_oid, short_oid);

		return error;
	}

	GIT_ASSERT_ARG(backend && short_oid);

	error = locate_object_short_oid(&path, out_oid,
		(loose_backend *)backend, short_oid, len);

	if (error == 0)
		error = read_loose(&object, &path);

	if (error == 0) {
		*type_p = object.type;
		*len_p = object.len;
		*buffer_p = object.data;
	}

	git_buf_dispose(&path);

	return error;
}
670 
/*
 * Backend `exists` callback: returns 1 if a loose object file exists
 * for `oid`, 0 if locating it failed for any reason.
 */
static int loose_backend__exists(git_odb_backend *backend, const git_oid *oid)
{
	git_buf path = GIT_BUF_INIT;
	int lookup;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(oid);

	lookup = locate_object(&path, (loose_backend *)backend, oid);
	git_buf_dispose(&path);

	return lookup == 0;
}
685 
/*
 * Backend `exists_prefix` callback: look for the single loose object
 * whose oid starts with the first `len` hex characters of `short_id`,
 * storing its full oid in `out`.
 *
 * Returns the result of the lookup: 0 when exactly one object matches.
 */
static int loose_backend__exists_prefix(
	git_oid *out, git_odb_backend *backend, const git_oid *short_id, size_t len)
{
	git_buf path = GIT_BUF_INIT;
	int result;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(short_id);
	GIT_ASSERT_ARG(len >= GIT_OID_MINPREFIXLEN);

	result = locate_object_short_oid(
		&path, out, (loose_backend *)backend, short_id, len);

	/* only the oid is of interest here; the path itself is discarded */
	git_buf_dispose(&path);

	return result;
}
704 
/* State shared by the directory callbacks used by loose_backend__foreach */
struct foreach_state {
	size_t dir_len; /* length of the objects directory path, with trailing '/' */
	git_odb_foreach_cb cb; /* user callback invoked for each object found */
	void *data; /* opaque payload passed through to `cb` */
};
710 
filename_to_oid(git_oid * oid,const char * ptr)711 GIT_INLINE(int) filename_to_oid(git_oid *oid, const char *ptr)
712 {
713 	int v, i = 0;
714 	if (strlen(ptr) != GIT_OID_HEXSZ+1)
715 		return -1;
716 
717 	if (ptr[2] != '/') {
718 		return -1;
719 	}
720 
721 	v = (git__fromhex(ptr[i]) << 4) | git__fromhex(ptr[i+1]);
722 	if (v < 0)
723 		return -1;
724 
725 	oid->id[0] = (unsigned char) v;
726 
727 	ptr += 3;
728 	for (i = 0; i < 38; i += 2) {
729 		v = (git__fromhex(ptr[i]) << 4) | git__fromhex(ptr[i + 1]);
730 		if (v < 0)
731 			return -1;
732 
733 		oid->id[1 + i/2] = (unsigned char) v;
734 	}
735 
736 	return 0;
737 }
738 
/*
 * Directory iteration callback for the entries of one "xx/" object
 * directory: entries whose path does not convert to an oid are
 * skipped, every other entry is passed to the user callback.
 */
static int foreach_object_dir_cb(void *_state, git_buf *path)
{
	struct foreach_state *walk = (struct foreach_state *) _state;
	const char *relative = path->ptr + walk->dir_len;
	git_oid oid;
	int cb_result;

	if (filename_to_oid(&oid, relative) < 0)
		return 0;

	cb_result = walk->cb(&oid, walk->data);

	return git_error_set_after_callback_function(
		cb_result, "git_odb_foreach");
}
750 
/*
 * Directory iteration callback for the entries of the objects
 * directory: descend into each sub-directory and ignore anything else.
 */
static int foreach_cb(void *_state, git_buf *path)
{
	struct foreach_state *walk = (struct foreach_state *) _state;

	if (git_path_isdir(git_buf_cstr(path)))
		return git_path_direach(path, 0, foreach_object_dir_cb, walk);

	/* non-dir is some stray file, ignore it */
	return 0;
}
761 
/*
 * Backend `foreach` callback: invoke `cb` with `data` for every loose
 * object found below the backend's objects directory.
 *
 * Returns the result of the directory iteration, or -1 if the path
 * buffer could not be allocated.
 */
static int loose_backend__foreach(git_odb_backend *_backend, git_odb_foreach_cb cb, void *data)
{
	loose_backend *backend = (loose_backend *) _backend;
	struct foreach_state walk;
	git_buf root = GIT_BUF_INIT;
	int error;

	GIT_ASSERT_ARG(backend);
	GIT_ASSERT_ARG(cb);

	/* iterate from "<objects_dir>/" */
	git_buf_sets(&root, backend->objects_dir);
	git_path_to_dir(&root);

	if (git_buf_oom(&root))
		return -1;

	memset(&walk, 0, sizeof(walk));
	walk.dir_len = git_buf_len(&root);
	walk.data = data;
	walk.cb = cb;

	error = git_path_direach(&root, 0, foreach_cb, &walk);

	git_buf_dispose(&root);

	return error;
}
791 
/*
 * Write stream `finalize_write` callback: commit the data written so
 * far to the object file for `oid`, creating the directories leading
 * to that file first.
 *
 * Returns -1 if the path cannot be built or the directories cannot be
 * created, otherwise the result of committing the file.
 */
static int loose_backend__writestream_finalize(git_odb_stream *_stream, const git_oid *oid)
{
	loose_writestream *ws = (loose_writestream *)_stream;
	loose_backend *backend = (loose_backend *)_stream->backend;
	git_buf destination = GIT_BUF_INIT;
	int error = -1;

	if (object_file_name(&destination, backend, oid) >= 0 &&
		object_mkdir(&destination, backend) >= 0)
		error = git_filebuf_commit_at(&ws->fbuf, destination.ptr);

	git_buf_dispose(&destination);

	return error;
}
810 
/*
 * Write stream `write` callback: append `len` bytes of `data` to the
 * stream's file buffer and return the result of that write.
 */
static int loose_backend__writestream_write(git_odb_stream *_stream, const char *data, size_t len)
{
	git_filebuf *target = &((loose_writestream *)_stream)->fbuf;

	return git_filebuf_write(target, data, len);
}
816 
/*
 * Write stream `free` callback: clean up the stream's file buffer,
 * then free the stream itself.
 */
static void loose_backend__writestream_free(git_odb_stream *_stream)
{
	loose_writestream *ws = (loose_writestream *)_stream;

	git_filebuf_cleanup(&ws->fbuf);

	git__free(ws);
}
824 
/*
 * Compute the filebuf flags used when writing an object file: a
 * temporary file, deflated at the backend's zlib level, and fsync'ed
 * when either the backend or the global gitdir setting asks for it.
 */
static int filebuf_flags(loose_backend *backend)
{
	int deflate_bits = backend->object_zlib_level << GIT_FILEBUF_DEFLATE_SHIFT;
	int flags = GIT_FILEBUF_TEMPORARY | deflate_bits;

	if (backend->fsync_object_files || git_repository__fsync_gitdir)
		return flags | GIT_FILEBUF_FSYNC;

	return flags;
}
835 
/*
 * Backend `writestream` callback: open a stream for writing a new
 * loose object of the given `length` and `type`.
 *
 * The object header is written to a temporary file immediately; the
 * caller then writes the contents and finalizes the stream.
 *
 * On success `*stream_out` receives the new stream and 0 is returned.
 * On failure `*stream_out` is NULL and a negative value is returned.
 */
static int loose_backend__writestream(git_odb_stream **stream_out, git_odb_backend *_backend, git_object_size_t length, git_object_t type)
{
	loose_backend *backend;
	loose_writestream *ws = NULL;
	git_buf tmp_path = GIT_BUF_INIT;
	char hdr[MAX_HEADER_LEN];
	size_t hdrlen;
	int error;

	GIT_ASSERT_ARG(_backend);

	backend = (loose_backend *)_backend;
	*stream_out = NULL;

	error = git_odb__format_object_header(&hdrlen,
		hdr, sizeof(hdr), length, type);
	if (error < 0)
		return error;

	ws = git__calloc(1, sizeof(loose_writestream));
	GIT_ERROR_CHECK_ALLOC(ws);

	ws->stream.backend = _backend;
	ws->stream.mode = GIT_STREAM_WRONLY;
	ws->stream.read = NULL; /* no read callback on a write-only stream */
	ws->stream.write = &loose_backend__writestream_write;
	ws->stream.finalize_write = &loose_backend__writestream_finalize;
	ws->stream.free = &loose_backend__writestream_free;

	/* open the temporary file and start it off with the object header */
	error = -1;
	if (git_buf_joinpath(&tmp_path, backend->objects_dir, "tmp_object") >= 0 &&
		git_filebuf_open(&ws->fbuf, tmp_path.ptr, filebuf_flags(backend),
			backend->object_file_mode) >= 0 &&
		ws->stream.write((git_odb_stream *)ws, hdr, hdrlen) >= 0)
		error = 0;

	if (error < 0) {
		git_filebuf_cleanup(&ws->fbuf);
		git__free(ws);
		ws = NULL;
	}

	git_buf_dispose(&tmp_path);
	*stream_out = (git_odb_stream *)ws;

	return error;
}
878 
/*
 * Read stream `read` callback: copy up to `buffer_len` bytes of object
 * contents into `buffer`.
 *
 * Bytes that were inflated along with the header when the stream was
 * opened are replayed first; the rest comes from the zlib stream.
 *
 * Returns the number of bytes stored in `buffer` (at most INT_MAX), or
 * a negative value if inflating fails.
 */
static int loose_backend__readstream_read(
	git_odb_stream *_stream,
	char *buffer,
	size_t buffer_len)
{
	loose_readstream *rs = (loose_readstream *)_stream;
	size_t pending = rs->start_len - rs->start_read;
	size_t n;
	int copied = 0, error;

	/* the result is reported as an int, so never produce more than that */
	buffer_len = min(buffer_len, INT_MAX);

	/*
	 * if we read more than just the header in the initial read, play
	 * that back for the caller.
	 */
	if (pending && buffer_len) {
		n = min(pending, buffer_len);
		memcpy(buffer, rs->start + rs->start_read, n);

		rs->start_read += n;
		buffer += n;
		buffer_len -= n;

		copied += (int)n;
	}

	if (!buffer_len)
		return copied;

	n = buffer_len;
	if ((error = git_zstream_get_output(buffer, &n, &rs->zstream)) < 0)
		return error;

	return copied + (int)n;
}
916 
/*
 * Read stream `free` callback: release the file mapping and the zlib
 * state, then free the stream itself.
 */
static void loose_backend__readstream_free(git_odb_stream *_stream)
{
	loose_readstream *rs = (loose_readstream *)_stream;

	git_futils_mmap_free(&rs->map);
	git_zstream_free(&rs->zstream);

	git__free(rs);
}
925 
/*
 * Prepare a read stream over a pack-like loose object: parse the
 * uncompressed binary header at the start of the mapped file into
 * `hdr`, then feed everything after it to the stream's zlib state.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__readstream_packlike(
	obj_hdr *hdr,
	loose_readstream *stream)
{
	const unsigned char *mapped = stream->map.data;
	size_t mapped_len = stream->map.len;
	size_t hdr_bytes;
	int error;

	/* the header is a binary encoding of the object type and size */
	error = parse_header_packlike(hdr, &hdr_bytes, mapped, mapped_len);
	if (error < 0)
		return error;

	if (!git_object_typeisloose(hdr->type)) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate loose object");
		return -1;
	}

	return git_zstream_set_input(&stream->zstream,
		mapped + hdr_bytes, mapped_len - hdr_bytes);
}
952 
/*
 * Prepare a read stream over a standard loose object: feed the mapped
 * file to the stream's zlib state, inflate the first MAX_HEADER_LEN
 * bytes and parse the header from them into `hdr`.
 *
 * Any bytes inflated beyond the header are saved in the stream's
 * `start` buffer so that they can be replayed on the first read.
 *
 * Returns 0 on success, a negative value otherwise.
 */
static int loose_backend__readstream_standard(
	obj_hdr *hdr,
	loose_readstream *stream)
{
	unsigned char first[MAX_HEADER_LEN];
	size_t inflated = sizeof(first);
	size_t hdr_bytes;
	int error;

	error = git_zstream_set_input(&stream->zstream,
		stream->map.data, stream->map.len);
	if (error < 0)
		return error;

	/* read the largest header possible in a single pass */
	error = git_zstream_get_output(first, &inflated, &stream->zstream);
	if (error < 0)
		return error;

	error = parse_header(hdr, &hdr_bytes, first, inflated);
	if (error < 0)
		return error;

	if (!git_object_typeisloose(hdr->type)) {
		git_error_set(GIT_ERROR_ODB, "failed to inflate disk object");
		return -1;
	}

	/* keep the start of the body that came along with the header */
	if (inflated > hdr_bytes) {
		stream->start_len = inflated - hdr_bytes;
		memcpy(stream->start, first + hdr_bytes, inflated - hdr_bytes);
	}

	return 0;
}
988 
/*
 * Backend `readstream` callback: open a stream for reading the
 * contents of the loose object `oid`.
 *
 * stream_out: receives the new stream on success, NULL otherwise
 * len_out:    receives the object size declared by its header
 * type_out:   receives the object type
 *
 * The object file is memory-mapped and its header parsed up front;
 * the contents are inflated as the stream is read.
 *
 * Returns 0 on success, a negative value otherwise; everything
 * allocated here is released on failure.
 */
static int loose_backend__readstream(
	git_odb_stream **stream_out,
	size_t *len_out,
	git_object_t *type_out,
	git_odb_backend *_backend,
	const git_oid *oid)
{
	loose_backend *backend;
	loose_readstream *stream = NULL;
	git_hash_ctx *hash_ctx = NULL;
	git_buf object_path = GIT_BUF_INIT;
	obj_hdr hdr;
	int error = 0;

	GIT_ASSERT_ARG(stream_out);
	GIT_ASSERT_ARG(len_out);
	GIT_ASSERT_ARG(type_out);
	GIT_ASSERT_ARG(_backend);
	GIT_ASSERT_ARG(oid);

	backend = (loose_backend *)_backend;
	*stream_out = NULL;
	*len_out = 0;
	*type_out = GIT_OBJECT_INVALID;

	if (locate_object(&object_path, backend, oid) < 0) {
		error = git_odb__error_notfound("no matching loose object",
			oid, GIT_OID_HEXSZ);
		goto done;
	}

	/*
	 * allocation failures go through `done` as well, so that the
	 * object path (and the stream, if it was already allocated) are
	 * released rather than leaked by an early return.
	 */
	if ((stream = git__calloc(1, sizeof(loose_readstream))) == NULL ||
		(hash_ctx = git__malloc(sizeof(git_hash_ctx))) == NULL) {
		error = -1;
		goto done;
	}

	if ((error = git_hash_ctx_init(hash_ctx)) < 0 ||
		(error = git_futils_mmap_ro_file(&stream->map, object_path.ptr)) < 0 ||
		(error = git_zstream_init(&stream->zstream, GIT_ZSTREAM_INFLATE)) < 0)
		goto done;

	/* check for a packlike loose object */
	if (!is_zlib_compressed_data(stream->map.data, stream->map.len))
		error = loose_backend__readstream_packlike(&hdr, stream);
	else
		error = loose_backend__readstream_standard(&hdr, stream);

	if (error < 0)
		goto done;

	stream->stream.backend = _backend;
	stream->stream.hash_ctx = hash_ctx;
	stream->stream.read = &loose_backend__readstream_read;
	stream->stream.free = &loose_backend__readstream_free;

	*stream_out = (git_odb_stream *)stream;
	*len_out = hdr.size;
	*type_out = hdr.type;

done:
	if (error < 0) {
		if (stream) {
			git_futils_mmap_free(&stream->map);
			git_zstream_free(&stream->zstream);
			git__free(stream);
		}
		if (hash_ctx) {
			git_hash_ctx_cleanup(hash_ctx);
			git__free(hash_ctx);
		}
	}

	git_buf_dispose(&object_path);
	return error;
}
1065 
/*
 * Store a complete object of the given `type` under `oid`.
 *
 * The formatted object header followed by `data` is written through a
 * filebuf opened on "tmp_object" inside the backend's objects directory;
 * the filebuf is then committed at the object's final path, whose
 * directory is created first via object_mkdir().
 *
 * Returns the (non-negative) result of formatting the header on success,
 * the header formatter's error if that step fails, and -1 for any later
 * failure.  The filebuf is cleaned up on every failure path.
 */
static int loose_backend__write(git_odb_backend *_backend, const git_oid *oid, const void *data, size_t len, git_object_t type)
{
	loose_backend *backend = (loose_backend *)_backend;
	git_filebuf fbuf = GIT_FILEBUF_INIT;
	git_buf path = GIT_BUF_INIT;
	char hdr[MAX_HEADER_LEN];
	size_t hdr_len;
	int error;

	/* build the object header that precedes the payload */
	error = git_odb__format_object_header(&hdr_len,
		hdr, sizeof(hdr), len, type);
	if (error < 0)
		goto cleanup;

	/* open the temporary file the object is staged in */
	if (git_buf_joinpath(&path, backend->objects_dir, "tmp_object") < 0)
		error = -1;
	else if (git_filebuf_open(&fbuf, path.ptr, filebuf_flags(backend),
			backend->object_file_mode) < 0)
		error = -1;

	if (error < 0)
		goto cleanup;

	/*
	 * The write results are intentionally not inspected here.
	 * NOTE(review): presumably the filebuf remembers a failed write
	 * and reports it when committing -- confirm against filebuf.c.
	 */
	git_filebuf_write(&fbuf, hdr, hdr_len);
	git_filebuf_write(&fbuf, data, len);

	/* `path` is reused for the object's final location */
	if (object_file_name(&path, backend, oid) < 0)
		error = -1;
	else if (object_mkdir(&path, backend) < 0)
		error = -1;
	else if (git_filebuf_commit_at(&fbuf, path.ptr) < 0)
		error = -1;

cleanup:
	if (error < 0)
		git_filebuf_cleanup(&fbuf);
	git_buf_dispose(&path);
	return error;
}
1104 
/*
 * Freshen the loose object `oid` by touching its file.
 *
 * The object's path is built with object_file_name() and handed to
 * git_futils_touch() with a NULL second argument.
 *
 * Returns -1 if the path cannot be built, otherwise whatever
 * git_futils_touch() returns.
 */
static int loose_backend__freshen(
	git_odb_backend *_backend,
	const git_oid *oid)
{
	git_buf object_path = GIT_BUF_INIT;
	int result = -1;

	if (object_file_name(&object_path, (loose_backend *)_backend, oid) >= 0) {
		result = git_futils_touch(object_path.ptr, NULL);
		git_buf_dispose(&object_path);
	}

	return result;
}
1121 
/*
 * Release a loose backend.
 *
 * The backend and its trailing `objects_dir` path are a single
 * allocation (see git_odb_backend_loose), so one free releases both.
 */
static void loose_backend__free(git_odb_backend *_backend)
{
	loose_backend *backend = (loose_backend *)_backend;

	git__free(backend);
}
1126 
/*
 * Create a loose-object ODB backend for the directory `objects_dir`.
 *
 * backend_out:       receives the newly allocated backend on success
 * objects_dir:       path of the objects directory; it is copied into
 *                    the backend and a trailing '/' is appended when
 *                    the path does not already end in one
 * compression_level: zlib compression level; a negative value selects
 *                    Z_BEST_SPEED
 * do_fsync:          stored as the backend's fsync flag
 * dir_mode:          mode for object directories; 0 selects
 *                    GIT_OBJECT_DIR_MODE
 * file_mode:         mode for object files; 0 selects
 *                    GIT_OBJECT_FILE_MODE
 *
 * Returns 0 on success.  Argument, size-overflow and allocation failures
 * are reported through the GIT_ASSERT_ARG / GIT_ERROR_CHECK_ALLOC*
 * macros.
 */
int git_odb_backend_loose(
	git_odb_backend **backend_out,
	const char *objects_dir,
	int compression_level,
	int do_fsync,
	unsigned int dir_mode,
	unsigned int file_mode)
{
	loose_backend *backend;
	size_t objects_dirlen, alloclen;

	GIT_ASSERT_ARG(backend_out);
	GIT_ASSERT_ARG(objects_dir);

	objects_dirlen = strlen(objects_dir);

	/* room for the path plus an optional trailing '/' and the NUL */
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, sizeof(loose_backend), objects_dirlen);
	GIT_ERROR_CHECK_ALLOC_ADD(&alloclen, alloclen, 2);
	backend = git__calloc(1, alloclen);
	GIT_ERROR_CHECK_ALLOC(backend);

	backend->parent.version = GIT_ODB_BACKEND_VERSION;
	backend->objects_dirlen = objects_dirlen;
	memcpy(backend->objects_dir, objects_dir, objects_dirlen);

	/*
	 * Ensure the stored path ends in a slash.  The length is tested
	 * first so that an empty `objects_dir` never indexes the byte in
	 * front of the array; an empty path still ends up as "/".
	 */
	if (objects_dirlen == 0 ||
	    backend->objects_dir[objects_dirlen - 1] != '/')
		backend->objects_dir[backend->objects_dirlen++] = '/';

	if (compression_level < 0)
		compression_level = Z_BEST_SPEED;

	if (dir_mode == 0)
		dir_mode = GIT_OBJECT_DIR_MODE;

	if (file_mode == 0)
		file_mode = GIT_OBJECT_FILE_MODE;

	backend->object_zlib_level = compression_level;
	backend->fsync_object_files = do_fsync;
	backend->object_dir_mode = dir_mode;
	backend->object_file_mode = file_mode;

	backend->parent.read = &loose_backend__read;
	backend->parent.write = &loose_backend__write;
	backend->parent.read_prefix = &loose_backend__read_prefix;
	backend->parent.read_header = &loose_backend__read_header;
	backend->parent.writestream = &loose_backend__writestream;
	backend->parent.readstream = &loose_backend__readstream;
	backend->parent.exists = &loose_backend__exists;
	backend->parent.exists_prefix = &loose_backend__exists_prefix;
	backend->parent.foreach = &loose_backend__foreach;
	backend->parent.freshen = &loose_backend__freshen;
	backend->parent.free = &loose_backend__free;

	*backend_out = (git_odb_backend *)backend;
	return 0;
}
1183