1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "blob.h"
9 
10 #include "git2/common.h"
11 #include "git2/object.h"
12 #include "git2/repository.h"
13 #include "git2/odb_backend.h"
14 
15 #include "filebuf.h"
16 #include "filter.h"
17 
git_blob_rawcontent(const git_blob * blob)18 const void *git_blob_rawcontent(const git_blob *blob)
19 {
20 	GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL);
21 
22 	if (blob->raw)
23 		return blob->data.raw.data;
24 	else
25 		return git_odb_object_data(blob->data.odb);
26 }
27 
git_blob_rawsize(const git_blob * blob)28 git_object_size_t git_blob_rawsize(const git_blob *blob)
29 {
30 	GIT_ASSERT_ARG(blob);
31 
32 	if (blob->raw)
33 		return blob->data.raw.size;
34 	else
35 		return (git_object_size_t)git_odb_object_size(blob->data.odb);
36 }
37 
/*
 * Copy the blob's raw content into `buffer`.
 *
 * The blob size is validated with GIT_ERROR_CHECK_BLOBSIZE before it is
 * narrowed to size_t.  Returns the result of git_buf_set().
 */
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
	const void *content;
	git_object_size_t rawsize;

	rawsize = git_blob_rawsize(blob);
	GIT_ERROR_CHECK_BLOBSIZE(rawsize);

	content = git_blob_rawcontent(blob);
	return git_buf_set(buffer, content, (size_t)rawsize);
}
45 
git_blob__free(void * _blob)46 void git_blob__free(void *_blob)
47 {
48 	git_blob *blob = (git_blob *) _blob;
49 	if (!blob->raw)
50 		git_odb_object_free(blob->data.odb);
51 	git__free(blob);
52 }
53 
git_blob__parse_raw(void * _blob,const char * data,size_t size)54 int git_blob__parse_raw(void *_blob, const char *data, size_t size)
55 {
56 	git_blob *blob = (git_blob *) _blob;
57 
58 	GIT_ASSERT_ARG(blob);
59 
60 	blob->raw = 1;
61 	blob->data.raw.data = data;
62 	blob->data.raw.size = size;
63 	return 0;
64 }
65 
/*
 * Initialise a blob from an ODB object.
 *
 * The blob takes its own reference on `odb_obj` (dropped again in
 * git_blob__free()).  Always returns 0 once the argument check passes.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
	git_blob *blob = _blob;

	GIT_ASSERT_ARG(blob);

	blob->data.odb = odb_obj;
	blob->raw = 0;
	git_cached_obj_incref((git_cached_obj *)odb_obj);

	return 0;
}
77 
/*
 * Write `len` bytes from `buffer` into the repository's object
 * database as a blob and store the resulting object id in `id`.
 *
 * Returns 0 on success or the error code of the first failing step.
 */
int git_blob_create_from_buffer(
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
{
	git_odb_stream *stream;
	git_odb *odb;
	int error;

	GIT_ASSERT_ARG(id);
	GIT_ASSERT_ARG(repo);

	error = git_repository_odb__weakptr(&odb, repo);
	if (error < 0)
		return error;

	error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB);
	if (error < 0)
		return error;

	/* Only finalize when the whole buffer went into the stream. */
	error = git_odb_stream_write(stream, buffer, len);
	if (error == 0)
		error = git_odb_stream_finalize_write(id, stream);

	git_odb_stream_free(stream);
	return error;
}
98 
/*
 * Stream the file at `path` into the object database as a blob.
 *
 * `file_size` is the size announced to the ODB write stream; the number
 * of bytes actually read must match it exactly, otherwise the write is
 * rejected.
 *
 * Returns 0 on success and stores the new object id in `id`.  On failure
 * a negative value is returned: the code from opening or writing the ODB
 * stream, or -1 if the file could not be opened or read completely.
 */
static int write_file_stream(
	git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size)
{
	int fd, error;
	char buffer[FILEIO_BUFSIZE];
	git_odb_stream *stream = NULL;
	ssize_t read_len = -1;
	git_object_size_t written = 0;

	if ((error = git_odb_open_wstream(
			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
		return error;

	if ((fd = git_futils_open_ro(path)) < 0) {
		git_odb_stream_free(stream);
		return -1;
	}

	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
		error = git_odb_stream_write(stream, buffer, read_len);
		written += read_len;
	}

	p_close(fd);

	/*
	 * Only diagnose a short or failed read when the stream itself did
	 * not fail: an aborted write also leaves `written` short of
	 * `file_size`, and reporting that as a read failure would replace
	 * the real error code and message.
	 */
	if (!error && (read_len < 0 || written != file_size)) {
		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
		error = -1;
	}

	if (!error)
		error = git_odb_stream_finalize_write(id, stream);

	git_odb_stream_free(stream);
	return error;
}
135 
/*
 * Run the filter list `fl` over the file at `full_path` and write the
 * filtered result to the object database as a blob.
 *
 * On successful filtering `*size` is updated to the filtered length
 * before the ODB write is attempted.  Returns 0 on success or the error
 * code of the failing step.
 */
static int write_file_filtered(
	git_oid *id,
	git_object_size_t *size,
	git_odb *odb,
	const char *full_path,
	git_filter_list *fl,
	git_repository* repo)
{
	git_buf filtered = GIT_BUF_INIT;
	int error;

	error = git_filter_list_apply_to_file(&filtered, fl, repo, full_path);
	if (error)
		goto out;

	/* The file was properly filtered: store the result in the ODB */
	*size = filtered.size;
	error = git_odb_write(
		id, odb, filtered.ptr, filtered.size, GIT_OBJECT_BLOB);

out:
	git_buf_dispose(&filtered);
	return error;
}
159 
/*
 * Store the target of the symlink at `path` as a blob.
 *
 * `link_size` is the expected length of the link target; reading any
 * other number of bytes is treated as an error.  Returns the result of
 * git_odb_write() on success, -1 otherwise.
 */
static int write_symlink(
	git_oid *id, git_odb *odb, const char *path, size_t link_size)
{
	char *link_data;
	ssize_t nread;
	int error = -1;

	link_data = git__malloc(link_size);
	GIT_ERROR_CHECK_ALLOC(link_data);

	nread = p_readlink(path, link_data, link_size);

	if (nread == (ssize_t)link_size)
		error = git_odb_write(
			id, odb, (void *)link_data, link_size, GIT_OBJECT_BLOB);
	else
		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);

	git__free(link_data);
	return error;
}
181 
/*
 * Create a blob from a file on disk.
 *
 * id               receives the id of the written blob
 * out_st           if non-NULL, receives the lstat() data of the file
 * repo             repository whose object database is written to
 * content_path     file to read; when NULL, the path is built from the
 *                  repository's working directory and `hint_path`
 * hint_path        path used to look up filters (required whenever
 *                  `try_load_filters` is set)
 * hint_mode        when non-zero, used instead of the mode reported by
 *                  lstat() to decide whether the entry is a symlink
 * try_load_filters whether to load and apply the to-ODB filters
 *
 * Returns 0 on success, GIT_EDIRECTORY if the path is a directory, or
 * another negative error code.
 */
int git_blob__create_from_paths(
	git_oid *id,
	struct stat *out_st,
	git_repository *repo,
	const char *content_path,
	const char *hint_path,
	mode_t hint_mode,
	bool try_load_filters)
{
	int error;
	struct stat st;
	git_odb *odb = NULL;
	git_object_size_t size;
	mode_t mode;
	git_buf path = GIT_BUF_INIT;

	GIT_ASSERT_ARG(hint_path || !try_load_filters);

	if (!content_path) {
		if (git_repository_workdir_path(&path, repo, hint_path) < 0) {
			/*
			 * Leave through the common exit so that whatever was
			 * already stored in `path` gets released; the failure
			 * is still reported as -1, as before.
			 */
			error = -1;
			goto done;
		}

		content_path = path.ptr;
	}

	if ((error = git_path_lstat(content_path, &st)) < 0 ||
		(error = git_repository_odb(&odb, repo)) < 0)
		goto done;

	if (S_ISDIR(st.st_mode)) {
		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
		error = GIT_EDIRECTORY;
		goto done;
	}

	if (out_st)
		memcpy(out_st, &st, sizeof(st));

	size = st.st_size;
	mode = hint_mode ? hint_mode : st.st_mode;

	if (S_ISLNK(mode)) {
		error = write_symlink(id, odb, content_path, (size_t)size);
	} else {
		git_filter_list *fl = NULL;

		/* `error` is 0 here, so skipping the load means "no filters" */
		if (try_load_filters) {
			/* Load the filters for writing this file to the ODB */
			error = git_filter_list_load(
				&fl, repo, NULL, hint_path,
				GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
		}

		if (error < 0) {
			/* well, that didn't work */
		} else if (fl == NULL) {
			/* No filters need to be applied to the document: we can stream
			 * directly from disk */
			error = write_file_stream(id, odb, content_path, size);
		} else {
			/* We need to apply one or more filters */
			error = write_file_filtered(id, &size, odb, content_path, fl, repo);

			git_filter_list_free(fl);
		}

		/*
		 * TODO: eventually support streaming filtered files, for files
		 * which are bigger than a given threshold. This is not a priority
		 * because applying a filter in streaming mode changes the final
		 * size of the blob, and without knowing its final size, the blob
		 * cannot be written in stream mode to the ODB.
		 *
		 * The plan is to do streaming writes to a tempfile on disk and
		 * then stream that file to the ODB, using `write_file_stream`.
		 */
	}

done:
	/* `odb` may still be NULL here when an early step failed */
	git_odb_free(odb);
	git_buf_dispose(&path);

	return error;
}
268 
/*
 * Create a blob from a file inside the repository's working directory.
 *
 * `path` doubles as the filter hint path; no explicit content path and
 * no mode override are passed, and filters are always attempted.
 */
int git_blob_create_from_workdir(
	git_oid *id, git_repository *repo, const char *path)
{
	const char *content_path = NULL;
	const mode_t hint_mode = 0;

	return git_blob__create_from_paths(
		id, NULL, repo, content_path, path, hint_mode, true);
}
274 
/*
 * Create a blob from an arbitrary file on disk.
 *
 * The path is prettified first.  If the result lies below the
 * repository's working directory, the remainder after the workdir
 * prefix is used as the filter hint path and filters are attempted;
 * otherwise the file is written without filters.
 */
int git_blob_create_from_disk(
	git_oid *id, git_repository *repo, const char *path)
{
	git_buf full_path = GIT_BUF_INIT;
	const char *workdir, *hintpath = NULL;
	int error;

	error = git_path_prettify(&full_path, path, NULL);
	if (error < 0)
		goto out;

	workdir = git_repository_workdir(repo);

	if (workdir != NULL && git__prefixcmp(full_path.ptr, workdir) == 0)
		hintpath = full_path.ptr + strlen(workdir);

	error = git_blob__create_from_paths(
		id, NULL, repo, git_buf_cstr(&full_path),
		hintpath, 0, hintpath != NULL);

out:
	git_buf_dispose(&full_path);
	return error;
}
298 
/*
 * Write stream returned by git_blob_create_from_stream().
 *
 * Data written to the stream is collected in a file buffer and turned
 * into a blob by git_blob_create_from_stream_commit().
 */
typedef struct {
	/* Generic stream interface; the code casts between
	 * `git_writestream *` and `blob_writestream *`, so this member has
	 * to stay first. */
	git_writestream parent;
	/* Buffered file that receives everything written to the stream */
	git_filebuf fbuf;
	/* Repository the blob will be created in */
	git_repository *repo;
	/* Private copy of the hint path given at creation (may be NULL);
	 * released in blob_writestream_free() */
	char *hintpath;
} blob_writestream;
305 
/*
 * `close` callback of the blob write stream: cleans up the underlying
 * file buffer.  Always returns 0.
 */
static int blob_writestream_close(git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	git_filebuf *fbuf = &ws->fbuf;

	git_filebuf_cleanup(fbuf);

	return 0;
}
313 
/*
 * `free` callback of the blob write stream: cleans up the file buffer,
 * then releases the hint path copy and the stream itself.
 */
static void blob_writestream_free(git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	char *hint = ws->hintpath;

	git_filebuf_cleanup(&ws->fbuf);

	git__free(hint);
	git__free(ws);
}
322 
/*
 * `write` callback of the blob write stream: appends `len` bytes from
 * `buffer` to the stream's file buffer and returns that call's result.
 */
static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	git_filebuf *fbuf = &ws->fbuf;

	return git_filebuf_write(fbuf, buffer, len);
}
329 
/*
 * Create a write stream whose contents will become a blob.
 *
 * out       receives the new stream on success
 * repo      repository the blob will be written to
 * hintpath  optional path used later to decide which filters apply;
 *           the stream keeps its own copy
 *
 * The data is buffered in a temporary file named "streamed" under the
 * repository's objects directory.  Returns 0 on success or a negative
 * error code; on failure nothing is stored in `*out` and everything
 * allocated here is released again.
 */
int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath)
{
	int error;
	git_buf path = GIT_BUF_INIT;
	blob_writestream *stream;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(repo);

	stream = git__calloc(1, sizeof(blob_writestream));
	GIT_ERROR_CHECK_ALLOC(stream);

	if (hintpath) {
		stream->hintpath = git__strdup(hintpath);

		/*
		 * Do not return directly on allocation failure: the stream
		 * allocated above would be leaked.
		 */
		if (stream->hintpath == NULL) {
			error = -1;
			goto cleanup;
		}
	}

	stream->repo = repo;
	stream->parent.write = blob_writestream_write;
	stream->parent.close = blob_writestream_close;
	stream->parent.free  = blob_writestream_free;

	if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
		|| (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0)
		goto cleanup;

	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY,
					       0666, 2 * 1024 * 1024)) < 0)
		goto cleanup;

	*out = (git_writestream *) stream;

cleanup:
	if (error < 0)
		blob_writestream_free((git_writestream *) stream);

	git_buf_dispose(&path);
	return error;
}
369 
/*
 * Turn everything written to the stream into a blob and store its id
 * in `out`.
 *
 * The stream is freed in every case, so it must not be used after this
 * call.  Returns 0 on success or a negative error code.
 */
int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream)
{
	blob_writestream *stream = (blob_writestream *) _stream;
	int error;

	/*
	 * We can make this more efficient by avoiding writing to
	 * disk, but for now let's re-use the helper functions we
	 * have.
	 */
	error = git_filebuf_flush(&stream->fbuf);

	if (error >= 0)
		error = git_blob__create_from_paths(
			out, NULL, stream->repo, stream->fbuf.path_lock,
			stream->hintpath, 0, stream->hintpath != NULL);

	blob_writestream_free(_stream);
	return error;
}
391 
git_blob_is_binary(const git_blob * blob)392 int git_blob_is_binary(const git_blob *blob)
393 {
394 	git_buf content = GIT_BUF_INIT;
395 	git_object_size_t size;
396 
397 	GIT_ASSERT_ARG(blob);
398 
399 	size = git_blob_rawsize(blob);
400 
401 	git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
402 		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
403 	return git_buf_is_binary(&content);
404 }
405 
/*
 * Initialise `opts` from the GIT_BLOB_FILTER_OPTIONS_INIT template for
 * the given structure `version`.  Returns 0 unless the initialisation
 * macro bails out early.
 */
int git_blob_filter_options_init(
	git_blob_filter_options *opts,
	unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
		opts,
		version,
		git_blob_filter_options,
		GIT_BLOB_FILTER_OPTIONS_INIT);

	return 0;
}
414 
/*
 * Apply the to-worktree filters that match `path` to the content of
 * `blob` and place the result in `out`.
 *
 * given_opts may be NULL, in which case the default options are used.
 * When GIT_BLOB_FILTER_CHECK_FOR_BINARY is set and the blob looks
 * binary, no filtering takes place and 0 is returned.
 *
 * Returns 0 on success or a negative error code.
 */
int git_blob_filter(
	git_buf *out,
	git_blob *blob,
	const char *path,
	git_blob_filter_options *given_opts)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
	git_filter_options filter_opts = GIT_FILTER_OPTIONS_INIT;
	git_filter_list *fl = NULL;
	int error;

	GIT_ASSERT_ARG(blob);
	GIT_ASSERT_ARG(path);
	GIT_ASSERT_ARG(out);

	GIT_ERROR_CHECK_VERSION(
		given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options");

	if (git_buf_sanitize(out) < 0)
		return -1;

	if (given_opts)
		memcpy(&opts, given_opts, sizeof(opts));

	if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) &&
	    git_blob_is_binary(blob))
		return 0;

	/* Translate the blob filter flags into generic filter flags */
	if (opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES)
		filter_opts.flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES;

	if (opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_HEAD)
		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD;

	if (opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_COMMIT) {
		const git_oid *commit_id = &opts.attr_commit_id;

#ifndef GIT_DEPRECATE_HARD
		/* The deprecated pointer member wins when it is set */
		if (opts.commit_id)
			commit_id = opts.commit_id;
#endif

		filter_opts.flags |= GIT_FILTER_ATTRIBUTES_FROM_COMMIT;
		git_oid_cpy(&filter_opts.attr_commit_id, commit_id);
	}

	error = git_filter_list_load_ext(
		&fl, git_blob_owner(blob), blob, path,
		GIT_FILTER_TO_WORKTREE, &filter_opts);
	if (error)
		return error;

	error = git_filter_list_apply_to_blob(out, fl, blob);
	git_filter_list_free(fl);

	return error;
}
471 
472 /* Deprecated functions */
473 
474 #ifndef GIT_DEPRECATE_HARD
/* Deprecated alias: forwards to git_blob_create_from_buffer(). */
int git_blob_create_frombuffer(
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
{
	int error = git_blob_create_from_buffer(id, repo, buffer, len);

	return error;
}
480 
/* Deprecated alias: forwards to git_blob_create_from_workdir(). */
int git_blob_create_fromworkdir(git_oid *id, git_repository *repo, const char *relative_path)
{
	int error = git_blob_create_from_workdir(id, repo, relative_path);

	return error;
}
485 
/* Deprecated alias: forwards to git_blob_create_from_disk(). */
int git_blob_create_fromdisk(git_oid *id, git_repository *repo, const char *path)
{
	int error = git_blob_create_from_disk(id, repo, path);

	return error;
}
490 
/* Deprecated alias: forwards to git_blob_create_from_stream(). */
int git_blob_create_fromstream(
	git_writestream **out,
	git_repository *repo,
	const char *hintpath)
{
	int error = git_blob_create_from_stream(out, repo, hintpath);

	return error;
}
498 
/* Deprecated alias: forwards to git_blob_create_from_stream_commit(). */
int git_blob_create_fromstream_commit(
	git_oid *out,
	git_writestream *stream)
{
	int error = git_blob_create_from_stream_commit(out, stream);

	return error;
}
505 
/*
 * Deprecated front end to git_blob_filter().
 *
 * Starts from the default filter options and switches the binary check
 * on or off according to `check_for_binary_data`.
 */
int git_blob_filtered_content(
	git_buf *out,
	git_blob *blob,
	const char *path,
	int check_for_binary_data)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;

	opts.flags = check_for_binary_data ?
		(opts.flags | GIT_BLOB_FILTER_CHECK_FOR_BINARY) :
		(opts.flags & ~GIT_BLOB_FILTER_CHECK_FOR_BINARY);

	return git_blob_filter(out, blob, path, &opts);
}
521 #endif
522