1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "blob.h"
9 
10 #include "git2/common.h"
11 #include "git2/object.h"
12 #include "git2/repository.h"
13 #include "git2/odb_backend.h"
14 
15 #include "filebuf.h"
16 #include "filter.h"
17 #include "buf_text.h"
18 
19 const void *git_blob_rawcontent(const git_blob *blob)
20 {
21 	GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL);
22 
23 	if (blob->raw)
24 		return blob->data.raw.data;
25 	else
26 		return git_odb_object_data(blob->data.odb);
27 }
28 
29 git_object_size_t git_blob_rawsize(const git_blob *blob)
30 {
31 	GIT_ASSERT_ARG(blob);
32 
33 	if (blob->raw)
34 		return blob->data.raw.size;
35 	else
36 		return (git_object_size_t)git_odb_object_size(blob->data.odb);
37 }
38 
39 int git_blob__getbuf(git_buf *buffer, git_blob *blob)
40 {
41 	git_object_size_t size = git_blob_rawsize(blob);
42 
43 	GIT_ERROR_CHECK_BLOBSIZE(size);
44 	return git_buf_set(buffer, git_blob_rawcontent(blob), (size_t)size);
45 }
46 
47 void git_blob__free(void *_blob)
48 {
49 	git_blob *blob = (git_blob *) _blob;
50 	if (!blob->raw)
51 		git_odb_object_free(blob->data.odb);
52 	git__free(blob);
53 }
54 
55 int git_blob__parse_raw(void *_blob, const char *data, size_t size)
56 {
57 	git_blob *blob = (git_blob *) _blob;
58 
59 	GIT_ASSERT_ARG(blob);
60 
61 	blob->raw = 1;
62 	blob->data.raw.data = data;
63 	blob->data.raw.size = size;
64 	return 0;
65 }
66 
67 int git_blob__parse(void *_blob, git_odb_object *odb_obj)
68 {
69 	git_blob *blob = (git_blob *) _blob;
70 
71 	GIT_ASSERT_ARG(blob);
72 
73 	git_cached_obj_incref((git_cached_obj *)odb_obj);
74 	blob->raw = 0;
75 	blob->data.odb = odb_obj;
76 	return 0;
77 }
78 
79 int git_blob_create_from_buffer(
80 	git_oid *id, git_repository *repo, const void *buffer, size_t len)
81 {
82 	int error;
83 	git_odb *odb;
84 	git_odb_stream *stream;
85 
86 	GIT_ASSERT_ARG(id);
87 	GIT_ASSERT_ARG(repo);
88 
89 	if ((error = git_repository_odb__weakptr(&odb, repo)) < 0 ||
90 		(error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB)) < 0)
91 		return error;
92 
93 	if ((error = git_odb_stream_write(stream, buffer, len)) == 0)
94 		error = git_odb_stream_finalize_write(id, stream);
95 
96 	git_odb_stream_free(stream);
97 	return error;
98 }
99 
100 static int write_file_stream(
101 	git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size)
102 {
103 	int fd, error;
104 	char buffer[FILEIO_BUFSIZE];
105 	git_odb_stream *stream = NULL;
106 	ssize_t read_len = -1;
107 	git_object_size_t written = 0;
108 
109 	if ((error = git_odb_open_wstream(
110 			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
111 		return error;
112 
113 	if ((fd = git_futils_open_ro(path)) < 0) {
114 		git_odb_stream_free(stream);
115 		return -1;
116 	}
117 
118 	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
119 		error = git_odb_stream_write(stream, buffer, read_len);
120 		written += read_len;
121 	}
122 
123 	p_close(fd);
124 
125 	if (written != file_size || read_len < 0) {
126 		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
127 		error = -1;
128 	}
129 
130 	if (!error)
131 		error = git_odb_stream_finalize_write(id, stream);
132 
133 	git_odb_stream_free(stream);
134 	return error;
135 }
136 
137 static int write_file_filtered(
138 	git_oid *id,
139 	git_object_size_t *size,
140 	git_odb *odb,
141 	const char *full_path,
142 	git_filter_list *fl)
143 {
144 	int error;
145 	git_buf tgt = GIT_BUF_INIT;
146 
147 	error = git_filter_list_apply_to_file(&tgt, fl, NULL, full_path);
148 
149 	/* Write the file to disk if it was properly filtered */
150 	if (!error) {
151 		*size = tgt.size;
152 
153 		error = git_odb_write(id, odb, tgt.ptr, tgt.size, GIT_OBJECT_BLOB);
154 	}
155 
156 	git_buf_dispose(&tgt);
157 	return error;
158 }
159 
160 static int write_symlink(
161 	git_oid *id, git_odb *odb, const char *path, size_t link_size)
162 {
163 	char *link_data;
164 	ssize_t read_len;
165 	int error;
166 
167 	link_data = git__malloc(link_size);
168 	GIT_ERROR_CHECK_ALLOC(link_data);
169 
170 	read_len = p_readlink(path, link_data, link_size);
171 	if (read_len != (ssize_t)link_size) {
172 		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);
173 		git__free(link_data);
174 		return -1;
175 	}
176 
177 	error = git_odb_write(id, odb, (void *)link_data, link_size, GIT_OBJECT_BLOB);
178 	git__free(link_data);
179 	return error;
180 }
181 
182 int git_blob__create_from_paths(
183 	git_oid *id,
184 	struct stat *out_st,
185 	git_repository *repo,
186 	const char *content_path,
187 	const char *hint_path,
188 	mode_t hint_mode,
189 	bool try_load_filters)
190 {
191 	int error;
192 	struct stat st;
193 	git_odb *odb = NULL;
194 	git_object_size_t size;
195 	mode_t mode;
196 	git_buf path = GIT_BUF_INIT;
197 
198 	GIT_ASSERT_ARG(hint_path || !try_load_filters);
199 
200 	if (!content_path) {
201 		if (git_repository__ensure_not_bare(repo, "create blob from file") < 0)
202 			return GIT_EBAREREPO;
203 
204 		if (git_buf_joinpath(
205 				&path, git_repository_workdir(repo), hint_path) < 0)
206 			return -1;
207 
208 		content_path = path.ptr;
209 	}
210 
211 	if ((error = git_path_lstat(content_path, &st)) < 0 ||
212 		(error = git_repository_odb(&odb, repo)) < 0)
213 		goto done;
214 
215 	if (S_ISDIR(st.st_mode)) {
216 		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
217 		error = GIT_EDIRECTORY;
218 		goto done;
219 	}
220 
221 	if (out_st)
222 		memcpy(out_st, &st, sizeof(st));
223 
224 	size = st.st_size;
225 	mode = hint_mode ? hint_mode : st.st_mode;
226 
227 	if (S_ISLNK(mode)) {
228 		error = write_symlink(id, odb, content_path, (size_t)size);
229 	} else {
230 		git_filter_list *fl = NULL;
231 
232 		if (try_load_filters)
233 			/* Load the filters for writing this file to the ODB */
234 			error = git_filter_list_load(
235 				&fl, repo, NULL, hint_path,
236 				GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);
237 
238 		if (error < 0)
239 			/* well, that didn't work */;
240 		else if (fl == NULL)
241 			/* No filters need to be applied to the document: we can stream
242 			 * directly from disk */
243 			error = write_file_stream(id, odb, content_path, size);
244 		else {
245 			/* We need to apply one or more filters */
246 			error = write_file_filtered(id, &size, odb, content_path, fl);
247 
248 			git_filter_list_free(fl);
249 		}
250 
251 		/*
252 		 * TODO: eventually support streaming filtered files, for files
253 		 * which are bigger than a given threshold. This is not a priority
254 		 * because applying a filter in streaming mode changes the final
255 		 * size of the blob, and without knowing its final size, the blob
256 		 * cannot be written in stream mode to the ODB.
257 		 *
258 		 * The plan is to do streaming writes to a tempfile on disk and then
259 		 * opening streaming that file to the ODB, using
260 		 * `write_file_stream`.
261 		 *
262 		 * CAREFULLY DESIGNED APIS YO
263 		 */
264 	}
265 
266 done:
267 	git_odb_free(odb);
268 	git_buf_dispose(&path);
269 
270 	return error;
271 }
272 
273 int git_blob_create_from_workdir(
274 	git_oid *id, git_repository *repo, const char *path)
275 {
276 	return git_blob__create_from_paths(id, NULL, repo, NULL, path, 0, true);
277 }
278 
279 int git_blob_create_from_disk(
280 	git_oid *id, git_repository *repo, const char *path)
281 {
282 	int error;
283 	git_buf full_path = GIT_BUF_INIT;
284 	const char *workdir, *hintpath;
285 
286 	if ((error = git_path_prettify(&full_path, path, NULL)) < 0) {
287 		git_buf_dispose(&full_path);
288 		return error;
289 	}
290 
291 	hintpath = git_buf_cstr(&full_path);
292 	workdir  = git_repository_workdir(repo);
293 
294 	if (workdir && !git__prefixcmp(hintpath, workdir))
295 		hintpath += strlen(workdir);
296 
297 	error = git_blob__create_from_paths(
298 		id, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true);
299 
300 	git_buf_dispose(&full_path);
301 	return error;
302 }
303 
/*
 * Write stream used to build a blob incrementally: written data is
 * collected in a file buffer and turned into a blob on commit.
 */
typedef struct {
	/* Must stay first: the code casts between git_writestream * and
	 * blob_writestream *. */
	git_writestream parent;
	/* File buffer receiving everything written to the stream */
	git_filebuf fbuf;
	/* Repository the blob will be created in */
	git_repository *repo;
	/* Copy of the caller's hint path, or NULL; freed with the stream */
	char *hintpath;
} blob_writestream;
310 
311 static int blob_writestream_close(git_writestream *_stream)
312 {
313 	blob_writestream *stream = (blob_writestream *) _stream;
314 
315 	git_filebuf_cleanup(&stream->fbuf);
316 	return 0;
317 }
318 
319 static void blob_writestream_free(git_writestream *_stream)
320 {
321 	blob_writestream *stream = (blob_writestream *) _stream;
322 
323 	git_filebuf_cleanup(&stream->fbuf);
324 	git__free(stream->hintpath);
325 	git__free(stream);
326 }
327 
328 static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len)
329 {
330 	blob_writestream *stream = (blob_writestream *) _stream;
331 
332 	return git_filebuf_write(&stream->fbuf, buffer, len);
333 }
334 
335 int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath)
336 {
337 	int error;
338 	git_buf path = GIT_BUF_INIT;
339 	blob_writestream *stream;
340 
341 	GIT_ASSERT_ARG(out);
342 	GIT_ASSERT_ARG(repo);
343 
344 	stream = git__calloc(1, sizeof(blob_writestream));
345 	GIT_ERROR_CHECK_ALLOC(stream);
346 
347 	if (hintpath) {
348 		stream->hintpath = git__strdup(hintpath);
349 		GIT_ERROR_CHECK_ALLOC(stream->hintpath);
350 	}
351 
352 	stream->repo = repo;
353 	stream->parent.write = blob_writestream_write;
354 	stream->parent.close = blob_writestream_close;
355 	stream->parent.free  = blob_writestream_free;
356 
357 	if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
358 		|| (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0)
359 		goto cleanup;
360 
361 	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY,
362 					       0666, 2 * 1024 * 1024)) < 0)
363 		goto cleanup;
364 
365 	*out = (git_writestream *) stream;
366 
367 cleanup:
368 	if (error < 0)
369 		blob_writestream_free((git_writestream *) stream);
370 
371 	git_buf_dispose(&path);
372 	return error;
373 }
374 
375 int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream)
376 {
377 	int error;
378 	blob_writestream *stream = (blob_writestream *) _stream;
379 
380 	/*
381 	 * We can make this more officient by avoiding writing to
382 	 * disk, but for now let's re-use the helper functions we
383 	 * have.
384 	 */
385 	if ((error = git_filebuf_flush(&stream->fbuf)) < 0)
386 		goto cleanup;
387 
388 	error = git_blob__create_from_paths(out, NULL, stream->repo, stream->fbuf.path_lock,
389 					    stream->hintpath, 0, !!stream->hintpath);
390 
391 cleanup:
392 	blob_writestream_free(_stream);
393 	return error;
394 
395 }
396 
397 int git_blob_is_binary(const git_blob *blob)
398 {
399 	git_buf content = GIT_BUF_INIT;
400 	git_object_size_t size;
401 
402 	GIT_ASSERT_ARG(blob);
403 
404 	size = git_blob_rawsize(blob);
405 
406 	git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
407 		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
408 	return git_buf_text_is_binary(&content);
409 }
410 
411 int git_blob_filter(
412 	git_buf *out,
413 	git_blob *blob,
414 	const char *path,
415 	git_blob_filter_options *given_opts)
416 {
417 	int error = 0;
418 	git_filter_list *fl = NULL;
419 	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
420 	git_filter_flag_t flags = GIT_FILTER_DEFAULT;
421 
422 	GIT_ASSERT_ARG(blob);
423 	GIT_ASSERT_ARG(path);
424 	GIT_ASSERT_ARG(out);
425 
426 	GIT_ERROR_CHECK_VERSION(
427 		given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options");
428 
429 	if (git_buf_sanitize(out) < 0)
430 		return -1;
431 
432 	if (given_opts != NULL)
433 		memcpy(&opts, given_opts, sizeof(git_blob_filter_options));
434 
435 	if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) != 0 &&
436 	    git_blob_is_binary(blob))
437 		return 0;
438 
439 	if ((opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES) != 0)
440 		flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES;
441 
442 	if ((opts.flags & GIT_BLOB_FILTER_ATTTRIBUTES_FROM_HEAD) != 0)
443 		flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD;
444 
445 	if (!(error = git_filter_list_load(
446 			&fl, git_blob_owner(blob), blob, path,
447 			GIT_FILTER_TO_WORKTREE, flags))) {
448 
449 		error = git_filter_list_apply_to_blob(out, fl, blob);
450 
451 		git_filter_list_free(fl);
452 	}
453 
454 	return error;
455 }
456 
457 /* Deprecated functions */
458 
459 #ifndef GIT_DEPRECATE_HARD
460 int git_blob_create_frombuffer(
461 	git_oid *id, git_repository *repo, const void *buffer, size_t len)
462 {
463 	return git_blob_create_from_buffer(id, repo, buffer, len);
464 }
465 
466 int git_blob_create_fromworkdir(git_oid *id, git_repository *repo, const char *relative_path)
467 {
468 	return git_blob_create_from_workdir(id, repo, relative_path);
469 }
470 
471 int git_blob_create_fromdisk(git_oid *id, git_repository *repo, const char *path)
472 {
473 	return git_blob_create_from_disk(id, repo, path);
474 }
475 
476 int git_blob_create_fromstream(
477     git_writestream **out,
478     git_repository *repo,
479     const char *hintpath)
480 {
481 	return  git_blob_create_from_stream(out, repo, hintpath);
482 }
483 
484 int git_blob_create_fromstream_commit(
485 	git_oid *out,
486 	git_writestream *stream)
487 {
488 	return git_blob_create_from_stream_commit(out, stream);
489 }
490 
491 int git_blob_filtered_content(
492 	git_buf *out,
493 	git_blob *blob,
494 	const char *path,
495 	int check_for_binary_data)
496 {
497 	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
498 
499 	if (check_for_binary_data)
500 		opts.flags |= GIT_BLOB_FILTER_CHECK_FOR_BINARY;
501 	else
502 		opts.flags &= ~GIT_BLOB_FILTER_CHECK_FOR_BINARY;
503 
504 	return git_blob_filter(out, blob, path, &opts);
505 }
506 #endif
507