1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "blob.h"
9 
10 #include "git2/common.h"
11 #include "git2/object.h"
12 #include "git2/repository.h"
13 #include "git2/odb_backend.h"
14 
15 #include "filebuf.h"
16 #include "filter.h"
17 #include "buf_text.h"
18 
git_blob_rawcontent(const git_blob * blob)19 const void *git_blob_rawcontent(const git_blob *blob)
20 {
21 	assert(blob);
22 	if (blob->raw)
23 		return blob->data.raw.data;
24 	else
25 		return git_odb_object_data(blob->data.odb);
26 }
27 
git_blob_rawsize(const git_blob * blob)28 git_off_t git_blob_rawsize(const git_blob *blob)
29 {
30 	assert(blob);
31 	if (blob->raw)
32 		return blob->data.raw.size;
33 	else
34 		return (git_off_t)git_odb_object_size(blob->data.odb);
35 }
36 
/*
 * Copy the blob's content into `buffer`.
 *
 * Returns the result of git_buf_set().  The size is validated first
 * with GIT_ERROR_CHECK_BLOBSIZE (presumably bails out when the size
 * cannot be represented as a size_t -- confirm against the macro).
 */
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
	const void *content;
	git_off_t size = git_blob_rawsize(blob);

	GIT_ERROR_CHECK_BLOBSIZE(size);

	content = git_blob_rawcontent(blob);
	return git_buf_set(buffer, content, (size_t)size);
}
44 
git_blob__free(void * _blob)45 void git_blob__free(void *_blob)
46 {
47 	git_blob *blob = (git_blob *) _blob;
48 	if (!blob->raw)
49 		git_odb_object_free(blob->data.odb);
50 	git__free(blob);
51 }
52 
git_blob__parse_raw(void * _blob,const char * data,size_t size)53 int git_blob__parse_raw(void *_blob, const char *data, size_t size)
54 {
55 	git_blob *blob = (git_blob *) _blob;
56 	assert(blob);
57 	blob->raw = 1;
58 	blob->data.raw.data = data;
59 	blob->data.raw.size = size;
60 	return 0;
61 }
62 
/*
 * Initialize a blob from an ODB object.
 *
 * The blob takes its own reference on `odb_obj` (via
 * git_cached_obj_incref) and is marked as not `raw`.  Always returns 0.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
	git_blob *self = (git_blob *) _blob;

	assert(self);

	git_cached_obj_incref((git_cached_obj *)odb_obj);

	self->data.odb = odb_obj;
	self->raw = 0;

	return 0;
}
72 
/*
 * Write `len` bytes from `buffer` into the repository's object database
 * as a blob, storing the resulting object id in `id`.
 *
 * Returns 0 on success or the error reported by the failing ODB call.
 */
int git_blob_create_frombuffer(
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
{
	git_odb *odb;
	git_odb_stream *stream;
	int error;

	assert(id && repo);

	error = git_repository_odb__weakptr(&odb, repo);
	if (error < 0)
		return error;

	error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB);
	if (error < 0)
		return error;

	/* Only finalize (and thereby produce the id) if the write succeeded */
	error = git_odb_stream_write(stream, buffer, len);
	if (error == 0)
		error = git_odb_stream_finalize_write(id, stream);

	git_odb_stream_free(stream);
	return error;
}
92 
/*
 * Stream the file at `path` into the object database as a blob.
 *
 * `file_size` is the size announced to the ODB write stream; the number
 * of bytes actually read from disk must match it exactly, otherwise the
 * write is abandoned and -1 is returned.
 *
 * Returns 0 on success (with `id` filled in), -1 when the file cannot
 * be opened or read in full, or the error reported by the ODB stream.
 */
static int write_file_stream(
	git_oid *id, git_odb *odb, const char *path, git_off_t file_size)
{
	int fd, error;
	char buffer[FILEIO_BUFSIZE];
	git_odb_stream *stream = NULL;
	ssize_t read_len = -1;
	git_off_t written = 0;

	if ((error = git_odb_open_wstream(
			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
		return error;

	if ((fd = git_futils_open_ro(path)) < 0) {
		git_odb_stream_free(stream);
		return -1;
	}

	/* Stop at the first failed write; `error` then carries its code */
	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
		error = git_odb_stream_write(stream, buffer, read_len);
		written += read_len;
	}

	p_close(fd);

	/*
	 * Report a read problem (I/O error, or the file changed size under
	 * us) only when the stream write itself did not fail: otherwise the
	 * real error from the ODB would be replaced by a misleading
	 * "failed to read" message.
	 */
	if (!error && (read_len < 0 || written != file_size)) {
		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
		error = -1;
	}

	if (!error)
		error = git_odb_stream_finalize_write(id, stream);

	git_odb_stream_free(stream);
	return error;
}
129 
/*
 * Run the file at `full_path` through the filter list `fl` and write
 * the filtered result to the object database as a blob.
 *
 * On successful filtering, `*size` is updated to the filtered length
 * before the ODB write is attempted.  Returns 0 on success or the error
 * from the filter or the ODB.
 */
static int write_file_filtered(
	git_oid *id,
	git_off_t *size,
	git_odb *odb,
	const char *full_path,
	git_filter_list *fl)
{
	git_buf filtered = GIT_BUF_INIT;
	int error;

	error = git_filter_list_apply_to_file(&filtered, fl, NULL, full_path);

	/* Store the object only if filtering succeeded */
	if (error == 0) {
		*size = filtered.size;

		error = git_odb_write(
			id, odb, filtered.ptr, filtered.size, GIT_OBJECT_BLOB);
	}

	git_buf_dispose(&filtered);
	return error;
}
152 
/*
 * Write the target of the symlink at `path` to the object database as
 * a blob.
 *
 * `link_size` is the expected length of the link target; reading a
 * different number of bytes is treated as a failure (-1).
 */
static int write_symlink(
	git_oid *id, git_odb *odb, const char *path, size_t link_size)
{
	char *target;
	ssize_t nread;
	int error;

	target = git__malloc(link_size);
	GIT_ERROR_CHECK_ALLOC(target);

	nread = p_readlink(path, target, link_size);

	if (nread == (ssize_t)link_size) {
		error = git_odb_write(id, odb, (void *)target, link_size, GIT_OBJECT_BLOB);
	} else {
		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);
		error = -1;
	}

	git__free(target);
	return error;
}
174 
/*
 * Create a blob in the repository's object database from a file on disk.
 *
 * - `content_path` is the file to read.  When NULL, the file is located
 *   by joining `hint_path` onto the repository's working directory,
 *   which requires a non-bare repository (GIT_EBAREREPO otherwise).
 * - `hint_path` is the path handed to the filter loader; it must be
 *   non-NULL whenever `try_load_filters` is set.
 * - `hint_mode`, when non-zero, overrides the mode reported by lstat
 *   for deciding whether the entry is a symlink.
 * - `out_st`, when non-NULL, receives a copy of the lstat result.
 *
 * Directories are rejected with GIT_EDIRECTORY.  Symlinks are stored as
 * their link target; regular content is either streamed directly or,
 * when filters apply, filtered in memory first.
 *
 * Returns 0 on success or a negative error code.
 */
int git_blob__create_from_paths(
	git_oid *id,
	struct stat *out_st,
	git_repository *repo,
	const char *content_path,
	const char *hint_path,
	mode_t hint_mode,
	bool try_load_filters)
{
	git_buf workdir_path = GIT_BUF_INIT;
	git_filter_list *filters = NULL;
	git_odb *odb = NULL;
	struct stat st;
	git_off_t size;
	mode_t mode;
	int error;

	assert(hint_path || !try_load_filters);

	/* No explicit content path: resolve the hint against the workdir */
	if (content_path == NULL) {
		if (git_repository__ensure_not_bare(repo, "create blob from file") < 0)
			return GIT_EBAREREPO;

		if (git_buf_joinpath(
				&workdir_path, git_repository_workdir(repo), hint_path) < 0)
			return -1;

		content_path = workdir_path.ptr;
	}

	error = git_path_lstat(content_path, &st);
	if (error < 0)
		goto done;

	error = git_repository_odb(&odb, repo);
	if (error < 0)
		goto done;

	if (S_ISDIR(st.st_mode)) {
		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
		error = GIT_EDIRECTORY;
		goto done;
	}

	if (out_st)
		memcpy(out_st, &st, sizeof(st));

	size = st.st_size;

	/* A caller-supplied mode takes precedence over what is on disk */
	if (hint_mode)
		mode = hint_mode;
	else
		mode = st.st_mode;

	if (S_ISLNK(mode)) {
		error = write_symlink(id, odb, content_path, (size_t)size);
		goto done;
	}

	if (try_load_filters) {
		/* Load the filters for writing this file to the ODB */
		error = git_filter_list_load(
			&filters, repo, NULL, hint_path,
			GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);

		if (error < 0)
			goto done;
	}

	/*
	 * TODO: eventually support streaming filtered files that exceed a
	 * given size threshold.  Filtering changes the final size of the
	 * blob, and the ODB stream needs that size up front, so the plan is
	 * to stream the filtered output to a tempfile on disk and then feed
	 * that file to the ODB with `write_file_stream`.
	 */
	if (filters == NULL) {
		/* Nothing to filter: stream the file straight from disk */
		error = write_file_stream(id, odb, content_path, size);
	} else {
		/* One or more filters apply: filter in memory, then write */
		error = write_file_filtered(id, &size, odb, content_path, filters);

		git_filter_list_free(filters);
	}

done:
	git_odb_free(odb);
	git_buf_dispose(&workdir_path);

	return error;
}
265 
/*
 * Create a blob from a file inside the repository's working directory.
 *
 * `path` is passed as the hint path with no explicit content path, so
 * the file is located relative to the workdir; filters are loaded and
 * no mode override or stat output is requested.
 */
int git_blob_create_fromworkdir(
	git_oid *id, git_repository *repo, const char *path)
{
	const char *content_path = NULL;
	const mode_t hint_mode = 0;

	return git_blob__create_from_paths(
		id, NULL, repo, content_path, path, hint_mode, true);
}
271 
/*
 * Create a blob from an arbitrary file on disk.
 *
 * `path` is first prettified into `abs_path`.  When the repository has
 * a working directory and the prettified path starts with it, that
 * prefix is stripped to form the hint path used for loading filters;
 * otherwise the full prettified path serves as the hint.
 */
int git_blob_create_fromdisk(
	git_oid *id, git_repository *repo, const char *path)
{
	git_buf abs_path = GIT_BUF_INIT;
	const char *hint, *workdir;
	int error;

	error = git_path_prettify(&abs_path, path, NULL);
	if (error < 0)
		goto out;

	hint = git_buf_cstr(&abs_path);
	workdir = git_repository_workdir(repo);

	if (workdir != NULL && git__prefixcmp(hint, workdir) == 0)
		hint += strlen(workdir);

	error = git_blob__create_from_paths(
		id, NULL, repo, git_buf_cstr(&abs_path), hint, 0, true);

out:
	git_buf_dispose(&abs_path);
	return error;
}
296 
/*
 * Write stream used to build a blob from streamed data.  Written bytes
 * are collected in a filebuf until the stream is committed or freed.
 */
typedef struct {
	/* Must stay the first member: the code casts between
	 * git_writestream * and blob_writestream *. */
	git_writestream parent;
	/* Backing file buffer that receives everything written */
	git_filebuf fbuf;
	/* Repository the blob will be created in (pointer stored as given) */
	git_repository *repo;
	/* Private copy of the caller's hint path; NULL when none was given */
	char *hintpath;
} blob_writestream;
303 
/*
 * `close` callback of the blob write stream: cleans up the backing
 * filebuf.  Always returns 0.
 */
static int blob_writestream_close(git_writestream *_stream)
{
	blob_writestream *self = (blob_writestream *) _stream;
	git_filebuf *fbuf = &self->fbuf;

	git_filebuf_cleanup(fbuf);

	return 0;
}
311 
/*
 * `free` callback of the blob write stream: cleans up the backing
 * filebuf, then frees the hint path copy and the stream itself.
 */
static void blob_writestream_free(git_writestream *_stream)
{
	blob_writestream *self = (blob_writestream *) _stream;
	git_filebuf *fbuf = &self->fbuf;

	git_filebuf_cleanup(fbuf);

	git__free(self->hintpath);
	git__free(self);
}
320 
/*
 * `write` callback of the blob write stream: appends `len` bytes from
 * `buffer` to the backing filebuf and returns git_filebuf_write()'s
 * result.
 */
static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len)
{
	blob_writestream *self = (blob_writestream *) _stream;
	git_filebuf *fbuf = &self->fbuf;

	return git_filebuf_write(fbuf, buffer, len);
}
327 
/*
 * Create a write stream whose contents will become a blob.
 *
 * The stream buffers written data in a temporary filebuf opened under
 * the repository's objects directory (path component "streamed").
 * `hintpath` may be NULL; when given, a private copy is kept on the
 * stream so it can later be used as the hint path for filtering.
 *
 * On success `*out` receives the new stream and 0 is returned.  On
 * failure a negative error code is returned, `*out` is left untouched
 * and everything allocated here is released.
 */
int git_blob_create_fromstream(git_writestream **out, git_repository *repo, const char *hintpath)
{
	int error;
	git_buf path = GIT_BUF_INIT;
	blob_writestream *stream;

	assert(out && repo);

	stream = git__calloc(1, sizeof(blob_writestream));
	GIT_ERROR_CHECK_ALLOC(stream);

	if (hintpath) {
		stream->hintpath = git__strdup(hintpath);

		/*
		 * Do not return directly here: the stream has already been
		 * allocated and would leak.  Go through the common cleanup
		 * path, which frees it.
		 */
		if (stream->hintpath == NULL) {
			error = -1;
			goto cleanup;
		}
	}

	stream->repo = repo;
	stream->parent.write = blob_writestream_write;
	stream->parent.close = blob_writestream_close;
	stream->parent.free  = blob_writestream_free;

	if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
		|| (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0)
		goto cleanup;

	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY,
					       0666, 2 * 1024 * 1024)) < 0)
		goto cleanup;

	*out = (git_writestream *) stream;

cleanup:
	if (error < 0)
		blob_writestream_free((git_writestream *) stream);

	git_buf_dispose(&path);
	return error;
}
366 
/*
 * Finish a blob write stream: flush the buffered data, create the blob
 * from the backing file and store its id in `out`.
 *
 * Filters are loaded only when the stream was created with a hint path.
 * The stream is always freed, whether or not the blob was created.
 */
int git_blob_create_fromstream_commit(git_oid *out, git_writestream *_stream)
{
	blob_writestream *stream = (blob_writestream *) _stream;
	int error;

	/*
	 * This could be made more efficient by not going through a file on
	 * disk, but for now the existing helper functions are reused.
	 */
	error = git_filebuf_flush(&stream->fbuf);

	if (error >= 0)
		error = git_blob__create_from_paths(
			out, NULL, stream->repo, stream->fbuf.path_lock,
			stream->hintpath, 0, stream->hintpath != NULL);

	blob_writestream_free(_stream);
	return error;
}
388 
git_blob_is_binary(const git_blob * blob)389 int git_blob_is_binary(const git_blob *blob)
390 {
391 	git_buf content = GIT_BUF_INIT;
392 	git_off_t size;
393 
394 	assert(blob);
395 
396 	size = git_blob_rawsize(blob);
397 
398 	git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
399 		(size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
400 	return git_buf_text_is_binary(&content);
401 }
402 
/*
 * Produce the blob's content as filtered for the working tree.
 *
 * The filters configured for `path` are loaded in the
 * GIT_FILTER_TO_WORKTREE direction and applied to the blob, with the
 * result placed in `out`.  When `check_for_binary_data` is set and the
 * blob looks binary, no filtering is done and 0 is returned.
 *
 * Returns 0 on success or the error from loading or applying filters.
 */
int git_blob_filtered_content(
	git_buf *out,
	git_blob *blob,
	const char *path,
	int check_for_binary_data)
{
	git_filter_list *filters = NULL;
	int error;

	assert(blob && path && out);

	git_buf_sanitize(out);

	if (check_for_binary_data && git_blob_is_binary(blob))
		return 0;

	error = git_filter_list_load(
		&filters, git_blob_owner(blob), blob, path,
		GIT_FILTER_TO_WORKTREE, GIT_FILTER_DEFAULT);
	if (error)
		return error;

	error = git_filter_list_apply_to_blob(out, filters, blob);
	git_filter_list_free(filters);

	return error;
}
430