1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "blob.h"
9
10 #include "git2/common.h"
11 #include "git2/object.h"
12 #include "git2/repository.h"
13 #include "git2/odb_backend.h"
14
15 #include "filebuf.h"
16 #include "filter.h"
17
git_blob_rawcontent(const git_blob * blob)18 const void *git_blob_rawcontent(const git_blob *blob)
19 {
20 GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL);
21
22 if (blob->raw)
23 return blob->data.raw.data;
24 else
25 return git_odb_object_data(blob->data.odb);
26 }
27
git_blob_rawsize(const git_blob * blob)28 git_object_size_t git_blob_rawsize(const git_blob *blob)
29 {
30 GIT_ASSERT_ARG(blob);
31
32 if (blob->raw)
33 return blob->data.raw.size;
34 else
35 return (git_object_size_t)git_odb_object_size(blob->data.odb);
36 }
37
/*
 * Copy the blob's content into `buffer`.
 *
 * Returns the result of git_buf_set(). The size is validated by
 * GIT_ERROR_CHECK_BLOBSIZE first (presumably rejecting sizes that do not
 * fit in a size_t -- confirm against the macro definition).
 */
int git_blob__getbuf(git_buf *buffer, git_blob *blob)
{
	const void *content;
	git_object_size_t rawsize = git_blob_rawsize(blob);

	GIT_ERROR_CHECK_BLOBSIZE(rawsize);

	content = git_blob_rawcontent(blob);
	return git_buf_set(buffer, content, (size_t)rawsize);
}
45
git_blob__free(void * _blob)46 void git_blob__free(void *_blob)
47 {
48 git_blob *blob = (git_blob *) _blob;
49 if (!blob->raw)
50 git_odb_object_free(blob->data.odb);
51 git__free(blob);
52 }
53
git_blob__parse_raw(void * _blob,const char * data,size_t size)54 int git_blob__parse_raw(void *_blob, const char *data, size_t size)
55 {
56 git_blob *blob = (git_blob *) _blob;
57
58 GIT_ASSERT_ARG(blob);
59
60 blob->raw = 1;
61 blob->data.raw.data = data;
62 blob->data.raw.size = size;
63 return 0;
64 }
65
/*
 * Initialize a blob from an ODB object.
 *
 * The ODB object's reference count is bumped with git_cached_obj_incref()
 * and the blob is marked as non-raw. Always returns 0 once the argument
 * check has passed.
 */
int git_blob__parse(void *_blob, git_odb_object *odb_obj)
{
	git_blob *self = _blob;

	GIT_ASSERT_ARG(self);

	/* the blob keeps its own reference to the ODB object */
	git_cached_obj_incref((git_cached_obj *)odb_obj);

	self->data.odb = odb_obj;
	self->raw = 0;

	return 0;
}
77
/*
 * Write `len` bytes from `buffer` to the repository's object database as
 * a blob and store the resulting object id in `id`.
 *
 * Returns 0 on success, or the error code of the first step that failed.
 */
int git_blob_create_from_buffer(
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
{
	git_odb *db;
	git_odb_stream *ws;
	int rc;

	GIT_ASSERT_ARG(id);
	GIT_ASSERT_ARG(repo);

	rc = git_repository_odb__weakptr(&db, repo);
	if (rc < 0)
		return rc;

	rc = git_odb_open_wstream(&ws, db, len, GIT_OBJECT_BLOB);
	if (rc < 0)
		return rc;

	/* only finalize when the whole buffer was accepted by the stream */
	rc = git_odb_stream_write(ws, buffer, len);
	if (rc == 0)
		rc = git_odb_stream_finalize_write(id, ws);

	git_odb_stream_free(ws);
	return rc;
}
98
/*
 * Stream the file at `path` into the object database as a blob of
 * `file_size` bytes and store the resulting object id in `id`.
 *
 * Returns 0 on success. Fails with -1 when the file cannot be opened,
 * when reading fails, or when the number of bytes read differs from
 * `file_size`. A failure reported by the ODB stream is returned as-is.
 */
static int write_file_stream(
	git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size)
{
	int fd, error;
	char buffer[FILEIO_BUFSIZE];
	git_odb_stream *stream = NULL;
	ssize_t read_len = -1;
	git_object_size_t written = 0;

	if ((error = git_odb_open_wstream(
			&stream, odb, file_size, GIT_OBJECT_BLOB)) < 0)
		return error;

	if ((fd = git_futils_open_ro(path)) < 0) {
		git_odb_stream_free(stream);
		return -1;
	}

	/* stop at end of file, on a read error, or on the first failed write */
	while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) {
		error = git_odb_stream_write(stream, buffer, read_len);
		written += read_len;
	}

	p_close(fd);

	/*
	 * Only diagnose a short or failed read when the stream write itself
	 * succeeded. An aborted write also leaves `written` short of
	 * `file_size`, and its more specific error must not be replaced by
	 * the generic read failure.
	 */
	if (!error && (written != file_size || read_len < 0)) {
		git_error_set(GIT_ERROR_OS, "failed to read file into stream");
		error = -1;
	}

	if (!error)
		error = git_odb_stream_finalize_write(id, stream);

	git_odb_stream_free(stream);
	return error;
}
135
/*
 * Run the file at `full_path` through the filter list `fl` and write the
 * filtered result to the object database as a blob.
 *
 * On successful filtering, `*size` receives the filtered length and `id`
 * the id produced by git_odb_write(). Returns 0 on success or the error
 * of the step that failed.
 */
static int write_file_filtered(
	git_oid *id,
	git_object_size_t *size,
	git_odb *odb,
	const char *full_path,
	git_filter_list *fl)
{
	git_buf filtered = GIT_BUF_INIT;
	int rc = git_filter_list_apply_to_file(&filtered, fl, NULL, full_path);

	/* nothing is written unless the filters ran cleanly */
	if (rc == 0) {
		*size = filtered.size;
		rc = git_odb_write(
			id, odb, filtered.ptr, filtered.size, GIT_OBJECT_BLOB);
	}

	git_buf_dispose(&filtered);
	return rc;
}
158
/*
 * Store the target of the symlink at `path` as a blob.
 *
 * `link_size` is the expected length of the link target; reading any
 * other number of bytes is treated as a failure (-1). Otherwise the
 * result of git_odb_write() is returned.
 */
static int write_symlink(
	git_oid *id, git_odb *odb, const char *path, size_t link_size)
{
	char *target;
	ssize_t got;
	int rc = -1;

	target = git__malloc(link_size);
	GIT_ERROR_CHECK_ALLOC(target);

	got = p_readlink(path, target, link_size);

	if (got == (ssize_t)link_size)
		rc = git_odb_write(id, odb, (void *)target, link_size, GIT_OBJECT_BLOB);
	else
		git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path);

	git__free(target);
	return rc;
}
180
/*
 * Create a blob from a file on disk.
 *
 * id               receives the id of the written blob
 * out_st           if non-NULL, receives the lstat() result of the file
 * repo             repository whose object database is written to
 * content_path     file to read; when NULL it is derived from `hint_path`
 *                  via git_repository_workdir_path()
 * hint_path        path handed to the filter loader; must be non-NULL
 *                  when `try_load_filters` is set
 * hint_mode        when non-zero, used instead of the file's own mode to
 *                  decide between symlink and regular file handling
 * try_load_filters whether to load and apply to-ODB filters
 *
 * Returns 0 on success, GIT_EDIRECTORY when the path is a directory, or
 * another negative error code.
 */
int git_blob__create_from_paths(
	git_oid *id,
	struct stat *out_st,
	git_repository *repo,
	const char *content_path,
	const char *hint_path,
	mode_t hint_mode,
	bool try_load_filters)
{
	int error;
	struct stat st;
	git_odb *odb = NULL;
	git_object_size_t size;
	mode_t mode;
	git_buf path = GIT_BUF_INIT;

	GIT_ASSERT_ARG(hint_path || !try_load_filters);

	if (!content_path) {
		if (git_repository_workdir_path(&path, repo, hint_path) < 0) {
			/*
			 * Leave through the common exit so that anything
			 * already stored in `path` is released; the return
			 * value stays -1 as before.
			 */
			error = -1;
			goto done;
		}

		content_path = path.ptr;
	}

	if ((error = git_path_lstat(content_path, &st)) < 0 ||
	    (error = git_repository_odb(&odb, repo)) < 0)
		goto done;

	if (S_ISDIR(st.st_mode)) {
		git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path);
		error = GIT_EDIRECTORY;
		goto done;
	}

	if (out_st)
		memcpy(out_st, &st, sizeof(st));

	size = st.st_size;
	mode = hint_mode ? hint_mode : st.st_mode;

	if (S_ISLNK(mode)) {
		error = write_symlink(id, odb, content_path, (size_t)size);
	} else {
		git_filter_list *fl = NULL;

		if (try_load_filters)
			/* Load the filters for writing this file to the ODB */
			error = git_filter_list_load(
				&fl, repo, NULL, hint_path,
				GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT);

		if (error < 0)
			/* well, that didn't work */;
		else if (fl == NULL)
			/* No filters need to be applied to the document: we can
			 * stream directly from disk */
			error = write_file_stream(id, odb, content_path, size);
		else {
			/* We need to apply one or more filters */
			error = write_file_filtered(id, &size, odb, content_path, fl);

			git_filter_list_free(fl);
		}

		/*
		 * TODO: eventually support streaming filtered files, for files
		 * which are bigger than a given threshold. This is not a
		 * priority because applying a filter in streaming mode changes
		 * the final size of the blob, and without knowing its final
		 * size, the blob cannot be written in stream mode to the ODB.
		 *
		 * The plan is to do streaming writes to a tempfile on disk and
		 * then stream that file to the ODB using `write_file_stream`.
		 */
	}

done:
	/* `odb` is still NULL when we get here before it was looked up */
	git_odb_free(odb);
	git_buf_dispose(&path);

	return error;
}
267
/*
 * Create a blob from a file in the working directory.
 *
 * `path` is passed as the hint path with no explicit content path, so the
 * file location is derived from it; filter loading is enabled.
 */
int git_blob_create_from_workdir(
	git_oid *id, git_repository *repo, const char *path)
{
	struct stat *no_stat = NULL;
	const char *no_content_path = NULL;
	const mode_t no_mode_override = 0;

	return git_blob__create_from_paths(
		id, no_stat, repo, no_content_path, path, no_mode_override, true);
}
273
/*
 * Create a blob from an arbitrary file on disk.
 *
 * The path is normalized with git_path_prettify(). If the normalized path
 * starts with the repository's working directory, that prefix is skipped
 * to form the hint path used for filter loading; otherwise the full
 * normalized path serves as the hint.
 */
int git_blob_create_from_disk(
	git_oid *id, git_repository *repo, const char *path)
{
	git_buf resolved = GIT_BUF_INIT;
	const char *hint, *wd;
	int rc;

	rc = git_path_prettify(&resolved, path, NULL);
	if (rc < 0)
		goto out;

	hint = git_buf_cstr(&resolved);
	wd = git_repository_workdir(repo);

	/* skip the workdir prefix when the file lives inside it */
	if (wd != NULL && git__prefixcmp(hint, wd) == 0)
		hint += strlen(wd);

	rc = git_blob__create_from_paths(
		id, NULL, repo, git_buf_cstr(&resolved), hint, 0, true);

out:
	git_buf_dispose(&resolved);
	return rc;
}
298
/*
 * Write stream used to build a blob incrementally. Data written to the
 * stream is collected in a file buffer until the stream is committed.
 */
typedef struct {
	/* must stay the first member: the stream is cast to and from
	 * `git_writestream *` */
	git_writestream parent;
	/* file buffer receiving the streamed data */
	git_filebuf fbuf;
	/* repository the blob is created in */
	git_repository *repo;
	/* heap-allocated copy of the caller's hint path, or NULL */
	char *hintpath;
} blob_writestream;
305
/*
 * `close` callback of the blob write stream: cleans up the underlying
 * file buffer and always reports success.
 */
static int blob_writestream_close(git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	git_filebuf *fb = &ws->fbuf;

	git_filebuf_cleanup(fb);

	return 0;
}
313
/*
 * `free` callback of the blob write stream: cleans up the file buffer,
 * then frees the hint path copy and the stream structure itself.
 */
static void blob_writestream_free(git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	char *hint = ws->hintpath;

	git_filebuf_cleanup(&ws->fbuf);
	git__free(hint);
	git__free(ws);
}
322
/*
 * `write` callback of the blob write stream: appends `len` bytes from
 * `buffer` to the file buffer and returns the result of
 * git_filebuf_write().
 */
static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	git_filebuf *fb = &ws->fbuf;

	return git_filebuf_write(fb, buffer, len);
}
329
/*
 * Create a write stream for building a blob incrementally.
 *
 * On success `*out` receives the new stream. Data written to it is
 * collected in a temporary file buffer named "streamed" under the
 * repository's objects directory. `hintpath`, when non-NULL, is copied
 * and kept with the stream.
 *
 * Returns 0 on success or a negative error code; on failure `*out` is
 * left untouched and everything allocated here is released.
 */
int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath)
{
	int error = 0;
	git_buf path = GIT_BUF_INIT;
	blob_writestream *stream;

	GIT_ASSERT_ARG(out);
	GIT_ASSERT_ARG(repo);

	stream = git__calloc(1, sizeof(blob_writestream));
	GIT_ERROR_CHECK_ALLOC(stream);

	if (hintpath) {
		stream->hintpath = git__strdup(hintpath);

		/*
		 * Do not return directly here: `stream` is already allocated
		 * and must be released through the cleanup path.
		 */
		if (stream->hintpath == NULL) {
			error = -1;
			goto cleanup;
		}
	}

	stream->repo = repo;
	stream->parent.write = blob_writestream_write;
	stream->parent.close = blob_writestream_close;
	stream->parent.free = blob_writestream_free;

	if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0
		|| (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0)
		goto cleanup;

	if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY,
					       0666, 2 * 1024 * 1024)) < 0)
		goto cleanup;

	*out = (git_writestream *) stream;

cleanup:
	if (error < 0)
		blob_writestream_free((git_writestream *) stream);

	git_buf_dispose(&path);
	return error;
}
369
/*
 * Finish a blob write stream: flush the buffered data, create the blob
 * from the resulting file and store its id in `out`.
 *
 * The stream is always freed, whether or not the blob could be created.
 * Filters are only loaded when the stream carries a hint path.
 */
int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream)
{
	blob_writestream *ws = (blob_writestream *)_stream;
	int rc;

	/*
	 * This could be made more efficient by not going through the
	 * disk, but for now the existing helper functions are reused.
	 */
	rc = git_filebuf_flush(&ws->fbuf);

	if (rc >= 0)
		rc = git_blob__create_from_paths(
			out, NULL, ws->repo, ws->fbuf.path_lock,
			ws->hintpath, 0, ws->hintpath != NULL);

	blob_writestream_free(_stream);
	return rc;
}
391
git_blob_is_binary(const git_blob * blob)392 int git_blob_is_binary(const git_blob *blob)
393 {
394 git_buf content = GIT_BUF_INIT;
395 git_object_size_t size;
396
397 GIT_ASSERT_ARG(blob);
398
399 size = git_blob_rawsize(blob);
400
401 git_buf_attach_notowned(&content, git_blob_rawcontent(blob),
402 (size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL));
403 return git_buf_is_binary(&content);
404 }
405
/*
 * Initialize `opts` from the GIT_BLOB_FILTER_OPTIONS_INIT template for the
 * requested structure `version`.
 *
 * Returns 0 when the end of the function is reached.
 * NOTE(review): GIT_INIT_STRUCTURE_FROM_TEMPLATE presumably returns an
 * error by itself for an unsupported version -- confirm against the
 * macro definition.
 */
int git_blob_filter_options_init(
	git_blob_filter_options *opts,
	unsigned int version)
{
	GIT_INIT_STRUCTURE_FROM_TEMPLATE(opts, version,
		git_blob_filter_options, GIT_BLOB_FILTER_OPTIONS_INIT);
	return 0;
}
414
/*
 * Apply the to-worktree filters configured for `path` to `blob` and place
 * the result in `out`.
 *
 * `given_opts` may be NULL, in which case the default options are used.
 * When GIT_BLOB_FILTER_CHECK_FOR_BINARY is set and the blob looks binary,
 * no filtering takes place and 0 is returned.
 *
 * Returns 0 on success or an error code from option validation, buffer
 * sanitizing, filter loading or filter application.
 */
int git_blob_filter(
	git_buf *out,
	git_blob *blob,
	const char *path,
	git_blob_filter_options *given_opts)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;
	git_filter_flag_t load_flags = GIT_FILTER_DEFAULT;
	git_filter_list *filters = NULL;
	int rc = 0;

	GIT_ASSERT_ARG(blob);
	GIT_ASSERT_ARG(path);
	GIT_ASSERT_ARG(out);

	GIT_ERROR_CHECK_VERSION(
		given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options");

	if (git_buf_sanitize(out) < 0)
		return -1;

	/* caller-supplied options replace the defaults wholesale */
	if (given_opts)
		memcpy(&opts, given_opts, sizeof(opts));

	if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) &&
	    git_blob_is_binary(blob))
		return 0;

	/* translate blob filter options into filter loading flags */
	if (opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES)
		load_flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES;

	if (opts.flags & GIT_BLOB_FILTER_ATTRIBUTES_FROM_HEAD)
		load_flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD;

	rc = git_filter_list_load(
		&filters, git_blob_owner(blob), blob, path,
		GIT_FILTER_TO_WORKTREE, load_flags);
	if (rc != 0)
		return rc;

	rc = git_filter_list_apply_to_blob(out, filters, blob);
	git_filter_list_free(filters);

	return rc;
}
460
461 /* Deprecated functions */
462
463 #ifndef GIT_DEPRECATE_HARD
/* Deprecated alias; forwards to git_blob_create_from_buffer(). */
int git_blob_create_frombuffer(
	git_oid *id, git_repository *repo, const void *buffer, size_t len)
{
	int rc = git_blob_create_from_buffer(id, repo, buffer, len);

	return rc;
}
469
/* Deprecated alias; forwards to git_blob_create_from_workdir(). */
int git_blob_create_fromworkdir(git_oid *id, git_repository *repo, const char *relative_path)
{
	int rc = git_blob_create_from_workdir(id, repo, relative_path);

	return rc;
}
474
/* Deprecated alias; forwards to git_blob_create_from_disk(). */
int git_blob_create_fromdisk(git_oid *id, git_repository *repo, const char *path)
{
	int rc = git_blob_create_from_disk(id, repo, path);

	return rc;
}
479
/* Deprecated alias; forwards to git_blob_create_from_stream(). */
int git_blob_create_fromstream(
	git_writestream **out,
	git_repository *repo,
	const char *hintpath)
{
	int rc = git_blob_create_from_stream(out, repo, hintpath);

	return rc;
}
487
/* Deprecated alias; forwards to git_blob_create_from_stream_commit(). */
int git_blob_create_fromstream_commit(
	git_oid *out,
	git_writestream *stream)
{
	int rc = git_blob_create_from_stream_commit(out, stream);

	return rc;
}
494
/*
 * Deprecated entry point for blob filtering.
 *
 * Starts from the default filter options, sets or clears
 * GIT_BLOB_FILTER_CHECK_FOR_BINARY according to `check_for_binary_data`,
 * and forwards to git_blob_filter().
 */
int git_blob_filtered_content(
	git_buf *out,
	git_blob *blob,
	const char *path,
	int check_for_binary_data)
{
	git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT;

	opts.flags = check_for_binary_data ?
		(opts.flags | GIT_BLOB_FILTER_CHECK_FOR_BINARY) :
		(opts.flags & ~GIT_BLOB_FILTER_CHECK_FOR_BINARY);

	return git_blob_filter(out, blob, path, &opts);
}
510 #endif
511