1 /* 2 * Copyright (C) the libgit2 contributors. All rights reserved. 3 * 4 * This file is part of libgit2, distributed under the GNU GPL v2 with 5 * a Linking Exception. For full terms see the included COPYING file. 6 */ 7 8 #include "blob.h" 9 10 #include "git2/common.h" 11 #include "git2/object.h" 12 #include "git2/repository.h" 13 #include "git2/odb_backend.h" 14 15 #include "filebuf.h" 16 #include "filter.h" 17 #include "buf_text.h" 18 19 const void *git_blob_rawcontent(const git_blob *blob) 20 { 21 GIT_ASSERT_ARG_WITH_RETVAL(blob, NULL); 22 23 if (blob->raw) 24 return blob->data.raw.data; 25 else 26 return git_odb_object_data(blob->data.odb); 27 } 28 29 git_object_size_t git_blob_rawsize(const git_blob *blob) 30 { 31 GIT_ASSERT_ARG(blob); 32 33 if (blob->raw) 34 return blob->data.raw.size; 35 else 36 return (git_object_size_t)git_odb_object_size(blob->data.odb); 37 } 38 39 int git_blob__getbuf(git_buf *buffer, git_blob *blob) 40 { 41 git_object_size_t size = git_blob_rawsize(blob); 42 43 GIT_ERROR_CHECK_BLOBSIZE(size); 44 return git_buf_set(buffer, git_blob_rawcontent(blob), (size_t)size); 45 } 46 47 void git_blob__free(void *_blob) 48 { 49 git_blob *blob = (git_blob *) _blob; 50 if (!blob->raw) 51 git_odb_object_free(blob->data.odb); 52 git__free(blob); 53 } 54 55 int git_blob__parse_raw(void *_blob, const char *data, size_t size) 56 { 57 git_blob *blob = (git_blob *) _blob; 58 59 GIT_ASSERT_ARG(blob); 60 61 blob->raw = 1; 62 blob->data.raw.data = data; 63 blob->data.raw.size = size; 64 return 0; 65 } 66 67 int git_blob__parse(void *_blob, git_odb_object *odb_obj) 68 { 69 git_blob *blob = (git_blob *) _blob; 70 71 GIT_ASSERT_ARG(blob); 72 73 git_cached_obj_incref((git_cached_obj *)odb_obj); 74 blob->raw = 0; 75 blob->data.odb = odb_obj; 76 return 0; 77 } 78 79 int git_blob_create_from_buffer( 80 git_oid *id, git_repository *repo, const void *buffer, size_t len) 81 { 82 int error; 83 git_odb *odb; 84 git_odb_stream *stream; 85 86 GIT_ASSERT_ARG(id); 87 GIT_ASSERT_ARG(repo); 88 89 if ((error = git_repository_odb__weakptr(&odb, repo)) < 0 || 90 (error = git_odb_open_wstream(&stream, odb, len, GIT_OBJECT_BLOB)) < 0) 91 return error; 92 93 if ((error = git_odb_stream_write(stream, buffer, len)) == 0) 94 error = git_odb_stream_finalize_write(id, stream); 95 96 git_odb_stream_free(stream); 97 return error; 98 } 99 100 static int write_file_stream( 101 git_oid *id, git_odb *odb, const char *path, git_object_size_t file_size) 102 { 103 int fd, error; 104 char buffer[FILEIO_BUFSIZE]; 105 git_odb_stream *stream = NULL; 106 ssize_t read_len = -1; 107 git_object_size_t written = 0; 108 109 if ((error = git_odb_open_wstream( 110 &stream, odb, file_size, GIT_OBJECT_BLOB)) < 0) 111 return error; 112 113 if ((fd = git_futils_open_ro(path)) < 0) { 114 git_odb_stream_free(stream); 115 return -1; 116 } 117 118 while (!error && (read_len = p_read(fd, buffer, sizeof(buffer))) > 0) { 119 error = git_odb_stream_write(stream, buffer, read_len); 120 written += read_len; 121 } 122 123 p_close(fd); 124 125 if (written != file_size || read_len < 0) { 126 git_error_set(GIT_ERROR_OS, "failed to read file into stream"); 127 error = -1; 128 } 129 130 if (!error) 131 error = git_odb_stream_finalize_write(id, stream); 132 133 git_odb_stream_free(stream); 134 return error; 135 } 136 137 static int write_file_filtered( 138 git_oid *id, 139 git_object_size_t *size, 140 git_odb *odb, 141 const char *full_path, 142 git_filter_list *fl) 143 { 144 int error; 145 git_buf tgt = GIT_BUF_INIT; 146 147 error = git_filter_list_apply_to_file(&tgt, fl, NULL, full_path); 148 149 /* Write the file to disk if it was properly filtered */ 150 if (!error) { 151 *size = tgt.size; 152 153 error = git_odb_write(id, odb, tgt.ptr, tgt.size, GIT_OBJECT_BLOB); 154 } 155 156 git_buf_dispose(&tgt); 157 return error; 158 } 159 160 static int write_symlink( 161 git_oid *id, git_odb *odb, const char *path, size_t link_size) 162 { 163 char *link_data; 164 ssize_t read_len; 165 int error; 166 167 link_data = git__malloc(link_size); 168 GIT_ERROR_CHECK_ALLOC(link_data); 169 170 read_len = p_readlink(path, link_data, link_size); 171 if (read_len != (ssize_t)link_size) { 172 git_error_set(GIT_ERROR_OS, "failed to create blob: cannot read symlink '%s'", path); 173 git__free(link_data); 174 return -1; 175 } 176 177 error = git_odb_write(id, odb, (void *)link_data, link_size, GIT_OBJECT_BLOB); 178 git__free(link_data); 179 return error; 180 } 181 182 int git_blob__create_from_paths( 183 git_oid *id, 184 struct stat *out_st, 185 git_repository *repo, 186 const char *content_path, 187 const char *hint_path, 188 mode_t hint_mode, 189 bool try_load_filters) 190 { 191 int error; 192 struct stat st; 193 git_odb *odb = NULL; 194 git_object_size_t size; 195 mode_t mode; 196 git_buf path = GIT_BUF_INIT; 197 198 GIT_ASSERT_ARG(hint_path || !try_load_filters); 199 200 if (!content_path) { 201 if (git_repository__ensure_not_bare(repo, "create blob from file") < 0) 202 return GIT_EBAREREPO; 203 204 if (git_buf_joinpath( 205 &path, git_repository_workdir(repo), hint_path) < 0) 206 return -1; 207 208 content_path = path.ptr; 209 } 210 211 if ((error = git_path_lstat(content_path, &st)) < 0 || 212 (error = git_repository_odb(&odb, repo)) < 0) 213 goto done; 214 215 if (S_ISDIR(st.st_mode)) { 216 git_error_set(GIT_ERROR_ODB, "cannot create blob from '%s': it is a directory", content_path); 217 error = GIT_EDIRECTORY; 218 goto done; 219 } 220 221 if (out_st) 222 memcpy(out_st, &st, sizeof(st)); 223 224 size = st.st_size; 225 mode = hint_mode ? hint_mode : st.st_mode; 226 227 if (S_ISLNK(mode)) { 228 error = write_symlink(id, odb, content_path, (size_t)size); 229 } else { 230 git_filter_list *fl = NULL; 231 232 if (try_load_filters) 233 /* Load the filters for writing this file to the ODB */ 234 error = git_filter_list_load( 235 &fl, repo, NULL, hint_path, 236 GIT_FILTER_TO_ODB, GIT_FILTER_DEFAULT); 237 238 if (error < 0) 239 /* well, that didn't work */; 240 else if (fl == NULL) 241 /* No filters need to be applied to the document: we can stream 242 * directly from disk */ 243 error = write_file_stream(id, odb, content_path, size); 244 else { 245 /* We need to apply one or more filters */ 246 error = write_file_filtered(id, &size, odb, content_path, fl); 247 248 git_filter_list_free(fl); 249 } 250 251 /* 252 * TODO: eventually support streaming filtered files, for files 253 * which are bigger than a given threshold. This is not a priority 254 * because applying a filter in streaming mode changes the final 255 * size of the blob, and without knowing its final size, the blob 256 * cannot be written in stream mode to the ODB. 257 * 258 * The plan is to do streaming writes to a tempfile on disk and then 259 * opening streaming that file to the ODB, using 260 * `write_file_stream`. 261 * 262 * CAREFULLY DESIGNED APIS YO 263 */ 264 } 265 266 done: 267 git_odb_free(odb); 268 git_buf_dispose(&path); 269 270 return error; 271 } 272 273 int git_blob_create_from_workdir( 274 git_oid *id, git_repository *repo, const char *path) 275 { 276 return git_blob__create_from_paths(id, NULL, repo, NULL, path, 0, true); 277 } 278 279 int git_blob_create_from_disk( 280 git_oid *id, git_repository *repo, const char *path) 281 { 282 int error; 283 git_buf full_path = GIT_BUF_INIT; 284 const char *workdir, *hintpath; 285 286 if ((error = git_path_prettify(&full_path, path, NULL)) < 0) { 287 git_buf_dispose(&full_path); 288 return error; 289 } 290 291 hintpath = git_buf_cstr(&full_path); 292 workdir = git_repository_workdir(repo); 293 294 if (workdir && !git__prefixcmp(hintpath, workdir)) 295 hintpath += strlen(workdir); 296 297 error = git_blob__create_from_paths( 298 id, NULL, repo, git_buf_cstr(&full_path), hintpath, 0, true); 299 300 git_buf_dispose(&full_path); 301 return error; 302 } 303 304 typedef struct { 305 git_writestream parent; 306 git_filebuf fbuf; 307 git_repository *repo; 308 char *hintpath; 309 } blob_writestream; 310 311 static int blob_writestream_close(git_writestream *_stream) 312 { 313 blob_writestream *stream = (blob_writestream *) _stream; 314 315 git_filebuf_cleanup(&stream->fbuf); 316 return 0; 317 } 318 319 static void blob_writestream_free(git_writestream *_stream) 320 { 321 blob_writestream *stream = (blob_writestream *) _stream; 322 323 git_filebuf_cleanup(&stream->fbuf); 324 git__free(stream->hintpath); 325 git__free(stream); 326 } 327 328 static int blob_writestream_write(git_writestream *_stream, const char *buffer, size_t len) 329 { 330 blob_writestream *stream = (blob_writestream *) _stream; 331 332 return git_filebuf_write(&stream->fbuf, buffer, len); 333 } 334 335 int git_blob_create_from_stream(git_writestream **out, git_repository *repo, const char *hintpath) 336 { 337 int error; 338 git_buf path = GIT_BUF_INIT; 339 blob_writestream *stream; 340 341 GIT_ASSERT_ARG(out); 342 GIT_ASSERT_ARG(repo); 343 344 stream = git__calloc(1, sizeof(blob_writestream)); 345 GIT_ERROR_CHECK_ALLOC(stream); 346 347 if (hintpath) { 348 stream->hintpath = git__strdup(hintpath); 349 GIT_ERROR_CHECK_ALLOC(stream->hintpath); 350 } 351 352 stream->repo = repo; 353 stream->parent.write = blob_writestream_write; 354 stream->parent.close = blob_writestream_close; 355 stream->parent.free = blob_writestream_free; 356 357 if ((error = git_repository_item_path(&path, repo, GIT_REPOSITORY_ITEM_OBJECTS)) < 0 358 || (error = git_buf_joinpath(&path, path.ptr, "streamed")) < 0) 359 goto cleanup; 360 361 if ((error = git_filebuf_open_withsize(&stream->fbuf, git_buf_cstr(&path), GIT_FILEBUF_TEMPORARY, 362 0666, 2 * 1024 * 1024)) < 0) 363 goto cleanup; 364 365 *out = (git_writestream *) stream; 366 367 cleanup: 368 if (error < 0) 369 blob_writestream_free((git_writestream *) stream); 370 371 git_buf_dispose(&path); 372 return error; 373 } 374 375 int git_blob_create_from_stream_commit(git_oid *out, git_writestream *_stream) 376 { 377 int error; 378 blob_writestream *stream = (blob_writestream *) _stream; 379 380 /* 381 * We can make this more officient by avoiding writing to 382 * disk, but for now let's re-use the helper functions we 383 * have. 384 */ 385 if ((error = git_filebuf_flush(&stream->fbuf)) < 0) 386 goto cleanup; 387 388 error = git_blob__create_from_paths(out, NULL, stream->repo, stream->fbuf.path_lock, 389 stream->hintpath, 0, !!stream->hintpath); 390 391 cleanup: 392 blob_writestream_free(_stream); 393 return error; 394 395 } 396 397 int git_blob_is_binary(const git_blob *blob) 398 { 399 git_buf content = GIT_BUF_INIT; 400 git_object_size_t size; 401 402 GIT_ASSERT_ARG(blob); 403 404 size = git_blob_rawsize(blob); 405 406 git_buf_attach_notowned(&content, git_blob_rawcontent(blob), 407 (size_t)min(size, GIT_FILTER_BYTES_TO_CHECK_NUL)); 408 return git_buf_text_is_binary(&content); 409 } 410 411 int git_blob_filter( 412 git_buf *out, 413 git_blob *blob, 414 const char *path, 415 git_blob_filter_options *given_opts) 416 { 417 int error = 0; 418 git_filter_list *fl = NULL; 419 git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT; 420 git_filter_flag_t flags = GIT_FILTER_DEFAULT; 421 422 GIT_ASSERT_ARG(blob); 423 GIT_ASSERT_ARG(path); 424 GIT_ASSERT_ARG(out); 425 426 GIT_ERROR_CHECK_VERSION( 427 given_opts, GIT_BLOB_FILTER_OPTIONS_VERSION, "git_blob_filter_options"); 428 429 if (git_buf_sanitize(out) < 0) 430 return -1; 431 432 if (given_opts != NULL) 433 memcpy(&opts, given_opts, sizeof(git_blob_filter_options)); 434 435 if ((opts.flags & GIT_BLOB_FILTER_CHECK_FOR_BINARY) != 0 && 436 git_blob_is_binary(blob)) 437 return 0; 438 439 if ((opts.flags & GIT_BLOB_FILTER_NO_SYSTEM_ATTRIBUTES) != 0) 440 flags |= GIT_FILTER_NO_SYSTEM_ATTRIBUTES; 441 442 if ((opts.flags & GIT_BLOB_FILTER_ATTTRIBUTES_FROM_HEAD) != 0) 443 flags |= GIT_FILTER_ATTRIBUTES_FROM_HEAD; 444 445 if (!(error = git_filter_list_load( 446 &fl, git_blob_owner(blob), blob, path, 447 GIT_FILTER_TO_WORKTREE, flags))) { 448 449 error = git_filter_list_apply_to_blob(out, fl, blob); 450 451 git_filter_list_free(fl); 452 } 453 454 return error; 455 } 456 457 /* Deprecated functions */ 458 459 #ifndef GIT_DEPRECATE_HARD 460 int git_blob_create_frombuffer( 461 git_oid *id, git_repository *repo, const void *buffer, size_t len) 462 { 463 return git_blob_create_from_buffer(id, repo, buffer, len); 464 } 465 466 int git_blob_create_fromworkdir(git_oid *id, git_repository *repo, const char *relative_path) 467 { 468 return git_blob_create_from_workdir(id, repo, relative_path); 469 } 470 471 int git_blob_create_fromdisk(git_oid *id, git_repository *repo, const char *path) 472 { 473 return git_blob_create_from_disk(id, repo, path); 474 } 475 476 int git_blob_create_fromstream( 477 git_writestream **out, 478 git_repository *repo, 479 const char *hintpath) 480 { 481 return git_blob_create_from_stream(out, repo, hintpath); 482 } 483 484 int git_blob_create_fromstream_commit( 485 git_oid *out, 486 git_writestream *stream) 487 { 488 return git_blob_create_from_stream_commit(out, stream); 489 } 490 491 int git_blob_filtered_content( 492 git_buf *out, 493 git_blob *blob, 494 const char *path, 495 int check_for_binary_data) 496 { 497 git_blob_filter_options opts = GIT_BLOB_FILTER_OPTIONS_INIT; 498 499 if (check_for_binary_data) 500 opts.flags |= GIT_BLOB_FILTER_CHECK_FOR_BINARY; 501 else 502 opts.flags &= ~GIT_BLOB_FILTER_CHECK_FOR_BINARY; 503 504 return git_blob_filter(out, blob, path, &opts); 505 } 506 #endif 507