1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "diff_file.h"
9
10 #include "git2/blob.h"
11 #include "git2/submodule.h"
12 #include "diff.h"
13 #include "diff_generate.h"
14 #include "odb.h"
15 #include "fileops.h"
16 #include "filter.h"
17
/*
 * Size threshold (0x20000000 bytes, i.e. 512 MiB) substituted for the
 * caller's `max_size` diff option when that option is given as zero.
 */
#define DIFF_MAX_FILESIZE 0x20000000
19
/*
 * Apply the size-based binary heuristic to a file.
 *
 * When the file has not already been classified (none of the
 * DIFF_FLAGS_KNOWN_BINARY bits are set), a positive size limit is
 * configured, and the file is larger than that limit, the file is
 * flagged as binary.
 *
 * Returns true if the file carries GIT_DIFF_FLAG_BINARY afterwards,
 * whether it was set here or earlier.
 */
static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
{
	git_diff_file *file = fc->file;
	bool undecided = (file->flags & DIFF_FLAGS_KNOWN_BINARY) == 0;
	bool has_limit = fc->opts_max_size > 0;

	if (undecided && has_limit && file->size > fc->opts_max_size)
		file->flags |= GIT_DIFF_FLAG_BINARY;

	return (file->flags & GIT_DIFF_FLAG_BINARY) != 0;
}
30
/*
 * Classify a file as binary or text by asking the diff driver to
 * inspect the currently mapped content.
 *
 * Does nothing if the file already has a known classification.  A
 * driver answer of 0 marks the file as not binary, 1 marks it as
 * binary, and any other answer leaves the flags untouched.
 */
static void diff_file_content_binary_by_content(git_diff_file_content *fc)
{
	int verdict;

	if ((fc->file->flags & DIFF_FLAGS_KNOWN_BINARY) != 0)
		return;

	verdict = git_diff_driver_content_is_binary(
		fc->driver, fc->map.data, fc->map.len);

	if (verdict == 0)
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	else if (verdict == 1)
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
}
43
/*
 * Shared tail of the git_diff_file_content initializers.
 *
 * Expects `fc->repo`, `fc->file`, `fc->src` and `fc->flags` to have been
 * filled in by the caller.  Copies the relevant diff options, resolves
 * the diff driver if the caller did not, and settles the binary / not
 * binary flags as far as is possible without loading more data.
 *
 * `opts` may be NULL, in which case GIT_DIFF_NORMAL is used and no size
 * limit is recorded.
 *
 * Returns 0 on success, -1 if the diff driver lookup fails.
 */
static int diff_file_content_init_common(
	git_diff_file_content *fc, const git_diff_options *opts)
{
	fc->opts_flags = GIT_DIFF_NORMAL;

	if (opts) {
		fc->opts_flags = opts->flags;

		/* zero selects the built-in limit; a negative value records none */
		if (opts->max_size > 0)
			fc->opts_max_size = opts->max_size;
		else if (opts->max_size == 0)
			fc->opts_max_size = DIFF_MAX_FILESIZE;
	}

	if (fc->src == GIT_ITERATOR_TYPE_EMPTY)
		fc->src = GIT_ITERATOR_TYPE_TREE;

	if (!fc->driver) {
		if (git_diff_driver_lookup(
				&fc->driver, fc->repo, NULL, fc->file->path) < 0)
			return -1;
	}

	/* the driver gets a chance to adjust the option flags */
	git_diff_driver_update_options(&fc->opts_flags, fc->driver);

	if ((git_off_t)((size_t)fc->file->size) != fc->file->size) {
		/* size does not survive a round trip through size_t */
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) {
		/* caller insists on a text diff */
		fc->file->flags &= ~GIT_DIFF_FLAG_BINARY;
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) {
		/* caller insists on a binary diff */
		fc->file->flags &= ~GIT_DIFF_FLAG_NOT_BINARY;
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
	}

	(void)diff_file_content_binary_by_size(fc);

	/* a side without data counts as loaded, with empty content */
	if ((fc->flags & GIT_DIFF_FLAG__NO_DATA) != 0) {
		fc->flags |= GIT_DIFF_FLAG__LOADED;
		fc->map.len = 0;
		fc->map.data = "";
	}

	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0)
		diff_file_content_binary_by_content(fc);

	return 0;
}
91
/*
 * Initialize `fc` for one side of a delta taken from an existing diff.
 *
 * `use_old` selects the old side (file and source) of `delta`; otherwise
 * the new side is used.  The delta status decides whether this side can
 * have any content at all; sides without content are flagged
 * GIT_DIFF_FLAG__NO_DATA before the common initialization runs.
 *
 * Returns 0 on success or a negative value on failure.
 */
int git_diff_file_content__init_from_diff(
	git_diff_file_content *fc,
	git_diff *diff,
	git_diff_delta *delta,
	bool use_old)
{
	bool has_data;

	memset(fc, 0, sizeof(*fc));
	fc->repo = diff->repo;

	if (use_old) {
		fc->file = &delta->old_file;
		fc->src = diff->old_src;
	} else {
		fc->file = &delta->new_file;
		fc->src = diff->new_src;
	}

	if (git_diff_driver_lookup(&fc->driver, fc->repo,
			&diff->attrsession, fc->file->path) < 0)
		return -1;

	switch (delta->status) {
	case GIT_DELTA_ADDED:
		/* only the new side exists */
		has_data = !use_old;
		break;
	case GIT_DELTA_DELETED:
		/* only the old side exists */
		has_data = use_old;
		break;
	case GIT_DELTA_UNTRACKED:
		/* new side only, and only when untracked content was requested */
		has_data = !use_old &&
			(diff->opts.flags & GIT_DIFF_SHOW_UNTRACKED_CONTENT) != 0;
		break;
	case GIT_DELTA_UNREADABLE:
	case GIT_DELTA_MODIFIED:
	case GIT_DELTA_COPIED:
	case GIT_DELTA_RENAMED:
		has_data = true;
		break;
	default:
		has_data = false;
		break;
	}

	if (!has_data)
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;

	return diff_file_content_init_common(fc, &diff->opts);
}
133
/*
 * Initialize `fc` from an explicit content source (a blob or a raw
 * buffer) rather than from a diff delta.
 *
 * `as_file` is the git_diff_file record that `fc` will describe; its
 * size, id, id_abbrev, mode and flags are filled in here when `src`
 * carries data.  When `src` has neither a blob nor a buffer, the content
 * is flagged GIT_DIFF_FLAG__NO_DATA.
 *
 * For a blob source an extra reference is taken with git_blob_dup() and
 * released later via GIT_DIFF_FLAG__FREE_BLOB.  For a buffer source the
 * caller's memory is referenced directly (no copy, no free flag), so it
 * must stay valid for as long as `fc` is in use.
 *
 * Returns 0 on success or a negative error code.  Failures of
 * git_blob_dup() and git_odb_hash() are reported to the caller instead
 * of being ignored, and no ownership or validity flags are set in that
 * case.
 */
int git_diff_file_content__init_from_src(
	git_diff_file_content *fc,
	git_repository *repo,
	const git_diff_options *opts,
	const git_diff_file_content_src *src,
	git_diff_file *as_file)
{
	int error;

	memset(fc, 0, sizeof(*fc));
	fc->repo = repo;
	fc->file = as_file;

	if (!src->blob && !src->buf) {
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
	} else if (src->blob) {
		/* take our own reference before recording that we own one */
		if ((error = git_blob_dup(
				(git_blob **)&fc->blob, (git_blob *) src->blob)) < 0)
			return error;

		fc->flags |= GIT_DIFF_FLAG__LOADED | GIT_DIFF_FLAG__FREE_BLOB;
		fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
		fc->file->mode = GIT_FILEMODE_BLOB;

		fc->file->size = git_blob_rawsize(src->blob);
		git_oid_cpy(&fc->file->id, git_blob_id(src->blob));
		fc->file->id_abbrev = GIT_OID_HEXSZ;

		fc->map.len = (size_t)fc->file->size;
		fc->map.data = (char *)git_blob_rawcontent(src->blob);
	} else {
		/* only mark the id valid once the hash has been computed */
		if ((error = git_odb_hash(
				&fc->file->id, src->buf, src->buflen, GIT_OBJ_BLOB)) < 0)
			return error;

		fc->flags |= GIT_DIFF_FLAG__LOADED;
		fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
		fc->file->mode = GIT_FILEMODE_BLOB;

		fc->file->size = src->buflen;
		fc->file->id_abbrev = GIT_OID_HEXSZ;

		fc->map.len = src->buflen;
		fc->map.data = (char *)src->buf;
	}

	return diff_file_content_init_common(fc, opts);
}
174
/*
 * Produce the textual stand-in for a submodule (commit) entry:
 * "Subproject commit <oid><status>\n".
 *
 * When `check_status` is true the submodule is looked up by path.  Its
 * id is copied into the file record if that record had no valid id yet,
 * and "-dirty" is appended when the submodule's working directory is
 * dirty.  If the lookup reports GIT_EEXISTS, the error is cleared and
 * 0 is returned without producing any content.
 *
 * On success the generated string is stored in `fc->map` and flagged
 * GIT_DIFF_FLAG__FREE_DATA.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_commit_to_str(
	git_diff_file_content *fc, bool check_status)
{
	char hex[GIT_OID_HEXSZ+1];
	git_buf text = GIT_BUF_INIT;
	const char *suffix = "";

	if (check_status) {
		git_submodule *submodule = NULL;
		unsigned int wd_status = 0;
		const git_oid *head;
		int error;

		error = git_submodule_lookup(&submodule, fc->repo, fc->file->path);
		if (error < 0) {
			if (error != GIT_EEXISTS)
				return error;

			/* GIT_EEXISTS means a "submodule" that has not been git added */
			giterr_clear();
			return 0;
		}

		error = git_submodule_status(
			&wd_status, fc->repo, fc->file->path,
			GIT_SUBMODULE_IGNORE_UNSPECIFIED);
		if (error < 0) {
			git_submodule_free(submodule);
			return error;
		}

		/* fill in the id if unknown: prefer the workdir id, then HEAD */
		if ((fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
			head = git_submodule_wd_id(submodule);
			if (head == NULL)
				head = git_submodule_head_id(submodule);

			if (head != NULL) {
				git_oid_cpy(&fc->file->id, head);
				fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
			}
		}

		if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(wd_status))
			suffix = "-dirty";

		git_submodule_free(submodule);
	}

	git_oid_tostr(hex, sizeof(hex), &fc->file->id);

	if (git_buf_printf(&text, "Subproject commit %s%s\n", hex, suffix) < 0)
		return -1;

	/* hand the buffer's memory over to the map */
	fc->map.len = git_buf_len(&text);
	fc->map.data = git_buf_detach(&text);
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}
227
/*
 * Load the content of a file that lives in the object database.
 *
 * - A zero id yields no content and returns 0.
 * - A commit (submodule) entry is rendered as text without a status
 *   check.
 * - If the size is unknown it is resolved first; that step may hand
 *   back an ODB object through `odb_obj`, which is then reused instead
 *   of looking the blob up a second time.
 * - Unless GIT_DIFF_SHOW_BINARY is requested, files that the size
 *   heuristic classifies as binary are not loaded.
 *
 * On success the blob is stored in `fc->blob`, flagged
 * GIT_DIFF_FLAG__FREE_BLOB, and `fc->map` points at its raw content.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_blob(
	git_diff_file_content *fc,
	git_diff_options *opts)
{
	int error = 0;
	git_odb_object *odb_obj = NULL;

	if (git_oid_iszero(&fc->file->id))
		return 0;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, false);

	/* if we don't know size, try to peek at object header first */
	if (!fc->file->size) {
		if ((error = git_diff_file__resolve_zero_size(
				fc->file, &odb_obj, fc->repo)) < 0)
			return error;
	}

	if ((opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc)) {
		/* not loading after all: release any object the size probe
		 * handed back, otherwise it would be leaked here
		 */
		if (odb_obj != NULL)
			git_odb_object_free(odb_obj);
		return 0;
	}

	if (odb_obj != NULL) {
		error = git_object__from_odb_object(
			(git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJ_BLOB);
		git_odb_object_free(odb_obj);
	} else {
		error = git_blob_lookup(
			(git_blob **)&fc->blob, fc->repo, &fc->file->id);
	}

	if (!error) {
		fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;
		fc->map.data = (void *)git_blob_rawcontent(fc->blob);
		fc->map.len = (size_t)git_blob_rawsize(fc->blob);
	}

	return error;
}
269
/*
 * Load a "symlink" on a repository where symlinks are not supported:
 * the entry is read as an ordinary file, and the bytes read become the
 * content.
 *
 * On success the data is stored in `fc->map` and flagged
 * GIT_DIFF_FLAG__FREE_DATA.
 *
 * Returns the result of reading the file (negative on failure).
 */
static int diff_file_content_load_workdir_symlink_fake(
	git_diff_file_content *fc, git_buf *path)
{
	git_buf link_text = GIT_BUF_INIT;
	int error = git_futils_readbuffer(&link_text, path->ptr);

	if (error < 0)
		return error;

	/* move the buffer's memory into the map */
	fc->map.len = git_buf_len(&link_text);
	fc->map.data = git_buf_detach(&link_text);
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	git_buf_free(&link_text);

	return error;
}
286
/*
 * Load the target of a working-directory symlink as the file content.
 *
 * If the repository configuration says symlinks are not supported, the
 * entry is read as a regular file instead.  Otherwise the link target is
 * read with p_readlink() into a freshly allocated buffer.
 *
 * The buffer is attached to `fc->map` (and flagged
 * GIT_DIFF_FLAG__FREE_DATA) only after the read has succeeded.  On
 * failure it is freed here, because the loader does not mark the content
 * as loaded when an error is returned and the unload routine releases
 * nothing for content that is not marked loaded.
 *
 * Returns 0 on success, -1 on failure.
 */
static int diff_file_content_load_workdir_symlink(
	git_diff_file_content *fc, git_buf *path)
{
	ssize_t alloc_len, read_len;
	int symlink_supported;
	char *target;

	if (git_repository__cvar(
			&symlink_supported, fc->repo, GIT_CVAR_SYMLINKS) < 0)
		return -1;

	if (!symlink_supported)
		return diff_file_content_load_workdir_symlink_fake(fc, path);

	/* link path on disk could be UTF-16, so prepare a buffer that is
	 * big enough to handle some UTF-8 data expansion
	 */
	alloc_len = (ssize_t)(fc->file->size * 2) + 1;

	target = git__calloc(alloc_len, sizeof(char));
	GITERR_CHECK_ALLOC(target);

	read_len = p_readlink(git_buf_cstr(path), target, alloc_len);
	if (read_len < 0) {
		/* report first, so that freeing cannot disturb errno */
		giterr_set(GITERR_OS, "failed to read symlink '%s'", fc->file->path);
		git__free(target);
		return -1;
	}

	fc->map.data = target;
	fc->map.len = read_len;
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}
319
/*
 * Load a regular working-directory file as the file content.
 *
 * If the recorded size is zero it is taken from the open descriptor; a
 * file that really is empty yields no content and returns 0.  Unless
 * GIT_DIFF_SHOW_BINARY is requested, files that the size heuristic
 * classifies as binary are not loaded.
 *
 * When no filters apply to the path the file is memory mapped
 * (GIT_DIFF_FLAG__UNMAP_DATA).  If there are filters, or mapping fails,
 * the file is read into a buffer and run through the filter list, and
 * the result is owned by `fc` (GIT_DIFF_FLAG__FREE_DATA).
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir_file(
	git_diff_file_content *fc,
	git_buf *path,
	git_diff_options *diff_opts)
{
	int error = 0;
	git_filter_list *fl = NULL;
	git_buf raw = GIT_BUF_INIT;
	git_file fd = git_futils_open_ro(git_buf_cstr(path));

	if (fd < 0)
		return fd;

	if (!fc->file->size) {
		fc->file->size = git_futils_filesize(fd);

		/* a negative size means the size query failed; do not let it
		 * be treated as a (huge) length by the mmap / read calls below
		 */
		if (fc->file->size < 0) {
			fc->file->size = 0;
			error = -1;
			goto cleanup;
		}

		/* genuinely empty file: nothing to load */
		if (!fc->file->size)
			goto cleanup;
	}

	if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc))
		goto cleanup;

	if ((error = git_filter_list_load(
			&fl, fc->repo, NULL, fc->file->path,
			GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE)) < 0)
		goto cleanup;

	/* if there are no filters, try to mmap the file */
	if (fl == NULL) {
		if (!(error = git_futils_mmap_ro(
				&fc->map, fd, 0, (size_t)fc->file->size))) {
			fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
			goto cleanup;
		}

		/* if mmap failed, fall through to try readbuffer below */
		giterr_clear();
	}

	if (!(error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size))) {
		git_buf out = GIT_BUF_INIT;

		error = git_filter_list_apply_to_data(&out, fl, &raw);

		/* the raw buffer is only released when the output does not
		 * share its memory
		 */
		if (out.ptr != raw.ptr)
			git_buf_free(&raw);

		if (!error) {
			fc->map.len = out.size;
			fc->map.data = out.ptr;
			fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
		}
	}

cleanup:
	git_filter_list_free(fl);
	p_close(fd);

	return error;
}
379
/*
 * Load the content of a working-directory entry.
 *
 * - A commit (submodule) entry is rendered as text, including a status
 *   check.
 * - A tree entry has no content and returns 0.
 * - Symlinks and regular files are loaded from the path formed by
 *   joining the repository working directory with the file path.
 *
 * If the file record has no valid id yet, the id is computed by hashing
 * the loaded data.  The record is flagged GIT_DIFF_FLAG_VALID_ID only
 * when that hash succeeds, so a failed hash never leaves an id that is
 * marked valid.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error = 0;
	git_buf path = GIT_BUF_INIT;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, true);

	if (fc->file->mode == GIT_FILEMODE_TREE)
		return 0;

	if (git_buf_joinpath(
			&path, git_repository_workdir(fc->repo), fc->file->path) < 0)
		return -1;

	if (S_ISLNK(fc->file->mode))
		error = diff_file_content_load_workdir_symlink(fc, &path);
	else
		error = diff_file_content_load_workdir_file(fc, &path, diff_opts);

	/* once data is loaded, update OID if we didn't have it previously */
	if (!error && (fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
		error = git_odb_hash(
			&fc->file->id, fc->map.data, fc->map.len, GIT_OBJ_BLOB);

		if (!error)
			fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
	}

	git_buf_free(&path);
	return error;
}
412
/*
 * Make sure the content for `fc` is loaded.
 *
 * Returns immediately with 0 when the content is already loaded, or
 * when the file is flagged binary and GIT_DIFF_SHOW_BINARY was not
 * requested (in which case nothing is loaded).  Otherwise the data is
 * read from the working directory or from the object database,
 * depending on `fc->src`.  After a successful load the content is
 * marked as loaded and checked for binary data.
 *
 * Returns 0 on success, or the non-zero result of the loader on failure.
 */
int git_diff_file_content__load(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error;
	bool is_binary, show_binary;

	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0)
		return 0;

	is_binary = (fc->file->flags & GIT_DIFF_FLAG_BINARY) != 0;
	show_binary = (diff_opts->flags & GIT_DIFF_SHOW_BINARY) != 0;

	if (is_binary && !show_binary)
		return 0;

	error = (fc->src == GIT_ITERATOR_TYPE_WORKDIR) ?
		diff_file_content_load_workdir(fc, diff_opts) :
		diff_file_content_load_blob(fc, diff_opts);

	if (error)
		return error;

	fc->flags |= GIT_DIFF_FLAG__LOADED;
	diff_file_content_binary_by_content(fc);

	return 0;
}
439
/*
 * Release whatever a previous load attached to `fc`.
 *
 * Does nothing unless the content is marked as loaded.  Heap data is
 * freed, or else a memory mapping is unmapped; in both cases the map is
 * reset to an empty string.  An owned blob is then freed, and finally
 * the loaded mark is cleared.
 */
void git_diff_file_content__unload(git_diff_file_content *fc)
{
	bool owns_heap, owns_mapping;

	if (!(fc->flags & GIT_DIFF_FLAG__LOADED))
		return;

	owns_heap = (fc->flags & GIT_DIFF_FLAG__FREE_DATA) != 0;
	owns_mapping = (fc->flags & GIT_DIFF_FLAG__UNMAP_DATA) != 0;

	/* heap data takes precedence; only one kind is released per call */
	if (owns_heap) {
		git__free(fc->map.data);
		fc->flags &= ~GIT_DIFF_FLAG__FREE_DATA;
	} else if (owns_mapping) {
		git_futils_mmap_free(&fc->map);
		fc->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
	}

	if (owns_heap || owns_mapping) {
		fc->map.data = "";
		fc->map.len = 0;
	}

	if ((fc->flags & GIT_DIFF_FLAG__FREE_BLOB) != 0) {
		git_blob_free((git_blob *)fc->blob);
		fc->blob = NULL;
		fc->flags &= ~GIT_DIFF_FLAG__FREE_BLOB;
	}

	fc->flags &= ~GIT_DIFF_FLAG__LOADED;
}
466
/*
 * Tear down `fc`.  At present this only unloads any loaded content;
 * there is no other state to release.
 */
void git_diff_file_content__clear(git_diff_file_content *fc)
{
	/* unloading is the only cleanup required for now */
	git_diff_file_content__unload(fc);
}
473