1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "diff_file.h"
9 
10 #include "git2/blob.h"
11 #include "git2/submodule.h"
12 #include "diff.h"
13 #include "diff_generate.h"
14 #include "odb.h"
15 #include "futils.h"
16 #include "filter.h"
17 
/*
 * Size limit (0x20000000 bytes = 512 MiB) used by
 * diff_file_content_init_common() when the diff options carry a
 * max_size of 0.
 */
#define DIFF_MAX_FILESIZE 0x20000000
19 
/*
 * Flag the file as binary when it is larger than the configured limit.
 *
 * The size test is only applied while none of the DIFF_FLAGS_KNOWN_BINARY
 * bits are set on the file and a positive limit (fc->opts_max_size) is
 * configured.
 *
 * Returns true when the file carries GIT_DIFF_FLAG_BINARY afterwards,
 * whether the flag was set here or was already present.
 */
static bool diff_file_content_binary_by_size(git_diff_file_content *fc)
{
	git_diff_file *file = fc->file;
	bool undecided = (file->flags & DIFF_FLAGS_KNOWN_BINARY) == 0;

	if (undecided &&
		fc->opts_max_size > 0 &&
		file->size > fc->opts_max_size)
		file->flags |= GIT_DIFF_FLAG_BINARY;

	return (file->flags & GIT_DIFF_FLAG_BINARY) != 0;
}
30 
/*
 * Ask the diff driver whether the mapped content looks binary and record
 * the answer on the file flags.
 *
 * Nothing happens when any DIFF_FLAGS_KNOWN_BINARY bit is already set.
 * A driver result of 0 marks the file GIT_DIFF_FLAG_NOT_BINARY, a result
 * of 1 marks it GIT_DIFF_FLAG_BINARY; any other result leaves the flags
 * untouched.
 */
static void diff_file_content_binary_by_content(git_diff_file_content *fc)
{
	int verdict;

	if ((fc->file->flags & DIFF_FLAGS_KNOWN_BINARY) != 0)
		return;

	verdict = git_diff_driver_content_is_binary(
		fc->driver, fc->map.data, fc->map.len);

	if (verdict == 0)
		fc->file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	else if (verdict == 1)
		fc->file->flags |= GIT_DIFF_FLAG_BINARY;
}
43 
/*
 * Shared tail of the git_diff_file_content initializers.
 *
 * Copies the relevant options into `fc`, makes sure a diff driver is
 * attached, settles the binary / not-binary flags as far as possible
 * without reading content, and, when content is already present (or known
 * to be absent), runs the content-based binary check.
 *
 * `opts` may be NULL, in which case GIT_DIFF_NORMAL flags are used and the
 * size limit is left as it was.
 *
 * Returns 0 on success, -1 if the diff driver lookup fails.
 */
static int diff_file_content_init_common(
	git_diff_file_content *fc, const git_diff_options *opts)
{
	git_diff_file *file = fc->file;

	fc->opts_flags = opts ? opts->flags : GIT_DIFF_NORMAL;

	/* a max_size of 0 selects the built-in default limit */
	if (opts && opts->max_size >= 0) {
		if (opts->max_size)
			fc->opts_max_size = opts->max_size;
		else
			fc->opts_max_size = DIFF_MAX_FILESIZE;
	}

	if (fc->src == GIT_ITERATOR_EMPTY)
		fc->src = GIT_ITERATOR_TREE;

	if (fc->driver == NULL) {
		int rc = git_diff_driver_lookup(
			&fc->driver, fc->repo, NULL, file->path);

		if (rc < 0)
			return -1;
	}

	/* let the driver adjust the option flags */
	git_diff_driver_update_options(&fc->opts_flags, fc->driver);

	if ((size_t)file->size != file->size) {
		/* size does not fit in size_t, so it could never be mapped */
		file->flags |= GIT_DIFF_FLAG_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_TEXT) {
		/* caller forces a text diff */
		file->flags &= ~GIT_DIFF_FLAG_BINARY;
		file->flags |= GIT_DIFF_FLAG_NOT_BINARY;
	} else if (fc->opts_flags & GIT_DIFF_FORCE_BINARY) {
		/* caller forces a binary diff */
		file->flags &= ~GIT_DIFF_FLAG_NOT_BINARY;
		file->flags |= GIT_DIFF_FLAG_BINARY;
	}

	diff_file_content_binary_by_size(fc);

	/* a side without data counts as loaded, with empty content */
	if ((fc->flags & GIT_DIFF_FLAG__NO_DATA) != 0) {
		fc->flags |= GIT_DIFF_FLAG__LOADED;
		fc->map.len  = 0;
		fc->map.data = "";
	}

	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0)
		diff_file_content_binary_by_content(fc);

	return 0;
}
91 
/*
 * Initialize `fc` for one side of a delta taken from `diff`.
 *
 * `use_old` selects the old file / old source of the delta; otherwise the
 * new file / new source is used. Based on the delta status, a side that
 * has no content is flagged GIT_DIFF_FLAG__NO_DATA before the common
 * initialization runs.
 *
 * Returns 0 on success, -1 if the diff driver lookup fails, or whatever
 * diff_file_content_init_common() returns.
 */
int git_diff_file_content__init_from_diff(
	git_diff_file_content *fc,
	git_diff *diff,
	git_diff_delta *delta,
	bool use_old)
{
	bool has_data;

	memset(fc, 0, sizeof(*fc));
	fc->repo = diff->repo;

	if (use_old) {
		fc->file = &delta->old_file;
		fc->src  = diff->old_src;
	} else {
		fc->file = &delta->new_file;
		fc->src  = diff->new_src;
	}

	if (git_diff_driver_lookup(&fc->driver, fc->repo,
		    &diff->attrsession, fc->file->path) < 0)
		return -1;

	switch (delta->status) {
	case GIT_DELTA_UNREADABLE:
	case GIT_DELTA_MODIFIED:
	case GIT_DELTA_COPIED:
	case GIT_DELTA_RENAMED:
		/* both sides are treated as having data */
		has_data = true;
		break;
	case GIT_DELTA_ADDED:
		/* only the new side has data */
		has_data = !use_old;
		break;
	case GIT_DELTA_DELETED:
		/* only the old side has data */
		has_data = use_old;
		break;
	case GIT_DELTA_UNTRACKED:
		/* new side only, and only when untracked content was requested */
		has_data = !use_old &&
			(diff->opts.flags & GIT_DIFF_SHOW_UNTRACKED_CONTENT) != 0;
		break;
	default:
		has_data = false;
		break;
	}

	if (!has_data)
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;

	return diff_file_content_init_common(fc, &diff->opts);
}
133 
/*
 * Initialize `fc` from an in-memory source: either a blob or a raw buffer.
 *
 * `as_file` is filled in to describe the content (size, id, mode, flags)
 * and becomes fc->file. When `src` has neither a blob nor a buffer the
 * content is flagged GIT_DIFF_FLAG__NO_DATA. Otherwise the content is
 * marked as already loaded:
 *   - for a blob, a duplicated reference is stored in fc->blob and flagged
 *     GIT_DIFF_FLAG__FREE_BLOB so that unloading releases it;
 *   - for a buffer, the id is computed by hashing the buffer as a blob and
 *     fc->map points directly at the caller's buffer.
 *
 * Returns 0 on success or a negative error code. Failures of
 * git_blob_dup() / git_odb_hash() are reported before any "loaded" or
 * "valid id" flag is set, so a failed call does not leave `fc` claiming
 * to hold usable content.
 */
int git_diff_file_content__init_from_src(
	git_diff_file_content *fc,
	git_repository *repo,
	const git_diff_options *opts,
	const git_diff_file_content_src *src,
	git_diff_file *as_file)
{
	int error;

	memset(fc, 0, sizeof(*fc));
	fc->repo = repo;
	fc->file = as_file;

	if (!src->blob && !src->buf) {
		fc->flags |= GIT_DIFF_FLAG__NO_DATA;
	} else {
		if (src->blob) {
			if ((error = git_blob_dup(
					(git_blob **)&fc->blob, (git_blob *) src->blob)) < 0)
				return error;

			/* we own the duplicated reference from here on */
			fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;

			fc->file->size = git_blob_rawsize(src->blob);
			git_oid_cpy(&fc->file->id, git_blob_id(src->blob));

			fc->map.len  = (size_t)fc->file->size;
			fc->map.data = (char *)git_blob_rawcontent(src->blob);
		} else {
			if ((error = git_odb_hash(
					&fc->file->id, src->buf, src->buflen, GIT_OBJECT_BLOB)) < 0)
				return error;

			fc->file->size = src->buflen;

			fc->map.len  = src->buflen;
			fc->map.data = (char *)src->buf;
		}

		fc->file->id_abbrev = GIT_OID_HEXSZ;
		fc->file->mode = GIT_FILEMODE_BLOB;
		fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
		fc->flags |= GIT_DIFF_FLAG__LOADED;
	}

	return diff_file_content_init_common(fc, opts);
}
174 
/*
 * Produce the textual stand-in for a commit (submodule) entry:
 * "Subproject commit <oid><status>\n".
 *
 * When `check_status` is true the submodule named by the file path is
 * looked up; its id fills in the file id if that was not valid yet, and
 * "-dirty" is appended when the submodule status reports a dirty working
 * directory. A lookup failing with GIT_EEXISTS is not treated as an
 * error: the error is cleared and 0 is returned without producing any
 * content.
 *
 * On success the text is stored in fc->map and GIT_DIFF_FLAG__FREE_DATA
 * is set so that unloading frees it. Returns 0 or a negative error code.
 */
static int diff_file_content_commit_to_str(
	git_diff_file_content *fc, bool check_status)
{
	char hex[GIT_OID_HEXSZ+1];
	git_buf text = GIT_BUF_INIT;
	const char *suffix = "";

	if (check_status) {
		git_submodule *sm = NULL;
		unsigned int sm_status = 0;
		const git_oid *sm_id;
		int error;

		error = git_submodule_lookup(&sm, fc->repo, fc->file->path);
		if (error < 0) {
			if (error != GIT_EEXISTS)
				return error;

			/* GIT_EEXISTS means a "submodule" that has not been git added */
			git_error_clear();
			return 0;
		}

		error = git_submodule_status(
			&sm_status, fc->repo, fc->file->path,
			GIT_SUBMODULE_IGNORE_UNSPECIFIED);
		if (error < 0) {
			git_submodule_free(sm);
			return error;
		}

		/* fill in the id if unknown: prefer the workdir id, then HEAD */
		if ((fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
			sm_id = git_submodule_wd_id(sm);
			if (sm_id == NULL)
				sm_id = git_submodule_head_id(sm);

			if (sm_id != NULL) {
				git_oid_cpy(&fc->file->id, sm_id);
				fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
			}
		}

		if (GIT_SUBMODULE_STATUS_IS_WD_DIRTY(sm_status))
			suffix = "-dirty";

		git_submodule_free(sm);
	}

	git_oid_tostr(hex, sizeof(hex), &fc->file->id);

	if (git_buf_printf(&text, "Subproject commit %s%s\n", hex, suffix) < 0)
		return -1;

	/* hand ownership of the formatted text over to fc */
	fc->map.len  = git_buf_len(&text);
	fc->map.data = git_buf_detach(&text);
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}
227 
/*
 * Load the content of a non-workdir entry from the object database.
 *
 * - A zero id means there is nothing to load.
 * - A commit (submodule) entry is rendered as text instead.
 * - When the size is not known yet it is resolved first; that step may
 *   hand back the already-read ODB object in `odb_obj`, which is then
 *   reused to build the blob instead of looking it up again.
 * - Unless GIT_DIFF_SHOW_BINARY is set, a file that is binary by size is
 *   left unloaded.
 *
 * On success fc->blob holds the blob (flagged GIT_DIFF_FLAG__FREE_BLOB)
 * and fc->map points at its raw content. Returns 0 or a negative error
 * code.
 */
static int diff_file_content_load_blob(
	git_diff_file_content *fc,
	git_diff_options *opts)
{
	int error = 0;
	git_odb_object *odb_obj = NULL;

	if (git_oid_is_zero(&fc->file->id))
		return 0;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, false);

	/* if we don't know size, try to peek at object header first */
	if (!fc->file->size) {
		if ((error = git_diff_file__resolve_zero_size(
				fc->file, &odb_obj, fc->repo)) < 0)
			return error;
	}

	if ((opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc)) {
		/* don't leak the object the size lookup may have handed back */
		if (odb_obj != NULL)
			git_odb_object_free(odb_obj);
		return 0;
	}

	if (odb_obj != NULL) {
		error = git_object__from_odb_object(
			(git_object **)&fc->blob, fc->repo, odb_obj, GIT_OBJECT_BLOB);
		git_odb_object_free(odb_obj);
	} else {
		error = git_blob_lookup(
			(git_blob **)&fc->blob, fc->repo, &fc->file->id);
	}

	if (!error) {
		fc->flags |= GIT_DIFF_FLAG__FREE_BLOB;
		fc->map.data = (void *)git_blob_rawcontent(fc->blob);
		fc->map.len  = (size_t)git_blob_rawsize(fc->blob);
	}

	return error;
}
269 
/*
 * Load a "symlink" by reading the file at `path` as regular content.
 * Called by diff_file_content_load_workdir_symlink() when the repository
 * reports that symlinks are not supported.
 *
 * The bytes read become fc->map and are flagged GIT_DIFF_FLAG__FREE_DATA.
 * Returns the result of git_futils_readbuffer().
 */
static int diff_file_content_load_workdir_symlink_fake(
	git_diff_file_content *fc, git_buf *path)
{
	git_buf link = GIT_BUF_INIT;
	int error = git_futils_readbuffer(&link, path->ptr);

	if (error < 0)
		return error;

	/* fc takes over the buffer's memory */
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
	fc->map.len = git_buf_len(&link);
	fc->map.data = git_buf_detach(&link);

	git_buf_dispose(&link);
	return error;
}
286 
/*
 * Load the target of a working-directory symlink as the file content.
 *
 * When the repository configuration says symlinks are not supported, the
 * file at `path` is read as regular content instead. Otherwise the link
 * target is read with p_readlink() into a freshly allocated buffer that
 * is handed to fc->map and flagged GIT_DIFF_FLAG__FREE_DATA.
 *
 * The buffer is attached to `fc` only after the read succeeds. A failed
 * load never gets GIT_DIFF_FLAG__LOADED, and unloading skips content that
 * is not marked loaded, so attaching it early would leak it on error.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir_symlink(
	git_diff_file_content *fc, git_buf *path)
{
	ssize_t alloc_len, read_len;
	int symlink_supported, error;
	char *target;

	if ((error = git_repository__configmap_lookup(
		&symlink_supported, fc->repo, GIT_CONFIGMAP_SYMLINKS)) < 0)
		return error;

	if (!symlink_supported)
		return diff_file_content_load_workdir_symlink_fake(fc, path);

	/* link path on disk could be UTF-16, so prepare a buffer that is
	 * big enough to handle some UTF-8 data expansion
	 */
	alloc_len = (ssize_t)(fc->file->size * 2) + 1;

	target = git__calloc(alloc_len, sizeof(char));
	GIT_ERROR_CHECK_ALLOC(target);

	read_len = p_readlink(git_buf_cstr(path), target, alloc_len);
	if (read_len < 0) {
		/* record the OS error before freeing, in case free touches errno */
		git_error_set(GIT_ERROR_OS, "failed to read symlink '%s'", fc->file->path);
		git__free(target);
		return -1;
	}

	fc->map.data = target;
	fc->map.len = read_len;
	fc->flags |= GIT_DIFF_FLAG__FREE_DATA;

	return 0;
}
319 
/*
 * Load a regular working-directory file as diff content.
 *
 * The file size is looked up if not known yet; an empty file, or one that
 * is binary by size while GIT_DIFF_SHOW_BINARY is off, is left unloaded
 * with a 0 return. With no filters configured for the path the file is
 * memory-mapped (GIT_DIFF_FLAG__UNMAP_DATA); if mapping fails, or filters
 * exist, the file is read into memory, passed through the filter list
 * and the result is stored in fc->map (GIT_DIFF_FLAG__FREE_DATA).
 *
 * Returns 0 on success or a negative error code; a failure to open the
 * file returns the open call's result directly.
 */
static int diff_file_content_load_workdir_file(
	git_diff_file_content *fc,
	git_buf *path,
	git_diff_options *diff_opts)
{
	int error = 0;
	git_filter_list *filters = NULL;
	git_buf raw = GIT_BUF_INIT;
	git_buf filtered = GIT_BUF_INIT;
	git_file fd;

	fd = git_futils_open_ro(git_buf_cstr(path));
	if (fd < 0)
		return fd;

	if (fc->file->size == 0)
		error = git_futils_filesize(&fc->file->size, fd);

	/* nothing to load on error or for an empty file */
	if (error < 0 || fc->file->size == 0)
		goto cleanup;

	if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		diff_file_content_binary_by_size(fc))
		goto cleanup;

	error = git_filter_list_load(
		&filters, fc->repo, NULL, fc->file->path,
		GIT_FILTER_TO_ODB, GIT_FILTER_ALLOW_UNSAFE);
	if (error < 0)
		goto cleanup;

	/* without filters the file can be mapped directly */
	if (filters == NULL) {
		error = git_futils_mmap_ro(
			&fc->map, fd, 0, (size_t)fc->file->size);

		if (error == 0) {
			fc->flags |= GIT_DIFF_FLAG__UNMAP_DATA;
			goto cleanup;
		}

		/* mapping failed: forget that error and read the file instead */
		git_error_clear();
	}

	error = git_futils_readbuffer_fd(&raw, fd, (size_t)fc->file->size);
	if (error != 0)
		goto cleanup;

	error = git_filter_list_apply_to_data(&filtered, filters, &raw);

	/* release the raw buffer unless the output reuses its memory */
	if (filtered.ptr != raw.ptr)
		git_buf_dispose(&raw);

	if (error == 0) {
		fc->map.len  = filtered.size;
		fc->map.data = filtered.ptr;
		fc->flags |= GIT_DIFF_FLAG__FREE_DATA;
	}

cleanup:
	git_filter_list_free(filters);
	p_close(fd);

	return error;
}
381 
/*
 * Load content for an entry that lives in the working directory.
 *
 * Commit (submodule) entries are rendered as text with a status check,
 * tree entries have no content, symlinks and regular files are read from
 * the path formed by joining the repository workdir and the file path.
 *
 * After a successful load of a symlink or file, a file id that was not
 * known yet is computed by hashing the loaded content as a blob. The id
 * is only marked valid when that hash succeeds.
 *
 * Returns 0 on success or a negative error code.
 */
static int diff_file_content_load_workdir(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error = 0;
	git_buf path = GIT_BUF_INIT;

	if (fc->file->mode == GIT_FILEMODE_COMMIT)
		return diff_file_content_commit_to_str(fc, true);

	if (fc->file->mode == GIT_FILEMODE_TREE)
		return 0;

	if (git_buf_joinpath(
			&path, git_repository_workdir(fc->repo), fc->file->path) < 0)
		return -1;

	if (S_ISLNK(fc->file->mode))
		error = diff_file_content_load_workdir_symlink(fc, &path);
	else
		error = diff_file_content_load_workdir_file(fc, &path, diff_opts);

	/* once data is loaded, update OID if we didn't have it previously */
	if (!error && (fc->file->flags & GIT_DIFF_FLAG_VALID_ID) == 0) {
		error = git_odb_hash(
			&fc->file->id, fc->map.data, fc->map.len, GIT_OBJECT_BLOB);

		/* a failed hash must not leave the id flagged as valid */
		if (!error)
			fc->file->flags |= GIT_DIFF_FLAG_VALID_ID;
	}

	git_buf_dispose(&path);
	return error;
}
414 
/*
 * Make sure the content of `fc` is loaded.
 *
 * Does nothing when the content is already loaded, or when the file is
 * flagged binary and GIT_DIFF_SHOW_BINARY is not requested. Otherwise the
 * content is read from the working directory or from a blob depending on
 * fc->src, the content is marked loaded, and the content-based binary
 * check is run.
 *
 * Returns 0 on success (including the "nothing to do" cases) or the error
 * reported by the loader.
 */
int git_diff_file_content__load(
	git_diff_file_content *fc,
	git_diff_options *diff_opts)
{
	int error;

	if ((fc->flags & GIT_DIFF_FLAG__LOADED) != 0)
		return 0;

	/* known-binary content is only loaded when binary output is wanted */
	if ((diff_opts->flags & GIT_DIFF_SHOW_BINARY) == 0 &&
		(fc->file->flags & GIT_DIFF_FLAG_BINARY) != 0)
		return 0;

	error = (fc->src == GIT_ITERATOR_WORKDIR) ?
		diff_file_content_load_workdir(fc, diff_opts) :
		diff_file_content_load_blob(fc, diff_opts);

	if (error)
		return error;

	fc->flags |= GIT_DIFF_FLAG__LOADED;
	diff_file_content_binary_by_content(fc);

	return 0;
}
441 
/*
 * Release whatever content `fc` currently holds.
 *
 * Content that is not marked GIT_DIFF_FLAG__LOADED is left alone. Heap
 * data (GIT_DIFF_FLAG__FREE_DATA) is freed, otherwise mapped data
 * (GIT_DIFF_FLAG__UNMAP_DATA) is unmapped; in both cases fc->map is reset
 * to an empty string. A blob reference flagged GIT_DIFF_FLAG__FREE_BLOB
 * is released. Finally the loaded flag is cleared.
 */
void git_diff_file_content__unload(git_diff_file_content *fc)
{
	if (!(fc->flags & GIT_DIFF_FLAG__LOADED))
		return;

	if (fc->flags & (GIT_DIFF_FLAG__FREE_DATA | GIT_DIFF_FLAG__UNMAP_DATA)) {
		/* heap data takes precedence over a mapping */
		if (fc->flags & GIT_DIFF_FLAG__FREE_DATA) {
			git__free(fc->map.data);
			fc->flags &= ~GIT_DIFF_FLAG__FREE_DATA;
		} else {
			git_futils_mmap_free(&fc->map);
			fc->flags &= ~GIT_DIFF_FLAG__UNMAP_DATA;
		}

		fc->map.data = "";
		fc->map.len  = 0;
	}

	if (fc->flags & GIT_DIFF_FLAG__FREE_BLOB) {
		git_blob_free((git_blob *)fc->blob);
		fc->blob = NULL;
		fc->flags &= ~GIT_DIFF_FLAG__FREE_BLOB;
	}

	fc->flags &= ~GIT_DIFF_FLAG__LOADED;
}
468 
/*
 * Tear down `fc`. At present this only unloads the content; there is no
 * other state to release.
 */
void git_diff_file_content__clear(git_diff_file_content *fc)
{
	git_diff_file_content__unload(fc);
}
475