1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "blame.h"
9 
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
18 #include "util.h"
19 #include "repository.h"
20 #include "blame_git.h"
21 
22 
hunk_byfinalline_search_cmp(const void * key,const void * entry)23 static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
24 {
25 	git_blame_hunk *hunk = (git_blame_hunk*)entry;
26 
27 	size_t lineno = *(size_t*)key;
28 	size_t lines_in_hunk = hunk->lines_in_hunk;
29 	size_t final_start_line_number = hunk->final_start_line_number;
30 
31 	if (lineno < final_start_line_number)
32 		return -1;
33 	if (lineno >= final_start_line_number + lines_in_hunk)
34 		return 1;
35 	return 0;
36 }
37 
paths_cmp(const void * a,const void * b)38 static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); }
hunk_cmp(const void * _a,const void * _b)39 static int hunk_cmp(const void *_a, const void *_b)
40 {
41 	git_blame_hunk *a = (git_blame_hunk*)_a,
42 						*b = (git_blame_hunk*)_b;
43 
44 	if (a->final_start_line_number > b->final_start_line_number)
45 		return 1;
46 	else if (a->final_start_line_number < b->final_start_line_number)
47 		return -1;
48 	else
49 		return 0;
50 }
51 
hunk_ends_at_or_before_line(git_blame_hunk * hunk,size_t line)52 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
53 {
54 	return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
55 }
56 
hunk_starts_at_or_after_line(git_blame_hunk * hunk,size_t line)57 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
58 {
59 	return line <= hunk->final_start_line_number;
60 }
61 
new_hunk(size_t start,size_t lines,size_t orig_start,const char * path)62 static git_blame_hunk *new_hunk(
63 		size_t start,
64 		size_t lines,
65 		size_t orig_start,
66 		const char *path)
67 {
68 	git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
69 	if (!hunk) return NULL;
70 
71 	hunk->lines_in_hunk = lines;
72 	hunk->final_start_line_number = start;
73 	hunk->orig_start_line_number = orig_start;
74 	hunk->orig_path = path ? git__strdup(path) : NULL;
75 
76 	return hunk;
77 }
78 
free_hunk(git_blame_hunk * hunk)79 static void free_hunk(git_blame_hunk *hunk)
80 {
81 	git__free((void*)hunk->orig_path);
82 	git_signature_free(hunk->final_signature);
83 	git_signature_free(hunk->orig_signature);
84 	git__free(hunk);
85 }
86 
dup_hunk(git_blame_hunk * hunk)87 static git_blame_hunk *dup_hunk(git_blame_hunk *hunk)
88 {
89 	git_blame_hunk *newhunk = new_hunk(
90 			hunk->final_start_line_number,
91 			hunk->lines_in_hunk,
92 			hunk->orig_start_line_number,
93 			hunk->orig_path);
94 
95 	if (!newhunk)
96 		return NULL;
97 
98 	git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
99 	git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
100 	newhunk->boundary = hunk->boundary;
101 
102 	if (git_signature_dup(&newhunk->final_signature, hunk->final_signature) < 0 ||
103 		git_signature_dup(&newhunk->orig_signature, hunk->orig_signature) < 0) {
104 		free_hunk(newhunk);
105 		return NULL;
106 	}
107 
108 	return newhunk;
109 }
110 
111 /* Starting with the hunk that includes start_line, shift all following hunks'
112  * final_start_line by shift_by lines */
shift_hunks_by(git_vector * v,size_t start_line,int shift_by)113 static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
114 {
115 	size_t i;
116 
117 	if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
118 		for (; i < v->length; i++) {
119 			git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
120 			hunk->final_start_line_number += shift_by;
121 		}
122 	}
123 }
124 
git_blame__alloc(git_repository * repo,git_blame_options opts,const char * path)125 git_blame *git_blame__alloc(
126 	git_repository *repo,
127 	git_blame_options opts,
128 	const char *path)
129 {
130 	git_blame *gbr = git__calloc(1, sizeof(git_blame));
131 	if (!gbr)
132 		return NULL;
133 
134 	gbr->repository = repo;
135 	gbr->options = opts;
136 
137 	if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
138 		git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
139 		(gbr->path = git__strdup(path)) == NULL ||
140 		git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
141 	{
142 		git_blame_free(gbr);
143 		return NULL;
144 	}
145 
146 	if (opts.flags & GIT_BLAME_USE_MAILMAP &&
147 	    git_mailmap_from_repository(&gbr->mailmap, repo) < 0) {
148 		git_blame_free(gbr);
149 		return NULL;
150 	}
151 
152 	return gbr;
153 }
154 
git_blame_free(git_blame * blame)155 void git_blame_free(git_blame *blame)
156 {
157 	size_t i;
158 	git_blame_hunk *hunk;
159 
160 	if (!blame) return;
161 
162 	git_vector_foreach(&blame->hunks, i, hunk)
163 		free_hunk(hunk);
164 	git_vector_free(&blame->hunks);
165 
166 	git_vector_free_deep(&blame->paths);
167 
168 	git_array_clear(blame->line_index);
169 
170 	git_mailmap_free(blame->mailmap);
171 
172 	git__free(blame->path);
173 	git_blob_free(blame->final_blob);
174 	git__free(blame);
175 }
176 
git_blame_get_hunk_count(git_blame * blame)177 uint32_t git_blame_get_hunk_count(git_blame *blame)
178 {
179 	GIT_ASSERT_ARG(blame);
180 	return (uint32_t)blame->hunks.length;
181 }
182 
git_blame_get_hunk_byindex(git_blame * blame,uint32_t index)183 const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
184 {
185 	GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL);
186 	return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
187 }
188 
git_blame_get_hunk_byline(git_blame * blame,size_t lineno)189 const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
190 {
191 	size_t i, new_lineno = lineno;
192 
193 	GIT_ASSERT_ARG_WITH_RETVAL(blame, NULL);
194 
195 	if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
196 		return git_blame_get_hunk_byindex(blame, (uint32_t)i);
197 	}
198 
199 	return NULL;
200 }
201 
normalize_options(git_blame_options * out,const git_blame_options * in,git_repository * repo)202 static int normalize_options(
203 		git_blame_options *out,
204 		const git_blame_options *in,
205 		git_repository *repo)
206 {
207 	git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
208 	if (!in) in = &dummy;
209 
210 	memcpy(out, in, sizeof(git_blame_options));
211 
212 	/* No newest_commit => HEAD */
213 	if (git_oid_is_zero(&out->newest_commit)) {
214 		if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
215 			return -1;
216 		}
217 	}
218 
219 	/* min_line 0 really means 1 */
220 	if (!out->min_line) out->min_line = 1;
221 	/* max_line 0 really means N, but we don't know N yet */
222 
223 	/* Fix up option implications */
224 	if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
225 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
226 	if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
227 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
228 	if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
229 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
230 
231 	return 0;
232 }
233 
split_hunk_in_vector(git_vector * vec,git_blame_hunk * hunk,size_t rel_line,bool return_new)234 static git_blame_hunk *split_hunk_in_vector(
235 		git_vector *vec,
236 		git_blame_hunk *hunk,
237 		size_t rel_line,
238 		bool return_new)
239 {
240 	size_t new_line_count;
241 	git_blame_hunk *nh;
242 
243 	/* Don't split if already at a boundary */
244 	if (rel_line <= 0 ||
245 	    rel_line >= hunk->lines_in_hunk)
246 	{
247 		return hunk;
248 	}
249 
250 	new_line_count = hunk->lines_in_hunk - rel_line;
251 	nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
252 			hunk->orig_start_line_number + rel_line, hunk->orig_path);
253 
254 	if (!nh)
255 		return NULL;
256 
257 	git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
258 	git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
259 
260 	/* Adjust hunk that was split */
261 	hunk->lines_in_hunk -= new_line_count;
262 	git_vector_insert_sorted(vec, nh, NULL);
263 	{
264 		git_blame_hunk *ret = return_new ? nh : hunk;
265 		return ret;
266 	}
267 }
268 
269 /*
270  * Construct a list of char indices for where lines begin
271  * Adapted from core git:
272  * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
273  */
index_blob_lines(git_blame * blame)274 static int index_blob_lines(git_blame *blame)
275 {
276     const char *buf = blame->final_buf;
277     size_t len = blame->final_buf_size;
278     int num = 0, incomplete = 0, bol = 1;
279     size_t *i;
280 
281     if (len && buf[len-1] != '\n')
282         incomplete++; /* incomplete line at the end */
283     while (len--) {
284         if (bol) {
285             i = git_array_alloc(blame->line_index);
286             GIT_ERROR_CHECK_ALLOC(i);
287             *i = buf - blame->final_buf;
288             bol = 0;
289         }
290         if (*buf++ == '\n') {
291             num++;
292             bol = 1;
293         }
294     }
295     i = git_array_alloc(blame->line_index);
296     GIT_ERROR_CHECK_ALLOC(i);
297     *i = buf - blame->final_buf;
298     blame->num_lines = num + incomplete;
299     return blame->num_lines;
300 }
301 
hunk_from_entry(git_blame__entry * e,git_blame * blame)302 static git_blame_hunk *hunk_from_entry(git_blame__entry *e, git_blame *blame)
303 {
304 	git_blame_hunk *h = new_hunk(
305 			e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
306 
307 	if (!h)
308 		return NULL;
309 
310 	git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
311 	git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
312 	git_commit_author_with_mailmap(
313 		&h->final_signature, e->suspect->commit, blame->mailmap);
314 	git_signature_dup(&h->orig_signature, h->final_signature);
315 	h->boundary = e->is_boundary ? 1 : 0;
316 	return h;
317 }
318 
load_blob(git_blame * blame)319 static int load_blob(git_blame *blame)
320 {
321 	int error;
322 
323 	if (blame->final_blob) return 0;
324 
325 	error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
326 	if (error < 0)
327 		goto cleanup;
328 	error = git_object_lookup_bypath((git_object**)&blame->final_blob,
329 			(git_object*)blame->final, blame->path, GIT_OBJECT_BLOB);
330 
331 cleanup:
332 	return error;
333 }
334 
blame_internal(git_blame * blame)335 static int blame_internal(git_blame *blame)
336 {
337 	int error;
338 	git_blame__entry *ent = NULL;
339 	git_blame__origin *o;
340 
341 	if ((error = load_blob(blame)) < 0 ||
342 	    (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
343 		goto cleanup;
344 
345 	if (git_blob_rawsize(blame->final_blob) > SIZE_MAX) {
346 		git_error_set(GIT_ERROR_NOMEMORY, "blob is too large to blame");
347 		error = -1;
348 		goto cleanup;
349 	}
350 
351 	blame->final_buf = git_blob_rawcontent(blame->final_blob);
352 	blame->final_buf_size = (size_t)git_blob_rawsize(blame->final_blob);
353 
354 	ent = git__calloc(1, sizeof(git_blame__entry));
355 	GIT_ERROR_CHECK_ALLOC(ent);
356 
357 	ent->num_lines = index_blob_lines(blame);
358 	ent->lno = blame->options.min_line - 1;
359 	ent->num_lines = ent->num_lines - blame->options.min_line + 1;
360 	if (blame->options.max_line > 0)
361 		ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
362 	ent->s_lno = ent->lno;
363 	ent->suspect = o;
364 
365 	blame->ent = ent;
366 
367 	error = git_blame__like_git(blame, blame->options.flags);
368 
369 cleanup:
370 	for (ent = blame->ent; ent; ) {
371 		git_blame__entry *e = ent->next;
372 		git_blame_hunk *h = hunk_from_entry(ent, blame);
373 
374 		git_vector_insert(&blame->hunks, h);
375 
376 		git_blame__free_entry(ent);
377 		ent = e;
378 	}
379 
380 	return error;
381 }
382 
383 /*******************************************************************************
384  * File blaming
385  ******************************************************************************/
386 
git_blame_file(git_blame ** out,git_repository * repo,const char * path,git_blame_options * options)387 int git_blame_file(
388 		git_blame **out,
389 		git_repository *repo,
390 		const char *path,
391 		git_blame_options *options)
392 {
393 	int error = -1;
394 	git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
395 	git_blame *blame = NULL;
396 
397 	GIT_ASSERT_ARG(out);
398 	GIT_ASSERT_ARG(repo);
399 	GIT_ASSERT_ARG(path);
400 
401 	if ((error = normalize_options(&normOptions, options, repo)) < 0)
402 		goto on_error;
403 
404 	blame = git_blame__alloc(repo, normOptions, path);
405 	GIT_ERROR_CHECK_ALLOC(blame);
406 
407 	if ((error = load_blob(blame)) < 0)
408 		goto on_error;
409 
410 	if ((error = blame_internal(blame)) < 0)
411 		goto on_error;
412 
413 	*out = blame;
414 	return 0;
415 
416 on_error:
417 	git_blame_free(blame);
418 	return error;
419 }
420 
421 /*******************************************************************************
422  * Buffer blaming
423  *******************************************************************************/
424 
hunk_is_bufferblame(git_blame_hunk * hunk)425 static bool hunk_is_bufferblame(git_blame_hunk *hunk)
426 {
427 	return hunk && git_oid_is_zero(&hunk->final_commit_id);
428 }
429 
buffer_hunk_cb(const git_diff_delta * delta,const git_diff_hunk * hunk,void * payload)430 static int buffer_hunk_cb(
431 	const git_diff_delta *delta,
432 	const git_diff_hunk *hunk,
433 	void *payload)
434 {
435 	git_blame *blame = (git_blame*)payload;
436 	uint32_t wedge_line;
437 
438 	GIT_UNUSED(delta);
439 
440 	wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
441 	blame->current_diff_line = wedge_line;
442 
443 	blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
444 	if (!blame->current_hunk) {
445 		/* Line added at the end of the file */
446 		blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
447 		GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
448 
449 		git_vector_insert(&blame->hunks, blame->current_hunk);
450 	} else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
451 		/* If this hunk doesn't start between existing hunks, split a hunk up so it does */
452 		blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
453 				wedge_line - blame->current_hunk->orig_start_line_number, true);
454 		GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
455 	}
456 
457 	return 0;
458 }
459 
ptrs_equal_cmp(const void * a,const void * b)460 static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; }
buffer_line_cb(const git_diff_delta * delta,const git_diff_hunk * hunk,const git_diff_line * line,void * payload)461 static int buffer_line_cb(
462 	const git_diff_delta *delta,
463 	const git_diff_hunk *hunk,
464 	const git_diff_line *line,
465 	void *payload)
466 {
467 	git_blame *blame = (git_blame*)payload;
468 
469 	GIT_UNUSED(delta);
470 	GIT_UNUSED(hunk);
471 	GIT_UNUSED(line);
472 
473 	if (line->origin == GIT_DIFF_LINE_ADDITION) {
474 		if (hunk_is_bufferblame(blame->current_hunk) &&
475 		    hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
476 			/* Append to the current buffer-blame hunk */
477 			blame->current_hunk->lines_in_hunk++;
478 			shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
479 		} else {
480 			/* Create a new buffer-blame hunk with this line */
481 			shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
482 			blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
483 			GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
484 
485 			git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
486 		}
487 		blame->current_diff_line++;
488 	}
489 
490 	if (line->origin == GIT_DIFF_LINE_DELETION) {
491 		/* Trim the line from the current hunk; remove it if it's now empty */
492 		size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
493 
494 		if (--(blame->current_hunk->lines_in_hunk) == 0) {
495 			size_t i;
496 			shift_base--;
497 			if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
498 				git_vector_remove(&blame->hunks, i);
499 				free_hunk(blame->current_hunk);
500 				blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
501 			}
502 		}
503 		shift_hunks_by(&blame->hunks, shift_base, -1);
504 	}
505 	return 0;
506 }
507 
git_blame_buffer(git_blame ** out,git_blame * reference,const char * buffer,size_t buffer_len)508 int git_blame_buffer(
509 		git_blame **out,
510 		git_blame *reference,
511 		const char *buffer,
512 		size_t buffer_len)
513 {
514 	git_blame *blame;
515 	git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
516 	size_t i;
517 	git_blame_hunk *hunk;
518 
519 	diffopts.context_lines = 0;
520 
521 	GIT_ASSERT_ARG(out);
522 	GIT_ASSERT_ARG(reference);
523 	GIT_ASSERT_ARG(buffer && buffer_len);
524 
525 	blame = git_blame__alloc(reference->repository, reference->options, reference->path);
526 	GIT_ERROR_CHECK_ALLOC(blame);
527 
528 	/* Duplicate all of the hunk structures in the reference blame */
529 	git_vector_foreach(&reference->hunks, i, hunk) {
530 		git_blame_hunk *h = dup_hunk(hunk);
531 		GIT_ERROR_CHECK_ALLOC(h);
532 
533 		git_vector_insert(&blame->hunks, h);
534 	}
535 
536 	/* Diff to the reference blob */
537 	git_diff_blob_to_buffer(reference->final_blob, blame->path,
538 		buffer, buffer_len, blame->path, &diffopts,
539 		NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
540 
541 	*out = blame;
542 	return 0;
543 }
544 
git_blame_options_init(git_blame_options * opts,unsigned int version)545 int git_blame_options_init(git_blame_options *opts, unsigned int version)
546 {
547 	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
548 		opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
549 	return 0;
550 }
551 
552 #ifndef GIT_DEPRECATE_HARD
git_blame_init_options(git_blame_options * opts,unsigned int version)553 int git_blame_init_options(git_blame_options *opts, unsigned int version)
554 {
555 	return git_blame_options_init(opts, version);
556 }
557 #endif
558