1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "blame.h"
9 
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "git2/mailmap.h"
18 #include "util.h"
19 #include "repository.h"
20 #include "blame_git.h"
21 
22 
hunk_byfinalline_search_cmp(const void * key,const void * entry)23 static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
24 {
25 	git_blame_hunk *hunk = (git_blame_hunk*)entry;
26 
27 	size_t lineno = *(size_t*)key;
28 	size_t lines_in_hunk = hunk->lines_in_hunk;
29 	size_t final_start_line_number = hunk->final_start_line_number;
30 
31 	if (lineno < final_start_line_number)
32 		return -1;
33 	if (lineno >= final_start_line_number + lines_in_hunk)
34 		return 1;
35 	return 0;
36 }
37 
paths_cmp(const void * a,const void * b)38 static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); }
hunk_cmp(const void * _a,const void * _b)39 static int hunk_cmp(const void *_a, const void *_b)
40 {
41 	git_blame_hunk *a = (git_blame_hunk*)_a,
42 						*b = (git_blame_hunk*)_b;
43 
44 	if (a->final_start_line_number > b->final_start_line_number)
45 		return 1;
46 	else if (a->final_start_line_number < b->final_start_line_number)
47 		return -1;
48 	else
49 		return 0;
50 }
51 
hunk_ends_at_or_before_line(git_blame_hunk * hunk,size_t line)52 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
53 {
54 	return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
55 }
56 
hunk_starts_at_or_after_line(git_blame_hunk * hunk,size_t line)57 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
58 {
59 	return line <= hunk->final_start_line_number;
60 }
61 
new_hunk(size_t start,size_t lines,size_t orig_start,const char * path)62 static git_blame_hunk* new_hunk(
63 		size_t start,
64 		size_t lines,
65 		size_t orig_start,
66 		const char *path)
67 {
68 	git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
69 	if (!hunk) return NULL;
70 
71 	hunk->lines_in_hunk = lines;
72 	hunk->final_start_line_number = start;
73 	hunk->orig_start_line_number = orig_start;
74 	hunk->orig_path = path ? git__strdup(path) : NULL;
75 
76 	return hunk;
77 }
78 
dup_hunk(git_blame_hunk * hunk)79 static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
80 {
81 	git_blame_hunk *newhunk = new_hunk(
82 			hunk->final_start_line_number,
83 			hunk->lines_in_hunk,
84 			hunk->orig_start_line_number,
85 			hunk->orig_path);
86 
87 	if (!newhunk)
88 		return NULL;
89 
90 	git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
91 	git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
92 	newhunk->boundary = hunk->boundary;
93 	git_signature_dup(&newhunk->final_signature, hunk->final_signature);
94 	git_signature_dup(&newhunk->orig_signature, hunk->orig_signature);
95 	return newhunk;
96 }
97 
free_hunk(git_blame_hunk * hunk)98 static void free_hunk(git_blame_hunk *hunk)
99 {
100 	git__free((void*)hunk->orig_path);
101 	git_signature_free(hunk->final_signature);
102 	git_signature_free(hunk->orig_signature);
103 	git__free(hunk);
104 }
105 
106 /* Starting with the hunk that includes start_line, shift all following hunks'
107  * final_start_line by shift_by lines */
shift_hunks_by(git_vector * v,size_t start_line,int shift_by)108 static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
109 {
110 	size_t i;
111 
112 	if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
113 		for (; i < v->length; i++) {
114 			git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
115 			hunk->final_start_line_number += shift_by;
116 		}
117 	}
118 }
119 
git_blame__alloc(git_repository * repo,git_blame_options opts,const char * path)120 git_blame* git_blame__alloc(
121 	git_repository *repo,
122 	git_blame_options opts,
123 	const char *path)
124 {
125 	git_blame *gbr = git__calloc(1, sizeof(git_blame));
126 	if (!gbr)
127 		return NULL;
128 
129 	gbr->repository = repo;
130 	gbr->options = opts;
131 
132 	if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
133 		git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
134 		(gbr->path = git__strdup(path)) == NULL ||
135 		git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
136 	{
137 		git_blame_free(gbr);
138 		return NULL;
139 	}
140 
141 	if (opts.flags & GIT_BLAME_USE_MAILMAP &&
142 	    git_mailmap_from_repository(&gbr->mailmap, repo) < 0) {
143 		git_blame_free(gbr);
144 		return NULL;
145 	}
146 
147 	return gbr;
148 }
149 
git_blame_free(git_blame * blame)150 void git_blame_free(git_blame *blame)
151 {
152 	size_t i;
153 	git_blame_hunk *hunk;
154 
155 	if (!blame) return;
156 
157 	git_vector_foreach(&blame->hunks, i, hunk)
158 		free_hunk(hunk);
159 	git_vector_free(&blame->hunks);
160 
161 	git_vector_free_deep(&blame->paths);
162 
163 	git_array_clear(blame->line_index);
164 
165 	git_mailmap_free(blame->mailmap);
166 
167 	git__free(blame->path);
168 	git_blob_free(blame->final_blob);
169 	git__free(blame);
170 }
171 
git_blame_get_hunk_count(git_blame * blame)172 uint32_t git_blame_get_hunk_count(git_blame *blame)
173 {
174 	assert(blame);
175 	return (uint32_t)blame->hunks.length;
176 }
177 
git_blame_get_hunk_byindex(git_blame * blame,uint32_t index)178 const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
179 {
180 	assert(blame);
181 	return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
182 }
183 
git_blame_get_hunk_byline(git_blame * blame,size_t lineno)184 const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
185 {
186 	size_t i, new_lineno = lineno;
187 	assert(blame);
188 
189 	if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
190 		return git_blame_get_hunk_byindex(blame, (uint32_t)i);
191 	}
192 
193 	return NULL;
194 }
195 
normalize_options(git_blame_options * out,const git_blame_options * in,git_repository * repo)196 static int normalize_options(
197 		git_blame_options *out,
198 		const git_blame_options *in,
199 		git_repository *repo)
200 {
201 	git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
202 	if (!in) in = &dummy;
203 
204 	memcpy(out, in, sizeof(git_blame_options));
205 
206 	/* No newest_commit => HEAD */
207 	if (git_oid_is_zero(&out->newest_commit)) {
208 		if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
209 			return -1;
210 		}
211 	}
212 
213 	/* min_line 0 really means 1 */
214 	if (!out->min_line) out->min_line = 1;
215 	/* max_line 0 really means N, but we don't know N yet */
216 
217 	/* Fix up option implications */
218 	if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
219 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
220 	if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
221 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
222 	if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
223 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
224 
225 	return 0;
226 }
227 
split_hunk_in_vector(git_vector * vec,git_blame_hunk * hunk,size_t rel_line,bool return_new)228 static git_blame_hunk *split_hunk_in_vector(
229 		git_vector *vec,
230 		git_blame_hunk *hunk,
231 		size_t rel_line,
232 		bool return_new)
233 {
234 	size_t new_line_count;
235 	git_blame_hunk *nh;
236 
237 	/* Don't split if already at a boundary */
238 	if (rel_line <= 0 ||
239 	    rel_line >= hunk->lines_in_hunk)
240 	{
241 		return hunk;
242 	}
243 
244 	new_line_count = hunk->lines_in_hunk - rel_line;
245 	nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
246 			hunk->orig_start_line_number + rel_line, hunk->orig_path);
247 
248 	if (!nh)
249 		return NULL;
250 
251 	git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
252 	git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
253 
254 	/* Adjust hunk that was split */
255 	hunk->lines_in_hunk -= new_line_count;
256 	git_vector_insert_sorted(vec, nh, NULL);
257 	{
258 		git_blame_hunk *ret = return_new ? nh : hunk;
259 		return ret;
260 	}
261 }
262 
263 /*
264  * Construct a list of char indices for where lines begin
265  * Adapted from core git:
266  * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
267  */
index_blob_lines(git_blame * blame)268 static int index_blob_lines(git_blame *blame)
269 {
270     const char *buf = blame->final_buf;
271     size_t len = blame->final_buf_size;
272     int num = 0, incomplete = 0, bol = 1;
273     size_t *i;
274 
275     if (len && buf[len-1] != '\n')
276         incomplete++; /* incomplete line at the end */
277     while (len--) {
278         if (bol) {
279             i = git_array_alloc(blame->line_index);
280             GIT_ERROR_CHECK_ALLOC(i);
281             *i = buf - blame->final_buf;
282             bol = 0;
283         }
284         if (*buf++ == '\n') {
285             num++;
286             bol = 1;
287         }
288     }
289     i = git_array_alloc(blame->line_index);
290     GIT_ERROR_CHECK_ALLOC(i);
291     *i = buf - blame->final_buf;
292     blame->num_lines = num + incomplete;
293     return blame->num_lines;
294 }
295 
hunk_from_entry(git_blame__entry * e,git_blame * blame)296 static git_blame_hunk* hunk_from_entry(git_blame__entry *e, git_blame *blame)
297 {
298 	git_blame_hunk *h = new_hunk(
299 			e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
300 
301 	if (!h)
302 		return NULL;
303 
304 	git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
305 	git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
306 	git_commit_author_with_mailmap(
307 		&h->final_signature, e->suspect->commit, blame->mailmap);
308 	git_signature_dup(&h->orig_signature, h->final_signature);
309 	h->boundary = e->is_boundary ? 1 : 0;
310 	return h;
311 }
312 
load_blob(git_blame * blame)313 static int load_blob(git_blame *blame)
314 {
315 	int error;
316 
317 	if (blame->final_blob) return 0;
318 
319 	error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
320 	if (error < 0)
321 		goto cleanup;
322 	error = git_object_lookup_bypath((git_object**)&blame->final_blob,
323 			(git_object*)blame->final, blame->path, GIT_OBJECT_BLOB);
324 
325 cleanup:
326 	return error;
327 }
328 
blame_internal(git_blame * blame)329 static int blame_internal(git_blame *blame)
330 {
331 	int error;
332 	git_blame__entry *ent = NULL;
333 	git_blame__origin *o;
334 
335 	if ((error = load_blob(blame)) < 0 ||
336 	    (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
337 		goto cleanup;
338 
339 	if (git_blob_rawsize(blame->final_blob) > SIZE_MAX) {
340 		git_error_set(GIT_ERROR_NOMEMORY, "blob is too large to blame");
341 		error = -1;
342 		goto cleanup;
343 	}
344 
345 	blame->final_buf = git_blob_rawcontent(blame->final_blob);
346 	blame->final_buf_size = (size_t)git_blob_rawsize(blame->final_blob);
347 
348 	ent = git__calloc(1, sizeof(git_blame__entry));
349 	GIT_ERROR_CHECK_ALLOC(ent);
350 
351 	ent->num_lines = index_blob_lines(blame);
352 	ent->lno = blame->options.min_line - 1;
353 	ent->num_lines = ent->num_lines - blame->options.min_line + 1;
354 	if (blame->options.max_line > 0)
355 		ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
356 	ent->s_lno = ent->lno;
357 	ent->suspect = o;
358 
359 	blame->ent = ent;
360 
361 	error = git_blame__like_git(blame, blame->options.flags);
362 
363 cleanup:
364 	for (ent = blame->ent; ent; ) {
365 		git_blame__entry *e = ent->next;
366 		git_blame_hunk *h = hunk_from_entry(ent, blame);
367 
368 		git_vector_insert(&blame->hunks, h);
369 
370 		git_blame__free_entry(ent);
371 		ent = e;
372 	}
373 
374 	return error;
375 }
376 
377 /*******************************************************************************
378  * File blaming
379  ******************************************************************************/
380 
git_blame_file(git_blame ** out,git_repository * repo,const char * path,git_blame_options * options)381 int git_blame_file(
382 		git_blame **out,
383 		git_repository *repo,
384 		const char *path,
385 		git_blame_options *options)
386 {
387 	int error = -1;
388 	git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
389 	git_blame *blame = NULL;
390 
391 	assert(out && repo && path);
392 	if ((error = normalize_options(&normOptions, options, repo)) < 0)
393 		goto on_error;
394 
395 	blame = git_blame__alloc(repo, normOptions, path);
396 	GIT_ERROR_CHECK_ALLOC(blame);
397 
398 	if ((error = load_blob(blame)) < 0)
399 		goto on_error;
400 
401 	if ((error = blame_internal(blame)) < 0)
402 		goto on_error;
403 
404 	*out = blame;
405 	return 0;
406 
407 on_error:
408 	git_blame_free(blame);
409 	return error;
410 }
411 
412 /*******************************************************************************
413  * Buffer blaming
414  *******************************************************************************/
415 
hunk_is_bufferblame(git_blame_hunk * hunk)416 static bool hunk_is_bufferblame(git_blame_hunk *hunk)
417 {
418 	return hunk && git_oid_is_zero(&hunk->final_commit_id);
419 }
420 
buffer_hunk_cb(const git_diff_delta * delta,const git_diff_hunk * hunk,void * payload)421 static int buffer_hunk_cb(
422 	const git_diff_delta *delta,
423 	const git_diff_hunk *hunk,
424 	void *payload)
425 {
426 	git_blame *blame = (git_blame*)payload;
427 	uint32_t wedge_line;
428 
429 	GIT_UNUSED(delta);
430 
431 	wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
432 	blame->current_diff_line = wedge_line;
433 
434 	blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
435 	if (!blame->current_hunk) {
436 		/* Line added at the end of the file */
437 		blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
438 		GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
439 
440 		git_vector_insert(&blame->hunks, blame->current_hunk);
441 	} else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
442 		/* If this hunk doesn't start between existing hunks, split a hunk up so it does */
443 		blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
444 				wedge_line - blame->current_hunk->orig_start_line_number, true);
445 		GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
446 	}
447 
448 	return 0;
449 }
450 
ptrs_equal_cmp(const void * a,const void * b)451 static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; }
buffer_line_cb(const git_diff_delta * delta,const git_diff_hunk * hunk,const git_diff_line * line,void * payload)452 static int buffer_line_cb(
453 	const git_diff_delta *delta,
454 	const git_diff_hunk *hunk,
455 	const git_diff_line *line,
456 	void *payload)
457 {
458 	git_blame *blame = (git_blame*)payload;
459 
460 	GIT_UNUSED(delta);
461 	GIT_UNUSED(hunk);
462 	GIT_UNUSED(line);
463 
464 	if (line->origin == GIT_DIFF_LINE_ADDITION) {
465 		if (hunk_is_bufferblame(blame->current_hunk) &&
466 		    hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
467 			/* Append to the current buffer-blame hunk */
468 			blame->current_hunk->lines_in_hunk++;
469 			shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
470 		} else {
471 			/* Create a new buffer-blame hunk with this line */
472 			shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
473 			blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
474 			GIT_ERROR_CHECK_ALLOC(blame->current_hunk);
475 
476 			git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
477 		}
478 		blame->current_diff_line++;
479 	}
480 
481 	if (line->origin == GIT_DIFF_LINE_DELETION) {
482 		/* Trim the line from the current hunk; remove it if it's now empty */
483 		size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
484 
485 		if (--(blame->current_hunk->lines_in_hunk) == 0) {
486 			size_t i;
487 			shift_base--;
488 			if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
489 				git_vector_remove(&blame->hunks, i);
490 				free_hunk(blame->current_hunk);
491 				blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
492 			}
493 		}
494 		shift_hunks_by(&blame->hunks, shift_base, -1);
495 	}
496 	return 0;
497 }
498 
git_blame_buffer(git_blame ** out,git_blame * reference,const char * buffer,size_t buffer_len)499 int git_blame_buffer(
500 		git_blame **out,
501 		git_blame *reference,
502 		const char *buffer,
503 		size_t buffer_len)
504 {
505 	git_blame *blame;
506 	git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
507 	size_t i;
508 	git_blame_hunk *hunk;
509 
510 	diffopts.context_lines = 0;
511 
512 	assert(out && reference && buffer && buffer_len);
513 
514 	blame = git_blame__alloc(reference->repository, reference->options, reference->path);
515 	GIT_ERROR_CHECK_ALLOC(blame);
516 
517 	/* Duplicate all of the hunk structures in the reference blame */
518 	git_vector_foreach(&reference->hunks, i, hunk) {
519 		git_blame_hunk *h = dup_hunk(hunk);
520 		GIT_ERROR_CHECK_ALLOC(h);
521 
522 		git_vector_insert(&blame->hunks, h);
523 	}
524 
525 	/* Diff to the reference blob */
526 	git_diff_blob_to_buffer(reference->final_blob, blame->path,
527 		buffer, buffer_len, blame->path, &diffopts,
528 		NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
529 
530 	*out = blame;
531 	return 0;
532 }
533 
git_blame_options_init(git_blame_options * opts,unsigned int version)534 int git_blame_options_init(git_blame_options *opts, unsigned int version)
535 {
536 	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
537 		opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
538 	return 0;
539 }
540 
541 #ifndef GIT_DEPRECATE_HARD
git_blame_init_options(git_blame_options * opts,unsigned int version)542 int git_blame_init_options(git_blame_options *opts, unsigned int version)
543 {
544 	return git_blame_options_init(opts, version);
545 }
546 #endif
547