1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "blame.h"
9 
10 #include "git2/commit.h"
11 #include "git2/revparse.h"
12 #include "git2/revwalk.h"
13 #include "git2/tree.h"
14 #include "git2/diff.h"
15 #include "git2/blob.h"
16 #include "git2/signature.h"
17 #include "util.h"
18 #include "repository.h"
19 #include "blame_git.h"
20 
21 
hunk_byfinalline_search_cmp(const void * key,const void * entry)22 static int hunk_byfinalline_search_cmp(const void *key, const void *entry)
23 {
24 	git_blame_hunk *hunk = (git_blame_hunk*)entry;
25 
26 	size_t lineno = *(size_t*)key;
27 	size_t lines_in_hunk = hunk->lines_in_hunk;
28 	size_t final_start_line_number = hunk->final_start_line_number;
29 
30 	if (lineno < final_start_line_number)
31 		return -1;
32 	if (lineno >= final_start_line_number + lines_in_hunk)
33 		return 1;
34 	return 0;
35 }
36 
paths_cmp(const void * a,const void * b)37 static int paths_cmp(const void *a, const void *b) { return git__strcmp((char*)a, (char*)b); }
hunk_cmp(const void * _a,const void * _b)38 static int hunk_cmp(const void *_a, const void *_b)
39 {
40 	git_blame_hunk *a = (git_blame_hunk*)_a,
41 						*b = (git_blame_hunk*)_b;
42 
43 	return a->final_start_line_number - b->final_start_line_number;
44 }
45 
hunk_ends_at_or_before_line(git_blame_hunk * hunk,size_t line)46 static bool hunk_ends_at_or_before_line(git_blame_hunk *hunk, size_t line)
47 {
48 	return line >= (hunk->final_start_line_number + hunk->lines_in_hunk - 1);
49 }
50 
hunk_starts_at_or_after_line(git_blame_hunk * hunk,size_t line)51 static bool hunk_starts_at_or_after_line(git_blame_hunk *hunk, size_t line)
52 {
53 	return line <= hunk->final_start_line_number;
54 }
55 
new_hunk(size_t start,size_t lines,size_t orig_start,const char * path)56 static git_blame_hunk* new_hunk(
57 		size_t start,
58 		size_t lines,
59 		size_t orig_start,
60 		const char *path)
61 {
62 	git_blame_hunk *hunk = git__calloc(1, sizeof(git_blame_hunk));
63 	if (!hunk) return NULL;
64 
65 	hunk->lines_in_hunk = lines;
66 	hunk->final_start_line_number = start;
67 	hunk->orig_start_line_number = orig_start;
68 	hunk->orig_path = path ? git__strdup(path) : NULL;
69 
70 	return hunk;
71 }
72 
dup_hunk(git_blame_hunk * hunk)73 static git_blame_hunk* dup_hunk(git_blame_hunk *hunk)
74 {
75 	git_blame_hunk *newhunk = new_hunk(
76 			hunk->final_start_line_number,
77 			hunk->lines_in_hunk,
78 			hunk->orig_start_line_number,
79 			hunk->orig_path);
80 
81 	if (!newhunk)
82 		return NULL;
83 
84 	git_oid_cpy(&newhunk->orig_commit_id, &hunk->orig_commit_id);
85 	git_oid_cpy(&newhunk->final_commit_id, &hunk->final_commit_id);
86 	newhunk->boundary = hunk->boundary;
87 	git_signature_dup(&newhunk->final_signature, hunk->final_signature);
88 	git_signature_dup(&newhunk->orig_signature, hunk->orig_signature);
89 	return newhunk;
90 }
91 
free_hunk(git_blame_hunk * hunk)92 static void free_hunk(git_blame_hunk *hunk)
93 {
94 	git__free((void*)hunk->orig_path);
95 	git_signature_free(hunk->final_signature);
96 	git_signature_free(hunk->orig_signature);
97 	git__free(hunk);
98 }
99 
100 /* Starting with the hunk that includes start_line, shift all following hunks'
101  * final_start_line by shift_by lines */
shift_hunks_by(git_vector * v,size_t start_line,int shift_by)102 static void shift_hunks_by(git_vector *v, size_t start_line, int shift_by)
103 {
104 	size_t i;
105 
106 	if (!git_vector_bsearch2(&i, v, hunk_byfinalline_search_cmp, &start_line)) {
107 		for (; i < v->length; i++) {
108 			git_blame_hunk *hunk = (git_blame_hunk*)v->contents[i];
109 			hunk->final_start_line_number += shift_by;
110 		}
111 	}
112 }
113 
git_blame__alloc(git_repository * repo,git_blame_options opts,const char * path)114 git_blame* git_blame__alloc(
115 	git_repository *repo,
116 	git_blame_options opts,
117 	const char *path)
118 {
119 	git_blame *gbr = git__calloc(1, sizeof(git_blame));
120 	if (!gbr)
121 		return NULL;
122 
123 	gbr->repository = repo;
124 	gbr->options = opts;
125 
126 	if (git_vector_init(&gbr->hunks, 8, hunk_cmp) < 0 ||
127 		git_vector_init(&gbr->paths, 8, paths_cmp) < 0 ||
128 		(gbr->path = git__strdup(path)) == NULL ||
129 		git_vector_insert(&gbr->paths, git__strdup(path)) < 0)
130 	{
131 		git_blame_free(gbr);
132 		return NULL;
133 	}
134 
135 	return gbr;
136 }
137 
git_blame_free(git_blame * blame)138 void git_blame_free(git_blame *blame)
139 {
140 	size_t i;
141 	git_blame_hunk *hunk;
142 
143 	if (!blame) return;
144 
145 	git_vector_foreach(&blame->hunks, i, hunk)
146 		free_hunk(hunk);
147 	git_vector_free(&blame->hunks);
148 
149 	git_vector_free_deep(&blame->paths);
150 
151 	git_array_clear(blame->line_index);
152 
153 	git__free(blame->path);
154 	git_blob_free(blame->final_blob);
155 	git__free(blame);
156 }
157 
git_blame_get_hunk_count(git_blame * blame)158 uint32_t git_blame_get_hunk_count(git_blame *blame)
159 {
160 	assert(blame);
161 	return (uint32_t)blame->hunks.length;
162 }
163 
git_blame_get_hunk_byindex(git_blame * blame,uint32_t index)164 const git_blame_hunk *git_blame_get_hunk_byindex(git_blame *blame, uint32_t index)
165 {
166 	assert(blame);
167 	return (git_blame_hunk*)git_vector_get(&blame->hunks, index);
168 }
169 
git_blame_get_hunk_byline(git_blame * blame,size_t lineno)170 const git_blame_hunk *git_blame_get_hunk_byline(git_blame *blame, size_t lineno)
171 {
172 	size_t i, new_lineno = lineno;
173 	assert(blame);
174 
175 	if (!git_vector_bsearch2(&i, &blame->hunks, hunk_byfinalline_search_cmp, &new_lineno)) {
176 		return git_blame_get_hunk_byindex(blame, (uint32_t)i);
177 	}
178 
179 	return NULL;
180 }
181 
normalize_options(git_blame_options * out,const git_blame_options * in,git_repository * repo)182 static int normalize_options(
183 		git_blame_options *out,
184 		const git_blame_options *in,
185 		git_repository *repo)
186 {
187 	git_blame_options dummy = GIT_BLAME_OPTIONS_INIT;
188 	if (!in) in = &dummy;
189 
190 	memcpy(out, in, sizeof(git_blame_options));
191 
192 	/* No newest_commit => HEAD */
193 	if (git_oid_iszero(&out->newest_commit)) {
194 		if (git_reference_name_to_id(&out->newest_commit, repo, "HEAD") < 0) {
195 			return -1;
196 		}
197 	}
198 
199 	/* min_line 0 really means 1 */
200 	if (!out->min_line) out->min_line = 1;
201 	/* max_line 0 really means N, but we don't know N yet */
202 
203 	/* Fix up option implications */
204 	if (out->flags & GIT_BLAME_TRACK_COPIES_ANY_COMMIT_COPIES)
205 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES;
206 	if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_COPIES)
207 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES;
208 	if (out->flags & GIT_BLAME_TRACK_COPIES_SAME_COMMIT_MOVES)
209 		out->flags |= GIT_BLAME_TRACK_COPIES_SAME_FILE;
210 
211 	return 0;
212 }
213 
split_hunk_in_vector(git_vector * vec,git_blame_hunk * hunk,size_t rel_line,bool return_new)214 static git_blame_hunk *split_hunk_in_vector(
215 		git_vector *vec,
216 		git_blame_hunk *hunk,
217 		size_t rel_line,
218 		bool return_new)
219 {
220 	size_t new_line_count;
221 	git_blame_hunk *nh;
222 
223 	/* Don't split if already at a boundary */
224 	if (rel_line <= 0 ||
225 	    rel_line >= hunk->lines_in_hunk)
226 	{
227 		return hunk;
228 	}
229 
230 	new_line_count = hunk->lines_in_hunk - rel_line;
231 	nh = new_hunk(hunk->final_start_line_number + rel_line, new_line_count,
232 			hunk->orig_start_line_number + rel_line, hunk->orig_path);
233 
234 	if (!nh)
235 		return NULL;
236 
237 	git_oid_cpy(&nh->final_commit_id, &hunk->final_commit_id);
238 	git_oid_cpy(&nh->orig_commit_id, &hunk->orig_commit_id);
239 
240 	/* Adjust hunk that was split */
241 	hunk->lines_in_hunk -= new_line_count;
242 	git_vector_insert_sorted(vec, nh, NULL);
243 	{
244 		git_blame_hunk *ret = return_new ? nh : hunk;
245 		return ret;
246 	}
247 }
248 
249 /*
250  * Construct a list of char indices for where lines begin
251  * Adapted from core git:
252  * https://github.com/gitster/git/blob/be5c9fb9049ed470e7005f159bb923a5f4de1309/builtin/blame.c#L1760-L1789
253  */
index_blob_lines(git_blame * blame)254 static int index_blob_lines(git_blame *blame)
255 {
256     const char *buf = blame->final_buf;
257     git_off_t len = blame->final_buf_size;
258     int num = 0, incomplete = 0, bol = 1;
259     size_t *i;
260 
261     if (len && buf[len-1] != '\n')
262         incomplete++; /* incomplete line at the end */
263     while (len--) {
264         if (bol) {
265             i = git_array_alloc(blame->line_index);
266             GITERR_CHECK_ALLOC(i);
267             *i = buf - blame->final_buf;
268             bol = 0;
269         }
270         if (*buf++ == '\n') {
271             num++;
272             bol = 1;
273         }
274     }
275     i = git_array_alloc(blame->line_index);
276     GITERR_CHECK_ALLOC(i);
277     *i = buf - blame->final_buf;
278     blame->num_lines = num + incomplete;
279     return blame->num_lines;
280 }
281 
hunk_from_entry(git_blame__entry * e)282 static git_blame_hunk* hunk_from_entry(git_blame__entry *e)
283 {
284 	git_blame_hunk *h = new_hunk(
285 			e->lno+1, e->num_lines, e->s_lno+1, e->suspect->path);
286 
287 	if (!h)
288 		return NULL;
289 
290 	git_oid_cpy(&h->final_commit_id, git_commit_id(e->suspect->commit));
291 	git_oid_cpy(&h->orig_commit_id, git_commit_id(e->suspect->commit));
292 	git_signature_dup(&h->final_signature, git_commit_author(e->suspect->commit));
293 	git_signature_dup(&h->orig_signature, git_commit_author(e->suspect->commit));
294 	h->boundary = e->is_boundary ? 1 : 0;
295 	return h;
296 }
297 
load_blob(git_blame * blame)298 static int load_blob(git_blame *blame)
299 {
300 	int error;
301 
302 	if (blame->final_blob) return 0;
303 
304 	error = git_commit_lookup(&blame->final, blame->repository, &blame->options.newest_commit);
305 	if (error < 0)
306 		goto cleanup;
307 	error = git_object_lookup_bypath((git_object**)&blame->final_blob,
308 			(git_object*)blame->final, blame->path, GIT_OBJ_BLOB);
309 
310 cleanup:
311 	return error;
312 }
313 
blame_internal(git_blame * blame)314 static int blame_internal(git_blame *blame)
315 {
316 	int error;
317 	git_blame__entry *ent = NULL;
318 	git_blame__origin *o;
319 
320 	if ((error = load_blob(blame)) < 0 ||
321 	    (error = git_blame__get_origin(&o, blame, blame->final, blame->path)) < 0)
322 		goto cleanup;
323 	blame->final_buf = git_blob_rawcontent(blame->final_blob);
324 	blame->final_buf_size = git_blob_rawsize(blame->final_blob);
325 
326 	ent = git__calloc(1, sizeof(git_blame__entry));
327 	GITERR_CHECK_ALLOC(ent);
328 
329 	ent->num_lines = index_blob_lines(blame);
330 	ent->lno = blame->options.min_line - 1;
331 	ent->num_lines = ent->num_lines - blame->options.min_line + 1;
332 	if (blame->options.max_line > 0)
333 		ent->num_lines = blame->options.max_line - blame->options.min_line + 1;
334 	ent->s_lno = ent->lno;
335 	ent->suspect = o;
336 
337 	blame->ent = ent;
338 
339 	error = git_blame__like_git(blame, blame->options.flags);
340 
341 cleanup:
342 	for (ent = blame->ent; ent; ) {
343 		git_blame__entry *e = ent->next;
344 		git_blame_hunk *h = hunk_from_entry(ent);
345 
346 		git_vector_insert(&blame->hunks, h);
347 
348 		git_blame__free_entry(ent);
349 		ent = e;
350 	}
351 
352 	return error;
353 }
354 
355 /*******************************************************************************
356  * File blaming
357  ******************************************************************************/
358 
git_blame_file(git_blame ** out,git_repository * repo,const char * path,git_blame_options * options)359 int git_blame_file(
360 		git_blame **out,
361 		git_repository *repo,
362 		const char *path,
363 		git_blame_options *options)
364 {
365 	int error = -1;
366 	git_blame_options normOptions = GIT_BLAME_OPTIONS_INIT;
367 	git_blame *blame = NULL;
368 
369 	assert(out && repo && path);
370 	if ((error = normalize_options(&normOptions, options, repo)) < 0)
371 		goto on_error;
372 
373 	blame = git_blame__alloc(repo, normOptions, path);
374 	GITERR_CHECK_ALLOC(blame);
375 
376 	if ((error = load_blob(blame)) < 0)
377 		goto on_error;
378 
379 	if ((error = blame_internal(blame)) < 0)
380 		goto on_error;
381 
382 	*out = blame;
383 	return 0;
384 
385 on_error:
386 	git_blame_free(blame);
387 	return error;
388 }
389 
390 /*******************************************************************************
391  * Buffer blaming
392  *******************************************************************************/
393 
hunk_is_bufferblame(git_blame_hunk * hunk)394 static bool hunk_is_bufferblame(git_blame_hunk *hunk)
395 {
396 	return git_oid_iszero(&hunk->final_commit_id);
397 }
398 
buffer_hunk_cb(const git_diff_delta * delta,const git_diff_hunk * hunk,void * payload)399 static int buffer_hunk_cb(
400 	const git_diff_delta *delta,
401 	const git_diff_hunk *hunk,
402 	void *payload)
403 {
404 	git_blame *blame = (git_blame*)payload;
405 	uint32_t wedge_line;
406 
407 	GIT_UNUSED(delta);
408 
409 	wedge_line = (hunk->old_lines == 0) ? hunk->new_start : hunk->old_start;
410 	blame->current_diff_line = wedge_line;
411 
412 	blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byline(blame, wedge_line);
413 	if (!blame->current_hunk) {
414 		/* Line added at the end of the file */
415 		blame->current_hunk = new_hunk(wedge_line, 0, wedge_line, blame->path);
416 		GITERR_CHECK_ALLOC(blame->current_hunk);
417 
418 		git_vector_insert(&blame->hunks, blame->current_hunk);
419 	} else if (!hunk_starts_at_or_after_line(blame->current_hunk, wedge_line)){
420 		/* If this hunk doesn't start between existing hunks, split a hunk up so it does */
421 		blame->current_hunk = split_hunk_in_vector(&blame->hunks, blame->current_hunk,
422 				wedge_line - blame->current_hunk->orig_start_line_number, true);
423 		GITERR_CHECK_ALLOC(blame->current_hunk);
424 	}
425 
426 	return 0;
427 }
428 
ptrs_equal_cmp(const void * a,const void * b)429 static int ptrs_equal_cmp(const void *a, const void *b) { return a<b ? -1 : a>b ? 1 : 0; }
buffer_line_cb(const git_diff_delta * delta,const git_diff_hunk * hunk,const git_diff_line * line,void * payload)430 static int buffer_line_cb(
431 	const git_diff_delta *delta,
432 	const git_diff_hunk *hunk,
433 	const git_diff_line *line,
434 	void *payload)
435 {
436 	git_blame *blame = (git_blame*)payload;
437 
438 	GIT_UNUSED(delta);
439 	GIT_UNUSED(hunk);
440 	GIT_UNUSED(line);
441 
442 	if (line->origin == GIT_DIFF_LINE_ADDITION) {
443 		if (hunk_is_bufferblame(blame->current_hunk) &&
444 		    hunk_ends_at_or_before_line(blame->current_hunk, blame->current_diff_line)) {
445 			/* Append to the current buffer-blame hunk */
446 			blame->current_hunk->lines_in_hunk++;
447 			shift_hunks_by(&blame->hunks, blame->current_diff_line+1, 1);
448 		} else {
449 			/* Create a new buffer-blame hunk with this line */
450 			shift_hunks_by(&blame->hunks, blame->current_diff_line, 1);
451 			blame->current_hunk = new_hunk(blame->current_diff_line, 1, 0, blame->path);
452 			GITERR_CHECK_ALLOC(blame->current_hunk);
453 
454 			git_vector_insert_sorted(&blame->hunks, blame->current_hunk, NULL);
455 		}
456 		blame->current_diff_line++;
457 	}
458 
459 	if (line->origin == GIT_DIFF_LINE_DELETION) {
460 		/* Trim the line from the current hunk; remove it if it's now empty */
461 		size_t shift_base = blame->current_diff_line + blame->current_hunk->lines_in_hunk+1;
462 
463 		if (--(blame->current_hunk->lines_in_hunk) == 0) {
464 			size_t i;
465 			shift_base--;
466 			if (!git_vector_search2(&i, &blame->hunks, ptrs_equal_cmp, blame->current_hunk)) {
467 				git_vector_remove(&blame->hunks, i);
468 				free_hunk(blame->current_hunk);
469 				blame->current_hunk = (git_blame_hunk*)git_blame_get_hunk_byindex(blame, (uint32_t)i);
470 			}
471 		}
472 		shift_hunks_by(&blame->hunks, shift_base, -1);
473 	}
474 	return 0;
475 }
476 
git_blame_buffer(git_blame ** out,git_blame * reference,const char * buffer,size_t buffer_len)477 int git_blame_buffer(
478 		git_blame **out,
479 		git_blame *reference,
480 		const char *buffer,
481 		size_t buffer_len)
482 {
483 	git_blame *blame;
484 	git_diff_options diffopts = GIT_DIFF_OPTIONS_INIT;
485 	size_t i;
486 	git_blame_hunk *hunk;
487 
488 	diffopts.context_lines = 0;
489 
490 	assert(out && reference && buffer && buffer_len);
491 
492 	blame = git_blame__alloc(reference->repository, reference->options, reference->path);
493 	GITERR_CHECK_ALLOC(blame);
494 
495 	/* Duplicate all of the hunk structures in the reference blame */
496 	git_vector_foreach(&reference->hunks, i, hunk) {
497 		git_blame_hunk *h = dup_hunk(hunk);
498 		GITERR_CHECK_ALLOC(h);
499 
500 		git_vector_insert(&blame->hunks, h);
501 	}
502 
503 	/* Diff to the reference blob */
504 	git_diff_blob_to_buffer(reference->final_blob, blame->path,
505 		buffer, buffer_len, blame->path, &diffopts,
506 		NULL, NULL, buffer_hunk_cb, buffer_line_cb, blame);
507 
508 	*out = blame;
509 	return 0;
510 }
511 
git_blame_init_options(git_blame_options * opts,unsigned int version)512 int git_blame_init_options(git_blame_options *opts, unsigned int version)
513 {
514 	GIT_INIT_STRUCTURE_FROM_TEMPLATE(
515 		opts, version, git_blame_options, GIT_BLAME_OPTIONS_INIT);
516 	return 0;
517 }
518