1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "diff_driver.h"
9 
10 #include "git2/attr.h"
11 
12 #include "common.h"
13 #include "diff.h"
14 #include "strmap.h"
15 #include "map.h"
16 #include "config.h"
17 #include "regexp.h"
18 #include "repository.h"
19 
/*
 * Kind of diff driver.
 *
 * The AUTO, BINARY and TEXT values are also used as indexes into the
 * `global_drivers` table in this file, so their numeric values must stay
 * in step with the order of that table.  PATTERNLIST marks a driver that
 * carries its own function-context patterns.
 */
typedef enum {
	DIFF_DRIVER_AUTO = 0,
	DIFF_DRIVER_BINARY = 1,
	DIFF_DRIVER_TEXT = 2,
	DIFF_DRIVER_PATTERNLIST = 3,
} git_diff_driver_t;
26 
/*
 * One function-context pattern: a compiled regex plus the flags it was
 * added with (the caller-supplied regex flags, optionally OR'ed with
 * REG_NEGATE).
 */
typedef struct {
	git_regexp re;
	int flags;
} git_diff_driver_pattern;
31 
/*
 * Extra pattern flag kept in git_diff_driver_pattern.flags.  It is set for
 * patterns written with a leading '!'; when such a pattern matches a line,
 * the line is rejected as function context.
 */
enum {
	REG_NEGATE = (1 << 15) /* get out of the way of existing flags */
};
35 
/*
 * data for finding function context for a given file type
 *
 *  - type:         which kind of driver this is (see git_diff_driver_t)
 *  - binary_flags: OR'ed into the diff option flags by
 *                  git_diff_driver_update_options() unless the options
 *                  already carry one of the FORCE_DIFFABLE flags
 *  - other_flags:  always OR'ed into the diff option flags
 *  - fn_patterns:  function-context patterns, tried in array order
 *  - word_pattern: compiled word regex (from config `wordregex` or a
 *                  builtin definition)
 *  - name:         driver name, stored inline at the end of the allocation
 *                  (see diff_driver_alloc())
 */
struct git_diff_driver {
	git_diff_driver_t type;
	uint32_t binary_flags;
	uint32_t other_flags;
	git_array_t(git_diff_driver_pattern) fn_patterns;
	git_regexp  word_pattern;
	char name[GIT_FLEX_ARRAY];
};
45 
46 #include "userdiff.h"
47 
/*
 * Per-repository cache of loaded drivers, keyed by driver name.  The
 * registry owns the drivers stored in the map: they are released by
 * git_diff_driver_registry_free().
 */
struct git_diff_driver_registry {
	git_strmap *drivers;
};
51 
/* option flags that pin a file as text or binary; when either is already
 * set, a driver's binary_flags are not applied */
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)
53 
/*
 * Statically allocated drivers for the auto / binary / text cases.  The
 * table is indexed with DIFF_DRIVER_AUTO, DIFF_DRIVER_BINARY and
 * DIFF_DRIVER_TEXT, so the entry order must match those enum values.
 * These entries are never stored in a registry and are never freed.
 */
static git_diff_driver global_drivers[3] = {
	{ DIFF_DRIVER_AUTO,   0, 0, },
	{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
	{ DIFF_DRIVER_TEXT,   GIT_DIFF_FORCE_TEXT, 0 },
};
59 
git_diff_driver_registry_new(void)60 git_diff_driver_registry *git_diff_driver_registry_new(void)
61 {
62 	git_diff_driver_registry *reg =
63 		git__calloc(1, sizeof(git_diff_driver_registry));
64 	if (!reg)
65 		return NULL;
66 
67 	if (git_strmap_new(&reg->drivers) < 0) {
68 		git_diff_driver_registry_free(reg);
69 		return NULL;
70 	}
71 
72 	return reg;
73 }
74 
/*
 * Release a registry together with every driver stored in it.
 * Passing NULL is a no-op.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *entry;

	if (reg == NULL)
		return;

	/* drivers first, then the map that referenced them, then the registry */
	git_strmap_foreach_value(reg->drivers, entry, git_diff_driver_free(entry));
	git_strmap_free(reg->drivers);

	git__free(reg);
}
86 
/*
 * Compile a newline-separated list of function-context regexes and append
 * them to `drv->fn_patterns`.
 *
 * Each line of `regex_str` is one pattern.  A leading '!' marks the pattern
 * as negated: REG_NEGATE is recorded in its flags and the '!' is not part
 * of the regex text.  A pattern that fails to compile is removed from the
 * array again, so only successfully compiled entries are ever left behind
 * for the matcher and for git_diff_driver_free().
 *
 * Returns 0 on success -- including when some or all patterns were bad --
 * and -1 only if a slot in the pattern array could not be allocated.
 */
static int diff_driver_add_patterns(
	git_diff_driver *drv, const char *regex_str, int regex_flags)
{
	int error;
	const char *scan, *end;
	git_diff_driver_pattern *pat;
	git_buf buf = GIT_BUF_INIT;

	for (scan = regex_str; scan; scan = end) {
		/* get pattern to fill in */
		if ((pat = git_array_alloc(drv->fn_patterns)) == NULL) {
			/* buf may hold memory from an earlier iteration */
			git_buf_dispose(&buf);
			return -1;
		}

		pat->flags = regex_flags;
		if (*scan == '!') {
			pat->flags |= REG_NEGATE;
			++scan;
		}

		/* copy out one line; `end` is NULL once the last line is reached */
		if ((end = strchr(scan, '\n')) != NULL) {
			error = git_buf_set(&buf, scan, end - scan);
			end++;
		} else {
			error = git_buf_sets(&buf, scan);
		}

		if (error < 0) {
			/* could not copy the pattern text: drop the slot and stop */
			(void)git_array_pop(drv->fn_patterns);
			break;
		}

		if (git_regexp_compile(&pat->re, buf.ptr, regex_flags) != 0) {
			/*
			 * TODO: issue a warning
			 *
			 * Release the slot immediately.  Popping only after the
			 * loop would remove just the final bad pattern and leave
			 * earlier ones in the array with an uncompiled regex.
			 */
			(void)git_array_pop(drv->fn_patterns);
		}
	}

	git_buf_dispose(&buf);

	/* We want to ignore bad patterns, so return success regardless */
	return 0;
}
130 
diff_driver_xfuncname(const git_config_entry * entry,void * payload)131 static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
132 {
133 	return diff_driver_add_patterns(payload, entry->value, 0);
134 }
135 
diff_driver_funcname(const git_config_entry * entry,void * payload)136 static int diff_driver_funcname(const git_config_entry *entry, void *payload)
137 {
138 	return diff_driver_add_patterns(payload, entry->value, 0);
139 }
140 
git_repository_driver_registry(git_repository * repo)141 static git_diff_driver_registry *git_repository_driver_registry(
142 	git_repository *repo)
143 {
144 	if (!repo->diff_drivers) {
145 		git_diff_driver_registry *reg = git_diff_driver_registry_new();
146 		reg = git_atomic_compare_and_swap(&repo->diff_drivers, NULL, reg);
147 
148 		if (reg != NULL) /* if we race, free losing allocation */
149 			git_diff_driver_registry_free(reg);
150 	}
151 
152 	if (!repo->diff_drivers)
153 		git_error_set(GIT_ERROR_REPOSITORY, "unable to create diff driver registry");
154 
155 	return repo->diff_drivers;
156 }
157 
/*
 * Allocate a zeroed driver with `name` copied into its trailing name
 * storage.
 *
 * out:         receives the new driver
 * namelen_out: if non-NULL, receives strlen(name)
 *
 * Returns 0 on success; the GIT_ERROR_CHECK_ALLOC* macros handle the
 * failure paths.
 */
static int diff_driver_alloc(
	git_diff_driver **out, size_t *namelen_out, const char *name)
{
	size_t name_size = strlen(name);
	size_t total;
	git_diff_driver *drv;

	/* struct + name bytes + one terminator byte, via checked additions */
	GIT_ERROR_CHECK_ALLOC_ADD(&total, sizeof(git_diff_driver), name_size);
	GIT_ERROR_CHECK_ALLOC_ADD(&total, total, 1);

	drv = git__calloc(1, total);
	GIT_ERROR_CHECK_ALLOC(drv);

	/* only the name bytes are copied; the terminator byte is presumably
	 * already zero because the block comes from git__calloc */
	memcpy(drv->name, name, name_size);

	if (namelen_out != NULL)
		*namelen_out = name_size;

	*out = drv;
	return 0;
}
181 
/*
 * Find the built-in driver definition whose name matches `driver_name`
 * (compared case-insensitively) and return a pattern-list driver for it,
 * registered in `reg` under the definition's own name.
 *
 * out:  receives the driver, or NULL when no builtin has that name
 * reg:  registry that owns the resulting driver
 *
 * Returns 0 on success (including "no such builtin") or a negative error
 * code; on error a partially built driver is freed and not registered.
 */
static int git_diff_driver_builtin(
	git_diff_driver **out,
	git_diff_driver_registry *reg,
	const char *driver_name)
{
	git_diff_driver_definition *ddef = NULL;
	git_diff_driver *drv = NULL;
	int error = 0;
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(builtin_defs); ++idx) {
		if (!strcasecmp(driver_name, builtin_defs[idx].name)) {
			ddef = &builtin_defs[idx];
			break;
		}
	}
	if (!ddef)
		goto done;

	/*
	 * The match above ignores case but the registry is keyed by exact
	 * name, so a request such as "CPP" misses the caller's cache lookup
	 * even though a driver named "cpp" may already be registered.  Reuse
	 * that driver instead of building another one: storing a second
	 * driver under the same key would presumably replace the map entry
	 * without freeing the previous driver (confirm against strmap docs).
	 */
	if ((drv = git_strmap_get(reg->drivers, ddef->name)) != NULL) {
		*out = drv;
		return 0;
	}

	if ((error = diff_driver_alloc(&drv, NULL, ddef->name)) < 0)
		goto done;

	drv->type = DIFF_DRIVER_PATTERNLIST;

	if (ddef->fns &&
		(error = diff_driver_add_patterns(
			drv, ddef->fns, ddef->flags)) < 0)
		goto done;

	if (ddef->words &&
	    (error = git_regexp_compile(&drv->word_pattern, ddef->words, ddef->flags)) < 0)
		goto done;

	/* from here on the registry owns drv */
	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
		goto done;

done:
	if (error && drv)
		git_diff_driver_free(drv);
	else
		*out = drv;

	return error;
}
226 
/*
 * Resolve the driver named `driver_name` for `repo`.
 *
 * Lookup order: the repository's driver registry, then the
 * `diff.<driver>.*` config keys (binary, xfuncname, funcname, wordregex),
 * then the built-in definitions.  A driver built from config is stored in
 * the registry before being returned.
 *
 * The caller must set `*out` to NULL before calling: the cleanup code
 * reads `*out` to decide whether to fall back to the builtin drivers.
 * `*out` may be left NULL when nothing matches.
 *
 * Returns 0 on success or a negative error code.
 */
static int git_diff_driver_load(
	git_diff_driver **out, git_repository *repo, const char *driver_name)
{
	int error = 0;
	git_diff_driver_registry *reg;
	git_diff_driver *drv;
	size_t namelen;
	git_config *cfg = NULL;
	git_buf name = GIT_BUF_INIT;
	git_config_entry *ce = NULL;
	bool found_driver = false;

	if ((reg = git_repository_driver_registry(repo)) == NULL)
		return -1;

	/* already loaded: return the registered driver */
	if ((drv = git_strmap_get(reg->drivers, driver_name)) != NULL) {
		*out = drv;
		return 0;
	}

	/* scratch driver; it is freed at `done` unless it ends up in *out */
	if ((error = diff_driver_alloc(&drv, &namelen, driver_name)) < 0)
		goto done;

	drv->type = DIFF_DRIVER_AUTO;

	/* if you can't read config for repo, just use default driver */
	if (git_repository_config_snapshot(&cfg, repo) < 0) {
		git_error_clear();
		goto done;
	}

	if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0)
		goto done;

	/* -1 is the fallback value, which lands in the default case below */
	switch (git_config__get_bool_force(cfg, name.ptr, -1)) {
	case true:
		/* if diff.<driver>.binary is true, just return the binary driver */
		*out = &global_drivers[DIFF_DRIVER_BINARY];
		goto done;
	case false:
		/* if diff.<driver>.binary is false, force binary checks off */
		/* but still may have custom function context patterns, etc. */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
		found_driver = true;
		break;
	default:
		/* diff.<driver>.binary unspecified or "auto", so just continue */
		break;
	}

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

	/* cut the key back to the "diff.<driver>." prefix, then append the
	 * next variable name; the same is done for each key below */
	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "xfuncname")) < 0)
		goto done;

	if ((error = git_config_get_multivar_foreach(
			cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.xfuncname, so just continue */
	}

	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "funcname")) < 0)
		goto done;

	if ((error = git_config_get_multivar_foreach(
			cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.funcname, so just continue */
	}

	/* if we found any patterns, set driver type to use correct callback */
	if (git_array_size(drv->fn_patterns) > 0) {
		drv->type = DIFF_DRIVER_PATTERNLIST;
		found_driver = true;
	}

	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "wordregex")) < 0)
		goto done;

	if ((error = git_config__lookup_entry(&ce, cfg, name.ptr, false)) < 0)
		goto done;
	if (!ce || !ce->value)
		/* no diff.<driver>.wordregex, so just continue */;
	else if (!(error = git_regexp_compile(&drv->word_pattern, ce->value, 0)))
		found_driver = true;
	else {
		/* TODO: warn about bad regex instead of failure */
		goto done;
	}

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

	/* if no driver config found at all, fall back on AUTO driver */
	if (!found_driver)
		goto done;

	/* store driver in registry */
	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
		goto done;

	*out = drv;

done:
	git_config_entry_free(ce);
	git_buf_dispose(&name);
	git_config_free(cfg);

	/* nothing chosen from config: try the builtin definitions, keeping
	 * any earlier error in preference to the builtin lookup's result */
	if (!*out) {
		int error2 = git_diff_driver_builtin(out, reg, driver_name);
		if (!error)
			error = error2;
	}

	/* the scratch driver is only kept when it is the one being returned */
	if (drv && drv != *out)
		git_diff_driver_free(drv);

	return error;
}
352 
/*
 * Pick the diff driver for `path` based on its "diff" attribute.
 *
 *  - no repo, no path or empty path, or attribute unspecified: auto driver
 *  - attribute false: binary driver
 *  - attribute true:  text driver
 *  - any other value: treated as a driver name and loaded; a
 *    GIT_ENOTFOUND result from the load is not reported as an error
 *
 * `*out` always ends up pointing at a driver (the auto driver if nothing
 * else was selected), even when an error code is returned.
 */
int git_diff_driver_lookup(
	git_diff_driver **out, git_repository *repo,
	git_attr_session *attrsession, const char *path)
{
	const char *attr_names[] = { "diff" };
	const char *attr_values[1];
	int error = 0;

	GIT_ASSERT_ARG(out);
	*out = NULL;

	if (repo != NULL && path != NULL && path[0] != '\0') {
		error = git_attr_get_many_with_session(attr_values, repo,
			attrsession, 0, path, 1, attr_names);

		/* on failure, fall through and return the error below */
		if (error >= 0) {
			const char *value = attr_values[0];

			if (GIT_ATTR_IS_UNSPECIFIED(value)) {
				/* just use the auto value */
			} else if (GIT_ATTR_IS_FALSE(value)) {
				*out = &global_drivers[DIFF_DRIVER_BINARY];
			} else if (GIT_ATTR_IS_TRUE(value)) {
				*out = &global_drivers[DIFF_DRIVER_TEXT];
			} else {
				/* named driver: look in config and build it */
				error = git_diff_driver_load(out, repo, value);

				if (error < 0 && error == GIT_ENOTFOUND) {
					error = 0;
					git_error_clear();
				}
			}
		}
	}

	if (*out == NULL)
		*out = &global_drivers[DIFF_DRIVER_AUTO];

	return error;
}
389 
/*
 * Release a driver: every compiled function-context regex, the pattern
 * array, the word regex and finally the driver itself.  NULL is ignored.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	size_t idx, count;

	if (driver == NULL)
		return;

	count = git_array_size(driver->fn_patterns);
	for (idx = 0; idx < count; idx++) {
		git_diff_driver_pattern *pat =
			git_array_get(driver->fn_patterns, idx);

		git_regexp_dispose(&pat->re);
	}
	git_array_clear(driver->fn_patterns);

	git_regexp_dispose(&driver->word_pattern);

	git__free(driver);
}
405 
/*
 * Merge a driver's flags into `*option_flags`.  The driver's binary_flags
 * are applied only when the options do not already force text or binary;
 * its other_flags are always applied.
 */
void git_diff_driver_update_options(
	uint32_t *option_flags, git_diff_driver *driver)
{
	uint32_t flags = *option_flags;

	if (!(flags & FORCE_DIFFABLE))
		flags |= driver->binary_flags;

	flags |= driver->other_flags;

	*option_flags = flags;
}
414 
/*
 * Decide whether `content` looks binary.  Only the first
 * min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL) bytes are examined, and
 * the driver is currently not consulted.
 *
 * Returns 1 if a NUL byte is found in the examined prefix, 0 otherwise.
 */
int git_diff_driver_content_is_binary(
	git_diff_driver *driver, const char *content, size_t content_len)
{
	git_buf prefix = GIT_BUF_INIT;

	GIT_UNUSED(driver);

	/* borrow the caller's memory; nothing is copied */
	git_buf_attach_notowned(&prefix, content,
		min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL));

	/* TODO: provide encoding / binary detection callbacks that can
	 * be UTF-8 aware, etc.  For now, instead of trying to be smart,
	 * let's just use the simple NUL-byte detection that core git uses.
	 */

	/* previously was: if (git_buf_is_binary(&search)) */
	return git_buf_contains_nul(&prefix) ? 1 : 0;
}
436 
/*
 * Default function-context test: a line qualifies when its first
 * character is a letter (per git__isalpha), '_' or '$'.  The driver is
 * unused.  Reads line->ptr[0], so the buffer must not be empty.
 */
static int diff_context_line__simple(
	git_diff_driver *driver, git_buf *line)
{
	const char first = line->ptr[0];

	GIT_UNUSED(driver);

	if (git__isalpha(first))
		return 1;

	return (first == '_' || first == '$');
}
444 
/*
 * Function-context test for pattern-list drivers.  Patterns are tried in
 * array order and the first one that matches decides the outcome:
 *
 *  - a negated pattern (REG_NEGATE) rejects the line
 *  - otherwise `line` is cut down to the first capture group if it took
 *    part in the match, else to the whole match, and right-trimmed
 *
 * Returns true when the line is accepted, false when it is rejected or no
 * pattern matched.
 */
static int diff_context_line__pattern_match(
	git_diff_driver *driver, git_buf *line)
{
	size_t idx, count = git_array_size(driver->fn_patterns);
	git_regmatch groups[2];

	for (idx = 0; idx < count; ++idx) {
		git_diff_driver_pattern *pat =
			git_array_get(driver->fn_patterns, idx);
		const git_regmatch *hit;

		/* a zero return is treated as a match; anything else: next */
		if (git_regexp_search(&pat->re, line->ptr, 2, groups) != 0)
			continue;

		if (pat->flags & REG_NEGATE)
			return false;

		/* use match data to trim line data */
		hit = (groups[1].start >= 0) ? &groups[1] : &groups[0];

		git_buf_consume(line, git_buf_cstr(line) + hit->start);
		git_buf_truncate(line, hit->end - hit->start);
		git_buf_rtrim(line);

		return true;
	}

	return false;
}
470 
diff_context_find(const char * line,long line_len,char * out,long out_size,void * payload)471 static long diff_context_find(
472 	const char *line,
473 	long line_len,
474 	char *out,
475 	long out_size,
476 	void *payload)
477 {
478 	git_diff_find_context_payload *ctxt = payload;
479 
480 	if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
481 		return -1;
482 	git_buf_rtrim(&ctxt->line);
483 
484 	if (!ctxt->line.size)
485 		return -1;
486 
487 	if (!ctxt->match_line || !ctxt->match_line(ctxt->driver, &ctxt->line))
488 		return -1;
489 
490 	if (out_size > (long)ctxt->line.size)
491 		out_size = (long)ctxt->line.size;
492 	memcpy(out, ctxt->line.ptr, (size_t)out_size);
493 
494 	return out_size;
495 }
496 
/*
 * Prepare the find-context callback and its payload for `driver`.
 *
 * With a NULL driver the callback is NULL and the payload is simply
 * zeroed.  Otherwise the callback is diff_context_find and the payload
 * records the driver, the matcher matching the driver type (pattern
 * matching for DIFF_DRIVER_PATTERNLIST, the simple first-character test
 * for everything else) and an initialized scratch buffer.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	if (driver != NULL)
		*findfn_out = diff_context_find;
	else
		*findfn_out = NULL;

	memset(payload_out, 0, sizeof(*payload_out));

	if (driver == NULL)
		return;

	payload_out->driver = driver;

	if (driver->type == DIFF_DRIVER_PATTERNLIST)
		payload_out->match_line = diff_context_line__pattern_match;
	else
		payload_out->match_line = diff_context_line__simple;

	git_buf_init(&payload_out->line, 0);
}
512 
/*
 * Release the payload's scratch buffer and drop its driver reference.
 * The driver itself is not freed.  NULL is ignored.
 */
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
	if (payload == NULL)
		return;

	git_buf_dispose(&payload->line);
	payload->driver = NULL;
}
520