1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "diff_driver.h"
9 
10 #include "git2/attr.h"
11 
12 #include "common.h"
13 #include "diff.h"
14 #include "strmap.h"
15 #include "map.h"
16 #include "config.h"
17 #include "regexp.h"
18 #include "repository.h"
19 
/*
 * Kind of diff driver.
 *
 * The AUTO, BINARY and TEXT values are also used in this file as
 * indices into the `global_drivers` table, so their numeric values
 * must stay in sync with the order of that table.  PATTERNLIST marks a
 * driver that carries its own function-name patterns; it selects the
 * pattern-matching context callback in git_diff_find_context_init().
 */
typedef enum {
	DIFF_DRIVER_AUTO = 0,
	DIFF_DRIVER_BINARY = 1,
	DIFF_DRIVER_TEXT = 2,
	DIFF_DRIVER_PATTERNLIST = 3,
} git_diff_driver_t;
26 
/*
 * One function-name pattern of a driver: the compiled regular
 * expression plus the flags it was added with (optionally combined
 * with REG_NEGATE).
 */
typedef struct {
	git_regexp re;
	int flags;
} git_diff_driver_pattern;

/*
 * Extra pattern flag recorded for patterns written with a leading '!'.
 * When a negated pattern matches a line, the line is rejected as a
 * function-context line.
 */
enum {
	REG_NEGATE = (1 << 15) /* get out of the way of existing flags */
};
35 
/*
 * data for finding function context for a given file type
 *
 * - type:         which kind of driver this is (see git_diff_driver_t)
 * - binary_flags: OR'ed into the diff option flags by
 *                 git_diff_driver_update_options(), but only when the
 *                 options force neither text nor binary already
 * - other_flags:  always OR'ed into the diff option flags
 * - fn_patterns:  compiled function-name patterns, tried in order
 * - word_pattern: compiled word regex (from a builtin definition or
 *                 from diff.<driver>.wordregex)
 * - name:         driver name, stored inline after the struct; sized
 *                 and filled in by diff_driver_alloc()
 */
struct git_diff_driver {
	git_diff_driver_t type;
	uint32_t binary_flags;
	uint32_t other_flags;
	git_array_t(git_diff_driver_pattern) fn_patterns;
	git_regexp  word_pattern;
	char name[GIT_FLEX_ARRAY];
};
45 
46 #include "userdiff.h"
47 
/* Per-repository cache of drivers, keyed by driver name. */
struct git_diff_driver_registry {
	git_strmap *drivers;
};

/* Either of the diff option flags that force a text/binary decision. */
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)

/*
 * Shared, statically allocated drivers, indexed by DIFF_DRIVER_AUTO,
 * DIFF_DRIVER_BINARY and DIFF_DRIVER_TEXT.  Pointers into this table
 * are handed out by the lookup functions below.
 */
static git_diff_driver global_drivers[3] = {
	{ DIFF_DRIVER_AUTO,   0, 0, },
	{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
	{ DIFF_DRIVER_TEXT,   GIT_DIFF_FORCE_TEXT, 0 },
};
59 
git_diff_driver_registry_new(void)60 git_diff_driver_registry *git_diff_driver_registry_new(void)
61 {
62 	git_diff_driver_registry *reg =
63 		git__calloc(1, sizeof(git_diff_driver_registry));
64 	if (!reg)
65 		return NULL;
66 
67 	if (git_strmap_new(&reg->drivers) < 0) {
68 		git_diff_driver_registry_free(reg);
69 		return NULL;
70 	}
71 
72 	return reg;
73 }
74 
/*
 * Free a registry together with every driver stored in it.
 * Passing NULL is a no-op.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *entry;

	if (reg != NULL) {
		/* free each stored driver before tearing down the map itself */
		git_strmap_foreach_value(reg->drivers, entry, git_diff_driver_free(entry));
		git_strmap_free(reg->drivers);
		git__free(reg);
	}
}
86 
/*
 * Split `regex_str` on newlines and append one compiled function-name
 * pattern per line to `drv->fn_patterns`.
 *
 * A line starting with '!' is a negated pattern: the '!' is stripped
 * before compiling and REG_NEGATE is recorded in the pattern's flags.
 * `regex_flags` is passed to git_regexp_compile() for every line and is
 * also stored in each pattern's flags.
 *
 * Bad patterns are ignored rather than reported: a line that fails to
 * compile has its slot removed from the array again, so the array only
 * ever holds successfully compiled patterns, and the remaining lines
 * are still processed.
 *
 * Returns 0, except when the pattern array cannot grow, in which case
 * -1 is returned.
 */
static int diff_driver_add_patterns(
	git_diff_driver *drv, const char *regex_str, int regex_flags)
{
	int error = 0;
	const char *scan, *end;
	git_diff_driver_pattern *pat;
	git_buf buf = GIT_BUF_INIT;

	for (scan = regex_str; scan; scan = end) {
		/* get pattern to fill in */
		if ((pat = git_array_alloc(drv->fn_patterns)) == NULL) {
			/* leave through the common exit so `buf` is released */
			error = -1;
			break;
		}

		pat->flags = regex_flags;
		if (*scan == '!') {
			pat->flags |= REG_NEGATE;
			++scan;
		}

		/* copy the current line (without its newline) into `buf` */
		if ((end = strchr(scan, '\n')) != NULL) {
			error = git_buf_set(&buf, scan, end - scan);
			end++;
		} else {
			error = git_buf_sets(&buf, scan);
		}

		if (error < 0) {
			/* could not copy the line: drop the unused slot and stop */
			(void)git_array_pop(drv->fn_patterns);
			error = 0;
			break;
		}

		if (git_regexp_compile(&pat->re, buf.ptr, regex_flags) != 0) {
			/*
			 * TODO: issue a warning
			 *
			 * Drop the slot right away: its regex was never compiled
			 * and must not be searched or disposed later on.
			 */
			(void)git_array_pop(drv->fn_patterns);
		}
	}

	git_buf_dispose(&buf);

	return error;
}
130 
diff_driver_xfuncname(const git_config_entry * entry,void * payload)131 static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
132 {
133 	return diff_driver_add_patterns(payload, entry->value, 0);
134 }
135 
diff_driver_funcname(const git_config_entry * entry,void * payload)136 static int diff_driver_funcname(const git_config_entry *entry, void *payload)
137 {
138 	return diff_driver_add_patterns(payload, entry->value, 0);
139 }
140 
git_repository_driver_registry(git_repository * repo)141 static git_diff_driver_registry *git_repository_driver_registry(
142 	git_repository *repo)
143 {
144 	git_diff_driver_registry *reg = git_atomic_load(repo->diff_drivers), *newreg;
145 	if (reg)
146 		return reg;
147 
148 	newreg = git_diff_driver_registry_new();
149 	if (!newreg) {
150 		git_error_set(GIT_ERROR_REPOSITORY, "unable to create diff driver registry");
151 		return newreg;
152 	}
153 	reg = git_atomic_compare_and_swap(&repo->diff_drivers, NULL, newreg);
154 	if (!reg) {
155 		reg = newreg;
156 	} else {
157 		/* if we race, free losing allocation */
158 		git_diff_driver_registry_free(newreg);
159 	}
160 	return reg;
161 }
162 
/*
 * Allocate a driver with a copy of `name` stored inline after the
 * struct.
 *
 * On success stores the driver in `*out`, stores strlen(name) in
 * `*namelen_out` when that pointer is non-NULL, and returns 0.  The
 * GIT_ERROR_CHECK_* macros handle the failure paths (size computation
 * and allocation).
 */
static int diff_driver_alloc(
	git_diff_driver **out, size_t *namelen_out, const char *name)
{
	git_diff_driver *drv;
	size_t name_size = strlen(name);
	size_t total;

	/* struct + name bytes + one byte for the terminator */
	GIT_ERROR_CHECK_ALLOC_ADD(&total, sizeof(git_diff_driver), name_size);
	GIT_ERROR_CHECK_ALLOC_ADD(&total, total, 1);

	drv = git__calloc(1, total);
	GIT_ERROR_CHECK_ALLOC(drv);

	/*
	 * Only the name bytes are copied; the terminator is expected to be
	 * the zero byte left by git__calloc (assuming calloc semantics).
	 */
	memcpy(drv->name, name, name_size);

	if (namelen_out != NULL)
		*namelen_out = name_size;

	*out = drv;
	return 0;
}
186 
/*
 * Build a driver from the builtin definition whose name matches
 * `driver_name` (compared case-insensitively) and store it in `reg`.
 *
 * `*out` receives the new driver, or NULL when no builtin definition
 * matches (in which case 0 is returned).  If building or storing the
 * driver fails after it was allocated, the driver is freed, `*out` is
 * left untouched and the error is returned.
 */
static int git_diff_driver_builtin(
	git_diff_driver **out,
	git_diff_driver_registry *reg,
	const char *driver_name)
{
	git_diff_driver_definition *def = NULL;
	git_diff_driver *built = NULL;
	int error = 0;
	size_t i;

	/* stop at the first definition with a matching name */
	for (i = 0; i < ARRAY_SIZE(builtin_defs) && def == NULL; ++i) {
		if (strcasecmp(driver_name, builtin_defs[i].name) == 0)
			def = &builtin_defs[i];
	}

	if (def == NULL)
		goto done;

	if ((error = diff_driver_alloc(&built, NULL, def->name)) < 0)
		goto done;

	built->type = DIFF_DRIVER_PATTERNLIST;

	if (def->fns != NULL) {
		error = diff_driver_add_patterns(built, def->fns, def->flags);
		if (error < 0)
			goto done;
	}

	if (def->words != NULL) {
		error = git_regexp_compile(&built->word_pattern, def->words, def->flags);
		if (error < 0)
			goto done;
	}

	error = git_strmap_set(reg->drivers, built->name, built);

done:
	if (error && built) {
		/* partially built driver: discard it, do not touch *out */
		git_diff_driver_free(built);
		return error;
	}

	*out = built;
	return error;
}
231 
/*
 * Find or build the driver called `driver_name` for `repo`.
 *
 * Lookup order:
 *  1. a driver already stored in the repository's registry;
 *  2. a driver built from `diff.<driver_name>.*` config (binary,
 *     xfuncname, funcname, wordregex), which is stored in the registry
 *     if any of those settings were found;
 *  3. a builtin driver definition, via git_diff_driver_builtin().
 *
 * `*out` is read at the `done` label to decide whether step 3 is
 * needed, so the caller must set `*out` to NULL before calling (as
 * git_diff_driver_lookup() does).  `*out` may still be NULL on return
 * if nothing matched.
 *
 * Returns 0 on success or a negative error code; an error raised while
 * processing config takes precedence over the result of the builtin
 * lookup.
 */
static int git_diff_driver_load(
	git_diff_driver **out, git_repository *repo, const char *driver_name)
{
	int error = 0;
	git_diff_driver_registry *reg;
	git_diff_driver *drv;
	size_t namelen;
	git_config *cfg = NULL;
	git_buf name = GIT_BUF_INIT;
	git_config_entry *ce = NULL;
	bool found_driver = false;

	if ((reg = git_repository_driver_registry(repo)) == NULL)
		return -1;

	/* already built for this repository: reuse it */
	if ((drv = git_strmap_get(reg->drivers, driver_name)) != NULL) {
		*out = drv;
		return 0;
	}

	if ((error = diff_driver_alloc(&drv, &namelen, driver_name)) < 0)
		goto done;

	drv->type = DIFF_DRIVER_AUTO;

	/* if you can't read config for repo, just use default driver */
	if (git_repository_config_snapshot(&cfg, repo) < 0) {
		git_error_clear();
		goto done;
	}

	if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0)
		goto done;

	switch (git_config__get_bool_force(cfg, name.ptr, -1)) {
	case true:
		/* if diff.<driver>.binary is true, just return the binary driver */
		*out = &global_drivers[DIFF_DRIVER_BINARY];
		goto done;
	case false:
		/* if diff.<driver>.binary is false, force binary checks off */
		/* but still may have custom function context patterns, etc. */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
		found_driver = true;
		break;
	default:
		/* diff.<driver>.binary unspecified or "auto", so just continue */
		break;
	}

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

	/* cut the key back to "diff.<driver>." and append the next setting */
	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "xfuncname")) < 0)
		goto done;

	if ((error = git_config_get_multivar_foreach(
			cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.xfuncname, so just continue */
	}

	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "funcname")) < 0)
		goto done;

	if ((error = git_config_get_multivar_foreach(
			cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.funcname, so just continue */
	}

	/* if we found any patterns, set driver type to use correct callback */
	if (git_array_size(drv->fn_patterns) > 0) {
		drv->type = DIFF_DRIVER_PATTERNLIST;
		found_driver = true;
	}

	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "wordregex")) < 0)
		goto done;

	if ((error = git_config__lookup_entry(&ce, cfg, name.ptr, false)) < 0)
		goto done;
	if (!ce || !ce->value)
		/* no diff.<driver>.wordregex, so just continue */;
	else if (!(error = git_regexp_compile(&drv->word_pattern, ce->value, 0)))
		found_driver = true;
	else {
		/* TODO: warn about bad regex instead of failure */
		goto done;
	}

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

	/* if no driver config found at all, fall back on AUTO driver */
	if (!found_driver)
		goto done;

	/* store driver in registry */
	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
		goto done;

	*out = drv;

done:
	git_config_entry_free(ce);
	git_buf_dispose(&name);
	git_config_free(cfg);

	/*
	 * Nothing selected so far (no usable config, or an error): try the
	 * builtin definitions.  An earlier error is kept in preference to
	 * the builtin lookup's result.
	 */
	if (!*out) {
		int error2 = git_diff_driver_builtin(out, reg, driver_name);
		if (!error)
			error = error2;
	}

	/* the driver allocated above is only kept if it is the one returned */
	if (drv && drv != *out)
		git_diff_driver_free(drv);

	return error;
}
357 
/*
 * Choose the diff driver for `path` in `repo` based on the path's
 * "diff" attribute.
 *
 * - no repo / no path / empty path, or attribute unspecified: auto driver
 * - attribute false: binary driver
 * - attribute true:  text driver
 * - any other value: driver of that name, loaded via
 *   git_diff_driver_load(); GIT_ENOTFOUND from that call is cleared
 *
 * `*out` is always set on return; it falls back to the auto driver when
 * nothing else was selected, including on error.  Returns 0 or the
 * error from the attribute or driver lookup.
 */
int git_diff_driver_lookup(
	git_diff_driver **out, git_repository *repo,
	git_attr_session *attrsession, const char *path)
{
	int error = 0;
	const char *attr_names[] = { "diff" };
	const char *attr_values[1];

	GIT_ASSERT_ARG(out);
	*out = NULL;

	/* without a repository or a path there is nothing to look up */
	if (repo == NULL || path == NULL || *path == '\0')
		goto fallback;

	error = git_attr_get_many_with_session(
		attr_values, repo, attrsession, 0, path, 1, attr_names);
	if (error < 0)
		goto fallback; /* error is returned below */

	if (GIT_ATTR_IS_UNSPECIFIED(attr_values[0])) {
		/* just use the auto value */
	} else if (GIT_ATTR_IS_FALSE(attr_values[0])) {
		*out = &global_drivers[DIFF_DRIVER_BINARY];
	} else if (GIT_ATTR_IS_TRUE(attr_values[0])) {
		*out = &global_drivers[DIFF_DRIVER_TEXT];
	} else {
		/* otherwise look for driver information in config and build driver */
		error = git_diff_driver_load(out, repo, attr_values[0]);
		if (error == GIT_ENOTFOUND) {
			error = 0;
			git_error_clear();
		}
	}

fallback:
	if (*out == NULL)
		*out = &global_drivers[DIFF_DRIVER_AUTO];

	return error;
}
394 
/*
 * Free a driver: dispose every function-name pattern, the pattern
 * array, the word pattern and finally the driver itself.
 * Passing NULL is a no-op.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	git_diff_driver_pattern *last;

	if (driver == NULL)
		return;

	/* pop patterns off the array until none are left */
	for (last = git_array_pop(driver->fn_patterns);
	     last != NULL;
	     last = git_array_pop(driver->fn_patterns))
		git_regexp_dispose(&last->re);

	git_array_clear(driver->fn_patterns);
	git_regexp_dispose(&driver->word_pattern);
	git__free(driver);
}
410 
/*
 * Merge the driver's flags into `*option_flags`.
 *
 * The driver's binary flags are applied only when the options do not
 * already force text or binary; its other flags are always applied.
 */
void git_diff_driver_update_options(
	uint32_t *option_flags, git_diff_driver *driver)
{
	uint32_t flags = *option_flags;

	if (!(flags & FORCE_DIFFABLE))
		flags |= driver->binary_flags;

	flags |= driver->other_flags;

	*option_flags = flags;
}
419 
/*
 * Report whether `content` looks binary.
 *
 * Only the first min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL) bytes
 * are inspected, and the driver is currently not consulted.  Returns 1
 * if that prefix contains a NUL byte, 0 otherwise.
 */
int git_diff_driver_content_is_binary(
	git_diff_driver *driver, const char *content, size_t content_len)
{
	git_buf probe = GIT_BUF_INIT;
	size_t probe_len = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);

	GIT_UNUSED(driver);

	/* view the prefix through a buffer without taking ownership of it */
	git_buf_attach_notowned(&probe, content, probe_len);

	/* TODO: provide encoding / binary detection callbacks that can
	 * be UTF-8 aware, etc.  For now, instead of trying to be smart,
	 * let's just use the simple NUL-byte detection that core git uses.
	 */
	return git_buf_contains_nul(&probe) ? 1 : 0;
}
441 
/*
 * Default function-context test: a line qualifies when its first
 * character is alphabetic, '_' or '$'.  The driver is not used.
 */
static int diff_context_line__simple(
	git_diff_driver *driver, git_buf *line)
{
	char lead = line->ptr[0];

	GIT_UNUSED(driver);

	if (git__isalpha(lead))
		return 1;

	return (lead == '_' || lead == '$');
}
449 
/*
 * Function-context test for pattern-list drivers.
 *
 * The driver's patterns are tried in order and the first one that
 * matches decides: a negated pattern rejects the line, any other
 * pattern accepts it.  On acceptance `line` is cut down to the matched
 * text (match group 1 if its start offset is non-negative, otherwise
 * the whole match) and trailing whitespace is trimmed.
 *
 * Returns true if the line is a context line, false otherwise.
 */
static int diff_context_line__pattern_match(
	git_diff_driver *driver, git_buf *line)
{
	size_t idx, count = git_array_size(driver->fn_patterns);
	git_regmatch groups[2];

	for (idx = 0; idx < count; ++idx) {
		git_diff_driver_pattern *candidate =
			git_array_get(driver->fn_patterns, idx);
		const git_regmatch *span;

		if (git_regexp_search(&candidate->re, line->ptr, 2, groups) != 0)
			continue;

		if (candidate->flags & REG_NEGATE)
			return false;

		/* use match data to trim line data */
		span = (groups[1].start >= 0) ? &groups[1] : &groups[0];

		git_buf_consume(line, git_buf_cstr(line) + span->start);
		git_buf_truncate(line, span->end - span->start);
		git_buf_rtrim(line);

		return true;
	}

	return false;
}
475 
diff_context_find(const char * line,long line_len,char * out,long out_size,void * payload)476 static long diff_context_find(
477 	const char *line,
478 	long line_len,
479 	char *out,
480 	long out_size,
481 	void *payload)
482 {
483 	git_diff_find_context_payload *ctxt = payload;
484 
485 	if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
486 		return -1;
487 	git_buf_rtrim(&ctxt->line);
488 
489 	if (!ctxt->line.size)
490 		return -1;
491 
492 	if (!ctxt->match_line || !ctxt->match_line(ctxt->driver, &ctxt->line))
493 		return -1;
494 
495 	if (out_size > (long)ctxt->line.size)
496 		out_size = (long)ctxt->line.size;
497 	memcpy(out, ctxt->line.ptr, (size_t)out_size);
498 
499 	return out_size;
500 }
501 
/*
 * Prepare the find-context callback and its payload for `driver`.
 *
 * The payload is always zeroed.  With a NULL driver, `*findfn_out` is
 * set to NULL and the payload is left zeroed.  Otherwise `*findfn_out`
 * is set to diff_context_find and the payload gets the driver, the
 * line matcher for the driver's type (pattern matching for
 * DIFF_DRIVER_PATTERNLIST, the simple test for everything else) and an
 * initialized line buffer.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	if (driver == NULL) {
		*findfn_out = NULL;
		memset(payload_out, 0, sizeof(*payload_out));
		return;
	}

	*findfn_out = diff_context_find;
	memset(payload_out, 0, sizeof(*payload_out));

	payload_out->driver = driver;

	if (driver->type == DIFF_DRIVER_PATTERNLIST)
		payload_out->match_line = diff_context_line__pattern_match;
	else
		payload_out->match_line = diff_context_line__simple;

	git_buf_init(&payload_out->line, 0);
}
517 
/*
 * Release the payload's line buffer and drop its driver reference.
 * The driver itself is not freed.  Passing NULL is a no-op.
 */
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
	if (payload == NULL)
		return;

	git_buf_dispose(&payload->line);
	payload->driver = NULL;
}
525