1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "diff_driver.h"
9 
10 #include "git2/attr.h"
11 
12 #include "common.h"
13 #include "diff.h"
14 #include "strmap.h"
15 #include "map.h"
16 #include "buf_text.h"
17 #include "config.h"
18 #include "regexp.h"
19 #include "repository.h"
20 
/*
 * Kinds of diff driver.  The AUTO, BINARY and TEXT values are also used
 * as indices into the static `global_drivers` table in this file, so
 * their numeric values must stay in sync with that table's order.
 */
typedef enum {
	DIFF_DRIVER_AUTO = 0,        /* no forced behavior; default driver */
	DIFF_DRIVER_BINARY = 1,      /* always treat content as binary */
	DIFF_DRIVER_TEXT = 2,        /* always treat content as text */
	DIFF_DRIVER_PATTERNLIST = 3, /* function context found via fn_patterns */
} git_diff_driver_t;
27 
/* one function-context pattern belonging to a driver */
typedef struct {
	git_regexp re; /* compiled regular expression */
	int flags;     /* flags given at compile time, possibly with REG_NEGATE */
} git_diff_driver_pattern;
32 
/*
 * Extra pattern flag kept next to the regex flags in
 * git_diff_driver_pattern.flags.  It is set for patterns written with a
 * leading '!'; when such a pattern matches a line, the line is rejected
 * as function context.
 */
enum {
	REG_NEGATE = (1 << 15) /* get out of the way of existing flags */
};
36 
/* data for finding function context for a given file type */
struct git_diff_driver {
	git_diff_driver_t type;
	/* OR'ed into the diff options only when no FORCE_DIFFABLE bit is set */
	uint32_t binary_flags;
	/* always OR'ed into the diff options */
	uint32_t other_flags;
	/* patterns tried in order by diff_context_line__pattern_match */
	git_array_t(git_diff_driver_pattern) fn_patterns;
	/* compiled from diff.<driver>.wordregex or a builtin definition */
	git_regexp  word_pattern;
	/* driver name, stored inline; sized by diff_driver_alloc */
	char name[GIT_FLEX_ARRAY];
};
46 
47 #include "userdiff.h"
48 
/* per-repository cache of loaded drivers, keyed by driver name */
struct git_diff_driver_registry {
	git_strmap *drivers; /* values are git_diff_driver pointers owned by the registry */
};
52 
/* mask of the option bits that explicitly force text or binary handling */
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)
54 
/*
 * Shared, statically allocated drivers.  Entries are looked up with the
 * DIFF_DRIVER_AUTO / DIFF_DRIVER_BINARY / DIFF_DRIVER_TEXT enum values
 * as indices, so the order here must match those values.
 */
static git_diff_driver global_drivers[3] = {
	{ DIFF_DRIVER_AUTO,   0, 0, },
	{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
	{ DIFF_DRIVER_TEXT,   GIT_DIFF_FORCE_TEXT, 0 },
};
60 
git_diff_driver_registry_new(void)61 git_diff_driver_registry *git_diff_driver_registry_new(void)
62 {
63 	git_diff_driver_registry *reg =
64 		git__calloc(1, sizeof(git_diff_driver_registry));
65 	if (!reg)
66 		return NULL;
67 
68 	if (git_strmap_new(&reg->drivers) < 0) {
69 		git_diff_driver_registry_free(reg);
70 		return NULL;
71 	}
72 
73 	return reg;
74 }
75 
/*
 * Free a registry together with every driver stored in it.
 *
 * Passing NULL is a no-op.  The registry may also arrive here without a
 * driver map: git_diff_driver_registry_new() calls this function when
 * git_strmap_new() has failed, in which case `reg->drivers` was never
 * set.  The map is therefore only walked and freed when it exists.
 * NOTE(review): whether the strmap iteration macro tolerates a NULL map
 * is not visible from this file; the guard makes that irrelevant.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *drv;

	if (!reg)
		return;

	if (reg->drivers) {
		git_strmap_foreach_value(reg->drivers, drv, git_diff_driver_free(drv));
		git_strmap_free(reg->drivers);
	}

	git__free(reg);
}
87 
/*
 * Parse a newline-separated list of function-context regular
 * expressions and append the ones that compile to drv->fn_patterns.
 *
 * drv         - driver whose pattern list is extended
 * regex_str   - patterns separated by '\n'; a pattern starting with '!'
 *               is stored with REG_NEGATE set.  NULL adds nothing.
 * regex_flags - flags passed to git_regexp_compile and stored with each
 *               pattern
 *
 * Patterns that fail to compile are skipped on purpose (bad patterns
 * must not break driver loading).  Returns 0 on success and a negative
 * value only when memory for a pattern slot or for the scratch buffer
 * could not be obtained.
 */
static int diff_driver_add_patterns(
	git_diff_driver *drv, const char *regex_str, int regex_flags)
{
	int error = 0;
	const char *scan, *end;
	git_diff_driver_pattern *pat;
	git_buf buf = GIT_BUF_INIT;

	for (scan = regex_str; scan; scan = end) {
		/* get pattern to fill in */
		if ((pat = git_array_alloc(drv->fn_patterns)) == NULL) {
			/* fall through to the common exit so `buf` is released */
			error = -1;
			break;
		}

		pat->flags = regex_flags;
		if (*scan == '!') {
			pat->flags |= REG_NEGATE;
			++scan;
		}

		/* copy the current line (without its newline) into buf */
		if ((end = strchr(scan, '\n')) != NULL) {
			error = git_buf_set(&buf, scan, end - scan);
			end++;
		} else {
			error = git_buf_sets(&buf, scan);
		}

		if (error < 0) {
			/* the slot was never compiled; do not leave it in the list */
			(void)git_array_pop(drv->fn_patterns);
			break;
		}

		if (git_regexp_compile(&pat->re, buf.ptr, regex_flags) != 0) {
			/*
			 * TODO: issue a warning
			 *
			 * Drop the slot right away: every entry left in
			 * fn_patterns is later searched and disposed, which is
			 * only meaningful for a successfully compiled regex.
			 */
			(void)git_array_pop(drv->fn_patterns);
		}
	}

	git_buf_dispose(&buf);

	/* bad patterns are ignored; only allocation failures are reported */
	return error;
}
131 
diff_driver_xfuncname(const git_config_entry * entry,void * payload)132 static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
133 {
134 	return diff_driver_add_patterns(payload, entry->value, 0);
135 }
136 
diff_driver_funcname(const git_config_entry * entry,void * payload)137 static int diff_driver_funcname(const git_config_entry *entry, void *payload)
138 {
139 	return diff_driver_add_patterns(payload, entry->value, 0);
140 }
141 
git_repository_driver_registry(git_repository * repo)142 static git_diff_driver_registry *git_repository_driver_registry(
143 	git_repository *repo)
144 {
145 	if (!repo->diff_drivers) {
146 		git_diff_driver_registry *reg = git_diff_driver_registry_new();
147 		reg = git_atomic_compare_and_swap(&repo->diff_drivers, NULL, reg);
148 
149 		if (reg != NULL) /* if we race, free losing allocation */
150 			git_diff_driver_registry_free(reg);
151 	}
152 
153 	if (!repo->diff_drivers)
154 		git_error_set(GIT_ERROR_REPOSITORY, "unable to create diff driver registry");
155 
156 	return repo->diff_drivers;
157 }
158 
/*
 * Allocate a zero-filled driver with `name` copied into its trailing
 * name storage.
 *
 * out         - receives the new driver
 * namelen_out - if not NULL, receives strlen(name)
 * name        - driver name to store
 *
 * Returns 0 on success.  The size arithmetic and the allocation are
 * checked through the GIT_ERROR_CHECK_* macros (presumably returning a
 * negative value from this function on failure — see their definition).
 */
static int diff_driver_alloc(
	git_diff_driver **out, size_t *namelen_out, const char *name)
{
	size_t name_size = strlen(name);
	size_t total;
	git_diff_driver *created;

	/* struct + name bytes + one extra byte after the name */
	GIT_ERROR_CHECK_ALLOC_ADD(&total, sizeof(git_diff_driver), name_size);
	GIT_ERROR_CHECK_ALLOC_ADD(&total, total, 1);

	created = git__calloc(1, total);
	GIT_ERROR_CHECK_ALLOC(created);

	memcpy(created->name, name, name_size);

	*out = created;

	if (namelen_out != NULL)
		*namelen_out = name_size;

	return 0;
}
182 
/*
 * Build a driver from the builtin definition whose name matches
 * `driver_name` (case-insensitively) and store it in `reg`.
 *
 * When no builtin definition matches, *out is set to NULL and 0 is
 * returned.  On error the partially built driver is freed and *out is
 * left untouched, except when the driver allocation itself failed, in
 * which case *out is set to NULL.
 */
static int git_diff_driver_builtin(
	git_diff_driver **out,
	git_diff_driver_registry *reg,
	const char *driver_name)
{
	git_diff_driver_definition *def = NULL;
	git_diff_driver *created = NULL;
	int error = 0;
	size_t i;

	/* linear search; stop at the first matching name */
	for (i = 0; i < ARRAY_SIZE(builtin_defs) && def == NULL; ++i) {
		if (strcasecmp(driver_name, builtin_defs[i].name) == 0)
			def = &builtin_defs[i];
	}

	if (def == NULL)
		goto done;

	error = diff_driver_alloc(&created, NULL, def->name);
	if (error < 0)
		goto done;

	created->type = DIFF_DRIVER_PATTERNLIST;

	if (def->fns != NULL) {
		error = diff_driver_add_patterns(created, def->fns, def->flags);
		if (error < 0)
			goto done;
	}

	if (def->words != NULL) {
		error = git_regexp_compile(
			&created->word_pattern, def->words, def->flags);
		if (error < 0)
			goto done;
	}

	/* the registry keeps the driver from here on */
	error = git_strmap_set(reg->drivers, created->name, created);

done:
	if (error && created)
		git_diff_driver_free(created);
	else
		*out = created;

	return error;
}
227 
/*
 * Find or build the driver called `driver_name` for `repo`.
 *
 * Lookup order:
 *   1. a driver already stored in the repository's registry;
 *   2. a driver built from diff.<driver_name>.* configuration
 *      (binary, xfuncname, funcname, wordregex);
 *   3. if *out is still NULL afterwards, a builtin definition.
 *
 * The caller is expected to pass *out == NULL; the fallback in step 3
 * is keyed on that.  Returns 0 on success or a negative error code.
 */
static int git_diff_driver_load(
	git_diff_driver **out, git_repository *repo, const char *driver_name)
{
	int error = 0;
	int binary_setting;
	git_diff_driver_registry *reg;
	git_diff_driver *drv;
	size_t namelen, prefix_len;
	git_config *cfg = NULL;
	git_buf key = GIT_BUF_INIT;
	git_config_entry *word_entry = NULL;
	bool configured = false;

	reg = git_repository_driver_registry(repo);
	if (reg == NULL)
		return -1;

	/* already loaded for this repository? */
	drv = git_strmap_get(reg->drivers, driver_name);
	if (drv != NULL) {
		*out = drv;
		return 0;
	}

	if ((error = diff_driver_alloc(&drv, &namelen, driver_name)) < 0)
		goto done;

	drv->type = DIFF_DRIVER_AUTO;

	/* length of the "diff.<driver>." key prefix reused for every lookup */
	prefix_len = namelen + strlen("diff..");

	/* if you can't read config for repo, just use default driver */
	if (git_repository_config_snapshot(&cfg, repo) < 0) {
		git_error_clear();
		goto done;
	}

	if ((error = git_buf_printf(&key, "diff.%s.binary", driver_name)) < 0)
		goto done;

	binary_setting = git_config__get_bool_force(cfg, key.ptr, -1);

	if (binary_setting == true) {
		/* diff.<driver>.binary is true: hand back the shared binary driver */
		*out = &global_drivers[DIFF_DRIVER_BINARY];
		goto done;
	} else if (binary_setting == false) {
		/*
		 * diff.<driver>.binary is false: force binary checks off, but
		 * the driver may still carry custom function patterns, etc.
		 */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
		configured = true;
	}
	/* any other value: unspecified or "auto", nothing to record */

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

	git_buf_truncate(&key, prefix_len);
	if ((error = git_buf_PUTS(&key, "xfuncname")) < 0)
		goto done;

	error = git_config_get_multivar_foreach(
		cfg, key.ptr, NULL, diff_driver_xfuncname, drv);
	if (error < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.xfuncname, so just continue */
	}

	git_buf_truncate(&key, prefix_len);
	if ((error = git_buf_PUTS(&key, "funcname")) < 0)
		goto done;

	error = git_config_get_multivar_foreach(
		cfg, key.ptr, NULL, diff_driver_funcname, drv);
	if (error < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.funcname, so just continue */
	}

	/* any pattern at all switches the driver to pattern matching */
	if (git_array_size(drv->fn_patterns) > 0) {
		drv->type = DIFF_DRIVER_PATTERNLIST;
		configured = true;
	}

	git_buf_truncate(&key, prefix_len);
	if ((error = git_buf_PUTS(&key, "wordregex")) < 0)
		goto done;

	if ((error = git_config__lookup_entry(&word_entry, cfg, key.ptr, false)) < 0)
		goto done;

	if (word_entry != NULL && word_entry->value != NULL) {
		error = git_regexp_compile(&drv->word_pattern, word_entry->value, 0);
		if (error != 0) {
			/* TODO: warn about bad regex instead of failure */
			goto done;
		}
		configured = true;
	}
	/* otherwise: no diff.<driver>.wordregex, so just continue */

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

	/* if no driver config found at all, fall back on AUTO driver */
	if (!configured)
		goto done;

	/* store driver in registry */
	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
		goto done;

	*out = drv;

done:
	git_config_entry_free(word_entry);
	git_buf_dispose(&key);
	git_config_free(cfg);

	/* nothing chosen from config: try the builtin definitions */
	if (*out == NULL) {
		int builtin_error = git_diff_driver_builtin(out, reg, driver_name);

		/* an earlier error takes precedence over the builtin result */
		if (error == 0)
			error = builtin_error;
	}

	/* the locally built driver is only kept when it is the result */
	if (drv != NULL && drv != *out)
		git_diff_driver_free(drv);

	return error;
}
353 
/*
 * Pick the diff driver for `path` based on its "diff" attribute.
 *
 *   - no repo, no path, empty path, or attribute unspecified: auto driver
 *   - attribute false: shared binary driver
 *   - attribute true:  shared text driver
 *   - any other value: driver of that name via git_diff_driver_load
 *
 * *out always ends up pointing at a driver (the auto driver when
 * nothing else was selected), including when an error is returned.
 * A GIT_ENOTFOUND from the driver load is cleared and not reported.
 */
int git_diff_driver_lookup(
	git_diff_driver **out, git_repository *repo,
	git_attr_session *attrsession, const char *path)
{
	int error = 0;
	const char *values[1], *attrs[] = { "diff" };
	const char *diff_attr;

	GIT_ASSERT_ARG(out);
	*out = NULL;

	/* nothing to look up: just use the auto value */
	if (!repo || !path || path[0] == '\0')
		goto fallback;

	error = git_attr_get_many_with_session(
		values, repo, attrsession, 0, path, 1, attrs);
	if (error < 0)
		goto fallback; /* error is returned below */

	diff_attr = values[0];

	if (GIT_ATTR_IS_UNSPECIFIED(diff_attr)) {
		/* just use the auto value */
	} else if (GIT_ATTR_IS_FALSE(diff_attr)) {
		*out = &global_drivers[DIFF_DRIVER_BINARY];
	} else if (GIT_ATTR_IS_TRUE(diff_attr)) {
		*out = &global_drivers[DIFF_DRIVER_TEXT];
	} else {
		/* otherwise look for driver information in config and build driver */
		error = git_diff_driver_load(out, repo, diff_attr);
		if (error < 0) {
			if (error == GIT_ENOTFOUND) {
				error = 0;
				git_error_clear();
			}
		}
	}

fallback:
	if (*out == NULL)
		*out = &global_drivers[DIFF_DRIVER_AUTO];

	return error;
}
390 
/*
 * Release a driver: dispose every function-context regex, clear the
 * pattern array, dispose the word regex and free the driver itself.
 * Passing NULL is a no-op.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	size_t idx;

	if (driver == NULL)
		return;

	for (idx = 0; idx < git_array_size(driver->fn_patterns); ++idx) {
		git_diff_driver_pattern *pat =
			git_array_get(driver->fn_patterns, idx);

		git_regexp_dispose(&pat->re);
	}
	git_array_clear(driver->fn_patterns);

	git_regexp_dispose(&driver->word_pattern);

	git__free(driver);
}
406 
/*
 * Merge a driver's flags into a set of diff option flags.
 *
 * The driver's binary_flags are applied only when the options do not
 * already contain one of the FORCE_DIFFABLE bits; other_flags are
 * always applied.
 */
void git_diff_driver_update_options(
	uint32_t *option_flags, git_diff_driver *driver)
{
	uint32_t merged = *option_flags;

	/* an explicit force flag in the options wins over the driver */
	if (!(merged & FORCE_DIFFABLE))
		merged |= driver->binary_flags;

	merged |= driver->other_flags;

	*option_flags = merged;
}
415 
/*
 * Decide whether `content` should be treated as binary.
 *
 * Only the first min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL) bytes
 * are inspected, and the only test is whether they contain a NUL byte.
 * The driver argument is currently unused.
 *
 * Returns 1 for binary, 0 otherwise.
 */
int git_diff_driver_content_is_binary(
	git_diff_driver *driver, const char *content, size_t content_len)
{
	git_buf probe = GIT_BUF_INIT;
	size_t probe_len = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);

	GIT_UNUSED(driver);

	/* view onto the caller's data; nothing is copied or owned here */
	git_buf_attach_notowned(&probe, content, probe_len);

	/* TODO: provide encoding / binary detection callbacks that can
	 * be UTF-8 aware, etc.  For now, instead of trying to be smart,
	 * let's just use the simple NUL-byte detection that core git uses.
	 */

	/* previously was: if (git_buf_text_is_binary(&search)) */
	return git_buf_text_contains_nul(&probe) ? 1 : 0;
}
437 
/*
 * Default function-context test: a line qualifies when its first
 * character is a letter, '_' or '$'.  The driver is not consulted.
 * Returns 1 when the line qualifies, 0 otherwise.
 */
static int diff_context_line__simple(
	git_diff_driver *driver, git_buf *line)
{
	char lead;

	GIT_UNUSED(driver);

	lead = line->ptr[0];

	if (lead == '_' || lead == '$')
		return 1;

	return git__isalpha(lead) ? 1 : 0;
}
445 
/*
 * Function-context test driven by the driver's pattern list.
 *
 * Patterns are tried in order and the first one that matches decides:
 * a REG_NEGATE pattern rejects the line; any other pattern accepts it
 * and trims `line` down to the matched text (the first capture group
 * when it took part in the match, otherwise the whole match), with
 * trailing whitespace removed.
 *
 * Returns true when the line is accepted, false otherwise.
 */
static int diff_context_line__pattern_match(
	git_diff_driver *driver, git_buf *line)
{
	size_t idx, count = git_array_size(driver->fn_patterns);
	git_regmatch groups[2];

	for (idx = 0; idx < count; ++idx) {
		git_diff_driver_pattern *pat =
			git_array_get(driver->fn_patterns, idx);
		const git_regmatch *hit;

		if (git_regexp_search(&pat->re, line->ptr, 2, groups) != 0)
			continue;

		if (pat->flags & REG_NEGATE)
			return false;

		/* prefer capture group 1 over the whole match when present */
		hit = (groups[1].start >= 0) ? &groups[1] : &groups[0];

		/* use match data to trim line data */
		git_buf_consume(line, git_buf_cstr(line) + hit->start);
		git_buf_truncate(line, hit->end - hit->start);
		git_buf_rtrim(line);

		return true;
	}

	return false;
}
471 
diff_context_find(const char * line,long line_len,char * out,long out_size,void * payload)472 static long diff_context_find(
473 	const char *line,
474 	long line_len,
475 	char *out,
476 	long out_size,
477 	void *payload)
478 {
479 	git_diff_find_context_payload *ctxt = payload;
480 
481 	if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
482 		return -1;
483 	git_buf_rtrim(&ctxt->line);
484 
485 	if (!ctxt->line.size)
486 		return -1;
487 
488 	if (!ctxt->match_line || !ctxt->match_line(ctxt->driver, &ctxt->line))
489 		return -1;
490 
491 	if (out_size > (long)ctxt->line.size)
492 		out_size = (long)ctxt->line.size;
493 	memcpy(out, ctxt->line.ptr, (size_t)out_size);
494 
495 	return out_size;
496 }
497 
/*
 * Prepare the callback/payload pair used to find function context.
 *
 * Without a driver, *findfn_out is NULL and the payload is zeroed.
 * With a driver, *findfn_out is diff_context_find and the payload gets
 * the driver, a matcher chosen from the driver type (pattern matching
 * for DIFF_DRIVER_PATTERNLIST, the simple first-character test for
 * everything else) and an initialized line buffer.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	if (driver == NULL) {
		*findfn_out = NULL;
		memset(payload_out, 0, sizeof(*payload_out));
		return;
	}

	*findfn_out = diff_context_find;
	memset(payload_out, 0, sizeof(*payload_out));

	payload_out->driver = driver;

	if (driver->type == DIFF_DRIVER_PATTERNLIST)
		payload_out->match_line = diff_context_line__pattern_match;
	else
		payload_out->match_line = diff_context_line__simple;

	git_buf_init(&payload_out->line, 0);
}
513 
/*
 * Release the resources held by a find-context payload: dispose its
 * line buffer and drop its driver reference.  NULL is a no-op.
 */
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
	if (payload == NULL)
		return;

	git_buf_dispose(&payload->line);
	payload->driver = NULL;
}
521