1 /*
2 * Copyright (C) the libgit2 contributors. All rights reserved.
3 *
4 * This file is part of libgit2, distributed under the GNU GPL v2 with
5 * a Linking Exception. For full terms see the included COPYING file.
6 */
7
8 #include "diff_driver.h"
9
10 #include "git2/attr.h"
11
12 #include "common.h"
13 #include "diff.h"
14 #include "strmap.h"
15 #include "map.h"
16 #include "config.h"
17 #include "regexp.h"
18 #include "repository.h"
19
/*
 * Kind of diff driver. The AUTO, BINARY and TEXT values are also used
 * as indexes into the static global_drivers table.
 */
typedef enum {
	DIFF_DRIVER_AUTO = 0, /* no forced binary/text flags */
	DIFF_DRIVER_BINARY = 1, /* static driver carrying GIT_DIFF_FORCE_BINARY */
	DIFF_DRIVER_TEXT = 2, /* static driver carrying GIT_DIFF_FORCE_TEXT */
	DIFF_DRIVER_PATTERNLIST = 3, /* context lines are matched against fn_patterns */
} git_diff_driver_t;
26
/* One compiled function-context pattern plus the flags it was added with. */
typedef struct {
	git_regexp re;
	int flags; /* caller-supplied regex flags, optionally OR'ed with REG_NEGATE */
} git_diff_driver_pattern;
31
/*
 * Driver-private pattern flag, stored in git_diff_driver_pattern.flags
 * when a pattern is written with a leading '!'. A line that matches such
 * a pattern is rejected as function context.
 */
enum {
	REG_NEGATE = (1 << 15) /* get out of the way of existing flags */
};
35
/* data for finding function context for a given file type */
struct git_diff_driver {
	git_diff_driver_t type;
	uint32_t binary_flags; /* OR'ed into the diff options unless a FORCE_DIFFABLE flag is already set */
	uint32_t other_flags; /* always OR'ed into the diff options */
	git_array_t(git_diff_driver_pattern) fn_patterns; /* tried in order when matching a line */
	git_regexp word_pattern; /* compiled from diff.<driver>.wordregex or a builtin definition */
	char name[GIT_FLEX_ARRAY]; /* driver name, stored inline after the struct */
};
45
46 #include "userdiff.h"
47
/* Cache of loaded diff drivers, keyed by driver name. */
struct git_diff_driver_registry {
	git_strmap *drivers; /* name -> git_diff_driver; the values are freed together with the registry */
};
51
/* Option flags that, when already set, stop a driver's binary_flags from being applied. */
#define FORCE_DIFFABLE (GIT_DIFF_FORCE_TEXT | GIT_DIFF_FORCE_BINARY)
53
/*
 * Statically allocated drivers, indexed by git_diff_driver_t value
 * (AUTO, BINARY, TEXT). The lookup code returns pointers to these
 * entries directly; nothing in this file inserts them into a registry.
 */
static git_diff_driver global_drivers[3] = {
	{ DIFF_DRIVER_AUTO, 0, 0, },
	{ DIFF_DRIVER_BINARY, GIT_DIFF_FORCE_BINARY, 0 },
	{ DIFF_DRIVER_TEXT, GIT_DIFF_FORCE_TEXT, 0 },
};
59
git_diff_driver_registry_new(void)60 git_diff_driver_registry *git_diff_driver_registry_new(void)
61 {
62 git_diff_driver_registry *reg =
63 git__calloc(1, sizeof(git_diff_driver_registry));
64 if (!reg)
65 return NULL;
66
67 if (git_strmap_new(®->drivers) < 0) {
68 git_diff_driver_registry_free(reg);
69 return NULL;
70 }
71
72 return reg;
73 }
74
/*
 * Free a registry together with every driver stored in it.
 * A NULL registry is ignored.
 */
void git_diff_driver_registry_free(git_diff_driver_registry *reg)
{
	git_diff_driver *driver;

	if (reg == NULL)
		return;

	/* the map holds the driver pointers, so release them before the map */
	git_strmap_foreach_value(reg->drivers, driver, git_diff_driver_free(driver));
	git_strmap_free(reg->drivers);

	git__free(reg);
}
86
/*
 * Split `regex_str` on newlines and append one compiled pattern per line
 * to drv->fn_patterns. A line starting with '!' is stored with REG_NEGATE
 * set, and the '!' is stripped before the pattern is compiled.
 *
 * Patterns that fail to compile are skipped: their array slot is released
 * immediately so that fn_patterns only ever holds compiled expressions.
 *
 * Returns 0 on success (even if some patterns were skipped) and a
 * negative value only when memory could not be allocated.
 */
static int diff_driver_add_patterns(
	git_diff_driver *drv, const char *regex_str, int regex_flags)
{
	int error = 0;
	const char *scan, *end;
	git_diff_driver_pattern *pat;
	git_buf buf = GIT_BUF_INIT;

	for (scan = regex_str; scan; scan = end) {
		/* get pattern to fill in */
		if ((pat = git_array_alloc(drv->fn_patterns)) == NULL) {
			/* leave through the common exit so `buf` is disposed */
			error = -1;
			break;
		}

		pat->flags = regex_flags;
		if (*scan == '!') {
			pat->flags |= REG_NEGATE;
			++scan;
		}

		if ((end = strchr(scan, '\n')) != NULL) {
			error = git_buf_set(&buf, scan, end - scan);
			end++;
		} else {
			error = git_buf_sets(&buf, scan);
		}

		if (error < 0) {
			/* allocation failure: drop the unfilled slot and report it */
			(void)git_array_pop(drv->fn_patterns);
			break;
		}

		if (git_regexp_compile(&pat->re, buf.ptr, regex_flags) != 0) {
			/*
			 * TODO: issue a warning
			 *
			 * We want to ignore bad patterns, so release the slot
			 * right away and keep going; otherwise a bad pattern
			 * followed by a good one would stay in the array with an
			 * uncompiled regex.
			 */
			(void)git_array_pop(drv->fn_patterns);
		}
	}

	git_buf_dispose(&buf);

	return error;
}
130
diff_driver_xfuncname(const git_config_entry * entry,void * payload)131 static int diff_driver_xfuncname(const git_config_entry *entry, void *payload)
132 {
133 return diff_driver_add_patterns(payload, entry->value, 0);
134 }
135
diff_driver_funcname(const git_config_entry * entry,void * payload)136 static int diff_driver_funcname(const git_config_entry *entry, void *payload)
137 {
138 return diff_driver_add_patterns(payload, entry->value, 0);
139 }
140
/*
 * Return the repository's diff driver registry, creating it on first use.
 *
 * Returns NULL, with a GIT_ERROR_REPOSITORY error set, if the repository
 * still has no registry after the creation attempt.
 */
static git_diff_driver_registry *git_repository_driver_registry(
	git_repository *repo)
{
	if (!repo->diff_drivers) {
		git_diff_driver_registry *reg = git_diff_driver_registry_new();
		/*
		 * Install the new registry only if the slot is still NULL.
		 * NOTE(review): the check below assumes the swap helper hands
		 * back NULL when our registry was installed and our own
		 * allocation when another thread got there first -- confirm
		 * against the helper's definition.
		 */
		reg = git_atomic_compare_and_swap(&repo->diff_drivers, NULL, reg);

		if (reg != NULL) /* if we race, free losing allocation */
			git_diff_driver_registry_free(reg);
	}

	if (!repo->diff_drivers)
		git_error_set(GIT_ERROR_REPOSITORY, "unable to create diff driver registry");

	return repo->diff_drivers;
}
157
/*
 * Allocate a zeroed driver with `name` copied into its trailing name
 * array.
 *
 * On success stores the driver in `*out`, stores strlen(name) in
 * `*namelen_out` when that pointer is non-NULL, and returns 0.
 */
static int diff_driver_alloc(
	git_diff_driver **out, size_t *namelen_out, const char *name)
{
	size_t name_size = strlen(name);
	size_t total;
	git_diff_driver *drv;

	/* struct + name bytes + one terminating NUL, with overflow checks */
	GIT_ERROR_CHECK_ALLOC_ADD(&total, sizeof(git_diff_driver), name_size);
	GIT_ERROR_CHECK_ALLOC_ADD(&total, total, 1);

	drv = git__calloc(1, total);
	GIT_ERROR_CHECK_ALLOC(drv);

	/* the terminator is already there because the block was zeroed */
	memcpy(drv->name, name, name_size);

	if (namelen_out != NULL)
		*namelen_out = name_size;

	*out = drv;

	return 0;
}
181
/*
 * Build a driver from the builtin definition whose name matches
 * `driver_name` (case-insensitively) and store it in `reg`.
 *
 * Returns 0 with `*out` set to the new driver, or 0 with `*out` set to
 * NULL when no builtin definition matches. On error the partially built
 * driver is freed and `*out` is left untouched.
 */
static int git_diff_driver_builtin(
	git_diff_driver **out,
	git_diff_driver_registry *reg,
	const char *driver_name)
{
	git_diff_driver_definition *def = NULL;
	git_diff_driver *drv = NULL;
	int error = 0;
	size_t i;

	/* stop at the first definition with a matching name */
	for (i = 0; i < ARRAY_SIZE(builtin_defs) && def == NULL; ++i) {
		if (strcasecmp(driver_name, builtin_defs[i].name) == 0)
			def = &builtin_defs[i];
	}

	if (def == NULL)
		goto done;

	error = diff_driver_alloc(&drv, NULL, def->name);
	if (error < 0)
		goto done;

	drv->type = DIFF_DRIVER_PATTERNLIST;

	if (def->fns) {
		error = diff_driver_add_patterns(drv, def->fns, def->flags);
		if (error < 0)
			goto done;
	}

	if (def->words) {
		error = git_regexp_compile(&drv->word_pattern, def->words, def->flags);
		if (error < 0)
			goto done;
	}

	/* from here on the registry owns the driver (if the insert succeeds) */
	error = git_strmap_set(reg->drivers, drv->name, drv);

done:
	if (!error || !drv)
		*out = drv;
	else
		git_diff_driver_free(drv);

	return error;
}
226
/*
 * Find or build the driver called `driver_name` for `repo`.
 *
 * Order of resolution:
 *  1. a driver already stored in the repository's registry;
 *  2. a driver built from diff.<driver>.binary / xfuncname / funcname /
 *     wordregex in the repository's config snapshot;
 *  3. a builtin definition with that name.
 *
 * The fallback at `done` reads `*out`, so the caller must have set it to
 * NULL before calling. On return `*out` may still be NULL (nothing
 * configured and no builtin matched) even though 0 is returned.
 */
static int git_diff_driver_load(
	git_diff_driver **out, git_repository *repo, const char *driver_name)
{
	int error = 0;
	git_diff_driver_registry *reg;
	git_diff_driver *drv;
	size_t namelen;
	git_config *cfg = NULL;
	git_buf name = GIT_BUF_INIT;
	git_config_entry *ce = NULL;
	bool found_driver = false;

	if ((reg = git_repository_driver_registry(repo)) == NULL)
		return -1;

	/* already loaded: hand out the cached driver */
	if ((drv = git_strmap_get(reg->drivers, driver_name)) != NULL) {
		*out = drv;
		return 0;
	}

	if ((error = diff_driver_alloc(&drv, &namelen, driver_name)) < 0)
		goto done;

	drv->type = DIFF_DRIVER_AUTO;

	/* if you can't read config for repo, just use default driver */
	if (git_repository_config_snapshot(&cfg, repo) < 0) {
		git_error_clear();
		goto done;
	}

	if ((error = git_buf_printf(&name, "diff.%s.binary", driver_name)) < 0)
		goto done;

	switch (git_config__get_bool_force(cfg, name.ptr, -1)) {
	case true:
		/* if diff.<driver>.binary is true, just return the binary driver */
		*out = &global_drivers[DIFF_DRIVER_BINARY];
		goto done;
	case false:
		/* if diff.<driver>.binary is false, force binary checks off */
		/* but still may have custom function context patterns, etc. */
		drv->binary_flags = GIT_DIFF_FORCE_TEXT;
		found_driver = true;
		break;
	default:
		/* diff.<driver>.binary unspecified or "auto", so just continue */
		break;
	}

	/* TODO: warn if diff.<name>.command or diff.<name>.textconv are set */

	/* cut the key back to "diff.<driver>." and append the next setting name */
	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "xfuncname")) < 0)
		goto done;

	if ((error = git_config_get_multivar_foreach(
			cfg, name.ptr, NULL, diff_driver_xfuncname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.xfuncname, so just continue */
	}

	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "funcname")) < 0)
		goto done;

	if ((error = git_config_get_multivar_foreach(
			cfg, name.ptr, NULL, diff_driver_funcname, drv)) < 0) {
		if (error != GIT_ENOTFOUND)
			goto done;
		git_error_clear(); /* no diff.<driver>.funcname, so just continue */
	}

	/* if we found any patterns, set driver type to use correct callback */
	if (git_array_size(drv->fn_patterns) > 0) {
		drv->type = DIFF_DRIVER_PATTERNLIST;
		found_driver = true;
	}

	git_buf_truncate(&name, namelen + strlen("diff.."));
	if ((error = git_buf_PUTS(&name, "wordregex")) < 0)
		goto done;

	if ((error = git_config__lookup_entry(&ce, cfg, name.ptr, false)) < 0)
		goto done;
	if (!ce || !ce->value)
		/* no diff.<driver>.wordregex, so just continue */;
	else if (!(error = git_regexp_compile(&drv->word_pattern, ce->value, 0)))
		found_driver = true;
	else {
		/* TODO: warn about bad regex instead of failure */
		goto done;
	}

	/* TODO: look up diff.<driver>.algorithm to turn on minimal / patience
	 * diff in drv->other_flags
	 */

	/* if no driver config found at all, fall back on AUTO driver */
	if (!found_driver)
		goto done;

	/* store driver in registry */
	if ((error = git_strmap_set(reg->drivers, drv->name, drv)) < 0)
		goto done;

	*out = drv;

done:
	git_config_entry_free(ce);
	git_buf_dispose(&name);
	git_config_free(cfg);

	/*
	 * Nothing was selected above (no usable config, or an error): try a
	 * builtin definition. An error recorded earlier takes precedence
	 * over the builtin lookup's own result.
	 */
	if (!*out) {
		int error2 = git_diff_driver_builtin(out, reg, driver_name);
		if (!error)
			error = error2;
	}

	/* the driver built here is only kept when it is the one handed out */
	if (drv && drv != *out)
		git_diff_driver_free(drv);

	return error;
}
352
/*
 * Pick the diff driver for `path` from its "diff" attribute.
 *
 * - no repo, no path or an empty path, or attribute unspecified:
 *   the static auto driver
 * - attribute false: the static binary driver
 * - attribute true: the static text driver
 * - any other value: the driver of that name, loaded through
 *   git_diff_driver_load(); GIT_ENOTFOUND from that call is not treated
 *   as an error
 *
 * `*out` is always set to a driver; whenever nothing else was selected
 * it is the auto driver. Returns 0 or the error from the attribute or
 * driver lookup.
 */
int git_diff_driver_lookup(
	git_diff_driver **out, git_repository *repo,
	git_attr_session *attrsession, const char *path)
{
	int error = 0;
	const char *attrs[] = { "diff" };
	const char *values[1];

	GIT_ASSERT_ARG(out);
	*out = NULL;

	/* without a repository and a non-empty path there is nothing to look up */
	if (repo && path && path[0] != '\0') {
		error = git_attr_get_many_with_session(
			values, repo, attrsession, 0, path, 1, attrs);

		if (error < 0) {
			/* return error below */
		} else if (GIT_ATTR_IS_UNSPECIFIED(values[0])) {
			/* just use the auto value */
		} else if (GIT_ATTR_IS_FALSE(values[0])) {
			*out = &global_drivers[DIFF_DRIVER_BINARY];
		} else if (GIT_ATTR_IS_TRUE(values[0])) {
			*out = &global_drivers[DIFF_DRIVER_TEXT];
		} else {
			/* otherwise look for driver information in config and build driver */
			error = git_diff_driver_load(out, repo, values[0]);

			if (error < 0 && error == GIT_ENOTFOUND) {
				error = 0;
				git_error_clear();
			}
		}
	}

	if (*out == NULL)
		*out = &global_drivers[DIFF_DRIVER_AUTO];

	return error;
}
389
/*
 * Release a driver: every compiled function pattern, the pattern array,
 * the word pattern, and finally the driver itself. NULL is ignored.
 */
void git_diff_driver_free(git_diff_driver *driver)
{
	size_t idx;

	if (driver == NULL)
		return;

	for (idx = 0; idx < git_array_size(driver->fn_patterns); ++idx) {
		git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, idx);

		git_regexp_dispose(&pat->re);
	}
	git_array_clear(driver->fn_patterns);

	git_regexp_dispose(&driver->word_pattern);

	git__free(driver);
}
405
/*
 * Merge a driver's flags into `*option_flags`.
 *
 * The driver's binary_flags are applied only when neither
 * GIT_DIFF_FORCE_TEXT nor GIT_DIFF_FORCE_BINARY is already set;
 * other_flags are always applied.
 */
void git_diff_driver_update_options(
	uint32_t *option_flags, git_diff_driver *driver)
{
	uint32_t flags = *option_flags;

	if (!(flags & FORCE_DIFFABLE))
		flags |= driver->binary_flags;

	flags |= driver->other_flags;

	*option_flags = flags;
}
414
/*
 * Report whether `content` looks binary: 1 if a NUL byte occurs within
 * the first min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL) bytes,
 * 0 otherwise. The driver is currently not consulted.
 */
int git_diff_driver_content_is_binary(
	git_diff_driver *driver, const char *content, size_t content_len)
{
	git_buf search = GIT_BUF_INIT;
	size_t check_len = min(content_len, GIT_FILTER_BYTES_TO_CHECK_NUL);

	GIT_UNUSED(driver);

	/* wrap the caller's bytes without copying or taking ownership */
	git_buf_attach_notowned(&search, content, check_len);

	/* TODO: provide encoding / binary detection callbacks that can
	 * be UTF-8 aware, etc. For now, instead of trying to be smart,
	 * let's just use the simple NUL-byte detection that core git uses.
	 */

	/* previously was: if (git_buf_is_binary(&search)) */
	return git_buf_contains_nul(&search) ? 1 : 0;
}
436
/*
 * Default context matcher: a line counts as function context when its
 * first character is a letter, '_' or '$'. Reads line->ptr[0]
 * unconditionally.
 */
static int diff_context_line__simple(
	git_diff_driver *driver, git_buf *line)
{
	char lead = line->ptr[0];

	GIT_UNUSED(driver);

	if (git__isalpha(lead))
		return 1;

	return (lead == '_' || lead == '$');
}
444
/*
 * Pattern-list context matcher. The driver's patterns are tried in order
 * and the first one that matches decides:
 *  - a negated pattern rejects the line (returns false);
 *  - otherwise `line` is cut down to the first capture group if it took
 *    part in the match, else to the whole match, right-trimmed, and true
 *    is returned.
 * Returns false when no pattern matches.
 */
static int diff_context_line__pattern_match(
	git_diff_driver *driver, git_buf *line)
{
	size_t idx, count = git_array_size(driver->fn_patterns);
	git_regmatch pmatch[2];

	for (idx = 0; idx < count; ++idx) {
		git_diff_driver_pattern *pat = git_array_get(driver->fn_patterns, idx);
		git_regmatch *hit;

		if (git_regexp_search(&pat->re, line->ptr, 2, pmatch) != 0)
			continue;

		if (pat->flags & REG_NEGATE)
			return false;

		/* use pmatch data to trim line data */
		hit = (pmatch[1].start >= 0) ? &pmatch[1] : &pmatch[0];
		git_buf_consume(line, git_buf_cstr(line) + hit->start);
		git_buf_truncate(line, hit->end - hit->start);
		git_buf_rtrim(line);

		return true;
	}

	return false;
}
470
diff_context_find(const char * line,long line_len,char * out,long out_size,void * payload)471 static long diff_context_find(
472 const char *line,
473 long line_len,
474 char *out,
475 long out_size,
476 void *payload)
477 {
478 git_diff_find_context_payload *ctxt = payload;
479
480 if (git_buf_set(&ctxt->line, line, (size_t)line_len) < 0)
481 return -1;
482 git_buf_rtrim(&ctxt->line);
483
484 if (!ctxt->line.size)
485 return -1;
486
487 if (!ctxt->match_line || !ctxt->match_line(ctxt->driver, &ctxt->line))
488 return -1;
489
490 if (out_size > (long)ctxt->line.size)
491 out_size = (long)ctxt->line.size;
492 memcpy(out, ctxt->line.ptr, (size_t)out_size);
493
494 return out_size;
495 }
496
/*
 * Prepare the find-context callback and its payload for `driver`.
 *
 * With a NULL driver the callback is NULL and the payload is only zeroed.
 * Otherwise the payload gets the driver, an initialized line buffer, and
 * the pattern matcher for DIFF_DRIVER_PATTERNLIST drivers or the simple
 * matcher for all others.
 */
void git_diff_find_context_init(
	git_diff_find_context_fn *findfn_out,
	git_diff_find_context_payload *payload_out,
	git_diff_driver *driver)
{
	if (driver != NULL)
		*findfn_out = diff_context_find;
	else
		*findfn_out = NULL;

	memset(payload_out, 0, sizeof(*payload_out));

	if (driver == NULL)
		return;

	payload_out->driver = driver;

	if (driver->type == DIFF_DRIVER_PATTERNLIST)
		payload_out->match_line = diff_context_line__pattern_match;
	else
		payload_out->match_line = diff_context_line__simple;

	git_buf_init(&payload_out->line, 0);
}
512
/*
 * Release the payload's line buffer and detach its driver.
 * A NULL payload is ignored.
 */
void git_diff_find_context_clear(git_diff_find_context_payload *payload)
{
	if (payload == NULL)
		return;

	git_buf_dispose(&payload->line);
	payload->driver = NULL;
}
520