1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "attr_file.h"
9 
10 #include "repository.h"
11 #include "filebuf.h"
12 #include "attrcache.h"
13 #include "git2/blob.h"
14 #include "git2/tree.h"
15 #include "blob.h"
16 #include "index.h"
17 #include "wildmatch.h"
18 #include <ctype.h>
19 
/*
 * Destructor for an attribute file: releases its rules and pool memory,
 * destroys the lock, then scrubs and frees the structure itself.
 *
 * The rules and pool are torn down under the file lock when the lock can
 * be acquired; if locking fails the teardown still proceeds unlocked.
 */
static void attr_file_free(git_attr_file *file)
{
	int lock_rc = git_mutex_lock(&file->lock);

	/* we already hold (or failed to get) the lock, so don't re-lock */
	git_attr_file__clear_rules(file, false);
	git_pool_clear(&file->pool);

	if (lock_rc == 0)
		git_mutex_unlock(&file->lock);

	git_mutex_free(&file->lock);

	/* wipe the struct before handing the memory back */
	git__memzero(file, sizeof(*file));
	git__free(file);
}
32 
/*
 * Allocate and initialize an empty attribute file.
 *
 * @param out    receives the new file on success; the file's reference
 *               count has been incremented once for the caller
 * @param entry  cache entry to associate with the file (stored as-is)
 * @param source origin of the attribute data (stored as-is)
 * @return 0 on success, -1 on failure; `*out` is not written on failure
 */
int git_attr_file__new(
	git_attr_file **out,
	git_attr_file_entry *entry,
	git_attr_file_source source)
{
	git_attr_file *attrs = git__calloc(1, sizeof(git_attr_file));
	GIT_ERROR_CHECK_ALLOC(attrs);

	if (git_mutex_init(&attrs->lock) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to initialize lock");
		goto on_error;
	}

	if (git_pool_init(&attrs->pool, 1) < 0) {
		/* the lock was initialized above; destroy it before bailing out */
		git_mutex_free(&attrs->lock);
		goto on_error;
	}

	GIT_REFCOUNT_INC(attrs);
	attrs->entry  = entry;
	attrs->source = source;
	*out = attrs;
	return 0;

on_error:
	git__free(attrs);
	return -1;
}
59 
/*
 * Free every rule held by `file` and release the rule vector.
 *
 * @param file      attribute file whose rules are discarded
 * @param need_lock when true, the file lock is taken for the duration of
 *                  the operation; pass false if the caller handles locking
 * @return 0 on success, -1 if the lock was requested but could not be taken
 */
int git_attr_file__clear_rules(git_attr_file *file, bool need_lock)
{
	git_attr_rule *cur;
	unsigned int idx;

	if (need_lock) {
		if (git_mutex_lock(&file->lock) < 0) {
			git_error_set(GIT_ERROR_OS, "failed to lock attribute file");
			return -1;
		}
	}

	git_vector_foreach(&file->rules, idx, cur) {
		git_attr_rule__free(cur);
	}
	git_vector_free(&file->rules);

	if (need_lock)
		git_mutex_unlock(&file->lock);

	return 0;
}
79 
/*
 * Drop one reference to `file`; attr_file_free is passed to the refcount
 * macro as the destructor. A NULL `file` is ignored.
 */
void git_attr_file__free(git_attr_file *file)
{
	if (file != NULL) {
		GIT_REFCOUNT_DEC(file, attr_file_free);
	}
}
86 
/*
 * Look up `path` in the repository index and copy the object id of the
 * matching entry into `oid`.
 *
 * @return 0 on success, GIT_ENOTFOUND if the located position holds no
 *         entry, or the error from fetching the index / finding the path
 */
static int attr_file_oid_from_index(
	git_oid *oid, git_repository *repo, const char *path)
{
	const git_index_entry *found;
	git_index *index;
	size_t at;
	int error;

	error = git_repository_index__weakptr(&index, repo);
	if (error < 0)
		return error;

	error = git_index__find_pos(&at, index, path, 0, 0);
	if (error < 0)
		return error;

	found = git_index_get_byindex(index, at);
	if (found == NULL)
		return GIT_ENOTFOUND;

	*oid = found->id;
	return 0;
}
105 
/*
 * Load an attribute file from `source` and feed its content to `parser`.
 *
 * @param out          receives the loaded file on success; reset to NULL
 *                     on entry
 * @param repo         repository used for index, HEAD tree and blob lookups
 * @param attr_session optional session; when given, its key is recorded on
 *                     the file
 * @param entry        cache entry naming the file (`path` / `fullpath`)
 * @param source       where the content is read from
 * @param parser       optional callback that turns the content into rules
 * @param allow_macros forwarded unchanged to `parser`
 * @return 0 on success or a negative error code
 *
 * For GIT_ATTR_FILE__FROM_FILE a file that cannot be stat'ed, opened or
 * read (or that is a directory) is not an error: an attribute file with
 * empty content is produced and flagged `nonexistent`.
 */
int git_attr_file__load(
	git_attr_file **out,
	git_repository *repo,
	git_attr_session *attr_session,
	git_attr_file_entry *entry,
	git_attr_file_source source,
	git_attr_file_parser parser,
	bool allow_macros)
{
	int error = 0;
	git_tree *tree = NULL;
	git_tree_entry *tree_entry = NULL;
	git_blob *blob = NULL;
	git_buf content = GIT_BUF_INIT;
	const char *content_str;
	git_attr_file *file;
	struct stat st;
	bool nonexistent = false;
	int bom_offset;
	git_buf_bom_t bom;
	git_oid id;
	git_object_size_t blobsize;

	*out = NULL;

	switch (source) {
	case GIT_ATTR_FILE__IN_MEMORY:
		/* in-memory attribute file doesn't need data */
		break;
	case GIT_ATTR_FILE__FROM_INDEX: {
		/* nothing has been acquired if either lookup fails */
		if ((error = attr_file_oid_from_index(&id, repo, entry->path)) < 0 ||
			(error = git_blob_lookup(&blob, repo, &id)) < 0)
			return error;

		/* Do not assume that data straight from the ODB is NULL-terminated;
		 * copy the contents of a file to a buffer to work on */
		blobsize = git_blob_rawsize(blob);

		/* NOTE(review): if this macro returns directly on failure, `blob`
		 * is not released on that path -- confirm against its definition */
		GIT_ERROR_CHECK_BLOBSIZE(blobsize);

		/* a failed copy must not be parsed as if it were valid content */
		if ((error = git_buf_put(&content,
			git_blob_rawcontent(blob), (size_t)blobsize)) < 0)
			goto cleanup;

		break;
	}
	case GIT_ATTR_FILE__FROM_FILE: {
		int fd = -1;

		/* For open or read errors, pretend that we got ENOTFOUND. */
		/* TODO: issue warning when warning API is available */

		if (p_stat(entry->fullpath, &st) < 0 ||
			S_ISDIR(st.st_mode) ||
			(fd = git_futils_open_ro(entry->fullpath)) < 0 ||
			(error = git_futils_readbuffer_fd(&content, fd, (size_t)st.st_size)) < 0)
			nonexistent = true;

		if (fd >= 0)
			p_close(fd);

		break;
	}
	case GIT_ATTR_FILE__FROM_HEAD: {
		if ((error = git_repository_head_tree(&tree, repo)) < 0 ||
		    (error = git_tree_entry_bypath(&tree_entry, tree, entry->path)) < 0 ||
		    (error = git_blob_lookup(&blob, repo, git_tree_entry_id(tree_entry))) < 0)
			goto cleanup;

		/*
		 * Do not assume that data straight from the ODB is NULL-terminated;
		 * copy the contents of a file to a buffer to work on.
		 */
		blobsize = git_blob_rawsize(blob);

		/* NOTE(review): same caveat as the index case -- a direct return
		 * from this macro would skip the cleanup label */
		GIT_ERROR_CHECK_BLOBSIZE(blobsize);
		if ((error = git_buf_put(&content,
			git_blob_rawcontent(blob), (size_t)blobsize)) < 0)
			goto cleanup;

		break;
	}
	default:
		git_error_set(GIT_ERROR_INVALID, "unknown file source %d", source);
		return -1;
	}

	/* any error left over from a failed file read is overwritten here */
	if ((error = git_attr_file__new(&file, entry, source)) < 0)
		goto cleanup;

	/* advance over a UTF8 BOM */
	content_str = git_buf_cstr(&content);
	bom_offset = git_buf_detect_bom(&bom, &content);

	if (bom == GIT_BUF_BOM_UTF8)
		content_str += bom_offset;

	/* store the key of the attr_reader; don't bother with cache
	 * invalidation during the same attr reader session.
	 */
	if (attr_session)
		file->session_key = attr_session->key;

	if (parser && (error = parser(repo, file, content_str, allow_macros)) < 0) {
		git_attr_file__free(file);
		goto cleanup;
	}

	/* write cache breakers */
	if (nonexistent)
		file->nonexistent = 1;
	else if (source == GIT_ATTR_FILE__FROM_INDEX)
		git_oid_cpy(&file->cache_data.oid, git_blob_id(blob));
	else if (source == GIT_ATTR_FILE__FROM_HEAD)
		git_oid_cpy(&file->cache_data.oid, git_tree_id(tree));
	else if (source == GIT_ATTR_FILE__FROM_FILE)
		git_futils_filestamp_set_from_stat(&file->cache_data.stamp, &st);
	/* else always cacheable */

	*out = file;

cleanup:
	git_blob_free(blob);
	git_tree_entry_free(tree_entry);
	git_tree_free(tree);
	git_buf_dispose(&content);

	return error;
}
231 
/*
 * Decide whether a cached attribute file needs to be reloaded.
 *
 * @param repo         repository used to consult the index / HEAD tree
 * @param attr_session optional session; a file whose `session_key` equals
 *                     the session key is always considered current
 * @param file         cached file to check; NULL counts as out of date
 * @return 1 when the file is out of date, 0 when it is current, or a
 *         negative error code. For GIT_ATTR_FILE__FROM_FILE the result of
 *         git_futils_filestamp_check is returned unchanged.
 */
int git_attr_file__out_of_date(
	git_repository *repo,
	git_attr_session *attr_session,
	git_attr_file *file)
{
	if (!file)
		return 1;

	/* we are never out of date if we just created this data in the same
	 * attr_session; otherwise, nonexistent files must be invalidated
	 */
	if (attr_session && attr_session->key == file->session_key)
		return 0;
	else if (file->nonexistent)
		return 1;

	switch (file->source) {
	case GIT_ATTR_FILE__IN_MEMORY:
		return 0;

	case GIT_ATTR_FILE__FROM_FILE:
		return git_futils_filestamp_check(
			&file->cache_data.stamp, file->entry->fullpath);

	case GIT_ATTR_FILE__FROM_INDEX: {
		int error;
		git_oid id;

		if ((error = attr_file_oid_from_index(
				&id, repo, file->entry->path)) < 0)
			return error;

		return (git_oid__cmp(&file->cache_data.oid, &id) != 0);
	}

	case GIT_ATTR_FILE__FROM_HEAD: {
		git_tree *tree;
		int error;

		if ((error = git_repository_head_tree(&tree, repo)) < 0)
			return error;

		/*
		 * Normalize to 0/1 as the index case does: a raw comparison
		 * result may be negative, which a caller would read as an error.
		 */
		error = (git_oid__cmp(&file->cache_data.oid, git_tree_id(tree)) != 0);

		git_tree_free(tree);
		return error;
	}

	default:
		git_error_set(GIT_ERROR_INVALID, "invalid file type %d", file->source);
		return -1;
	}
}
285 
286 static int sort_by_hash_and_name(const void *a_raw, const void *b_raw);
287 static void git_attr_rule__clear(git_attr_rule *rule);
288 static bool parse_optimized_patterns(
289 	git_attr_fnmatch *spec,
290 	git_pool *pool,
291 	const char *pattern);
292 
/*
 * Parse attribute-file text into rules stored on `attrs`.
 *
 * @param repo         repository passed to assignment parsing and macro
 *                     insertion
 * @param attrs        attribute file receiving the rules; its lock is held
 *                     while parsing
 * @param data         NUL-terminated buffer to parse
 * @param allow_macros when false, macro rules found in the buffer are
 *                     dropped instead of being inserted as macros
 * @return 0 on success or a negative error code
 *
 * A line for which either sub-parser reports GIT_ENOTFOUND is skipped.
 */
int git_attr_file__parse_buffer(
	git_repository *repo, git_attr_file *attrs, const char *data, bool allow_macros)
{
	const char *scan = data, *context = NULL;
	git_attr_rule *rule = NULL;
	int error = 0;

	/* If subdir file path, convert context for file paths */
	if (attrs->entry && git_path_root(attrs->entry->path) < 0 &&
	    !git__suffixcmp(attrs->entry->path, "/" GIT_ATTR_FILE))
		context = attrs->entry->path;

	if (git_mutex_lock(&attrs->lock) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock attribute file");
		return -1;
	}

	while (!error && *scan) {
		/* Allocate rule if needed, otherwise re-use previous rule */
		if (!rule) {
			rule = git__calloc(1, sizeof(*rule));
			if (!rule) {
				/* the file lock is held: leave through `out` to release it */
				error = -1;
				goto out;
			}
		} else
			git_attr_rule__clear(rule);

		rule->match.flags = GIT_ATTR_FNMATCH_ALLOWNEG | GIT_ATTR_FNMATCH_ALLOWMACRO;

		/* Parse the next "pattern attr attr attr" line */
		if ((error = git_attr_fnmatch__parse(&rule->match, &attrs->pool, context, &scan)) < 0 ||
		    (error = git_attr_assignment__parse(repo, &attrs->pool, &rule->assigns, &scan)) < 0)
		{
			if (error != GIT_ENOTFOUND)
				goto out;
			/* nothing usable on this line; the rule is recycled next round */
			error = 0;
			continue;
		}

		if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO) {
			/* TODO: warning if macro found in file below repo root */
			if (!allow_macros)
				continue;
			if ((error = git_attr_cache__insert_macro(repo, rule)) < 0)
				goto out;
		} else if ((error = git_vector_insert(&attrs->rules, rule)) < 0)
			goto out;

		/* the rule was handed off above; allocate a fresh one next time */
		rule = NULL;
	}

out:
	git_mutex_unlock(&attrs->lock);
	/* frees a rule that was allocated but never handed off (NULL is fine) */
	git_attr_rule__free(rule);

	return error;
}
348 
/*
 * Hash an attribute name: start from 5381 and, for each character, fold
 * it in as hash = hash * 33 + c (djb2-style).
 */
uint32_t git_attr_file__name_hash(const char *name)
{
	uint32_t hash = 5381;

	GIT_ASSERT_ARG(name);

	for (; *name != '\0'; name++) {
		int ch = (int)*name;

		hash += (hash << 5);	/* hash * 33 */
		hash += ch;
	}

	return hash;
}
360 
/*
 * Find the value of a single attribute for `path` within one file.
 *
 * Matching rules are visited with git_attr_file__foreach_matching_rule;
 * the first one that carries an assignment for `attr` supplies the value.
 *
 * @param value receives the assignment's value, or NULL if no matching
 *              rule assigns `attr`
 * @return always 0
 */
int git_attr_file__lookup_one(
	git_attr_file *file,
	git_attr_path *path,
	const char *attr,
	const char **value)
{
	git_attr_rule *candidate;
	git_attr_name key;
	size_t idx;

	key.name = attr;
	key.name_hash = git_attr_file__name_hash(attr);

	*value = NULL;

	git_attr_file__foreach_matching_rule(file, path, idx, candidate) {
		git_attr_assignment *hit;
		size_t at;

		if (git_vector_bsearch(&at, &candidate->assigns, &key) != 0)
			continue;

		hit = git_vector_get(&candidate->assigns, at);
		*value = hit->value;
		break;
	}

	return 0;
}
388 
/*
 * Read the attribute file at `path` from disk and parse it, without a
 * repository (NULL is passed as the repo to the parser).
 *
 * @param out  receives the loaded file on success
 * @param path filesystem path of the attribute file
 * @return 0 on success or a negative error code; on failure any partially
 *         built file is released
 */
int git_attr_file__load_standalone(git_attr_file **out, const char *path)
{
	git_attr_file *loaded = NULL;
	git_buf data = GIT_BUF_INIT;
	int error;

	error = git_futils_readbuffer(&data, path);
	if (error < 0)
		goto done;

	error = git_attr_file__new(&loaded, NULL, GIT_ATTR_FILE__FROM_FILE);
	if (error < 0)
		goto done;

	error = git_attr_file__parse_buffer(NULL, loaded, data.ptr, true);
	if (error < 0)
		goto done;

	/*
	 * The cache entry is carved out of the file's own pool, so it needs
	 * no separate release: freeing the file and its pool covers it.
	 */
	error = git_attr_cache__alloc_file_entry(
		&loaded->entry, NULL, NULL, path, &loaded->pool);
	if (error < 0)
		goto done;

	*out = loaded;

done:
	if (error < 0)
		git_attr_file__free(loaded);
	git_buf_dispose(&data);

	return error;
}
416 
/*
 * Test whether `path` matches the pattern in `match`.
 *
 * Negation is not applied here; the result reflects the bare pattern only.
 *
 * @return true if the pattern matches the path, false otherwise
 */
bool git_attr_fnmatch__match(
	git_attr_fnmatch *match,
	git_attr_path *path)
{
	const bool icase = (match->flags & GIT_ATTR_FNMATCH_ICASE) != 0;
	const char *rel = path->path;
	const char *subject;
	int wm_flags = icase ? WM_CASEFOLD : 0;
	bool same;

	/*
	 * A rule that came from a subdirectory only applies to paths below
	 * that directory, so a prefix mismatch is an immediate non-match.
	 */
	if (match->containing_dir) {
		int mismatch;

		if (icase)
			mismatch = git__strncasecmp(path->path,
				match->containing_dir, match->containing_dir_length);
		else
			mismatch = git__prefixcmp(path->path, match->containing_dir);

		if (mismatch)
			return false;

		rel += match->containing_dir_length;
	}

	/* full-path patterns see the relative path, others only the basename */
	if (match->flags & GIT_ATTR_FNMATCH_FULLPATH) {
		subject = rel;
		wm_flags |= WM_PATHNAME;
	} else {
		subject = path->basename;
	}

	/* common case: not a directory-only pattern, or the path is a directory */
	if (!(match->flags & GIT_ATTR_FNMATCH_DIRECTORY) || path->is_dir)
		return (wildmatch(match->pattern, subject, wm_flags) == WM_MATCH);

	/*
	 * Directory-only pattern checked against a non-directory: never a
	 * match for attribute lookups, nor for an entry sitting directly at
	 * the root of the containing directory (or of the repository).
	 */
	if (!(match->flags & GIT_ATTR_FNMATCH_IGNORE) ||
		path->basename == rel)
		return false;

	/* a file named exactly like the ignored folder is not ignored */
	if (icase)
		same = (strcasecmp(match->pattern, rel) == 0);
	else
		same = (strcmp(match->pattern, rel) == 0);

	if (same)
		return false;

	return (wildmatch(match->pattern, rel, wm_flags) == WM_MATCH);
}
477 
/*
 * Test `path` against a rule's pattern, inverting the outcome when the
 * rule carries GIT_ATTR_FNMATCH_NEGATIVE.
 */
bool git_attr_rule__match(
	git_attr_rule *rule,
	git_attr_path *path)
{
	bool hit = git_attr_fnmatch__match(&rule->match, path);
	bool negated = (rule->match.flags & GIT_ATTR_FNMATCH_NEGATIVE) != 0;

	return negated ? !hit : hit;
}
489 
git_attr_rule__lookup_assignment(git_attr_rule * rule,const char * name)490 git_attr_assignment *git_attr_rule__lookup_assignment(
491 	git_attr_rule *rule, const char *name)
492 {
493 	size_t pos;
494 	git_attr_name key;
495 	key.name = name;
496 	key.name_hash = git_attr_file__name_hash(name);
497 
498 	if (git_vector_bsearch(&pos, &rule->assigns, &key))
499 		return NULL;
500 
501 	return git_vector_get(&rule->assigns, pos);
502 }
503 
/*
 * Fill in `info` for attribute lookups on `path`.
 *
 * Builds the full path from `path` and `base`, validates it against the
 * repository, strips trailing slashes, and derives the relative path,
 * the basename and the directory flag.
 *
 * @param dir_flag GIT_DIR_FLAG_FALSE / GIT_DIR_FLAG_TRUE set `is_dir`
 *                 directly; any other value asks the filesystem
 * @return 0 on success, -1 if joining or validating the path fails
 */
int git_attr_path__init(
	git_attr_path *info,
	git_repository *repo,
	const char *path,
	const char *base,
	git_dir_flag dir_flag)
{
	char *last_slash;
	ssize_t root;

	/* build full path as best we can */
	git_buf_init(&info->full, 0);

	if (git_path_join_unrooted(&info->full, path, base, &root) < 0)
		return -1;

	if (git_path_validate_workdir_buf(repo, &info->full) < 0)
		return -1;

	info->path = info->full.ptr + root;

	/* drop trailing slashes and re-terminate the buffer */
	while (info->full.size > 0 &&
	       info->full.ptr[info->full.size - 1] == '/')
		info->full.size--;
	info->full.ptr[info->full.size] = '\0';

	/* the relative path never starts with a slash */
	while (*info->path == '/')
		info->path++;

	/* basename is what follows the last slash, if anything does */
	last_slash = strrchr(info->path, '/');
	if (last_slash != NULL && last_slash[1] != '\0')
		info->basename = last_slash + 1;
	else
		info->basename = info->path;

	if (dir_flag == GIT_DIR_FLAG_FALSE)
		info->is_dir = 0;
	else if (dir_flag == GIT_DIR_FLAG_TRUE)
		info->is_dir = 1;
	else	/* GIT_DIR_FLAG_UNKNOWN or anything else: check on disk */
		info->is_dir = (int)git_path_isdir(info->full.ptr);

	return 0;
}
559 
/*
 * Release the buffer owned by `info` and clear the `path` and `basename`
 * pointers.
 */
void git_attr_path__free(git_attr_path *info)
{
	info->basename = NULL;
	info->path = NULL;
	git_buf_dispose(&info->full);
}
566 
567 /*
568  * From gitattributes(5):
569  *
570  * Patterns have the following format:
571  *
572  * - A blank line matches no files, so it can serve as a separator for
573  *   readability.
574  *
575  * - A line starting with # serves as a comment.
576  *
577  * - An optional prefix ! which negates the pattern; any matching file
578  *   excluded by a previous pattern will become included again. If a negated
579  *   pattern matches, this will override lower precedence patterns sources.
580  *
581  * - If the pattern ends with a slash, it is removed for the purpose of the
582  *   following description, but it would only find a match with a directory. In
583  *   other words, foo/ will match a directory foo and paths underneath it, but
584  *   will not match a regular file or a symbolic link foo (this is consistent
585  *   with the way how pathspec works in general in git).
586  *
587  * - If the pattern does not contain a slash /, git treats it as a shell glob
588  *   pattern and checks for a match against the pathname without leading
589  *   directories.
590  *
591  * - Otherwise, git treats the pattern as a shell glob suitable for consumption
592  *   by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
593  *   not match a / in the pathname. For example, "Documentation/\*.html" matches
594  *   "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading
595  *   slash matches the beginning of the pathname; for example, "/\*.c" matches
596  *   "cat-file.c" but not "mozilla-sha1/sha1.c".
597  */
598 
/*
 * Count the unescaped blanks (spaces and tabs) at the end of the first
 * `len` bytes of `p`. A blank preceded by an odd number of backslashes is
 * escaped and ends the count.
 */
static size_t trailing_space_length(const char *p, size_t len)
{
	size_t end = len;

	while (end > 0) {
		size_t backslashes = 0;

		if (p[end - 1] != ' ' && p[end - 1] != '\t')
			break;

		/*
		 * Measure the run of backslashes right before this blank. An
		 * even run means the backslashes escape each other and the
		 * blank is plain; an odd run means the last backslash escapes
		 * the blank, which is an escaped whitespace and must be kept.
		 */
		while (backslashes + 1 < end && p[end - 2 - backslashes] == '\\')
			backslashes++;

		if (backslashes % 2)
			break;

		end--;
	}

	return len - end;
}
625 
/*
 * Remove, in place, the backslash in front of escaped whitespace.
 * Backslashes that escape anything else are kept.
 *
 * @param str NUL-terminated string to rewrite; NULL is accepted
 * @return length of the rewritten string (0 for NULL)
 */
static size_t unescape_spaces(char *str)
{
	char *in, *out;
	bool pending_escape = false;

	if (str == NULL)
		return 0;

	in = str;
	out = str;

	while (*in != '\0') {
		char ch = *in;

		if (ch == '\\' && !pending_escape) {
			/* hold the backslash until we know what it escapes */
			pending_escape = true;
		} else {
			/* re-emit the held backslash unless it escaped whitespace */
			if (pending_escape && !git__isspace(ch))
				*out++ = '\\';

			*out++ = ch;
			pending_escape = false;
		}

		in++;
	}

	/* only re-terminate when the string actually shrank */
	if (out != in)
		*out = '\0';

	return (out - str);
}
653 
654 /*
655  * This will return 0 if the spec was filled out,
656  * GIT_ENOTFOUND if the fnmatch does not require matching, or
657  * another error code there was an actual problem.
658  */
git_attr_fnmatch__parse(git_attr_fnmatch * spec,git_pool * pool,const char * context,const char ** base)659 int git_attr_fnmatch__parse(
660 	git_attr_fnmatch *spec,
661 	git_pool *pool,
662 	const char *context,
663 	const char **base)
664 {
665 	const char *pattern, *scan;
666 	int slash_count, allow_space;
667 	bool escaped;
668 
669 	GIT_ASSERT_ARG(spec);
670 	GIT_ASSERT_ARG(base && *base);
671 
672 	if (parse_optimized_patterns(spec, pool, *base))
673 		return 0;
674 
675 	spec->flags = (spec->flags & GIT_ATTR_FNMATCH__INCOMING);
676 	allow_space = ((spec->flags & GIT_ATTR_FNMATCH_ALLOWSPACE) != 0);
677 
678 	pattern = *base;
679 
680 	while (!allow_space && git__isspace(*pattern))
681 		pattern++;
682 
683 	if (!*pattern || *pattern == '#' || *pattern == '\n' ||
684 	    (*pattern == '\r' && *(pattern + 1) == '\n')) {
685 		*base = git__next_line(pattern);
686 		return GIT_ENOTFOUND;
687 	}
688 
689 	if (*pattern == '[' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWMACRO) != 0) {
690 		if (strncmp(pattern, "[attr]", 6) == 0) {
691 			spec->flags = spec->flags | GIT_ATTR_FNMATCH_MACRO;
692 			pattern += 6;
693 		}
694 		/* else a character range like [a-e]* which is accepted */
695 	}
696 
697 	if (*pattern == '!' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWNEG) != 0) {
698 		spec->flags = spec->flags | GIT_ATTR_FNMATCH_NEGATIVE;
699 		pattern++;
700 	}
701 
702 	slash_count = 0;
703 	escaped = false;
704 	/* Scan until a non-escaped whitespace. */
705 	for (scan = pattern; *scan != '\0'; ++scan) {
706 		char c = *scan;
707 
708 		if (c == '\\' && !escaped) {
709 			escaped = true;
710 			continue;
711 		} else if (git__isspace(c) && !escaped) {
712 			if (!allow_space || (c != ' ' && c != '\t' && c != '\r'))
713 				break;
714 		} else if (c == '/') {
715 			spec->flags = spec->flags | GIT_ATTR_FNMATCH_FULLPATH;
716 			slash_count++;
717 
718 			if (slash_count == 1 && pattern == scan)
719 				pattern++;
720 		} else if (git__iswildcard(c) && !escaped) {
721 			/* remember if we see an unescaped wildcard in pattern */
722 			spec->flags = spec->flags | GIT_ATTR_FNMATCH_HASWILD;
723 		}
724 
725 		escaped = false;
726 	}
727 
728 	*base = scan;
729 
730 	if ((spec->length = scan - pattern) == 0)
731 		return GIT_ENOTFOUND;
732 
733 	/*
734 	 * Remove one trailing \r in case this is a CRLF delimited
735 	 * file, in the case of Icon\r\r\n, we still leave the first
736 	 * \r there to match against.
737 	 */
738 	if (pattern[spec->length - 1] == '\r')
739 		if (--spec->length == 0)
740 			return GIT_ENOTFOUND;
741 
742 	/* Remove trailing spaces. */
743 	spec->length -= trailing_space_length(pattern, spec->length);
744 
745 	if (spec->length == 0)
746 		return GIT_ENOTFOUND;
747 
748 	if (pattern[spec->length - 1] == '/') {
749 		spec->length--;
750 		spec->flags = spec->flags | GIT_ATTR_FNMATCH_DIRECTORY;
751 		if (--slash_count <= 0)
752 			spec->flags = spec->flags & ~GIT_ATTR_FNMATCH_FULLPATH;
753 	}
754 
755 	if (context) {
756 		char *slash = strrchr(context, '/');
757 		size_t len;
758 		if (slash) {
759 			/* include the slash for easier matching */
760 			len = slash - context + 1;
761 			spec->containing_dir = git_pool_strndup(pool, context, len);
762 			spec->containing_dir_length = len;
763 		}
764 	}
765 
766 	spec->pattern = git_pool_strndup(pool, pattern, spec->length);
767 
768 	if (!spec->pattern) {
769 		*base = git__next_line(pattern);
770 		return -1;
771 	} else {
772 		/* strip '\' that might have been used for internal whitespace */
773 		spec->length = unescape_spaces(spec->pattern);
774 	}
775 
776 	return 0;
777 }
778 
/*
 * Fast path for a buffer that consists solely of "*" or ".": such a
 * pattern is stored with GIT_ATTR_FNMATCH_MATCH_ALL as its only flag.
 *
 * @param spec    receives flags, pattern and length when the fast path hits
 * @param pool    pool that owns the copied pattern
 * @param pattern NUL-terminated text to inspect
 * @return true if `spec` was filled in, false if regular parsing is needed
 */
static bool parse_optimized_patterns(
	git_attr_fnmatch *spec,
	git_pool *pool,
	const char *pattern)
{
	/*
	 * Test the first character before looking at the second one, so an
	 * empty string is never read past its terminator.
	 */
	if ((pattern[0] == '*' || pattern[0] == '.') && !pattern[1]) {
		spec->flags = GIT_ATTR_FNMATCH_MATCH_ALL;
		spec->pattern = git_pool_strndup(pool, pattern, 1);
		spec->length = 1;

		return true;
	}

	return false;
}
794 
sort_by_hash_and_name(const void * a_raw,const void * b_raw)795 static int sort_by_hash_and_name(const void *a_raw, const void *b_raw)
796 {
797 	const git_attr_name *a = a_raw;
798 	const git_attr_name *b = b_raw;
799 
800 	if (b->name_hash < a->name_hash)
801 		return 1;
802 	else if (b->name_hash > a->name_hash)
803 		return -1;
804 	else
805 		return strcmp(b->name, a->name);
806 }
807 
/*
 * Free an assignment structure. Its name and value strings live in the
 * git_pool of the owning git_attr_file, so only the pointers are cleared
 * here and the strings themselves are left to the pool.
 */
static void git_attr_assignment__free(git_attr_assignment *assign)
{
	assign->value = NULL;
	assign->name = NULL;
	git__free(assign);
}
817 
merge_assignments(void ** old_raw,void * new_raw)818 static int merge_assignments(void **old_raw, void *new_raw)
819 {
820 	git_attr_assignment **old = (git_attr_assignment **)old_raw;
821 	git_attr_assignment *new = (git_attr_assignment *)new_raw;
822 
823 	GIT_REFCOUNT_DEC(*old, git_attr_assignment__free);
824 	*old = new;
825 	return GIT_EEXISTS;
826 }
827 
/*
 * Parse the attribute assignments on the rest of the current line.
 *
 * Recognized forms are "name", "-name", "!name" and "name=value"; a '#'
 * where a name would start ends parsing of the line.
 *
 * @param repo    optional repository; when given, a plain "name" that is a
 *                known macro also contributes the macro's assignments
 * @param pool    pool that owns the copied names and values
 * @param assigns vector receiving the assignments; must be empty on entry
 * @param base    in/out scan position; moved to the next line on success
 * @return 0 if at least one assignment was stored, GIT_ENOTFOUND if none
 *         was, or a negative error code (`*base` is then left unchanged)
 */
int git_attr_assignment__parse(
	git_repository *repo,
	git_pool *pool,
	git_vector *assigns,
	const char **base)
{
	int error = 0;
	const char *scan = *base;
	git_attr_assignment *assign = NULL;

	GIT_ASSERT_ARG(assigns && !assigns->length);

	git_vector_set_cmp(assigns, sort_by_hash_and_name);

	while (*scan && *scan != '\n') {
		const char *name_start, *value_start;

		/* skip leading blanks */
		while (git__isspace(*scan) && *scan != '\n') scan++;

		/* allocate assign if needed */
		if (!assign) {
			assign = git__calloc(1, sizeof(git_attr_assignment));
			GIT_ERROR_CHECK_ALLOC(assign);
			GIT_REFCOUNT_INC(assign);
		}

		/* same seed as git_attr_file__name_hash; folded in while scanning */
		assign->name_hash = 5381;
		assign->value = git_attr__true;

		/* look for magic name prefixes */
		if (*scan == '-') {
			assign->value = git_attr__false;
			scan++;
		} else if (*scan == '!') {
			assign->value = git_attr__unset; /* explicit unspecified state */
			scan++;
		} else if (*scan == '#') /* comment rest of line */
			break;

		/* find the name */
		name_start = scan;
		while (*scan && !git__isspace(*scan) && *scan != '=') {
			assign->name_hash =
				((assign->name_hash << 5) + assign->name_hash) + *scan;
			scan++;
		}
		if (scan == name_start) {
			/* must have found lone prefix (" - ") or leading = ("=foo")
			 * or end of buffer -- advance until whitespace and continue
			 */
			while (*scan && !git__isspace(*scan)) scan++;
			continue;
		}

		/* allocate permanent storage for name */
		assign->name = git_pool_strndup(pool, name_start, scan - name_start);
		if (!assign->name) {
			/* `assign` is not in the vector yet; release it on the way out */
			error = -1;
			goto on_error;
		}

		/* if there is an equals sign, find the value */
		if (*scan == '=') {
			for (value_start = ++scan; *scan && !git__isspace(*scan); ++scan);

			/* if we found a value, allocate permanent storage for it */
			if (scan > value_start) {
				assign->value = git_pool_strndup(pool, value_start, scan - value_start);
				if (!assign->value) {
					error = -1;
					goto on_error;
				}
			}
		}

		/* expand macros (if given a repo with a macro cache) */
		if (repo != NULL && assign->value == git_attr__true) {
			git_attr_rule *macro =
				git_attr_cache__lookup_macro(repo, assign->name);

			if (macro != NULL) {
				unsigned int i;
				git_attr_assignment *massign;

				git_vector_foreach(&macro->assigns, i, massign) {
					GIT_REFCOUNT_INC(massign);

					error = git_vector_insert_sorted(
						assigns, massign, &merge_assignments);
					if (error < 0 && error != GIT_EEXISTS) {
						/* undo the reference taken just above */
						GIT_REFCOUNT_DEC(massign, git_attr_assignment__free);
						goto on_error;
					}
				}
			}
		}

		/* insert allocated assign into vector */
		error = git_vector_insert_sorted(assigns, assign, &merge_assignments);
		if (error < 0 && error != GIT_EEXISTS)
			goto on_error;

		/* clear assign since it is now "owned" by the vector */
		assign = NULL;
	}

	/* an assignment allocated for a token that never completed */
	if (assign != NULL)
		git_attr_assignment__free(assign);

	*base = git__next_line(scan);

	return (assigns->length == 0) ? GIT_ENOTFOUND : 0;

on_error:
	/* every jump here happens with a pending, un-inserted `assign` */
	git_attr_assignment__free(assign);
	return error;
}
936 
/*
 * Reset a rule so it can be reused: drops a reference on each assignment
 * and frees the assignment vector (skipped for rules flagged
 * GIT_ATTR_FNMATCH_IGNORE), then clears the pattern. NULL is accepted.
 */
static void git_attr_rule__clear(git_attr_rule *rule)
{
	git_attr_assignment *cur;
	unsigned int idx;

	if (rule == NULL)
		return;

	if ((rule->match.flags & GIT_ATTR_FNMATCH_IGNORE) == 0) {
		git_vector_foreach(&rule->assigns, idx, cur) {
			GIT_REFCOUNT_DEC(cur, git_attr_assignment__free);
		}
		git_vector_free(&rule->assigns);
	}

	/* the pattern text belongs to a git_pool; just forget the pointer */
	rule->match.length = 0;
	rule->match.pattern = NULL;
}
955 
/*
 * Clear a rule's contents and free the rule itself. NULL is accepted.
 */
void git_attr_rule__free(git_attr_rule *rule)
{
	if (rule != NULL)
		git_attr_rule__clear(rule);

	git__free(rule);
}
961 
/*
 * Zero `session` and give it a fresh key taken from the repository's
 * atomically incremented session counter.
 *
 * @return 0 on success
 */
int git_attr_session__init(git_attr_session *session, git_repository *repo)
{
	GIT_ASSERT_ARG(repo);

	memset(session, 0, sizeof(git_attr_session));

	session->key = git_atomic32_inc(&repo->attr_session_key);
	return 0;
}
971 
/*
 * Release the buffers owned by `session` and zero the structure.
 * A NULL `session` is ignored.
 */
void git_attr_session__free(git_attr_session *session)
{
	if (session != NULL) {
		git_buf_dispose(&session->tmp);
		git_buf_dispose(&session->sysdir);

		memset(session, 0, sizeof(*session));
	}
}
982