1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "attr_file.h"
9 
10 #include "repository.h"
11 #include "filebuf.h"
12 #include "attrcache.h"
13 #include "git2/blob.h"
14 #include "git2/tree.h"
15 #include "blob.h"
16 #include "index.h"
17 #include "wildmatch.h"
18 #include <ctype.h>
19 
/*
 * Destroy an attribute file: drop its rules, release its pool and
 * mutex, scrub the structure and free it. This is the destructor
 * handed to GIT_REFCOUNT_DEC by git_attr_file__free().
 */
static void attr_file_free(git_attr_file *file)
{
	/* Best effort: the teardown proceeds even if the lock attempt fails. */
	int lock_result = git_mutex_lock(&file->lock);

	/* The lock was already attempted above, so do not lock again. */
	git_attr_file__clear_rules(file, false);
	git_pool_clear(&file->pool);

	/* Only unlock what was actually locked. */
	if (lock_result == 0)
		git_mutex_unlock(&file->lock);

	git_mutex_free(&file->lock);

	git__memzero(file, sizeof(*file));
	git__free(file);
}
32 
/*
 * Allocate and initialize an empty attribute file.
 *
 * out:    receives the new file (holding one reference) on success
 * entry:  cache entry stored in the file as-is
 * source: source description, copied by value into the file
 *
 * Returns 0 on success, -1 on failure. On failure nothing is written
 * to `out` and all partially initialized state is released.
 */
int git_attr_file__new(
	git_attr_file **out,
	git_attr_file_entry *entry,
	git_attr_file_source *source)
{
	git_attr_file *attrs = git__calloc(1, sizeof(git_attr_file));
	GIT_ERROR_CHECK_ALLOC(attrs);

	if (git_mutex_init(&attrs->lock) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to initialize lock");
		goto on_error;
	}

	if (git_pool_init(&attrs->pool, 1) < 0) {
		/* the mutex was initialized above; release it before bailing */
		git_mutex_free(&attrs->lock);
		goto on_error;
	}

	GIT_REFCOUNT_INC(attrs);
	attrs->entry = entry;
	memcpy(&attrs->source, source, sizeof(git_attr_file_source));
	*out = attrs;
	return 0;

on_error:
	git__free(attrs);
	return -1;
}
59 
/*
 * Free every rule held by `file` and release the rule vector.
 *
 * When `need_lock` is true the file's mutex is taken for the duration
 * of the operation; callers that already hold it pass false.
 *
 * Returns 0 on success, -1 if the lock could not be acquired.
 */
int git_attr_file__clear_rules(git_attr_file *file, bool need_lock)
{
	git_attr_rule *entry;
	unsigned int idx;

	if (need_lock) {
		if (git_mutex_lock(&file->lock) < 0) {
			git_error_set(GIT_ERROR_OS, "failed to lock attribute file");
			return -1;
		}
	}

	git_vector_foreach(&file->rules, idx, entry) {
		git_attr_rule__free(entry);
	}
	git_vector_free(&file->rules);

	if (need_lock)
		git_mutex_unlock(&file->lock);

	return 0;
}
79 
/*
 * Drop one reference to `file`; attr_file_free() is the destructor
 * handed to GIT_REFCOUNT_DEC. A NULL `file` is ignored.
 */
void git_attr_file__free(git_attr_file *file)
{
	if (file != NULL) {
		GIT_REFCOUNT_DEC(file, attr_file_free);
	}
}
86 
/*
 * Look up `path` in the repository's index and copy the id of the
 * entry found there into `oid`.
 *
 * Returns 0 on success, GIT_ENOTFOUND if the index has no entry at the
 * located position, or the error reported by the index lookup.
 */
static int attr_file_oid_from_index(
	git_oid *oid, git_repository *repo, const char *path)
{
	const git_index_entry *found;
	git_index *index;
	size_t at;
	int error;

	error = git_repository_index__weakptr(&index, repo);
	if (error < 0)
		return error;

	error = git_index__find_pos(&at, index, path, 0, 0);
	if (error < 0)
		return error;

	found = git_index_get_byindex(index, at);
	if (found == NULL)
		return GIT_ENOTFOUND;

	*oid = found->id;
	return 0;
}
105 
git_attr_file__load(git_attr_file ** out,git_repository * repo,git_attr_session * attr_session,git_attr_file_entry * entry,git_attr_file_source * source,git_attr_file_parser parser,bool allow_macros)106 int git_attr_file__load(
107 	git_attr_file **out,
108 	git_repository *repo,
109 	git_attr_session *attr_session,
110 	git_attr_file_entry *entry,
111 	git_attr_file_source *source,
112 	git_attr_file_parser parser,
113 	bool allow_macros)
114 {
115 	int error = 0;
116 	git_commit *commit = NULL;
117 	git_tree *tree = NULL;
118 	git_tree_entry *tree_entry = NULL;
119 	git_blob *blob = NULL;
120 	git_buf content = GIT_BUF_INIT;
121 	const char *content_str;
122 	git_attr_file *file;
123 	struct stat st;
124 	bool nonexistent = false;
125 	int bom_offset;
126 	git_buf_bom_t bom;
127 	git_oid id;
128 	git_object_size_t blobsize;
129 
130 	*out = NULL;
131 
132 	switch (source->type) {
133 	case GIT_ATTR_FILE_SOURCE_MEMORY:
134 		/* in-memory attribute file doesn't need data */
135 		break;
136 	case GIT_ATTR_FILE_SOURCE_INDEX: {
137 		if ((error = attr_file_oid_from_index(&id, repo, entry->path)) < 0 ||
138 			(error = git_blob_lookup(&blob, repo, &id)) < 0)
139 			return error;
140 
141 		/* Do not assume that data straight from the ODB is NULL-terminated;
142 		 * copy the contents of a file to a buffer to work on */
143 		blobsize = git_blob_rawsize(blob);
144 
145 		GIT_ERROR_CHECK_BLOBSIZE(blobsize);
146 		git_buf_put(&content, git_blob_rawcontent(blob), (size_t)blobsize);
147 		break;
148 	}
149 	case GIT_ATTR_FILE_SOURCE_FILE: {
150 		int fd = -1;
151 
152 		/* For open or read errors, pretend that we got ENOTFOUND. */
153 		/* TODO: issue warning when warning API is available */
154 
155 		if (p_stat(entry->fullpath, &st) < 0 ||
156 			S_ISDIR(st.st_mode) ||
157 			(fd = git_futils_open_ro(entry->fullpath)) < 0 ||
158 			(error = git_futils_readbuffer_fd(&content, fd, (size_t)st.st_size)) < 0)
159 			nonexistent = true;
160 
161 		if (fd >= 0)
162 			p_close(fd);
163 
164 		break;
165 	}
166 	case GIT_ATTR_FILE_SOURCE_HEAD:
167 	case GIT_ATTR_FILE_SOURCE_COMMIT: {
168 		if (source->type == GIT_ATTR_FILE_SOURCE_COMMIT) {
169 			if ((error = git_commit_lookup(&commit, repo, source->commit_id)) < 0 ||
170 			    (error = git_commit_tree(&tree, commit)) < 0)
171 				goto cleanup;
172 		} else {
173 			if ((error = git_repository_head_tree(&tree, repo)) < 0)
174 				goto cleanup;
175 		}
176 
177 		if ((error = git_tree_entry_bypath(&tree_entry, tree, entry->path)) < 0) {
178 			/*
179 			 * If the attributes file does not exist, we can
180 			 * cache an empty file for this commit to prevent
181 			 * needless future lookups.
182 			 */
183 			if (error == GIT_ENOTFOUND) {
184 				error = 0;
185 				break;
186 			}
187 
188 			goto cleanup;
189 		}
190 
191 		if ((error = git_blob_lookup(&blob, repo, git_tree_entry_id(tree_entry))) < 0)
192 			goto cleanup;
193 
194 		/*
195 		 * Do not assume that data straight from the ODB is NULL-terminated;
196 		 * copy the contents of a file to a buffer to work on.
197 		 */
198 		blobsize = git_blob_rawsize(blob);
199 
200 		GIT_ERROR_CHECK_BLOBSIZE(blobsize);
201 		if ((error = git_buf_put(&content,
202 			git_blob_rawcontent(blob), (size_t)blobsize)) < 0)
203 			goto cleanup;
204 
205 		break;
206 	}
207 	default:
208 		git_error_set(GIT_ERROR_INVALID, "unknown file source %d", source->type);
209 		return -1;
210 	}
211 
212 	if ((error = git_attr_file__new(&file, entry, source)) < 0)
213 		goto cleanup;
214 
215 	/* advance over a UTF8 BOM */
216 	content_str = git_buf_cstr(&content);
217 	bom_offset = git_buf_detect_bom(&bom, &content);
218 
219 	if (bom == GIT_BUF_BOM_UTF8)
220 		content_str += bom_offset;
221 
222 	/* store the key of the attr_reader; don't bother with cache
223 	 * invalidation during the same attr reader session.
224 	 */
225 	if (attr_session)
226 		file->session_key = attr_session->key;
227 
228 	if (parser && (error = parser(repo, file, content_str, allow_macros)) < 0) {
229 		git_attr_file__free(file);
230 		goto cleanup;
231 	}
232 
233 	/* write cache breakers */
234 	if (nonexistent)
235 		file->nonexistent = 1;
236 	else if (source->type == GIT_ATTR_FILE_SOURCE_INDEX)
237 		git_oid_cpy(&file->cache_data.oid, git_blob_id(blob));
238 	else if (source->type == GIT_ATTR_FILE_SOURCE_HEAD)
239 		git_oid_cpy(&file->cache_data.oid, git_tree_id(tree));
240 	else if (source->type == GIT_ATTR_FILE_SOURCE_COMMIT)
241 		git_oid_cpy(&file->cache_data.oid, git_tree_id(tree));
242 	else if (source->type == GIT_ATTR_FILE_SOURCE_FILE)
243 		git_futils_filestamp_set_from_stat(&file->cache_data.stamp, &st);
244 	/* else always cacheable */
245 
246 	*out = file;
247 
248 cleanup:
249 	git_blob_free(blob);
250 	git_tree_entry_free(tree_entry);
251 	git_tree_free(tree);
252 	git_commit_free(commit);
253 	git_buf_dispose(&content);
254 
255 	return error;
256 }
257 
/*
 * Decide whether a cached attribute file must be reloaded.
 *
 * Returns 1 when the file is stale (or `file` is NULL), 0 when it is
 * still current, and a negative error code when the check itself
 * failed.
 */
int git_attr_file__out_of_date(
	git_repository *repo,
	git_attr_session *attr_session,
	git_attr_file *file,
	git_attr_file_source *source)
{
	if (file == NULL)
		return 1;

	/*
	 * Data created within the same attr_session is never considered
	 * out of date; otherwise, nonexistent files must be invalidated.
	 */
	if (attr_session != NULL && attr_session->key == file->session_key)
		return 0;

	if (file->nonexistent)
		return 1;

	switch (file->source.type) {
	case GIT_ATTR_FILE_SOURCE_MEMORY:
		return 0;

	case GIT_ATTR_FILE_SOURCE_FILE:
		return git_futils_filestamp_check(
			&file->cache_data.stamp, file->entry->fullpath);

	case GIT_ATTR_FILE_SOURCE_INDEX: {
		git_oid current;
		int rc = attr_file_oid_from_index(&current, repo, file->entry->path);

		if (rc < 0)
			return rc;

		/* stale when the index now holds a different blob id */
		return (git_oid__cmp(&file->cache_data.oid, &current) != 0);
	}

	case GIT_ATTR_FILE_SOURCE_HEAD: {
		git_tree *head_tree = NULL;
		int stale;
		int rc = git_repository_head_tree(&head_tree, repo);

		if (rc < 0)
			return rc;

		stale = (git_oid__cmp(&file->cache_data.oid, git_tree_id(head_tree)) != 0);
		git_tree_free(head_tree);

		return stale;
	}

	case GIT_ATTR_FILE_SOURCE_COMMIT: {
		git_commit *commit = NULL;
		git_tree *commit_tree = NULL;
		int stale;
		int rc = git_commit_lookup(&commit, repo, source->commit_id);

		if (rc < 0)
			return rc;

		/* the commit is only needed to reach its tree */
		rc = git_commit_tree(&commit_tree, commit);
		git_commit_free(commit);

		if (rc < 0)
			return rc;

		stale = (git_oid__cmp(&file->cache_data.oid, git_tree_id(commit_tree)) != 0);
		git_tree_free(commit_tree);

		return stale;
	}

	default:
		git_error_set(GIT_ERROR_INVALID, "invalid file type %d", file->source.type);
		return -1;
	}
}
332 
333 static int sort_by_hash_and_name(const void *a_raw, const void *b_raw);
334 static void git_attr_rule__clear(git_attr_rule *rule);
335 static bool parse_optimized_patterns(
336 	git_attr_fnmatch *spec,
337 	git_pool *pool,
338 	const char *pattern);
339 
/*
 * Parse the NUL-terminated attribute text in `data` and add the
 * resulting rules to `attrs`.
 *
 * repo:         passed on to assignment parsing and to macro insertion
 * attrs:        file that receives the rules; its lock is held while
 *               parsing
 * data:         text to parse
 * allow_macros: when false, macro rules are parsed but discarded
 *
 * Returns 0 on success or a negative error code. The lock is released
 * on every exit path taken after it was acquired.
 */
int git_attr_file__parse_buffer(
	git_repository *repo, git_attr_file *attrs, const char *data, bool allow_macros)
{
	const char *scan = data, *context = NULL;
	git_attr_rule *rule = NULL;
	int error = 0;

	/* If subdir file path, convert context for file paths */
	if (attrs->entry && git_path_root(attrs->entry->path) < 0 &&
	    !git__suffixcmp(attrs->entry->path, "/" GIT_ATTR_FILE))
		context = attrs->entry->path;

	if (git_mutex_lock(&attrs->lock) < 0) {
		git_error_set(GIT_ERROR_OS, "failed to lock attribute file");
		return -1;
	}

	while (!error && *scan) {
		/* Allocate rule if needed, otherwise re-use previous rule */
		if (!rule) {
			rule = git__calloc(1, sizeof(*rule));
			if (!rule) {
				/*
				 * Leave through `out` rather than returning
				 * directly: the lock is held at this point.
				 */
				error = -1;
				goto out;
			}
		} else
			git_attr_rule__clear(rule);

		rule->match.flags = GIT_ATTR_FNMATCH_ALLOWNEG | GIT_ATTR_FNMATCH_ALLOWMACRO;

		/* Parse the next "pattern attr attr attr" line */
		if ((error = git_attr_fnmatch__parse(&rule->match, &attrs->pool, context, &scan)) < 0 ||
		    (error = git_attr_assignment__parse(repo, &attrs->pool, &rule->assigns, &scan)) < 0)
		{
			/* GIT_ENOTFOUND only means "nothing on this line" */
			if (error != GIT_ENOTFOUND)
				goto out;
			error = 0;
			continue;
		}

		if (rule->match.flags & GIT_ATTR_FNMATCH_MACRO) {
			/* TODO: warning if macro found in file below repo root */
			if (!allow_macros)
				continue;
			if ((error = git_attr_cache__insert_macro(repo, rule)) < 0)
				goto out;
		} else if ((error = git_vector_insert(&attrs->rules, rule)) < 0)
			goto out;

		/* the rule has been handed off; allocate a fresh one next time */
		rule = NULL;
	}

out:
	git_mutex_unlock(&attrs->lock);
	/* frees a leftover rule; git_attr_rule__free tolerates NULL */
	git_attr_rule__free(rule);

	return error;
}
395 
/*
 * Hash an attribute name: start from 5381 and, for every character,
 * multiply the running value by 33 (shift-and-add) and add the
 * character. git_attr_assignment__parse() computes the same value
 * inline while scanning a name, so the two must stay in step.
 */
uint32_t git_attr_file__name_hash(const char *name)
{
	uint32_t hash = 5381;
	int ch;

	GIT_ASSERT_ARG(name);

	for (ch = (int)*name; ch != 0; ch = (int)*++name)
		hash = ((hash << 5) + hash) + ch;

	return hash;
}
407 
/*
 * Find the value assigned to attribute `attr` for `path` in `file`.
 *
 * `*value` is set to NULL first and then to the value of the first
 * matching rule (in the order produced by
 * git_attr_file__foreach_matching_rule) that assigns `attr`.
 *
 * Always returns 0.
 */
int git_attr_file__lookup_one(
	git_attr_file *file,
	git_attr_path *path,
	const char *attr,
	const char **value)
{
	git_attr_name key;
	git_attr_rule *rule;
	size_t i;

	*value = NULL;

	key.name = attr;
	key.name_hash = git_attr_file__name_hash(attr);

	git_attr_file__foreach_matching_rule(file, path, i, rule) {
		size_t at;

		if (git_vector_bsearch(&at, &rule->assigns, &key) == 0) {
			git_attr_assignment *hit = git_vector_get(&rule->assigns, at);

			*value = hit->value;
			break;
		}
	}

	return 0;
}
435 
/*
 * Read the file at `path`, parse it as an attribute file with macros
 * allowed, and give it a cache entry of its own.
 *
 * `*out` is only written on success. Returns the result of the last
 * step performed; a negative value means failure, in which case any
 * partially built file has been released.
 */
int git_attr_file__load_standalone(git_attr_file **out, const char *path)
{
	git_attr_file_source source = { GIT_ATTR_FILE_SOURCE_FILE };
	git_buf content = GIT_BUF_INIT;
	git_attr_file *file = NULL;
	int error;

	error = git_futils_readbuffer(&content, path);

	if (error >= 0)
		error = git_attr_file__new(&file, NULL, &source);

	if (error >= 0)
		error = git_attr_file__parse_buffer(NULL, file, content.ptr, true);

	/*
	 * Because the cache entry is allocated from the file's own pool, we
	 * don't have to free it - freeing file+pool will free cache entry, too.
	 */
	if (error >= 0)
		error = git_attr_cache__alloc_file_entry(
			&file->entry, NULL, NULL, path, &file->pool);

	if (error < 0)
		git_attr_file__free(file);
	else
		*out = file;

	git_buf_dispose(&content);

	return error;
}
464 
/*
 * Test whether `path` matches the pattern in `match`. Negation is not
 * applied here; see git_attr_rule__match().
 */
bool git_attr_fnmatch__match(
	git_attr_fnmatch *match,
	git_attr_path *path)
{
	const bool icase = (match->flags & GIT_ATTR_FNMATCH_ICASE) != 0;
	const char *relpath = path->path;
	const char *subject;
	int wm_flags = icase ? WM_CASEFOLD : 0;

	/*
	 * A rule that came from a subdirectory only applies to paths
	 * inside that directory, so a prefix mismatch is a non-match.
	 */
	if (match->containing_dir) {
		int mismatch = icase ?
			git__strncasecmp(path->path, match->containing_dir,
				match->containing_dir_length) :
			git__prefixcmp(path->path, match->containing_dir);

		if (mismatch)
			return false;

		relpath += match->containing_dir_length;
	}

	/* full-path patterns see the relative path, others only the basename */
	if (match->flags & GIT_ATTR_FNMATCH_FULLPATH) {
		subject = relpath;
		wm_flags |= WM_PATHNAME;
	} else {
		subject = path->basename;
	}

	if ((match->flags & GIT_ATTR_FNMATCH_DIRECTORY) && !path->is_dir) {
		bool samename;

		/*
		 * for attribute checks or checks at the root of this match's
		 * containing_dir (or root of the repository if no containing_dir),
		 * do not match.
		 */
		if (!(match->flags & GIT_ATTR_FNMATCH_IGNORE))
			return false;
		if (path->basename == relpath)
			return false;

		/* fail match if this is a file with same name as ignored folder */
		if (icase)
			samename = (strcasecmp(match->pattern, relpath) == 0);
		else
			samename = (strcmp(match->pattern, relpath) == 0);

		if (samename)
			return false;

		/* directory-only patterns are always tested against the relative path */
		subject = relpath;
	}

	return (wildmatch(match->pattern, subject, wm_flags) == WM_MATCH);
}
525 
/*
 * Test `path` against the rule's pattern, inverting the result when
 * the rule carries GIT_ATTR_FNMATCH_NEGATIVE.
 */
bool git_attr_rule__match(
	git_attr_rule *rule,
	git_attr_path *path)
{
	bool hit = git_attr_fnmatch__match(&rule->match, path);
	bool negated = (rule->match.flags & GIT_ATTR_FNMATCH_NEGATIVE) != 0;

	return negated ? !hit : hit;
}
537 
git_attr_rule__lookup_assignment(git_attr_rule * rule,const char * name)538 git_attr_assignment *git_attr_rule__lookup_assignment(
539 	git_attr_rule *rule, const char *name)
540 {
541 	size_t pos;
542 	git_attr_name key;
543 	key.name = name;
544 	key.name_hash = git_attr_file__name_hash(name);
545 
546 	if (git_vector_bsearch(&pos, &rule->assigns, &key))
547 		return NULL;
548 
549 	return git_vector_get(&rule->assigns, pos);
550 }
551 
/*
 * Fill in `info` for `path` (joined onto `base` by
 * git_path_join_unrooted).
 *
 * After a successful call `info->full` holds the joined path without
 * trailing slashes, `info->path` points into it past the root offset
 * and any leading slashes, and `info->basename` points at the last
 * path component. `dir_flag` says whether the path is a directory;
 * for any value other than GIT_DIR_FLAG_FALSE / GIT_DIR_FLAG_TRUE the
 * answer comes from git_path_isdir().
 *
 * Returns 0 on success, -1 if the path could not be joined.
 */
int git_attr_path__init(
	git_attr_path *info,
	const char *path,
	const char *base,
	git_dir_flag dir_flag)
{
	const char *last_slash;
	ssize_t root;

	/* build full path as best we can */
	git_buf_init(&info->full, 0);

	if (git_path_join_unrooted(&info->full, path, base, &root) < 0)
		return -1;

	info->path = info->full.ptr + root;

	/* remove trailing slashes */
	while (info->full.size > 0 &&
	       info->full.ptr[info->full.size - 1] == '/')
		info->full.size--;
	info->full.ptr[info->full.size] = '\0';

	/* skip leading slashes in path */
	while (*info->path == '/')
		info->path++;

	/* the basename follows the last slash, if any non-empty text does */
	last_slash = strrchr(info->path, '/');
	if (last_slash != NULL && last_slash[1] != '\0')
		info->basename = last_slash + 1;
	else
		info->basename = info->path;

	if (dir_flag == GIT_DIR_FLAG_FALSE)
		info->is_dir = 0;
	else if (dir_flag == GIT_DIR_FLAG_TRUE)
		info->is_dir = 1;
	else
		/* GIT_DIR_FLAG_UNKNOWN and anything else */
		info->is_dir = (int)git_path_isdir(info->full.ptr);

	return 0;
}
605 
/*
 * Release the buffer behind `info`. `path` and `basename` are set up
 * by git_attr_path__init() as pointers into that buffer, so they are
 * reset as well.
 */
void git_attr_path__free(git_attr_path *info)
{
	info->basename = NULL;
	info->path = NULL;
	git_buf_dispose(&info->full);
}
612 
613 /*
614  * From gitattributes(5):
615  *
616  * Patterns have the following format:
617  *
618  * - A blank line matches no files, so it can serve as a separator for
619  *   readability.
620  *
621  * - A line starting with # serves as a comment.
622  *
623  * - An optional prefix ! which negates the pattern; any matching file
624  *   excluded by a previous pattern will become included again. If a negated
625  *   pattern matches, this will override lower precedence patterns sources.
626  *
627  * - If the pattern ends with a slash, it is removed for the purpose of the
628  *   following description, but it would only find a match with a directory. In
629  *   other words, foo/ will match a directory foo and paths underneath it, but
630  *   will not match a regular file or a symbolic link foo (this is consistent
631  *   with the way how pathspec works in general in git).
632  *
633  * - If the pattern does not contain a slash /, git treats it as a shell glob
634  *   pattern and checks for a match against the pathname without leading
635  *   directories.
636  *
637  * - Otherwise, git treats the pattern as a shell glob suitable for consumption
638  *   by fnmatch(3) with the FNM_PATHNAME flag: wildcards in the pattern will
639  *   not match a / in the pathname. For example, "Documentation/\*.html" matches
640  *   "Documentation/git.html" but not "Documentation/ppc/ppc.html". A leading
641  *   slash matches the beginning of the pathname; for example, "/\*.c" matches
642  *   "cat-file.c" but not "mozilla-sha1/sha1.c".
643  */
644 
/*
 * Determine the length of trailing whitespace (spaces and tabs) in the
 * first `len` bytes of `p`. An escaped whitespace character does not
 * count as trailing whitespace and stops the scan.
 */
static size_t trailing_space_length(const char *p, size_t len)
{
	size_t end = len;

	while (end > 0) {
		char last = p[end - 1];
		size_t backslashes = 0;

		if (last != ' ' && last != '\t')
			break;

		/*
		 * Count the escape characters directly in front of the
		 * whitespace. With an even count each escape character is
		 * itself escaped and the whitespace is an unescaped
		 * whitespace. With an odd count the last escape character
		 * is not escaped and the whitespace is an escaped
		 * whitespace.
		 */
		while (backslashes + 1 < end && p[end - 2 - backslashes] == '\\')
			backslashes++;

		if (backslashes % 2)
			break;

		end--;
	}

	return len - end;
}
671 
/*
 * Rewrite `str` in place, dropping the backslash from every escaped
 * whitespace character. Escapes in front of any other character are
 * kept, and a lone backslash at the very end is dropped.
 *
 * Returns the length of the rewritten string, or 0 for a NULL `str`.
 */
static size_t unescape_spaces(char *str)
{
	char *in, *outp;
	bool pending_escape = false;

	if (str == NULL)
		return 0;

	in = str;
	outp = str;

	while (*in != '\0') {
		char c = *in++;

		if (c == '\\' && !pending_escape) {
			pending_escape = true;
			continue;
		}

		/* Only insert the escape character for escaped non-spaces */
		if (pending_escape && !git__isspace(c))
			*outp++ = '\\';

		*outp++ = c;
		pending_escape = false;
	}

	/* re-terminate only if the string actually got shorter */
	if (outp != in)
		*outp = '\0';

	return (outp - str);
}
699 
700 /*
701  * This will return 0 if the spec was filled out,
702  * GIT_ENOTFOUND if the fnmatch does not require matching, or
703  * another error code there was an actual problem.
704  */
git_attr_fnmatch__parse(git_attr_fnmatch * spec,git_pool * pool,const char * context,const char ** base)705 int git_attr_fnmatch__parse(
706 	git_attr_fnmatch *spec,
707 	git_pool *pool,
708 	const char *context,
709 	const char **base)
710 {
711 	const char *pattern, *scan;
712 	int slash_count, allow_space;
713 	bool escaped;
714 
715 	GIT_ASSERT_ARG(spec);
716 	GIT_ASSERT_ARG(base && *base);
717 
718 	if (parse_optimized_patterns(spec, pool, *base))
719 		return 0;
720 
721 	spec->flags = (spec->flags & GIT_ATTR_FNMATCH__INCOMING);
722 	allow_space = ((spec->flags & GIT_ATTR_FNMATCH_ALLOWSPACE) != 0);
723 
724 	pattern = *base;
725 
726 	while (!allow_space && git__isspace(*pattern))
727 		pattern++;
728 
729 	if (!*pattern || *pattern == '#' || *pattern == '\n' ||
730 	    (*pattern == '\r' && *(pattern + 1) == '\n')) {
731 		*base = git__next_line(pattern);
732 		return GIT_ENOTFOUND;
733 	}
734 
735 	if (*pattern == '[' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWMACRO) != 0) {
736 		if (strncmp(pattern, "[attr]", 6) == 0) {
737 			spec->flags = spec->flags | GIT_ATTR_FNMATCH_MACRO;
738 			pattern += 6;
739 		}
740 		/* else a character range like [a-e]* which is accepted */
741 	}
742 
743 	if (*pattern == '!' && (spec->flags & GIT_ATTR_FNMATCH_ALLOWNEG) != 0) {
744 		spec->flags = spec->flags | GIT_ATTR_FNMATCH_NEGATIVE;
745 		pattern++;
746 	}
747 
748 	slash_count = 0;
749 	escaped = false;
750 	/* Scan until a non-escaped whitespace. */
751 	for (scan = pattern; *scan != '\0'; ++scan) {
752 		char c = *scan;
753 
754 		if (c == '\\' && !escaped) {
755 			escaped = true;
756 			continue;
757 		} else if (git__isspace(c) && !escaped) {
758 			if (!allow_space || (c != ' ' && c != '\t' && c != '\r'))
759 				break;
760 		} else if (c == '/') {
761 			spec->flags = spec->flags | GIT_ATTR_FNMATCH_FULLPATH;
762 			slash_count++;
763 
764 			if (slash_count == 1 && pattern == scan)
765 				pattern++;
766 		} else if (git__iswildcard(c) && !escaped) {
767 			/* remember if we see an unescaped wildcard in pattern */
768 			spec->flags = spec->flags | GIT_ATTR_FNMATCH_HASWILD;
769 		}
770 
771 		escaped = false;
772 	}
773 
774 	*base = scan;
775 
776 	if ((spec->length = scan - pattern) == 0)
777 		return GIT_ENOTFOUND;
778 
779 	/*
780 	 * Remove one trailing \r in case this is a CRLF delimited
781 	 * file, in the case of Icon\r\r\n, we still leave the first
782 	 * \r there to match against.
783 	 */
784 	if (pattern[spec->length - 1] == '\r')
785 		if (--spec->length == 0)
786 			return GIT_ENOTFOUND;
787 
788 	/* Remove trailing spaces. */
789 	spec->length -= trailing_space_length(pattern, spec->length);
790 
791 	if (spec->length == 0)
792 		return GIT_ENOTFOUND;
793 
794 	if (pattern[spec->length - 1] == '/') {
795 		spec->length--;
796 		spec->flags = spec->flags | GIT_ATTR_FNMATCH_DIRECTORY;
797 		if (--slash_count <= 0)
798 			spec->flags = spec->flags & ~GIT_ATTR_FNMATCH_FULLPATH;
799 	}
800 
801 	if (context) {
802 		char *slash = strrchr(context, '/');
803 		size_t len;
804 		if (slash) {
805 			/* include the slash for easier matching */
806 			len = slash - context + 1;
807 			spec->containing_dir = git_pool_strndup(pool, context, len);
808 			spec->containing_dir_length = len;
809 		}
810 	}
811 
812 	spec->pattern = git_pool_strndup(pool, pattern, spec->length);
813 
814 	if (!spec->pattern) {
815 		*base = git__next_line(pattern);
816 		return -1;
817 	} else {
818 		/* strip '\' that might have been used for internal whitespace */
819 		spec->length = unescape_spaces(spec->pattern);
820 	}
821 
822 	return 0;
823 }
824 
/*
 * Fast path for a buffer that consists of nothing but "*" or ".":
 * mark the spec as GIT_ATTR_FNMATCH_MATCH_ALL and store that single
 * character as its pattern.
 *
 * Returns true when the fast path applied, false otherwise (in which
 * case `spec` is left untouched).
 */
static bool parse_optimized_patterns(
	git_attr_fnmatch *spec,
	git_pool *pool,
	const char *pattern)
{
	/*
	 * Check pattern[0] first: for an empty string this keeps the
	 * byte after the terminating NUL from being read.
	 */
	if ((pattern[0] == '*' || pattern[0] == '.') && !pattern[1]) {
		spec->flags = GIT_ATTR_FNMATCH_MATCH_ALL;
		spec->pattern = git_pool_strndup(pool, pattern, 1);
		spec->length = 1;

		return true;
	}

	return false;
}
840 
sort_by_hash_and_name(const void * a_raw,const void * b_raw)841 static int sort_by_hash_and_name(const void *a_raw, const void *b_raw)
842 {
843 	const git_attr_name *a = a_raw;
844 	const git_attr_name *b = b_raw;
845 
846 	if (b->name_hash < a->name_hash)
847 		return 1;
848 	else if (b->name_hash > a->name_hash)
849 		return -1;
850 	else
851 		return strcmp(b->name, a->name);
852 }
853 
/*
 * Free an assignment structure. Its name and value live in a git_pool
 * associated with the git_attr_file, so only the pointers are reset
 * here; the strings themselves are not freed.
 */
static void git_attr_assignment__free(git_attr_assignment *assign)
{
	assign->value = NULL;
	assign->name = NULL;

	git__free(assign);
}
863 
merge_assignments(void ** old_raw,void * new_raw)864 static int merge_assignments(void **old_raw, void *new_raw)
865 {
866 	git_attr_assignment **old = (git_attr_assignment **)old_raw;
867 	git_attr_assignment *new = (git_attr_assignment *)new_raw;
868 
869 	GIT_REFCOUNT_DEC(*old, git_attr_assignment__free);
870 	*old = new;
871 	return GIT_EEXISTS;
872 }
873 
/*
 * Parse the attribute assignments on the current line, starting at
 * `*base`, into `assigns`.
 *
 * repo:    optional; when given, an attribute that is set to true and
 *          names a known macro also pulls in the macro's assignments
 * pool:    pool that owns the copies of names and values
 * assigns: vector to fill; must be empty on entry
 * base:    in/out scan position; moved to the next line on success
 *
 * Returns 0 if at least one assignment was stored, GIT_ENOTFOUND if
 * the line held none, or a negative error code. On error the
 * assignment under construction is released and `*base` is left
 * untouched.
 */
int git_attr_assignment__parse(
	git_repository *repo,
	git_pool *pool,
	git_vector *assigns,
	const char **base)
{
	int error;
	const char *scan = *base;
	git_attr_assignment *assign = NULL;

	GIT_ASSERT_ARG(assigns && !assigns->length);

	git_vector_set_cmp(assigns, sort_by_hash_and_name);

	while (*scan && *scan != '\n') {
		const char *name_start, *value_start;

		/* skip leading blanks */
		while (git__isspace(*scan) && *scan != '\n') scan++;

		/* allocate assign if needed */
		if (!assign) {
			assign = git__calloc(1, sizeof(git_attr_assignment));
			GIT_ERROR_CHECK_ALLOC(assign);
			GIT_REFCOUNT_INC(assign);
		}

		/* same starting value as git_attr_file__name_hash() */
		assign->name_hash = 5381;
		assign->value = git_attr__true;

		/* look for magic name prefixes */
		if (*scan == '-') {
			assign->value = git_attr__false;
			scan++;
		} else if (*scan == '!') {
			assign->value = git_attr__unset; /* explicit unspecified state */
			scan++;
		} else if (*scan == '#') /* comment rest of line */
			break;

		/* find the name */
		name_start = scan;
		while (*scan && !git__isspace(*scan) && *scan != '=') {
			assign->name_hash =
				((assign->name_hash << 5) + assign->name_hash) + *scan;
			scan++;
		}
		if (scan == name_start) {
			/* must have found lone prefix (" - ") or leading = ("=foo")
			 * or end of buffer -- advance until whitespace and continue
			 */
			while (*scan && !git__isspace(*scan)) scan++;
			continue;
		}

		/* allocate permanent storage for name */
		assign->name = git_pool_strndup(pool, name_start, scan - name_start);
		if (!assign->name) {
			error = -1;
			goto on_error;
		}

		/* if there is an equals sign, find the value */
		if (*scan == '=') {
			for (value_start = ++scan; *scan && !git__isspace(*scan); ++scan);

			/* if we found a value, allocate permanent storage for it */
			if (scan > value_start) {
				assign->value = git_pool_strndup(pool, value_start, scan - value_start);
				if (!assign->value) {
					error = -1;
					goto on_error;
				}
			}
		}

		/* expand macros (if given a repo with a macro cache) */
		if (repo != NULL && assign->value == git_attr__true) {
			git_attr_rule *macro =
				git_attr_cache__lookup_macro(repo, assign->name);

			if (macro != NULL) {
				unsigned int i;
				git_attr_assignment *massign;

				git_vector_foreach(&macro->assigns, i, massign) {
					GIT_REFCOUNT_INC(massign);

					error = git_vector_insert_sorted(
						assigns, massign, &merge_assignments);
					if (error < 0 && error != GIT_EEXISTS) {
						/* not stored: give back the reference taken above */
						GIT_REFCOUNT_DEC(massign, git_attr_assignment__free);
						goto on_error;
					}
				}
			}
		}

		/* insert allocated assign into vector */
		error = git_vector_insert_sorted(assigns, assign, &merge_assignments);
		if (error < 0 && error != GIT_EEXISTS)
			goto on_error;

		/* clear assign since it is now "owned" by the vector */
		assign = NULL;
	}

	/* an assignment allocated for a line tail that held no name */
	if (assign != NULL)
		git_attr_assignment__free(assign);

	*base = git__next_line(scan);

	return (assigns->length == 0) ? GIT_ENOTFOUND : 0;

on_error:
	/*
	 * Only reached after `assign` was allocated and before the vector
	 * took ownership of it, so it is non-NULL and still ours to free.
	 */
	git_attr_assignment__free(assign);
	return error;
}
982 
/*
 * Reset `rule` so it can be reused: unless the rule is flagged
 * GIT_ATTR_FNMATCH_IGNORE, drop a reference to each of its assignments
 * and free the assignment vector, then forget the pattern. A NULL
 * `rule` is ignored.
 */
static void git_attr_rule__clear(git_attr_rule *rule)
{
	if (rule == NULL)
		return;

	if ((rule->match.flags & GIT_ATTR_FNMATCH_IGNORE) == 0) {
		git_attr_assignment *entry;
		unsigned int idx;

		git_vector_foreach(&rule->assigns, idx, entry) {
			GIT_REFCOUNT_DEC(entry, git_attr_assignment__free);
		}
		git_vector_free(&rule->assigns);
	}

	/* match.pattern is stored in a git_pool, so no need to free */
	rule->match.length = 0;
	rule->match.pattern = NULL;
}
1001 
/*
 * Release everything a rule holds and then the rule itself.
 * git_attr_rule__clear() ignores a NULL rule, so `rule` may be NULL.
 */
void git_attr_rule__free(git_attr_rule *rule)
{
	git_attr_rule__clear(rule);

	git__free(rule);
}
1007 
/*
 * Zero `session` and give it a fresh key taken from the repository's
 * atomic session counter. Always returns 0 once `repo` has passed the
 * argument check.
 */
int git_attr_session__init(git_attr_session *session, git_repository *repo)
{
	GIT_ASSERT_ARG(repo);

	memset(session, 0, sizeof(git_attr_session));

	session->key = git_atomic32_inc(&repo->attr_session_key);

	return 0;
}
1017 
/*
 * Dispose of the session's buffers and zero the structure. The
 * session itself is not freed. A NULL `session` is ignored.
 */
void git_attr_session__free(git_attr_session *session)
{
	if (session == NULL)
		return;

	git_buf_dispose(&session->tmp);
	git_buf_dispose(&session->sysdir);

	memset(session, 0, sizeof(*session));
}
1028