1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "iterator.h"
9 
10 #include "tree.h"
11 #include "index.h"
12 
13 #define GIT_ITERATOR_FIRST_ACCESS   (1 << 15)
14 #define GIT_ITERATOR_HONOR_IGNORES  (1 << 16)
15 #define GIT_ITERATOR_IGNORE_DOT_GIT (1 << 17)
16 
17 #define iterator__flag(I,F) ((((git_iterator *)(I))->flags & GIT_ITERATOR_ ## F) != 0)
18 #define iterator__ignore_case(I)       iterator__flag(I,IGNORE_CASE)
19 #define iterator__include_trees(I)     iterator__flag(I,INCLUDE_TREES)
20 #define iterator__dont_autoexpand(I)   iterator__flag(I,DONT_AUTOEXPAND)
21 #define iterator__do_autoexpand(I)    !iterator__flag(I,DONT_AUTOEXPAND)
22 #define iterator__include_conflicts(I) iterator__flag(I,INCLUDE_CONFLICTS)
23 #define iterator__has_been_accessed(I) iterator__flag(I,FIRST_ACCESS)
24 #define iterator__honor_ignores(I)     iterator__flag(I,HONOR_IGNORES)
25 #define iterator__ignore_dot_git(I)    iterator__flag(I,IGNORE_DOT_GIT)
26 #define iterator__descend_symlinks(I)  iterator__flag(I,DESCEND_SYMLINKS)
27 
28 
iterator_set_ignore_case(git_iterator * iter,bool ignore_case)29 static void iterator_set_ignore_case(git_iterator *iter, bool ignore_case)
30 {
31 	if (ignore_case)
32 		iter->flags |= GIT_ITERATOR_IGNORE_CASE;
33 	else
34 		iter->flags &= ~GIT_ITERATOR_IGNORE_CASE;
35 
36 	iter->strcomp = ignore_case ? git__strcasecmp : git__strcmp;
37 	iter->strncomp = ignore_case ? git__strncasecmp : git__strncmp;
38 	iter->prefixcomp = ignore_case ? git__prefixcmp_icase : git__prefixcmp;
39 	iter->entry_srch = ignore_case ? git_index_entry_isrch : git_index_entry_srch;
40 
41 	git_vector_set_cmp(&iter->pathlist, (git_vector_cmp)iter->strcomp);
42 }
43 
iterator_range_init(git_iterator * iter,const char * start,const char * end)44 static int iterator_range_init(
45 	git_iterator *iter, const char *start, const char *end)
46 {
47 	if (start && *start) {
48 		iter->start = git__strdup(start);
49 		GIT_ERROR_CHECK_ALLOC(iter->start);
50 
51 		iter->start_len = strlen(iter->start);
52 	}
53 
54 	if (end && *end) {
55 		iter->end = git__strdup(end);
56 		GIT_ERROR_CHECK_ALLOC(iter->end);
57 
58 		iter->end_len = strlen(iter->end);
59 	}
60 
61 	iter->started = (iter->start == NULL);
62 	iter->ended = false;
63 
64 	return 0;
65 }
66 
iterator_range_free(git_iterator * iter)67 static void iterator_range_free(git_iterator *iter)
68 {
69 	if (iter->start) {
70 		git__free(iter->start);
71 		iter->start = NULL;
72 		iter->start_len = 0;
73 	}
74 
75 	if (iter->end) {
76 		git__free(iter->end);
77 		iter->end = NULL;
78 		iter->end_len = 0;
79 	}
80 }
81 
iterator_reset_range(git_iterator * iter,const char * start,const char * end)82 static int iterator_reset_range(
83 	git_iterator *iter, const char *start, const char *end)
84 {
85 	iterator_range_free(iter);
86 	return iterator_range_init(iter, start, end);
87 }
88 
iterator_pathlist_init(git_iterator * iter,git_strarray * pathlist)89 static int iterator_pathlist_init(git_iterator *iter, git_strarray *pathlist)
90 {
91 	size_t i;
92 
93 	if (git_vector_init(&iter->pathlist, pathlist->count, NULL) < 0)
94 		return -1;
95 
96 	for (i = 0; i < pathlist->count; i++) {
97 		if (!pathlist->strings[i])
98 			continue;
99 
100 		if (git_vector_insert(&iter->pathlist, pathlist->strings[i]) < 0)
101 			return -1;
102 	}
103 
104 	return 0;
105 }
106 
iterator_init_common(git_iterator * iter,git_repository * repo,git_index * index,git_iterator_options * given_opts)107 static int iterator_init_common(
108 	git_iterator *iter,
109 	git_repository *repo,
110 	git_index *index,
111 	git_iterator_options *given_opts)
112 {
113 	static git_iterator_options default_opts = GIT_ITERATOR_OPTIONS_INIT;
114 	git_iterator_options *options = given_opts ? given_opts : &default_opts;
115 	bool ignore_case;
116 	int precompose;
117 	int error;
118 
119 	iter->repo = repo;
120 	iter->index = index;
121 	iter->flags = options->flags;
122 
123 	if ((iter->flags & GIT_ITERATOR_IGNORE_CASE) != 0) {
124 		ignore_case = true;
125 	} else if ((iter->flags & GIT_ITERATOR_DONT_IGNORE_CASE) != 0) {
126 		ignore_case = false;
127 	} else if (repo) {
128 		git_index *index;
129 
130 		if ((error = git_repository_index__weakptr(&index, iter->repo)) < 0)
131 			return error;
132 
133 		ignore_case = !!index->ignore_case;
134 
135 		if (ignore_case == 1)
136 			iter->flags |= GIT_ITERATOR_IGNORE_CASE;
137 		else
138 			iter->flags |= GIT_ITERATOR_DONT_IGNORE_CASE;
139 	} else {
140 		ignore_case = false;
141 	}
142 
143 	/* try to look up precompose and set flag if appropriate */
144 	if (repo &&
145 		(iter->flags & GIT_ITERATOR_PRECOMPOSE_UNICODE) == 0 &&
146 		(iter->flags & GIT_ITERATOR_DONT_PRECOMPOSE_UNICODE) == 0) {
147 
148 		if (git_repository__configmap_lookup(&precompose, repo, GIT_CONFIGMAP_PRECOMPOSE) < 0)
149 			git_error_clear();
150 		else if (precompose)
151 			iter->flags |= GIT_ITERATOR_PRECOMPOSE_UNICODE;
152 	}
153 
154 	if ((iter->flags & GIT_ITERATOR_DONT_AUTOEXPAND))
155 		iter->flags |= GIT_ITERATOR_INCLUDE_TREES;
156 
157 	if ((error = iterator_range_init(iter, options->start, options->end)) < 0 ||
158 		(error = iterator_pathlist_init(iter, &options->pathlist)) < 0)
159 		return error;
160 
161 	iterator_set_ignore_case(iter, ignore_case);
162 	return 0;
163 }
164 
iterator_clear(git_iterator * iter)165 static void iterator_clear(git_iterator *iter)
166 {
167 	iter->started = false;
168 	iter->ended = false;
169 	iter->stat_calls = 0;
170 	iter->pathlist_walk_idx = 0;
171 	iter->flags &= ~GIT_ITERATOR_FIRST_ACCESS;
172 }
173 
iterator_has_started(git_iterator * iter,const char * path,bool is_submodule)174 GIT_INLINE(bool) iterator_has_started(
175 	git_iterator *iter, const char *path, bool is_submodule)
176 {
177 	size_t path_len;
178 
179 	if (iter->start == NULL || iter->started == true)
180 		return true;
181 
182 	/* the starting path is generally a prefix - we have started once we
183 	 * are prefixed by this path
184 	 */
185 	iter->started = (iter->prefixcomp(path, iter->start) >= 0);
186 
187 	if (iter->started)
188 		return true;
189 
190 	path_len = strlen(path);
191 
192 	/* if, however, we are a submodule, then we support `start` being
193 	 * suffixed with a `/` for crazy legacy reasons.  match `submod`
194 	 * with a start path of `submod/`.
195 	 */
196 	if (is_submodule && iter->start_len && path_len == iter->start_len - 1 &&
197 		iter->start[iter->start_len-1] == '/')
198 		return true;
199 
200 	/* if, however, our current path is a directory, and our starting path
201 	 * is _beneath_ that directory, then recurse into the directory (even
202 	 * though we have not yet "started")
203 	 */
204 	if (path_len > 0 && path[path_len-1] == '/' &&
205 		iter->strncomp(path, iter->start, path_len) == 0)
206 		return true;
207 
208 	return false;
209 }
210 
iterator_has_ended(git_iterator * iter,const char * path)211 GIT_INLINE(bool) iterator_has_ended(git_iterator *iter, const char *path)
212 {
213 	if (iter->end == NULL)
214 		return false;
215 	else if (iter->ended)
216 		return true;
217 
218 	iter->ended = (iter->prefixcomp(path, iter->end) > 0);
219 	return iter->ended;
220 }
221 
222 /* walker for the index and tree iterator that allows it to walk the sorted
223  * pathlist entries alongside sorted iterator entries.
224  */
iterator_pathlist_next_is(git_iterator * iter,const char * path)225 static bool iterator_pathlist_next_is(git_iterator *iter, const char *path)
226 {
227 	char *p;
228 	size_t path_len, p_len, cmp_len, i;
229 	int cmp;
230 
231 	if (iter->pathlist.length == 0)
232 		return true;
233 
234 	git_vector_sort(&iter->pathlist);
235 
236 	path_len = strlen(path);
237 
238 	/* for comparison, drop the trailing slash on the current '/' */
239 	if (path_len && path[path_len-1] == '/')
240 		path_len--;
241 
242 	for (i = iter->pathlist_walk_idx; i < iter->pathlist.length; i++) {
243 		p = iter->pathlist.contents[i];
244 		p_len = strlen(p);
245 
246 		if (p_len && p[p_len-1] == '/')
247 			p_len--;
248 
249 		cmp_len = min(path_len, p_len);
250 
251 		/* see if the pathlist entry is a prefix of this path */
252 		cmp = iter->strncomp(p, path, cmp_len);
253 
254 		/* prefix match - see if there's an exact match, or if we were
255 		 * given a path that matches the directory
256 		 */
257 		if (cmp == 0) {
258 			/* if this pathlist entry is not suffixed with a '/' then
259 			 * it matches a path that is a file or a directory.
260 			 * (eg, pathlist = "foo" and path is "foo" or "foo/" or
261 			 * "foo/something")
262 			 */
263 			if (p[cmp_len] == '\0' &&
264 				(path[cmp_len] == '\0' || path[cmp_len] == '/'))
265 				return true;
266 
267 			/* if this pathlist entry _is_ suffixed with a '/' then
268 			 * it matches only paths that are directories.
269 			 * (eg, pathlist = "foo/" and path is "foo/" or "foo/something")
270 			 */
271 			if (p[cmp_len] == '/' && path[cmp_len] == '/')
272 				return true;
273 		}
274 
275 		/* this pathlist entry sorts before the given path, try the next */
276 		else if (cmp < 0) {
277 			iter->pathlist_walk_idx++;
278 			continue;
279 		}
280 
281 		/* this pathlist sorts after the given path, no match. */
282 		else if (cmp > 0) {
283 			break;
284 		}
285 	}
286 
287 	return false;
288 }
289 
290 typedef enum {
291 	ITERATOR_PATHLIST_NONE = 0,
292 	ITERATOR_PATHLIST_IS_FILE = 1,
293 	ITERATOR_PATHLIST_IS_DIR = 2,
294 	ITERATOR_PATHLIST_IS_PARENT = 3,
295 	ITERATOR_PATHLIST_FULL = 4,
296 } iterator_pathlist_search_t;
297 
iterator_pathlist_search(git_iterator * iter,const char * path,size_t path_len)298 static iterator_pathlist_search_t iterator_pathlist_search(
299 	git_iterator *iter, const char *path, size_t path_len)
300 {
301 	const char *p;
302 	size_t idx;
303 	int error;
304 
305 	if (iter->pathlist.length == 0)
306 		return ITERATOR_PATHLIST_FULL;
307 
308 	git_vector_sort(&iter->pathlist);
309 
310 	error = git_vector_bsearch2(&idx, &iter->pathlist,
311 		(git_vector_cmp)iter->strcomp, path);
312 
313 	/* the given path was found in the pathlist.  since the pathlist only
314 	 * matches directories when they're suffixed with a '/', analyze the
315 	 * path string to determine whether it's a directory or not.
316 	 */
317 	if (error == 0) {
318 		if (path_len && path[path_len-1] == '/')
319 			return ITERATOR_PATHLIST_IS_DIR;
320 
321 		return ITERATOR_PATHLIST_IS_FILE;
322 	}
323 
324 	/* at this point, the path we're examining may be a directory (though we
325 	 * don't know that yet, since we're avoiding a stat unless it's necessary)
326 	 * so walk the pathlist looking for the given path with a '/' after it,
327 	 */
328 	while ((p = git_vector_get(&iter->pathlist, idx)) != NULL) {
329 		if (iter->prefixcomp(p, path) != 0)
330 			break;
331 
332 		/* an exact match would have been matched by the bsearch above */
333 		assert(p[path_len]);
334 
335 		/* is this a literal directory entry (eg `foo/`) or a file beneath */
336 		if (p[path_len] == '/') {
337 			return (p[path_len+1] == '\0') ?
338 				ITERATOR_PATHLIST_IS_DIR :
339 				ITERATOR_PATHLIST_IS_PARENT;
340 		}
341 
342 		if (p[path_len] > '/')
343 			break;
344 
345 		idx++;
346 	}
347 
348 	return ITERATOR_PATHLIST_NONE;
349 }
350 
351 /* Empty iterator */
352 
empty_iterator_noop(const git_index_entry ** e,git_iterator * i)353 static int empty_iterator_noop(const git_index_entry **e, git_iterator *i)
354 {
355 	GIT_UNUSED(i);
356 
357 	if (e)
358 		*e = NULL;
359 
360 	return GIT_ITEROVER;
361 }
362 
empty_iterator_advance_over(const git_index_entry ** e,git_iterator_status_t * s,git_iterator * i)363 static int empty_iterator_advance_over(
364 	const git_index_entry **e,
365 	git_iterator_status_t *s,
366 	git_iterator *i)
367 {
368 	*s = GIT_ITERATOR_STATUS_EMPTY;
369 	return empty_iterator_noop(e, i);
370 }
371 
empty_iterator_reset(git_iterator * i)372 static int empty_iterator_reset(git_iterator *i)
373 {
374 	GIT_UNUSED(i);
375 	return 0;
376 }
377 
empty_iterator_free(git_iterator * i)378 static void empty_iterator_free(git_iterator *i)
379 {
380 	GIT_UNUSED(i);
381 }
382 
383 typedef struct {
384 	git_iterator base;
385 	git_iterator_callbacks cb;
386 } empty_iterator;
387 
git_iterator_for_nothing(git_iterator ** out,git_iterator_options * options)388 int git_iterator_for_nothing(
389 	git_iterator **out,
390 	git_iterator_options *options)
391 {
392 	empty_iterator *iter;
393 
394 	static git_iterator_callbacks callbacks = {
395 		empty_iterator_noop,
396 		empty_iterator_noop,
397 		empty_iterator_noop,
398 		empty_iterator_advance_over,
399 		empty_iterator_reset,
400 		empty_iterator_free
401 	};
402 
403 	*out = NULL;
404 
405 	iter = git__calloc(1, sizeof(empty_iterator));
406 	GIT_ERROR_CHECK_ALLOC(iter);
407 
408 	iter->base.type = GIT_ITERATOR_TYPE_EMPTY;
409 	iter->base.cb = &callbacks;
410 	iter->base.flags = options->flags;
411 
412 	*out = &iter->base;
413 	return 0;
414 }
415 
416 /* Tree iterator */
417 
418 typedef struct {
419 	git_tree_entry *tree_entry;
420 	const char *parent_path;
421 } tree_iterator_entry;
422 
423 typedef struct {
424 	git_tree *tree;
425 
426 	/* path to this particular frame (folder) */
427 	git_buf path;
428 
429 	/* a sorted list of the entries for this frame (folder), these are
430 	 * actually pointers to the iterator's entry pool.
431 	 */
432 	git_vector entries;
433 	tree_iterator_entry *current;
434 
435 	size_t next_idx;
436 
437 	/* on case insensitive iterations, we also have an array of other
438 	 * paths that were case insensitively equal to this one, and their
439 	 * tree objects.  we have coalesced the tree entries into this frame.
440 	 * a child `tree_iterator_entry` will contain a pointer to its actual
441 	 * parent path.
442 	 */
443 	git_vector similar_trees;
444 	git_array_t(git_buf) similar_paths;
445 } tree_iterator_frame;
446 
447 typedef struct {
448 	git_iterator base;
449 	git_tree *root;
450 	git_array_t(tree_iterator_frame) frames;
451 
452 	git_index_entry entry;
453 	git_buf entry_path;
454 
455 	/* a pool of entries to reduce the number of allocations */
456 	git_pool entry_pool;
457 } tree_iterator;
458 
tree_iterator_parent_frame(tree_iterator * iter)459 GIT_INLINE(tree_iterator_frame *) tree_iterator_parent_frame(
460 	tree_iterator *iter)
461 {
462 	return iter->frames.size > 1 ?
463 		&iter->frames.ptr[iter->frames.size-2] : NULL;
464 }
465 
tree_iterator_current_frame(tree_iterator * iter)466 GIT_INLINE(tree_iterator_frame *) tree_iterator_current_frame(
467 	tree_iterator *iter)
468 {
469 	return iter->frames.size ? &iter->frames.ptr[iter->frames.size-1] : NULL;
470 }
471 
tree_entry_cmp(const git_tree_entry * a,const git_tree_entry * b,bool icase)472 GIT_INLINE(int) tree_entry_cmp(
473 	const git_tree_entry *a, const git_tree_entry *b, bool icase)
474 {
475 	return git_path_cmp(
476 		a->filename, a->filename_len, a->attr == GIT_FILEMODE_TREE,
477 		b->filename, b->filename_len, b->attr == GIT_FILEMODE_TREE,
478 		icase ? git__strncasecmp : git__strncmp);
479 }
480 
tree_iterator_entry_cmp_icase(const void * ptr_a,const void * ptr_b)481 GIT_INLINE(int) tree_iterator_entry_cmp_icase(
482 	const void *ptr_a, const void *ptr_b)
483 {
484 	const tree_iterator_entry *a = (const tree_iterator_entry *)ptr_a;
485 	const tree_iterator_entry *b = (const tree_iterator_entry *)ptr_b;
486 
487 	return tree_entry_cmp(a->tree_entry, b->tree_entry, true);
488 }
489 
tree_iterator_entry_sort_icase(const void * ptr_a,const void * ptr_b)490 static int tree_iterator_entry_sort_icase(const void *ptr_a, const void *ptr_b)
491 {
492 	const tree_iterator_entry *a = (const tree_iterator_entry *)ptr_a;
493 	const tree_iterator_entry *b = (const tree_iterator_entry *)ptr_b;
494 
495 	int c = tree_entry_cmp(a->tree_entry, b->tree_entry, true);
496 
497 	/* stabilize the sort order for filenames that are (case insensitively)
498 	 * the same by examining the parent path (case sensitively) before
499 	 * falling back to a case sensitive sort of the filename.
500 	 */
501 	if (!c && a->parent_path != b->parent_path)
502 		c = git__strcmp(a->parent_path, b->parent_path);
503 
504 	if (!c)
505 		c = tree_entry_cmp(a->tree_entry, b->tree_entry, false);
506 
507 	return c;
508 }
509 
tree_iterator_compute_path(git_buf * out,tree_iterator_entry * entry)510 static int tree_iterator_compute_path(
511 	git_buf *out,
512 	tree_iterator_entry *entry)
513 {
514 	git_buf_clear(out);
515 
516 	if (entry->parent_path)
517 		git_buf_joinpath(out, entry->parent_path, entry->tree_entry->filename);
518 	else
519 		git_buf_puts(out, entry->tree_entry->filename);
520 
521 	if (git_tree_entry__is_tree(entry->tree_entry))
522 		git_buf_putc(out, '/');
523 
524 	if (git_buf_oom(out))
525 		return -1;
526 
527 	return 0;
528 }
529 
tree_iterator_frame_init(tree_iterator * iter,git_tree * tree,tree_iterator_entry * frame_entry)530 static int tree_iterator_frame_init(
531 	tree_iterator *iter,
532 	git_tree *tree,
533 	tree_iterator_entry *frame_entry)
534 {
535 	tree_iterator_frame *new_frame = NULL;
536 	tree_iterator_entry *new_entry;
537 	git_tree *dup = NULL;
538 	git_tree_entry *tree_entry;
539 	git_vector_cmp cmp;
540 	size_t i;
541 	int error = 0;
542 
543 	new_frame = git_array_alloc(iter->frames);
544 	GIT_ERROR_CHECK_ALLOC(new_frame);
545 
546 	memset(new_frame, 0, sizeof(tree_iterator_frame));
547 
548 	if ((error = git_tree_dup(&dup, tree)) < 0)
549 		goto done;
550 
551 	memset(new_frame, 0x0, sizeof(tree_iterator_frame));
552 	new_frame->tree = dup;
553 
554 	if (frame_entry &&
555 		(error = tree_iterator_compute_path(&new_frame->path, frame_entry)) < 0)
556 		goto done;
557 
558 	cmp = iterator__ignore_case(&iter->base) ?
559 		tree_iterator_entry_sort_icase : NULL;
560 
561 	if ((error = git_vector_init(
562 		&new_frame->entries, dup->entries.size, cmp)) < 0)
563 		goto done;
564 
565 	git_array_foreach(dup->entries, i, tree_entry) {
566 		new_entry = git_pool_malloc(&iter->entry_pool, 1);
567 		GIT_ERROR_CHECK_ALLOC(new_entry);
568 
569 		new_entry->tree_entry = tree_entry;
570 		new_entry->parent_path = new_frame->path.ptr;
571 
572 		if ((error = git_vector_insert(&new_frame->entries, new_entry)) < 0)
573 			goto done;
574 	}
575 
576 	git_vector_set_sorted(&new_frame->entries,
577 		!iterator__ignore_case(&iter->base));
578 
579 done:
580 	if (error < 0) {
581 		git_tree_free(dup);
582 		git_array_pop(iter->frames);
583 	}
584 
585 	return error;
586 }
587 
tree_iterator_current_entry(tree_iterator_frame * frame)588 GIT_INLINE(tree_iterator_entry *) tree_iterator_current_entry(
589 	tree_iterator_frame *frame)
590 {
591 	return frame->current;
592 }
593 
tree_iterator_frame_push_neighbors(tree_iterator * iter,tree_iterator_frame * parent_frame,tree_iterator_frame * frame,const char * filename)594 GIT_INLINE(int) tree_iterator_frame_push_neighbors(
595 	tree_iterator *iter,
596 	tree_iterator_frame *parent_frame,
597 	tree_iterator_frame *frame,
598 	const char *filename)
599 {
600 	tree_iterator_entry *entry, *new_entry;
601 	git_tree *tree = NULL;
602 	git_tree_entry *tree_entry;
603 	git_buf *path;
604 	size_t new_size, i;
605 	int error = 0;
606 
607 	while (parent_frame->next_idx < parent_frame->entries.length) {
608 		entry = parent_frame->entries.contents[parent_frame->next_idx];
609 
610 		if (strcasecmp(filename, entry->tree_entry->filename) != 0)
611 			break;
612 
613 		if ((error = git_tree_lookup(&tree,
614 			iter->base.repo, entry->tree_entry->oid)) < 0)
615 			break;
616 
617 		if (git_vector_insert(&parent_frame->similar_trees, tree) < 0)
618 			break;
619 
620 		path = git_array_alloc(parent_frame->similar_paths);
621 		GIT_ERROR_CHECK_ALLOC(path);
622 
623 		memset(path, 0, sizeof(git_buf));
624 
625 		if ((error = tree_iterator_compute_path(path, entry)) < 0)
626 			break;
627 
628 		GIT_ERROR_CHECK_ALLOC_ADD(&new_size,
629 			frame->entries.length, tree->entries.size);
630 		git_vector_size_hint(&frame->entries, new_size);
631 
632 		git_array_foreach(tree->entries, i, tree_entry) {
633 			new_entry = git_pool_malloc(&iter->entry_pool, 1);
634 			GIT_ERROR_CHECK_ALLOC(new_entry);
635 
636 			new_entry->tree_entry = tree_entry;
637 			new_entry->parent_path = path->ptr;
638 
639 			if ((error = git_vector_insert(&frame->entries, new_entry)) < 0)
640 				break;
641 		}
642 
643 		if (error)
644 			break;
645 
646 		parent_frame->next_idx++;
647 	}
648 
649 	return error;
650 }
651 
tree_iterator_frame_push(tree_iterator * iter,tree_iterator_entry * entry)652 GIT_INLINE(int) tree_iterator_frame_push(
653 	tree_iterator *iter, tree_iterator_entry *entry)
654 {
655 	tree_iterator_frame *parent_frame, *frame;
656 	git_tree *tree = NULL;
657 	int error;
658 
659 	if ((error = git_tree_lookup(&tree,
660 			iter->base.repo, entry->tree_entry->oid)) < 0 ||
661 		(error = tree_iterator_frame_init(iter, tree, entry)) < 0)
662 		goto done;
663 
664 	parent_frame = tree_iterator_parent_frame(iter);
665 	frame = tree_iterator_current_frame(iter);
666 
667 	/* if we're case insensitive, then we may have another directory that
668 	 * is (case insensitively) equal to this one.  coalesce those children
669 	 * into this tree.
670 	 */
671 	if (iterator__ignore_case(&iter->base))
672 		error = tree_iterator_frame_push_neighbors(iter,
673 			parent_frame, frame, entry->tree_entry->filename);
674 
675 done:
676 	git_tree_free(tree);
677 	return error;
678 }
679 
tree_iterator_frame_pop(tree_iterator * iter)680 static void tree_iterator_frame_pop(tree_iterator *iter)
681 {
682 	tree_iterator_frame *frame;
683 	git_buf *buf = NULL;
684 	git_tree *tree;
685 	size_t i;
686 
687 	assert(iter->frames.size);
688 
689 	frame = git_array_pop(iter->frames);
690 
691 	git_vector_free(&frame->entries);
692 	git_tree_free(frame->tree);
693 
694 	do {
695 		buf = git_array_pop(frame->similar_paths);
696 		git_buf_dispose(buf);
697 	} while (buf != NULL);
698 
699 	git_array_clear(frame->similar_paths);
700 
701 	git_vector_foreach(&frame->similar_trees, i, tree)
702 		git_tree_free(tree);
703 
704 	git_vector_free(&frame->similar_trees);
705 
706 	git_buf_dispose(&frame->path);
707 }
708 
tree_iterator_current(const git_index_entry ** out,git_iterator * i)709 static int tree_iterator_current(
710 	const git_index_entry **out, git_iterator *i)
711 {
712 	tree_iterator *iter = (tree_iterator *)i;
713 
714 	if (!iterator__has_been_accessed(i))
715 		return iter->base.cb->advance(out, i);
716 
717 	if (!iter->frames.size) {
718 		*out = NULL;
719 		return GIT_ITEROVER;
720 	}
721 
722 	*out = &iter->entry;
723 	return 0;
724 }
725 
tree_iterator_set_current(tree_iterator * iter,tree_iterator_frame * frame,tree_iterator_entry * entry)726 static void tree_iterator_set_current(
727 	tree_iterator *iter,
728 	tree_iterator_frame *frame,
729 	tree_iterator_entry *entry)
730 {
731 	git_tree_entry *tree_entry = entry->tree_entry;
732 
733 	frame->current = entry;
734 
735 	memset(&iter->entry, 0x0, sizeof(git_index_entry));
736 
737 	iter->entry.mode = tree_entry->attr;
738 	iter->entry.path = iter->entry_path.ptr;
739 	git_oid_cpy(&iter->entry.id, tree_entry->oid);
740 }
741 
tree_iterator_advance(const git_index_entry ** out,git_iterator * i)742 static int tree_iterator_advance(const git_index_entry **out, git_iterator *i)
743 {
744 	tree_iterator *iter = (tree_iterator *)i;
745 	int error = 0;
746 
747 	iter->base.flags |= GIT_ITERATOR_FIRST_ACCESS;
748 
749 	/* examine tree entries until we find the next one to return */
750 	while (true) {
751 		tree_iterator_entry *prev_entry, *entry;
752 		tree_iterator_frame *frame;
753 		bool is_tree;
754 
755 		if ((frame = tree_iterator_current_frame(iter)) == NULL) {
756 			error = GIT_ITEROVER;
757 			break;
758 		}
759 
760 		/* no more entries in this frame.  pop the frame out */
761 		if (frame->next_idx == frame->entries.length) {
762 			tree_iterator_frame_pop(iter);
763 			continue;
764 		}
765 
766 		/* we may have coalesced the contents of case-insensitively same-named
767 		 * directories, so do the sort now.
768 		 */
769 		if (frame->next_idx == 0 && !git_vector_is_sorted(&frame->entries))
770 			git_vector_sort(&frame->entries);
771 
772 		/* we have more entries in the current frame, that's our next entry */
773 		prev_entry = tree_iterator_current_entry(frame);
774 		entry = frame->entries.contents[frame->next_idx];
775 		frame->next_idx++;
776 
777 		/* we can have collisions when iterating case insensitively.  (eg,
778 		 * 'A/a' and 'a/A').  squash this one if it's already been seen.
779 		 */
780 		if (iterator__ignore_case(&iter->base) &&
781 			prev_entry &&
782 			tree_iterator_entry_cmp_icase(prev_entry, entry) == 0)
783 			continue;
784 
785 		if ((error = tree_iterator_compute_path(&iter->entry_path, entry)) < 0)
786 			break;
787 
788 		/* if this path is before our start, advance over this entry */
789 		if (!iterator_has_started(&iter->base, iter->entry_path.ptr, false))
790 			continue;
791 
792 		/* if this path is after our end, stop */
793 		if (iterator_has_ended(&iter->base, iter->entry_path.ptr)) {
794 			error = GIT_ITEROVER;
795 			break;
796 		}
797 
798 		/* if we have a list of paths we're interested in, examine it */
799 		if (!iterator_pathlist_next_is(&iter->base, iter->entry_path.ptr))
800 			continue;
801 
802 		is_tree = git_tree_entry__is_tree(entry->tree_entry);
803 
804 		/* if we are *not* including trees then advance over this entry */
805 		if (is_tree && !iterator__include_trees(iter)) {
806 
807 			/* if we've found a tree (and are not returning it to the caller)
808 			 * and we are autoexpanding, then we want to return the first
809 			 * child.  push the new directory and advance.
810 			 */
811 			if (iterator__do_autoexpand(iter)) {
812 				if ((error = tree_iterator_frame_push(iter, entry)) < 0)
813 					break;
814 			}
815 
816 			continue;
817 		}
818 
819 		tree_iterator_set_current(iter, frame, entry);
820 
821 		/* if we are autoexpanding, then push this as a new frame, so that
822 		 * the next call to `advance` will dive into this directory.
823 		 */
824 		if (is_tree && iterator__do_autoexpand(iter))
825 			error = tree_iterator_frame_push(iter, entry);
826 
827 		break;
828 	}
829 
830 	if (out)
831 		*out = (error == 0) ? &iter->entry : NULL;
832 
833 	return error;
834 }
835 
tree_iterator_advance_into(const git_index_entry ** out,git_iterator * i)836 static int tree_iterator_advance_into(
837 	const git_index_entry **out, git_iterator *i)
838 {
839 	tree_iterator *iter = (tree_iterator *)i;
840     tree_iterator_frame *frame;
841 	tree_iterator_entry *prev_entry;
842 	int error;
843 
844 	if (out)
845 		*out = NULL;
846 
847 	if ((frame = tree_iterator_current_frame(iter)) == NULL)
848 		return GIT_ITEROVER;
849 
850 	/* get the last seen entry */
851 	prev_entry = tree_iterator_current_entry(frame);
852 
853 	/* it's legal to call advance_into when auto-expand is on.  in this case,
854 	 * we will have pushed a new (empty) frame on to the stack for this
855 	 * new directory.  since it's empty, its current_entry should be null.
856 	 */
857 	assert(iterator__do_autoexpand(i) ^ (prev_entry != NULL));
858 
859 	if (prev_entry) {
860 		if (!git_tree_entry__is_tree(prev_entry->tree_entry))
861 			return 0;
862 
863 		if ((error = tree_iterator_frame_push(iter, prev_entry)) < 0)
864 			return error;
865 	}
866 
867 	/* we've advanced into the directory in question, let advance
868 	 * find the first entry
869 	 */
870 	return tree_iterator_advance(out, i);
871 }
872 
tree_iterator_advance_over(const git_index_entry ** out,git_iterator_status_t * status,git_iterator * i)873 static int tree_iterator_advance_over(
874 	const git_index_entry **out,
875 	git_iterator_status_t *status,
876 	git_iterator *i)
877 {
878 	*status = GIT_ITERATOR_STATUS_NORMAL;
879 	return git_iterator_advance(out, i);
880 }
881 
tree_iterator_clear(tree_iterator * iter)882 static void tree_iterator_clear(tree_iterator *iter)
883 {
884 	while (iter->frames.size)
885 		tree_iterator_frame_pop(iter);
886 
887 	git_array_clear(iter->frames);
888 
889 	git_pool_clear(&iter->entry_pool);
890 	git_buf_clear(&iter->entry_path);
891 
892 	iterator_clear(&iter->base);
893 }
894 
tree_iterator_init(tree_iterator * iter)895 static int tree_iterator_init(tree_iterator *iter)
896 {
897 	int error;
898 
899 	git_pool_init(&iter->entry_pool, sizeof(tree_iterator_entry));
900 
901 	if ((error = tree_iterator_frame_init(iter, iter->root, NULL)) < 0)
902 		return error;
903 
904 	iter->base.flags &= ~GIT_ITERATOR_FIRST_ACCESS;
905 
906 	return 0;
907 }
908 
tree_iterator_reset(git_iterator * i)909 static int tree_iterator_reset(git_iterator *i)
910 {
911 	tree_iterator *iter = (tree_iterator *)i;
912 
913 	tree_iterator_clear(iter);
914 	return tree_iterator_init(iter);
915 }
916 
tree_iterator_free(git_iterator * i)917 static void tree_iterator_free(git_iterator *i)
918 {
919 	tree_iterator *iter = (tree_iterator *)i;
920 
921 	tree_iterator_clear(iter);
922 
923 	git_tree_free(iter->root);
924 	git_buf_dispose(&iter->entry_path);
925 }
926 
git_iterator_for_tree(git_iterator ** out,git_tree * tree,git_iterator_options * options)927 int git_iterator_for_tree(
928 	git_iterator **out,
929 	git_tree *tree,
930 	git_iterator_options *options)
931 {
932 	tree_iterator *iter;
933 	int error;
934 
935 	static git_iterator_callbacks callbacks = {
936 		tree_iterator_current,
937 		tree_iterator_advance,
938 		tree_iterator_advance_into,
939 		tree_iterator_advance_over,
940 		tree_iterator_reset,
941 		tree_iterator_free
942 	};
943 
944 	*out = NULL;
945 
946 	if (tree == NULL)
947 		return git_iterator_for_nothing(out, options);
948 
949 	iter = git__calloc(1, sizeof(tree_iterator));
950 	GIT_ERROR_CHECK_ALLOC(iter);
951 
952 	iter->base.type = GIT_ITERATOR_TYPE_TREE;
953 	iter->base.cb = &callbacks;
954 
955 	if ((error = iterator_init_common(&iter->base,
956 			git_tree_owner(tree), NULL, options)) < 0 ||
957 		(error = git_tree_dup(&iter->root, tree)) < 0 ||
958 		(error = tree_iterator_init(iter)) < 0)
959 		goto on_error;
960 
961 	*out = &iter->base;
962 	return 0;
963 
964 on_error:
965 	git_iterator_free(&iter->base);
966 	return error;
967 }
968 
git_iterator_current_tree_entry(const git_tree_entry ** tree_entry,git_iterator * i)969 int git_iterator_current_tree_entry(
970 	const git_tree_entry **tree_entry, git_iterator *i)
971 {
972 	tree_iterator *iter;
973 	tree_iterator_frame *frame;
974 	tree_iterator_entry *entry;
975 
976 	assert(i->type == GIT_ITERATOR_TYPE_TREE);
977 
978 	iter = (tree_iterator *)i;
979 
980 	frame = tree_iterator_current_frame(iter);
981 	entry = tree_iterator_current_entry(frame);
982 
983 	*tree_entry = entry->tree_entry;
984 	return 0;
985 }
986 
git_iterator_current_parent_tree(const git_tree ** parent_tree,git_iterator * i,size_t depth)987 int git_iterator_current_parent_tree(
988 	const git_tree **parent_tree, git_iterator *i, size_t depth)
989 {
990 	tree_iterator *iter;
991 	tree_iterator_frame *frame;
992 
993 	assert(i->type == GIT_ITERATOR_TYPE_TREE);
994 
995 	iter = (tree_iterator *)i;
996 
997 	assert(depth < iter->frames.size);
998 	frame = &iter->frames.ptr[iter->frames.size-depth-1];
999 
1000 	*parent_tree = frame->tree;
1001 	return 0;
1002 }
1003 
1004 /* Filesystem iterator */
1005 
1006 typedef struct {
1007 	struct stat st;
1008 	size_t path_len;
1009 	iterator_pathlist_search_t match;
1010 	git_oid id;
1011 	char path[GIT_FLEX_ARRAY];
1012 } filesystem_iterator_entry;
1013 
1014 typedef struct {
1015 	git_vector entries;
1016 	git_pool entry_pool;
1017 	size_t next_idx;
1018 
1019 	size_t path_len;
1020 	int is_ignored;
1021 } filesystem_iterator_frame;
1022 
1023 typedef struct {
1024 	git_iterator base;
1025 	char *root;
1026 	size_t root_len;
1027 
1028 	unsigned int dirload_flags;
1029 
1030 	git_tree *tree;
1031 	git_index *index;
1032 	git_vector index_snapshot;
1033 
1034 	git_array_t(filesystem_iterator_frame) frames;
1035 	git_ignores ignores;
1036 
1037 	/* info about the current entry */
1038 	git_index_entry entry;
1039 	git_buf current_path;
1040 	int current_is_ignored;
1041 
1042 	/* temporary buffer for advance_over */
1043 	git_buf tmp_buf;
1044 } filesystem_iterator;
1045 
1046 
filesystem_iterator_parent_frame(filesystem_iterator * iter)1047 GIT_INLINE(filesystem_iterator_frame *) filesystem_iterator_parent_frame(
1048 	filesystem_iterator *iter)
1049 {
1050 	return iter->frames.size > 1 ?
1051 		&iter->frames.ptr[iter->frames.size-2] : NULL;
1052 }
1053 
filesystem_iterator_current_frame(filesystem_iterator * iter)1054 GIT_INLINE(filesystem_iterator_frame *) filesystem_iterator_current_frame(
1055 	filesystem_iterator *iter)
1056 {
1057 	return iter->frames.size ? &iter->frames.ptr[iter->frames.size-1] : NULL;
1058 }
1059 
filesystem_iterator_current_entry(filesystem_iterator_frame * frame)1060 GIT_INLINE(filesystem_iterator_entry *) filesystem_iterator_current_entry(
1061 	filesystem_iterator_frame *frame)
1062 {
1063 	return frame->next_idx == 0 ?
1064 		NULL : frame->entries.contents[frame->next_idx-1];
1065 }
1066 
filesystem_iterator_entry_cmp(const void * _a,const void * _b)1067 static int filesystem_iterator_entry_cmp(const void *_a, const void *_b)
1068 {
1069 	const filesystem_iterator_entry *a = (const filesystem_iterator_entry *)_a;
1070 	const filesystem_iterator_entry *b = (const filesystem_iterator_entry *)_b;
1071 
1072 	return git__strcmp(a->path, b->path);
1073 }
1074 
filesystem_iterator_entry_cmp_icase(const void * _a,const void * _b)1075 static int filesystem_iterator_entry_cmp_icase(const void *_a, const void *_b)
1076 {
1077 	const filesystem_iterator_entry *a = (const filesystem_iterator_entry *)_a;
1078 	const filesystem_iterator_entry *b = (const filesystem_iterator_entry *)_b;
1079 
1080 	return git__strcasecmp(a->path, b->path);
1081 }
1082 
1083 #define FILESYSTEM_MAX_DEPTH 100
1084 
1085 /**
1086  * Figure out if an entry is a submodule.
1087  *
1088  * We consider it a submodule if the path is listed as a submodule in
1089  * either the tree or the index.
1090  */
filesystem_iterator_is_submodule(bool * out,filesystem_iterator * iter,const char * path,size_t path_len)1091 static int filesystem_iterator_is_submodule(
1092 	bool *out, filesystem_iterator *iter, const char *path, size_t path_len)
1093 {
1094 	bool is_submodule = false;
1095 	int error;
1096 
1097 	*out = false;
1098 
1099 	/* first see if this path is a submodule in HEAD */
1100 	if (iter->tree) {
1101 		git_tree_entry *entry;
1102 
1103 		error = git_tree_entry_bypath(&entry, iter->tree, path);
1104 
1105 		if (error < 0 && error != GIT_ENOTFOUND)
1106 			return error;
1107 
1108 		if (!error) {
1109 			is_submodule = (entry->attr == GIT_FILEMODE_COMMIT);
1110 			git_tree_entry_free(entry);
1111 		}
1112 	}
1113 
1114 	if (!is_submodule && iter->base.index) {
1115 		size_t pos;
1116 
1117 		error = git_index_snapshot_find(&pos,
1118 			&iter->index_snapshot, iter->base.entry_srch, path, path_len, 0);
1119 
1120 		if (error < 0 && error != GIT_ENOTFOUND)
1121 			return error;
1122 
1123 		if (!error) {
1124 			git_index_entry *e = git_vector_get(&iter->index_snapshot, pos);
1125 			is_submodule = (e->mode == GIT_FILEMODE_COMMIT);
1126 		}
1127 	}
1128 
1129 	*out = is_submodule;
1130 	return 0;
1131 }
1132 
filesystem_iterator_frame_push_ignores(filesystem_iterator * iter,filesystem_iterator_entry * frame_entry,filesystem_iterator_frame * new_frame)1133 static void filesystem_iterator_frame_push_ignores(
1134 	filesystem_iterator *iter,
1135 	filesystem_iterator_entry *frame_entry,
1136 	filesystem_iterator_frame *new_frame)
1137 {
1138 	filesystem_iterator_frame *previous_frame;
1139 	const char *path = frame_entry ? frame_entry->path : "";
1140 
1141 	if (!iterator__honor_ignores(&iter->base))
1142 		return;
1143 
1144 	if (git_ignore__lookup(&new_frame->is_ignored,
1145 			&iter->ignores, path, GIT_DIR_FLAG_TRUE) < 0) {
1146 		git_error_clear();
1147 		new_frame->is_ignored = GIT_IGNORE_NOTFOUND;
1148 	}
1149 
1150 	/* if this is not the top level directory... */
1151 	if (frame_entry) {
1152 		const char *relative_path;
1153 
1154 		previous_frame = filesystem_iterator_parent_frame(iter);
1155 
1156 		/* push new ignores for files in this directory */
1157 		relative_path = frame_entry->path + previous_frame->path_len;
1158 
1159 		/* inherit ignored from parent if no rule specified */
1160 		if (new_frame->is_ignored <= GIT_IGNORE_NOTFOUND)
1161 			new_frame->is_ignored = previous_frame->is_ignored;
1162 
1163 		git_ignore__push_dir(&iter->ignores, relative_path);
1164 	}
1165 }
1166 
filesystem_iterator_frame_pop_ignores(filesystem_iterator * iter)1167 static void filesystem_iterator_frame_pop_ignores(
1168 	filesystem_iterator *iter)
1169 {
1170 	if (iterator__honor_ignores(&iter->base))
1171 		git_ignore__pop_dir(&iter->ignores);
1172 }
1173 
filesystem_iterator_examine_path(bool * is_dir_out,iterator_pathlist_search_t * match_out,filesystem_iterator * iter,filesystem_iterator_entry * frame_entry,const char * path,size_t path_len)1174 GIT_INLINE(bool) filesystem_iterator_examine_path(
1175 	bool *is_dir_out,
1176 	iterator_pathlist_search_t *match_out,
1177 	filesystem_iterator *iter,
1178 	filesystem_iterator_entry *frame_entry,
1179 	const char *path,
1180 	size_t path_len)
1181 {
1182 	bool is_dir = 0;
1183 	iterator_pathlist_search_t match = ITERATOR_PATHLIST_FULL;
1184 
1185 	*is_dir_out = false;
1186 	*match_out = ITERATOR_PATHLIST_NONE;
1187 
1188 	if (iter->base.start_len) {
1189 		int cmp = iter->base.strncomp(path, iter->base.start, path_len);
1190 
1191 		/* we haven't stat'ed `path` yet, so we don't yet know if it's a
1192 		 * directory or not.  special case if the current path may be a
1193 		 * directory that matches the start prefix.
1194 		 */
1195 		if (cmp == 0) {
1196 			if (iter->base.start[path_len] == '/')
1197 				is_dir = true;
1198 
1199 			else if (iter->base.start[path_len] != '\0')
1200 				cmp = -1;
1201 		}
1202 
1203 		if (cmp < 0)
1204 			return false;
1205 	}
1206 
1207 	if (iter->base.end_len) {
1208 		int cmp = iter->base.strncomp(path, iter->base.end, iter->base.end_len);
1209 
1210 		if (cmp > 0)
1211 			return false;
1212 	}
1213 
1214 	/* if we have a pathlist that we're limiting to, examine this path now
1215 	 * to avoid a `stat` if we're not interested in the path.
1216 	 */
1217 	if (iter->base.pathlist.length) {
1218 		/* if our parent was explicitly included, so too are we */
1219 		if (frame_entry && frame_entry->match != ITERATOR_PATHLIST_IS_PARENT)
1220 			match = ITERATOR_PATHLIST_FULL;
1221 		else
1222 			match = iterator_pathlist_search(&iter->base, path, path_len);
1223 
1224 		if (match == ITERATOR_PATHLIST_NONE)
1225 			return false;
1226 
1227 		/* Ensure that the pathlist entry lines up with what we expected */
1228 		if (match == ITERATOR_PATHLIST_IS_DIR ||
1229 			match == ITERATOR_PATHLIST_IS_PARENT)
1230 			is_dir = true;
1231 	}
1232 
1233 	*is_dir_out = is_dir;
1234 	*match_out = match;
1235 	return true;
1236 }
1237 
filesystem_iterator_is_dot_git(filesystem_iterator * iter,const char * path,size_t path_len)1238 GIT_INLINE(bool) filesystem_iterator_is_dot_git(
1239 	filesystem_iterator *iter, const char *path, size_t path_len)
1240 {
1241 	size_t len;
1242 
1243 	if (!iterator__ignore_dot_git(&iter->base))
1244 		return false;
1245 
1246 	if ((len = path_len) < 4)
1247 		return false;
1248 
1249 	if (path[len - 1] == '/')
1250 		len--;
1251 
1252 	if (git__tolower(path[len - 1]) != 't' ||
1253 		git__tolower(path[len - 2]) != 'i' ||
1254 		git__tolower(path[len - 3]) != 'g' ||
1255 		git__tolower(path[len - 4]) != '.')
1256 		return false;
1257 
1258 	return (len == 4 || path[len - 5] == '/');
1259 }
1260 
filesystem_iterator_entry_hash(filesystem_iterator * iter,filesystem_iterator_entry * entry)1261 static int filesystem_iterator_entry_hash(
1262 	filesystem_iterator *iter,
1263 	filesystem_iterator_entry *entry)
1264 {
1265 	git_buf fullpath = GIT_BUF_INIT;
1266 	int error;
1267 
1268 	if (S_ISDIR(entry->st.st_mode)) {
1269 		memset(&entry->id, 0, GIT_OID_RAWSZ);
1270 		return 0;
1271 	}
1272 
1273 	if (iter->base.type == GIT_ITERATOR_TYPE_WORKDIR)
1274 		return git_repository_hashfile(&entry->id,
1275 			iter->base.repo, entry->path, GIT_OBJECT_BLOB, NULL);
1276 
1277 	if (!(error = git_buf_joinpath(&fullpath, iter->root, entry->path)))
1278 		error = git_odb_hashfile(&entry->id, fullpath.ptr, GIT_OBJECT_BLOB);
1279 
1280 	git_buf_dispose(&fullpath);
1281 	return error;
1282 }
1283 
filesystem_iterator_entry_init(filesystem_iterator_entry ** out,filesystem_iterator * iter,filesystem_iterator_frame * frame,const char * path,size_t path_len,struct stat * statbuf,iterator_pathlist_search_t pathlist_match)1284 static int filesystem_iterator_entry_init(
1285 	filesystem_iterator_entry **out,
1286 	filesystem_iterator *iter,
1287 	filesystem_iterator_frame *frame,
1288 	const char *path,
1289 	size_t path_len,
1290 	struct stat *statbuf,
1291 	iterator_pathlist_search_t pathlist_match)
1292 {
1293 	filesystem_iterator_entry *entry;
1294 	size_t entry_size;
1295 	int error = 0;
1296 
1297 	*out = NULL;
1298 
1299 	/* Make sure to append two bytes, one for the path's null
1300 	 * termination, one for a possible trailing '/' for folders.
1301 	 */
1302 	GIT_ERROR_CHECK_ALLOC_ADD(&entry_size,
1303 		sizeof(filesystem_iterator_entry), path_len);
1304 	GIT_ERROR_CHECK_ALLOC_ADD(&entry_size, entry_size, 2);
1305 
1306 	entry = git_pool_malloc(&frame->entry_pool, entry_size);
1307 	GIT_ERROR_CHECK_ALLOC(entry);
1308 
1309 	entry->path_len = path_len;
1310 	entry->match = pathlist_match;
1311 	memcpy(entry->path, path, path_len);
1312 	memcpy(&entry->st, statbuf, sizeof(struct stat));
1313 
1314 	/* Suffix directory paths with a '/' */
1315 	if (S_ISDIR(entry->st.st_mode))
1316 		entry->path[entry->path_len++] = '/';
1317 
1318 	entry->path[entry->path_len] = '\0';
1319 
1320 	if (iter->base.flags & GIT_ITERATOR_INCLUDE_HASH)
1321 		error = filesystem_iterator_entry_hash(iter, entry);
1322 
1323 	if (!error)
1324 		*out = entry;
1325 
1326 	return error;
1327 }
1328 
filesystem_iterator_frame_push(filesystem_iterator * iter,filesystem_iterator_entry * frame_entry)1329 static int filesystem_iterator_frame_push(
1330 	filesystem_iterator *iter,
1331 	filesystem_iterator_entry *frame_entry)
1332 {
1333 	filesystem_iterator_frame *new_frame = NULL;
1334 	git_path_diriter diriter = GIT_PATH_DIRITER_INIT;
1335 	git_buf root = GIT_BUF_INIT;
1336 	const char *path;
1337 	filesystem_iterator_entry *entry;
1338 	struct stat statbuf;
1339 	size_t path_len;
1340 	int error;
1341 
1342 	if (iter->frames.size == FILESYSTEM_MAX_DEPTH) {
1343 		git_error_set(GIT_ERROR_REPOSITORY,
1344 			"directory nesting too deep (%"PRIuZ")", iter->frames.size);
1345 		return -1;
1346 	}
1347 
1348 	new_frame = git_array_alloc(iter->frames);
1349 	GIT_ERROR_CHECK_ALLOC(new_frame);
1350 
1351 	memset(new_frame, 0, sizeof(filesystem_iterator_frame));
1352 
1353 	if (frame_entry)
1354 		git_buf_joinpath(&root, iter->root, frame_entry->path);
1355 	else
1356 		git_buf_puts(&root, iter->root);
1357 
1358 	if (git_buf_oom(&root)) {
1359 		error = -1;
1360 		goto done;
1361 	}
1362 
1363 	new_frame->path_len = frame_entry ? frame_entry->path_len : 0;
1364 
1365 	/* Any error here is equivalent to the dir not existing, skip over it */
1366 	if ((error = git_path_diriter_init(
1367 			&diriter, root.ptr, iter->dirload_flags)) < 0) {
1368 		error = GIT_ENOTFOUND;
1369 		goto done;
1370 	}
1371 
1372 	if ((error = git_vector_init(&new_frame->entries, 64,
1373 			iterator__ignore_case(&iter->base) ?
1374 			filesystem_iterator_entry_cmp_icase :
1375 			filesystem_iterator_entry_cmp)) < 0)
1376 		goto done;
1377 
1378 	git_pool_init(&new_frame->entry_pool, 1);
1379 
1380 	/* check if this directory is ignored */
1381 	filesystem_iterator_frame_push_ignores(iter, frame_entry, new_frame);
1382 
1383 	while ((error = git_path_diriter_next(&diriter)) == 0) {
1384 		iterator_pathlist_search_t pathlist_match = ITERATOR_PATHLIST_FULL;
1385 		bool dir_expected = false;
1386 
1387 		if ((error = git_path_diriter_fullpath(&path, &path_len, &diriter)) < 0)
1388 			goto done;
1389 
1390 		assert(path_len > iter->root_len);
1391 
1392 		/* remove the prefix if requested */
1393 		path += iter->root_len;
1394 		path_len -= iter->root_len;
1395 
1396 		/* examine start / end and the pathlist to see if this path is in it.
1397 		 * note that since we haven't yet stat'ed the path, we cannot know
1398 		 * whether it's a directory yet or not, so this can give us an
1399 		 * expected type (S_IFDIR or S_IFREG) that we should examine)
1400 		 */
1401 		if (!filesystem_iterator_examine_path(&dir_expected, &pathlist_match,
1402 			iter, frame_entry, path, path_len))
1403 			continue;
1404 
1405 		/* TODO: don't need to stat if assume unchanged for this path and
1406 		 * we have an index, we can just copy the data out of it.
1407 		 */
1408 
1409 		if ((error = git_path_diriter_stat(&statbuf, &diriter)) < 0) {
1410 			/* file was removed between readdir and lstat */
1411 			if (error == GIT_ENOTFOUND)
1412 				continue;
1413 
1414 			/* treat the file as unreadable */
1415 			memset(&statbuf, 0, sizeof(statbuf));
1416 			statbuf.st_mode = GIT_FILEMODE_UNREADABLE;
1417 
1418 			error = 0;
1419 		}
1420 
1421 		iter->base.stat_calls++;
1422 
1423 		/* Ignore wacky things in the filesystem */
1424 		if (!S_ISDIR(statbuf.st_mode) &&
1425 			!S_ISREG(statbuf.st_mode) &&
1426 			!S_ISLNK(statbuf.st_mode) &&
1427 			statbuf.st_mode != GIT_FILEMODE_UNREADABLE)
1428 			continue;
1429 
1430 		if (filesystem_iterator_is_dot_git(iter, path, path_len))
1431 			continue;
1432 
1433 		/* convert submodules to GITLINK and remove trailing slashes */
1434 		if (S_ISDIR(statbuf.st_mode)) {
1435 			bool submodule = false;
1436 
1437 			if ((error = filesystem_iterator_is_submodule(&submodule,
1438 					iter, path, path_len)) < 0)
1439 				goto done;
1440 
1441 			if (submodule)
1442 				statbuf.st_mode = GIT_FILEMODE_COMMIT;
1443 		}
1444 
1445 		/* Ensure that the pathlist entry lines up with what we expected */
1446 		else if (dir_expected)
1447 			continue;
1448 
1449 		if ((error = filesystem_iterator_entry_init(&entry,
1450 			iter, new_frame, path, path_len, &statbuf, pathlist_match)) < 0)
1451 			goto done;
1452 
1453 		git_vector_insert(&new_frame->entries, entry);
1454 	}
1455 
1456 	if (error == GIT_ITEROVER)
1457 		error = 0;
1458 
1459 	/* sort now that directory suffix is added */
1460 	git_vector_sort(&new_frame->entries);
1461 
1462 done:
1463 	if (error < 0)
1464 		git_array_pop(iter->frames);
1465 
1466 	git_buf_dispose(&root);
1467 	git_path_diriter_free(&diriter);
1468 	return error;
1469 }
1470 
filesystem_iterator_frame_pop(filesystem_iterator * iter)1471 GIT_INLINE(void) filesystem_iterator_frame_pop(filesystem_iterator *iter)
1472 {
1473 	filesystem_iterator_frame *frame;
1474 
1475 	assert(iter->frames.size);
1476 
1477 	frame = git_array_pop(iter->frames);
1478 	filesystem_iterator_frame_pop_ignores(iter);
1479 
1480 	git_pool_clear(&frame->entry_pool);
1481 	git_vector_free(&frame->entries);
1482 }
1483 
filesystem_iterator_set_current(filesystem_iterator * iter,filesystem_iterator_entry * entry)1484 static void filesystem_iterator_set_current(
1485 	filesystem_iterator *iter,
1486 	filesystem_iterator_entry *entry)
1487 {
1488 	/*
1489 	 * Index entries are limited to 32 bit timestamps.  We can safely
1490 	 * cast this since workdir times are only used in the cache; any
1491 	 * mismatch will cause a hash recomputation which is unfortunate
1492 	 * but affects only people who set their filetimes to 2038.
1493 	 * (Same with the file size.)
1494 	 */
1495 	iter->entry.ctime.seconds = (int32_t)entry->st.st_ctime;
1496 	iter->entry.mtime.seconds = (int32_t)entry->st.st_mtime;
1497 
1498 #if defined(GIT_USE_NSEC)
1499 	iter->entry.ctime.nanoseconds = entry->st.st_ctime_nsec;
1500 	iter->entry.mtime.nanoseconds = entry->st.st_mtime_nsec;
1501 #else
1502 	iter->entry.ctime.nanoseconds = 0;
1503 	iter->entry.mtime.nanoseconds = 0;
1504 #endif
1505 
1506 	iter->entry.dev = entry->st.st_dev;
1507 	iter->entry.ino = entry->st.st_ino;
1508 	iter->entry.mode = git_futils_canonical_mode(entry->st.st_mode);
1509 	iter->entry.uid = entry->st.st_uid;
1510 	iter->entry.gid = entry->st.st_gid;
1511 	iter->entry.file_size = (uint32_t)entry->st.st_size;
1512 
1513 	if (iter->base.flags & GIT_ITERATOR_INCLUDE_HASH)
1514 		git_oid_cpy(&iter->entry.id, &entry->id);
1515 
1516 	iter->entry.path = entry->path;
1517 
1518 	iter->current_is_ignored = GIT_IGNORE_UNCHECKED;
1519 }
1520 
filesystem_iterator_current(const git_index_entry ** out,git_iterator * i)1521 static int filesystem_iterator_current(
1522 	const git_index_entry **out, git_iterator *i)
1523 {
1524 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1525 
1526 	if (!iterator__has_been_accessed(i))
1527 		return iter->base.cb->advance(out, i);
1528 
1529 	if (!iter->frames.size) {
1530 		*out = NULL;
1531 		return GIT_ITEROVER;
1532 	}
1533 
1534 	*out = &iter->entry;
1535 	return 0;
1536 }
1537 
filesystem_iterator_is_dir(bool * is_dir,const filesystem_iterator * iter,const filesystem_iterator_entry * entry)1538 static int filesystem_iterator_is_dir(
1539 	bool *is_dir,
1540 	const filesystem_iterator *iter,
1541 	const filesystem_iterator_entry *entry)
1542 {
1543 	struct stat st;
1544 	git_buf fullpath = GIT_BUF_INIT;
1545 	int error = 0;
1546 
1547 	if (S_ISDIR(entry->st.st_mode)) {
1548 		*is_dir = 1;
1549 		goto done;
1550 	}
1551 
1552 	if (!iterator__descend_symlinks(iter) || !S_ISLNK(entry->st.st_mode)) {
1553 		*is_dir = 0;
1554 		goto done;
1555 	}
1556 
1557 	if ((error = git_buf_joinpath(&fullpath, iter->root, entry->path)) < 0 ||
1558 		(error = p_stat(fullpath.ptr, &st)) < 0)
1559 		goto done;
1560 
1561 	*is_dir = S_ISDIR(st.st_mode);
1562 
1563 done:
1564 	git_buf_dispose(&fullpath);
1565 	return error;
1566 }
1567 
filesystem_iterator_advance(const git_index_entry ** out,git_iterator * i)1568 static int filesystem_iterator_advance(
1569 	const git_index_entry **out, git_iterator *i)
1570 {
1571 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1572 	bool is_dir;
1573 	int error = 0;
1574 
1575 	iter->base.flags |= GIT_ITERATOR_FIRST_ACCESS;
1576 
1577 	/* examine filesystem entries until we find the next one to return */
1578 	while (true) {
1579 		filesystem_iterator_frame *frame;
1580 		filesystem_iterator_entry *entry;
1581 
1582 		if ((frame = filesystem_iterator_current_frame(iter)) == NULL) {
1583 			error = GIT_ITEROVER;
1584 			break;
1585 		}
1586 
1587 		/* no more entries in this frame.  pop the frame out */
1588 		if (frame->next_idx == frame->entries.length) {
1589 			filesystem_iterator_frame_pop(iter);
1590 			continue;
1591 		}
1592 
1593 		/* we have more entries in the current frame, that's our next entry */
1594 		entry = frame->entries.contents[frame->next_idx];
1595 		frame->next_idx++;
1596 
1597 		if ((error = filesystem_iterator_is_dir(&is_dir, iter, entry)) < 0)
1598 			break;
1599 
1600 		if (is_dir) {
1601 			if (iterator__do_autoexpand(iter)) {
1602 				error = filesystem_iterator_frame_push(iter, entry);
1603 
1604 				/* may get GIT_ENOTFOUND due to races or permission problems
1605 				 * that we want to quietly swallow
1606 				 */
1607 				if (error == GIT_ENOTFOUND)
1608 					continue;
1609 				else if (error < 0)
1610 					break;
1611 			}
1612 
1613 			if (!iterator__include_trees(iter))
1614 				continue;
1615 		}
1616 
1617 		filesystem_iterator_set_current(iter, entry);
1618 		break;
1619 	}
1620 
1621 	if (out)
1622 		*out = (error == 0) ? &iter->entry : NULL;
1623 
1624 	return error;
1625 }
1626 
filesystem_iterator_advance_into(const git_index_entry ** out,git_iterator * i)1627 static int filesystem_iterator_advance_into(
1628 	const git_index_entry **out, git_iterator *i)
1629 {
1630 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1631 	filesystem_iterator_frame *frame;
1632 	filesystem_iterator_entry *prev_entry;
1633 	int error;
1634 
1635 	if (out)
1636 		*out = NULL;
1637 
1638 	if ((frame = filesystem_iterator_current_frame(iter)) == NULL)
1639 		return GIT_ITEROVER;
1640 
1641 	/* get the last seen entry */
1642 	prev_entry = filesystem_iterator_current_entry(frame);
1643 
1644 	/* it's legal to call advance_into when auto-expand is on.  in this case,
1645 	 * we will have pushed a new (empty) frame on to the stack for this
1646 	 * new directory.  since it's empty, its current_entry should be null.
1647 	 */
1648 	assert(iterator__do_autoexpand(i) ^ (prev_entry != NULL));
1649 
1650 	if (prev_entry) {
1651 		if (prev_entry->st.st_mode != GIT_FILEMODE_COMMIT &&
1652 			!S_ISDIR(prev_entry->st.st_mode))
1653 			return 0;
1654 
1655 		if ((error = filesystem_iterator_frame_push(iter, prev_entry)) < 0)
1656 			return error;
1657 	}
1658 
1659 	/* we've advanced into the directory in question, let advance
1660 	 * find the first entry
1661 	 */
1662 	return filesystem_iterator_advance(out, i);
1663 }
1664 
git_iterator_current_workdir_path(git_buf ** out,git_iterator * i)1665 int git_iterator_current_workdir_path(git_buf **out, git_iterator *i)
1666 {
1667 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1668 	const git_index_entry *entry;
1669 
1670 	if (i->type != GIT_ITERATOR_TYPE_FS &&
1671 		i->type != GIT_ITERATOR_TYPE_WORKDIR) {
1672 		*out = NULL;
1673 		return 0;
1674 	}
1675 
1676 	git_buf_truncate(&iter->current_path, iter->root_len);
1677 
1678 	if (git_iterator_current(&entry, i) < 0 ||
1679 		git_buf_puts(&iter->current_path, entry->path) < 0)
1680 		return -1;
1681 
1682 	*out = &iter->current_path;
1683 	return 0;
1684 }
1685 
entry_dir_flag(git_index_entry * entry)1686 GIT_INLINE(git_dir_flag) entry_dir_flag(git_index_entry *entry)
1687 {
1688 #if defined(GIT_WIN32) && !defined(__MINGW32__)
1689 	return (entry && entry->mode) ?
1690 		(S_ISDIR(entry->mode) ? GIT_DIR_FLAG_TRUE : GIT_DIR_FLAG_FALSE) :
1691 		GIT_DIR_FLAG_UNKNOWN;
1692 #else
1693 	GIT_UNUSED(entry);
1694 	return GIT_DIR_FLAG_UNKNOWN;
1695 #endif
1696 }
1697 
filesystem_iterator_update_ignored(filesystem_iterator * iter)1698 static void filesystem_iterator_update_ignored(filesystem_iterator *iter)
1699 {
1700 	filesystem_iterator_frame *frame;
1701 	git_dir_flag dir_flag = entry_dir_flag(&iter->entry);
1702 
1703 	if (git_ignore__lookup(&iter->current_is_ignored,
1704 			&iter->ignores, iter->entry.path, dir_flag) < 0) {
1705 		git_error_clear();
1706 		iter->current_is_ignored = GIT_IGNORE_NOTFOUND;
1707 	}
1708 
1709 	/* use ignore from containing frame stack */
1710 	if (iter->current_is_ignored <= GIT_IGNORE_NOTFOUND) {
1711 		frame = filesystem_iterator_current_frame(iter);
1712 		iter->current_is_ignored = frame->is_ignored;
1713 	}
1714 }
1715 
filesystem_iterator_current_is_ignored(filesystem_iterator * iter)1716 GIT_INLINE(bool) filesystem_iterator_current_is_ignored(
1717 	filesystem_iterator *iter)
1718 {
1719 	if (iter->current_is_ignored == GIT_IGNORE_UNCHECKED)
1720 		filesystem_iterator_update_ignored(iter);
1721 
1722 	return (iter->current_is_ignored == GIT_IGNORE_TRUE);
1723 }
1724 
git_iterator_current_is_ignored(git_iterator * i)1725 bool git_iterator_current_is_ignored(git_iterator *i)
1726 {
1727 	filesystem_iterator *iter = NULL;
1728 
1729 	if (i->type != GIT_ITERATOR_TYPE_WORKDIR)
1730 		return false;
1731 
1732 	iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1733 
1734 	return filesystem_iterator_current_is_ignored(iter);
1735 }
1736 
git_iterator_current_tree_is_ignored(git_iterator * i)1737 bool git_iterator_current_tree_is_ignored(git_iterator *i)
1738 {
1739 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1740 	filesystem_iterator_frame *frame;
1741 
1742 	if (i->type != GIT_ITERATOR_TYPE_WORKDIR)
1743 		return false;
1744 
1745 	frame = filesystem_iterator_current_frame(iter);
1746 	return (frame->is_ignored == GIT_IGNORE_TRUE);
1747 }
1748 
filesystem_iterator_advance_over(const git_index_entry ** out,git_iterator_status_t * status,git_iterator * i)1749 static int filesystem_iterator_advance_over(
1750 	const git_index_entry **out,
1751 	git_iterator_status_t *status,
1752 	git_iterator *i)
1753 {
1754 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1755 	filesystem_iterator_frame *current_frame;
1756 	filesystem_iterator_entry *current_entry;
1757 	const git_index_entry *entry = NULL;
1758 	const char *base;
1759 	int error = 0;
1760 
1761 	*out = NULL;
1762 	*status = GIT_ITERATOR_STATUS_NORMAL;
1763 
1764 	assert(iterator__has_been_accessed(i));
1765 
1766 	current_frame = filesystem_iterator_current_frame(iter);
1767 	assert(current_frame);
1768 	current_entry = filesystem_iterator_current_entry(current_frame);
1769 	assert(current_entry);
1770 
1771 	if ((error = git_iterator_current(&entry, i)) < 0)
1772 		return error;
1773 
1774 	if (!S_ISDIR(entry->mode)) {
1775 		if (filesystem_iterator_current_is_ignored(iter))
1776 			*status = GIT_ITERATOR_STATUS_IGNORED;
1777 
1778 		return filesystem_iterator_advance(out, i);
1779 	}
1780 
1781 	git_buf_clear(&iter->tmp_buf);
1782 	if ((error = git_buf_puts(&iter->tmp_buf, entry->path)) < 0)
1783 		return error;
1784 
1785 	base = iter->tmp_buf.ptr;
1786 
1787 	/* scan inside the directory looking for files.  if we find nothing,
1788 	 * we will remain EMPTY.  if we find any ignored item, upgrade EMPTY to
1789 	 * IGNORED.  if we find a real actual item, upgrade all the way to NORMAL
1790 	 * and then stop.
1791 	 *
1792 	 * however, if we're here looking for a pathlist item (but are not
1793 	 * actually in the pathlist ourselves) then start at FILTERED instead of
1794 	 * EMPTY.  callers then know that this path was not something they asked
1795 	 * about.
1796 	 */
1797 	*status = current_entry->match == ITERATOR_PATHLIST_IS_PARENT ?
1798 		GIT_ITERATOR_STATUS_FILTERED : GIT_ITERATOR_STATUS_EMPTY;
1799 
1800 	while (entry && !iter->base.prefixcomp(entry->path, base)) {
1801 		if (filesystem_iterator_current_is_ignored(iter)) {
1802 			/* if we found an explicitly ignored item, then update from
1803 			 * EMPTY to IGNORED
1804 			 */
1805 			*status = GIT_ITERATOR_STATUS_IGNORED;
1806 		} else if (S_ISDIR(entry->mode)) {
1807 			error = filesystem_iterator_advance_into(&entry, i);
1808 
1809 			if (!error)
1810 				continue;
1811 
1812 			/* this directory disappeared, ignore it */
1813 			else if (error == GIT_ENOTFOUND)
1814 				error = 0;
1815 
1816 			/* a real error occurred */
1817 			else
1818 				break;
1819 		} else {
1820 			/* we found a non-ignored item, treat parent as untracked */
1821 			*status = GIT_ITERATOR_STATUS_NORMAL;
1822 			break;
1823 		}
1824 
1825 		if ((error = git_iterator_advance(&entry, i)) < 0)
1826 			break;
1827 	}
1828 
1829 	/* wrap up scan back to base directory */
1830 	while (entry && !iter->base.prefixcomp(entry->path, base)) {
1831 		if ((error = git_iterator_advance(&entry, i)) < 0)
1832 			break;
1833 	}
1834 
1835 	if (!error)
1836 		*out = entry;
1837 
1838 	return error;
1839 }
1840 
filesystem_iterator_clear(filesystem_iterator * iter)1841 static void filesystem_iterator_clear(filesystem_iterator *iter)
1842 {
1843 	while (iter->frames.size)
1844 		filesystem_iterator_frame_pop(iter);
1845 
1846 	git_array_clear(iter->frames);
1847 	git_ignore__free(&iter->ignores);
1848 
1849 	git_buf_dispose(&iter->tmp_buf);
1850 
1851 	iterator_clear(&iter->base);
1852 }
1853 
filesystem_iterator_init(filesystem_iterator * iter)1854 static int filesystem_iterator_init(filesystem_iterator *iter)
1855 {
1856 	int error;
1857 
1858 	if (iterator__honor_ignores(&iter->base) &&
1859 		(error = git_ignore__for_path(iter->base.repo,
1860 			".gitignore", &iter->ignores)) < 0)
1861 		return error;
1862 
1863 	if ((error = filesystem_iterator_frame_push(iter, NULL)) < 0)
1864 		return error;
1865 
1866 	iter->base.flags &= ~GIT_ITERATOR_FIRST_ACCESS;
1867 
1868 	return 0;
1869 }
1870 
filesystem_iterator_reset(git_iterator * i)1871 static int filesystem_iterator_reset(git_iterator *i)
1872 {
1873 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1874 
1875 	filesystem_iterator_clear(iter);
1876 	return filesystem_iterator_init(iter);
1877 }
1878 
filesystem_iterator_free(git_iterator * i)1879 static void filesystem_iterator_free(git_iterator *i)
1880 {
1881 	filesystem_iterator *iter = GIT_CONTAINER_OF(i, filesystem_iterator, base);
1882 	git__free(iter->root);
1883 	git_buf_dispose(&iter->current_path);
1884 	git_tree_free(iter->tree);
1885 	if (iter->index)
1886 		git_index_snapshot_release(&iter->index_snapshot, iter->index);
1887 	filesystem_iterator_clear(iter);
1888 }
1889 
iterator_for_filesystem(git_iterator ** out,git_repository * repo,const char * root,git_index * index,git_tree * tree,git_iterator_type_t type,git_iterator_options * options)1890 static int iterator_for_filesystem(
1891 	git_iterator **out,
1892 	git_repository *repo,
1893 	const char *root,
1894 	git_index *index,
1895 	git_tree *tree,
1896 	git_iterator_type_t type,
1897 	git_iterator_options *options)
1898 {
1899 	filesystem_iterator *iter;
1900 	size_t root_len;
1901 	int error;
1902 
1903 	static git_iterator_callbacks callbacks = {
1904 		filesystem_iterator_current,
1905 		filesystem_iterator_advance,
1906 		filesystem_iterator_advance_into,
1907 		filesystem_iterator_advance_over,
1908 		filesystem_iterator_reset,
1909 		filesystem_iterator_free
1910 	};
1911 
1912 	*out = NULL;
1913 
1914 	if (root == NULL)
1915 		return git_iterator_for_nothing(out, options);
1916 
1917 	iter = git__calloc(1, sizeof(filesystem_iterator));
1918 	GIT_ERROR_CHECK_ALLOC(iter);
1919 
1920 	iter->base.type = type;
1921 	iter->base.cb = &callbacks;
1922 
1923 	root_len = strlen(root);
1924 
1925 	iter->root = git__malloc(root_len+2);
1926 	GIT_ERROR_CHECK_ALLOC(iter->root);
1927 
1928 	memcpy(iter->root, root, root_len);
1929 
1930 	if (root_len == 0 || root[root_len-1] != '/') {
1931 		iter->root[root_len] = '/';
1932 		root_len++;
1933 	}
1934 	iter->root[root_len] = '\0';
1935 	iter->root_len = root_len;
1936 
1937 	if ((error = git_buf_puts(&iter->current_path, iter->root)) < 0)
1938 		goto on_error;
1939 
1940 	if ((error = iterator_init_common(&iter->base, repo, index, options)) < 0)
1941 		goto on_error;
1942 
1943 	if (tree && (error = git_tree_dup(&iter->tree, tree)) < 0)
1944 		goto on_error;
1945 
1946 	if (index &&
1947 		(error = git_index_snapshot_new(&iter->index_snapshot, index)) < 0)
1948 		goto on_error;
1949 
1950 	iter->index = index;
1951 	iter->dirload_flags =
1952 		(iterator__ignore_case(&iter->base) ? GIT_PATH_DIR_IGNORE_CASE : 0) |
1953 		(iterator__flag(&iter->base, PRECOMPOSE_UNICODE) ?
1954 			 GIT_PATH_DIR_PRECOMPOSE_UNICODE : 0);
1955 
1956 	if ((error = filesystem_iterator_init(iter)) < 0)
1957 		goto on_error;
1958 
1959 	*out = &iter->base;
1960 	return 0;
1961 
1962 on_error:
1963 	git_iterator_free(&iter->base);
1964 	return error;
1965 }
1966 
git_iterator_for_filesystem(git_iterator ** out,const char * root,git_iterator_options * options)1967 int git_iterator_for_filesystem(
1968 	git_iterator **out,
1969 	const char *root,
1970 	git_iterator_options *options)
1971 {
1972 	return iterator_for_filesystem(out,
1973 		NULL, root, NULL, NULL, GIT_ITERATOR_TYPE_FS, options);
1974 }
1975 
git_iterator_for_workdir_ext(git_iterator ** out,git_repository * repo,const char * repo_workdir,git_index * index,git_tree * tree,git_iterator_options * given_opts)1976 int git_iterator_for_workdir_ext(
1977 	git_iterator **out,
1978 	git_repository *repo,
1979 	const char *repo_workdir,
1980 	git_index *index,
1981 	git_tree *tree,
1982 	git_iterator_options *given_opts)
1983 {
1984 	git_iterator_options options = GIT_ITERATOR_OPTIONS_INIT;
1985 
1986 	if (!repo_workdir) {
1987 		if (git_repository__ensure_not_bare(repo, "scan working directory") < 0)
1988 			return GIT_EBAREREPO;
1989 
1990 		repo_workdir = git_repository_workdir(repo);
1991 	}
1992 
1993 	/* upgrade to a workdir iterator, adding necessary internal flags */
1994 	if (given_opts)
1995 		memcpy(&options, given_opts, sizeof(git_iterator_options));
1996 
1997 	options.flags |= GIT_ITERATOR_HONOR_IGNORES |
1998 		GIT_ITERATOR_IGNORE_DOT_GIT;
1999 
2000 	return iterator_for_filesystem(out,
2001 		repo, repo_workdir, index, tree, GIT_ITERATOR_TYPE_WORKDIR, &options);
2002 }
2003 
2004 
2005 /* Index iterator */
2006 
2007 
2008 typedef struct {
2009 	git_iterator base;
2010 	git_vector entries;
2011 	size_t next_idx;
2012 
2013 	/* the pseudotree entry */
2014 	git_index_entry tree_entry;
2015 	git_buf tree_buf;
2016 	bool skip_tree;
2017 
2018 	const git_index_entry *entry;
2019 } index_iterator;
2020 
index_iterator_current(const git_index_entry ** out,git_iterator * i)2021 static int index_iterator_current(
2022 	const git_index_entry **out, git_iterator *i)
2023 {
2024 	index_iterator *iter = (index_iterator *)i;
2025 
2026 	if (!iterator__has_been_accessed(i))
2027 		return iter->base.cb->advance(out, i);
2028 
2029 	if (iter->entry == NULL) {
2030 		*out = NULL;
2031 		return GIT_ITEROVER;
2032 	}
2033 
2034 	*out = iter->entry;
2035 	return 0;
2036 }
2037 
index_iterator_create_pseudotree(const git_index_entry ** out,index_iterator * iter,const char * path)2038 static bool index_iterator_create_pseudotree(
2039 	const git_index_entry **out,
2040 	index_iterator *iter,
2041 	const char *path)
2042 {
2043 	const char *prev_path, *relative_path, *dirsep;
2044 	size_t common_len;
2045 
2046 	prev_path = iter->entry ? iter->entry->path : "";
2047 
2048 	/* determine if the new path is in a different directory from the old */
2049 	common_len = git_path_common_dirlen(prev_path, path);
2050 	relative_path = path + common_len;
2051 
2052 	if ((dirsep = strchr(relative_path, '/')) == NULL)
2053 		return false;
2054 
2055 	git_buf_clear(&iter->tree_buf);
2056 	git_buf_put(&iter->tree_buf, path, (dirsep - path) + 1);
2057 
2058 	iter->tree_entry.mode = GIT_FILEMODE_TREE;
2059 	iter->tree_entry.path = iter->tree_buf.ptr;
2060 
2061 	*out = &iter->tree_entry;
2062 	return true;
2063 }
2064 
index_iterator_skip_pseudotree(index_iterator * iter)2065 static int index_iterator_skip_pseudotree(index_iterator *iter)
2066 {
2067 	assert(iterator__has_been_accessed(&iter->base));
2068 	assert(S_ISDIR(iter->entry->mode));
2069 
2070 	while (true) {
2071 		const git_index_entry *next_entry = NULL;
2072 
2073 		if (++iter->next_idx >= iter->entries.length)
2074 			return GIT_ITEROVER;
2075 
2076 		next_entry = iter->entries.contents[iter->next_idx];
2077 
2078 		if (iter->base.strncomp(iter->tree_buf.ptr, next_entry->path,
2079 			iter->tree_buf.size) != 0)
2080 			break;
2081 	}
2082 
2083 	iter->skip_tree = false;
2084 	return 0;
2085 }
2086 
index_iterator_advance(const git_index_entry ** out,git_iterator * i)2087 static int index_iterator_advance(
2088 	const git_index_entry **out, git_iterator *i)
2089 {
2090 	index_iterator *iter = GIT_CONTAINER_OF(i, index_iterator, base);
2091 	const git_index_entry *entry = NULL;
2092 	bool is_submodule;
2093 	int error = 0;
2094 
2095 	iter->base.flags |= GIT_ITERATOR_FIRST_ACCESS;
2096 
2097 	while (true) {
2098 		if (iter->next_idx >= iter->entries.length) {
2099 			error = GIT_ITEROVER;
2100 			break;
2101 		}
2102 
2103 		/* we were not asked to expand this pseudotree.  advance over it. */
2104 		if (iter->skip_tree) {
2105 			index_iterator_skip_pseudotree(iter);
2106 			continue;
2107 		}
2108 
2109 		entry = iter->entries.contents[iter->next_idx];
2110 		is_submodule = S_ISGITLINK(entry->mode);
2111 
2112 		if (!iterator_has_started(&iter->base, entry->path, is_submodule)) {
2113 			iter->next_idx++;
2114 			continue;
2115 		}
2116 
2117 		if (iterator_has_ended(&iter->base, entry->path)) {
2118 			error = GIT_ITEROVER;
2119 			break;
2120 		}
2121 
2122 		/* if we have a list of paths we're interested in, examine it */
2123 		if (!iterator_pathlist_next_is(&iter->base, entry->path)) {
2124 			iter->next_idx++;
2125 			continue;
2126 		}
2127 
2128 		/* if this is a conflict, skip it unless we're including conflicts */
2129 		if (git_index_entry_is_conflict(entry) &&
2130 			!iterator__include_conflicts(&iter->base)) {
2131 			iter->next_idx++;
2132 			continue;
2133 		}
2134 
2135 		/* we've found what will be our next _file_ entry.  but if we are
2136 		 * returning trees entries, we may need to return a pseudotree
2137 		 * entry that will contain this.  don't advance over this entry,
2138 		 * though, we still need to return it on the next `advance`.
2139 		 */
2140 		if (iterator__include_trees(&iter->base) &&
2141 			index_iterator_create_pseudotree(&entry, iter, entry->path)) {
2142 
2143 			/* Note whether this pseudo tree should be expanded or not */
2144 			iter->skip_tree = iterator__dont_autoexpand(&iter->base);
2145 			break;
2146 		}
2147 
2148 		iter->next_idx++;
2149 		break;
2150 	}
2151 
2152 	iter->entry = (error == 0) ? entry : NULL;
2153 
2154 	if (out)
2155 		*out = iter->entry;
2156 
2157 	return error;
2158 }
2159 
index_iterator_advance_into(const git_index_entry ** out,git_iterator * i)2160 static int index_iterator_advance_into(
2161 	const git_index_entry **out, git_iterator *i)
2162 {
2163 	index_iterator *iter = GIT_CONTAINER_OF(i, index_iterator, base);
2164 
2165 	if (! S_ISDIR(iter->tree_entry.mode)) {
2166 		if (out)
2167 			*out = NULL;
2168 
2169 		return 0;
2170 	}
2171 
2172 	iter->skip_tree = false;
2173 	return index_iterator_advance(out, i);
2174 }
2175 
index_iterator_advance_over(const git_index_entry ** out,git_iterator_status_t * status,git_iterator * i)2176 static int index_iterator_advance_over(
2177 	const git_index_entry **out,
2178 	git_iterator_status_t *status,
2179 	git_iterator *i)
2180 {
2181 	index_iterator *iter = GIT_CONTAINER_OF(i, index_iterator, base);
2182 	const git_index_entry *entry;
2183 	int error;
2184 
2185 	if ((error = index_iterator_current(&entry, i)) < 0)
2186 		return error;
2187 
2188 	if (S_ISDIR(entry->mode))
2189 		index_iterator_skip_pseudotree(iter);
2190 
2191 	*status = GIT_ITERATOR_STATUS_NORMAL;
2192 	return index_iterator_advance(out, i);
2193 }
2194 
index_iterator_clear(index_iterator * iter)2195 static void index_iterator_clear(index_iterator *iter)
2196 {
2197 	iterator_clear(&iter->base);
2198 }
2199 
index_iterator_init(index_iterator * iter)2200 static int index_iterator_init(index_iterator *iter)
2201 {
2202 	iter->base.flags &= ~GIT_ITERATOR_FIRST_ACCESS;
2203 	iter->next_idx = 0;
2204 	iter->skip_tree = false;
2205 	return 0;
2206 }
2207 
index_iterator_reset(git_iterator * i)2208 static int index_iterator_reset(git_iterator *i)
2209 {
2210 	index_iterator *iter = GIT_CONTAINER_OF(i, index_iterator, base);
2211 
2212 	index_iterator_clear(iter);
2213 	return index_iterator_init(iter);
2214 }
2215 
index_iterator_free(git_iterator * i)2216 static void index_iterator_free(git_iterator *i)
2217 {
2218 	index_iterator *iter = GIT_CONTAINER_OF(i, index_iterator, base);
2219 
2220 	git_index_snapshot_release(&iter->entries, iter->base.index);
2221 	git_buf_dispose(&iter->tree_buf);
2222 }
2223 
git_iterator_for_index(git_iterator ** out,git_repository * repo,git_index * index,git_iterator_options * options)2224 int git_iterator_for_index(
2225 	git_iterator **out,
2226 	git_repository *repo,
2227 	git_index  *index,
2228 	git_iterator_options *options)
2229 {
2230 	index_iterator *iter;
2231 	int error;
2232 
2233 	static git_iterator_callbacks callbacks = {
2234 		index_iterator_current,
2235 		index_iterator_advance,
2236 		index_iterator_advance_into,
2237 		index_iterator_advance_over,
2238 		index_iterator_reset,
2239 		index_iterator_free
2240 	};
2241 
2242 	*out = NULL;
2243 
2244 	if (index == NULL)
2245 		return git_iterator_for_nothing(out, options);
2246 
2247 	iter = git__calloc(1, sizeof(index_iterator));
2248 	GIT_ERROR_CHECK_ALLOC(iter);
2249 
2250 	iter->base.type = GIT_ITERATOR_TYPE_INDEX;
2251 	iter->base.cb = &callbacks;
2252 
2253 	if ((error = iterator_init_common(&iter->base, repo, index, options)) < 0 ||
2254 		(error = git_index_snapshot_new(&iter->entries, index)) < 0 ||
2255 		(error = index_iterator_init(iter)) < 0)
2256 		goto on_error;
2257 
2258 	git_vector_set_cmp(&iter->entries, iterator__ignore_case(&iter->base) ?
2259 		git_index_entry_icmp : git_index_entry_cmp);
2260 	git_vector_sort(&iter->entries);
2261 
2262 	*out = &iter->base;
2263 	return 0;
2264 
2265 on_error:
2266 	git_iterator_free(&iter->base);
2267 	return error;
2268 }
2269 
2270 
2271 /* Iterator API */
2272 
git_iterator_reset_range(git_iterator * i,const char * start,const char * end)2273 int git_iterator_reset_range(
2274 	git_iterator *i, const char *start, const char *end)
2275 {
2276 	if (iterator_reset_range(i, start, end) < 0)
2277 		return -1;
2278 
2279 	return i->cb->reset(i);
2280 }
2281 
git_iterator_set_ignore_case(git_iterator * i,bool ignore_case)2282 void git_iterator_set_ignore_case(git_iterator *i, bool ignore_case)
2283 {
2284 	assert(!iterator__has_been_accessed(i));
2285 	iterator_set_ignore_case(i, ignore_case);
2286 }
2287 
git_iterator_free(git_iterator * iter)2288 void git_iterator_free(git_iterator *iter)
2289 {
2290 	if (iter == NULL)
2291 		return;
2292 
2293 	iter->cb->free(iter);
2294 
2295 	git_vector_free(&iter->pathlist);
2296 	git__free(iter->start);
2297 	git__free(iter->end);
2298 
2299 	memset(iter, 0, sizeof(*iter));
2300 
2301 	git__free(iter);
2302 }
2303 
git_iterator_foreach(git_iterator * iterator,git_iterator_foreach_cb cb,void * data)2304 int git_iterator_foreach(
2305 	git_iterator *iterator,
2306 	git_iterator_foreach_cb cb,
2307 	void *data)
2308 {
2309 	const git_index_entry *iterator_item;
2310 	int error = 0;
2311 
2312 	if ((error = git_iterator_current(&iterator_item, iterator)) < 0)
2313 		goto done;
2314 
2315 	if ((error = cb(iterator_item, data)) != 0)
2316 		goto done;
2317 
2318 	while (true) {
2319 		if ((error = git_iterator_advance(&iterator_item, iterator)) < 0)
2320 			goto done;
2321 
2322 		if ((error = cb(iterator_item, data)) != 0)
2323 			goto done;
2324 	}
2325 
2326 done:
2327 	if (error == GIT_ITEROVER)
2328 		error = 0;
2329 
2330 	return error;
2331 }
2332 
git_iterator_walk(git_iterator ** iterators,size_t cnt,git_iterator_walk_cb cb,void * data)2333 int git_iterator_walk(
2334 	git_iterator **iterators,
2335 	size_t cnt,
2336 	git_iterator_walk_cb cb,
2337 	void *data)
2338 {
2339 	const git_index_entry **iterator_item;	/* next in each iterator */
2340 	const git_index_entry **cur_items;		/* current path in each iter */
2341 	const git_index_entry *first_match;
2342 	size_t i, j;
2343 	int error = 0;
2344 
2345 	iterator_item = git__calloc(cnt, sizeof(git_index_entry *));
2346 	cur_items = git__calloc(cnt, sizeof(git_index_entry *));
2347 
2348 	GIT_ERROR_CHECK_ALLOC(iterator_item);
2349 	GIT_ERROR_CHECK_ALLOC(cur_items);
2350 
2351 	/* Set up the iterators */
2352 	for (i = 0; i < cnt; i++) {
2353 		error = git_iterator_current(&iterator_item[i], iterators[i]);
2354 
2355 		if (error < 0 && error != GIT_ITEROVER)
2356 			goto done;
2357 	}
2358 
2359 	while (true) {
2360 		for (i = 0; i < cnt; i++)
2361 			cur_items[i] = NULL;
2362 
2363 		first_match = NULL;
2364 
2365 		/* Find the next path(s) to consume from each iterator */
2366 		for (i = 0; i < cnt; i++) {
2367 			if (iterator_item[i] == NULL)
2368 				continue;
2369 
2370 			if (first_match == NULL) {
2371 				first_match = iterator_item[i];
2372 				cur_items[i] = iterator_item[i];
2373 			} else {
2374 				int path_diff = git_index_entry_cmp(iterator_item[i], first_match);
2375 
2376 				if (path_diff < 0) {
2377 					/* Found an index entry that sorts before the one we're
2378 					 * looking at.  Forget that we've seen the other and
2379 					 * look at the other iterators for this path.
2380 					 */
2381 					for (j = 0; j < i; j++)
2382 						cur_items[j] = NULL;
2383 
2384 					first_match = iterator_item[i];
2385 					cur_items[i] = iterator_item[i];
2386 				} else if (path_diff == 0) {
2387 					cur_items[i] = iterator_item[i];
2388 				}
2389 			}
2390 		}
2391 
2392 		if (first_match == NULL)
2393 			break;
2394 
2395 		if ((error = cb(cur_items, data)) != 0)
2396 			goto done;
2397 
2398 		/* Advance each iterator that participated */
2399 		for (i = 0; i < cnt; i++) {
2400 			if (cur_items[i] == NULL)
2401 				continue;
2402 
2403 			error = git_iterator_advance(&iterator_item[i], iterators[i]);
2404 
2405 			if (error < 0 && error != GIT_ITEROVER)
2406 				goto done;
2407 		}
2408 	}
2409 
2410 done:
2411 	git__free((git_index_entry **)iterator_item);
2412 	git__free((git_index_entry **)cur_items);
2413 
2414 	if (error == GIT_ITEROVER)
2415 		error = 0;
2416 
2417 	return error;
2418 }
2419