1 #include "cache.h"
2 #include "tree-walk.h"
3 #include "dir.h"
4 #include "object-store.h"
5 #include "tree.h"
6 #include "pathspec.h"
7 #include "json-writer.h"
8 
get_mode(const char * str,unsigned int * modep)9 static const char *get_mode(const char *str, unsigned int *modep)
10 {
11 	unsigned char c;
12 	unsigned int mode = 0;
13 
14 	if (*str == ' ')
15 		return NULL;
16 
17 	while ((c = *str++) != ' ') {
18 		if (c < '0' || c > '7')
19 			return NULL;
20 		mode = (mode << 3) + (c - '0');
21 	}
22 	*modep = mode;
23 	return str;
24 }
25 
decode_tree_entry(struct tree_desc * desc,const char * buf,unsigned long size,struct strbuf * err)26 static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned long size, struct strbuf *err)
27 {
28 	const char *path;
29 	unsigned int mode, len;
30 	const unsigned hashsz = the_hash_algo->rawsz;
31 
32 	if (size < hashsz + 3 || buf[size - (hashsz + 1)]) {
33 		strbuf_addstr(err, _("too-short tree object"));
34 		return -1;
35 	}
36 
37 	path = get_mode(buf, &mode);
38 	if (!path) {
39 		strbuf_addstr(err, _("malformed mode in tree entry"));
40 		return -1;
41 	}
42 	if (!*path) {
43 		strbuf_addstr(err, _("empty filename in tree entry"));
44 		return -1;
45 	}
46 	len = strlen(path) + 1;
47 
48 	/* Initialize the descriptor entry */
49 	desc->entry.path = path;
50 	desc->entry.mode = canon_mode(mode);
51 	desc->entry.pathlen = len - 1;
52 	oidread(&desc->entry.oid, (const unsigned char *)path + len);
53 
54 	return 0;
55 }
56 
init_tree_desc_internal(struct tree_desc * desc,const void * buffer,unsigned long size,struct strbuf * err)57 static int init_tree_desc_internal(struct tree_desc *desc, const void *buffer, unsigned long size, struct strbuf *err)
58 {
59 	desc->buffer = buffer;
60 	desc->size = size;
61 	if (size)
62 		return decode_tree_entry(desc, buffer, size, err);
63 	return 0;
64 }
65 
init_tree_desc(struct tree_desc * desc,const void * buffer,unsigned long size)66 void init_tree_desc(struct tree_desc *desc, const void *buffer, unsigned long size)
67 {
68 	struct strbuf err = STRBUF_INIT;
69 	if (init_tree_desc_internal(desc, buffer, size, &err))
70 		die("%s", err.buf);
71 	strbuf_release(&err);
72 }
73 
init_tree_desc_gently(struct tree_desc * desc,const void * buffer,unsigned long size)74 int init_tree_desc_gently(struct tree_desc *desc, const void *buffer, unsigned long size)
75 {
76 	struct strbuf err = STRBUF_INIT;
77 	int result = init_tree_desc_internal(desc, buffer, size, &err);
78 	if (result)
79 		error("%s", err.buf);
80 	strbuf_release(&err);
81 	return result;
82 }
83 
fill_tree_descriptor(struct repository * r,struct tree_desc * desc,const struct object_id * oid)84 void *fill_tree_descriptor(struct repository *r,
85 			   struct tree_desc *desc,
86 			   const struct object_id *oid)
87 {
88 	unsigned long size = 0;
89 	void *buf = NULL;
90 
91 	if (oid) {
92 		buf = read_object_with_reference(r, oid, tree_type, &size, NULL);
93 		if (!buf)
94 			die("unable to read tree %s", oid_to_hex(oid));
95 	}
96 	init_tree_desc(desc, buf, size);
97 	return buf;
98 }
99 
entry_clear(struct name_entry * a)100 static void entry_clear(struct name_entry *a)
101 {
102 	memset(a, 0, sizeof(*a));
103 }
104 
entry_extract(struct tree_desc * t,struct name_entry * a)105 static void entry_extract(struct tree_desc *t, struct name_entry *a)
106 {
107 	*a = t->entry;
108 }
109 
update_tree_entry_internal(struct tree_desc * desc,struct strbuf * err)110 static int update_tree_entry_internal(struct tree_desc *desc, struct strbuf *err)
111 {
112 	const void *buf = desc->buffer;
113 	const unsigned char *end = (const unsigned char *)desc->entry.path + desc->entry.pathlen + 1 + the_hash_algo->rawsz;
114 	unsigned long size = desc->size;
115 	unsigned long len = end - (const unsigned char *)buf;
116 
117 	if (size < len)
118 		die(_("too-short tree file"));
119 	buf = end;
120 	size -= len;
121 	desc->buffer = buf;
122 	desc->size = size;
123 	if (size)
124 		return decode_tree_entry(desc, buf, size, err);
125 	return 0;
126 }
127 
update_tree_entry(struct tree_desc * desc)128 void update_tree_entry(struct tree_desc *desc)
129 {
130 	struct strbuf err = STRBUF_INIT;
131 	if (update_tree_entry_internal(desc, &err))
132 		die("%s", err.buf);
133 	strbuf_release(&err);
134 }
135 
update_tree_entry_gently(struct tree_desc * desc)136 int update_tree_entry_gently(struct tree_desc *desc)
137 {
138 	struct strbuf err = STRBUF_INIT;
139 	if (update_tree_entry_internal(desc, &err)) {
140 		error("%s", err.buf);
141 		strbuf_release(&err);
142 		/* Stop processing this tree after error */
143 		desc->size = 0;
144 		return -1;
145 	}
146 	strbuf_release(&err);
147 	return 0;
148 }
149 
tree_entry(struct tree_desc * desc,struct name_entry * entry)150 int tree_entry(struct tree_desc *desc, struct name_entry *entry)
151 {
152 	if (!desc->size)
153 		return 0;
154 
155 	*entry = desc->entry;
156 	update_tree_entry(desc);
157 	return 1;
158 }
159 
tree_entry_gently(struct tree_desc * desc,struct name_entry * entry)160 int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
161 {
162 	if (!desc->size)
163 		return 0;
164 
165 	*entry = desc->entry;
166 	if (update_tree_entry_gently(desc))
167 		return 0;
168 	return 1;
169 }
170 
171 static int traverse_trees_atexit_registered;
172 static int traverse_trees_count;
173 static int traverse_trees_cur_depth;
174 static int traverse_trees_max_depth;
175 
trace2_traverse_trees_statistics_atexit(void)176 static void trace2_traverse_trees_statistics_atexit(void)
177 {
178 	struct json_writer jw = JSON_WRITER_INIT;
179 
180 	jw_object_begin(&jw, 0);
181 	jw_object_intmax(&jw, "traverse_trees_count", traverse_trees_count);
182 	jw_object_intmax(&jw, "traverse_trees_max_depth", traverse_trees_max_depth);
183 	jw_end(&jw);
184 
185 	trace2_data_json("traverse_trees", the_repository, "statistics", &jw);
186 
187 	jw_release(&jw);
188 }
189 
setup_traverse_info(struct traverse_info * info,const char * base)190 void setup_traverse_info(struct traverse_info *info, const char *base)
191 {
192 	size_t pathlen = strlen(base);
193 	static struct traverse_info dummy;
194 
195 	memset(info, 0, sizeof(*info));
196 	if (pathlen && base[pathlen-1] == '/')
197 		pathlen--;
198 	info->pathlen = pathlen ? pathlen + 1 : 0;
199 	info->name = base;
200 	info->namelen = pathlen;
201 	if (pathlen)
202 		info->prev = &dummy;
203 
204 	if (trace2_is_enabled() && !traverse_trees_atexit_registered) {
205 		atexit(trace2_traverse_trees_statistics_atexit);
206 		traverse_trees_atexit_registered = 1;
207 	}
208 }
209 
make_traverse_path(char * path,size_t pathlen,const struct traverse_info * info,const char * name,size_t namelen)210 char *make_traverse_path(char *path, size_t pathlen,
211 			 const struct traverse_info *info,
212 			 const char *name, size_t namelen)
213 {
214 	/* Always points to the end of the name we're about to add */
215 	size_t pos = st_add(info->pathlen, namelen);
216 
217 	if (pos >= pathlen)
218 		BUG("too small buffer passed to make_traverse_path");
219 
220 	path[pos] = 0;
221 	for (;;) {
222 		if (pos < namelen)
223 			BUG("traverse_info pathlen does not match strings");
224 		pos -= namelen;
225 		memcpy(path + pos, name, namelen);
226 
227 		if (!pos)
228 			break;
229 		path[--pos] = '/';
230 
231 		if (!info)
232 			BUG("traverse_info ran out of list items");
233 		name = info->name;
234 		namelen = info->namelen;
235 		info = info->prev;
236 	}
237 	return path;
238 }
239 
strbuf_make_traverse_path(struct strbuf * out,const struct traverse_info * info,const char * name,size_t namelen)240 void strbuf_make_traverse_path(struct strbuf *out,
241 			       const struct traverse_info *info,
242 			       const char *name, size_t namelen)
243 {
244 	size_t len = traverse_path_len(info, namelen);
245 
246 	strbuf_grow(out, len);
247 	make_traverse_path(out->buf + out->len, out->alloc - out->len,
248 			   info, name, namelen);
249 	strbuf_setlen(out, out->len + len);
250 }
251 
252 struct tree_desc_skip {
253 	struct tree_desc_skip *prev;
254 	const void *ptr;
255 };
256 
257 struct tree_desc_x {
258 	struct tree_desc d;
259 	struct tree_desc_skip *skip;
260 };
261 
check_entry_match(const char * a,int a_len,const char * b,int b_len)262 static int check_entry_match(const char *a, int a_len, const char *b, int b_len)
263 {
264 	/*
265 	 * The caller wants to pick *a* from a tree or nothing.
266 	 * We are looking at *b* in a tree.
267 	 *
268 	 * (0) If a and b are the same name, we are trivially happy.
269 	 *
270 	 * There are three possibilities where *a* could be hiding
271 	 * behind *b*.
272 	 *
273 	 * (1) *a* == "t",   *b* == "ab"  i.e. *b* sorts earlier than *a* no
274 	 *                                matter what.
275 	 * (2) *a* == "t",   *b* == "t-2" and "t" is a subtree in the tree;
276 	 * (3) *a* == "t-2", *b* == "t"   and "t-2" is a blob in the tree.
277 	 *
278 	 * Otherwise we know *a* won't appear in the tree without
279 	 * scanning further.
280 	 */
281 
282 	int cmp = name_compare(a, a_len, b, b_len);
283 
284 	/* Most common case first -- reading sync'd trees */
285 	if (!cmp)
286 		return cmp;
287 
288 	if (0 < cmp) {
289 		/* a comes after b; it does not matter if it is case (3)
290 		if (b_len < a_len && !memcmp(a, b, b_len) && a[b_len] < '/')
291 			return 1;
292 		*/
293 		return 1; /* keep looking */
294 	}
295 
296 	/* b comes after a; are we looking at case (2)? */
297 	if (a_len < b_len && !memcmp(a, b, a_len) && b[a_len] < '/')
298 		return 1; /* keep looking */
299 
300 	return -1; /* a cannot appear in the tree */
301 }
302 
303 /*
304  * From the extended tree_desc, extract the first name entry, while
305  * paying attention to the candidate "first" name.  Most importantly,
306  * when looking for an entry, if there are entries that sorts earlier
307  * in the tree object representation than that name, skip them and
308  * process the named entry first.  We will remember that we haven't
309  * processed the first entry yet, and in the later call skip the
310  * entry we processed early when update_extended_entry() is called.
311  *
312  * E.g. if the underlying tree object has these entries:
313  *
314  *    blob    "t-1"
315  *    blob    "t-2"
316  *    tree    "t"
317  *    blob    "t=1"
318  *
319  * and the "first" asks for "t", remember that we still need to
320  * process "t-1" and "t-2" but extract "t".  After processing the
321  * entry "t" from this call, the caller will let us know by calling
322  * update_extended_entry() that we can remember "t" has been processed
323  * already.
324  */
325 
extended_entry_extract(struct tree_desc_x * t,struct name_entry * a,const char * first,int first_len)326 static void extended_entry_extract(struct tree_desc_x *t,
327 				   struct name_entry *a,
328 				   const char *first,
329 				   int first_len)
330 {
331 	const char *path;
332 	int len;
333 	struct tree_desc probe;
334 	struct tree_desc_skip *skip;
335 
336 	/*
337 	 * Extract the first entry from the tree_desc, but skip the
338 	 * ones that we already returned in earlier rounds.
339 	 */
340 	while (1) {
341 		if (!t->d.size) {
342 			entry_clear(a);
343 			break; /* not found */
344 		}
345 		entry_extract(&t->d, a);
346 		for (skip = t->skip; skip; skip = skip->prev)
347 			if (a->path == skip->ptr)
348 				break; /* found */
349 		if (!skip)
350 			break;
351 		/* We have processed this entry already. */
352 		update_tree_entry(&t->d);
353 	}
354 
355 	if (!first || !a->path)
356 		return;
357 
358 	/*
359 	 * The caller wants "first" from this tree, or nothing.
360 	 */
361 	path = a->path;
362 	len = tree_entry_len(a);
363 	switch (check_entry_match(first, first_len, path, len)) {
364 	case -1:
365 		entry_clear(a);
366 	case 0:
367 		return;
368 	default:
369 		break;
370 	}
371 
372 	/*
373 	 * We need to look-ahead -- we suspect that a subtree whose
374 	 * name is "first" may be hiding behind the current entry "path".
375 	 */
376 	probe = t->d;
377 	while (probe.size) {
378 		entry_extract(&probe, a);
379 		path = a->path;
380 		len = tree_entry_len(a);
381 		switch (check_entry_match(first, first_len, path, len)) {
382 		case -1:
383 			entry_clear(a);
384 		case 0:
385 			return;
386 		default:
387 			update_tree_entry(&probe);
388 			break;
389 		}
390 		/* keep looking */
391 	}
392 	entry_clear(a);
393 }
394 
update_extended_entry(struct tree_desc_x * t,struct name_entry * a)395 static void update_extended_entry(struct tree_desc_x *t, struct name_entry *a)
396 {
397 	if (t->d.entry.path == a->path) {
398 		update_tree_entry(&t->d);
399 	} else {
400 		/* we have returned this entry early */
401 		struct tree_desc_skip *skip = xmalloc(sizeof(*skip));
402 		skip->ptr = a->path;
403 		skip->prev = t->skip;
404 		t->skip = skip;
405 	}
406 }
407 
free_extended_entry(struct tree_desc_x * t)408 static void free_extended_entry(struct tree_desc_x *t)
409 {
410 	struct tree_desc_skip *p, *s;
411 
412 	for (s = t->skip; s; s = p) {
413 		p = s->prev;
414 		free(s);
415 	}
416 }
417 
prune_traversal(struct index_state * istate,struct name_entry * e,struct traverse_info * info,struct strbuf * base,int still_interesting)418 static inline int prune_traversal(struct index_state *istate,
419 				  struct name_entry *e,
420 				  struct traverse_info *info,
421 				  struct strbuf *base,
422 				  int still_interesting)
423 {
424 	if (!info->pathspec || still_interesting == 2)
425 		return 2;
426 	if (still_interesting < 0)
427 		return still_interesting;
428 	return tree_entry_interesting(istate, e, base,
429 				      0, info->pathspec);
430 }
431 
traverse_trees(struct index_state * istate,int n,struct tree_desc * t,struct traverse_info * info)432 int traverse_trees(struct index_state *istate,
433 		   int n, struct tree_desc *t,
434 		   struct traverse_info *info)
435 {
436 	int error = 0;
437 	struct name_entry entry[MAX_TRAVERSE_TREES];
438 	int i;
439 	struct tree_desc_x tx[ARRAY_SIZE(entry)];
440 	struct strbuf base = STRBUF_INIT;
441 	int interesting = 1;
442 	char *traverse_path;
443 
444 	traverse_trees_count++;
445 	traverse_trees_cur_depth++;
446 
447 	if (traverse_trees_cur_depth > traverse_trees_max_depth)
448 		traverse_trees_max_depth = traverse_trees_cur_depth;
449 
450 	if (n >= ARRAY_SIZE(entry))
451 		BUG("traverse_trees() called with too many trees (%d)", n);
452 
453 	for (i = 0; i < n; i++) {
454 		tx[i].d = t[i];
455 		tx[i].skip = NULL;
456 	}
457 
458 	if (info->prev) {
459 		strbuf_make_traverse_path(&base, info->prev,
460 					  info->name, info->namelen);
461 		strbuf_addch(&base, '/');
462 		traverse_path = xstrndup(base.buf, base.len);
463 	} else {
464 		traverse_path = xstrndup(info->name, info->pathlen);
465 	}
466 	info->traverse_path = traverse_path;
467 	for (;;) {
468 		int trees_used;
469 		unsigned long mask, dirmask;
470 		const char *first = NULL;
471 		int first_len = 0;
472 		struct name_entry *e = NULL;
473 		int len;
474 
475 		for (i = 0; i < n; i++) {
476 			e = entry + i;
477 			extended_entry_extract(tx + i, e, NULL, 0);
478 		}
479 
480 		/*
481 		 * A tree may have "t-2" at the current location even
482 		 * though it may have "t" that is a subtree behind it,
483 		 * and another tree may return "t".  We want to grab
484 		 * all "t" from all trees to match in such a case.
485 		 */
486 		for (i = 0; i < n; i++) {
487 			e = entry + i;
488 			if (!e->path)
489 				continue;
490 			len = tree_entry_len(e);
491 			if (!first) {
492 				first = e->path;
493 				first_len = len;
494 				continue;
495 			}
496 			if (name_compare(e->path, len, first, first_len) < 0) {
497 				first = e->path;
498 				first_len = len;
499 			}
500 		}
501 
502 		if (first) {
503 			for (i = 0; i < n; i++) {
504 				e = entry + i;
505 				extended_entry_extract(tx + i, e, first, first_len);
506 				/* Cull the ones that are not the earliest */
507 				if (!e->path)
508 					continue;
509 				len = tree_entry_len(e);
510 				if (name_compare(e->path, len, first, first_len))
511 					entry_clear(e);
512 			}
513 		}
514 
515 		/* Now we have in entry[i] the earliest name from the trees */
516 		mask = 0;
517 		dirmask = 0;
518 		for (i = 0; i < n; i++) {
519 			if (!entry[i].path)
520 				continue;
521 			mask |= 1ul << i;
522 			if (S_ISDIR(entry[i].mode))
523 				dirmask |= 1ul << i;
524 			e = &entry[i];
525 		}
526 		if (!mask)
527 			break;
528 		interesting = prune_traversal(istate, e, info, &base, interesting);
529 		if (interesting < 0)
530 			break;
531 		if (interesting) {
532 			trees_used = info->fn(n, mask, dirmask, entry, info);
533 			if (trees_used < 0) {
534 				error = trees_used;
535 				if (!info->show_all_errors)
536 					break;
537 			}
538 			mask &= trees_used;
539 		}
540 		for (i = 0; i < n; i++)
541 			if (mask & (1ul << i))
542 				update_extended_entry(tx + i, entry + i);
543 	}
544 	for (i = 0; i < n; i++)
545 		free_extended_entry(tx + i);
546 	free(traverse_path);
547 	info->traverse_path = NULL;
548 	strbuf_release(&base);
549 
550 	traverse_trees_cur_depth--;
551 	return error;
552 }
553 
554 struct dir_state {
555 	void *tree;
556 	unsigned long size;
557 	struct object_id oid;
558 };
559 
find_tree_entry(struct repository * r,struct tree_desc * t,const char * name,struct object_id * result,unsigned short * mode)560 static int find_tree_entry(struct repository *r, struct tree_desc *t,
561 			   const char *name, struct object_id *result,
562 			   unsigned short *mode)
563 {
564 	int namelen = strlen(name);
565 	while (t->size) {
566 		const char *entry;
567 		struct object_id oid;
568 		int entrylen, cmp;
569 
570 		oidcpy(&oid, tree_entry_extract(t, &entry, mode));
571 		entrylen = tree_entry_len(&t->entry);
572 		update_tree_entry(t);
573 		if (entrylen > namelen)
574 			continue;
575 		cmp = memcmp(name, entry, entrylen);
576 		if (cmp > 0)
577 			continue;
578 		if (cmp < 0)
579 			break;
580 		if (entrylen == namelen) {
581 			oidcpy(result, &oid);
582 			return 0;
583 		}
584 		if (name[entrylen] != '/')
585 			continue;
586 		if (!S_ISDIR(*mode))
587 			break;
588 		if (++entrylen == namelen) {
589 			oidcpy(result, &oid);
590 			return 0;
591 		}
592 		return get_tree_entry(r, &oid, name + entrylen, result, mode);
593 	}
594 	return -1;
595 }
596 
get_tree_entry(struct repository * r,const struct object_id * tree_oid,const char * name,struct object_id * oid,unsigned short * mode)597 int get_tree_entry(struct repository *r,
598 		   const struct object_id *tree_oid,
599 		   const char *name,
600 		   struct object_id *oid,
601 		   unsigned short *mode)
602 {
603 	int retval;
604 	void *tree;
605 	unsigned long size;
606 	struct object_id root;
607 
608 	tree = read_object_with_reference(r, tree_oid, tree_type, &size, &root);
609 	if (!tree)
610 		return -1;
611 
612 	if (name[0] == '\0') {
613 		oidcpy(oid, &root);
614 		free(tree);
615 		return 0;
616 	}
617 
618 	if (!size) {
619 		retval = -1;
620 	} else {
621 		struct tree_desc t;
622 		init_tree_desc(&t, tree, size);
623 		retval = find_tree_entry(r, &t, name, oid, mode);
624 	}
625 	free(tree);
626 	return retval;
627 }
628 
629 /*
630  * This is Linux's built-in max for the number of symlinks to follow.
631  * That limit, of course, does not affect git, but it's a reasonable
632  * choice.
633  */
634 #define GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS 40
635 
636 /**
637  * Find a tree entry by following symlinks in tree_sha (which is
638  * assumed to be the root of the repository).  In the event that a
639  * symlink points outside the repository (e.g. a link to /foo or a
640  * root-level link to ../foo), the portion of the link which is
641  * outside the repository will be returned in result_path, and *mode
642  * will be set to 0.  It is assumed that result_path is uninitialized.
643  * If there are no symlinks, or the end result of the symlink chain
644  * points to an object inside the repository, result will be filled in
645  * with the sha1 of the found object, and *mode will hold the mode of
646  * the object.
647  *
648  * See the code for enum get_oid_result for a description of
649  * the return values.
650  */
get_tree_entry_follow_symlinks(struct repository * r,struct object_id * tree_oid,const char * name,struct object_id * result,struct strbuf * result_path,unsigned short * mode)651 enum get_oid_result get_tree_entry_follow_symlinks(struct repository *r,
652 		struct object_id *tree_oid, const char *name,
653 		struct object_id *result, struct strbuf *result_path,
654 		unsigned short *mode)
655 {
656 	int retval = MISSING_OBJECT;
657 	struct dir_state *parents = NULL;
658 	size_t parents_alloc = 0;
659 	size_t i, parents_nr = 0;
660 	struct object_id current_tree_oid;
661 	struct strbuf namebuf = STRBUF_INIT;
662 	struct tree_desc t;
663 	int follows_remaining = GET_TREE_ENTRY_FOLLOW_SYMLINKS_MAX_LINKS;
664 
665 	init_tree_desc(&t, NULL, 0UL);
666 	strbuf_addstr(&namebuf, name);
667 	oidcpy(&current_tree_oid, tree_oid);
668 
669 	while (1) {
670 		int find_result;
671 		char *first_slash;
672 		char *remainder = NULL;
673 
674 		if (!t.buffer) {
675 			void *tree;
676 			struct object_id root;
677 			unsigned long size;
678 			tree = read_object_with_reference(r,
679 							  &current_tree_oid,
680 							  tree_type, &size,
681 							  &root);
682 			if (!tree)
683 				goto done;
684 
685 			ALLOC_GROW(parents, parents_nr + 1, parents_alloc);
686 			parents[parents_nr].tree = tree;
687 			parents[parents_nr].size = size;
688 			oidcpy(&parents[parents_nr].oid, &root);
689 			parents_nr++;
690 
691 			if (namebuf.buf[0] == '\0') {
692 				oidcpy(result, &root);
693 				retval = FOUND;
694 				goto done;
695 			}
696 
697 			if (!size)
698 				goto done;
699 
700 			/* descend */
701 			init_tree_desc(&t, tree, size);
702 		}
703 
704 		/* Handle symlinks to e.g. a//b by removing leading slashes */
705 		while (namebuf.buf[0] == '/') {
706 			strbuf_remove(&namebuf, 0, 1);
707 		}
708 
709 		/* Split namebuf into a first component and a remainder */
710 		if ((first_slash = strchr(namebuf.buf, '/'))) {
711 			*first_slash = 0;
712 			remainder = first_slash + 1;
713 		}
714 
715 		if (!strcmp(namebuf.buf, "..")) {
716 			struct dir_state *parent;
717 			/*
718 			 * We could end up with .. in the namebuf if it
719 			 * appears in a symlink.
720 			 */
721 
722 			if (parents_nr == 1) {
723 				if (remainder)
724 					*first_slash = '/';
725 				strbuf_add(result_path, namebuf.buf,
726 					   namebuf.len);
727 				*mode = 0;
728 				retval = FOUND;
729 				goto done;
730 			}
731 			parent = &parents[parents_nr - 1];
732 			free(parent->tree);
733 			parents_nr--;
734 			parent = &parents[parents_nr - 1];
735 			init_tree_desc(&t, parent->tree, parent->size);
736 			strbuf_remove(&namebuf, 0, remainder ? 3 : 2);
737 			continue;
738 		}
739 
740 		/* We could end up here via a symlink to dir/.. */
741 		if (namebuf.buf[0] == '\0') {
742 			oidcpy(result, &parents[parents_nr - 1].oid);
743 			retval = FOUND;
744 			goto done;
745 		}
746 
747 		/* Look up the first (or only) path component in the tree. */
748 		find_result = find_tree_entry(r, &t, namebuf.buf,
749 					      &current_tree_oid, mode);
750 		if (find_result) {
751 			goto done;
752 		}
753 
754 		if (S_ISDIR(*mode)) {
755 			if (!remainder) {
756 				oidcpy(result, &current_tree_oid);
757 				retval = FOUND;
758 				goto done;
759 			}
760 			/* Descend the tree */
761 			t.buffer = NULL;
762 			strbuf_remove(&namebuf, 0,
763 				      1 + first_slash - namebuf.buf);
764 		} else if (S_ISREG(*mode)) {
765 			if (!remainder) {
766 				oidcpy(result, &current_tree_oid);
767 				retval = FOUND;
768 			} else {
769 				retval = NOT_DIR;
770 			}
771 			goto done;
772 		} else if (S_ISLNK(*mode)) {
773 			/* Follow a symlink */
774 			unsigned long link_len;
775 			size_t len;
776 			char *contents, *contents_start;
777 			struct dir_state *parent;
778 			enum object_type type;
779 
780 			if (follows_remaining-- == 0) {
781 				/* Too many symlinks followed */
782 				retval = SYMLINK_LOOP;
783 				goto done;
784 			}
785 
786 			/*
787 			 * At this point, we have followed at a least
788 			 * one symlink, so on error we need to report this.
789 			 */
790 			retval = DANGLING_SYMLINK;
791 
792 			contents = repo_read_object_file(r,
793 						    &current_tree_oid, &type,
794 						    &link_len);
795 
796 			if (!contents)
797 				goto done;
798 
799 			if (contents[0] == '/') {
800 				strbuf_addstr(result_path, contents);
801 				free(contents);
802 				*mode = 0;
803 				retval = FOUND;
804 				goto done;
805 			}
806 
807 			if (remainder)
808 				len = first_slash - namebuf.buf;
809 			else
810 				len = namebuf.len;
811 
812 			contents_start = contents;
813 
814 			parent = &parents[parents_nr - 1];
815 			init_tree_desc(&t, parent->tree, parent->size);
816 			strbuf_splice(&namebuf, 0, len,
817 				      contents_start, link_len);
818 			if (remainder)
819 				namebuf.buf[link_len] = '/';
820 			free(contents);
821 		}
822 	}
823 done:
824 	for (i = 0; i < parents_nr; i++)
825 		free(parents[i].tree);
826 	free(parents);
827 
828 	strbuf_release(&namebuf);
829 	return retval;
830 }
831 
match_entry(const struct pathspec_item * item,const struct name_entry * entry,int pathlen,const char * match,int matchlen,enum interesting * never_interesting)832 static int match_entry(const struct pathspec_item *item,
833 		       const struct name_entry *entry, int pathlen,
834 		       const char *match, int matchlen,
835 		       enum interesting *never_interesting)
836 {
837 	int m = -1; /* signals that we haven't called strncmp() */
838 
839 	if (item->magic & PATHSPEC_ICASE)
840 		/*
841 		 * "Never interesting" trick requires exact
842 		 * matching. We could do something clever with inexact
843 		 * matching, but it's trickier (and not to forget that
844 		 * strcasecmp is locale-dependent, at least in
845 		 * glibc). Just disable it for now. It can't be worse
846 		 * than the wildcard's codepath of '[Tt][Hi][Is][Ss]'
847 		 * pattern.
848 		 */
849 		*never_interesting = entry_not_interesting;
850 	else if (*never_interesting != entry_not_interesting) {
851 		/*
852 		 * We have not seen any match that sorts later
853 		 * than the current path.
854 		 */
855 
856 		/*
857 		 * Does match sort strictly earlier than path
858 		 * with their common parts?
859 		 */
860 		m = strncmp(match, entry->path,
861 			    (matchlen < pathlen) ? matchlen : pathlen);
862 		if (m < 0)
863 			return 0;
864 
865 		/*
866 		 * If we come here even once, that means there is at
867 		 * least one pathspec that would sort equal to or
868 		 * later than the path we are currently looking at.
869 		 * In other words, if we have never reached this point
870 		 * after iterating all pathspecs, it means all
871 		 * pathspecs are either outside of base, or inside the
872 		 * base but sorts strictly earlier than the current
873 		 * one.  In either case, they will never match the
874 		 * subsequent entries.  In such a case, we initialized
875 		 * the variable to -1 and that is what will be
876 		 * returned, allowing the caller to terminate early.
877 		 */
878 		*never_interesting = entry_not_interesting;
879 	}
880 
881 	if (pathlen > matchlen)
882 		return 0;
883 
884 	if (matchlen > pathlen) {
885 		if (match[pathlen] != '/')
886 			return 0;
887 		/*
888 		 * Reject non-directories as partial pathnames, except
889 		 * when match is a submodule with a trailing slash and
890 		 * nothing else (to handle 'submod/' and 'submod'
891 		 * uniformly).
892 		 */
893 		if (!S_ISDIR(entry->mode) &&
894 		    (!S_ISGITLINK(entry->mode) || matchlen > pathlen + 1))
895 			return 0;
896 	}
897 
898 	if (m == -1)
899 		/*
900 		 * we cheated and did not do strncmp(), so we do
901 		 * that here.
902 		 */
903 		m = ps_strncmp(item, match, entry->path, pathlen);
904 
905 	/*
906 	 * If common part matched earlier then it is a hit,
907 	 * because we rejected the case where path is not a
908 	 * leading directory and is shorter than match.
909 	 */
910 	if (!m)
911 		/*
912 		 * match_entry does not check if the prefix part is
913 		 * matched case-sensitively. If the entry is a
914 		 * directory and part of prefix, it'll be rematched
915 		 * eventually by basecmp with special treatment for
916 		 * the prefix.
917 		 */
918 		return 1;
919 
920 	return 0;
921 }
922 
923 /* :(icase)-aware string compare */
basecmp(const struct pathspec_item * item,const char * base,const char * match,int len)924 static int basecmp(const struct pathspec_item *item,
925 		   const char *base, const char *match, int len)
926 {
927 	if (item->magic & PATHSPEC_ICASE) {
928 		int ret, n = len > item->prefix ? item->prefix : len;
929 		ret = strncmp(base, match, n);
930 		if (ret)
931 			return ret;
932 		base += n;
933 		match += n;
934 		len -= n;
935 	}
936 	return ps_strncmp(item, base, match, len);
937 }
938 
match_dir_prefix(const struct pathspec_item * item,const char * base,const char * match,int matchlen)939 static int match_dir_prefix(const struct pathspec_item *item,
940 			    const char *base,
941 			    const char *match, int matchlen)
942 {
943 	if (basecmp(item, base, match, matchlen))
944 		return 0;
945 
946 	/*
947 	 * If the base is a subdirectory of a path which
948 	 * was specified, all of them are interesting.
949 	 */
950 	if (!matchlen ||
951 	    base[matchlen] == '/' ||
952 	    match[matchlen - 1] == '/')
953 		return 1;
954 
955 	/* Just a random prefix match */
956 	return 0;
957 }
958 
959 /*
960  * Perform matching on the leading non-wildcard part of
961  * pathspec. item->nowildcard_len must be greater than zero. Return
962  * non-zero if base is matched.
963  */
match_wildcard_base(const struct pathspec_item * item,const char * base,int baselen,int * matched)964 static int match_wildcard_base(const struct pathspec_item *item,
965 			       const char *base, int baselen,
966 			       int *matched)
967 {
968 	const char *match = item->match;
969 	/* the wildcard part is not considered in this function */
970 	int matchlen = item->nowildcard_len;
971 
972 	if (baselen) {
973 		int dirlen;
974 		/*
975 		 * Return early if base is longer than the
976 		 * non-wildcard part but it does not match.
977 		 */
978 		if (baselen >= matchlen) {
979 			*matched = matchlen;
980 			return !basecmp(item, base, match, matchlen);
981 		}
982 
983 		dirlen = matchlen;
984 		while (dirlen && match[dirlen - 1] != '/')
985 			dirlen--;
986 
987 		/*
988 		 * Return early if base is shorter than the
989 		 * non-wildcard part but it does not match. Note that
990 		 * base ends with '/' so we are sure it really matches
991 		 * directory
992 		 */
993 		if (basecmp(item, base, match, baselen))
994 			return 0;
995 		*matched = baselen;
996 	} else
997 		*matched = 0;
998 	/*
999 	 * we could have checked entry against the non-wildcard part
1000 	 * that is not in base and does similar never_interesting
1001 	 * optimization as in match_entry. For now just be happy with
1002 	 * base comparison.
1003 	 */
1004 	return entry_interesting;
1005 }
1006 
1007 /*
1008  * Is a tree entry interesting given the pathspec we have?
1009  *
1010  * Pre-condition: either baselen == base_offset (i.e. empty path)
1011  * or base[baselen-1] == '/' (i.e. with trailing slash).
1012  */
do_match(struct index_state * istate,const struct name_entry * entry,struct strbuf * base,int base_offset,const struct pathspec * ps,int exclude)1013 static enum interesting do_match(struct index_state *istate,
1014 				 const struct name_entry *entry,
1015 				 struct strbuf *base, int base_offset,
1016 				 const struct pathspec *ps,
1017 				 int exclude)
1018 {
1019 	int i;
1020 	int pathlen, baselen = base->len - base_offset;
1021 	enum interesting never_interesting = ps->has_wildcard ?
1022 		entry_not_interesting : all_entries_not_interesting;
1023 
1024 	GUARD_PATHSPEC(ps,
1025 		       PATHSPEC_FROMTOP |
1026 		       PATHSPEC_MAXDEPTH |
1027 		       PATHSPEC_LITERAL |
1028 		       PATHSPEC_GLOB |
1029 		       PATHSPEC_ICASE |
1030 		       PATHSPEC_EXCLUDE |
1031 		       PATHSPEC_ATTR);
1032 
1033 	if (!ps->nr) {
1034 		if (!ps->recursive ||
1035 		    !(ps->magic & PATHSPEC_MAXDEPTH) ||
1036 		    ps->max_depth == -1)
1037 			return all_entries_interesting;
1038 		return within_depth(base->buf + base_offset, baselen,
1039 				    !!S_ISDIR(entry->mode),
1040 				    ps->max_depth) ?
1041 			entry_interesting : entry_not_interesting;
1042 	}
1043 
1044 	pathlen = tree_entry_len(entry);
1045 
1046 	for (i = ps->nr - 1; i >= 0; i--) {
1047 		const struct pathspec_item *item = ps->items+i;
1048 		const char *match = item->match;
1049 		const char *base_str = base->buf + base_offset;
1050 		int matchlen = item->len, matched = 0;
1051 
1052 		if ((!exclude &&   item->magic & PATHSPEC_EXCLUDE) ||
1053 		    ( exclude && !(item->magic & PATHSPEC_EXCLUDE)))
1054 			continue;
1055 
1056 		if (baselen >= matchlen) {
1057 			/* If it doesn't match, move along... */
1058 			if (!match_dir_prefix(item, base_str, match, matchlen))
1059 				goto match_wildcards;
1060 
1061 			if (!ps->recursive ||
1062 			    !(ps->magic & PATHSPEC_MAXDEPTH) ||
1063 			    ps->max_depth == -1) {
1064 				if (!item->attr_match_nr)
1065 					return all_entries_interesting;
1066 				else
1067 					goto interesting;
1068 			}
1069 
1070 			if (within_depth(base_str + matchlen + 1,
1071 					 baselen - matchlen - 1,
1072 					 !!S_ISDIR(entry->mode),
1073 					 ps->max_depth))
1074 				goto interesting;
1075 			else
1076 				return entry_not_interesting;
1077 		}
1078 
1079 		/* Either there must be no base, or the base must match. */
1080 		if (baselen == 0 || !basecmp(item, base_str, match, baselen)) {
1081 			if (match_entry(item, entry, pathlen,
1082 					match + baselen, matchlen - baselen,
1083 					&never_interesting))
1084 				goto interesting;
1085 
1086 			if (item->nowildcard_len < item->len) {
1087 				if (!git_fnmatch(item, match + baselen, entry->path,
1088 						 item->nowildcard_len - baselen))
1089 					goto interesting;
1090 
1091 				/*
1092 				 * Match all directories. We'll try to
1093 				 * match files later on.
1094 				 */
1095 				if (ps->recursive && S_ISDIR(entry->mode))
1096 					return entry_interesting;
1097 
1098 				/*
1099 				 * When matching against submodules with
1100 				 * wildcard characters, ensure that the entry
1101 				 * at least matches up to the first wild
1102 				 * character.  More accurate matching can then
1103 				 * be performed in the submodule itself.
1104 				 */
1105 				if (ps->recurse_submodules &&
1106 				    S_ISGITLINK(entry->mode) &&
1107 				    !ps_strncmp(item, match + baselen,
1108 						entry->path,
1109 						item->nowildcard_len - baselen))
1110 					goto interesting;
1111 			}
1112 
1113 			continue;
1114 		}
1115 
1116 match_wildcards:
1117 		if (item->nowildcard_len == item->len)
1118 			continue;
1119 
1120 		if (item->nowildcard_len &&
1121 		    !match_wildcard_base(item, base_str, baselen, &matched))
1122 			continue;
1123 
1124 		/*
1125 		 * Concatenate base and entry->path into one and do
1126 		 * fnmatch() on it.
1127 		 *
1128 		 * While we could avoid concatenation in certain cases
1129 		 * [1], which saves a memcpy and potentially a
1130 		 * realloc, it turns out not worth it. Measurement on
1131 		 * linux-2.6 does not show any clear improvements,
1132 		 * partly because of the nowildcard_len optimization
1133 		 * in git_fnmatch(). Avoid micro-optimizations here.
1134 		 *
1135 		 * [1] if match_wildcard_base() says the base
1136 		 * directory is already matched, we only need to match
1137 		 * the rest, which is shorter so _in theory_ faster.
1138 		 */
1139 
1140 		strbuf_add(base, entry->path, pathlen);
1141 
1142 		if (!git_fnmatch(item, match, base->buf + base_offset,
1143 				 item->nowildcard_len)) {
1144 			strbuf_setlen(base, base_offset + baselen);
1145 			goto interesting;
1146 		}
1147 
1148 		/*
1149 		 * When matching against submodules with
1150 		 * wildcard characters, ensure that the entry
1151 		 * at least matches up to the first wild
1152 		 * character.  More accurate matching can then
1153 		 * be performed in the submodule itself.
1154 		 */
1155 		if (ps->recurse_submodules && S_ISGITLINK(entry->mode) &&
1156 		    !ps_strncmp(item, match, base->buf + base_offset,
1157 				item->nowildcard_len)) {
1158 			strbuf_setlen(base, base_offset + baselen);
1159 			goto interesting;
1160 		}
1161 
1162 		strbuf_setlen(base, base_offset + baselen);
1163 
1164 		/*
1165 		 * Match all directories. We'll try to match files
1166 		 * later on.
1167 		 * max_depth is ignored but we may consider support it
1168 		 * in future, see
1169 		 * https://lore.kernel.org/git/7vmxo5l2g4.fsf@alter.siamese.dyndns.org/
1170 		 */
1171 		if (ps->recursive && S_ISDIR(entry->mode))
1172 			return entry_interesting;
1173 		continue;
1174 interesting:
1175 		if (item->attr_match_nr) {
1176 			int ret;
1177 
1178 			/*
1179 			 * Must not return all_entries_not_interesting
1180 			 * prematurely. We do not know if all entries do not
1181 			 * match some attributes with current attr API.
1182 			 */
1183 			never_interesting = entry_not_interesting;
1184 
1185 			/*
1186 			 * Consider all directories interesting (because some
1187 			 * of those files inside may match some attributes
1188 			 * even though the parent dir does not)
1189 			 *
1190 			 * FIXME: attributes _can_ match directories and we
1191 			 * can probably return all_entries_interesting or
1192 			 * all_entries_not_interesting here if matched.
1193 			 */
1194 			if (S_ISDIR(entry->mode))
1195 				return entry_interesting;
1196 
1197 			strbuf_add(base, entry->path, pathlen);
1198 			ret = match_pathspec_attrs(istate, base->buf + base_offset,
1199 						   base->len - base_offset, item);
1200 			strbuf_setlen(base, base_offset + baselen);
1201 			if (!ret)
1202 				continue;
1203 		}
1204 		return entry_interesting;
1205 	}
1206 	return never_interesting; /* No matches */
1207 }
1208 
1209 /*
1210  * Is a tree entry interesting given the pathspec we have?
1211  *
1212  * Pre-condition: either baselen == base_offset (i.e. empty path)
1213  * or base[baselen-1] == '/' (i.e. with trailing slash).
1214  */
tree_entry_interesting(struct index_state * istate,const struct name_entry * entry,struct strbuf * base,int base_offset,const struct pathspec * ps)1215 enum interesting tree_entry_interesting(struct index_state *istate,
1216 					const struct name_entry *entry,
1217 					struct strbuf *base, int base_offset,
1218 					const struct pathspec *ps)
1219 {
1220 	enum interesting positive, negative;
1221 	positive = do_match(istate, entry, base, base_offset, ps, 0);
1222 
1223 	/*
1224 	 * case | entry | positive | negative | result
1225 	 * -----+-------+----------+----------+-------
1226 	 *   1  |  file |   -1     |  -1..2   |  -1
1227 	 *   2  |  file |    0     |  -1..2   |   0
1228 	 *   3  |  file |    1     |   -1     |   1
1229 	 *   4  |  file |    1     |    0     |   1
1230 	 *   5  |  file |    1     |    1     |   0
1231 	 *   6  |  file |    1     |    2     |   0
1232 	 *   7  |  file |    2     |   -1     |   2
1233 	 *   8  |  file |    2     |    0     |   1
1234 	 *   9  |  file |    2     |    1     |   0
1235 	 *  10  |  file |    2     |    2     |  -1
1236 	 * -----+-------+----------+----------+-------
1237 	 *  11  |  dir  |   -1     |  -1..2   |  -1
1238 	 *  12  |  dir  |    0     |  -1..2   |   0
1239 	 *  13  |  dir  |    1     |   -1     |   1
1240 	 *  14  |  dir  |    1     |    0     |   1
1241 	 *  15  |  dir  |    1     |    1     |   1 (*)
1242 	 *  16  |  dir  |    1     |    2     |   0
1243 	 *  17  |  dir  |    2     |   -1     |   2
1244 	 *  18  |  dir  |    2     |    0     |   1
1245 	 *  19  |  dir  |    2     |    1     |   1 (*)
1246 	 *  20  |  dir  |    2     |    2     |  -1
1247 	 *
1248 	 * (*) An exclude pattern interested in a directory does not
1249 	 * necessarily mean it will exclude all of the directory. In
1250 	 * wildcard case, it can't decide until looking at individual
1251 	 * files inside. So don't write such directories off yet.
1252 	 */
1253 
1254 	if (!(ps->magic & PATHSPEC_EXCLUDE) ||
1255 	    positive <= entry_not_interesting) /* #1, #2, #11, #12 */
1256 		return positive;
1257 
1258 	negative = do_match(istate, entry, base, base_offset, ps, 1);
1259 
1260 	/* #8, #18 */
1261 	if (positive == all_entries_interesting &&
1262 	    negative == entry_not_interesting)
1263 		return entry_interesting;
1264 
1265 	/* #3, #4, #7, #13, #14, #17 */
1266 	if (negative <= entry_not_interesting)
1267 		return positive;
1268 
1269 	/* #15, #19 */
1270 	if (S_ISDIR(entry->mode) &&
1271 	    positive >= entry_interesting &&
1272 	    negative == entry_interesting)
1273 		return entry_interesting;
1274 
1275 	if ((positive == entry_interesting &&
1276 	     negative >= entry_interesting) || /* #5, #6, #16 */
1277 	    (positive == all_entries_interesting &&
1278 	     negative == entry_interesting)) /* #9 */
1279 		return entry_not_interesting;
1280 
1281 	return all_entries_not_interesting; /* #10, #20 */
1282 }
1283