xref: /dragonfly/lib/libc/gen/glob.c (revision 0720b42f)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)glob.c	8.3 (Berkeley) 10/13/93
38  * $FreeBSD: head/lib/libc/gen/glob.c 249381 2013-04-11 20:15:37Z emaste $
39  */
40 
41 /*
42  * glob(3) -- a superset of the one defined in POSIX 1003.2.
43  *
44  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
45  *
46  * Optional extra services, controlled by flags not defined by POSIX:
47  *
48  * GLOB_QUOTE:
49  *	Escaping convention: \ inhibits any special meaning the following
50  *	character might have (except \ at end of string is retained).
51  * GLOB_MAGCHAR:
52  *	Set in gl_flags if pattern contained a globbing character.
53  * GLOB_NOMAGIC:
54  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
55  *	not contain any magic characters.  [Used in csh style globbing]
56  * GLOB_ALTDIRFUNC:
57  *	Use alternately specified directory access functions.
58  * GLOB_TILDE:
59  *	expand ~user/foo to the /home/dir/of/user/foo
60  * GLOB_BRACE:
61  *	expand {1,2}{a,b} to 1a 1b 2a 2b
62  * gl_matchc:
63  *	Number of matches in the current invocation of glob.
64  */
65 
66 /*
67  * Some notes on multibyte character support:
68  * 1. Patterns with illegal byte sequences match nothing - even if
69  *    GLOB_NOCHECK is specified.
70  * 2. Illegal byte sequences in filenames are handled by treating them as
71  *    single-byte characters with a value of the first byte of the sequence
72  *    cast to wchar_t.
73  * 3. State-dependent encodings are not currently supported.
74  */
75 
76 #include <sys/param.h>
77 #include <sys/stat.h>
78 
79 #include <ctype.h>
80 #include <dirent.h>
81 #include <errno.h>
82 #include <glob.h>
83 #include <limits.h>
84 #include <pwd.h>
85 #include <stdint.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <unistd.h>
90 #include <wchar.h>
91 
92 #include "collate.h"
93 
94 /*
95  * glob(3) expansion limits. Stop the expansion if any of these limits
96  * is reached. This caps the runtime in the face of DoS attacks. See
97  * also CVE-2010-2632
98  */
99 #define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
100 #define	GLOB_LIMIT_PATH		65536	/* number of path elements */
101 #define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
102 #define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
103 #define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
104 
105 struct glob_limit {
106 	size_t	l_brace_cnt;
107 	size_t	l_path_lim;
108 	size_t	l_readdir_cnt;
109 	size_t	l_stat_cnt;
110 	size_t	l_string_cnt;
111 };
112 
113 #define	DOLLAR		'$'
114 #define	DOT		'.'
115 #define	EOS		'\0'
116 #define	LBRACKET	'['
117 #define	NOT		'!'
118 #define	QUESTION	'?'
119 #define	QUOTE		'\\'
120 #define	RANGE		'-'
121 #define	RBRACKET	']'
122 #define	SEP		'/'
123 #define	STAR		'*'
124 #define	TILDE		'~'
125 #define	UNDERSCORE	'_'
126 #define	LBRACE		'{'
127 #define	RBRACE		'}'
128 #define	SLASH		'/'
129 #define	COMMA		','
130 
131 #ifndef DEBUG
132 
133 #define	M_QUOTE		0x8000000000ULL
134 #define	M_PROTECT	0x4000000000ULL
135 #define	M_MASK		0xffffffffffULL
136 #define	M_CHAR		0x00ffffffffULL
137 
138 typedef uint_fast64_t Char;
139 
140 #else
141 
142 #define	M_QUOTE		0x80
143 #define	M_PROTECT	0x40
144 #define	M_MASK		0xff
145 #define	M_CHAR		0x7f
146 
147 typedef char Char;
148 
149 #endif
150 
151 
152 #define	CHAR(c)		((Char)((c)&M_CHAR))
153 #define	META(c)		((Char)((c)|M_QUOTE))
154 #define	M_ALL		META('*')
155 #define	M_END		META(']')
156 #define	M_NOT		META('!')
157 #define	M_ONE		META('?')
158 #define	M_RNG		META('-')
159 #define	M_SET		META('[')
160 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
161 
162 
163 static int	 compare(const void *, const void *);
164 static int	 g_Ctoc(const Char *, char *, size_t);
165 static int	 g_lstat(Char *, struct stat *, glob_t *);
166 static DIR	*g_opendir(Char *, glob_t *);
167 static const Char *g_strchr(const Char *, wchar_t);
168 #ifdef notdef
169 static Char	*g_strcat(Char *, const Char *);
170 #endif
171 static int	 g_stat(Char *, struct stat *, glob_t *);
172 static int	 glob0(const Char *, glob_t *, struct glob_limit *);
173 static int	 glob1(Char *, glob_t *, struct glob_limit *);
174 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *,
175     struct glob_limit *);
176 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
177     struct glob_limit *);
178 static int	 globextend(const Char *, glob_t *, struct glob_limit *);
179 static const Char *
180 		 globtilde(const Char *, Char *, size_t, glob_t *);
181 static int	 globexp1(const Char *, glob_t *, struct glob_limit *);
182 static int	 globexp2(const Char *, const Char *, glob_t *, int *,
183     struct glob_limit *);
184 static int	 match(Char *, Char *, Char *);
185 #ifdef DEBUG
186 static void	 qprintf(const char *, Char *);
187 #endif
188 
189 int
190 glob(const char * __restrict pattern, int flags,
191 	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
192 {
193 	struct glob_limit limit = { 0, 0, 0, 0, 0 };
194 	const char *patnext;
195 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
196 	mbstate_t mbs;
197 	wchar_t wc;
198 	size_t clen;
199 
200 	patnext = pattern;
201 	if (!(flags & GLOB_APPEND)) {
202 		pglob->gl_pathc = 0;
203 		pglob->gl_pathv = NULL;
204 		if (!(flags & GLOB_DOOFFS))
205 			pglob->gl_offs = 0;
206 	}
207 	if (flags & GLOB_LIMIT) {
208 		limit.l_path_lim = pglob->gl_matchc;
209 		if (limit.l_path_lim == 0)
210 			limit.l_path_lim = GLOB_LIMIT_PATH;
211 	}
212 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
213 	pglob->gl_errfunc = errfunc;
214 	pglob->gl_matchc = 0;
215 
216 	bufnext = patbuf;
217 	bufend = bufnext + MAXPATHLEN - 1;
218 	if (flags & GLOB_NOESCAPE) {
219 		memset(&mbs, 0, sizeof(mbs));
220 		while (bufend - bufnext >= MB_CUR_MAX) {
221 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
222 			if (clen == (size_t)-1 || clen == (size_t)-2)
223 				return (GLOB_NOMATCH);
224 			else if (clen == 0)
225 				break;
226 			*bufnext++ = wc;
227 			patnext += clen;
228 		}
229 	} else {
230 		/* Protect the quoted characters. */
231 		memset(&mbs, 0, sizeof(mbs));
232 		while (bufend - bufnext >= MB_CUR_MAX) {
233 			if (*patnext == QUOTE) {
234 				if (*++patnext == EOS) {
235 					*bufnext++ = QUOTE | M_PROTECT;
236 					continue;
237 				}
238 				prot = M_PROTECT;
239 			} else
240 				prot = 0;
241 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
242 			if (clen == (size_t)-1 || clen == (size_t)-2)
243 				return (GLOB_NOMATCH);
244 			else if (clen == 0)
245 				break;
246 			*bufnext++ = wc | prot;
247 			patnext += clen;
248 		}
249 	}
250 	*bufnext = EOS;
251 
252 	if (flags & GLOB_BRACE)
253 	    return (globexp1(patbuf, pglob, &limit));
254 	else
255 	    return (glob0(patbuf, pglob, &limit));
256 }
257 
258 /*
259  * Expand recursively a glob {} pattern. When there is no more expansion
260  * invoke the standard globbing routine to glob the rest of the magic
261  * characters
262  */
263 static int
264 globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
265 {
266 	const Char* ptr = pattern;
267 	int rv;
268 
269 	if ((pglob->gl_flags & GLOB_LIMIT) &&
270 	    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
271 		errno = 0;
272 		return (GLOB_NOSPACE);
273 	}
274 
275 	/* Protect a single {}, for find(1), like csh */
276 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
277 		return glob0(pattern, pglob, limit);
278 
279 	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
280 		if (!globexp2(ptr, pattern, pglob, &rv, limit))
281 			return rv;
282 
283 	return glob0(pattern, pglob, limit);
284 }
285 
286 
287 /*
288  * Recursive brace globbing helper. Tries to expand a single brace.
289  * If it succeeds then it invokes globexp1 with the new pattern.
290  * If it fails then it tries to glob the rest of the pattern and returns.
291  */
292 static int
293 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv,
294     struct glob_limit *limit)
295 {
296 	int     i;
297 	Char   *lm, *ls;
298 	const Char *pe, *pm, *pm1, *pl;
299 	Char    patbuf[MAXPATHLEN];
300 
301 	/* copy part up to the brace */
302 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
303 		continue;
304 	*lm = EOS;
305 	ls = lm;
306 
307 	/* Find the balanced brace */
308 	for (i = 0, pe = ++ptr; *pe; pe++)
309 		if (*pe == LBRACKET) {
310 			/* Ignore everything between [] */
311 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
312 				continue;
313 			if (*pe == EOS) {
314 				/*
315 				 * We could not find a matching RBRACKET.
316 				 * Ignore and just look for RBRACE
317 				 */
318 				pe = pm;
319 			}
320 		}
321 		else if (*pe == LBRACE)
322 			i++;
323 		else if (*pe == RBRACE) {
324 			if (i == 0)
325 				break;
326 			i--;
327 		}
328 
329 	/* Non matching braces; just glob the pattern */
330 	if (i != 0 || *pe == EOS) {
331 		*rv = glob0(patbuf, pglob, limit);
332 		return (0);
333 	}
334 
335 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
336 		switch (*pm) {
337 		case LBRACKET:
338 			/* Ignore everything between [] */
339 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
340 				continue;
341 			if (*pm == EOS) {
342 				/*
343 				 * We could not find a matching RBRACKET.
344 				 * Ignore and just look for RBRACE
345 				 */
346 				pm = pm1;
347 			}
348 			break;
349 
350 		case LBRACE:
351 			i++;
352 			break;
353 
354 		case RBRACE:
355 			if (i) {
356 			    i--;
357 			    break;
358 			}
359 			/* FALLTHROUGH */
360 		case COMMA:
361 			if (i && *pm == COMMA)
362 				break;
363 			else {
364 				/* Append the current string */
365 				for (lm = ls; (pl < pm); *lm++ = *pl++)
366 					continue;
367 				/*
368 				 * Append the rest of the pattern after the
369 				 * closing brace
370 				 */
371 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
372 					continue;
373 
374 				/* Expand the current pattern */
375 #ifdef DEBUG
376 				qprintf("globexp2:", patbuf);
377 #endif
378 				*rv = globexp1(patbuf, pglob, limit);
379 
380 				/* move after the comma, to the next string */
381 				pl = pm + 1;
382 			}
383 			break;
384 
385 		default:
386 			break;
387 		}
388 	*rv = 0;
389 	return (0);
390 }
391 
392 
393 
394 /*
395  * expand tilde from the passwd file.
396  */
397 static const Char *
398 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
399 {
400 	struct passwd *pwd;
401 	char *h;
402 	const Char *p;
403 	Char *b, *eb;
404 
405 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
406 		return (pattern);
407 
408 	/*
409 	 * Copy up to the end of the string or /
410 	 */
411 	eb = &patbuf[patbuf_len - 1];
412 	for (p = pattern + 1, h = (char *) patbuf;
413 	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
414 		continue;
415 
416 	*h = EOS;
417 
418 	if (((char *) patbuf)[0] == EOS) {
419 		/*
420 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
421 		 * we're not running setuid or setgid) and then trying
422 		 * the password file
423 		 */
424 		if (issetugid() != 0 ||
425 		    (h = getenv("HOME")) == NULL) {
426 			if (((h = getlogin()) != NULL &&
427 			     (pwd = getpwnam(h)) != NULL) ||
428 			    (pwd = getpwuid(getuid())) != NULL)
429 				h = pwd->pw_dir;
430 			else
431 				return (pattern);
432 		}
433 	}
434 	else {
435 		/*
436 		 * Expand a ~user
437 		 */
438 		if ((pwd = getpwnam((char*) patbuf)) == NULL)
439 			return (pattern);
440 		else
441 			h = pwd->pw_dir;
442 	}
443 
444 	/* Copy the home directory */
445 	for (b = patbuf; b < eb && *h; *b++ = *h++)
446 		continue;
447 
448 	/* Append the rest of the pattern */
449 	while (b < eb && (*b++ = *p++) != EOS)
450 		continue;
451 	*b = EOS;
452 
453 	return (patbuf);
454 }
455 
456 
457 /*
458  * The main glob() routine: compiles the pattern (optionally processing
459  * quotes), calls glob1() to do the real pattern matching, and finally
460  * sorts the list (unless unsorted operation is requested).  Returns 0
461  * if things went well, nonzero if errors occurred.
462  */
463 static int
464 glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
465 {
466 	const Char *qpatnext;
467 	int err;
468 	size_t oldpathc;
469 	Char *bufnext, c, patbuf[MAXPATHLEN];
470 
471 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
472 	oldpathc = pglob->gl_pathc;
473 	bufnext = patbuf;
474 
475 	/* We don't need to check for buffer overflow any more. */
476 	while ((c = *qpatnext++) != EOS) {
477 		switch (c) {
478 		case LBRACKET:
479 			c = *qpatnext;
480 			if (c == NOT)
481 				++qpatnext;
482 			if (*qpatnext == EOS ||
483 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
484 				*bufnext++ = LBRACKET;
485 				if (c == NOT)
486 					--qpatnext;
487 				break;
488 			}
489 			*bufnext++ = M_SET;
490 			if (c == NOT)
491 				*bufnext++ = M_NOT;
492 			c = *qpatnext++;
493 			do {
494 				*bufnext++ = CHAR(c);
495 				if (*qpatnext == RANGE &&
496 				    (c = qpatnext[1]) != RBRACKET) {
497 					*bufnext++ = M_RNG;
498 					*bufnext++ = CHAR(c);
499 					qpatnext += 2;
500 				}
501 			} while ((c = *qpatnext++) != RBRACKET);
502 			pglob->gl_flags |= GLOB_MAGCHAR;
503 			*bufnext++ = M_END;
504 			break;
505 		case QUESTION:
506 			pglob->gl_flags |= GLOB_MAGCHAR;
507 			*bufnext++ = M_ONE;
508 			break;
509 		case STAR:
510 			pglob->gl_flags |= GLOB_MAGCHAR;
511 			/* collapse adjacent stars to one,
512 			 * to avoid exponential behavior
513 			 */
514 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
515 			    *bufnext++ = M_ALL;
516 			break;
517 		default:
518 			*bufnext++ = CHAR(c);
519 			break;
520 		}
521 	}
522 	*bufnext = EOS;
523 #ifdef DEBUG
524 	qprintf("glob0:", patbuf);
525 #endif
526 
527 	if ((err = glob1(patbuf, pglob, limit)) != 0)
528 		return(err);
529 
530 	/*
531 	 * If there was no match we are going to append the pattern
532 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
533 	 * and the pattern did not contain any magic characters
534 	 * GLOB_NOMAGIC is there just for compatibility with csh.
535 	 */
536 	if (pglob->gl_pathc == oldpathc) {
537 		if (((pglob->gl_flags & GLOB_NOCHECK) ||
538 		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
539 			!(pglob->gl_flags & GLOB_MAGCHAR))))
540 			return (globextend(pattern, pglob, limit));
541 		else
542 			return (GLOB_NOMATCH);
543 	}
544 	if (!(pglob->gl_flags & GLOB_NOSORT))
545 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
546 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
547 	return (0);
548 }
549 
550 static int
551 compare(const void *p, const void *q)
552 {
553 	return (strcmp(*(char **)p, *(char **)q));
554 }
555 
556 static int
557 glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
558 {
559 	Char pathbuf[MAXPATHLEN];
560 
561 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
562 	if (*pattern == EOS)
563 		return (0);
564 	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
565 	    pattern, pglob, limit));
566 }
567 
568 /*
569  * The functions glob2 and glob3 are mutually recursive; there is one level
570  * of recursion for each segment in the pattern that contains one or more
571  * meta characters.
572  */
573 static int
574 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
575       glob_t *pglob, struct glob_limit *limit)
576 {
577 	struct stat sb;
578 	Char *p, *q;
579 	int anymeta;
580 
581 	/*
582 	 * Loop over pattern segments until end of pattern or until
583 	 * segment with meta character found.
584 	 */
585 	for (anymeta = 0;;) {
586 		if (*pattern == EOS) {		/* End of pattern? */
587 			*pathend = EOS;
588 			if (g_lstat(pathbuf, &sb, pglob))
589 				return (0);
590 
591 			if ((pglob->gl_flags & GLOB_LIMIT) &&
592 			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
593 				errno = 0;
594 				if (pathend + 1 > pathend_last)
595 					return (GLOB_ABORTED);
596 				*pathend++ = SEP;
597 				*pathend = EOS;
598 				return (GLOB_NOSPACE);
599 			}
600 			if (((pglob->gl_flags & GLOB_MARK) &&
601 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
602 			    || (S_ISLNK(sb.st_mode) &&
603 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
604 			    S_ISDIR(sb.st_mode)))) {
605 				if (pathend + 1 > pathend_last)
606 					return (GLOB_ABORTED);
607 				*pathend++ = SEP;
608 				*pathend = EOS;
609 			}
610 			++pglob->gl_matchc;
611 			return (globextend(pathbuf, pglob, limit));
612 		}
613 
614 		/* Find end of next segment, copy tentatively to pathend. */
615 		q = pathend;
616 		p = pattern;
617 		while (*p != EOS && *p != SEP) {
618 			if (ismeta(*p))
619 				anymeta = 1;
620 			if (q + 1 > pathend_last)
621 				return (GLOB_ABORTED);
622 			*q++ = *p++;
623 		}
624 
625 		if (!anymeta) {		/* No expansion, do next segment. */
626 			pathend = q;
627 			pattern = p;
628 			while (*pattern == SEP) {
629 				if (pathend + 1 > pathend_last)
630 					return (GLOB_ABORTED);
631 				*pathend++ = *pattern++;
632 			}
633 		} else			/* Need expansion, recurse. */
634 			return (glob3(pathbuf, pathend, pathend_last, pattern,
635 			    p, pglob, limit));
636 	}
637 	/* NOTREACHED */
638 }
639 
640 static int
641 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
642       Char *pattern, Char *restpattern,
643       glob_t *pglob, struct glob_limit *limit)
644 {
645 	struct dirent *dp;
646 	DIR *dirp;
647 	int err;
648 	char buf[MAXPATHLEN];
649 
650 	/*
651 	 * The readdirfunc declaration can't be prototyped, because it is
652 	 * assigned, below, to two functions which are prototyped in glob.h
653 	 * and dirent.h as taking pointers to differently typed opaque
654 	 * structures.
655 	 */
656 	struct dirent *(*readdirfunc)();
657 
658 	if (pathend > pathend_last)
659 		return (GLOB_ABORTED);
660 	*pathend = EOS;
661 	errno = 0;
662 
663 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
664 		/* TODO: don't call for ENOENT or ENOTDIR? */
665 		if (pglob->gl_errfunc) {
666 			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
667 				return (GLOB_ABORTED);
668 			if (pglob->gl_errfunc(buf, errno) ||
669 			    pglob->gl_flags & GLOB_ERR)
670 				return (GLOB_ABORTED);
671 		}
672 		return (0);
673 	}
674 
675 	err = 0;
676 
677 	/* Search directory for matching names. */
678 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
679 		readdirfunc = pglob->gl_readdir;
680 	else
681 		readdirfunc = readdir;
682 	while ((dp = (*readdirfunc)(dirp))) {
683 		char *sc;
684 		Char *dc;
685 		wchar_t wc;
686 		size_t clen;
687 		mbstate_t mbs;
688 
689 		if ((pglob->gl_flags & GLOB_LIMIT) &&
690 		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
691 			errno = 0;
692 			if (pathend + 1 > pathend_last)
693 				err = GLOB_ABORTED;
694 			else {
695 				*pathend++ = SEP;
696 				*pathend = EOS;
697 				err = GLOB_NOSPACE;
698 			}
699 			break;
700 		}
701 
702 		/* Initial DOT must be matched literally. */
703 		if (dp->d_name[0] == DOT && *pattern != DOT)
704 			continue;
705 		memset(&mbs, 0, sizeof(mbs));
706 		dc = pathend;
707 		sc = dp->d_name;
708 		while (dc < pathend_last) {
709 			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
710 			if (clen == (size_t)-1 || clen == (size_t)-2) {
711 				wc = *sc;
712 				clen = 1;
713 				memset(&mbs, 0, sizeof(mbs));
714 			}
715 			if ((*dc++ = wc) == EOS)
716 				break;
717 			sc += clen;
718 		}
719 		if (!match(pathend, pattern, restpattern)) {
720 			*pathend = EOS;
721 			continue;
722 		}
723 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
724 		    pglob, limit);
725 		if (err)
726 			break;
727 	}
728 
729 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
730 		(*pglob->gl_closedir)(dirp);
731 	else
732 		closedir(dirp);
733 	return (err);
734 }
735 
736 
737 /*
738  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
739  * add the new item, and update gl_pathc.
740  *
741  * This assumes the BSD realloc, which only copies the block when its size
742  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
743  * behavior.
744  *
745  * Return 0 if new item added, error code if memory couldn't be allocated.
746  *
747  * Invariant of the glob_t structure:
748  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
749  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
750  */
751 static int
752 globextend(const Char *path, glob_t *pglob, struct glob_limit *limit)
753 {
754 	char **pathv;
755 	size_t i, newsize, len;
756 	char *copy;
757 	const Char *p;
758 
759 	if ((pglob->gl_flags & GLOB_LIMIT) &&
760 	    pglob->gl_matchc > limit->l_path_lim) {
761 		errno = 0;
762 		return (GLOB_NOSPACE);
763 	}
764 
765 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
766 	/* realloc(NULL, newsize) is equivalent to malloc(newsize). */
767 	pathv = realloc((void *)pglob->gl_pathv, newsize);
768 	if (pathv == NULL)
769 		return (GLOB_NOSPACE);
770 
771 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
772 		/* first time around -- clear initial gl_offs items */
773 		pathv += pglob->gl_offs;
774 		for (i = pglob->gl_offs + 1; --i > 0; )
775 			*--pathv = NULL;
776 	}
777 	pglob->gl_pathv = pathv;
778 
779 	for (p = path; *p++;)
780 		continue;
781 	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
782 	limit->l_string_cnt += len;
783 	if ((pglob->gl_flags & GLOB_LIMIT) &&
784 	    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
785 		errno = 0;
786 		return (GLOB_NOSPACE);
787 	}
788 	if ((copy = malloc(len)) != NULL) {
789 		if (g_Ctoc(path, copy, len)) {
790 			free(copy);
791 			return (GLOB_NOSPACE);
792 		}
793 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
794 	}
795 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
796 	return (copy == NULL ? GLOB_NOSPACE : 0);
797 }
798 
799 /*
800  * pattern matching function for filenames.  Each occurrence of the *
801  * pattern causes a recursion level.
802  */
803 static int
804 match(Char *name, Char *pat, Char *patend)
805 {
806 	int ok, negate_range;
807 	Char c, k;
808 	struct xlocale_collate *table =
809 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
810 
811 	while (pat < patend) {
812 		c = *pat++;
813 		switch (c & M_MASK) {
814 		case M_ALL:
815 			if (pat == patend)
816 				return (1);
817 			do
818 			    if (match(name, pat, patend))
819 				    return (1);
820 			while (*name++ != EOS);
821 			return (0);
822 		case M_ONE:
823 			if (*name++ == EOS)
824 				return (0);
825 			break;
826 		case M_SET:
827 			ok = 0;
828 			if ((k = *name++) == EOS)
829 				return (0);
830 			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
831 				++pat;
832 			while (((c = *pat++) & M_MASK) != M_END)
833 				if ((*pat & M_MASK) == M_RNG) {
834 					if (table->__collate_load_error ?
835 					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
836 					       __collate_range_cmp(table, CHAR(c), CHAR(k)) <= 0
837 					    && __collate_range_cmp(table, CHAR(k), CHAR(pat[1])) <= 0
838 					   )
839 						ok = 1;
840 					pat += 2;
841 				} else if (c == k)
842 					ok = 1;
843 			if (ok == negate_range)
844 				return (0);
845 			break;
846 		default:
847 			if (*name++ != c)
848 				return (0);
849 			break;
850 		}
851 	}
852 	return (*name == EOS);
853 }
854 
855 /* Free allocated data belonging to a glob_t structure. */
856 void
857 globfree(glob_t *pglob)
858 {
859 	size_t i;
860 	char **pp;
861 
862 	if (pglob->gl_pathv != NULL) {
863 		pp = pglob->gl_pathv + pglob->gl_offs;
864 		for (i = pglob->gl_pathc; i--; ++pp)
865 			if (*pp)
866 				free(*pp);
867 		free(pglob->gl_pathv);
868 		pglob->gl_pathv = NULL;
869 	}
870 }
871 
872 static DIR *
873 g_opendir(Char *str, glob_t *pglob)
874 {
875 	char buf[MAXPATHLEN];
876 
877 	if (!*str)
878 		strcpy(buf, ".");
879 	else {
880 		if (g_Ctoc(str, buf, sizeof(buf)))
881 			return (NULL);
882 	}
883 
884 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
885 		return ((*pglob->gl_opendir)(buf));
886 
887 	return (opendir(buf));
888 }
889 
890 static int
891 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
892 {
893 	char buf[MAXPATHLEN];
894 
895 	if (g_Ctoc(fn, buf, sizeof(buf))) {
896 		errno = ENAMETOOLONG;
897 		return (-1);
898 	}
899 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
900 		return((*pglob->gl_lstat)(buf, sb));
901 	return (lstat(buf, sb));
902 }
903 
904 static int
905 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
906 {
907 	char buf[MAXPATHLEN];
908 
909 	if (g_Ctoc(fn, buf, sizeof(buf))) {
910 		errno = ENAMETOOLONG;
911 		return (-1);
912 	}
913 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
914 		return ((*pglob->gl_stat)(buf, sb));
915 	return (stat(buf, sb));
916 }
917 
918 static const Char *
919 g_strchr(const Char *str, wchar_t ch)
920 {
921 
922 	do {
923 		if (*str == ch)
924 			return (str);
925 	} while (*str++);
926 	return (NULL);
927 }
928 
929 static int
930 g_Ctoc(const Char *str, char *buf, size_t len)
931 {
932 	mbstate_t mbs;
933 	size_t clen;
934 
935 	memset(&mbs, 0, sizeof(mbs));
936 	while (len >= MB_CUR_MAX) {
937 		clen = wcrtomb(buf, *str, &mbs);
938 		if (clen == (size_t)-1)
939 			return (1);
940 		if (*str == L'\0')
941 			return (0);
942 		str++;
943 		buf += clen;
944 		len -= clen;
945 	}
946 	return (1);
947 }
948 
949 #ifdef DEBUG
950 static void
951 qprintf(const char *str, Char *s)
952 {
953 	Char *p;
954 
955 	(void)printf("%s:\n", str);
956 	for (p = s; *p; p++)
957 		(void)printf("%c", CHAR(*p));
958 	(void)printf("\n");
959 	for (p = s; *p; p++)
960 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
961 	(void)printf("\n");
962 	for (p = s; *p; p++)
963 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
964 	(void)printf("\n");
965 }
966 #endif
967