xref: /dragonfly/lib/libc/gen/glob.c (revision 49837aef)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)glob.c	8.3 (Berkeley) 10/13/93
38  * $FreeBSD: head/lib/libc/gen/glob.c 249381 2013-04-11 20:15:37Z emaste $
39  */
40 
41 /*
42  * glob(3) -- a superset of the one defined in POSIX 1003.2.
43  *
44  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
45  *
46  * Optional extra services, controlled by flags not defined by POSIX:
47  *
48  * GLOB_QUOTE:
49  *	Escaping convention: \ inhibits any special meaning the following
50  *	character might have (except \ at end of string is retained).
51  * GLOB_MAGCHAR:
52  *	Set in gl_flags if pattern contained a globbing character.
53  * GLOB_NOMAGIC:
54  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
55  *	not contain any magic characters.  [Used in csh style globbing]
56  * GLOB_ALTDIRFUNC:
57  *	Use alternately specified directory access functions.
58  * GLOB_TILDE:
59  *	expand ~user/foo to the /home/dir/of/user/foo
60  * GLOB_BRACE:
61  *	expand {1,2}{a,b} to 1a 1b 2a 2b
62  * gl_matchc:
63  *	Number of matches in the current invocation of glob.
64  */
65 
66 /*
67  * Some notes on multibyte character support:
68  * 1. Patterns with illegal byte sequences match nothing - even if
69  *    GLOB_NOCHECK is specified.
70  * 2. Illegal byte sequences in filenames are handled by treating them as
71  *    single-byte characters with a value of the first byte of the sequence
72  *    cast to wchar_t.
73  * 3. State-dependent encodings are not currently supported.
74  */
75 
76 #include "namespace.h"
77 #include <sys/param.h>
78 #include <sys/stat.h>
79 
80 #include <ctype.h>
81 #include <dirent.h>
82 #include <errno.h>
83 #include <glob.h>
84 #include <limits.h>
85 #include <pwd.h>
86 #include <stdint.h>
87 #include <stdio.h>
88 #include <stdlib.h>
89 #include <string.h>
90 #include <unistd.h>
91 #include <wchar.h>
92 
93 #include "collate.h"
94 #include "un-namespace.h"
95 
96 /*
97  * glob(3) expansion limits. Stop the expansion if any of these limits
98  * is reached. This caps the runtime in the face of DoS attacks. See
99  * also CVE-2010-2632
100  */
101 #define	GLOB_LIMIT_BRACE	128	/* number of brace calls */
102 #define	GLOB_LIMIT_PATH		65536	/* number of path elements */
103 #define	GLOB_LIMIT_READDIR	16384	/* number of readdirs */
104 #define	GLOB_LIMIT_STAT		1024	/* number of stat system calls */
105 #define	GLOB_LIMIT_STRING	ARG_MAX	/* maximum total size for paths */
106 
107 struct glob_limit {
108 	size_t	l_brace_cnt;
109 	size_t	l_path_lim;
110 	size_t	l_readdir_cnt;
111 	size_t	l_stat_cnt;
112 	size_t	l_string_cnt;
113 };
114 
115 #define	DOLLAR		'$'
116 #define	DOT		'.'
117 #define	EOS		'\0'
118 #define	LBRACKET	'['
119 #define	NOT		'!'
120 #define	QUESTION	'?'
121 #define	QUOTE		'\\'
122 #define	RANGE		'-'
123 #define	RBRACKET	']'
124 #define	SEP		'/'
125 #define	STAR		'*'
126 #define	TILDE		'~'
127 #define	UNDERSCORE	'_'
128 #define	LBRACE		'{'
129 #define	RBRACE		'}'
130 #define	SLASH		'/'
131 #define	COMMA		','
132 
133 #ifndef DEBUG
134 
135 #define	M_QUOTE		0x8000000000ULL
136 #define	M_PROTECT	0x4000000000ULL
137 #define	M_MASK		0xffffffffffULL
138 #define	M_CHAR		0x00ffffffffULL
139 
140 typedef uint_fast64_t Char;
141 
142 #else
143 
144 #define	M_QUOTE		0x80
145 #define	M_PROTECT	0x40
146 #define	M_MASK		0xff
147 #define	M_CHAR		0x7f
148 
149 typedef char Char;
150 
151 #endif
152 
153 
154 #define	CHAR(c)		((Char)((c)&M_CHAR))
155 #define	META(c)		((Char)((c)|M_QUOTE))
156 #define	M_ALL		META('*')
157 #define	M_END		META(']')
158 #define	M_NOT		META('!')
159 #define	M_ONE		META('?')
160 #define	M_RNG		META('-')
161 #define	M_SET		META('[')
162 #define	ismeta(c)	(((c)&M_QUOTE) != 0)
163 
164 
165 static int	 compare(const void *, const void *);
166 static int	 g_Ctoc(const Char *, char *, size_t);
167 static int	 g_lstat(Char *, struct stat *, glob_t *);
168 static DIR	*g_opendir(Char *, glob_t *);
169 static const Char *g_strchr(const Char *, wchar_t);
170 #ifdef notdef
171 static Char	*g_strcat(Char *, const Char *);
172 #endif
173 static int	 g_stat(Char *, struct stat *, glob_t *);
174 static int	 glob0(const Char *, glob_t *, struct glob_limit *);
175 static int	 glob1(Char *, glob_t *, struct glob_limit *);
176 static int	 glob2(Char *, Char *, Char *, Char *, glob_t *,
177     struct glob_limit *);
178 static int	 glob3(Char *, Char *, Char *, Char *, Char *, glob_t *,
179     struct glob_limit *);
180 static int	 globextend(const Char *, glob_t *, struct glob_limit *);
181 static const Char *
182 		 globtilde(const Char *, Char *, size_t, glob_t *);
183 static int	 globexp1(const Char *, glob_t *, struct glob_limit *);
184 static int	 globexp2(const Char *, const Char *, glob_t *, int *,
185     struct glob_limit *);
186 static int	 match(Char *, Char *, Char *);
187 #ifdef DEBUG
188 static void	 qprintf(const char *, Char *);
189 #endif
190 
191 int
192 glob(const char * __restrict pattern, int flags,
193 	 int (*errfunc)(const char *, int), glob_t * __restrict pglob)
194 {
195 	struct glob_limit limit = { 0, 0, 0, 0, 0 };
196 	const char *patnext;
197 	Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
198 	mbstate_t mbs;
199 	wchar_t wc;
200 	size_t clen;
201 
202 	patnext = pattern;
203 	if (!(flags & GLOB_APPEND)) {
204 		pglob->gl_pathc = 0;
205 		pglob->gl_pathv = NULL;
206 		if (!(flags & GLOB_DOOFFS))
207 			pglob->gl_offs = 0;
208 	}
209 	if (flags & GLOB_LIMIT) {
210 		limit.l_path_lim = pglob->gl_matchc;
211 		if (limit.l_path_lim == 0)
212 			limit.l_path_lim = GLOB_LIMIT_PATH;
213 	}
214 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
215 	pglob->gl_errfunc = errfunc;
216 	pglob->gl_matchc = 0;
217 
218 	bufnext = patbuf;
219 	bufend = bufnext + MAXPATHLEN - 1;
220 	if (flags & GLOB_NOESCAPE) {
221 		memset(&mbs, 0, sizeof(mbs));
222 		while (bufend - bufnext >= MB_CUR_MAX) {
223 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
224 			if (clen == (size_t)-1 || clen == (size_t)-2)
225 				return (GLOB_NOMATCH);
226 			else if (clen == 0)
227 				break;
228 			*bufnext++ = wc;
229 			patnext += clen;
230 		}
231 	} else {
232 		/* Protect the quoted characters. */
233 		memset(&mbs, 0, sizeof(mbs));
234 		while (bufend - bufnext >= MB_CUR_MAX) {
235 			if (*patnext == QUOTE) {
236 				if (*++patnext == EOS) {
237 					*bufnext++ = QUOTE | M_PROTECT;
238 					continue;
239 				}
240 				prot = M_PROTECT;
241 			} else
242 				prot = 0;
243 			clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
244 			if (clen == (size_t)-1 || clen == (size_t)-2)
245 				return (GLOB_NOMATCH);
246 			else if (clen == 0)
247 				break;
248 			*bufnext++ = wc | prot;
249 			patnext += clen;
250 		}
251 	}
252 	*bufnext = EOS;
253 
254 	if (flags & GLOB_BRACE)
255 	    return (globexp1(patbuf, pglob, &limit));
256 	else
257 	    return (glob0(patbuf, pglob, &limit));
258 }
259 
260 /*
261  * Expand recursively a glob {} pattern. When there is no more expansion
262  * invoke the standard globbing routine to glob the rest of the magic
263  * characters
264  */
265 static int
266 globexp1(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
267 {
268 	const Char* ptr = pattern;
269 	int rv;
270 
271 	if ((pglob->gl_flags & GLOB_LIMIT) &&
272 	    limit->l_brace_cnt++ >= GLOB_LIMIT_BRACE) {
273 		errno = 0;
274 		return (GLOB_NOSPACE);
275 	}
276 
277 	/* Protect a single {}, for find(1), like csh */
278 	if (pattern[0] == LBRACE && pattern[1] == RBRACE && pattern[2] == EOS)
279 		return glob0(pattern, pglob, limit);
280 
281 	while ((ptr = g_strchr(ptr, LBRACE)) != NULL)
282 		if (!globexp2(ptr, pattern, pglob, &rv, limit))
283 			return rv;
284 
285 	return glob0(pattern, pglob, limit);
286 }
287 
288 
289 /*
290  * Recursive brace globbing helper. Tries to expand a single brace.
291  * If it succeeds then it invokes globexp1 with the new pattern.
292  * If it fails then it tries to glob the rest of the pattern and returns.
293  */
294 static int
295 globexp2(const Char *ptr, const Char *pattern, glob_t *pglob, int *rv,
296     struct glob_limit *limit)
297 {
298 	int     i;
299 	Char   *lm, *ls;
300 	const Char *pe, *pm, *pm1, *pl;
301 	Char    patbuf[MAXPATHLEN];
302 
303 	/* copy part up to the brace */
304 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
305 		continue;
306 	*lm = EOS;
307 	ls = lm;
308 
309 	/* Find the balanced brace */
310 	for (i = 0, pe = ++ptr; *pe; pe++)
311 		if (*pe == LBRACKET) {
312 			/* Ignore everything between [] */
313 			for (pm = pe++; *pe != RBRACKET && *pe != EOS; pe++)
314 				continue;
315 			if (*pe == EOS) {
316 				/*
317 				 * We could not find a matching RBRACKET.
318 				 * Ignore and just look for RBRACE
319 				 */
320 				pe = pm;
321 			}
322 		}
323 		else if (*pe == LBRACE)
324 			i++;
325 		else if (*pe == RBRACE) {
326 			if (i == 0)
327 				break;
328 			i--;
329 		}
330 
331 	/* Non matching braces; just glob the pattern */
332 	if (i != 0 || *pe == EOS) {
333 		*rv = glob0(patbuf, pglob, limit);
334 		return (0);
335 	}
336 
337 	for (i = 0, pl = pm = ptr; pm <= pe; pm++)
338 		switch (*pm) {
339 		case LBRACKET:
340 			/* Ignore everything between [] */
341 			for (pm1 = pm++; *pm != RBRACKET && *pm != EOS; pm++)
342 				continue;
343 			if (*pm == EOS) {
344 				/*
345 				 * We could not find a matching RBRACKET.
346 				 * Ignore and just look for RBRACE
347 				 */
348 				pm = pm1;
349 			}
350 			break;
351 
352 		case LBRACE:
353 			i++;
354 			break;
355 
356 		case RBRACE:
357 			if (i) {
358 			    i--;
359 			    break;
360 			}
361 			/* FALLTHROUGH */
362 		case COMMA:
363 			if (i && *pm == COMMA)
364 				break;
365 			else {
366 				/* Append the current string */
367 				for (lm = ls; (pl < pm); *lm++ = *pl++)
368 					continue;
369 				/*
370 				 * Append the rest of the pattern after the
371 				 * closing brace
372 				 */
373 				for (pl = pe + 1; (*lm++ = *pl++) != EOS;)
374 					continue;
375 
376 				/* Expand the current pattern */
377 #ifdef DEBUG
378 				qprintf("globexp2:", patbuf);
379 #endif
380 				*rv = globexp1(patbuf, pglob, limit);
381 
382 				/* move after the comma, to the next string */
383 				pl = pm + 1;
384 			}
385 			break;
386 
387 		default:
388 			break;
389 		}
390 	*rv = 0;
391 	return (0);
392 }
393 
394 
395 
396 /*
397  * expand tilde from the passwd file.
398  */
399 static const Char *
400 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
401 {
402 	struct passwd *pwd;
403 	char *h;
404 	const Char *p;
405 	Char *b, *eb;
406 
407 	if (*pattern != TILDE || !(pglob->gl_flags & GLOB_TILDE))
408 		return (pattern);
409 
410 	/*
411 	 * Copy up to the end of the string or /
412 	 */
413 	eb = &patbuf[patbuf_len - 1];
414 	for (p = pattern + 1, h = (char *) patbuf;
415 	    h < (char *)eb && *p && *p != SLASH; *h++ = *p++)
416 		continue;
417 
418 	*h = EOS;
419 
420 	if (((char *) patbuf)[0] == EOS) {
421 		/*
422 		 * handle a plain ~ or ~/ by expanding $HOME first (iff
423 		 * we're not running setuid or setgid) and then trying
424 		 * the password file
425 		 */
426 		if (issetugid() != 0 ||
427 		    (h = getenv("HOME")) == NULL) {
428 			if (((h = _getlogin()) != NULL &&
429 			     (pwd = getpwnam(h)) != NULL) ||
430 			    (pwd = getpwuid(getuid())) != NULL)
431 				h = pwd->pw_dir;
432 			else
433 				return (pattern);
434 		}
435 	}
436 	else {
437 		/*
438 		 * Expand a ~user
439 		 */
440 		if ((pwd = getpwnam((char*) patbuf)) == NULL)
441 			return (pattern);
442 		else
443 			h = pwd->pw_dir;
444 	}
445 
446 	/* Copy the home directory */
447 	for (b = patbuf; b < eb && *h; *b++ = *h++)
448 		continue;
449 
450 	/* Append the rest of the pattern */
451 	while (b < eb && (*b++ = *p++) != EOS)
452 		continue;
453 	*b = EOS;
454 
455 	return (patbuf);
456 }
457 
458 
459 /*
460  * The main glob() routine: compiles the pattern (optionally processing
461  * quotes), calls glob1() to do the real pattern matching, and finally
462  * sorts the list (unless unsorted operation is requested).  Returns 0
463  * if things went well, nonzero if errors occurred.
464  */
465 static int
466 glob0(const Char *pattern, glob_t *pglob, struct glob_limit *limit)
467 {
468 	const Char *qpatnext;
469 	int err;
470 	size_t oldpathc;
471 	Char *bufnext, c, patbuf[MAXPATHLEN];
472 
473 	qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
474 	oldpathc = pglob->gl_pathc;
475 	bufnext = patbuf;
476 
477 	/* We don't need to check for buffer overflow any more. */
478 	while ((c = *qpatnext++) != EOS) {
479 		switch (c) {
480 		case LBRACKET:
481 			c = *qpatnext;
482 			if (c == NOT)
483 				++qpatnext;
484 			if (*qpatnext == EOS ||
485 			    g_strchr(qpatnext+1, RBRACKET) == NULL) {
486 				*bufnext++ = LBRACKET;
487 				if (c == NOT)
488 					--qpatnext;
489 				break;
490 			}
491 			*bufnext++ = M_SET;
492 			if (c == NOT)
493 				*bufnext++ = M_NOT;
494 			c = *qpatnext++;
495 			do {
496 				*bufnext++ = CHAR(c);
497 				if (*qpatnext == RANGE &&
498 				    (c = qpatnext[1]) != RBRACKET) {
499 					*bufnext++ = M_RNG;
500 					*bufnext++ = CHAR(c);
501 					qpatnext += 2;
502 				}
503 			} while ((c = *qpatnext++) != RBRACKET);
504 			pglob->gl_flags |= GLOB_MAGCHAR;
505 			*bufnext++ = M_END;
506 			break;
507 		case QUESTION:
508 			pglob->gl_flags |= GLOB_MAGCHAR;
509 			*bufnext++ = M_ONE;
510 			break;
511 		case STAR:
512 			pglob->gl_flags |= GLOB_MAGCHAR;
513 			/* collapse adjacent stars to one,
514 			 * to avoid exponential behavior
515 			 */
516 			if (bufnext == patbuf || bufnext[-1] != M_ALL)
517 			    *bufnext++ = M_ALL;
518 			break;
519 		default:
520 			*bufnext++ = CHAR(c);
521 			break;
522 		}
523 	}
524 	*bufnext = EOS;
525 #ifdef DEBUG
526 	qprintf("glob0:", patbuf);
527 #endif
528 
529 	if ((err = glob1(patbuf, pglob, limit)) != 0)
530 		return(err);
531 
532 	/*
533 	 * If there was no match we are going to append the pattern
534 	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
535 	 * and the pattern did not contain any magic characters
536 	 * GLOB_NOMAGIC is there just for compatibility with csh.
537 	 */
538 	if (pglob->gl_pathc == oldpathc) {
539 		if (((pglob->gl_flags & GLOB_NOCHECK) ||
540 		    ((pglob->gl_flags & GLOB_NOMAGIC) &&
541 			!(pglob->gl_flags & GLOB_MAGCHAR))))
542 			return (globextend(pattern, pglob, limit));
543 		else
544 			return (GLOB_NOMATCH);
545 	}
546 	if (!(pglob->gl_flags & GLOB_NOSORT))
547 		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
548 		    pglob->gl_pathc - oldpathc, sizeof(char *), compare);
549 	return (0);
550 }
551 
552 static int
553 compare(const void *p, const void *q)
554 {
555 	return (strcmp(*(char **)p, *(char **)q));
556 }
557 
558 static int
559 glob1(Char *pattern, glob_t *pglob, struct glob_limit *limit)
560 {
561 	Char pathbuf[MAXPATHLEN];
562 
563 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
564 	if (*pattern == EOS)
565 		return (0);
566 	return (glob2(pathbuf, pathbuf, pathbuf + MAXPATHLEN - 1,
567 	    pattern, pglob, limit));
568 }
569 
570 /*
571  * The functions glob2 and glob3 are mutually recursive; there is one level
572  * of recursion for each segment in the pattern that contains one or more
573  * meta characters.
574  */
575 static int
576 glob2(Char *pathbuf, Char *pathend, Char *pathend_last, Char *pattern,
577       glob_t *pglob, struct glob_limit *limit)
578 {
579 	struct stat sb;
580 	Char *p, *q;
581 	int anymeta;
582 
583 	/*
584 	 * Loop over pattern segments until end of pattern or until
585 	 * segment with meta character found.
586 	 */
587 	for (anymeta = 0;;) {
588 		if (*pattern == EOS) {		/* End of pattern? */
589 			*pathend = EOS;
590 			if (g_lstat(pathbuf, &sb, pglob))
591 				return (0);
592 
593 			if ((pglob->gl_flags & GLOB_LIMIT) &&
594 			    limit->l_stat_cnt++ >= GLOB_LIMIT_STAT) {
595 				errno = 0;
596 				if (pathend + 1 > pathend_last)
597 					return (GLOB_ABORTED);
598 				*pathend++ = SEP;
599 				*pathend = EOS;
600 				return (GLOB_NOSPACE);
601 			}
602 			if (((pglob->gl_flags & GLOB_MARK) &&
603 			    pathend[-1] != SEP) && (S_ISDIR(sb.st_mode)
604 			    || (S_ISLNK(sb.st_mode) &&
605 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
606 			    S_ISDIR(sb.st_mode)))) {
607 				if (pathend + 1 > pathend_last)
608 					return (GLOB_ABORTED);
609 				*pathend++ = SEP;
610 				*pathend = EOS;
611 			}
612 			++pglob->gl_matchc;
613 			return (globextend(pathbuf, pglob, limit));
614 		}
615 
616 		/* Find end of next segment, copy tentatively to pathend. */
617 		q = pathend;
618 		p = pattern;
619 		while (*p != EOS && *p != SEP) {
620 			if (ismeta(*p))
621 				anymeta = 1;
622 			if (q + 1 > pathend_last)
623 				return (GLOB_ABORTED);
624 			*q++ = *p++;
625 		}
626 
627 		if (!anymeta) {		/* No expansion, do next segment. */
628 			pathend = q;
629 			pattern = p;
630 			while (*pattern == SEP) {
631 				if (pathend + 1 > pathend_last)
632 					return (GLOB_ABORTED);
633 				*pathend++ = *pattern++;
634 			}
635 		} else			/* Need expansion, recurse. */
636 			return (glob3(pathbuf, pathend, pathend_last, pattern,
637 			    p, pglob, limit));
638 	}
639 	/* NOTREACHED */
640 }
641 
642 static int
643 glob3(Char *pathbuf, Char *pathend, Char *pathend_last,
644       Char *pattern, Char *restpattern,
645       glob_t *pglob, struct glob_limit *limit)
646 {
647 	struct dirent *dp;
648 	DIR *dirp;
649 	int err;
650 	char buf[MAXPATHLEN];
651 
652 	struct dirent *(*readdirfunc)(DIR *);
653 
654 	if (pathend > pathend_last)
655 		return (GLOB_ABORTED);
656 	*pathend = EOS;
657 	errno = 0;
658 
659 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
660 		/* TODO: don't call for ENOENT or ENOTDIR? */
661 		if (pglob->gl_errfunc) {
662 			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
663 				return (GLOB_ABORTED);
664 			if (pglob->gl_errfunc(buf, errno) ||
665 			    pglob->gl_flags & GLOB_ERR)
666 				return (GLOB_ABORTED);
667 		}
668 		return (0);
669 	}
670 
671 	err = 0;
672 
673 	/* pglob->gl_readdir takes a void *, fix this manually */
674 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
675 		readdirfunc = (struct dirent *(*)(DIR *))pglob->gl_readdir;
676 	else
677 		readdirfunc = readdir;
678 
679 	/* Search directory for matching names. */
680 	while ((dp = (*readdirfunc)(dirp)) != NULL) {
681 		char *sc;
682 		Char *dc;
683 		wchar_t wc;
684 		size_t clen;
685 		mbstate_t mbs;
686 
687 		if ((pglob->gl_flags & GLOB_LIMIT) &&
688 		    limit->l_readdir_cnt++ >= GLOB_LIMIT_READDIR) {
689 			errno = 0;
690 			if (pathend + 1 > pathend_last)
691 				err = GLOB_ABORTED;
692 			else {
693 				*pathend++ = SEP;
694 				*pathend = EOS;
695 				err = GLOB_NOSPACE;
696 			}
697 			break;
698 		}
699 
700 		/* Initial DOT must be matched literally. */
701 		if (dp->d_name[0] == DOT && *pattern != DOT)
702 			continue;
703 		memset(&mbs, 0, sizeof(mbs));
704 		dc = pathend;
705 		sc = dp->d_name;
706 		while (dc < pathend_last) {
707 			clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
708 			if (clen == (size_t)-1 || clen == (size_t)-2) {
709 				wc = *sc;
710 				clen = 1;
711 				memset(&mbs, 0, sizeof(mbs));
712 			}
713 			if ((*dc++ = wc) == EOS)
714 				break;
715 			sc += clen;
716 		}
717 		if (!match(pathend, pattern, restpattern)) {
718 			*pathend = EOS;
719 			continue;
720 		}
721 		err = glob2(pathbuf, --dc, pathend_last, restpattern,
722 		    pglob, limit);
723 		if (err)
724 			break;
725 	}
726 
727 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
728 		(*pglob->gl_closedir)(dirp);
729 	else
730 		closedir(dirp);
731 	return (err);
732 }
733 
734 
735 /*
736  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
737  * add the new item, and update gl_pathc.
738  *
739  * This assumes the BSD realloc, which only copies the block when its size
740  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
741  * behavior.
742  *
743  * Return 0 if new item added, error code if memory couldn't be allocated.
744  *
745  * Invariant of the glob_t structure:
746  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
747  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
748  */
749 static int
750 globextend(const Char *path, glob_t *pglob, struct glob_limit *limit)
751 {
752 	char **pathv;
753 	size_t i, newsize, len;
754 	char *copy;
755 	const Char *p;
756 
757 	if ((pglob->gl_flags & GLOB_LIMIT) &&
758 	    pglob->gl_matchc > limit->l_path_lim) {
759 		errno = 0;
760 		return (GLOB_NOSPACE);
761 	}
762 
763 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
764 	/* realloc(NULL, newsize) is equivalent to malloc(newsize). */
765 	pathv = realloc((void *)pglob->gl_pathv, newsize);
766 	if (pathv == NULL)
767 		return (GLOB_NOSPACE);
768 
769 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
770 		/* first time around -- clear initial gl_offs items */
771 		pathv += pglob->gl_offs;
772 		for (i = pglob->gl_offs + 1; --i > 0; )
773 			*--pathv = NULL;
774 	}
775 	pglob->gl_pathv = pathv;
776 
777 	for (p = path; *p++;)
778 		continue;
779 	len = MB_CUR_MAX * (size_t)(p - path);	/* XXX overallocation */
780 	limit->l_string_cnt += len;
781 	if ((pglob->gl_flags & GLOB_LIMIT) &&
782 	    limit->l_string_cnt >= GLOB_LIMIT_STRING) {
783 		errno = 0;
784 		return (GLOB_NOSPACE);
785 	}
786 	if ((copy = malloc(len)) != NULL) {
787 		if (g_Ctoc(path, copy, len)) {
788 			free(copy);
789 			return (GLOB_NOSPACE);
790 		}
791 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
792 	}
793 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
794 	return (copy == NULL ? GLOB_NOSPACE : 0);
795 }
796 
797 /*
798  * pattern matching function for filenames.  Each occurrence of the *
799  * pattern causes a recursion level.
800  */
801 static int
802 match(Char *name, Char *pat, Char *patend)
803 {
804 	int ok, negate_range;
805 	Char c, k;
806 	struct xlocale_collate *table =
807 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
808 
809 	while (pat < patend) {
810 		c = *pat++;
811 		switch (c & M_MASK) {
812 		case M_ALL:
813 			if (pat == patend)
814 				return (1);
815 			do
816 			    if (match(name, pat, patend))
817 				    return (1);
818 			while (*name++ != EOS);
819 			return (0);
820 		case M_ONE:
821 			if (*name++ == EOS)
822 				return (0);
823 			break;
824 		case M_SET:
825 			ok = 0;
826 			if ((k = *name++) == EOS)
827 				return (0);
828 			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != EOS)
829 				++pat;
830 			while (((c = *pat++) & M_MASK) != M_END)
831 				if ((*pat & M_MASK) == M_RNG) {
832 					if (table->__collate_load_error ?
833 					    CHAR(c) <= CHAR(k) && CHAR(k) <= CHAR(pat[1]) :
834 					       __wcollate_range_cmp(CHAR(c), CHAR(k)) <= 0 &&
835 					       __wcollate_range_cmp(CHAR(k), CHAR(pat[1])) <= 0
836 					   )
837 						ok = 1;
838 					pat += 2;
839 				} else if (c == k)
840 					ok = 1;
841 			if (ok == negate_range)
842 				return (0);
843 			break;
844 		default:
845 			if (*name++ != c)
846 				return (0);
847 			break;
848 		}
849 	}
850 	return (*name == EOS);
851 }
852 
853 /* Free allocated data belonging to a glob_t structure. */
854 void
855 globfree(glob_t *pglob)
856 {
857 	size_t i;
858 	char **pp;
859 
860 	if (pglob->gl_pathv != NULL) {
861 		pp = pglob->gl_pathv + pglob->gl_offs;
862 		for (i = pglob->gl_pathc; i--; ++pp)
863 			if (*pp)
864 				free(*pp);
865 		free(pglob->gl_pathv);
866 		pglob->gl_pathv = NULL;
867 	}
868 }
869 
870 static DIR *
871 g_opendir(Char *str, glob_t *pglob)
872 {
873 	char buf[MAXPATHLEN];
874 
875 	if (!*str)
876 		strcpy(buf, ".");
877 	else {
878 		if (g_Ctoc(str, buf, sizeof(buf)))
879 			return (NULL);
880 	}
881 
882 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
883 		return ((*pglob->gl_opendir)(buf));
884 
885 	return (opendir(buf));
886 }
887 
888 static int
889 g_lstat(Char *fn, struct stat *sb, glob_t *pglob)
890 {
891 	char buf[MAXPATHLEN];
892 
893 	if (g_Ctoc(fn, buf, sizeof(buf))) {
894 		errno = ENAMETOOLONG;
895 		return (-1);
896 	}
897 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
898 		return((*pglob->gl_lstat)(buf, sb));
899 	return (lstat(buf, sb));
900 }
901 
902 static int
903 g_stat(Char *fn, struct stat *sb, glob_t *pglob)
904 {
905 	char buf[MAXPATHLEN];
906 
907 	if (g_Ctoc(fn, buf, sizeof(buf))) {
908 		errno = ENAMETOOLONG;
909 		return (-1);
910 	}
911 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
912 		return ((*pglob->gl_stat)(buf, sb));
913 	return (stat(buf, sb));
914 }
915 
916 static const Char *
917 g_strchr(const Char *str, wchar_t ch)
918 {
919 
920 	do {
921 		if (*str == ch)
922 			return (str);
923 	} while (*str++);
924 	return (NULL);
925 }
926 
927 static int
928 g_Ctoc(const Char *str, char *buf, size_t len)
929 {
930 	mbstate_t mbs;
931 	size_t clen;
932 
933 	memset(&mbs, 0, sizeof(mbs));
934 	while (len >= MB_CUR_MAX) {
935 		clen = wcrtomb(buf, *str, &mbs);
936 		if (clen == (size_t)-1)
937 			return (1);
938 		if (*str == L'\0')
939 			return (0);
940 		str++;
941 		buf += clen;
942 		len -= clen;
943 	}
944 	return (1);
945 }
946 
947 #ifdef DEBUG
948 static void
949 qprintf(const char *str, Char *s)
950 {
951 	Char *p;
952 
953 	(void)printf("%s:\n", str);
954 	for (p = s; *p; p++)
955 		(void)printf("%c", CHAR(*p));
956 	(void)printf("\n");
957 	for (p = s; *p; p++)
958 		(void)printf("%c", *p & M_PROTECT ? '"' : ' ');
959 	(void)printf("\n");
960 	for (p = s; *p; p++)
961 		(void)printf("%c", ismeta(*p) ? '_' : ' ');
962 	(void)printf("\n");
963 }
964 #endif
965