1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #if defined(LIBC_SCCS) && !defined(lint)
34 static char sccsid[] = "@(#)glob.c	8.3 (Berkeley) 10/13/93";
35 /* most changes between the version above and the one below have been ported:
36 static char sscsid[]=  "$OpenBSD: glob.c,v 1.8.10.1 2001/04/10 jason Exp $";
37  */
38 #endif /* LIBC_SCCS and not lint */
39 
40 /*
41  * glob(3) -- a superset of the one defined in POSIX 1003.2.
42  *
43  * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
44  *
45  * Optional extra services, controlled by flags not defined by POSIX:
46  *
47  * GLOB_QUOTE:
48  *	Escaping convention: \ inhibits any special meaning the following
49  *	character might have (except \ at end of string is retained).
50  * GLOB_MAGCHAR:
51  *	Set in gl_flags if pattern contained a globbing character.
52  * GLOB_NOMAGIC:
53  *	Same as GLOB_NOCHECK, but it will only append pattern if it did
54  *	not contain any magic characters.  [Used in csh style globbing]
55  * GLOB_ALTDIRFUNC:
56  *	Use alternately specified directory access functions.
57  * GLOB_TILDE:
58  *	expand ~user/foo to the /home/dir/of/user/foo
59  * GLOB_BRACE:
60  *	expand {1,2}{a,b} to 1a 1b 2a 2b
61  * gl_matchc:
62  *	Number of matches in the current invocation of glob.
63  * GLOB_ALPHASORT:
64  *	sort alphabetically like csh (case doesn't matter) instead of in ASCII
65  *	order
66  */
67 
68 #include <EXTERN.h>
69 #include <perl.h>
70 #include <XSUB.h>
71 
72 #include "bsd_glob.h"
73 #ifdef I_PWD
74 #	include <pwd.h>
75 #else
76 #if defined(HAS_PASSWD) && !defined(VMS)
77 	struct passwd *getpwnam(char *);
78 	struct passwd *getpwuid(Uid_t);
79 #endif
80 #endif
81 
/*
 * MAXPATHLEN is the element count of every fixed-size pattern and path
 * buffer in this file.  If the headers included above did not define it,
 * fall back to PATH_MAX when that is already visible, else to 1024.
 */
#ifndef MAXPATHLEN
#  ifdef PATH_MAX
#    define	MAXPATHLEN	PATH_MAX
#  else
#    define	MAXPATHLEN	1024
#  endif
#endif

#include <limits.h>

/*
 * ARG_MAX is the ceiling globextend() compares the accumulated result
 * size against when GLOB_LIMIT is set.  Fallback order when <limits.h>
 * did not supply it: sysconf(_SC_ARG_MAX) (a run-time call, so ARG_MAX is
 * not necessarily a compile-time constant), then _POSIX_ARG_MAX, then a
 * hard-coded per-platform value.
 */
#ifndef ARG_MAX
#  ifdef _SC_ARG_MAX
#    define		ARG_MAX		(sysconf(_SC_ARG_MAX))
#  else
#    ifdef _POSIX_ARG_MAX
#      define		ARG_MAX		_POSIX_ARG_MAX
#    else
#      ifdef WIN32
#        define	ARG_MAX		14500	/* from VC's limits.h */
#      else
#        define	ARG_MAX		4096	/* from POSIX, be conservative */
#      endif
#    endif
#  endif
#endif
107 
/* Literal characters that have a special meaning somewhere in this file. */
#define	BG_DOLLAR	'$'
#define	BG_DOT		'.'
#define	BG_EOS		'\0'
#define	BG_LBRACKET	'['
#define	BG_NOT		'!'
#define	BG_QUESTION	'?'
#define	BG_QUOTE	'\\'
#define	BG_RANGE	'-'
#define	BG_RBRACKET	']'
#define	BG_SEP	'/'
#ifdef DOSISH
/* Second path separator, accepted alongside BG_SEP on DOSISH builds only. */
#define BG_SEP2		'\\'
#endif
#define	BG_STAR		'*'
#define	BG_TILDE	'~'
#define	BG_UNDERSCORE	'_'
#define	BG_LBRACE	'{'
#define	BG_RBRACE	'}'
#define	BG_SLASH	'/'
#define	BG_COMMA	','

/*
 * Internal pattern encoding.  Patterns are held as arrays of Char, which is
 * wider than a byte in the normal build so that flag bits can ride along
 * with each character:
 *
 *   M_QUOTE    set on compiled metacharacters (see META() and ismeta())
 *   M_PROTECT  OR-ed onto characters that were backslash-quoted in the
 *              user's pattern, so they no longer compare equal to the
 *              plain special characters
 *   M_MASK     all bits of a Char; applied in match() before switching
 *   M_ASCII    the character payload, extracted by CHAR()
 *
 * With GLOB_DEBUG defined, Char shrinks to one byte and the flags move into
 * the top two bits, leaving 7 bits of payload.
 */
#ifndef GLOB_DEBUG

#define	M_QUOTE		0x8000
#define	M_PROTECT	0x4000
#define	M_MASK		0xffff
#define	M_ASCII		0x00ff

typedef U16 Char;

#else

#define	M_QUOTE		0x80
#define	M_PROTECT	0x40
#define	M_MASK		0xff
#define	M_ASCII		0x7f

typedef U8 Char;

#endif /* !GLOB_DEBUG */


/* CHAR() strips the flag bits; META() marks a character as a metacharacter. */
#define	CHAR(c)		((Char)((c)&M_ASCII))
#define	META(c)		((Char)((c)|M_QUOTE))
/* Compiled pattern opcodes, produced by glob0() and consumed by match(). */
#define	M_ALL		META('*')
#define	M_END		META(']')
#define	M_NOT		META('!')
#define	M_ONE		META('?')
#define	M_RNG		META('-')
#define	M_SET		META('[')
/* True when c carries the metacharacter flag. */
#define	ismeta(c)	(((c)&M_QUOTE) != 0)
159 
160 
/*
 * Forward declarations for the file-local helpers.  The call chain is
 * bsd_glob -> [globexp1 <-> globexp2] -> glob0 -> glob1 -> glob2 <-> glob3,
 * with globextend() appending each result; the g_* functions convert a
 * Char string to a char buffer before calling the corresponding directory
 * or stat routine.
 */
static int	 compare(const void *, const void *);
static int	 ci_compare(const void *, const void *);
static int	 g_Ctoc(const Char *, char *, STRLEN);
static int	 g_lstat(Char *, Stat_t *, glob_t *);
static DIR	*g_opendir(Char *, glob_t *);
static Char	*g_strchr(Char *, int);
static int	 g_stat(Char *, Stat_t *, glob_t *);
static int	 glob0(const Char *, glob_t *);
static int	 glob1(Char *, Char *, glob_t *, size_t *);
static int	 glob2(Char *, Char *, Char *, Char *, Char *, Char *,
		       glob_t *, size_t *);
static int	 glob3(Char *, Char *, Char *, Char *, Char *,
		       Char *, Char *, glob_t *, size_t *);
static int	 globextend(const Char *, glob_t *, size_t *);
static const Char *
		 globtilde(const Char *, Char *, size_t, glob_t *);
static int	 globexp1(const Char *, glob_t *);
static int	 globexp2(const Char *, const Char *, glob_t *, int *);
static int	 match(Char *, Char *, Char *, int);
#ifdef GLOB_DEBUG
/* Debug-only dump of a compiled pattern. */
static void	 qprintf(const char *, Char *);
#endif /* GLOB_DEBUG */
183 
/*
 * my_readdir() is the directory reader glob3() installs when
 * GLOB_ALTDIRFUNC is not set.  It has to be something whose address can be
 * taken:
 *
 *   - under PERL_IMPLICIT_CONTEXT it is a real function wrapping
 *     PerlDir_read();
 *   - on sinix it is a real function wrapping readdir() (see below);
 *   - everywhere else it is simply an alias for readdir.
 */
#ifdef PERL_IMPLICIT_CONTEXT
static Direntry_t *	my_readdir(DIR*);

static Direntry_t *
my_readdir(DIR *d)
{
#ifndef NETWARE
    return PerlDir_read(d);
#else
    /* Cast to this function's own return type (was mistakenly DIR *). */
    return (Direntry_t *)PerlDir_read(d);
#endif
}
#else

/* ReliantUNIX (OS formerly known as SINIX) defines readdir
 * in LFS-mode to be a 64-bit version of readdir.  */

#   ifdef sinix
static Direntry_t *    my_readdir(DIR*);

static Direntry_t *
my_readdir(DIR *d)
{
    return readdir(d);
}
#   else

#       define	my_readdir	readdir

#   endif

#endif
216 
217 int
bsd_glob(const char * pattern,int flags,int (* errfunc)(const char *,int),glob_t * pglob)218 bsd_glob(const char *pattern, int flags,
219 	 int (*errfunc)(const char *, int), glob_t *pglob)
220 {
221 	const U8 *patnext;
222 	int c;
223 	Char *bufnext, *bufend, patbuf[MAXPATHLEN];
224 	patnext = (U8 *) pattern;
225 	/* TODO: GLOB_APPEND / GLOB_DOOFFS aren't supported yet */
226 #if 0
227 	if (!(flags & GLOB_APPEND)) {
228 		pglob->gl_pathc = 0;
229 		pglob->gl_pathv = NULL;
230 		if (!(flags & GLOB_DOOFFS))
231 			pglob->gl_offs = 0;
232 	}
233 #else
234 	pglob->gl_pathc = 0;
235 	pglob->gl_pathv = NULL;
236 	pglob->gl_offs = 0;
237 #endif
238 	pglob->gl_flags = flags & ~GLOB_MAGCHAR;
239 	pglob->gl_errfunc = errfunc;
240 	pglob->gl_matchc = 0;
241 
242 	bufnext = patbuf;
243 	bufend = bufnext + MAXPATHLEN - 1;
244 #ifdef DOSISH
245 	/* Nasty hack to treat patterns like "C:*" correctly. In this
246 	 * case, the * should match any file in the current directory
247 	 * on the C: drive. However, the glob code does not treat the
248 	 * colon specially, so it looks for files beginning "C:" in
249 	 * the current directory. To fix this, change the pattern to
250 	 * add an explicit "./" at the start (just after the drive
251 	 * letter and colon - ie change to "C:./").
252 	 */
253 	if (isalpha(pattern[0]) && pattern[1] == ':' &&
254 	    pattern[2] != BG_SEP && pattern[2] != BG_SEP2 &&
255 	    bufend - bufnext > 4) {
256 		*bufnext++ = pattern[0];
257 		*bufnext++ = ':';
258 		*bufnext++ = '.';
259 		*bufnext++ = BG_SEP;
260 		patnext += 2;
261 	}
262 #endif
263 
264 	if (flags & GLOB_QUOTE) {
265 		/* Protect the quoted characters. */
266 		while (bufnext < bufend && (c = *patnext++) != BG_EOS)
267 			if (c == BG_QUOTE) {
268 #ifdef DOSISH
269 				    /* To avoid backslashitis on Win32,
270 				     * we only treat \ as a quoting character
271 				     * if it precedes one of the
272 				     * metacharacters []-{}~\
273 				     */
274 				if ((c = *patnext++) != '[' && c != ']' &&
275 				    c != '-' && c != '{' && c != '}' &&
276 				    c != '~' && c != '\\') {
277 #else
278 				if ((c = *patnext++) == BG_EOS) {
279 #endif
280 					c = BG_QUOTE;
281 					--patnext;
282 				}
283 				*bufnext++ = c | M_PROTECT;
284 			} else
285 				*bufnext++ = c;
286 	} else
287 		while (bufnext < bufend && (c = *patnext++) != BG_EOS)
288 			*bufnext++ = c;
289 	*bufnext = BG_EOS;
290 
291 	if (flags & GLOB_BRACE)
292 	    return globexp1(patbuf, pglob);
293 	else
294 	    return glob0(patbuf, pglob);
295 }
296 
297 /*
298  * Expand recursively a glob {} pattern. When there is no more expansion
299  * invoke the standard globbing routine to glob the rest of the magic
300  * characters
301  */
302 static int
303 globexp1(const Char *pattern, glob_t *pglob)
304 {
305 	const Char* ptr = pattern;
306 	int rv;
307 
308 	/* Protect a single {}, for find(1), like csh */
309 	if (pattern[0] == BG_LBRACE && pattern[1] == BG_RBRACE && pattern[2] == BG_EOS)
310 		return glob0(pattern, pglob);
311 
312 	while ((ptr = (const Char *) g_strchr((Char *) ptr, BG_LBRACE)) != NULL)
313 		if (!globexp2(ptr, pattern, pglob, &rv))
314 			return rv;
315 
316 	return glob0(pattern, pglob);
317 }
318 
319 
320 /*
321  * Recursive brace globbing helper. Tries to expand a single brace.
322  * If it succeeds then it invokes globexp1 with the new pattern.
323  * If it fails then it tries to glob the rest of the pattern and returns.
324  */
325 static int
326 globexp2(const Char *ptr, const Char *pattern,
327 	 glob_t *pglob, int *rv)
328 {
329 	int     i;
330 	Char   *lm, *ls;
331 	const Char *pe, *pm, *pm1, *pl;
332 	Char    patbuf[MAXPATHLEN];
333 
334 	/* copy part up to the brace */
335 	for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
336 		;
337 	*lm = BG_EOS;
338 	ls = lm;
339 
340 	/* Find the balanced brace */
341 	for (i = 0, pe = ++ptr; *pe; pe++)
342 		if (*pe == BG_LBRACKET) {
343 			/* Ignore everything between [] */
344 			for (pm = pe++; *pe != BG_RBRACKET && *pe != BG_EOS; pe++)
345 				;
346 			if (*pe == BG_EOS) {
347 				/*
348 				 * We could not find a matching BG_RBRACKET.
349 				 * Ignore and just look for BG_RBRACE
350 				 */
351 				pe = pm;
352 			}
353 		} else if (*pe == BG_LBRACE)
354 			i++;
355 		else if (*pe == BG_RBRACE) {
356 			if (i == 0)
357 				break;
358 			i--;
359 		}
360 
361 	/* Non matching braces; just glob the pattern */
362 	if (i != 0 || *pe == BG_EOS) {
363 		*rv = glob0(patbuf, pglob);
364 		return 0;
365 	}
366 
367 	for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
368 		switch (*pm) {
369 		case BG_LBRACKET:
370 			/* Ignore everything between [] */
371 			for (pm1 = pm++; *pm != BG_RBRACKET && *pm != BG_EOS; pm++)
372 				;
373 			if (*pm == BG_EOS) {
374 				/*
375 				 * We could not find a matching BG_RBRACKET.
376 				 * Ignore and just look for BG_RBRACE
377 				 */
378 				pm = pm1;
379 			}
380 			break;
381 
382 		case BG_LBRACE:
383 			i++;
384 			break;
385 
386 		case BG_RBRACE:
387 			if (i) {
388 				i--;
389 				break;
390 			}
391 			/* FALLTHROUGH */
392 		case BG_COMMA:
393 			if (i && *pm == BG_COMMA)
394 				break;
395 			else {
396 				/* Append the current string */
397 				for (lm = ls; (pl < pm); *lm++ = *pl++)
398 					;
399 
400 				/*
401 				 * Append the rest of the pattern after the
402 				 * closing brace
403 				 */
404 				for (pl = pe + 1; (*lm++ = *pl++) != BG_EOS; )
405 					;
406 
407 				/* Expand the current pattern */
408 #ifdef GLOB_DEBUG
409 				qprintf("globexp2:", patbuf);
410 #endif /* GLOB_DEBUG */
411 				*rv = globexp1(patbuf, pglob);
412 
413 				/* move after the comma, to the next string */
414 				pl = pm + 1;
415 			}
416 			break;
417 
418 		default:
419 			break;
420 		}
421 	}
422 	*rv = 0;
423 	return 0;
424 }
425 
426 
427 
428 /*
429  * expand tilde from the passwd file.
430  */
431 static const Char *
432 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
433 {
434 	char *h;
435 	const Char *p;
436 	Char *b, *eb;
437 
438 	if (*pattern != BG_TILDE || !(pglob->gl_flags & GLOB_TILDE))
439 		return pattern;
440 
441 	/* Copy up to the end of the string or / */
442 	eb = &patbuf[patbuf_len - 1];
443 	for (p = pattern + 1, h = (char *) patbuf;
444 	     h < (char*)eb && *p && *p != BG_SLASH; *h++ = (char)*p++)
445 		;
446 
447 	*h = BG_EOS;
448 
449 #if 0
450 	if (h == (char *)eb)
451 		return what;
452 #endif
453 
454 	if (((char *) patbuf)[0] == BG_EOS) {
455 		/*
456 		 * handle a plain ~ or ~/ by expanding $HOME
457 		 * first and then trying the password file
458 		 * or $USERPROFILE on DOSISH systems
459 		 */
460 		if ((h = getenv("HOME")) == NULL) {
461 #ifdef HAS_PASSWD
462 			struct passwd *pwd;
463 			if ((pwd = getpwuid(getuid())) == NULL)
464 				return pattern;
465 			else
466 				h = pwd->pw_dir;
467 #elif DOSISH
468 			/*
469 			 * When no passwd file, fallback to the USERPROFILE
470 			 * environment variable on DOSish systems.
471 			 */
472 			if ((h = getenv("USERPROFILE")) == NULL) {
473 			    return pattern;
474 			}
475 #else
476                         return pattern;
477 #endif
478 		}
479 	} else {
480 		/*
481 		 * Expand a ~user
482 		 */
483 #ifdef HAS_PASSWD
484 		struct passwd *pwd;
485 		if ((pwd = getpwnam((char*) patbuf)) == NULL)
486 			return pattern;
487 		else
488 			h = pwd->pw_dir;
489 #else
490                 return pattern;
491 #endif
492 	}
493 
494 	/* Copy the home directory */
495 	for (b = patbuf; b < eb && *h; *b++ = *h++)
496 		;
497 
498 	/* Append the rest of the pattern */
499 	while (b < eb && (*b++ = *p++) != BG_EOS)
500 		;
501 	*b = BG_EOS;
502 
503 	return patbuf;
504 }
505 
506 
/*
 * The main glob() routine: compiles the pattern (optionally processing
 * quotes), calls glob1() to do the real pattern matching, and finally
 * sorts the list (unless unsorted operation is requested).  Returns 0
 * if things went well, nonzero if errors occurred.  It is not an error
 * to find no matches.
 *
 * "Compiling" means rewriting the special characters * ? [ ] ! - into
 * their M_* opcodes and stripping the flag bits from everything else.
 * pglob->gl_flags is used as scratch (GLOB_MAGCHAR is set while
 * compiling) and is restored to its entry value on every return path.
 */
static int
glob0(const Char *pattern, glob_t *pglob)
{
	const Char *qpat, *qpatnext;
	int c, err, oldflags, oldpathc;
	Char *bufnext, patbuf[MAXPATHLEN];
	size_t limit = 0;

	/*
	 * globtilde() returns either `pattern' or patbuf itself.  In the
	 * latter case the loop below reads and writes the same buffer, so
	 * every read of a character must happen before the write that
	 * replaces it -- do not reorder statements in this loop.
	 */
	qpat = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
	qpatnext = qpat;
	oldflags = pglob->gl_flags;
	oldpathc = pglob->gl_pathc;
	bufnext = patbuf;

	/* We don't need to check for buffer overflow any more. */
	while ((c = *qpatnext++) != BG_EOS) {
		switch (c) {
		case BG_LBRACKET:
			/* Peek for a leading '!' (negated set). */
			c = *qpatnext;
			if (c == BG_NOT)
				++qpatnext;
			/*
			 * No closing ']' anywhere after the first set
			 * member: treat '[' as an ordinary character and
			 * un-consume the '!' so it is copied literally too.
			 */
			if (*qpatnext == BG_EOS ||
			    g_strchr((Char *) qpatnext+1, BG_RBRACKET) == NULL) {
				*bufnext++ = BG_LBRACKET;
				if (c == BG_NOT)
					--qpatnext;
				break;
			}
			*bufnext++ = M_SET;
			if (c == BG_NOT)
				*bufnext++ = M_NOT;
			/*
			 * The first member is taken unconditionally, so a
			 * ']' directly after '[' or '[!' is a set member.
			 */
			c = *qpatnext++;
			do {
				*bufnext++ = CHAR(c);
				/* "x-y" is a range unless y is the closing ']'. */
				if (*qpatnext == BG_RANGE &&
				    (c = qpatnext[1]) != BG_RBRACKET) {
					*bufnext++ = M_RNG;
					*bufnext++ = CHAR(c);
					qpatnext += 2;
				}
			} while ((c = *qpatnext++) != BG_RBRACKET);
			pglob->gl_flags |= GLOB_MAGCHAR;
			*bufnext++ = M_END;
			break;
		case BG_QUESTION:
			pglob->gl_flags |= GLOB_MAGCHAR;
			*bufnext++ = M_ONE;
			break;
		case BG_STAR:
			pglob->gl_flags |= GLOB_MAGCHAR;
                        /* Collapse adjacent stars to one.
                         * This is required to ensure that a pattern like
                         * "a**" matches a name like "a", as without this
                         * check when the first star matched everything it would
                         * cause the second star to return a match fail.
                         * As long as ** is folded here this does not happen.
			 */
			if (bufnext == patbuf || bufnext[-1] != M_ALL)
				*bufnext++ = M_ALL;
			break;
		default:
			/* Ordinary character: drop M_PROTECT and friends. */
			*bufnext++ = CHAR(c);
			break;
		}
	}
	*bufnext = BG_EOS;
#ifdef GLOB_DEBUG
	qprintf("glob0:", patbuf);
#endif /* GLOB_DEBUG */

	if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, &limit)) != 0) {
		pglob->gl_flags = oldflags;
		return(err);
	}

	/*
	 * If there was no match we are going to append the pattern
	 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
	 * and the pattern did not contain any magic characters
	 * GLOB_NOMAGIC is there just for compatibility with csh.
	 */
	if (pglob->gl_pathc == oldpathc &&
	    ((pglob->gl_flags & GLOB_NOCHECK) ||
	      ((pglob->gl_flags & GLOB_NOMAGIC) &&
	       !(pglob->gl_flags & GLOB_MAGCHAR))))
	{
#ifdef GLOB_DEBUG
		printf("calling globextend from glob0\n");
#endif /* GLOB_DEBUG */
		pglob->gl_flags = oldflags;
		return(globextend(qpat, pglob, &limit));
        }
	/*
	 * Otherwise sort only the entries added by this call.  The
	 * comparison is case-insensitive when either GLOB_ALPHASORT or
	 * GLOB_NOCASE is set.  gl_pathv is still NULL if nothing matched.
	 */
	else if (!(pglob->gl_flags & GLOB_NOSORT))
            if (pglob->gl_pathv)
		qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
		    pglob->gl_pathc - oldpathc, sizeof(char *),
		    (pglob->gl_flags & (GLOB_ALPHASORT|GLOB_NOCASE))
			? ci_compare : compare);
	pglob->gl_flags = oldflags;
	return(0);
}
615 
/*
 * qsort() comparator for an array of strings: case-insensitive order,
 * with compare() (plain strcmp) as the tie-breaker so that names which
 * differ only in case still sort deterministically.
 */
static int
ci_compare(const void *p, const void *q)
{
	const char *lhs = *(const char **)p;
	const char *rhs = *(const char **)q;
	int diff;

	/* Skip the common case-folded prefix. */
	for (; *lhs != '\0' && *rhs != '\0'; ++lhs, ++rhs) {
		if (toFOLD(*lhs) != toFOLD(*rhs))
			break;
	}

	diff = toFOLD(*lhs) - toFOLD(*rhs);
	return diff != 0 ? diff : compare(p, q);
}
633 
/*
 * qsort() comparator for an array of strings: p and q each point at a
 * char * element; the strings are ordered with strcmp().
 */
static int
compare(const void *p, const void *q)
{
	char *left = *(char **)p;
	char *right = *(char **)q;

	return strcmp(left, right);
}
639 
640 static int
641 glob1(Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp)
642 {
643 	Char pathbuf[MAXPATHLEN];
644 
645         assert(pattern < pattern_last);
646 
647 	/* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
648 	if (*pattern == BG_EOS)
649 		return(0);
650 	return(glob2(pathbuf, pathbuf+MAXPATHLEN-1,
651 		     pathbuf, pathbuf+MAXPATHLEN-1,
652 		     pattern, pattern_last, pglob, limitp));
653 }
654 
655 /*
656  * The functions glob2 and glob3 are mutually recursive; there is one level
657  * of recursion for each segment in the pattern that contains one or more
658  * meta characters.
659  */
660 static int
661 glob2(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
662       Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp)
663 {
664 	Stat_t sb;
665 	Char *p, *q;
666 	int anymeta;
667 
668         assert(pattern < pattern_last);
669 
670 	/*
671 	 * Loop over pattern segments until end of pattern or until
672 	 * segment with meta character found.
673 	 */
674 	for (anymeta = 0;;) {
675 		if (*pattern == BG_EOS) {		/* End of pattern? */
676 			*pathend = BG_EOS;
677 			if (g_lstat(pathbuf, &sb, pglob))
678 				return(0);
679 
680 			if (((pglob->gl_flags & GLOB_MARK) &&
681 			    pathend[-1] != BG_SEP
682 #ifdef DOSISH
683 			    && pathend[-1] != BG_SEP2
684 #endif
685 			    ) && (S_ISDIR(sb.st_mode) ||
686 				  (S_ISLNK(sb.st_mode) &&
687 			    (g_stat(pathbuf, &sb, pglob) == 0) &&
688 			    S_ISDIR(sb.st_mode)))) {
689 				if (pathend+1 > pathend_last)
690 					return (1);
691 				*pathend++ = BG_SEP;
692 				*pathend = BG_EOS;
693 			}
694 			++pglob->gl_matchc;
695 #ifdef GLOB_DEBUG
696                         printf("calling globextend from glob2\n");
697 #endif /* GLOB_DEBUG */
698 			return(globextend(pathbuf, pglob, limitp));
699 		}
700 
701 		/* Find end of next segment, copy tentatively to pathend. */
702 		q = pathend;
703 		p = pattern;
704 		while (*p != BG_EOS && *p != BG_SEP
705 #ifdef DOSISH
706 		       && *p != BG_SEP2
707 #endif
708 		       ) {
709                         assert(p < pattern_last);
710 			if (ismeta(*p))
711 				anymeta = 1;
712 			if (q+1 > pathend_last)
713 				return (1);
714 			*q++ = *p++;
715 		}
716 
717 		if (!anymeta) {		/* No expansion, do next segment. */
718 			pathend = q;
719 			pattern = p;
720 			while (*pattern == BG_SEP
721 #ifdef DOSISH
722 			       || *pattern == BG_SEP2
723 #endif
724 			       ) {
725                                 assert(p < pattern_last);
726 				if (pathend+1 > pathend_last)
727 					return (1);
728 				*pathend++ = *pattern++;
729 			}
730 		} else
731 			/* Need expansion, recurse. */
732 			return(glob3(pathbuf, pathbuf_last, pathend,
733 				     pathend_last, pattern,
734 				     p, pattern_last, pglob, limitp));
735 	}
736 	/* NOTREACHED */
737 }
738 
739 static int
740 glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
741       Char *pattern,
742       Char *restpattern, Char *restpattern_last, glob_t *pglob, size_t *limitp)
743 {
744 	Direntry_t *dp;
745 	DIR *dirp;
746 	int err;
747 	int nocase;
748 	char buf[MAXPATHLEN];
749 
750 	/*
751 	 * The readdirfunc declaration can't be prototyped, because it is
752 	 * assigned, below, to two functions which are prototyped in glob.h
753 	 * and dirent.h as taking pointers to differently typed opaque
754 	 * structures.
755 	 */
756 	Direntry_t *(*readdirfunc)(DIR*);
757 
758         assert(pattern < restpattern_last);
759         assert(restpattern < restpattern_last);
760 
761 	if (pathend > pathend_last)
762 		return (1);
763 	*pathend = BG_EOS;
764 	errno = 0;
765 
766 #ifdef VMS
767         {
768 		Char *q = pathend;
769 		if (q - pathbuf > 5) {
770 			q -= 5;
771 			if (q[0] == '.' &&
772 			    tolower(q[1]) == 'd' && tolower(q[2]) == 'i' &&
773 			    tolower(q[3]) == 'r' && q[4] == '/')
774 			{
775 				q[0] = '/';
776 				q[1] = BG_EOS;
777 				pathend = q+1;
778 			}
779 		}
780         }
781 #endif
782 
783 	if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
784 		/* TODO: don't call for ENOENT or ENOTDIR? */
785 		if (pglob->gl_errfunc) {
786 			if (g_Ctoc(pathbuf, buf, sizeof(buf)))
787 				return (GLOB_ABEND);
788 			if (pglob->gl_errfunc(buf, errno) ||
789 			    (pglob->gl_flags & GLOB_ERR))
790 				return (GLOB_ABEND);
791 		}
792 		return(0);
793 	}
794 
795 	err = 0;
796 	nocase = ((pglob->gl_flags & GLOB_NOCASE) != 0);
797 
798 	/* Search directory for matching names. */
799 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
800 		readdirfunc = (Direntry_t *(*)(DIR *))pglob->gl_readdir;
801 	else
802 		readdirfunc = (Direntry_t *(*)(DIR *))my_readdir;
803 	while ((dp = (*readdirfunc)(dirp))) {
804 		U8 *sc;
805 		Char *dc;
806 
807 		/* Initial BG_DOT must be matched literally. */
808 		if (dp->d_name[0] == BG_DOT && *pattern != BG_DOT)
809 			continue;
810 		dc = pathend;
811 		sc = (U8 *) dp->d_name;
812 		while (dc < pathend_last && (*dc++ = *sc++) != BG_EOS)
813 			;
814 		if (dc >= pathend_last) {
815 			*dc = BG_EOS;
816 			err = 1;
817 			break;
818 		}
819 
820 		if (!match(pathend, pattern, restpattern, nocase)) {
821 			*pathend = BG_EOS;
822 			continue;
823 		}
824 		err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
825 			    restpattern, restpattern_last, pglob, limitp);
826 		if (err)
827 			break;
828 	}
829 
830 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
831 		(*pglob->gl_closedir)(dirp);
832 	else
833 		PerlDir_close(dirp);
834 	return(err);
835 }
836 
837 
838 /*
839  * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
840  * add the new item, and update gl_pathc.
841  *
842  * This assumes the BSD realloc, which only copies the block when its size
843  * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
844  * behavior.
845  *
846  * Return 0 if new item added, error code if memory couldn't be allocated.
847  *
848  * Invariant of the glob_t structure:
849  *	Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
850  *	gl_pathv points to (gl_offs + gl_pathc + 1) items.
851  */
852 static int
853 globextend(const Char *path, glob_t *pglob, size_t *limitp)
854 {
855 	char **pathv;
856 	int i;
857 	STRLEN newsize, len;
858 	char *copy;
859 	const Char *p;
860 
861 #ifdef GLOB_DEBUG
862 	printf("Adding ");
863         for (p = path; *p; p++)
864                 (void)printf("%c", CHAR(*p));
865         printf("\n");
866 #endif /* GLOB_DEBUG */
867 
868 	newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
869 	if (pglob->gl_pathv)
870 		pathv = Renew(pglob->gl_pathv,newsize,char*);
871 	else
872 		Newx(pathv,newsize,char*);
873 	if (pathv == NULL) {
874 		if (pglob->gl_pathv) {
875 			Safefree(pglob->gl_pathv);
876 			pglob->gl_pathv = NULL;
877 		}
878 		return(GLOB_NOSPACE);
879 	}
880 
881 	if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
882 		/* first time around -- clear initial gl_offs items */
883 		pathv += pglob->gl_offs;
884 		for (i = pglob->gl_offs; --i >= 0; )
885 			*--pathv = NULL;
886 	}
887 	pglob->gl_pathv = pathv;
888 
889 	for (p = path; *p++;)
890 		;
891 	len = (STRLEN)(p - path);
892 	*limitp += len;
893 	Newx(copy, p-path, char);
894 	if (copy != NULL) {
895 		if (g_Ctoc(path, copy, len)) {
896 			Safefree(copy);
897 			return(GLOB_NOSPACE);
898 		}
899 		pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
900 	}
901 	pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
902 
903 	if ((pglob->gl_flags & GLOB_LIMIT) &&
904 	    newsize + *limitp >= (unsigned long)ARG_MAX) {
905 		errno = 0;
906 		return(GLOB_NOSPACE);
907 	}
908 
909 	return(copy == NULL ? GLOB_NOSPACE : 0);
910 }
911 
912 
/*
 * pattern matching function for filenames using state machine to avoid
 * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
 * without additional callframes, and to cleanly prune the backtracking
 * state when multiple '*' (star) matches are included in the pattern.
 *
 * Thanks to Russ Cox for the improved state machine logic to avoid quadratic
 * matching on failure.
 *
 * https://research.swtch.com/glob
 *
 * An example would be a pattern
 *  ("a*" x 100) . "y"
 * against a file name like
 *  ("a" x 100) . "x"
 *
 * Arguments:
 *   name    NUL-terminated candidate name (Char string)
 *   pat     start of the compiled pattern segment
 *   patend  one past the last pattern element to use
 *   nocase  nonzero to compare through tolower()
 *
 * Returns 1 if the whole of name matches pat .. patend, 0 otherwise.
 */
static int
match(Char *name, Char *pat, Char *patend, int nocase)
{
	int ok, negate_range;
	Char c, k;
	/* Restart point for the most recent star; NULL until one is seen. */
	Char *nextp = NULL;
	Char *nextn = NULL;

    redo:
	while (pat < patend) {
		c = *pat++;
		switch (c & M_MASK) {
		case M_ALL:
			/* A trailing star matches whatever is left. */
			if (pat == patend)
				return(1);
	                if (*name == BG_EOS)
	                        return 0;
			/*
			 * First try the star as matching nothing.  If that
			 * fails, resume at the star itself (pat - 1) with
			 * one more name character consumed.  Only the most
			 * recent star is remembered.
			 */
			nextn = name + 1;
	                nextp = pat - 1;
			break;
		case M_ONE:
                        /* since * matches leftmost-shortest first   *
                         * if we encounter the EOS then backtracking *
                         * will not help, so we can exit early here. */
			if (*name++ == BG_EOS)
                                return 0;
			break;
		case M_SET:
			ok = 0;
                        /* since * matches leftmost-shortest first   *
                         * if we encounter the EOS then backtracking *
                         * will not help, so we can exit early here. */
			if ((k = *name++) == BG_EOS)
                                return 0;
			/* BG_EOS is 0, so this reads "if (negate_range)". */
			if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
				++pat;
			/*
			 * Walk every member up to M_END even after a hit,
			 * so that pat ends up just past the set.
			 */
			while (((c = *pat++) & M_MASK) != M_END)
				if ((*pat & M_MASK) == M_RNG) {
					/* Range: c M_RNG pat[1], bounds inclusive. */
					if (nocase) {
						if (tolower(c) <= tolower(k) && tolower(k) <= tolower(pat[1]))
							ok = 1;
					} else {
						if (c <= k && k <= pat[1])
							ok = 1;
					}
					pat += 2;
				} else if (nocase ? (tolower(c) == tolower(k)) : (c == k))
					ok = 1;
			/* Fail on a miss, or on a hit in a negated set. */
			if (ok == negate_range)
				goto fail;
			break;
		default:
			/* Literal character. */
			k = *name++;
			if (nocase ? (tolower(k) != tolower(c)) : (k != c))
				goto fail;
			break;
		}
	}
	/* Pattern used up: success only if the name is used up too. */
	if (*name == BG_EOS)
		return 1;

    fail:
	/* Retry from the last star, if there was one. */
	if (nextn) {
		pat = nextp;
		name = nextn;
		goto redo;
	}
	return 0;
}
999 
1000 /* Free allocated data belonging to a glob_t structure. */
1001 void
1002 bsd_globfree(glob_t *pglob)
1003 {
1004 	int i;
1005 	char **pp;
1006 
1007 	if (pglob->gl_pathv != NULL) {
1008 		pp = pglob->gl_pathv + pglob->gl_offs;
1009 		for (i = pglob->gl_pathc; i--; ++pp)
1010 			if (*pp)
1011 				Safefree(*pp);
1012 		Safefree(pglob->gl_pathv);
1013 		pglob->gl_pathv = NULL;
1014 	}
1015 }
1016 
1017 static DIR *
1018 g_opendir(Char *str, glob_t *pglob)
1019 {
1020 	char buf[MAXPATHLEN];
1021 
1022 	if (!*str) {
1023 		my_strlcpy(buf, ".", sizeof(buf));
1024 	} else {
1025 		if (g_Ctoc(str, buf, sizeof(buf)))
1026 			return(NULL);
1027 	}
1028 
1029 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1030 		return((DIR*)(*pglob->gl_opendir)(buf));
1031 
1032 	return(PerlDir_open(buf));
1033 }
1034 
1035 static int
1036 g_lstat(Char *fn, Stat_t *sb, glob_t *pglob)
1037 {
1038 	char buf[MAXPATHLEN];
1039 
1040 	if (g_Ctoc(fn, buf, sizeof(buf)))
1041 		return(-1);
1042 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1043 		return((*pglob->gl_lstat)(buf, sb));
1044 #ifdef HAS_LSTAT
1045 	return(PerlLIO_lstat(buf, sb));
1046 #else
1047 	return(PerlLIO_stat(buf, sb));
1048 #endif /* HAS_LSTAT */
1049 }
1050 
1051 static int
1052 g_stat(Char *fn, Stat_t *sb, glob_t *pglob)
1053 {
1054 	char buf[MAXPATHLEN];
1055 
1056 	if (g_Ctoc(fn, buf, sizeof(buf)))
1057 		return(-1);
1058 	if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1059 		return((*pglob->gl_stat)(buf, sb));
1060 	return(PerlLIO_stat(buf, sb));
1061 }
1062 
1063 static Char *
1064 g_strchr(Char *str, int ch)
1065 {
1066 	do {
1067 		if (*str == ch)
1068 			return (str);
1069 	} while (*str++);
1070 	return (NULL);
1071 }
1072 
1073 static int
1074 g_Ctoc(const Char *str, char *buf, STRLEN len)
1075 {
1076 	while (len--) {
1077 		if ((*buf++ = (char)*str++) == BG_EOS)
1078 			return (0);
1079 	}
1080 	return (1);
1081 }
1082 
#ifdef GLOB_DEBUG
/*
 * Debug dump of a Char pattern: the label `str', then three aligned rows:
 * the characters with flag bits stripped, a '"' under every element that
 * has M_PROTECT set, and a '_' under every metacharacter.
 */
static void
qprintf(const char *str, Char *s)
{
	Char *cur;

	(void)printf("%s:\n", str);

	cur = s;
	while (*cur) {
		(void)printf("%c", CHAR(*cur));
		cur++;
	}
	(void)printf("\n");

	cur = s;
	while (*cur) {
		(void)printf("%c", (*cur & M_PROTECT) ? '"' : ' ');
		cur++;
	}
	(void)printf("\n");

	cur = s;
	while (*cur) {
		(void)printf("%c", ismeta(*cur) ? '_' : ' ');
		cur++;
	}
	(void)printf("\n");
}
#endif /* GLOB_DEBUG */
1101