1 /*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Guido van Rossum.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #if defined(LIBC_SCCS) && !defined(lint)
34 static char sccsid[] = "@(#)glob.c 8.3 (Berkeley) 10/13/93";
35 /* most changes between the version above and the one below have been ported:
36 static char sscsid[]= "$OpenBSD: glob.c,v 1.8.10.1 2001/04/10 jason Exp $";
37 */
38 #endif /* LIBC_SCCS and not lint */
39
40 /*
41 * glob(3) -- a superset of the one defined in POSIX 1003.2.
42 *
43 * The [!...] convention to negate a range is supported (SysV, Posix, ksh).
44 *
45 * Optional extra services, controlled by flags not defined by POSIX:
46 *
47 * GLOB_QUOTE:
48 * Escaping convention: \ inhibits any special meaning the following
49 * character might have (except \ at end of string is retained).
50 * GLOB_MAGCHAR:
51 * Set in gl_flags if pattern contained a globbing character.
52 * GLOB_NOMAGIC:
53 * Same as GLOB_NOCHECK, but it will only append pattern if it did
54 * not contain any magic characters. [Used in csh style globbing]
55 * GLOB_ALTDIRFUNC:
56 * Use alternately specified directory access functions.
57 * GLOB_TILDE:
58 * expand ~user/foo to the /home/dir/of/user/foo
59 * GLOB_BRACE:
60 * expand {1,2}{a,b} to 1a 1b 2a 2b
61 * gl_matchc:
62 * Number of matches in the current invocation of glob.
63 * GLOB_ALPHASORT:
64 * sort alphabetically like csh (case doesn't matter) instead of in ASCII
65 * order
66 */
67
68 #include <EXTERN.h>
69 #include <perl.h>
70 #include <XSUB.h>
71
72 #include "bsd_glob.h"
73 #ifdef I_PWD
74 # include <pwd.h>
75 #else
76 #if defined(HAS_PASSWD) && !defined(VMS)
77 struct passwd *getpwnam(char *);
78 struct passwd *getpwuid(Uid_t);
79 #endif
80 #endif
81
82 #ifndef MAXPATHLEN
83 # ifdef PATH_MAX
84 # define MAXPATHLEN PATH_MAX
85 # else
86 # define MAXPATHLEN 1024
87 # endif
88 #endif
89
90 #include <limits.h>
91
92 #ifndef ARG_MAX
93 # ifdef _SC_ARG_MAX
94 # define ARG_MAX (sysconf(_SC_ARG_MAX))
95 # else
96 # ifdef _POSIX_ARG_MAX
97 # define ARG_MAX _POSIX_ARG_MAX
98 # else
99 # ifdef WIN32
100 # define ARG_MAX 14500 /* from VC's limits.h */
101 # else
102 # define ARG_MAX 4096 /* from POSIX, be conservative */
103 # endif
104 # endif
105 # endif
106 #endif
107
/*
 * Symbolic names for the literal characters that have a role in the
 * pattern syntax handled by this file.
 */

/* String terminator and path separators. */
#define BG_EOS          '\0'
#define BG_SEP          '/'
#ifdef DOSISH
#define BG_SEP2         '\\'    /* alternative separator on DOSISH builds */
#endif
#define BG_SLASH        '/'

/* Wildcards and the quoting character. */
#define BG_STAR         '*'
#define BG_QUESTION     '?'
#define BG_QUOTE        '\\'

/* Bracket expressions: [...], [!...], [a-z]. */
#define BG_LBRACKET     '['
#define BG_RBRACKET     ']'
#define BG_NOT          '!'
#define BG_RANGE        '-'

/* Brace alternation: {a,b}. */
#define BG_LBRACE       '{'
#define BG_RBRACE       '}'
#define BG_COMMA        ','

/* Remaining single-character names. */
#define BG_TILDE        '~'
#define BG_DOT          '.'
#define BG_DOLLAR       '$'
#define BG_UNDERSCORE   '_'
128
129 #ifndef GLOB_DEBUG
130
131 #define M_QUOTE 0x8000
132 #define M_PROTECT 0x4000
133 #define M_MASK 0xffff
134 #define M_ASCII 0x00ff
135
136 typedef U16 Char;
137
138 #else
139
140 #define M_QUOTE 0x80
141 #define M_PROTECT 0x40
142 #define M_MASK 0xff
143 #define M_ASCII 0x7f
144
145 typedef U8 Char;
146
147 #endif /* !GLOB_DEBUG */
148
149
150 #define CHAR(c) ((Char)((c)&M_ASCII))
151 #define META(c) ((Char)((c)|M_QUOTE))
152 #define M_ALL META('*')
153 #define M_END META(']')
154 #define M_NOT META('!')
155 #define M_ONE META('?')
156 #define M_RNG META('-')
157 #define M_SET META('[')
158 #define ismeta(c) (((c)&M_QUOTE) != 0)
159
160
161 static int compare(const void *, const void *);
162 static int ci_compare(const void *, const void *);
163 static int g_Ctoc(const Char *, char *, STRLEN);
164 static int g_lstat(Char *, Stat_t *, glob_t *);
165 static DIR *g_opendir(Char *, glob_t *);
166 static Char *g_strchr(Char *, int);
167 static int g_stat(Char *, Stat_t *, glob_t *);
168 static int glob0(const Char *, glob_t *);
169 static int glob1(Char *, Char *, glob_t *, size_t *);
170 static int glob2(Char *, Char *, Char *, Char *, Char *, Char *,
171 glob_t *, size_t *);
172 static int glob3(Char *, Char *, Char *, Char *, Char *,
173 Char *, Char *, glob_t *, size_t *);
174 static int globextend(const Char *, glob_t *, size_t *);
175 static const Char *
176 globtilde(const Char *, Char *, size_t, glob_t *);
177 static int globexp1(const Char *, glob_t *);
178 static int globexp2(const Char *, const Char *, glob_t *, int *);
179 static int match(Char *, Char *, Char *, int);
180 #ifdef GLOB_DEBUG
181 static void qprintf(const char *, Char *);
182 #endif /* GLOB_DEBUG */
183
184 #ifdef PERL_IMPLICIT_CONTEXT
185 static Direntry_t * my_readdir(DIR*);
186
187 static Direntry_t *
my_readdir(DIR * d)188 my_readdir(DIR *d)
189 {
190 #ifndef NETWARE
191 return PerlDir_read(d);
192 #else
193 return (DIR *)PerlDir_read(d);
194 #endif
195 }
196 #else
197
198 /* ReliantUNIX (OS formerly known as SINIX) defines readdir
199 * in LFS-mode to be a 64-bit version of readdir. */
200
201 # ifdef sinix
202 static Direntry_t * my_readdir(DIR*);
203
204 static Direntry_t *
my_readdir(DIR * d)205 my_readdir(DIR *d)
206 {
207 return readdir(d);
208 }
209 # else
210
211 # define my_readdir readdir
212
213 # endif
214
215 #endif
216
217 int
bsd_glob(const char * pattern,int flags,int (* errfunc)(const char *,int),glob_t * pglob)218 bsd_glob(const char *pattern, int flags,
219 int (*errfunc)(const char *, int), glob_t *pglob)
220 {
221 const U8 *patnext;
222 int c;
223 Char *bufnext, *bufend, patbuf[MAXPATHLEN];
224 patnext = (U8 *) pattern;
225 /* TODO: GLOB_APPEND / GLOB_DOOFFS aren't supported yet */
226 #if 0
227 if (!(flags & GLOB_APPEND)) {
228 pglob->gl_pathc = 0;
229 pglob->gl_pathv = NULL;
230 if (!(flags & GLOB_DOOFFS))
231 pglob->gl_offs = 0;
232 }
233 #else
234 pglob->gl_pathc = 0;
235 pglob->gl_pathv = NULL;
236 pglob->gl_offs = 0;
237 #endif
238 pglob->gl_flags = flags & ~GLOB_MAGCHAR;
239 pglob->gl_errfunc = errfunc;
240 pglob->gl_matchc = 0;
241
242 bufnext = patbuf;
243 bufend = bufnext + MAXPATHLEN - 1;
244 #ifdef DOSISH
245 /* Nasty hack to treat patterns like "C:*" correctly. In this
246 * case, the * should match any file in the current directory
247 * on the C: drive. However, the glob code does not treat the
248 * colon specially, so it looks for files beginning "C:" in
249 * the current directory. To fix this, change the pattern to
250 * add an explicit "./" at the start (just after the drive
251 * letter and colon - ie change to "C:./").
252 */
253 if (isalpha(pattern[0]) && pattern[1] == ':' &&
254 pattern[2] != BG_SEP && pattern[2] != BG_SEP2 &&
255 bufend - bufnext > 4) {
256 *bufnext++ = pattern[0];
257 *bufnext++ = ':';
258 *bufnext++ = '.';
259 *bufnext++ = BG_SEP;
260 patnext += 2;
261 }
262 #endif
263
264 if (flags & GLOB_QUOTE) {
265 /* Protect the quoted characters. */
266 while (bufnext < bufend && (c = *patnext++) != BG_EOS)
267 if (c == BG_QUOTE) {
268 #ifdef DOSISH
269 /* To avoid backslashitis on Win32,
270 * we only treat \ as a quoting character
271 * if it precedes one of the
272 * metacharacters []-{}~\
273 */
274 if ((c = *patnext++) != '[' && c != ']' &&
275 c != '-' && c != '{' && c != '}' &&
276 c != '~' && c != '\\') {
277 #else
278 if ((c = *patnext++) == BG_EOS) {
279 #endif
280 c = BG_QUOTE;
281 --patnext;
282 }
283 *bufnext++ = c | M_PROTECT;
284 } else
285 *bufnext++ = c;
286 } else
287 while (bufnext < bufend && (c = *patnext++) != BG_EOS)
288 *bufnext++ = c;
289 *bufnext = BG_EOS;
290
291 if (flags & GLOB_BRACE)
292 return globexp1(patbuf, pglob);
293 else
294 return glob0(patbuf, pglob);
295 }
296
297 /*
298 * Expand recursively a glob {} pattern. When there is no more expansion
299 * invoke the standard globbing routine to glob the rest of the magic
300 * characters
301 */
302 static int
303 globexp1(const Char *pattern, glob_t *pglob)
304 {
305 const Char* ptr = pattern;
306 int rv;
307
308 /* Protect a single {}, for find(1), like csh */
309 if (pattern[0] == BG_LBRACE && pattern[1] == BG_RBRACE && pattern[2] == BG_EOS)
310 return glob0(pattern, pglob);
311
312 while ((ptr = (const Char *) g_strchr((Char *) ptr, BG_LBRACE)) != NULL)
313 if (!globexp2(ptr, pattern, pglob, &rv))
314 return rv;
315
316 return glob0(pattern, pglob);
317 }
318
319
320 /*
321 * Recursive brace globbing helper. Tries to expand a single brace.
322 * If it succeeds then it invokes globexp1 with the new pattern.
323 * If it fails then it tries to glob the rest of the pattern and returns.
324 */
325 static int
326 globexp2(const Char *ptr, const Char *pattern,
327 glob_t *pglob, int *rv)
328 {
329 int i;
330 Char *lm, *ls;
331 const Char *pe, *pm, *pm1, *pl;
332 Char patbuf[MAXPATHLEN];
333
334 /* copy part up to the brace */
335 for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++)
336 ;
337 *lm = BG_EOS;
338 ls = lm;
339
340 /* Find the balanced brace */
341 for (i = 0, pe = ++ptr; *pe; pe++)
342 if (*pe == BG_LBRACKET) {
343 /* Ignore everything between [] */
344 for (pm = pe++; *pe != BG_RBRACKET && *pe != BG_EOS; pe++)
345 ;
346 if (*pe == BG_EOS) {
347 /*
348 * We could not find a matching BG_RBRACKET.
349 * Ignore and just look for BG_RBRACE
350 */
351 pe = pm;
352 }
353 } else if (*pe == BG_LBRACE)
354 i++;
355 else if (*pe == BG_RBRACE) {
356 if (i == 0)
357 break;
358 i--;
359 }
360
361 /* Non matching braces; just glob the pattern */
362 if (i != 0 || *pe == BG_EOS) {
363 *rv = glob0(patbuf, pglob);
364 return 0;
365 }
366
367 for (i = 0, pl = pm = ptr; pm <= pe; pm++) {
368 switch (*pm) {
369 case BG_LBRACKET:
370 /* Ignore everything between [] */
371 for (pm1 = pm++; *pm != BG_RBRACKET && *pm != BG_EOS; pm++)
372 ;
373 if (*pm == BG_EOS) {
374 /*
375 * We could not find a matching BG_RBRACKET.
376 * Ignore and just look for BG_RBRACE
377 */
378 pm = pm1;
379 }
380 break;
381
382 case BG_LBRACE:
383 i++;
384 break;
385
386 case BG_RBRACE:
387 if (i) {
388 i--;
389 break;
390 }
391 /* FALLTHROUGH */
392 case BG_COMMA:
393 if (i && *pm == BG_COMMA)
394 break;
395 else {
396 /* Append the current string */
397 for (lm = ls; (pl < pm); *lm++ = *pl++)
398 ;
399
400 /*
401 * Append the rest of the pattern after the
402 * closing brace
403 */
404 for (pl = pe + 1; (*lm++ = *pl++) != BG_EOS; )
405 ;
406
407 /* Expand the current pattern */
408 #ifdef GLOB_DEBUG
409 qprintf("globexp2:", patbuf);
410 #endif /* GLOB_DEBUG */
411 *rv = globexp1(patbuf, pglob);
412
413 /* move after the comma, to the next string */
414 pl = pm + 1;
415 }
416 break;
417
418 default:
419 break;
420 }
421 }
422 *rv = 0;
423 return 0;
424 }
425
426
427
428 /*
429 * expand tilde from the passwd file.
430 */
431 static const Char *
432 globtilde(const Char *pattern, Char *patbuf, size_t patbuf_len, glob_t *pglob)
433 {
434 char *h;
435 const Char *p;
436 Char *b, *eb;
437
438 if (*pattern != BG_TILDE || !(pglob->gl_flags & GLOB_TILDE))
439 return pattern;
440
441 /* Copy up to the end of the string or / */
442 eb = &patbuf[patbuf_len - 1];
443 for (p = pattern + 1, h = (char *) patbuf;
444 h < (char*)eb && *p && *p != BG_SLASH; *h++ = (char)*p++)
445 ;
446
447 *h = BG_EOS;
448
449 #if 0
450 if (h == (char *)eb)
451 return what;
452 #endif
453
454 if (((char *) patbuf)[0] == BG_EOS) {
455 /*
456 * handle a plain ~ or ~/ by expanding $HOME
457 * first and then trying the password file
458 * or $USERPROFILE on DOSISH systems
459 */
460 if ((h = getenv("HOME")) == NULL) {
461 #ifdef HAS_PASSWD
462 struct passwd *pwd;
463 if ((pwd = getpwuid(getuid())) == NULL)
464 return pattern;
465 else
466 h = pwd->pw_dir;
467 #elif DOSISH
468 /*
469 * When no passwd file, fallback to the USERPROFILE
470 * environment variable on DOSish systems.
471 */
472 if ((h = getenv("USERPROFILE")) == NULL) {
473 return pattern;
474 }
475 #else
476 return pattern;
477 #endif
478 }
479 } else {
480 /*
481 * Expand a ~user
482 */
483 #ifdef HAS_PASSWD
484 struct passwd *pwd;
485 if ((pwd = getpwnam((char*) patbuf)) == NULL)
486 return pattern;
487 else
488 h = pwd->pw_dir;
489 #else
490 return pattern;
491 #endif
492 }
493
494 /* Copy the home directory */
495 for (b = patbuf; b < eb && *h; *b++ = *h++)
496 ;
497
498 /* Append the rest of the pattern */
499 while (b < eb && (*b++ = *p++) != BG_EOS)
500 ;
501 *b = BG_EOS;
502
503 return patbuf;
504 }
505
506
507 /*
508 * The main glob() routine: compiles the pattern (optionally processing
509 * quotes), calls glob1() to do the real pattern matching, and finally
510 * sorts the list (unless unsorted operation is requested). Returns 0
511 * if things went well, nonzero if errors occurred. It is not an error
512 * to find no matches.
513 */
514 static int
515 glob0(const Char *pattern, glob_t *pglob)
516 {
517 const Char *qpat, *qpatnext;
518 int c, err, oldflags, oldpathc;
519 Char *bufnext, patbuf[MAXPATHLEN];
520 size_t limit = 0;
521
522 qpat = globtilde(pattern, patbuf, MAXPATHLEN, pglob);
523 qpatnext = qpat;
524 oldflags = pglob->gl_flags;
525 oldpathc = pglob->gl_pathc;
526 bufnext = patbuf;
527
528 /* We don't need to check for buffer overflow any more. */
529 while ((c = *qpatnext++) != BG_EOS) {
530 switch (c) {
531 case BG_LBRACKET:
532 c = *qpatnext;
533 if (c == BG_NOT)
534 ++qpatnext;
535 if (*qpatnext == BG_EOS ||
536 g_strchr((Char *) qpatnext+1, BG_RBRACKET) == NULL) {
537 *bufnext++ = BG_LBRACKET;
538 if (c == BG_NOT)
539 --qpatnext;
540 break;
541 }
542 *bufnext++ = M_SET;
543 if (c == BG_NOT)
544 *bufnext++ = M_NOT;
545 c = *qpatnext++;
546 do {
547 *bufnext++ = CHAR(c);
548 if (*qpatnext == BG_RANGE &&
549 (c = qpatnext[1]) != BG_RBRACKET) {
550 *bufnext++ = M_RNG;
551 *bufnext++ = CHAR(c);
552 qpatnext += 2;
553 }
554 } while ((c = *qpatnext++) != BG_RBRACKET);
555 pglob->gl_flags |= GLOB_MAGCHAR;
556 *bufnext++ = M_END;
557 break;
558 case BG_QUESTION:
559 pglob->gl_flags |= GLOB_MAGCHAR;
560 *bufnext++ = M_ONE;
561 break;
562 case BG_STAR:
563 pglob->gl_flags |= GLOB_MAGCHAR;
564 /* Collapse adjacent stars to one.
565 * This is required to ensure that a pattern like
566 * "a**" matches a name like "a", as without this
567 * check when the first star matched everything it would
568 * cause the second star to return a match fail.
569 * As long ** is folded here this does not happen.
570 */
571 if (bufnext == patbuf || bufnext[-1] != M_ALL)
572 *bufnext++ = M_ALL;
573 break;
574 default:
575 *bufnext++ = CHAR(c);
576 break;
577 }
578 }
579 *bufnext = BG_EOS;
580 #ifdef GLOB_DEBUG
581 qprintf("glob0:", patbuf);
582 #endif /* GLOB_DEBUG */
583
584 if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, &limit)) != 0) {
585 pglob->gl_flags = oldflags;
586 return(err);
587 }
588
589 /*
590 * If there was no match we are going to append the pattern
591 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified
592 * and the pattern did not contain any magic characters
593 * GLOB_NOMAGIC is there just for compatibility with csh.
594 */
595 if (pglob->gl_pathc == oldpathc &&
596 ((pglob->gl_flags & GLOB_NOCHECK) ||
597 ((pglob->gl_flags & GLOB_NOMAGIC) &&
598 !(pglob->gl_flags & GLOB_MAGCHAR))))
599 {
600 #ifdef GLOB_DEBUG
601 printf("calling globextend from glob0\n");
602 #endif /* GLOB_DEBUG */
603 pglob->gl_flags = oldflags;
604 return(globextend(qpat, pglob, &limit));
605 }
606 else if (!(pglob->gl_flags & GLOB_NOSORT))
607 if (pglob->gl_pathv)
608 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc,
609 pglob->gl_pathc - oldpathc, sizeof(char *),
610 (pglob->gl_flags & (GLOB_ALPHASORT|GLOB_NOCASE))
611 ? ci_compare : compare);
612 pglob->gl_flags = oldflags;
613 return(0);
614 }
615
/*
 * ci_compare -- qsort() comparator ordering two `char *` elements
 * without regard to case (via toFOLD).  Strings that are equal after
 * folding are ordered by compare() so the result stays deterministic.
 *
 * p, q:    pointers to the two `char *` array elements.
 * Returns: negative, zero or positive, qsort() style.
 */
static int
ci_compare(const void *p, const void *q)
{
    const char *lhs = *(const char **)p;
    const char *rhs = *(const char **)q;
    int diff;

    /* Skip the common (case-folded) prefix. */
    for (; *lhs != '\0' && *rhs != '\0'; ++lhs, ++rhs) {
        if (toFOLD(*lhs) != toFOLD(*rhs))
            break;
    }

    diff = toFOLD(*lhs) - toFOLD(*rhs);
    return diff != 0 ? diff : compare(p, q);
}
633
/*
 * compare -- qsort() comparator ordering two `char *` elements with
 * strcmp().
 *
 * p, q:    pointers to the two `char *` array elements.
 * Returns: the strcmp() result for the two strings.
 */
static int
compare(const void *p, const void *q)
{
    const char *const *lhs = (const char *const *)p;
    const char *const *rhs = (const char *const *)q;

    return strcmp(*lhs, *rhs);
}
639
640 static int
641 glob1(Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp)
642 {
643 Char pathbuf[MAXPATHLEN];
644
645 assert(pattern < pattern_last);
646
647 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */
648 if (*pattern == BG_EOS)
649 return(0);
650 return(glob2(pathbuf, pathbuf+MAXPATHLEN-1,
651 pathbuf, pathbuf+MAXPATHLEN-1,
652 pattern, pattern_last, pglob, limitp));
653 }
654
655 /*
656 * The functions glob2 and glob3 are mutually recursive; there is one level
657 * of recursion for each segment in the pattern that contains one or more
658 * meta characters.
659 */
660 static int
661 glob2(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
662 Char *pattern, Char *pattern_last, glob_t *pglob, size_t *limitp)
663 {
664 Stat_t sb;
665 Char *p, *q;
666 int anymeta;
667
668 assert(pattern < pattern_last);
669
670 /*
671 * Loop over pattern segments until end of pattern or until
672 * segment with meta character found.
673 */
674 for (anymeta = 0;;) {
675 if (*pattern == BG_EOS) { /* End of pattern? */
676 *pathend = BG_EOS;
677 if (g_lstat(pathbuf, &sb, pglob))
678 return(0);
679
680 if (((pglob->gl_flags & GLOB_MARK) &&
681 pathend[-1] != BG_SEP
682 #ifdef DOSISH
683 && pathend[-1] != BG_SEP2
684 #endif
685 ) && (S_ISDIR(sb.st_mode) ||
686 (S_ISLNK(sb.st_mode) &&
687 (g_stat(pathbuf, &sb, pglob) == 0) &&
688 S_ISDIR(sb.st_mode)))) {
689 if (pathend+1 > pathend_last)
690 return (1);
691 *pathend++ = BG_SEP;
692 *pathend = BG_EOS;
693 }
694 ++pglob->gl_matchc;
695 #ifdef GLOB_DEBUG
696 printf("calling globextend from glob2\n");
697 #endif /* GLOB_DEBUG */
698 return(globextend(pathbuf, pglob, limitp));
699 }
700
701 /* Find end of next segment, copy tentatively to pathend. */
702 q = pathend;
703 p = pattern;
704 while (*p != BG_EOS && *p != BG_SEP
705 #ifdef DOSISH
706 && *p != BG_SEP2
707 #endif
708 ) {
709 assert(p < pattern_last);
710 if (ismeta(*p))
711 anymeta = 1;
712 if (q+1 > pathend_last)
713 return (1);
714 *q++ = *p++;
715 }
716
717 if (!anymeta) { /* No expansion, do next segment. */
718 pathend = q;
719 pattern = p;
720 while (*pattern == BG_SEP
721 #ifdef DOSISH
722 || *pattern == BG_SEP2
723 #endif
724 ) {
725 assert(p < pattern_last);
726 if (pathend+1 > pathend_last)
727 return (1);
728 *pathend++ = *pattern++;
729 }
730 } else
731 /* Need expansion, recurse. */
732 return(glob3(pathbuf, pathbuf_last, pathend,
733 pathend_last, pattern,
734 p, pattern_last, pglob, limitp));
735 }
736 /* NOTREACHED */
737 }
738
739 static int
740 glob3(Char *pathbuf, Char *pathbuf_last, Char *pathend, Char *pathend_last,
741 Char *pattern,
742 Char *restpattern, Char *restpattern_last, glob_t *pglob, size_t *limitp)
743 {
744 Direntry_t *dp;
745 DIR *dirp;
746 int err;
747 int nocase;
748 char buf[MAXPATHLEN];
749
750 /*
751 * The readdirfunc declaration can't be prototyped, because it is
752 * assigned, below, to two functions which are prototyped in glob.h
753 * and dirent.h as taking pointers to differently typed opaque
754 * structures.
755 */
756 Direntry_t *(*readdirfunc)(DIR*);
757
758 assert(pattern < restpattern_last);
759 assert(restpattern < restpattern_last);
760
761 if (pathend > pathend_last)
762 return (1);
763 *pathend = BG_EOS;
764 errno = 0;
765
766 #ifdef VMS
767 {
768 Char *q = pathend;
769 if (q - pathbuf > 5) {
770 q -= 5;
771 if (q[0] == '.' &&
772 tolower(q[1]) == 'd' && tolower(q[2]) == 'i' &&
773 tolower(q[3]) == 'r' && q[4] == '/')
774 {
775 q[0] = '/';
776 q[1] = BG_EOS;
777 pathend = q+1;
778 }
779 }
780 }
781 #endif
782
783 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) {
784 /* TODO: don't call for ENOENT or ENOTDIR? */
785 if (pglob->gl_errfunc) {
786 if (g_Ctoc(pathbuf, buf, sizeof(buf)))
787 return (GLOB_ABEND);
788 if (pglob->gl_errfunc(buf, errno) ||
789 (pglob->gl_flags & GLOB_ERR))
790 return (GLOB_ABEND);
791 }
792 return(0);
793 }
794
795 err = 0;
796 nocase = ((pglob->gl_flags & GLOB_NOCASE) != 0);
797
798 /* Search directory for matching names. */
799 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
800 readdirfunc = (Direntry_t *(*)(DIR *))pglob->gl_readdir;
801 else
802 readdirfunc = (Direntry_t *(*)(DIR *))my_readdir;
803 while ((dp = (*readdirfunc)(dirp))) {
804 U8 *sc;
805 Char *dc;
806
807 /* Initial BG_DOT must be matched literally. */
808 if (dp->d_name[0] == BG_DOT && *pattern != BG_DOT)
809 continue;
810 dc = pathend;
811 sc = (U8 *) dp->d_name;
812 while (dc < pathend_last && (*dc++ = *sc++) != BG_EOS)
813 ;
814 if (dc >= pathend_last) {
815 *dc = BG_EOS;
816 err = 1;
817 break;
818 }
819
820 if (!match(pathend, pattern, restpattern, nocase)) {
821 *pathend = BG_EOS;
822 continue;
823 }
824 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last,
825 restpattern, restpattern_last, pglob, limitp);
826 if (err)
827 break;
828 }
829
830 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
831 (*pglob->gl_closedir)(dirp);
832 else
833 PerlDir_close(dirp);
834 return(err);
835 }
836
837
838 /*
839 * Extend the gl_pathv member of a glob_t structure to accommodate a new item,
840 * add the new item, and update gl_pathc.
841 *
842 * This assumes the BSD realloc, which only copies the block when its size
843 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic
844 * behavior.
845 *
846 * Return 0 if new item added, error code if memory couldn't be allocated.
847 *
848 * Invariant of the glob_t structure:
849 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and
850 * gl_pathv points to (gl_offs + gl_pathc + 1) items.
851 */
852 static int
853 globextend(const Char *path, glob_t *pglob, size_t *limitp)
854 {
855 char **pathv;
856 int i;
857 STRLEN newsize, len;
858 char *copy;
859 const Char *p;
860
861 #ifdef GLOB_DEBUG
862 printf("Adding ");
863 for (p = path; *p; p++)
864 (void)printf("%c", CHAR(*p));
865 printf("\n");
866 #endif /* GLOB_DEBUG */
867
868 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs);
869 if (pglob->gl_pathv)
870 pathv = Renew(pglob->gl_pathv,newsize,char*);
871 else
872 Newx(pathv,newsize,char*);
873 if (pathv == NULL) {
874 if (pglob->gl_pathv) {
875 Safefree(pglob->gl_pathv);
876 pglob->gl_pathv = NULL;
877 }
878 return(GLOB_NOSPACE);
879 }
880
881 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) {
882 /* first time around -- clear initial gl_offs items */
883 pathv += pglob->gl_offs;
884 for (i = pglob->gl_offs; --i >= 0; )
885 *--pathv = NULL;
886 }
887 pglob->gl_pathv = pathv;
888
889 for (p = path; *p++;)
890 ;
891 len = (STRLEN)(p - path);
892 *limitp += len;
893 Newx(copy, p-path, char);
894 if (copy != NULL) {
895 if (g_Ctoc(path, copy, len)) {
896 Safefree(copy);
897 return(GLOB_NOSPACE);
898 }
899 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy;
900 }
901 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
902
903 if ((pglob->gl_flags & GLOB_LIMIT) &&
904 newsize + *limitp >= (unsigned long)ARG_MAX) {
905 errno = 0;
906 return(GLOB_NOSPACE);
907 }
908
909 return(copy == NULL ? GLOB_NOSPACE : 0);
910 }
911
912
913 /*
914 * pattern matching function for filenames using state machine to avoid
915 * recursion. We maintain a "nextp" and "nextn" to allow us to backtrack
916 * without additional callframes, and to cleanly prune the backtracking
917 * state when multiple '*' (star) matches are included in the pattern.
918 *
919 * Thanks to Russ Cox for the improved state machine logic to avoid quadratic
920 * matching on failure.
921 *
922 * https://research.swtch.com/glob
923 *
924 * An example would be a pattern
925 * ("a*" x 100) . "y"
926 * against a file name like
927 * ("a" x 100) . "x"
928 *
929 */
930 static int
931 match(Char *name, Char *pat, Char *patend, int nocase)
932 {
933 int ok, negate_range;
934 Char c, k;
935 Char *nextp = NULL;
936 Char *nextn = NULL;
937
938 redo:
939 while (pat < patend) {
940 c = *pat++;
941 switch (c & M_MASK) {
942 case M_ALL:
943 if (pat == patend)
944 return(1);
945 if (*name == BG_EOS)
946 return 0;
947 nextn = name + 1;
948 nextp = pat - 1;
949 break;
950 case M_ONE:
951 /* since * matches leftmost-shortest first *
952 * if we encounter the EOS then backtracking *
953 * will not help, so we can exit early here. */
954 if (*name++ == BG_EOS)
955 return 0;
956 break;
957 case M_SET:
958 ok = 0;
959 /* since * matches leftmost-shortest first *
960 * if we encounter the EOS then backtracking *
961 * will not help, so we can exit early here. */
962 if ((k = *name++) == BG_EOS)
963 return 0;
964 if ((negate_range = ((*pat & M_MASK) == M_NOT)) != BG_EOS)
965 ++pat;
966 while (((c = *pat++) & M_MASK) != M_END)
967 if ((*pat & M_MASK) == M_RNG) {
968 if (nocase) {
969 if (tolower(c) <= tolower(k) && tolower(k) <= tolower(pat[1]))
970 ok = 1;
971 } else {
972 if (c <= k && k <= pat[1])
973 ok = 1;
974 }
975 pat += 2;
976 } else if (nocase ? (tolower(c) == tolower(k)) : (c == k))
977 ok = 1;
978 if (ok == negate_range)
979 goto fail;
980 break;
981 default:
982 k = *name++;
983 if (nocase ? (tolower(k) != tolower(c)) : (k != c))
984 goto fail;
985 break;
986 }
987 }
988 if (*name == BG_EOS)
989 return 1;
990
991 fail:
992 if (nextn) {
993 pat = nextp;
994 name = nextn;
995 goto redo;
996 }
997 return 0;
998 }
999
1000 /* Free allocated data belonging to a glob_t structure. */
1001 void
1002 bsd_globfree(glob_t *pglob)
1003 {
1004 int i;
1005 char **pp;
1006
1007 if (pglob->gl_pathv != NULL) {
1008 pp = pglob->gl_pathv + pglob->gl_offs;
1009 for (i = pglob->gl_pathc; i--; ++pp)
1010 if (*pp)
1011 Safefree(*pp);
1012 Safefree(pglob->gl_pathv);
1013 pglob->gl_pathv = NULL;
1014 }
1015 }
1016
1017 static DIR *
1018 g_opendir(Char *str, glob_t *pglob)
1019 {
1020 char buf[MAXPATHLEN];
1021
1022 if (!*str) {
1023 my_strlcpy(buf, ".", sizeof(buf));
1024 } else {
1025 if (g_Ctoc(str, buf, sizeof(buf)))
1026 return(NULL);
1027 }
1028
1029 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1030 return((DIR*)(*pglob->gl_opendir)(buf));
1031
1032 return(PerlDir_open(buf));
1033 }
1034
1035 static int
1036 g_lstat(Char *fn, Stat_t *sb, glob_t *pglob)
1037 {
1038 char buf[MAXPATHLEN];
1039
1040 if (g_Ctoc(fn, buf, sizeof(buf)))
1041 return(-1);
1042 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1043 return((*pglob->gl_lstat)(buf, sb));
1044 #ifdef HAS_LSTAT
1045 return(PerlLIO_lstat(buf, sb));
1046 #else
1047 return(PerlLIO_stat(buf, sb));
1048 #endif /* HAS_LSTAT */
1049 }
1050
1051 static int
1052 g_stat(Char *fn, Stat_t *sb, glob_t *pglob)
1053 {
1054 char buf[MAXPATHLEN];
1055
1056 if (g_Ctoc(fn, buf, sizeof(buf)))
1057 return(-1);
1058 if (pglob->gl_flags & GLOB_ALTDIRFUNC)
1059 return((*pglob->gl_stat)(buf, sb));
1060 return(PerlLIO_stat(buf, sb));
1061 }
1062
1063 static Char *
1064 g_strchr(Char *str, int ch)
1065 {
1066 do {
1067 if (*str == ch)
1068 return (str);
1069 } while (*str++);
1070 return (NULL);
1071 }
1072
1073 static int
1074 g_Ctoc(const Char *str, char *buf, STRLEN len)
1075 {
1076 while (len--) {
1077 if ((*buf++ = (char)*str++) == BG_EOS)
1078 return (0);
1079 }
1080 return (1);
1081 }
1082
#ifdef GLOB_DEBUG
/*
 * qprintf -- debugging aid: print a label followed by three lines
 * describing the Char string `s`: its plain characters, a '"' under
 * every element carrying M_PROTECT, and a '_' under every element for
 * which ismeta() is true.
 */
static void
qprintf(const char *str, Char *s)
{
    Char *cur;

    (void)printf("%s:\n", str);

    /* Line 1: the characters with all flag bits stripped. */
    cur = s;
    while (*cur) {
        (void)printf("%c", CHAR(*cur));
        cur++;
    }
    (void)printf("\n");

    /* Line 2: marks for protected (quoted) elements. */
    cur = s;
    while (*cur) {
        (void)printf("%c", (*cur & M_PROTECT) ? '"' : ' ');
        cur++;
    }
    (void)printf("\n");

    /* Line 3: marks for meta elements. */
    cur = s;
    while (*cur) {
        (void)printf("%c", ismeta(*cur) ? '_' : ' ');
        cur++;
    }
    (void)printf("\n");
}
#endif /* GLOB_DEBUG */
1101