1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.16 (Berkeley) 10/01/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * glob(3) -- a superset of the one defined in POSIX 1003.2. 17 * 18 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 19 * 20 * Optional extra services, controlled by flags not defined by POSIX: 21 * 22 * GLOB_QUOTE: 23 * Escaping convention: \ inhibits any special meaning the following 24 * character might have (except \ at end of string is retained). 25 * GLOB_MAGCHAR: 26 * Set in gl_flags if pattern contained a globbing character. 27 * GLOB_NOMAGIC: 28 * Same as GLOB_NOCHECK, but it will only append pattern if it did 29 * not contain any magic characters. [Used in csh style globbing] 30 * gl_matchc: 31 * Number of matches in the current invocation of glob. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <dirent.h> 37 #include <glob.h> 38 #include <ctype.h> 39 #include <errno.h> 40 #include <string.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 44 #define DOLLAR '$' 45 #define DOT '.' 46 #define EOS '\0' 47 #define LBRACKET '[' 48 #define NOT '!' 49 #define QUESTION '?' 50 #define QUOTE '\\' 51 #define RANGE '-' 52 #define RBRACKET ']' 53 #define SEP '/' 54 #define STAR '*' 55 #define TILDE '~' 56 #define UNDERSCORE '_' 57 58 #define M_QUOTE 0x8000 59 #define M_PROTECT 0x4000 60 #define M_MASK 0xffff 61 #define M_ASCII 0x00ff 62 63 #define CHAR(c) ((c)&M_ASCII) 64 #define META(c) ((c)|M_QUOTE) 65 #define M_ALL META('*') 66 #define M_END META(']') 67 #define M_NOT META('!') 68 #define M_ONE META('?') 69 #define M_RNG META('-') 70 #define M_SET META('[') 71 #define ismeta(c) (((c)&M_QUOTE) != 0) 72 73 typedef u_short Char; 74 75 static int compare __P((const void *, const void *)); 76 static void g_Ctoc __P((Char *, char *)); 77 static int g_lstat __P((Char *, struct stat *)); 78 static DIR *g_opendir __P((Char *)); 79 static Char *g_strchr __P((Char *, int)); 80 static int g_stat __P((Char *, struct stat *)); 81 static int glob1 __P((Char *, glob_t *)); 82 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 83 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 84 static int globextend __P((Char *, glob_t *)); 85 static int match __P((Char *, Char *, Char *)); 86 #ifdef DEBUG 87 static void qprintf __P((Char *)); 88 #endif 89 90 /* 91 * The main glob() routine: compiles the pattern (optionally processing 92 * quotes), calls glob1() to do the real pattern matching, and finally 93 * sorts the list (unless unsorted operation is requested). Returns 0 94 * if things went well, nonzero if errors occurred. It is not an error 95 * to find no matches. 96 */ 97 glob(pattern, flags, errfunc, pglob) 98 const char *pattern; 99 int flags, (*errfunc) __P((char *, int)); 100 glob_t *pglob; 101 { 102 const u_char *compilepat, *patnext; 103 int c, err, oldpathc; 104 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 105 106 patnext = (u_char *) pattern; 107 if (!(flags & GLOB_APPEND)) { 108 pglob->gl_pathc = 0; 109 pglob->gl_pathv = NULL; 110 if (!(flags & GLOB_DOOFFS)) 111 pglob->gl_offs = 0; 112 } 113 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 114 pglob->gl_errfunc = errfunc; 115 oldpathc = pglob->gl_pathc; 116 pglob->gl_matchc = 0; 117 118 bufnext = patbuf; 119 bufend = bufnext + MAXPATHLEN; 120 compilebuf = bufnext; 121 compilepat = patnext; 122 if (flags & GLOB_QUOTE) { 123 /* Protect the quoted characters. */ 124 while (bufnext < bufend && (c = *patnext++) != EOS) 125 if (c == QUOTE) { 126 if ((c = *patnext++) == EOS) { 127 c = QUOTE; 128 --patnext; 129 } 130 *bufnext++ = c | M_PROTECT; 131 } 132 else 133 *bufnext++ = c; 134 } 135 else 136 while (bufnext < bufend && (c = *patnext++) != EOS) 137 *bufnext++ = c; 138 *bufnext = EOS; 139 140 bufnext = patbuf; 141 qpatnext = patbuf; 142 /* We don't need to check for buffer overflow any more. */ 143 while ((c = *qpatnext++) != EOS) { 144 switch (c) { 145 case LBRACKET: 146 c = *qpatnext; 147 if (c == NOT) 148 ++qpatnext; 149 if (*qpatnext == EOS || 150 g_strchr(qpatnext+1, RBRACKET) == NULL) { 151 *bufnext++ = LBRACKET; 152 if (c == NOT) 153 --qpatnext; 154 break; 155 } 156 *bufnext++ = M_SET; 157 if (c == NOT) 158 *bufnext++ = M_NOT; 159 c = *qpatnext++; 160 do { 161 *bufnext++ = CHAR(c); 162 if (*qpatnext == RANGE && 163 (c = qpatnext[1]) != RBRACKET) { 164 *bufnext++ = M_RNG; 165 *bufnext++ = CHAR(c); 166 qpatnext += 2; 167 } 168 } while ((c = *qpatnext++) != RBRACKET); 169 pglob->gl_flags |= GLOB_MAGCHAR; 170 *bufnext++ = M_END; 171 break; 172 case QUESTION: 173 pglob->gl_flags |= GLOB_MAGCHAR; 174 *bufnext++ = M_ONE; 175 break; 176 case STAR: 177 pglob->gl_flags |= GLOB_MAGCHAR; 178 /* collapse adjacent stars to one, 179 * to avoid exponential behavior 180 */ 181 if (bufnext == patbuf || bufnext[-1] != M_ALL) 182 *bufnext++ = M_ALL; 183 break; 184 default: 185 *bufnext++ = CHAR(c); 186 break; 187 } 188 } 189 *bufnext = EOS; 190 #ifdef DEBUG 191 qprintf(patbuf); 192 #endif 193 194 if ((err = glob1(patbuf, pglob)) != 0) 195 return(err); 196 197 /* 198 * If there was no match we are going to append the pattern 199 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 200 * and the pattern did not contain any magic characters 201 * GLOB_NOMAGIC is there just for compatibility with csh. 202 */ 203 if (pglob->gl_pathc == oldpathc && 204 ((flags & GLOB_NOCHECK) || 205 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 206 if (!(flags & GLOB_QUOTE)) { 207 Char *dp = compilebuf; 208 const u_char *sp = compilepat; 209 while (*dp++ = *sp++); 210 } 211 else { 212 /* 213 * Copy pattern, interpreting quotes; this is slightly 214 * different than the interpretation of quotes above 215 * -- which should prevail? 216 */ 217 while (*compilepat != EOS) { 218 if (*compilepat == QUOTE) { 219 if (*++compilepat == EOS) 220 --compilepat; 221 } 222 *compilebuf++ = (u_char)*compilepat++; 223 } 224 *compilebuf = EOS; 225 } 226 return(globextend(patbuf, pglob)); 227 } else if (!(flags & GLOB_NOSORT)) 228 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 229 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 230 return(0); 231 } 232 233 static int 234 compare(p, q) 235 const void *p, *q; 236 { 237 return(strcmp(*(char **)p, *(char **)q)); 238 } 239 240 static 241 glob1(pattern, pglob) 242 Char *pattern; 243 glob_t *pglob; 244 { 245 Char pathbuf[MAXPATHLEN+1]; 246 247 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 248 if (*pattern == EOS) 249 return(0); 250 return(glob2(pathbuf, pathbuf, pattern, pglob)); 251 } 252 253 /* 254 * The functions glob2 and glob3 are mutually recursive; there is one level 255 * of recursion for each segment in the pattern that contains one or more 256 * meta characters. 257 */ 258 static 259 glob2(pathbuf, pathend, pattern, pglob) 260 Char *pathbuf, *pathend, *pattern; 261 glob_t *pglob; 262 { 263 struct stat sb; 264 Char *p, *q; 265 int anymeta; 266 267 /* 268 * Loop over pattern segments until end of pattern or until 269 * segment with meta character found. 270 */ 271 for (anymeta = 0;;) { 272 if (*pattern == EOS) { /* End of pattern? */ 273 *pathend = EOS; 274 if (g_lstat(pathbuf, &sb)) 275 return(0); 276 277 if (((pglob->gl_flags & GLOB_MARK) && 278 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 279 || (S_ISLNK(sb.st_mode) && 280 (g_stat(pathbuf, &sb) == 0) && 281 S_ISDIR(sb.st_mode)))) { 282 *pathend++ = SEP; 283 *pathend = EOS; 284 } 285 ++pglob->gl_matchc; 286 return(globextend(pathbuf, pglob)); 287 } 288 289 /* Find end of next segment, copy tentatively to pathend. */ 290 q = pathend; 291 p = pattern; 292 while (*p != EOS && *p != SEP) { 293 if (ismeta(*p)) 294 anymeta = 1; 295 *q++ = *p++; 296 } 297 298 if (!anymeta) { /* No expansion, do next segment. */ 299 pathend = q; 300 pattern = p; 301 while (*pattern == SEP) 302 *pathend++ = *pattern++; 303 } else /* Need expansion, recurse. */ 304 return(glob3(pathbuf, pathend, pattern, p, pglob)); 305 } 306 /* NOTREACHED */ 307 } 308 309 static 310 glob3(pathbuf, pathend, pattern, restpattern, pglob) 311 Char *pathbuf, *pathend, *pattern, *restpattern; 312 glob_t *pglob; 313 { 314 register struct dirent *dp; 315 DIR *dirp; 316 int len, err; 317 318 *pathend = EOS; 319 errno = 0; 320 321 if (!(dirp = g_opendir(pathbuf))) 322 /* TODO: don't call for ENOENT or ENOTDIR? */ 323 if (pglob->gl_errfunc && 324 (*pglob->gl_errfunc)(pathbuf, errno) || 325 (pglob->gl_flags & GLOB_ERR)) 326 return(GLOB_ABEND); 327 else 328 return(0); 329 330 err = 0; 331 332 /* Search directory for matching names. */ 333 while ((dp = readdir(dirp))) { 334 register u_char *sc; 335 register Char *dc; 336 337 /* Initial DOT must be matched literally. */ 338 if (dp->d_name[0] == DOT && *pattern != DOT) 339 continue; 340 for (sc = (u_char *) dp->d_name, dc = pathend; 341 *dc++ = *sc++;); 342 if (!match(pathend, pattern, restpattern)) { 343 *pathend = EOS; 344 continue; 345 } 346 err = glob2(pathbuf, --dc, restpattern, pglob); 347 if (err) 348 break; 349 } 350 351 /* TODO: check error from readdir? */ 352 (void)closedir(dirp); 353 return(err); 354 } 355 356 357 /* 358 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 359 * add the new item, and update gl_pathc. 360 * 361 * This assumes the BSD realloc, which only copies the block when its size 362 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 363 * behavior. 364 * 365 * Return 0 if new item added, error code if memory couldn't be allocated. 366 * 367 * Invariant of the glob_t structure: 368 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 369 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 370 */ 371 static int 372 globextend(path, pglob) 373 Char *path; 374 glob_t *pglob; 375 { 376 register char **pathv; 377 register int i; 378 u_int newsize; 379 char *copy; 380 Char *p; 381 382 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 383 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 384 if (pathv == NULL) 385 return(GLOB_NOSPACE); 386 387 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 388 /* first time around -- clear initial gl_offs items */ 389 pathv += pglob->gl_offs; 390 for (i = pglob->gl_offs; --i >= 0; ) 391 *--pathv = NULL; 392 } 393 pglob->gl_pathv = pathv; 394 395 for (p = path; *p++;); 396 if ((copy = malloc(p - path)) != NULL) { 397 g_Ctoc(path, copy); 398 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 399 } 400 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 401 return(copy == NULL ? GLOB_NOSPACE : 0); 402 } 403 404 405 /* 406 * pattern matching function for filenames. Each occurrence of the * 407 * pattern causes a recursion level. 408 */ 409 static 410 match(name, pat, patend) 411 register Char *name, *pat, *patend; 412 { 413 int ok, negate_range; 414 Char c, k; 415 416 while (pat < patend) { 417 c = *pat++; 418 switch (c & M_MASK) { 419 case M_ALL: 420 if (pat == patend) 421 return(1); 422 do 423 if (match(name, pat, patend)) 424 return(1); 425 while (*name++ != EOS); 426 return(0); 427 case M_ONE: 428 if (*name++ == EOS) 429 return(0); 430 break; 431 case M_SET: 432 ok = 0; 433 if ((k = *name++) == EOS) 434 return(0); 435 if (negate_range = ((*pat & M_MASK) == M_NOT)) 436 ++pat; 437 while (((c = *pat++) & M_MASK) != M_END) 438 if ((*pat & M_MASK) == M_RNG) { 439 if (c <= k && k <= pat[1]) 440 ok = 1; 441 pat += 2; 442 } else if (c == k) 443 ok = 1; 444 if (ok == negate_range) 445 return(0); 446 break; 447 default: 448 if (*name++ != c) 449 return(0); 450 break; 451 } 452 } 453 return(*name == EOS); 454 } 455 456 /* Free allocated data belonging to a glob_t structure. */ 457 void 458 globfree(pglob) 459 glob_t *pglob; 460 { 461 register int i; 462 register char **pp; 463 464 if (pglob->gl_pathv != NULL) { 465 pp = pglob->gl_pathv + pglob->gl_offs; 466 for (i = pglob->gl_pathc; i--; ++pp) 467 if (*pp) 468 free(*pp); 469 free(pglob->gl_pathv); 470 } 471 } 472 473 static DIR * 474 g_opendir(str) 475 register Char *str; 476 { 477 char buf[MAXPATHLEN]; 478 479 if (!*str) 480 return(opendir(".")); 481 g_Ctoc(str, buf); 482 return(opendir(buf)); 483 } 484 485 static int 486 g_lstat(fn, sb) 487 register Char *fn; 488 struct stat *sb; 489 { 490 char buf[MAXPATHLEN]; 491 492 g_Ctoc(fn, buf); 493 return(lstat(buf, sb)); 494 } 495 496 static int 497 g_stat(fn, sb) 498 register Char *fn; 499 struct stat *sb; 500 { 501 char buf[MAXPATHLEN]; 502 503 g_Ctoc(fn, buf); 504 return(stat(buf, sb)); 505 } 506 507 static Char * 508 g_strchr(str, ch) 509 Char *str; 510 int ch; 511 { 512 do { 513 if (*str == ch) 514 return (str); 515 } while (*str++); 516 return (NULL); 517 } 518 519 static void 520 g_Ctoc(str, buf) 521 register Char *str; 522 char *buf; 523 { 524 register char *dc; 525 526 for (dc = buf; *dc++ = *str++;); 527 } 528 529 #ifdef DEBUG 530 static void 531 qprintf(s) 532 register Char *s; 533 { 534 register Char *p; 535 536 for (p = s; *p; p++) 537 (void)printf("%c", *p & 0xff); 538 (void)printf("\n"); 539 for (p = s; *p; p++) 540 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 541 (void)printf("\n"); 542 for (p = s; *p; p++) 543 (void)printf("%c", *p & M_META ? '_' : ' '); 544 (void)printf("\n"); 545 } 546 #endif 547