1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.17 (Berkeley) 12/02/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * glob(3) -- a superset of the one defined in POSIX 1003.2. 17 * 18 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 19 * 20 * Optional extra services, controlled by flags not defined by POSIX: 21 * 22 * GLOB_QUOTE: 23 * Escaping convention: \ inhibits any special meaning the following 24 * character might have (except \ at end of string is retained). 25 * GLOB_MAGCHAR: 26 * Set in gl_flags if pattern contained a globbing character. 27 * GLOB_NOMAGIC: 28 * Same as GLOB_NOCHECK, but it will only append pattern if it did 29 * not contain any magic characters. [Used in csh style globbing] 30 * GLOB_ALTDIRFUNC: 31 * Use alternately specified directory access functions. 32 * gl_matchc: 33 * Number of matches in the current invocation of glob. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/stat.h> 38 #include <dirent.h> 39 #include <glob.h> 40 #include <ctype.h> 41 #include <errno.h> 42 #include <string.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 46 #define DOLLAR '$' 47 #define DOT '.' 48 #define EOS '\0' 49 #define LBRACKET '[' 50 #define NOT '!' 51 #define QUESTION '?' 52 #define QUOTE '\\' 53 #define RANGE '-' 54 #define RBRACKET ']' 55 #define SEP '/' 56 #define STAR '*' 57 #define TILDE '~' 58 #define UNDERSCORE '_' 59 60 #define M_QUOTE 0x8000 61 #define M_PROTECT 0x4000 62 #define M_MASK 0xffff 63 #define M_ASCII 0x00ff 64 65 #define CHAR(c) ((c)&M_ASCII) 66 #define META(c) ((c)|M_QUOTE) 67 #define M_ALL META('*') 68 #define M_END META(']') 69 #define M_NOT META('!') 70 #define M_ONE META('?') 71 #define M_RNG META('-') 72 #define M_SET META('[') 73 #define ismeta(c) (((c)&M_QUOTE) != 0) 74 75 typedef u_short Char; 76 77 static int compare __P((const void *, const void *)); 78 static void g_Ctoc __P((Char *, char *)); 79 static int g_lstat __P((Char *, struct stat *, glob_t *)); 80 static DIR *g_opendir __P((Char *, glob_t *)); 81 static Char *g_strchr __P((Char *, int)); 82 static int g_stat __P((Char *, struct stat *, glob_t *)); 83 static int glob1 __P((Char *, glob_t *)); 84 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 85 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 86 static int globextend __P((Char *, glob_t *)); 87 static int match __P((Char *, Char *, Char *)); 88 #ifdef DEBUG 89 static void qprintf __P((Char *)); 90 #endif 91 92 /* 93 * The main glob() routine: compiles the pattern (optionally processing 94 * quotes), calls glob1() to do the real pattern matching, and finally 95 * sorts the list (unless unsorted operation is requested). Returns 0 96 * if things went well, nonzero if errors occurred. It is not an error 97 * to find no matches. 98 */ 99 glob(pattern, flags, errfunc, pglob) 100 const char *pattern; 101 int flags, (*errfunc) __P((char *, int)); 102 glob_t *pglob; 103 { 104 const u_char *compilepat, *patnext; 105 int c, err, oldpathc; 106 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 107 108 patnext = (u_char *) pattern; 109 if (!(flags & GLOB_APPEND)) { 110 pglob->gl_pathc = 0; 111 pglob->gl_pathv = NULL; 112 if (!(flags & GLOB_DOOFFS)) 113 pglob->gl_offs = 0; 114 } 115 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 116 pglob->gl_errfunc = errfunc; 117 oldpathc = pglob->gl_pathc; 118 pglob->gl_matchc = 0; 119 120 bufnext = patbuf; 121 bufend = bufnext + MAXPATHLEN; 122 compilebuf = bufnext; 123 compilepat = patnext; 124 if (flags & GLOB_QUOTE) { 125 /* Protect the quoted characters. */ 126 while (bufnext < bufend && (c = *patnext++) != EOS) 127 if (c == QUOTE) { 128 if ((c = *patnext++) == EOS) { 129 c = QUOTE; 130 --patnext; 131 } 132 *bufnext++ = c | M_PROTECT; 133 } 134 else 135 *bufnext++ = c; 136 } 137 else 138 while (bufnext < bufend && (c = *patnext++) != EOS) 139 *bufnext++ = c; 140 *bufnext = EOS; 141 142 bufnext = patbuf; 143 qpatnext = patbuf; 144 /* We don't need to check for buffer overflow any more. */ 145 while ((c = *qpatnext++) != EOS) { 146 switch (c) { 147 case LBRACKET: 148 c = *qpatnext; 149 if (c == NOT) 150 ++qpatnext; 151 if (*qpatnext == EOS || 152 g_strchr(qpatnext+1, RBRACKET) == NULL) { 153 *bufnext++ = LBRACKET; 154 if (c == NOT) 155 --qpatnext; 156 break; 157 } 158 *bufnext++ = M_SET; 159 if (c == NOT) 160 *bufnext++ = M_NOT; 161 c = *qpatnext++; 162 do { 163 *bufnext++ = CHAR(c); 164 if (*qpatnext == RANGE && 165 (c = qpatnext[1]) != RBRACKET) { 166 *bufnext++ = M_RNG; 167 *bufnext++ = CHAR(c); 168 qpatnext += 2; 169 } 170 } while ((c = *qpatnext++) != RBRACKET); 171 pglob->gl_flags |= GLOB_MAGCHAR; 172 *bufnext++ = M_END; 173 break; 174 case QUESTION: 175 pglob->gl_flags |= GLOB_MAGCHAR; 176 *bufnext++ = M_ONE; 177 break; 178 case STAR: 179 pglob->gl_flags |= GLOB_MAGCHAR; 180 /* collapse adjacent stars to one, 181 * to avoid exponential behavior 182 */ 183 if (bufnext == patbuf || bufnext[-1] != M_ALL) 184 *bufnext++ = M_ALL; 185 break; 186 default: 187 *bufnext++ = CHAR(c); 188 break; 189 } 190 } 191 *bufnext = EOS; 192 #ifdef DEBUG 193 qprintf(patbuf); 194 #endif 195 196 if ((err = glob1(patbuf, pglob)) != 0) 197 return(err); 198 199 /* 200 * If there was no match we are going to append the pattern 201 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 202 * and the pattern did not contain any magic characters 203 * GLOB_NOMAGIC is there just for compatibility with csh. 204 */ 205 if (pglob->gl_pathc == oldpathc && 206 ((flags & GLOB_NOCHECK) || 207 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 208 if (!(flags & GLOB_QUOTE)) { 209 Char *dp = compilebuf; 210 const u_char *sp = compilepat; 211 while (*dp++ = *sp++); 212 } 213 else { 214 /* 215 * Copy pattern, interpreting quotes; this is slightly 216 * different than the interpretation of quotes above 217 * -- which should prevail? 218 */ 219 while (*compilepat != EOS) { 220 if (*compilepat == QUOTE) { 221 if (*++compilepat == EOS) 222 --compilepat; 223 } 224 *compilebuf++ = (u_char)*compilepat++; 225 } 226 *compilebuf = EOS; 227 } 228 return(globextend(patbuf, pglob)); 229 } else if (!(flags & GLOB_NOSORT)) 230 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 231 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 232 return(0); 233 } 234 235 static int 236 compare(p, q) 237 const void *p, *q; 238 { 239 return(strcmp(*(char **)p, *(char **)q)); 240 } 241 242 static 243 glob1(pattern, pglob) 244 Char *pattern; 245 glob_t *pglob; 246 { 247 Char pathbuf[MAXPATHLEN+1]; 248 249 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 250 if (*pattern == EOS) 251 return(0); 252 return(glob2(pathbuf, pathbuf, pattern, pglob)); 253 } 254 255 /* 256 * The functions glob2 and glob3 are mutually recursive; there is one level 257 * of recursion for each segment in the pattern that contains one or more 258 * meta characters. 259 */ 260 static 261 glob2(pathbuf, pathend, pattern, pglob) 262 Char *pathbuf, *pathend, *pattern; 263 glob_t *pglob; 264 { 265 struct stat sb; 266 Char *p, *q; 267 int anymeta; 268 269 /* 270 * Loop over pattern segments until end of pattern or until 271 * segment with meta character found. 272 */ 273 for (anymeta = 0;;) { 274 if (*pattern == EOS) { /* End of pattern? */ 275 *pathend = EOS; 276 if (g_lstat(pathbuf, &sb, pglob)) 277 return(0); 278 279 if (((pglob->gl_flags & GLOB_MARK) && 280 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 281 || (S_ISLNK(sb.st_mode) && 282 (g_stat(pathbuf, &sb, pglob) == 0) && 283 S_ISDIR(sb.st_mode)))) { 284 *pathend++ = SEP; 285 *pathend = EOS; 286 } 287 ++pglob->gl_matchc; 288 return(globextend(pathbuf, pglob)); 289 } 290 291 /* Find end of next segment, copy tentatively to pathend. */ 292 q = pathend; 293 p = pattern; 294 while (*p != EOS && *p != SEP) { 295 if (ismeta(*p)) 296 anymeta = 1; 297 *q++ = *p++; 298 } 299 300 if (!anymeta) { /* No expansion, do next segment. */ 301 pathend = q; 302 pattern = p; 303 while (*pattern == SEP) 304 *pathend++ = *pattern++; 305 } else /* Need expansion, recurse. */ 306 return(glob3(pathbuf, pathend, pattern, p, pglob)); 307 } 308 /* NOTREACHED */ 309 } 310 311 static 312 glob3(pathbuf, pathend, pattern, restpattern, pglob) 313 Char *pathbuf, *pathend, *pattern, *restpattern; 314 glob_t *pglob; 315 { 316 register struct dirent *dp; 317 struct dirent *(*readdirfunc)(); 318 DIR *dirp; 319 int len, err; 320 321 *pathend = EOS; 322 errno = 0; 323 324 if (!(dirp = g_opendir(pathbuf, pglob))) 325 /* TODO: don't call for ENOENT or ENOTDIR? */ 326 if (pglob->gl_errfunc && 327 (*pglob->gl_errfunc)(pathbuf, errno) || 328 (pglob->gl_flags & GLOB_ERR)) 329 return(GLOB_ABEND); 330 else 331 return(0); 332 333 err = 0; 334 335 /* Search directory for matching names. */ 336 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 337 readdirfunc = pglob->gl_readdir; 338 else 339 readdirfunc = readdir; 340 while ((dp = (*readdirfunc)(dirp))) { 341 register u_char *sc; 342 register Char *dc; 343 344 /* Initial DOT must be matched literally. */ 345 if (dp->d_name[0] == DOT && *pattern != DOT) 346 continue; 347 for (sc = (u_char *) dp->d_name, dc = pathend; 348 *dc++ = *sc++;); 349 if (!match(pathend, pattern, restpattern)) { 350 *pathend = EOS; 351 continue; 352 } 353 err = glob2(pathbuf, --dc, restpattern, pglob); 354 if (err) 355 break; 356 } 357 358 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 359 (*pglob->gl_closedir)(dirp); 360 else 361 closedir(dirp); 362 return(err); 363 } 364 365 366 /* 367 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 368 * add the new item, and update gl_pathc. 369 * 370 * This assumes the BSD realloc, which only copies the block when its size 371 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 372 * behavior. 373 * 374 * Return 0 if new item added, error code if memory couldn't be allocated. 375 * 376 * Invariant of the glob_t structure: 377 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 378 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 379 */ 380 static int 381 globextend(path, pglob) 382 Char *path; 383 glob_t *pglob; 384 { 385 register char **pathv; 386 register int i; 387 u_int newsize; 388 char *copy; 389 Char *p; 390 391 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 392 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 393 if (pathv == NULL) 394 return(GLOB_NOSPACE); 395 396 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 397 /* first time around -- clear initial gl_offs items */ 398 pathv += pglob->gl_offs; 399 for (i = pglob->gl_offs; --i >= 0; ) 400 *--pathv = NULL; 401 } 402 pglob->gl_pathv = pathv; 403 404 for (p = path; *p++;); 405 if ((copy = malloc(p - path)) != NULL) { 406 g_Ctoc(path, copy); 407 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 408 } 409 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 410 return(copy == NULL ? GLOB_NOSPACE : 0); 411 } 412 413 414 /* 415 * pattern matching function for filenames. Each occurrence of the * 416 * pattern causes a recursion level. 417 */ 418 static 419 match(name, pat, patend) 420 register Char *name, *pat, *patend; 421 { 422 int ok, negate_range; 423 Char c, k; 424 425 while (pat < patend) { 426 c = *pat++; 427 switch (c & M_MASK) { 428 case M_ALL: 429 if (pat == patend) 430 return(1); 431 do 432 if (match(name, pat, patend)) 433 return(1); 434 while (*name++ != EOS); 435 return(0); 436 case M_ONE: 437 if (*name++ == EOS) 438 return(0); 439 break; 440 case M_SET: 441 ok = 0; 442 if ((k = *name++) == EOS) 443 return(0); 444 if (negate_range = ((*pat & M_MASK) == M_NOT)) 445 ++pat; 446 while (((c = *pat++) & M_MASK) != M_END) 447 if ((*pat & M_MASK) == M_RNG) { 448 if (c <= k && k <= pat[1]) 449 ok = 1; 450 pat += 2; 451 } else if (c == k) 452 ok = 1; 453 if (ok == negate_range) 454 return(0); 455 break; 456 default: 457 if (*name++ != c) 458 return(0); 459 break; 460 } 461 } 462 return(*name == EOS); 463 } 464 465 /* Free allocated data belonging to a glob_t structure. */ 466 void 467 globfree(pglob) 468 glob_t *pglob; 469 { 470 register int i; 471 register char **pp; 472 473 if (pglob->gl_pathv != NULL) { 474 pp = pglob->gl_pathv + pglob->gl_offs; 475 for (i = pglob->gl_pathc; i--; ++pp) 476 if (*pp) 477 free(*pp); 478 free(pglob->gl_pathv); 479 } 480 } 481 482 static DIR * 483 g_opendir(str, pglob) 484 register Char *str; 485 glob_t *pglob; 486 { 487 char buf[MAXPATHLEN]; 488 char *dirname; 489 490 if (!*str) 491 strcpy(buf, "."); 492 else 493 g_Ctoc(str, buf); 494 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 495 return((*pglob->gl_opendir)(buf)); 496 return(opendir(buf)); 497 } 498 499 static int 500 g_lstat(fn, sb, pglob) 501 register Char *fn; 502 struct stat *sb; 503 glob_t *pglob; 504 { 505 char buf[MAXPATHLEN]; 506 507 g_Ctoc(fn, buf); 508 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 509 return((*pglob->gl_lstat)(buf, sb)); 510 return(lstat(buf, sb)); 511 } 512 513 static int 514 g_stat(fn, sb, pglob) 515 register Char *fn; 516 struct stat *sb; 517 glob_t *pglob; 518 { 519 char buf[MAXPATHLEN]; 520 521 g_Ctoc(fn, buf); 522 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 523 return((*pglob->gl_stat)(buf, sb)); 524 return(stat(buf, sb)); 525 } 526 527 static Char * 528 g_strchr(str, ch) 529 Char *str; 530 int ch; 531 { 532 do { 533 if (*str == ch) 534 return (str); 535 } while (*str++); 536 return (NULL); 537 } 538 539 static void 540 g_Ctoc(str, buf) 541 register Char *str; 542 char *buf; 543 { 544 register char *dc; 545 546 for (dc = buf; *dc++ = *str++;); 547 } 548 549 #ifdef DEBUG 550 static void 551 qprintf(s) 552 register Char *s; 553 { 554 register Char *p; 555 556 for (p = s; *p; p++) 557 (void)printf("%c", *p & 0xff); 558 (void)printf("\n"); 559 for (p = s; *p; p++) 560 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 561 (void)printf("\n"); 562 for (p = s; *p; p++) 563 (void)printf("%c", *p & M_META ? '_' : ' '); 564 (void)printf("\n"); 565 } 566 #endif 567