1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.15 (Berkeley) 02/05/92"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * glob(3) -- a superset of the one defined in POSIX 1003.2. 17 * 18 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 19 * 20 * Optional extra services, controlled by flags not defined by POSIX: 21 * 22 * GLOB_QUOTE: 23 * Escaping convention: \ inhibits any special meaning the following 24 * character might have (except \ at end of string is retained). 25 * GLOB_MAGCHAR: 26 * Set in gl_flags if pattern contained a globbing character. 27 * GLOB_NOMAGIC: 28 * Same as GLOB_NOCHECK, but it will only append pattern if it did 29 * not contain any magic characters. [Used in csh style globbing] 30 * gl_matchc: 31 * Number of matches in the current invocation of glob. 32 */ 33 34 #include <sys/param.h> 35 #include <sys/stat.h> 36 #include <dirent.h> 37 #include <glob.h> 38 #include <ctype.h> 39 #include <errno.h> 40 #include <string.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 44 #define DOLLAR '$' 45 #define DOT '.' 46 #define EOS '\0' 47 #define LBRACKET '[' 48 #define NOT '!' 49 #define QUESTION '?' 50 #define QUOTE '\\' 51 #define RANGE '-' 52 #define RBRACKET ']' 53 #define SEP '/' 54 #define STAR '*' 55 #define TILDE '~' 56 #define UNDERSCORE '_' 57 58 #define M_QUOTE 0x8000 59 #define M_PROTECT 0x4000 60 #define M_MASK 0xffff 61 #define M_ASCII 0x00ff 62 63 #define CHAR(c) ((c)&M_ASCII) 64 #define META(c) ((c)|M_QUOTE) 65 #define M_ALL META('*') 66 #define M_END META(']') 67 #define M_NOT META('!') 68 #define M_ONE META('?') 69 #define M_RNG META('-') 70 #define M_SET META('[') 71 #define ismeta(c) (((c)&M_QUOTE) != 0) 72 73 typedef u_short Char; 74 75 static int compare __P((const void *, const void *)); 76 static void g_Ctoc __P((Char *, char *)); 77 static int g_lstat __P((Char *, struct stat *)); 78 static DIR *g_opendir __P((Char *)); 79 static Char *g_strchr __P((Char *, int)); 80 static int g_stat __P((Char *, struct stat *)); 81 static int glob1 __P((Char *, glob_t *)); 82 static int glob2 __P((Char *, Char *, Char *, glob_t *)); 83 static int glob3 __P((Char *, Char *, Char *, Char *, glob_t *)); 84 static int globextend __P((Char *, glob_t *)); 85 static int match __P((Char *, Char *, Char *)); 86 #ifdef DEBUG 87 static void qprintf __P((Char *)); 88 #endif 89 90 /* 91 * The main glob() routine: compiles the pattern (optionally processing 92 * quotes), calls glob1() to do the real pattern matching, and finally 93 * sorts the list (unless unsorted operation is requested). Returns 0 94 * if things went well, nonzero if errors occurred. It is not an error 95 * to find no matches. 96 */ 97 glob(pattern, flags, errfunc, pglob) 98 const char *pattern; 99 int flags, (*errfunc) __P((char *, int)); 100 glob_t *pglob; 101 { 102 const u_char *compilepat, *patnext; 103 int c, err, oldpathc; 104 Char *bufnext, *bufend, *compilebuf, *qpatnext, patbuf[MAXPATHLEN+1]; 105 106 patnext = (u_char *) pattern; 107 if (!(flags & GLOB_APPEND)) { 108 pglob->gl_pathc = 0; 109 pglob->gl_pathv = NULL; 110 if (!(flags & GLOB_DOOFFS)) 111 pglob->gl_offs = 0; 112 } 113 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 114 pglob->gl_errfunc = errfunc; 115 oldpathc = pglob->gl_pathc; 116 pglob->gl_matchc = 0; 117 118 bufnext = patbuf; 119 bufend = bufnext + MAXPATHLEN; 120 compilebuf = bufnext; 121 compilepat = patnext; 122 if (flags & GLOB_QUOTE) { 123 /* Protect the quoted characters. */ 124 while (bufnext < bufend && (c = *patnext++) != EOS) 125 if (c == QUOTE) { 126 if ((c = *patnext++) == EOS) { 127 c = QUOTE; 128 --patnext; 129 } 130 *bufnext++ = c | M_PROTECT; 131 } 132 else 133 *bufnext++ = c; 134 } 135 else 136 while (bufnext < bufend && (c = *patnext++) != EOS) 137 *bufnext++ = c; 138 *bufnext = EOS; 139 140 bufnext = patbuf; 141 qpatnext = patbuf; 142 /* We don't need to check for buffer overflow any more. */ 143 while ((c = *qpatnext++) != EOS) { 144 switch (c) { 145 case LBRACKET: 146 c = *qpatnext; 147 if (c == NOT) 148 ++qpatnext; 149 if (*qpatnext == EOS || 150 g_strchr(qpatnext+1, RBRACKET) == NULL) { 151 *bufnext++ = LBRACKET; 152 if (c == NOT) 153 --qpatnext; 154 break; 155 } 156 *bufnext++ = M_SET; 157 if (c == NOT) 158 *bufnext++ = M_NOT; 159 c = *qpatnext++; 160 do { 161 *bufnext++ = CHAR(c); 162 if (*qpatnext == RANGE && 163 (c = qpatnext[1]) != RBRACKET) { 164 *bufnext++ = M_RNG; 165 *bufnext++ = CHAR(c); 166 qpatnext += 2; 167 } 168 } while ((c = *qpatnext++) != RBRACKET); 169 pglob->gl_flags |= GLOB_MAGCHAR; 170 *bufnext++ = M_END; 171 break; 172 case QUESTION: 173 pglob->gl_flags |= GLOB_MAGCHAR; 174 *bufnext++ = M_ONE; 175 break; 176 case STAR: 177 pglob->gl_flags |= GLOB_MAGCHAR; 178 *bufnext++ = M_ALL; 179 break; 180 default: 181 *bufnext++ = CHAR(c); 182 break; 183 } 184 } 185 *bufnext = EOS; 186 #ifdef DEBUG 187 qprintf(patbuf); 188 #endif 189 190 if ((err = glob1(patbuf, pglob)) != 0) 191 return(err); 192 193 /* 194 * If there was no match we are going to append the pattern 195 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 196 * and the pattern did not contain any magic characters 197 * GLOB_NOMAGIC is there just for compatibility with csh. 198 */ 199 if (pglob->gl_pathc == oldpathc && 200 ((flags & GLOB_NOCHECK) || 201 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 202 if (!(flags & GLOB_QUOTE)) { 203 Char *dp = compilebuf; 204 const u_char *sp = compilepat; 205 while (*dp++ = *sp++); 206 } 207 else { 208 /* 209 * Copy pattern, interpreting quotes; this is slightly 210 * different than the interpretation of quotes above 211 * -- which should prevail? 212 */ 213 while (*compilepat != EOS) { 214 if (*compilepat == QUOTE) { 215 if (*++compilepat == EOS) 216 --compilepat; 217 } 218 *compilebuf++ = (u_char)*compilepat++; 219 } 220 *compilebuf = EOS; 221 } 222 return(globextend(patbuf, pglob)); 223 } else if (!(flags & GLOB_NOSORT)) 224 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 225 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 226 return(0); 227 } 228 229 static int 230 compare(p, q) 231 const void *p, *q; 232 { 233 return(strcmp(*(char **)p, *(char **)q)); 234 } 235 236 static 237 glob1(pattern, pglob) 238 Char *pattern; 239 glob_t *pglob; 240 { 241 Char pathbuf[MAXPATHLEN+1]; 242 243 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 244 if (*pattern == EOS) 245 return(0); 246 return(glob2(pathbuf, pathbuf, pattern, pglob)); 247 } 248 249 /* 250 * The functions glob2 and glob3 are mutually recursive; there is one level 251 * of recursion for each segment in the pattern that contains one or more 252 * meta characters. 253 */ 254 static 255 glob2(pathbuf, pathend, pattern, pglob) 256 Char *pathbuf, *pathend, *pattern; 257 glob_t *pglob; 258 { 259 struct stat sb; 260 Char *p, *q; 261 int anymeta; 262 263 /* 264 * Loop over pattern segments until end of pattern or until 265 * segment with meta character found. 266 */ 267 for (anymeta = 0;;) { 268 if (*pattern == EOS) { /* End of pattern? */ 269 *pathend = EOS; 270 if (g_lstat(pathbuf, &sb)) 271 return(0); 272 273 if (((pglob->gl_flags & GLOB_MARK) && 274 pathend[-1] != SEP) && (S_ISDIR(sb.st_mode) 275 || (S_ISLNK(sb.st_mode) && 276 (g_stat(pathbuf, &sb) == 0) && 277 S_ISDIR(sb.st_mode)))) { 278 *pathend++ = SEP; 279 *pathend = EOS; 280 } 281 ++pglob->gl_matchc; 282 return(globextend(pathbuf, pglob)); 283 } 284 285 /* Find end of next segment, copy tentatively to pathend. */ 286 q = pathend; 287 p = pattern; 288 while (*p != EOS && *p != SEP) { 289 if (ismeta(*p)) 290 anymeta = 1; 291 *q++ = *p++; 292 } 293 294 if (!anymeta) { /* No expansion, do next segment. */ 295 pathend = q; 296 pattern = p; 297 while (*pattern == SEP) 298 *pathend++ = *pattern++; 299 } else /* Need expansion, recurse. */ 300 return(glob3(pathbuf, pathend, pattern, p, pglob)); 301 } 302 /* NOTREACHED */ 303 } 304 305 static 306 glob3(pathbuf, pathend, pattern, restpattern, pglob) 307 Char *pathbuf, *pathend, *pattern, *restpattern; 308 glob_t *pglob; 309 { 310 register struct dirent *dp; 311 DIR *dirp; 312 int len, err; 313 314 *pathend = EOS; 315 errno = 0; 316 317 if (!(dirp = g_opendir(pathbuf))) 318 /* TODO: don't call for ENOENT or ENOTDIR? */ 319 if (pglob->gl_errfunc && 320 (*pglob->gl_errfunc)(pathbuf, errno) || 321 (pglob->gl_flags & GLOB_ERR)) 322 return(GLOB_ABEND); 323 else 324 return(0); 325 326 err = 0; 327 328 /* Search directory for matching names. */ 329 while ((dp = readdir(dirp))) { 330 register u_char *sc; 331 register Char *dc; 332 333 /* Initial DOT must be matched literally. */ 334 if (dp->d_name[0] == DOT && *pattern != DOT) 335 continue; 336 for (sc = (u_char *) dp->d_name, dc = pathend; 337 *dc++ = *sc++;); 338 if (!match(pathend, pattern, restpattern)) { 339 *pathend = EOS; 340 continue; 341 } 342 err = glob2(pathbuf, --dc, restpattern, pglob); 343 if (err) 344 break; 345 } 346 347 /* TODO: check error from readdir? */ 348 (void)closedir(dirp); 349 return(err); 350 } 351 352 353 /* 354 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 355 * add the new item, and update gl_pathc. 356 * 357 * This assumes the BSD realloc, which only copies the block when its size 358 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 359 * behavior. 360 * 361 * Return 0 if new item added, error code if memory couldn't be allocated. 362 * 363 * Invariant of the glob_t structure: 364 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 365 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 366 */ 367 static int 368 globextend(path, pglob) 369 Char *path; 370 glob_t *pglob; 371 { 372 register char **pathv; 373 register int i; 374 u_int newsize; 375 char *copy; 376 Char *p; 377 378 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 379 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 380 if (pathv == NULL) 381 return(GLOB_NOSPACE); 382 383 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 384 /* first time around -- clear initial gl_offs items */ 385 pathv += pglob->gl_offs; 386 for (i = pglob->gl_offs; --i >= 0; ) 387 *--pathv = NULL; 388 } 389 pglob->gl_pathv = pathv; 390 391 for (p = path; *p++;); 392 if ((copy = malloc(p - path)) != NULL) { 393 g_Ctoc(path, copy); 394 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 395 } 396 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 397 return(copy == NULL ? GLOB_NOSPACE : 0); 398 } 399 400 401 /* 402 * pattern matching function for filenames. Each occurrence of the * 403 * pattern causes a recursion level. 404 */ 405 static 406 match(name, pat, patend) 407 register Char *name, *pat, *patend; 408 { 409 int ok, negate_range; 410 Char c, k; 411 412 while (pat < patend) { 413 c = *pat++; 414 switch (c & M_MASK) { 415 case M_ALL: 416 if (pat == patend) 417 return(1); 418 for (; *name != EOS; ++name) 419 if (match(name, pat, patend)) 420 return(1); 421 return(0); 422 case M_ONE: 423 if (*name++ == EOS) 424 return(0); 425 break; 426 case M_SET: 427 ok = 0; 428 if ((k = *name++) == EOS) 429 return(0); 430 if (negate_range = ((*pat & M_MASK) == M_NOT)) 431 ++pat; 432 while (((c = *pat++) & M_MASK) != M_END) 433 if ((*pat & M_MASK) == M_RNG) { 434 if (c <= k && k <= pat[1]) 435 ok = 1; 436 pat += 2; 437 } else if (c == k) 438 ok = 1; 439 if (ok == negate_range) 440 return(0); 441 break; 442 default: 443 if (*name++ != c) 444 return(0); 445 break; 446 } 447 } 448 return(*name == EOS); 449 } 450 451 /* Free allocated data belonging to a glob_t structure. */ 452 void 453 globfree(pglob) 454 glob_t *pglob; 455 { 456 register int i; 457 register char **pp; 458 459 if (pglob->gl_pathv != NULL) { 460 pp = pglob->gl_pathv + pglob->gl_offs; 461 for (i = pglob->gl_pathc; i--; ++pp) 462 if (*pp) 463 free(*pp); 464 free(pglob->gl_pathv); 465 } 466 } 467 468 static DIR * 469 g_opendir(str) 470 register Char *str; 471 { 472 char buf[MAXPATHLEN]; 473 474 if (!*str) 475 return(opendir(".")); 476 g_Ctoc(str, buf); 477 return(opendir(buf)); 478 } 479 480 static int 481 g_lstat(fn, sb) 482 register Char *fn; 483 struct stat *sb; 484 { 485 char buf[MAXPATHLEN]; 486 487 g_Ctoc(fn, buf); 488 return(lstat(buf, sb)); 489 } 490 491 static int 492 g_stat(fn, sb) 493 register Char *fn; 494 struct stat *sb; 495 { 496 char buf[MAXPATHLEN]; 497 498 g_Ctoc(fn, buf); 499 return(stat(buf, sb)); 500 } 501 502 static Char * 503 g_strchr(str, ch) 504 Char *str; 505 int ch; 506 { 507 do { 508 if (*str == ch) 509 return (str); 510 } while (*str++); 511 return (NULL); 512 } 513 514 static void 515 g_Ctoc(str, buf) 516 register Char *str; 517 char *buf; 518 { 519 register char *dc; 520 521 for (dc = buf; *dc++ = *str++;); 522 } 523 524 #ifdef DEBUG 525 static void 526 qprintf(s) 527 register Char *s; 528 { 529 register Char *p; 530 531 for (p = s; *p; p++) 532 (void)printf("%c", *p & 0xff); 533 (void)printf("\n"); 534 for (p = s; *p; p++) 535 (void)printf("%c", *p & M_PROTECT ? '"' : ' '); 536 (void)printf("\n"); 537 for (p = s; *p; p++) 538 (void)printf("%c", *p & M_META ? '_' : ' '); 539 (void)printf("\n"); 540 } 541 #endif 542