1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.8 (Berkeley) 04/23/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * Glob: the interface is a superset of the one defined in POSIX 1003.2, 17 * draft 9. 18 * 19 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 20 * 21 * Optional extra services, controlled by flags not defined by POSIX: 22 * 23 * GLOB_QUOTE: 24 * Escaping convention: \ inhibits any special meaning the following 25 * character might have (except \ at end of string is retained). 26 * GLOB_MAGCHAR: 27 * Set in gl_flags if pattern contained a globbing character. 28 * gl_matchc: 29 * Number of matches in the current invocation of glob. 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/stat.h> 35 #include <dirent.h> 36 #include <glob.h> 37 #include <ctype.h> 38 #include <errno.h> 39 #include <string.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 43 static int glob1(), glob2(), glob3(), globextend(), match(); 44 45 #define DOLLAR '$' 46 #define DOT '.' 47 #define EOS '\0' 48 #define LBRACKET '[' 49 #define NOT '!' 50 #define QUESTION '?' 51 #define QUOTE '\\' 52 #define RANGE '-' 53 #define RBRACKET ']' 54 #define SEP '/' 55 #define STAR '*' 56 #define TILDE '~' 57 #define UNDERSCORE '_' 58 59 #define SHORT_STRINGS 60 #ifdef SHORT_STRINGS 61 typedef u_short shortchar_t; 62 #define METABIT 0x8000 63 #define M_MASK 0xffff 64 #else 65 typedef char shortchar_t; 66 #define METABIT 0x80 67 #define M_MASK 0xff 68 #endif 69 70 #define META(c) ((c)|METABIT) 71 #define M_ALL META('*') 72 #define M_END META(']') 73 #define M_NOT META('!') 74 #define M_ONE META('?') 75 #define M_RNG META('-') 76 #define M_SET META('[') 77 #define ismeta(c) (((c)&METABIT) != 0) 78 79 #ifdef SHORT_STRINGS 80 static DIR * 81 Opendir(str) 82 register shortchar_t *str; 83 { 84 register char *dc; 85 char buf[MAXPATHLEN]; 86 87 if (!*str) 88 return(opendir(".")); 89 for (dc = buf; *dc++ = *str++;); 90 return(opendir(buf)); 91 } 92 93 static int 94 Lstat(fn, sb) 95 register shortchar_t *fn; 96 struct stat *sb; 97 { 98 register char *dc; 99 char buf[MAXPATHLEN]; 100 101 for (dc = buf; *dc++ = *fn++;); 102 return(lstat(buf, sb)); 103 } 104 105 static int 106 Stat(fn, sb) 107 register shortchar_t *fn; 108 struct stat *sb; 109 { 110 register char *dc; 111 char buf[MAXPATHLEN]; 112 113 for (dc = buf; *dc++ = *fn++;); 114 return(stat(buf, sb)); 115 } 116 #else 117 #define Opendir opendir 118 #define Stat stat 119 #define Lstat lstat 120 #endif 121 122 static int 123 compare(p, q) 124 void **p, **q; 125 { 126 return(strcmp(*(char **)p, *(char **)q)); 127 } 128 129 /* 130 * The main glob() routine: compiles the pattern (optionally processing 131 * quotes), calls glob1() to do the real pattern matching, and finally 132 * sorts the list (unless unsorted operation is requested). Returns 0 133 * if things went well, nonzero if errors occurred. It is not an error 134 * to find no matches. 135 */ 136 glob(pattern, flags, errfunc, pglob) 137 const char *pattern; 138 int flags; 139 int (*errfunc) __P((char *, int)); 140 glob_t *pglob; 141 { 142 int err, oldpathc; 143 shortchar_t *bufnext, *bufend, *compilebuf; 144 const char *compilepat, *patnext; 145 char c; 146 shortchar_t patbuf[MAXPATHLEN+1]; 147 148 patnext = pattern; 149 if (!(flags & GLOB_APPEND)) { 150 pglob->gl_pathc = 0; 151 pglob->gl_pathv = NULL; 152 if (!(flags & GLOB_DOOFFS)) 153 pglob->gl_offs = 0; 154 } 155 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 156 pglob->gl_errfunc = errfunc; 157 oldpathc = pglob->gl_pathc; 158 pglob->gl_matchc = 0; 159 160 bufnext = patbuf; 161 bufend = bufnext+MAXPATHLEN; 162 163 compilebuf = bufnext; 164 compilepat = patnext; 165 while (bufnext < bufend && (c = *patnext++) != EOS) { 166 switch (c) { 167 case LBRACKET: 168 pglob->gl_flags |= GLOB_MAGCHAR; 169 c = *patnext; 170 if (c == NOT) 171 ++patnext; 172 if (*patnext == EOS || 173 strchr(patnext+1, RBRACKET) == NULL) { 174 *bufnext++ = LBRACKET; 175 if (c == NOT) 176 --patnext; 177 break; 178 } 179 *bufnext++ = M_SET; 180 if (c == NOT) 181 *bufnext++ = M_NOT; 182 c = *patnext++; 183 do { 184 /* todo: quoting */ 185 *bufnext++ = c; 186 if (*patnext == RANGE && 187 (c = patnext[1]) != RBRACKET) { 188 *bufnext++ = M_RNG; 189 *bufnext++ = c; 190 patnext += 2; 191 } 192 } while ((c = *patnext++) != RBRACKET); 193 *bufnext++ = M_END; 194 break; 195 case QUESTION: 196 pglob->gl_flags |= GLOB_MAGCHAR; 197 *bufnext++ = M_ONE; 198 break; 199 case QUOTE: 200 if (!(flags & GLOB_QUOTE)) 201 *bufnext++ = QUOTE; 202 else { 203 if ((c = *patnext++) == EOS) { 204 c = QUOTE; 205 --patnext; 206 } 207 *bufnext++ = c; 208 } 209 break; 210 case STAR: 211 pglob->gl_flags |= GLOB_MAGCHAR; 212 *bufnext++ = M_ALL; 213 break; 214 default: 215 *bufnext++ = c; 216 break; 217 } 218 } 219 *bufnext = EOS; 220 221 if ((err = glob1(patbuf, pglob)) != 0) 222 return(err); 223 224 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 225 if (!(flags & GLOB_QUOTE)) { 226 shortchar_t *dp = compilebuf; 227 const char *sp = compilepat; 228 while (*dp++ = (u_char)*sp++); 229 } 230 else { 231 /* 232 * copy pattern, interpreting quotes; this is slightly 233 * different than the interpretation of quotes above 234 * -- which should prevail? 235 */ 236 while (*compilepat != EOS) { 237 if (*compilepat == QUOTE) { 238 if (*++compilepat == EOS) 239 --compilepat; 240 } 241 *compilebuf++ = (u_char)*compilepat++; 242 } 243 *compilebuf = EOS; 244 } 245 return(globextend(patbuf, pglob)); 246 } else if (!(flags & GLOB_NOSORT)) 247 qsort((char*)(pglob->gl_pathv + pglob->gl_offs + oldpathc), 248 pglob->gl_pathc - oldpathc, sizeof(char*), compare); 249 return(0); 250 } 251 252 static 253 glob1(pattern, pglob) 254 shortchar_t *pattern; 255 glob_t *pglob; 256 { 257 shortchar_t pathbuf[MAXPATHLEN+1]; 258 259 /* 260 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. 261 */ 262 if (*pattern == EOS) 263 return(0); 264 return(glob2(pathbuf, pathbuf, pattern, pglob)); 265 } 266 267 /* 268 * functions glob2 and glob3 are mutually recursive; there is one level 269 * of recursion for each segment in the pattern that contains one or 270 * more meta characters. 271 */ 272 static 273 glob2(pathbuf, pathend, pattern, pglob) 274 shortchar_t *pathbuf, *pathend, *pattern; 275 glob_t *pglob; 276 { 277 struct stat sbuf; 278 shortchar_t *p, *q; 279 int anymeta; 280 281 /* 282 * loop over pattern segments until end of pattern or until 283 * segment with meta character found. 284 */ 285 for (anymeta = 0;;) { 286 if (*pattern == EOS) { /* end of pattern? */ 287 *pathend = EOS; 288 if (Lstat(pathbuf, &sbuf)) 289 return(0); 290 291 if (((pglob->gl_flags & GLOB_MARK) && 292 pathend[-1] != SEP) && (S_ISDIR(sbuf.st_mode) 293 || (S_ISLNK(sbuf.st_mode) && 294 (Stat(pathbuf, &sbuf) == 0) && 295 S_ISDIR(sbuf.st_mode)))) { 296 *pathend++ = SEP; 297 *pathend = EOS; 298 } 299 ++pglob->gl_matchc; 300 return(globextend(pathbuf, pglob)); 301 } 302 303 /* find end of next segment, copy tentatively to pathend */ 304 q = pathend; 305 p = pattern; 306 while (*p != EOS && *p != SEP) { 307 if (ismeta(*p)) 308 anymeta = 1; 309 *q++ = *p++; 310 } 311 312 if (!anymeta) { /* no expansion, do next segment */ 313 pathend = q; 314 pattern = p; 315 while (*pattern == SEP) 316 *pathend++ = *pattern++; 317 } else /* need expansion, recurse */ 318 return(glob3(pathbuf, pathend, pattern, p, pglob)); 319 } 320 /* NOTREACHED */ 321 } 322 323 324 static 325 glob3(pathbuf, pathend, pattern, restpattern, pglob) 326 shortchar_t *pathbuf, *pathend, *pattern, *restpattern; 327 glob_t *pglob; 328 { 329 extern int errno; 330 DIR *dirp; 331 struct dirent *dp; 332 int len, err; 333 334 *pathend = EOS; 335 errno = 0; 336 337 if (!(dirp = Opendir(pathbuf))) 338 /* todo: don't call for ENOENT or ENOTDIR? */ 339 if (pglob->gl_errfunc && 340 (*pglob->gl_errfunc)(pathbuf, errno) || 341 (pglob->gl_flags & GLOB_ERR)) 342 return(GLOB_ABEND); 343 else 344 return(0); 345 346 err = 0; 347 348 /* search directory for matching names */ 349 while ((dp = readdir(dirp))) { 350 register char *sc; 351 register shortchar_t *dc; 352 /* initial DOT must be matched literally */ 353 if (dp->d_name[0] == DOT && *pattern != DOT) 354 continue; 355 for (sc = dp->d_name, dc = pathend; 356 *dc++ = (u_char)*sc++;); 357 if (!match(pathend, pattern, restpattern)) { 358 *pathend = EOS; 359 continue; 360 } 361 err = glob2(pathbuf, --dc, restpattern, pglob); 362 if (err) 363 break; 364 } 365 /* todo: check error from readdir? */ 366 (void)closedir(dirp); 367 return(err); 368 } 369 370 371 /* 372 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 373 * add the new item, and update gl_pathc. 374 * 375 * This assumes the BSD realloc, which only copies the block when its size 376 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 377 * behavior. 378 * 379 * Return 0 if new item added, error code if memory couldn't be allocated. 380 * 381 * Invariant of the glob_t structure: 382 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 383 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 384 */ 385 static int 386 globextend(path, pglob) 387 shortchar_t *path; 388 glob_t *pglob; 389 { 390 register char **pathv; 391 register int i; 392 u_int newsize; 393 char *copy; 394 shortchar_t *p; 395 396 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 397 pathv = (char **)realloc((char *)pglob->gl_pathv, newsize); 398 if (pathv == NULL) 399 return(GLOB_NOSPACE); 400 401 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 402 /* first time around -- clear initial gl_offs items */ 403 pathv += pglob->gl_offs; 404 for (i = pglob->gl_offs; --i >= 0; ) 405 *--pathv = NULL; 406 } 407 pglob->gl_pathv = pathv; 408 409 for (p = path; *p++;); 410 if ((copy = malloc(p - path)) != NULL) { 411 register char *dc = copy; 412 register shortchar_t *sc = path; 413 while (*dc++ = *sc++); 414 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 415 } 416 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 417 return((copy == NULL) ? GLOB_NOSPACE : 0); 418 } 419 420 421 /* 422 * pattern matching function for filenames. Each occurrence of the * 423 * pattern causes a recursion level. 424 */ 425 static 426 match(name, pat, patend) 427 register shortchar_t *name, *pat, *patend; 428 { 429 int ok, negate_range; 430 shortchar_t c, k; 431 432 while (pat < patend) { 433 c = *pat++; 434 switch (c & M_MASK) { 435 case M_ALL: 436 if (pat == patend) 437 return(1); 438 for (; *name != EOS; ++name) { 439 if (match(name, pat, patend)) 440 return(1); 441 } 442 return(0); 443 case M_ONE: 444 if (*name++ == EOS) 445 return(0); 446 break; 447 case M_SET: 448 ok = 0; 449 k = *name++; 450 if (negate_range = ((*pat & M_MASK) == M_NOT)) 451 ++pat; 452 while (((c = *pat++) & M_MASK) != M_END) { 453 if ((*pat & M_MASK) == M_RNG) { 454 if (c <= k && k <= pat[1]) 455 ok = 1; 456 pat += 2; 457 } 458 else if (c == k) 459 ok = 1; 460 } 461 if (ok == negate_range) 462 return(0); 463 break; 464 default: 465 if (*name++ != c) 466 return(0); 467 break; 468 } 469 } 470 return(*name == EOS); 471 } 472 473 /* free allocated data belonging to a glob_t structure */ 474 void 475 globfree(pglob) 476 glob_t *pglob; 477 { 478 register int i; 479 register char **pp; 480 481 if (pglob->gl_pathv != NULL) { 482 pp = pglob->gl_pathv + pglob->gl_offs; 483 for (i = pglob->gl_pathc; i--; ++pp) 484 if (*pp) 485 free((void *)*pp); 486 free((void *)pglob->gl_pathv); 487 } 488 } 489