1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)glob.c 5.4 (Berkeley) 02/23/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 /* 16 * Glob: the interface is a superset of the one defined in POSIX 1003.2, 17 * draft 9. 18 * 19 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 20 * 21 * Optional extra services, controlled by flags not defined by POSIX: 22 * GLOB_QUOTE: escaping convention: \ inhibits any special meaning 23 the following character might have (except \ at end of 24 * string is kept); 25 */ 26 27 #include <sys/param.h> 28 #include <sys/stat.h> 29 #include <dirent.h> 30 #include <glob.h> 31 #include <ctype.h> 32 #include <errno.h> 33 #include <string.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 37 typedef int bool_t; 38 39 static glob1(); 40 static glob2(); 41 static glob3(); 42 static globextend(); 43 static bool_t match(); 44 45 #define DOLLAR '$' 46 #define DOT '.' 47 #define EOS '\0' 48 #define LBRACKET '[' 49 #define NOT '!' 50 #define QUESTION '?' 51 #define QUOTE '\\' 52 #define RANGE '-' 53 #define RBRACKET ']' 54 #define SEP '/' 55 #define STAR '*' 56 #define TILDE '~' 57 #define UNDERSCORE '_' 58 59 #define METABIT 0x80 60 #define META(c) ((c)|METABIT) 61 #define M_ALL META('*') 62 #define M_END META(']') 63 #define M_NOT META('!') 64 #define M_ONE META('?') 65 #define M_RNG META('-') 66 #define M_SET META('[') 67 #define ismeta(c) (((c)&METABIT) != 0) 68 69 static 70 compare(p, q) 71 void **p, **q; 72 { 73 return(strcmp(*(char **)p, *(char **)q)); 74 } 75 76 77 /* 78 * The main glob() routine: compiles the pattern (optionally processing 79 * quotes), calls glob1() to do the real pattern matching, and finally 80 * sorts the list (unless unsorted operation is requested). Returns 0 81 * if things went well, nonzero if errors occurred. It is not an error 82 * to find no matches. 83 */ 84 glob(pattern, flags, errfunc, pglob) 85 const char *pattern; 86 int flags; 87 int (*errfunc) __P((char *, int)); 88 glob_t *pglob; 89 { 90 int err, oldpathc; 91 char *bufnext, *bufend, *compilebuf; 92 const char *compilepat, *patnext; 93 char c, patbuf[MAXPATHLEN+1]; 94 95 patnext = pattern; 96 if (!(flags & GLOB_APPEND)) { 97 pglob->gl_pathc = 0; 98 pglob->gl_pathv = NULL; 99 if (!(flags & GLOB_DOOFFS)) 100 pglob->gl_offs = 0; 101 } 102 pglob->gl_flags = flags; 103 pglob->gl_errfunc = errfunc; 104 oldpathc = pglob->gl_pathc; 105 106 bufnext = patbuf; 107 bufend = bufnext+MAXPATHLEN; 108 109 compilebuf = bufnext; 110 compilepat = patnext; 111 while (bufnext < bufend && (c = *patnext++) != EOS) { 112 switch (c) { 113 case LBRACKET: 114 c = *patnext; 115 if (c == NOT) 116 ++patnext; 117 if (*patnext == EOS || 118 strchr(patnext+1, RBRACKET) == NULL) { 119 *bufnext++ = LBRACKET; 120 if (c == NOT) 121 --patnext; 122 break; 123 } 124 *bufnext++ = M_SET; 125 if (c == NOT) 126 *bufnext++ = M_NOT; 127 c = *patnext++; 128 do { 129 /* todo: quoting */ 130 *bufnext++ = c; 131 if (*patnext == RANGE && 132 (c = patnext[1]) != RBRACKET) { 133 *bufnext++ = M_RNG; 134 *bufnext++ = c; 135 patnext += 2; 136 } 137 } while ((c = *patnext++) != RBRACKET); 138 *bufnext++ = M_END; 139 break; 140 case QUESTION: 141 *bufnext++ = M_ONE; 142 break; 143 case QUOTE: 144 if (!(flags & GLOB_QUOTE)) 145 *bufnext++ = QUOTE; 146 else { 147 if ((c = *patnext++) == EOS) { 148 c = QUOTE; 149 --patnext; 150 } 151 *bufnext++ = c; 152 } 153 break; 154 case STAR: 155 *bufnext++ = M_ALL; 156 break; 157 default: 158 *bufnext++ = c; 159 break; 160 } 161 } 162 *bufnext = EOS; 163 164 if ((err = glob1(patbuf, pglob)) != 0) 165 return(err); 166 167 if (pglob->gl_pathc == oldpathc && flags & GLOB_NOCHECK) { 168 if (!(flags & GLOB_QUOTE)) 169 (void)strcpy(compilebuf, compilepat); 170 else { 171 /* 172 * copy pattern, interpreting quotes; this is slightly 173 * different than the interpretation of quotes above 174 * -- which should prevail? 175 */ 176 while (*compilepat != EOS) { 177 if (*compilepat == QUOTE) { 178 if (*++compilepat == EOS) 179 --compilepat; 180 } 181 *compilebuf++ = *compilepat++; 182 } 183 *compilebuf = EOS; 184 } 185 return(globextend(patbuf, pglob)); 186 } else if (!(flags & GLOB_NOSORT)) 187 qsort((char*) (pglob->gl_pathv + pglob->gl_offs + oldpathc), 188 pglob->gl_pathc - oldpathc, sizeof(char*), compare); 189 return(0); 190 } 191 192 static 193 glob1(pattern, pglob) 194 char *pattern; 195 glob_t *pglob; 196 { 197 char pathbuf[MAXPATHLEN+1]; 198 199 /* 200 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 201 if (*pattern == EOS) 202 return(0); 203 return(glob2(pathbuf, pathbuf, pattern, pglob)); 204 } 205 206 /* 207 * functions glob2 and glob3 are mutually recursive; there is one level 208 * of recursion for each segment in the pattern that contains one or 209 * more meta characters. 210 */ 211 static 212 glob2(pathbuf, pathend, pattern, pglob) 213 char *pathbuf, *pathend, *pattern; 214 glob_t *pglob; 215 { 216 struct stat sbuf; 217 bool_t anymeta = 0; 218 char *p, *q; 219 220 /* 221 * loop over pattern segments until end of pattern or until 222 * segment with meta character found. 223 */ 224 for (;;) { 225 if (*pattern == EOS) { /* end of pattern? */ 226 *pathend = EOS; 227 if (stat(pathbuf, &sbuf) != 0) 228 return(0); /* need error call here? */ 229 if ((pglob->gl_flags & GLOB_MARK) && 230 pathend[-1] != SEP && S_ISDIR(sbuf.st_mode)) { 231 *pathend++ = SEP; 232 *pathend = EOS; 233 } 234 return(globextend(pathbuf, pglob)); 235 } 236 237 /* find end of next segment, copy tentatively to pathend */ 238 q = pathend; 239 p = pattern; 240 while (*p != EOS && *p != SEP) { 241 if (ismeta(*p)) 242 anymeta = 1; 243 *q++ = *p++; 244 } 245 246 if (!anymeta) { /* no expansion, do next segment */ 247 pathend = q; 248 pattern = p; 249 while (*pattern == SEP) 250 *pathend++ = *pattern++; 251 } else /* need expansion, recurse */ 252 return(glob3(pathbuf, pathend, pattern, p, pglob)); 253 } 254 /* NOTREACHED */ 255 } 256 257 static 258 glob3(pathbuf, pathend, pattern, restpattern, pglob) 259 char *pathbuf, *pathend, *pattern, *restpattern; 260 glob_t *pglob; 261 { 262 extern int errno; 263 DIR *dirp; 264 struct dirent *dp; 265 int len, err; 266 267 *pathend = EOS; 268 errno = 0; 269 if (!(dirp = opendir(pathbuf))) 270 /* todo: don't call for ENOENT or ENOTDIR? */ 271 if (pglob->gl_errfunc && 272 (*pglob->gl_errfunc)(pathbuf, errno) || 273 (pglob->gl_flags & GLOB_ERR)) 274 return(GLOB_ABEND); 275 else 276 return(0); 277 278 err = 0; 279 280 /* search directory for matching names */ 281 while ((dp = readdir(dirp))) { 282 /* initial DOT must be matched literally */ 283 if (dp->d_name[0] == DOT && *pattern != DOT) 284 continue; 285 if (!match(dp->d_name, pattern, restpattern)) 286 continue; 287 len = dp->d_namlen; 288 (void)strcpy(pathend, dp->d_name); 289 err = glob2(pathbuf, pathend+len, restpattern, pglob); 290 if (err) 291 break; 292 } 293 /* todo: check error from readdir? */ 294 (void)closedir(dirp); 295 return(err); 296 } 297 298 299 /* 300 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 301 * add the new item, and update gl_pathc. 302 * 303 * This assumes the BSD realloc, which only copies the block when its size 304 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 305 * behavior. 306 * 307 * Return 0 if new item added, error code if memory couldn't be allocated. 308 * 309 * Invariant of the glob_t structure: 310 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 311 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 312 */ 313 static 314 globextend(path, pglob) 315 char *path; 316 glob_t *pglob; 317 { 318 register char **pathv; 319 register int i; 320 u_int copysize, newsize; 321 char *copy; 322 323 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 324 pathv = (char **)realloc((char *)(pathv = pglob->gl_pathv), newsize); 325 if (pathv == NULL) 326 return(GLOB_NOSPACE); 327 328 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 329 /* first time around -- clear initial gl_offs items */ 330 pathv += pglob->gl_offs; 331 for (i = pglob->gl_offs; --i >= 0; ) 332 *--pathv = NULL; 333 } 334 pglob->gl_pathv = pathv; 335 336 copysize = strlen(path) + 1; 337 if ((copy = malloc(copysize)) != NULL) { 338 (void)strcpy(copy, path); 339 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 340 } 341 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 342 return((copy == NULL) ? GLOB_NOSPACE : 0); 343 } 344 345 346 /* 347 * pattern matching function for filenames. Each occurrence of the * 348 * pattern causes a recursion level. 349 */ 350 static bool_t 351 match(name, pat, patend) 352 register char *name, *pat, *patend; 353 { 354 bool_t ok, negate_range; 355 char c, k; 356 357 while (pat < patend) { 358 c = *pat++; 359 switch (c & 0xff) { 360 case M_ALL: 361 if (pat == patend) 362 return(1); 363 for (; *name != EOS; ++name) { 364 if (match(name, pat, patend)) 365 return(1); 366 } 367 return(0); 368 case M_ONE: 369 if (*name++ == EOS) 370 return(0); 371 break; 372 case M_SET: 373 ok = 0; 374 k = *name++; 375 if (negate_range = (*pat & 0xff) == M_NOT) 376 ++pat; 377 while (((c = *pat++) & 0xff) != M_END) { 378 if ((*pat & 0xff) == M_RNG) { 379 if (c <= k && k <= pat[1]) 380 ok = 1; 381 pat += 2; 382 } 383 else if (c == k) 384 ok = 1; 385 } 386 if (ok == negate_range) 387 return(0); 388 break; 389 default: 390 if (*name++ != c) 391 return(0); 392 break; 393 } 394 } 395 return(*name == EOS); 396 } 397 398 /* free allocated data belonging to a glob_t structure */ 399 void 400 globfree(pglob) 401 glob_t *pglob; 402 { 403 register int i; 404 register char **pp; 405 406 if (pglob->gl_pathv != NULL) { 407 pp = pglob->gl_pathv + pglob->gl_offs; 408 for (i = pglob->gl_pathc; i--; ++pp) 409 if (*pp) 410 (void)free(*pp); 411 (void)free((char *)pp); 412 } 413 } 414