1 /* 2 * Copyright (c) 2013 Gary Mills 3 */ 4 /* $OpenBSD: glob.c,v 1.39 2012/01/20 07:09:42 tedu Exp $ */ 5 /* 6 * Copyright (c) 1989, 1993 7 * The Regents of the University of California. All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Guido van Rossum. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * glob(3) -- a superset of the one defined in POSIX 1003.2. 39 * 40 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 41 * 42 * Optional extra services, controlled by flags not defined by POSIX: 43 * 44 * GLOB_QUOTE: 45 * Escaping convention: \ inhibits any special meaning the following 46 * character might have (except \ at end of string is retained). 47 * GLOB_MAGCHAR: 48 * Set in gl_flags if pattern contained a globbing character. 49 * GLOB_NOMAGIC: 50 * Same as GLOB_NOCHECK, but it will only append pattern if it did 51 * not contain any magic characters. [Used in csh style globbing] 52 * GLOB_ALTDIRFUNC: 53 * Use alternately specified directory access functions. 54 * GLOB_TILDE: 55 * expand ~user/foo to the /home/dir/of/user/foo 56 * GLOB_BRACE: 57 * expand {1,2}{a,b} to 1a 1b 2a 2b 58 * gl_matchc: 59 * Number of matches in the current invocation of glob. 60 */ 61 62 #include "lint.h" 63 64 #include <sys/param.h> 65 #include <sys/stat.h> 66 67 #include <ctype.h> 68 #include <dirent.h> 69 #include <errno.h> 70 #include <glob.h> 71 #include <limits.h> 72 #include <pwd.h> 73 #include <stdio.h> 74 #include <stdlib.h> 75 #include <string.h> 76 #include <unistd.h> 77 #include <wchar.h> 78 #include <wctype.h> 79 80 /* 81 * This is the legacy glob_t prior to illumos enhancement 1097, 82 * used when old programs call the old libc glob functions. 83 * (New programs call the _glob_ext, _globfree_ext functions.) 84 * This struct should be considered "carved in stone". 85 */ 86 typedef struct old_glob { 87 size_t gl_pathc; /* Count of paths matched by pattern */ 88 char **gl_pathv; /* List of matched pathnames */ 89 size_t gl_offs; /* # of slots reserved in gl_pathv */ 90 /* following are internal to the implementation */ 91 char **gl_pathp; /* gl_pathv + gl_offs */ 92 int gl_pathn; /* # of elements allocated */ 93 } old_glob_t; 94 95 /* 96 * For old programs, the external names need to be the old names: 97 * glob() and globfree() . We've redefined those already to 98 * _glob_ext() and _globfree_ext() . Now redefine old_glob() 99 * and old_globfree() to glob() and globfree() . 100 */ 101 #ifdef __PRAGMA_REDEFINE_EXTNAME 102 #pragma redefine_extname old_glob glob 103 #pragma redefine_extname old_globfree globfree 104 #endif /* __PRAGMA_REDEFINE_EXTNAME */ 105 extern int old_glob(const char *, int, int (*)(const char *, int), 106 old_glob_t *); 107 extern void old_globfree(old_glob_t *); 108 109 #define DOLLAR '$' 110 #define DOT '.' 111 #define EOS '\0' 112 #define LBRACKET '[' 113 #define NOT '!' 114 #define QUESTION '?' 115 #define QUOTE '\\' 116 #define RANGE '-' 117 #define RBRACKET ']' 118 #define SEP '/' 119 #define STAR '*' 120 #define TILDE '~' 121 #define UNDERSCORE '_' 122 #define LBRACE '{' 123 #define RBRACE '}' 124 #define SLASH '/' 125 #define COMMA ',' 126 #define COLON ':' 127 128 #define M_QUOTE 0x800000 129 #define M_PROTECT 0x400000 130 131 typedef struct wcat { 132 wchar_t w_wc; 133 uint_t w_at; 134 } wcat_t; 135 136 #define M_ALL '*' /* Plus M_QUOTE */ 137 #define M_END ']' /* Plus M_QUOTE */ 138 #define M_NOT '!' /* Plus M_QUOTE */ 139 #define M_ONE '?' /* Plus M_QUOTE */ 140 #define M_RNG '-' /* Plus M_QUOTE */ 141 #define M_SET '[' /* Plus M_QUOTE */ 142 #define M_CLASS ':' /* Plus M_QUOTE */ 143 #define ismeta(c) (((c).w_at&M_QUOTE) != 0) 144 145 #define INITIAL 8 /* initial pathv allocation */ 146 147 #define GLOB_LIMIT_MALLOC 65536 148 #define GLOB_LIMIT_STAT 2048 149 #define GLOB_LIMIT_READDIR 16384 150 151 /* Limit of recursion during matching attempts. */ 152 #define GLOB_LIMIT_RECUR 64 153 154 struct glob_lim { 155 size_t glim_malloc; 156 size_t glim_stat; 157 size_t glim_readdir; 158 }; 159 160 struct glob_path_stat { 161 char *gps_path; 162 struct stat *gps_stat; 163 }; 164 165 static int compare(const void *, const void *); 166 static int compare_gps(const void *, const void *); 167 static int g_Ctoc(const wcat_t *, char *, uint_t); 168 static int g_lstat(wcat_t *, struct stat *, glob_t *); 169 static DIR *g_opendir(wcat_t *, glob_t *); 170 static wcat_t *g_strchr(const wcat_t *, wchar_t); 171 static int g_stat(wcat_t *, struct stat *, glob_t *); 172 static int glob0(const wcat_t *, glob_t *, struct glob_lim *, 173 int (*)(const char *, int)); 174 static int glob1(wcat_t *, wcat_t *, glob_t *, struct glob_lim *, 175 int (*)(const char *, int)); 176 static int glob2(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *, 177 wcat_t *, glob_t *, struct glob_lim *, 178 int (*)(const char *, int)); 179 static int glob3(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *, 180 wcat_t *, wcat_t *, glob_t *, struct glob_lim *, 181 int (*)(const char *, int)); 182 static int globextend(const wcat_t *, glob_t *, struct glob_lim *, 183 struct stat *); 184 static 185 const wcat_t *globtilde(const wcat_t *, wcat_t *, size_t, glob_t *); 186 static int globexp1(const wcat_t *, glob_t *, struct glob_lim *, 187 int (*)(const char *, int)); 188 static int globexp2(const wcat_t *, const wcat_t *, glob_t *, 189 struct glob_lim *, int (*)(const char *, int)); 190 static int match(wcat_t *, wcat_t *, wcat_t *, int); 191 192 /* 193 * Extended glob() function, selected by #pragma redefine_extname 194 * in glob.h with the external name _glob_ext() . 195 */ 196 int 197 _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int), 198 glob_t *pglob) 199 { 200 const char *patnext; 201 int n; 202 size_t patlen; 203 wchar_t c; 204 wcat_t *bufnext, *bufend, patbuf[MAXPATHLEN]; 205 struct glob_lim limit = { 0, 0, 0 }; 206 207 if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX) 208 return (GLOB_NOMATCH); 209 210 patnext = pattern; 211 if (!(flags & GLOB_APPEND)) { 212 pglob->gl_pathc = 0; 213 pglob->gl_pathn = 0; 214 pglob->gl_pathv = NULL; 215 if ((flags & GLOB_KEEPSTAT) != 0) 216 pglob->gl_statv = NULL; 217 if (!(flags & GLOB_DOOFFS)) 218 pglob->gl_offs = 0; 219 } 220 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 221 pglob->gl_matchc = 0; 222 223 if (pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX || 224 pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1) 225 return (GLOB_NOSPACE); 226 227 bufnext = patbuf; 228 bufend = bufnext + MAXPATHLEN - 1; 229 patlen += 1; 230 if (flags & GLOB_NOESCAPE) { 231 while (bufnext < bufend) { 232 if ((n = mbtowc(&c, patnext, patlen)) > 0) { 233 patnext += n; 234 patlen -= n; 235 bufnext->w_at = 0; 236 (bufnext++)->w_wc = c; 237 } else if (n == 0) { 238 break; 239 } else { 240 return (GLOB_NOMATCH); 241 } 242 } 243 } else { 244 /* Protect the quoted characters. */ 245 while (bufnext < bufend) { 246 if ((n = mbtowc(&c, patnext, patlen)) > 0) { 247 patnext += n; 248 patlen -= n; 249 if (c == QUOTE) { 250 n = mbtowc(&c, patnext, patlen); 251 if (n < 0) 252 return (GLOB_NOMATCH); 253 if (n > 0) { 254 patnext += n; 255 patlen -= n; 256 } 257 if (n == 0) 258 c = QUOTE; 259 bufnext->w_at = M_PROTECT; 260 (bufnext++)->w_wc = c; 261 } else { 262 bufnext->w_at = 0; 263 (bufnext++)->w_wc = c; 264 } 265 } else if (n == 0) { 266 break; 267 } else { 268 return (GLOB_NOMATCH); 269 } 270 } 271 } 272 bufnext->w_at = 0; 273 bufnext->w_wc = EOS; 274 275 if (flags & GLOB_BRACE) 276 return (globexp1(patbuf, pglob, &limit, errfunc)); 277 else 278 return (glob0(patbuf, pglob, &limit, errfunc)); 279 } 280 281 /* 282 * Expand recursively a glob {} pattern. When there is no more expansion 283 * invoke the standard globbing routine to glob the rest of the magic 284 * characters 285 */ 286 static int 287 globexp1(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp, 288 int (*errfunc)(const char *, int)) 289 { 290 const wcat_t *ptr = pattern; 291 292 /* Protect a single {}, for find(1), like csh */ 293 if (pattern[0].w_wc == LBRACE && pattern[1].w_wc == RBRACE && 294 pattern[2].w_wc == EOS) 295 return (glob0(pattern, pglob, limitp, errfunc)); 296 297 if ((ptr = (const wcat_t *) g_strchr(ptr, LBRACE)) != NULL) 298 return (globexp2(ptr, pattern, pglob, limitp, errfunc)); 299 300 return (glob0(pattern, pglob, limitp, errfunc)); 301 } 302 303 304 /* 305 * Recursive brace globbing helper. Tries to expand a single brace. 306 * If it succeeds then it invokes globexp1 with the new pattern. 307 * If it fails then it tries to glob the rest of the pattern and returns. 308 */ 309 static int 310 globexp2(const wcat_t *ptr, const wcat_t *pattern, glob_t *pglob, 311 struct glob_lim *limitp, int (*errfunc)(const char *, int)) 312 { 313 int i, rv; 314 wcat_t *lm, *ls; 315 const wcat_t *pe, *pm, *pl; 316 wcat_t patbuf[MAXPATHLEN]; 317 318 /* copy part up to the brace */ 319 for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) 320 ; 321 lm->w_at = 0; 322 lm->w_wc = EOS; 323 ls = lm; 324 325 /* Find the balanced brace */ 326 for (i = 0, pe = ++ptr; pe->w_wc != EOS; pe++) 327 if (pe->w_wc == LBRACKET) { 328 /* Ignore everything between [] */ 329 for (pm = pe++; pe->w_wc != RBRACKET && 330 pe->w_wc != EOS; pe++) 331 ; 332 if (pe->w_wc == EOS) { 333 /* 334 * We could not find a matching RBRACKET. 335 * Ignore and just look for RBRACE 336 */ 337 pe = pm; 338 } 339 } else if (pe->w_wc == LBRACE) { 340 i++; 341 } else if (pe->w_wc == RBRACE) { 342 if (i == 0) 343 break; 344 i--; 345 } 346 347 /* Non matching braces; just glob the pattern */ 348 if (i != 0 || pe->w_wc == EOS) 349 return (glob0(patbuf, pglob, limitp, errfunc)); 350 351 for (i = 0, pl = pm = ptr; pm <= pe; pm++) { 352 switch (pm->w_wc) { 353 case LBRACKET: 354 /* Ignore everything between [] */ 355 for (pl = pm++; pm->w_wc != RBRACKET && pm->w_wc != EOS; 356 pm++) 357 ; 358 if (pm->w_wc == EOS) { 359 /* 360 * We could not find a matching RBRACKET. 361 * Ignore and just look for RBRACE 362 */ 363 pm = pl; 364 } 365 break; 366 367 case LBRACE: 368 i++; 369 break; 370 371 case RBRACE: 372 if (i) { 373 i--; 374 break; 375 } 376 /* FALLTHROUGH */ 377 case COMMA: 378 if (i && pm->w_wc == COMMA) 379 break; 380 else { 381 /* Append the current string */ 382 for (lm = ls; (pl < pm); *lm++ = *pl++) 383 ; 384 385 /* 386 * Append the rest of the pattern after the 387 * closing brace 388 */ 389 for (pl = pe + 1; 390 (*lm++ = *pl++).w_wc != EOS; /* */) 391 ; 392 393 /* Expand the current pattern */ 394 rv = globexp1(patbuf, pglob, limitp, errfunc); 395 if (rv && rv != GLOB_NOMATCH) 396 return (rv); 397 398 /* move after the comma, to the next string */ 399 pl = pm + 1; 400 } 401 break; 402 403 default: 404 break; 405 } 406 } 407 return (0); 408 } 409 410 411 412 /* 413 * expand tilde from the passwd file. 414 */ 415 static const wcat_t * 416 globtilde(const wcat_t *pattern, wcat_t *patbuf, size_t patbuf_len, 417 glob_t *pglob) 418 { 419 struct passwd *pwd; 420 char *h; 421 const wcat_t *p; 422 wcat_t *b, *eb, *q; 423 int n; 424 size_t lenh; 425 wchar_t c; 426 427 if (pattern->w_wc != TILDE || !(pglob->gl_flags & GLOB_TILDE)) 428 return (pattern); 429 430 /* Copy up to the end of the string or / */ 431 eb = &patbuf[patbuf_len - 1]; 432 for (p = pattern + 1, q = patbuf; 433 q < eb && p->w_wc != EOS && p->w_wc != SLASH; *q++ = *p++) 434 ; 435 436 q->w_at = 0; 437 q->w_wc = EOS; 438 439 /* What to do if patbuf is full? */ 440 441 if (patbuf[0].w_wc == EOS) { 442 /* 443 * handle a plain ~ or ~/ by expanding $HOME 444 * first and then trying the password file 445 */ 446 if (issetugid() != 0) 447 return (pattern); 448 if ((h = getenv("HOME")) == NULL) { 449 if ((pwd = getpwuid(getuid())) == NULL) 450 return (pattern); 451 else 452 h = pwd->pw_dir; 453 } 454 } else { 455 /* 456 * Expand a ~user 457 */ 458 if ((pwd = getpwnam((char *)patbuf)) == NULL) 459 return (pattern); 460 else 461 h = pwd->pw_dir; 462 } 463 464 /* Copy the home directory */ 465 lenh = strlen(h) + 1; 466 for (b = patbuf; b < eb && *h != EOS; b++) { 467 if ((n = mbtowc(&c, h, lenh)) > 0) { 468 h += n; 469 lenh -= n; 470 b->w_at = 0; 471 b->w_wc = c; 472 } else if (n < 0) { 473 return (pattern); 474 } else { 475 break; 476 } 477 } 478 479 /* Append the rest of the pattern */ 480 while (b < eb && (*b++ = *p++).w_wc != EOS) 481 ; 482 b->w_at = 0; 483 b->w_wc = EOS; 484 485 return (patbuf); 486 } 487 488 static int 489 g_charclass(const wcat_t **patternp, wcat_t **bufnextp) 490 { 491 const wcat_t *pattern = *patternp + 1; 492 wcat_t *bufnext = *bufnextp; 493 const wcat_t *colon; 494 char cbuf[MB_LEN_MAX + 32]; 495 wctype_t cc; 496 size_t len; 497 498 if ((colon = g_strchr(pattern, COLON)) == NULL || 499 colon[1].w_wc != RBRACKET) 500 return (1); /* not a character class */ 501 502 len = (size_t)(colon - pattern); 503 if (len + MB_LEN_MAX + 1 > sizeof (cbuf)) 504 return (-1); /* invalid character class */ 505 { 506 wchar_t w; 507 const wcat_t *s1 = pattern; 508 char *s2 = cbuf; 509 size_t n = len; 510 511 /* Copy the string. */ 512 while (n > 0) { 513 w = (s1++)->w_wc; 514 /* Character class names must be ASCII. */ 515 if (iswascii(w)) { 516 n--; 517 *s2++ = w; 518 } else { 519 return (-1); /* invalid character class */ 520 } 521 } 522 *s2 = EOS; 523 } 524 if ((cc = wctype(cbuf)) == 0) 525 return (-1); /* invalid character class */ 526 bufnext->w_at = M_QUOTE; 527 (bufnext++)->w_wc = M_CLASS; 528 bufnext->w_at = 0; 529 (bufnext++)->w_wc = cc; 530 *bufnextp = bufnext; 531 *patternp += len + 3; 532 533 return (0); 534 } 535 536 /* 537 * The main glob() routine: compiles the pattern (optionally processing 538 * quotes), calls glob1() to do the real pattern matching, and finally 539 * sorts the list (unless unsorted operation is requested). Returns 0 540 * if things went well, nonzero if errors occurred. It is not an error 541 * to find no matches. 542 */ 543 static int 544 glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp, 545 int (*errfunc)(const char *, int)) 546 { 547 const wcat_t *qpatnext; 548 int err, oldpathc; 549 wchar_t c; 550 int a; 551 wcat_t *bufnext, patbuf[MAXPATHLEN]; 552 553 qpatnext = globtilde(pattern, patbuf, MAXPATHLEN, pglob); 554 oldpathc = pglob->gl_pathc; 555 bufnext = patbuf; 556 557 /* 558 * We don't need to check for buffer overflow any more. 559 * The pattern has already been copied to an internal buffer. 560 */ 561 while ((a = qpatnext->w_at), (c = (qpatnext++)->w_wc) != EOS) { 562 switch (c) { 563 case LBRACKET: 564 if (a != 0) { 565 bufnext->w_at = a; 566 (bufnext++)->w_wc = c; 567 break; 568 } 569 a = qpatnext->w_at; 570 c = qpatnext->w_wc; 571 if (a == 0 && c == NOT) 572 ++qpatnext; 573 if (qpatnext->w_wc == EOS || 574 g_strchr(qpatnext+1, RBRACKET) == NULL) { 575 bufnext->w_at = 0; 576 (bufnext++)->w_wc = LBRACKET; 577 if (a == 0 && c == NOT) 578 --qpatnext; 579 break; 580 } 581 bufnext->w_at = M_QUOTE; 582 (bufnext++)->w_wc = M_SET; 583 if (a == 0 && c == NOT) { 584 bufnext->w_at = M_QUOTE; 585 (bufnext++)->w_wc = M_NOT; 586 } 587 a = qpatnext->w_at; 588 c = (qpatnext++)->w_wc; 589 do { 590 if (a == 0 && c == LBRACKET && 591 qpatnext->w_wc == COLON) { 592 do { 593 err = g_charclass(&qpatnext, 594 &bufnext); 595 if (err) 596 break; 597 a = qpatnext->w_at; 598 c = (qpatnext++)->w_wc; 599 } while (a == 0 && c == LBRACKET && 600 qpatnext->w_wc == COLON); 601 if (err == -1 && 602 !(pglob->gl_flags & GLOB_NOCHECK)) 603 return (GLOB_NOMATCH); 604 if (a == 0 && c == RBRACKET) 605 break; 606 } 607 bufnext->w_at = a; 608 (bufnext++)->w_wc = c; 609 if (qpatnext->w_at == 0 && 610 qpatnext->w_wc == RANGE) { 611 a = qpatnext[1].w_at; 612 c = qpatnext[1].w_wc; 613 if (qpatnext[1].w_at != 0 || 614 qpatnext[1].w_wc != RBRACKET) { 615 bufnext->w_at = M_QUOTE; 616 (bufnext++)->w_wc = M_RNG; 617 bufnext->w_at = a; 618 (bufnext++)->w_wc = c; 619 qpatnext += 2; 620 } 621 } 622 a = qpatnext->w_at; 623 c = (qpatnext++)->w_wc; 624 } while (a != 0 || c != RBRACKET); 625 pglob->gl_flags |= GLOB_MAGCHAR; 626 bufnext->w_at = M_QUOTE; 627 (bufnext++)->w_wc = M_END; 628 break; 629 case QUESTION: 630 if (a != 0) { 631 bufnext->w_at = a; 632 (bufnext++)->w_wc = c; 633 break; 634 } 635 pglob->gl_flags |= GLOB_MAGCHAR; 636 bufnext->w_at = M_QUOTE; 637 (bufnext++)->w_wc = M_ONE; 638 break; 639 case STAR: 640 if (a != 0) { 641 bufnext->w_at = a; 642 (bufnext++)->w_wc = c; 643 break; 644 } 645 pglob->gl_flags |= GLOB_MAGCHAR; 646 /* 647 * collapse adjacent stars to one, 648 * to avoid exponential behavior 649 */ 650 if (bufnext == patbuf || 651 bufnext[-1].w_at != M_QUOTE || 652 bufnext[-1].w_wc != M_ALL) { 653 bufnext->w_at = M_QUOTE; 654 (bufnext++)->w_wc = M_ALL; 655 } 656 break; 657 default: 658 bufnext->w_at = a; 659 (bufnext++)->w_wc = c; 660 break; 661 } 662 } 663 bufnext->w_at = 0; 664 bufnext->w_wc = EOS; 665 666 if ((err = glob1(patbuf, patbuf+MAXPATHLEN-1, pglob, limitp, errfunc)) 667 != 0) 668 return (err); 669 670 /* 671 * If there was no match we are going to append the pattern 672 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 673 * and the pattern did not contain any magic characters 674 * GLOB_NOMAGIC is there just for compatibility with csh. 675 */ 676 if (pglob->gl_pathc == oldpathc) { 677 if ((pglob->gl_flags & GLOB_NOCHECK) || 678 ((pglob->gl_flags & GLOB_NOMAGIC) && 679 !(pglob->gl_flags & GLOB_MAGCHAR))) 680 return (globextend(pattern, pglob, limitp, NULL)); 681 else 682 return (GLOB_NOMATCH); 683 } 684 if (!(pglob->gl_flags & GLOB_NOSORT)) { 685 if ((pglob->gl_flags & GLOB_KEEPSTAT)) { 686 /* Keep the paths and stat info synced during sort */ 687 struct glob_path_stat *path_stat; 688 int i; 689 int n = pglob->gl_pathc - oldpathc; 690 int o = pglob->gl_offs + oldpathc; 691 692 if ((path_stat = calloc(n, sizeof (*path_stat))) == 693 NULL) 694 return (GLOB_NOSPACE); 695 for (i = 0; i < n; i++) { 696 path_stat[i].gps_path = pglob->gl_pathv[o + i]; 697 path_stat[i].gps_stat = pglob->gl_statv[o + i]; 698 } 699 qsort(path_stat, n, sizeof (*path_stat), compare_gps); 700 for (i = 0; i < n; i++) { 701 pglob->gl_pathv[o + i] = path_stat[i].gps_path; 702 pglob->gl_statv[o + i] = path_stat[i].gps_stat; 703 } 704 free(path_stat); 705 } else { 706 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 707 pglob->gl_pathc - oldpathc, sizeof (char *), 708 compare); 709 } 710 } 711 return (0); 712 } 713 714 static int 715 compare(const void *p, const void *q) 716 { 717 return (strcmp(*(char **)p, *(char **)q)); 718 } 719 720 static int 721 compare_gps(const void *_p, const void *_q) 722 { 723 const struct glob_path_stat *p = (const struct glob_path_stat *)_p; 724 const struct glob_path_stat *q = (const struct glob_path_stat *)_q; 725 726 return (strcmp(p->gps_path, q->gps_path)); 727 } 728 729 static int 730 glob1(wcat_t *pattern, wcat_t *pattern_last, glob_t *pglob, 731 struct glob_lim *limitp, int (*errfunc)(const char *, int)) 732 { 733 wcat_t pathbuf[MAXPATHLEN]; 734 735 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 736 if (pattern->w_wc == EOS) 737 return (0); 738 return (glob2(pathbuf, pathbuf+MAXPATHLEN-1, 739 pathbuf, pathbuf+MAXPATHLEN-1, 740 pattern, pattern_last, pglob, limitp, errfunc)); 741 } 742 743 /* 744 * The functions glob2 and glob3 are mutually recursive; there is one level 745 * of recursion for each segment in the pattern that contains one or more 746 * meta characters. 747 */ 748 static int 749 glob2(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend, 750 wcat_t *pathend_last, wcat_t *pattern, wcat_t *pattern_last, 751 glob_t *pglob, struct glob_lim *limitp, int (*errfunc)(const char *, int)) 752 { 753 struct stat sb; 754 wcat_t *p, *q; 755 int anymeta; 756 757 /* 758 * Loop over pattern segments until end of pattern or until 759 * segment with meta character found. 760 */ 761 for (anymeta = 0; ; ) { 762 if (pattern->w_wc == EOS) { /* End of pattern? */ 763 pathend->w_at = 0; 764 pathend->w_wc = EOS; 765 766 if ((pglob->gl_flags & GLOB_LIMIT) && 767 limitp->glim_stat++ >= GLOB_LIMIT_STAT) { 768 errno = 0; 769 pathend->w_at = 0; 770 (pathend++)->w_wc = SEP; 771 pathend->w_at = 0; 772 pathend->w_wc = EOS; 773 return (GLOB_NOSPACE); 774 } 775 if (g_lstat(pathbuf, &sb, pglob)) 776 return (0); 777 778 if (((pglob->gl_flags & GLOB_MARK) && 779 (pathend[-1].w_at != 0 || 780 pathend[-1].w_wc != SEP)) && 781 (S_ISDIR(sb.st_mode) || 782 (S_ISLNK(sb.st_mode) && 783 (g_stat(pathbuf, &sb, pglob) == 0) && 784 S_ISDIR(sb.st_mode)))) { 785 if (pathend+1 > pathend_last) 786 return (GLOB_NOSPACE); 787 pathend->w_at = 0; 788 (pathend++)->w_wc = SEP; 789 pathend->w_at = 0; 790 pathend->w_wc = EOS; 791 } 792 ++pglob->gl_matchc; 793 return (globextend(pathbuf, pglob, limitp, &sb)); 794 } 795 796 /* Find end of next segment, copy tentatively to pathend. */ 797 q = pathend; 798 p = pattern; 799 while (p->w_wc != EOS && p->w_wc != SEP) { 800 if (ismeta(*p)) 801 anymeta = 1; 802 if (q+1 > pathend_last) 803 return (GLOB_NOSPACE); 804 *q++ = *p++; 805 } 806 807 if (!anymeta) { /* No expansion, do next segment. */ 808 pathend = q; 809 pattern = p; 810 while (pattern->w_wc == SEP) { 811 if (pathend+1 > pathend_last) 812 return (GLOB_NOSPACE); 813 *pathend++ = *pattern++; 814 } 815 } else { 816 /* Need expansion, recurse. */ 817 return (glob3(pathbuf, pathbuf_last, pathend, 818 pathend_last, pattern, p, pattern_last, 819 pglob, limitp, errfunc)); 820 } 821 } 822 /* NOTREACHED */ 823 } 824 825 static int 826 glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend, 827 wcat_t *pathend_last, wcat_t *pattern, wcat_t *restpattern, 828 wcat_t *restpattern_last, glob_t *pglob, struct glob_lim *limitp, 829 int (*errfunc)(const char *, int)) 830 { 831 struct dirent *dp; 832 DIR *dirp; 833 int err; 834 char buf[MAXPATHLEN]; 835 836 /* 837 * The readdirfunc declaration can't be prototyped, because it is 838 * assigned, below, to two functions which are prototyped in glob.h 839 * and dirent.h as taking pointers to differently typed opaque 840 * structures. 841 */ 842 struct dirent *(*readdirfunc)(void *); 843 844 if (pathend > pathend_last) 845 return (GLOB_NOSPACE); 846 pathend->w_at = 0; 847 pathend->w_wc = EOS; 848 errno = 0; 849 850 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { 851 /* TODO: don't call for ENOENT or ENOTDIR? */ 852 if (errfunc) { 853 if (g_Ctoc(pathbuf, buf, sizeof (buf))) 854 return (GLOB_ABORTED); 855 if (errfunc(buf, errno) || 856 pglob->gl_flags & GLOB_ERR) 857 return (GLOB_ABORTED); 858 } 859 return (0); 860 } 861 862 err = 0; 863 864 /* Search directory for matching names. */ 865 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 866 readdirfunc = pglob->gl_readdir; 867 else 868 readdirfunc = (struct dirent *(*)(void *))readdir; 869 while ((dp = (*readdirfunc)(dirp))) { 870 char *sc; 871 wcat_t *dc; 872 int n; 873 int lensc; 874 wchar_t w; 875 876 if ((pglob->gl_flags & GLOB_LIMIT) && 877 limitp->glim_readdir++ >= GLOB_LIMIT_READDIR) { 878 errno = 0; 879 pathend->w_at = 0; 880 (pathend++)->w_wc = SEP; 881 pathend->w_at = 0; 882 pathend->w_wc = EOS; 883 err = GLOB_NOSPACE; 884 break; 885 } 886 887 /* Initial DOT must be matched literally. */ 888 if (dp->d_name[0] == DOT && pattern->w_wc != DOT) 889 continue; 890 dc = pathend; 891 sc = dp->d_name; 892 lensc = strlen(sc) + 1; 893 while (dc < pathend_last) { 894 if ((n = mbtowc(&w, sc, lensc)) <= 0) { 895 sc += 1; 896 lensc -= 1; 897 dc->w_at = 0; 898 dc->w_wc = EOS; 899 } else { 900 sc += n; 901 lensc -= n; 902 dc->w_at = 0; 903 dc->w_wc = w; 904 } 905 dc++; 906 if (n <= 0) 907 break; 908 } 909 if (dc >= pathend_last) { 910 dc->w_at = 0; 911 dc->w_wc = EOS; 912 err = GLOB_NOSPACE; 913 break; 914 } 915 if (n < 0) { 916 err = GLOB_NOMATCH; 917 break; 918 } 919 920 if (!match(pathend, pattern, restpattern, GLOB_LIMIT_RECUR)) { 921 pathend->w_at = 0; 922 pathend->w_wc = EOS; 923 continue; 924 } 925 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last, 926 restpattern, restpattern_last, pglob, limitp, 927 errfunc); 928 if (err) 929 break; 930 } 931 932 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 933 (*pglob->gl_closedir)(dirp); 934 else 935 (void) closedir(dirp); 936 return (err); 937 } 938 939 940 /* 941 * Extend the gl_pathv member of a glob_t structure to accommodate a new item, 942 * add the new item, and update gl_pathc. Avoids excessive reallocation 943 * by doubling the number of elements each time. Uses gl_pathn to contain 944 * the number. 945 * 946 * Return 0 if new item added, error code if memory couldn't be allocated. 947 * 948 * Invariant of the glob_t structure: 949 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 950 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 951 */ 952 static int 953 globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp, 954 struct stat *sb) 955 { 956 char **pathv; 957 ssize_t i; 958 size_t allocn, newn, len; 959 char *copy = NULL; 960 const wcat_t *p; 961 struct stat **statv; 962 char junk[MB_LEN_MAX]; 963 int n; 964 965 allocn = pglob->gl_pathn; 966 newn = 2 + pglob->gl_pathc + pglob->gl_offs; 967 968 if (newn <= allocn) { 969 pathv = pglob->gl_pathv; 970 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) 971 statv = pglob->gl_statv; 972 } else { 973 if (allocn == 0) 974 allocn = pglob->gl_offs + INITIAL; 975 allocn *= 2; 976 if (pglob->gl_offs >= INT_MAX || 977 pglob->gl_pathc >= INT_MAX || 978 allocn >= INT_MAX || 979 SIZE_MAX / sizeof (*pathv) <= allocn || 980 SIZE_MAX / sizeof (*statv) <= allocn) { 981 nospace: 982 for (i = pglob->gl_offs; i < (ssize_t)(newn - 2); 983 i++) { 984 if (pglob->gl_pathv && pglob->gl_pathv[i]) 985 free(pglob->gl_pathv[i]); 986 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && 987 pglob->gl_statv && pglob->gl_statv[i]) 988 free(pglob->gl_statv[i]); 989 } 990 if (pglob->gl_pathv) { 991 free(pglob->gl_pathv); 992 pglob->gl_pathv = NULL; 993 } 994 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && 995 pglob->gl_statv) { 996 free(pglob->gl_statv); 997 pglob->gl_statv = NULL; 998 } 999 return (GLOB_NOSPACE); 1000 } 1001 limitp->glim_malloc += allocn * sizeof (*pathv); 1002 pathv = realloc(pglob->gl_pathv, allocn * sizeof (*pathv)); 1003 if (pathv == NULL) 1004 goto nospace; 1005 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { 1006 limitp->glim_malloc += allocn * sizeof (*statv); 1007 statv = realloc(pglob->gl_statv, 1008 allocn * sizeof (*statv)); 1009 if (statv == NULL) 1010 goto nospace; 1011 } 1012 } 1013 pglob->gl_pathn = allocn; 1014 1015 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 1016 /* first time around -- clear initial gl_offs items */ 1017 pathv += pglob->gl_offs; 1018 for (i = pglob->gl_offs; --i >= 0; ) 1019 *--pathv = NULL; 1020 } 1021 pglob->gl_pathv = pathv; 1022 1023 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { 1024 if (pglob->gl_statv == NULL && pglob->gl_offs > 0) { 1025 /* first time around -- clear initial gl_offs items */ 1026 statv += pglob->gl_offs; 1027 for (i = pglob->gl_offs; --i >= 0; ) 1028 *--statv = NULL; 1029 } 1030 pglob->gl_statv = statv; 1031 if (sb == NULL) 1032 statv[pglob->gl_offs + pglob->gl_pathc] = NULL; 1033 else { 1034 limitp->glim_malloc += sizeof (**statv); 1035 if ((statv[pglob->gl_offs + pglob->gl_pathc] = 1036 malloc(sizeof (**statv))) == NULL) 1037 goto copy_error; 1038 (void) memcpy(statv[pglob->gl_offs + pglob->gl_pathc], 1039 sb, sizeof (*sb)); 1040 } 1041 statv[pglob->gl_offs + pglob->gl_pathc + 1] = NULL; 1042 } 1043 1044 len = MB_LEN_MAX; 1045 p = path; 1046 while ((n = wctomb(junk, p->w_wc)) > 0) { 1047 len += n; 1048 if ((p++)->w_wc == EOS) 1049 break; 1050 } 1051 if (n < 0) 1052 return (GLOB_NOMATCH); 1053 1054 limitp->glim_malloc += len; 1055 if ((copy = malloc(len)) != NULL) { 1056 if (g_Ctoc(path, copy, len)) { 1057 free(copy); 1058 return (GLOB_NOSPACE); 1059 } 1060 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 1061 } 1062 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 1063 1064 if ((pglob->gl_flags & GLOB_LIMIT) && 1065 limitp->glim_malloc >= GLOB_LIMIT_MALLOC) { 1066 errno = 0; 1067 return (GLOB_NOSPACE); 1068 } 1069 copy_error: 1070 return (copy == NULL ? GLOB_NOSPACE : 0); 1071 } 1072 1073 1074 /* 1075 * pattern matching function for filenames. Each occurrence of the * 1076 * pattern causes a recursion level. 1077 */ 1078 static int 1079 match(wcat_t *name, wcat_t *pat, wcat_t *patend, int recur) 1080 { 1081 int ok, negate_range; 1082 wcat_t c, k; 1083 1084 if (recur-- == 0) 1085 return (1); 1086 1087 while (pat < patend) { 1088 c = *pat++; 1089 switch (c.w_wc) { 1090 case M_ALL: 1091 if (c.w_at != M_QUOTE) { 1092 k = *name++; 1093 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1094 return (0); 1095 break; 1096 } 1097 while (pat < patend && pat->w_at == M_QUOTE && 1098 pat->w_wc == M_ALL) 1099 pat++; /* eat consecutive '*' */ 1100 if (pat == patend) 1101 return (1); 1102 do { 1103 if (match(name, pat, patend, recur)) 1104 return (1); 1105 } while ((name++)->w_wc != EOS); 1106 return (0); 1107 case M_ONE: 1108 if (c.w_at != M_QUOTE) { 1109 k = *name++; 1110 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1111 return (0); 1112 break; 1113 } 1114 if ((name++)->w_wc == EOS) 1115 return (0); 1116 break; 1117 case M_SET: 1118 if (c.w_at != M_QUOTE) { 1119 k = *name++; 1120 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1121 return (0); 1122 break; 1123 } 1124 ok = 0; 1125 if ((k = *name++).w_wc == EOS) 1126 return (0); 1127 if ((negate_range = (pat->w_at == M_QUOTE && 1128 pat->w_wc == M_NOT)) != 0) 1129 ++pat; 1130 while (((c = *pat++).w_at != M_QUOTE) || 1131 c.w_wc != M_END) { 1132 if (c.w_at == M_QUOTE && c.w_wc == M_CLASS) { 1133 wcat_t cc; 1134 1135 cc.w_at = pat->w_at; 1136 cc.w_wc = pat->w_wc; 1137 if (iswctype(k.w_wc, cc.w_wc)) 1138 ok = 1; 1139 ++pat; 1140 } 1141 if (pat->w_at == M_QUOTE && 1142 pat->w_wc == M_RNG) { 1143 if (c.w_wc <= k.w_wc && 1144 k.w_wc <= pat[1].w_wc) 1145 ok = 1; 1146 pat += 2; 1147 } else if (c.w_wc == k.w_wc) 1148 ok = 1; 1149 } 1150 if (ok == negate_range) 1151 return (0); 1152 break; 1153 default: 1154 k = *name++; 1155 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1156 return (0); 1157 break; 1158 } 1159 } 1160 return (name->w_wc == EOS); 1161 } 1162 1163 /* 1164 * Extended globfree() function, selected by #pragma redefine_extname 1165 * in glob.h with the external name _globfree_ext() . 1166 */ 1167 void 1168 _globfree_ext(glob_t *pglob) 1169 { 1170 int i; 1171 char **pp; 1172 1173 if (pglob->gl_pathv != NULL) { 1174 pp = pglob->gl_pathv + pglob->gl_offs; 1175 for (i = pglob->gl_pathc; i--; ++pp) 1176 if (*pp) 1177 free(*pp); 1178 free(pglob->gl_pathv); 1179 pglob->gl_pathv = NULL; 1180 } 1181 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && 1182 pglob->gl_statv != NULL) { 1183 for (i = 0; i < pglob->gl_pathc; i++) { 1184 if (pglob->gl_statv[i] != NULL) 1185 free(pglob->gl_statv[i]); 1186 } 1187 free(pglob->gl_statv); 1188 pglob->gl_statv = NULL; 1189 } 1190 } 1191 1192 static DIR * 1193 g_opendir(wcat_t *str, glob_t *pglob) 1194 { 1195 char buf[MAXPATHLEN]; 1196 1197 if (str->w_wc == EOS) 1198 (void) strlcpy(buf, ".", sizeof (buf)); 1199 else { 1200 if (g_Ctoc(str, buf, sizeof (buf))) 1201 return (NULL); 1202 } 1203 1204 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1205 return ((*pglob->gl_opendir)(buf)); 1206 1207 return (opendir(buf)); 1208 } 1209 1210 static int 1211 g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob) 1212 { 1213 char buf[MAXPATHLEN]; 1214 1215 if (g_Ctoc(fn, buf, sizeof (buf))) 1216 return (-1); 1217 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1218 return ((*pglob->gl_lstat)(buf, sb)); 1219 return (lstat(buf, sb)); 1220 } 1221 1222 static int 1223 g_stat(wcat_t *fn, struct stat *sb, glob_t *pglob) 1224 { 1225 char buf[MAXPATHLEN]; 1226 1227 if (g_Ctoc(fn, buf, sizeof (buf))) 1228 return (-1); 1229 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1230 return ((*pglob->gl_stat)(buf, sb)); 1231 return (stat(buf, sb)); 1232 } 1233 1234 static wcat_t * 1235 g_strchr(const wcat_t *str, wchar_t ch) 1236 { 1237 do { 1238 if (str->w_at == 0 && str->w_wc == ch) 1239 return ((wcat_t *)str); 1240 } while ((str++)->w_wc != EOS); 1241 return (NULL); 1242 } 1243 1244 static int 1245 g_Ctoc(const wcat_t *str, char *buf, uint_t len) 1246 { 1247 int n; 1248 wchar_t w; 1249 1250 while (len >= MB_LEN_MAX) { 1251 w = (str++)->w_wc; 1252 if ((n = wctomb(buf, w)) > 0) { 1253 len -= n; 1254 buf += n; 1255 } 1256 if (n < 0) 1257 break; 1258 if (w == EOS) 1259 return (0); 1260 } 1261 return (1); 1262 } 1263 1264 /* glob() function with legacy glob structure */ 1265 int 1266 old_glob(const char *pattern, int flags, int (*errfunc)(const char *, int), 1267 old_glob_t *pglob) 1268 { 1269 1270 glob_t gl; 1271 int rv; 1272 1273 flags &= GLOB_POSIX; 1274 1275 (void) memset(&gl, 0, sizeof (gl)); 1276 1277 /* 1278 * Copy all the members, old to new. There's 1279 * really no point in micro-optimizing the copying. 1280 * Other members are set to zero. 1281 */ 1282 gl.gl_pathc = pglob->gl_pathc; 1283 gl.gl_pathv = pglob->gl_pathv; 1284 gl.gl_offs = pglob->gl_offs; 1285 gl.gl_pathp = pglob->gl_pathp; 1286 gl.gl_pathn = pglob->gl_pathn; 1287 1288 rv = _glob_ext(pattern, flags, errfunc, &gl); 1289 1290 /* 1291 * Copy all the members, new to old. There's 1292 * really no point in micro-optimizing the copying. 1293 */ 1294 pglob->gl_pathc = gl.gl_pathc; 1295 pglob->gl_pathv = gl.gl_pathv; 1296 pglob->gl_offs = gl.gl_offs; 1297 pglob->gl_pathp = gl.gl_pathp; 1298 pglob->gl_pathn = gl.gl_pathn; 1299 1300 return (rv); 1301 } 1302 1303 /* globfree() function with legacy glob structure */ 1304 void 1305 old_globfree(old_glob_t *pglob) 1306 { 1307 glob_t gl; 1308 1309 (void) memset(&gl, 0, sizeof (gl)); 1310 1311 /* 1312 * Copy all the members, old to new. There's 1313 * really no point in micro-optimizing the copying. 1314 * Other members are set to zero. 1315 */ 1316 gl.gl_pathc = pglob->gl_pathc; 1317 gl.gl_pathv = pglob->gl_pathv; 1318 gl.gl_offs = pglob->gl_offs; 1319 gl.gl_pathp = pglob->gl_pathp; 1320 gl.gl_pathn = pglob->gl_pathn; 1321 1322 _globfree_ext(&gl); 1323 1324 /* 1325 * Copy all the members, new to old. There's 1326 * really no point in micro-optimizing the copying. 1327 */ 1328 pglob->gl_pathc = gl.gl_pathc; 1329 pglob->gl_pathv = gl.gl_pathv; 1330 pglob->gl_offs = gl.gl_offs; 1331 pglob->gl_pathp = gl.gl_pathp; 1332 pglob->gl_pathn = gl.gl_pathn; 1333 1334 } 1335 1336 /* End */ 1337