1 /* Copyright (C) 1991-1993, 1996-2006, 2009-2013 Free Software Foundation, Inc. 2 This file is part of the GNU C Library. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, see <http://www.gnu.org/licenses/>. */ 16 17 /* Match STRING against the file name pattern PATTERN, returning zero if 18 it matches, nonzero if not. */ 19 static int EXT (INT opt, const CHAR *pattern, const CHAR *string, 20 const CHAR *string_end, bool no_leading_period, int flags) 21 internal_function; 22 static const CHAR *END (const CHAR *patternp) internal_function; 23 24 static int 25 internal_function 26 FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, 27 bool no_leading_period, int flags) 28 { 29 register const CHAR *p = pattern, *n = string; 30 register UCHAR c; 31 #ifdef _LIBC 32 # if WIDE_CHAR_VERSION 33 const char *collseq = (const char *) 34 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); 35 # else 36 const UCHAR *collseq = (const UCHAR *) 37 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); 38 # endif 39 #endif 40 41 while ((c = *p++) != L_('\0')) 42 { 43 bool new_no_leading_period = false; 44 c = FOLD (c); 45 46 switch (c) 47 { 48 case L_('?'): 49 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') 50 { 51 int res; 52 53 res = EXT (c, p, n, string_end, no_leading_period, 54 flags); 55 if (res != -1) 56 return res; 57 } 58 59 if (n == string_end) 60 return FNM_NOMATCH; 61 else if (*n == L_('/') && (flags & FNM_FILE_NAME)) 62 return FNM_NOMATCH; 63 else if (*n == L_('.') && no_leading_period) 64 return FNM_NOMATCH; 65 break; 66 67 case L_('\\'): 68 if (!(flags & FNM_NOESCAPE)) 69 { 70 c = *p++; 71 if (c == L_('\0')) 72 /* Trailing \ loses. */ 73 return FNM_NOMATCH; 74 c = FOLD (c); 75 } 76 if (n == string_end || FOLD ((UCHAR) *n) != c) 77 return FNM_NOMATCH; 78 break; 79 80 case L_('*'): 81 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') 82 { 83 int res; 84 85 res = EXT (c, p, n, string_end, no_leading_period, 86 flags); 87 if (res != -1) 88 return res; 89 } 90 91 if (n != string_end && *n == L_('.') && no_leading_period) 92 return FNM_NOMATCH; 93 94 for (c = *p++; c == L_('?') || c == L_('*'); c = *p++) 95 { 96 if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0) 97 { 98 const CHAR *endp = END (p); 99 if (endp != p) 100 { 101 /* This is a pattern. Skip over it. */ 102 p = endp; 103 continue; 104 } 105 } 106 107 if (c == L_('?')) 108 { 109 /* A ? needs to match one character. */ 110 if (n == string_end) 111 /* There isn't another character; no match. */ 112 return FNM_NOMATCH; 113 else if (*n == L_('/') 114 && __builtin_expect (flags & FNM_FILE_NAME, 0)) 115 /* A slash does not match a wildcard under 116 FNM_FILE_NAME. */ 117 return FNM_NOMATCH; 118 else 119 /* One character of the string is consumed in matching 120 this ? wildcard, so *??? won't match if there are 121 less than three characters. */ 122 ++n; 123 } 124 } 125 126 if (c == L_('\0')) 127 /* The wildcard(s) is/are the last element of the pattern. 128 If the name is a file name and contains another slash 129 this means it cannot match, unless the FNM_LEADING_DIR 130 flag is set. */ 131 { 132 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; 133 134 if (flags & FNM_FILE_NAME) 135 { 136 if (flags & FNM_LEADING_DIR) 137 result = 0; 138 else 139 { 140 if (MEMCHR (n, L_('/'), string_end - n) == NULL) 141 result = 0; 142 } 143 } 144 145 return result; 146 } 147 else 148 { 149 const CHAR *endp; 150 151 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'), 152 string_end - n); 153 if (endp == NULL) 154 endp = string_end; 155 156 if (c == L_('[') 157 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 158 && (c == L_('@') || c == L_('+') || c == L_('!')) 159 && *p == L_('('))) 160 { 161 int flags2 = ((flags & FNM_FILE_NAME) 162 ? flags : (flags & ~FNM_PERIOD)); 163 bool no_leading_period2 = no_leading_period; 164 165 for (--p; n < endp; ++n, no_leading_period2 = false) 166 if (FCT (p, n, string_end, no_leading_period2, flags2) 167 == 0) 168 return 0; 169 } 170 else if (c == L_('/') && (flags & FNM_FILE_NAME)) 171 { 172 while (n < string_end && *n != L_('/')) 173 ++n; 174 if (n < string_end && *n == L_('/') 175 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags) 176 == 0)) 177 return 0; 178 } 179 else 180 { 181 int flags2 = ((flags & FNM_FILE_NAME) 182 ? flags : (flags & ~FNM_PERIOD)); 183 int no_leading_period2 = no_leading_period; 184 185 if (c == L_('\\') && !(flags & FNM_NOESCAPE)) 186 c = *p; 187 c = FOLD (c); 188 for (--p; n < endp; ++n, no_leading_period2 = false) 189 if (FOLD ((UCHAR) *n) == c 190 && (FCT (p, n, string_end, no_leading_period2, flags2) 191 == 0)) 192 return 0; 193 } 194 } 195 196 /* If we come here no match is possible with the wildcard. */ 197 return FNM_NOMATCH; 198 199 case L_('['): 200 { 201 /* Nonzero if the sense of the character class is inverted. */ 202 const CHAR *p_init = p; 203 const CHAR *n_init = n; 204 register bool not; 205 CHAR cold; 206 UCHAR fn; 207 208 if (posixly_correct == 0) 209 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 210 211 if (n == string_end) 212 return FNM_NOMATCH; 213 214 if (*n == L_('.') && no_leading_period) 215 return FNM_NOMATCH; 216 217 if (*n == L_('/') && (flags & FNM_FILE_NAME)) 218 /* '/' cannot be matched. */ 219 return FNM_NOMATCH; 220 221 not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^'))); 222 if (not) 223 ++p; 224 225 fn = FOLD ((UCHAR) *n); 226 227 c = *p++; 228 for (;;) 229 { 230 if (!(flags & FNM_NOESCAPE) && c == L_('\\')) 231 { 232 if (*p == L_('\0')) 233 return FNM_NOMATCH; 234 c = FOLD ((UCHAR) *p); 235 ++p; 236 237 goto normal_bracket; 238 } 239 else if (c == L_('[') && *p == L_(':')) 240 { 241 /* Leave room for the null. */ 242 CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; 243 size_t c1 = 0; 244 #if defined _LIBC || WIDE_CHAR_SUPPORT 245 wctype_t wt; 246 #endif 247 const CHAR *startp = p; 248 249 for (;;) 250 { 251 if (c1 == CHAR_CLASS_MAX_LENGTH) 252 /* The name is too long and therefore the pattern 253 is ill-formed. */ 254 return FNM_NOMATCH; 255 256 c = *++p; 257 if (c == L_(':') && p[1] == L_(']')) 258 { 259 p += 2; 260 break; 261 } 262 if (c < L_('a') || c >= L_('z')) 263 { 264 /* This cannot possibly be a character class name. 265 Match it as a normal range. */ 266 p = startp; 267 c = L_('['); 268 goto normal_bracket; 269 } 270 str[c1++] = c; 271 } 272 str[c1] = L_('\0'); 273 274 #if defined _LIBC || WIDE_CHAR_SUPPORT 275 wt = IS_CHAR_CLASS (str); 276 if (wt == 0) 277 /* Invalid character class name. */ 278 return FNM_NOMATCH; 279 280 # if defined _LIBC && ! WIDE_CHAR_VERSION 281 /* The following code is glibc specific but does 282 there a good job in speeding up the code since 283 we can avoid the btowc() call. */ 284 if (_ISCTYPE ((UCHAR) *n, wt)) 285 goto matched; 286 # else 287 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) 288 goto matched; 289 # endif 290 #else 291 if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n)) 292 || (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n)) 293 || (STREQ (str, L_("blank")) && isblank ((UCHAR) *n)) 294 || (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n)) 295 || (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n)) 296 || (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n)) 297 || (STREQ (str, L_("lower")) && islower ((UCHAR) *n)) 298 || (STREQ (str, L_("print")) && isprint ((UCHAR) *n)) 299 || (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n)) 300 || (STREQ (str, L_("space")) && isspace ((UCHAR) *n)) 301 || (STREQ (str, L_("upper")) && isupper ((UCHAR) *n)) 302 || (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n))) 303 goto matched; 304 #endif 305 c = *p++; 306 } 307 #ifdef _LIBC 308 else if (c == L_('[') && *p == L_('=')) 309 { 310 UCHAR str[1]; 311 uint32_t nrules = 312 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 313 const CHAR *startp = p; 314 315 c = *++p; 316 if (c == L_('\0')) 317 { 318 p = startp; 319 c = L_('['); 320 goto normal_bracket; 321 } 322 str[0] = c; 323 324 c = *++p; 325 if (c != L_('=') || p[1] != L_(']')) 326 { 327 p = startp; 328 c = L_('['); 329 goto normal_bracket; 330 } 331 p += 2; 332 333 if (nrules == 0) 334 { 335 if ((UCHAR) *n == str[0]) 336 goto matched; 337 } 338 else 339 { 340 const int32_t *table; 341 # if WIDE_CHAR_VERSION 342 const int32_t *weights; 343 const int32_t *extra; 344 # else 345 const unsigned char *weights; 346 const unsigned char *extra; 347 # endif 348 const int32_t *indirect; 349 int32_t idx; 350 const UCHAR *cp = (const UCHAR *) str; 351 352 /* This #include defines a local function! */ 353 # if WIDE_CHAR_VERSION 354 # include <locale/weightwc.h> 355 # else 356 # include <locale/weight.h> 357 # endif 358 359 # if WIDE_CHAR_VERSION 360 table = (const int32_t *) 361 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); 362 weights = (const int32_t *) 363 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); 364 extra = (const int32_t *) 365 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); 366 indirect = (const int32_t *) 367 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); 368 # else 369 table = (const int32_t *) 370 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); 371 weights = (const unsigned char *) 372 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); 373 extra = (const unsigned char *) 374 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); 375 indirect = (const int32_t *) 376 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); 377 # endif 378 379 idx = findidx (&cp); 380 if (idx != 0) 381 { 382 /* We found a table entry. Now see whether the 383 character we are currently at has the same 384 equivalence class value. */ 385 int len = weights[idx & 0xffffff]; 386 int32_t idx2; 387 const UCHAR *np = (const UCHAR *) n; 388 389 idx2 = findidx (&np); 390 if (idx2 != 0 391 && (idx >> 24) == (idx2 >> 24) 392 && len == weights[idx2 & 0xffffff]) 393 { 394 int cnt = 0; 395 396 idx &= 0xffffff; 397 idx2 &= 0xffffff; 398 399 while (cnt < len 400 && (weights[idx + 1 + cnt] 401 == weights[idx2 + 1 + cnt])) 402 ++cnt; 403 404 if (cnt == len) 405 goto matched; 406 } 407 } 408 } 409 410 c = *p++; 411 } 412 #endif 413 else if (c == L_('\0')) 414 { 415 /* [ unterminated, treat as normal character. */ 416 p = p_init; 417 n = n_init; 418 c = L_('['); 419 goto normal_match; 420 } 421 else 422 { 423 bool is_range = false; 424 425 #ifdef _LIBC 426 bool is_seqval = false; 427 428 if (c == L_('[') && *p == L_('.')) 429 { 430 uint32_t nrules = 431 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); 432 const CHAR *startp = p; 433 size_t c1 = 0; 434 435 while (1) 436 { 437 c = *++p; 438 if (c == L_('.') && p[1] == L_(']')) 439 { 440 p += 2; 441 break; 442 } 443 if (c == '\0') 444 return FNM_NOMATCH; 445 ++c1; 446 } 447 448 /* We have to handling the symbols differently in 449 ranges since then the collation sequence is 450 important. */ 451 is_range = *p == L_('-') && p[1] != L_('\0'); 452 453 if (nrules == 0) 454 { 455 /* There are no names defined in the collation 456 data. Therefore we only accept the trivial 457 names consisting of the character itself. */ 458 if (c1 != 1) 459 return FNM_NOMATCH; 460 461 if (!is_range && *n == startp[1]) 462 goto matched; 463 464 cold = startp[1]; 465 c = *p++; 466 } 467 else 468 { 469 int32_t table_size; 470 const int32_t *symb_table; 471 # ifdef WIDE_CHAR_VERSION 472 char str[c1]; 473 size_t strcnt; 474 # else 475 # define str (startp + 1) 476 # endif 477 const unsigned char *extra; 478 int32_t idx; 479 int32_t elem; 480 int32_t second; 481 int32_t hash; 482 483 # ifdef WIDE_CHAR_VERSION 484 /* We have to convert the name to a single-byte 485 string. This is possible since the names 486 consist of ASCII characters and the internal 487 representation is UCS4. */ 488 for (strcnt = 0; strcnt < c1; ++strcnt) 489 str[strcnt] = startp[1 + strcnt]; 490 # endif 491 492 table_size = 493 _NL_CURRENT_WORD (LC_COLLATE, 494 _NL_COLLATE_SYMB_HASH_SIZEMB); 495 symb_table = (const int32_t *) 496 _NL_CURRENT (LC_COLLATE, 497 _NL_COLLATE_SYMB_TABLEMB); 498 extra = (const unsigned char *) 499 _NL_CURRENT (LC_COLLATE, 500 _NL_COLLATE_SYMB_EXTRAMB); 501 502 /* Locate the character in the hashing table. */ 503 hash = elem_hash (str, c1); 504 505 idx = 0; 506 elem = hash % table_size; 507 if (symb_table[2 * elem] != 0) 508 { 509 second = hash % (table_size - 2) + 1; 510 511 do 512 { 513 /* First compare the hashing value. */ 514 if (symb_table[2 * elem] == hash 515 && (c1 516 == extra[symb_table[2 * elem + 1]]) 517 && memcmp (str, 518 &extra[symb_table[2 * elem 519 + 1] 520 + 1], c1) == 0) 521 { 522 /* Yep, this is the entry. */ 523 idx = symb_table[2 * elem + 1]; 524 idx += 1 + extra[idx]; 525 break; 526 } 527 528 /* Next entry. */ 529 elem += second; 530 } 531 while (symb_table[2 * elem] != 0); 532 } 533 534 if (symb_table[2 * elem] != 0) 535 { 536 /* Compare the byte sequence but only if 537 this is not part of a range. */ 538 # ifdef WIDE_CHAR_VERSION 539 int32_t *wextra; 540 541 idx += 1 + extra[idx]; 542 /* Adjust for the alignment. */ 543 idx = (idx + 3) & ~3; 544 545 wextra = (int32_t *) &extra[idx + 4]; 546 # endif 547 548 if (! is_range) 549 { 550 # ifdef WIDE_CHAR_VERSION 551 for (c1 = 0; 552 (int32_t) c1 < wextra[idx]; 553 ++c1) 554 if (n[c1] != wextra[1 + c1]) 555 break; 556 557 if ((int32_t) c1 == wextra[idx]) 558 goto matched; 559 # else 560 for (c1 = 0; c1 < extra[idx]; ++c1) 561 if (n[c1] != extra[1 + c1]) 562 break; 563 564 if (c1 == extra[idx]) 565 goto matched; 566 # endif 567 } 568 569 /* Get the collation sequence value. */ 570 is_seqval = true; 571 # ifdef WIDE_CHAR_VERSION 572 cold = wextra[1 + wextra[idx]]; 573 # else 574 /* Adjust for the alignment. */ 575 idx += 1 + extra[idx]; 576 idx = (idx + 3) & ~4; 577 cold = *((int32_t *) &extra[idx]); 578 # endif 579 580 c = *p++; 581 } 582 else if (c1 == 1) 583 { 584 /* No valid character. Match it as a 585 single byte. */ 586 if (!is_range && *n == str[0]) 587 goto matched; 588 589 cold = str[0]; 590 c = *p++; 591 } 592 else 593 return FNM_NOMATCH; 594 } 595 } 596 else 597 # undef str 598 #endif 599 { 600 c = FOLD (c); 601 normal_bracket: 602 603 /* We have to handling the symbols differently in 604 ranges since then the collation sequence is 605 important. */ 606 is_range = (*p == L_('-') && p[1] != L_('\0') 607 && p[1] != L_(']')); 608 609 if (!is_range && c == fn) 610 goto matched; 611 612 #if _LIBC 613 /* This is needed if we goto normal_bracket; from 614 outside of is_seqval's scope. */ 615 is_seqval = false; 616 #endif 617 618 cold = c; 619 c = *p++; 620 } 621 622 if (c == L_('-') && *p != L_(']')) 623 { 624 #if _LIBC 625 /* We have to find the collation sequence 626 value for C. Collation sequence is nothing 627 we can regularly access. The sequence 628 value is defined by the order in which the 629 definitions of the collation values for the 630 various characters appear in the source 631 file. A strange concept, nowhere 632 documented. */ 633 uint32_t fcollseq; 634 uint32_t lcollseq; 635 UCHAR cend = *p++; 636 637 # ifdef WIDE_CHAR_VERSION 638 /* Search in the 'names' array for the characters. */ 639 fcollseq = __collseq_table_lookup (collseq, fn); 640 if (fcollseq == ~((uint32_t) 0)) 641 /* XXX We don't know anything about the character 642 we are supposed to match. This means we are 643 failing. */ 644 goto range_not_matched; 645 646 if (is_seqval) 647 lcollseq = cold; 648 else 649 lcollseq = __collseq_table_lookup (collseq, cold); 650 # else 651 fcollseq = collseq[fn]; 652 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; 653 # endif 654 655 is_seqval = false; 656 if (cend == L_('[') && *p == L_('.')) 657 { 658 uint32_t nrules = 659 _NL_CURRENT_WORD (LC_COLLATE, 660 _NL_COLLATE_NRULES); 661 const CHAR *startp = p; 662 size_t c1 = 0; 663 664 while (1) 665 { 666 c = *++p; 667 if (c == L_('.') && p[1] == L_(']')) 668 { 669 p += 2; 670 break; 671 } 672 if (c == '\0') 673 return FNM_NOMATCH; 674 ++c1; 675 } 676 677 if (nrules == 0) 678 { 679 /* There are no names defined in the 680 collation data. Therefore we only 681 accept the trivial names consisting 682 of the character itself. */ 683 if (c1 != 1) 684 return FNM_NOMATCH; 685 686 cend = startp[1]; 687 } 688 else 689 { 690 int32_t table_size; 691 const int32_t *symb_table; 692 # ifdef WIDE_CHAR_VERSION 693 char str[c1]; 694 size_t strcnt; 695 # else 696 # define str (startp + 1) 697 # endif 698 const unsigned char *extra; 699 int32_t idx; 700 int32_t elem; 701 int32_t second; 702 int32_t hash; 703 704 # ifdef WIDE_CHAR_VERSION 705 /* We have to convert the name to a single-byte 706 string. This is possible since the names 707 consist of ASCII characters and the internal 708 representation is UCS4. */ 709 for (strcnt = 0; strcnt < c1; ++strcnt) 710 str[strcnt] = startp[1 + strcnt]; 711 # endif 712 713 table_size = 714 _NL_CURRENT_WORD (LC_COLLATE, 715 _NL_COLLATE_SYMB_HASH_SIZEMB); 716 symb_table = (const int32_t *) 717 _NL_CURRENT (LC_COLLATE, 718 _NL_COLLATE_SYMB_TABLEMB); 719 extra = (const unsigned char *) 720 _NL_CURRENT (LC_COLLATE, 721 _NL_COLLATE_SYMB_EXTRAMB); 722 723 /* Locate the character in the hashing 724 table. */ 725 hash = elem_hash (str, c1); 726 727 idx = 0; 728 elem = hash % table_size; 729 if (symb_table[2 * elem] != 0) 730 { 731 second = hash % (table_size - 2) + 1; 732 733 do 734 { 735 /* First compare the hashing value. */ 736 if (symb_table[2 * elem] == hash 737 && (c1 738 == extra[symb_table[2 * elem + 1]]) 739 && memcmp (str, 740 &extra[symb_table[2 * elem + 1] 741 + 1], c1) == 0) 742 { 743 /* Yep, this is the entry. */ 744 idx = symb_table[2 * elem + 1]; 745 idx += 1 + extra[idx]; 746 break; 747 } 748 749 /* Next entry. */ 750 elem += second; 751 } 752 while (symb_table[2 * elem] != 0); 753 } 754 755 if (symb_table[2 * elem] != 0) 756 { 757 /* Compare the byte sequence but only if 758 this is not part of a range. */ 759 # ifdef WIDE_CHAR_VERSION 760 int32_t *wextra; 761 762 idx += 1 + extra[idx]; 763 /* Adjust for the alignment. */ 764 idx = (idx + 3) & ~4; 765 766 wextra = (int32_t *) &extra[idx + 4]; 767 # endif 768 /* Get the collation sequence value. */ 769 is_seqval = true; 770 # ifdef WIDE_CHAR_VERSION 771 cend = wextra[1 + wextra[idx]]; 772 # else 773 /* Adjust for the alignment. */ 774 idx += 1 + extra[idx]; 775 idx = (idx + 3) & ~4; 776 cend = *((int32_t *) &extra[idx]); 777 # endif 778 } 779 else if (symb_table[2 * elem] != 0 && c1 == 1) 780 { 781 cend = str[0]; 782 c = *p++; 783 } 784 else 785 return FNM_NOMATCH; 786 } 787 # undef str 788 } 789 else 790 { 791 if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) 792 cend = *p++; 793 if (cend == L_('\0')) 794 return FNM_NOMATCH; 795 cend = FOLD (cend); 796 } 797 798 /* XXX It is not entirely clear to me how to handle 799 characters which are not mentioned in the 800 collation specification. */ 801 if ( 802 # ifdef WIDE_CHAR_VERSION 803 lcollseq == 0xffffffff || 804 # endif 805 lcollseq <= fcollseq) 806 { 807 /* We have to look at the upper bound. */ 808 uint32_t hcollseq; 809 810 if (is_seqval) 811 hcollseq = cend; 812 else 813 { 814 # ifdef WIDE_CHAR_VERSION 815 hcollseq = 816 __collseq_table_lookup (collseq, cend); 817 if (hcollseq == ~((uint32_t) 0)) 818 { 819 /* Hum, no information about the upper 820 bound. The matching succeeds if the 821 lower bound is matched exactly. */ 822 if (lcollseq != fcollseq) 823 goto range_not_matched; 824 825 goto matched; 826 } 827 # else 828 hcollseq = collseq[cend]; 829 # endif 830 } 831 832 if (lcollseq <= hcollseq && fcollseq <= hcollseq) 833 goto matched; 834 } 835 # ifdef WIDE_CHAR_VERSION 836 range_not_matched: 837 # endif 838 #else 839 /* We use a boring value comparison of the character 840 values. This is better than comparing using 841 'strcoll' since the latter would have surprising 842 and sometimes fatal consequences. */ 843 UCHAR cend = *p++; 844 845 if (!(flags & FNM_NOESCAPE) && cend == L_('\\')) 846 cend = *p++; 847 if (cend == L_('\0')) 848 return FNM_NOMATCH; 849 850 /* It is a range. */ 851 if (cold <= fn && fn <= cend) 852 goto matched; 853 #endif 854 855 c = *p++; 856 } 857 } 858 859 if (c == L_(']')) 860 break; 861 } 862 863 if (!not) 864 return FNM_NOMATCH; 865 break; 866 867 matched: 868 /* Skip the rest of the [...] that already matched. */ 869 do 870 { 871 ignore_next: 872 c = *p++; 873 874 if (c == L_('\0')) 875 /* [... (unterminated) loses. */ 876 return FNM_NOMATCH; 877 878 if (!(flags & FNM_NOESCAPE) && c == L_('\\')) 879 { 880 if (*p == L_('\0')) 881 return FNM_NOMATCH; 882 /* XXX 1003.2d11 is unclear if this is right. */ 883 ++p; 884 } 885 else if (c == L_('[') && *p == L_(':')) 886 { 887 int c1 = 0; 888 const CHAR *startp = p; 889 890 while (1) 891 { 892 c = *++p; 893 if (++c1 == CHAR_CLASS_MAX_LENGTH) 894 return FNM_NOMATCH; 895 896 if (*p == L_(':') && p[1] == L_(']')) 897 break; 898 899 if (c < L_('a') || c >= L_('z')) 900 { 901 p = startp; 902 goto ignore_next; 903 } 904 } 905 p += 2; 906 c = *p++; 907 } 908 else if (c == L_('[') && *p == L_('=')) 909 { 910 c = *++p; 911 if (c == L_('\0')) 912 return FNM_NOMATCH; 913 c = *++p; 914 if (c != L_('=') || p[1] != L_(']')) 915 return FNM_NOMATCH; 916 p += 2; 917 c = *p++; 918 } 919 else if (c == L_('[') && *p == L_('.')) 920 { 921 ++p; 922 while (1) 923 { 924 c = *++p; 925 if (c == '\0') 926 return FNM_NOMATCH; 927 928 if (*p == L_('.') && p[1] == L_(']')) 929 break; 930 } 931 p += 2; 932 c = *p++; 933 } 934 } 935 while (c != L_(']')); 936 if (not) 937 return FNM_NOMATCH; 938 } 939 break; 940 941 case L_('+'): 942 case L_('@'): 943 case L_('!'): 944 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') 945 { 946 int res; 947 948 res = EXT (c, p, n, string_end, no_leading_period, flags); 949 if (res != -1) 950 return res; 951 } 952 goto normal_match; 953 954 case L_('/'): 955 if (NO_LEADING_PERIOD (flags)) 956 { 957 if (n == string_end || c != (UCHAR) *n) 958 return FNM_NOMATCH; 959 960 new_no_leading_period = true; 961 break; 962 } 963 /* FALLTHROUGH */ 964 default: 965 normal_match: 966 if (n == string_end || c != FOLD ((UCHAR) *n)) 967 return FNM_NOMATCH; 968 } 969 970 no_leading_period = new_no_leading_period; 971 ++n; 972 } 973 974 if (n == string_end) 975 return 0; 976 977 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/')) 978 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ 979 return 0; 980 981 return FNM_NOMATCH; 982 } 983 984 985 static const CHAR * 986 internal_function 987 END (const CHAR *pattern) 988 { 989 const CHAR *p = pattern; 990 991 while (1) 992 if (*++p == L_('\0')) 993 /* This is an invalid pattern. */ 994 return pattern; 995 else if (*p == L_('[')) 996 { 997 /* Handle brackets special. */ 998 if (posixly_correct == 0) 999 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 1000 1001 /* Skip the not sign. We have to recognize it because of a possibly 1002 following ']'. */ 1003 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) 1004 ++p; 1005 /* A leading ']' is recognized as such. */ 1006 if (*p == L_(']')) 1007 ++p; 1008 /* Skip over all characters of the list. */ 1009 while (*p != L_(']')) 1010 if (*p++ == L_('\0')) 1011 /* This is no valid pattern. */ 1012 return pattern; 1013 } 1014 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') 1015 || *p == L_('!')) && p[1] == L_('(')) 1016 p = END (p + 1); 1017 else if (*p == L_(')')) 1018 break; 1019 1020 return p + 1; 1021 } 1022 1023 1024 static int 1025 internal_function 1026 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, 1027 bool no_leading_period, int flags) 1028 { 1029 const CHAR *startp; 1030 size_t level; 1031 struct patternlist 1032 { 1033 struct patternlist *next; 1034 CHAR str[1]; 1035 } *list = NULL; 1036 struct patternlist **lastp = &list; 1037 size_t pattern_len = STRLEN (pattern); 1038 const CHAR *p; 1039 const CHAR *rs; 1040 enum { ALLOCA_LIMIT = 8000 }; 1041 1042 /* Parse the pattern. Store the individual parts in the list. */ 1043 level = 0; 1044 for (startp = p = pattern + 1; ; ++p) 1045 if (*p == L_('\0')) 1046 /* This is an invalid pattern. */ 1047 return -1; 1048 else if (*p == L_('[')) 1049 { 1050 /* Handle brackets special. */ 1051 if (posixly_correct == 0) 1052 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1; 1053 1054 /* Skip the not sign. We have to recognize it because of a possibly 1055 following ']'. */ 1056 if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^'))) 1057 ++p; 1058 /* A leading ']' is recognized as such. */ 1059 if (*p == L_(']')) 1060 ++p; 1061 /* Skip over all characters of the list. */ 1062 while (*p != L_(']')) 1063 if (*p++ == L_('\0')) 1064 /* This is no valid pattern. */ 1065 return -1; 1066 } 1067 else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@') 1068 || *p == L_('!')) && p[1] == L_('(')) 1069 /* Remember the nesting level. */ 1070 ++level; 1071 else if (*p == L_(')')) 1072 { 1073 if (level-- == 0) 1074 { 1075 /* This means we found the end of the pattern. */ 1076 #define NEW_PATTERN \ 1077 struct patternlist *newp; \ 1078 size_t plen; \ 1079 size_t plensize; \ 1080 size_t newpsize; \ 1081 \ 1082 plen = (opt == L_('?') || opt == L_('@') \ 1083 ? pattern_len \ 1084 : p - startp + 1UL); \ 1085 plensize = plen * sizeof (CHAR); \ 1086 newpsize = offsetof (struct patternlist, str) + plensize; \ 1087 if ((size_t) -1 / sizeof (CHAR) < plen \ 1088 || newpsize < offsetof (struct patternlist, str) \ 1089 || ALLOCA_LIMIT <= newpsize) \ 1090 return -1; \ 1091 newp = (struct patternlist *) alloca (newpsize); \ 1092 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \ 1093 newp->next = NULL; \ 1094 *lastp = newp; \ 1095 lastp = &newp->next 1096 NEW_PATTERN; 1097 break; 1098 } 1099 } 1100 else if (*p == L_('|')) 1101 { 1102 if (level == 0) 1103 { 1104 NEW_PATTERN; 1105 startp = p + 1; 1106 } 1107 } 1108 assert (list != NULL); 1109 assert (p[-1] == L_(')')); 1110 #undef NEW_PATTERN 1111 1112 switch (opt) 1113 { 1114 case L_('*'): 1115 if (FCT (p, string, string_end, no_leading_period, flags) == 0) 1116 return 0; 1117 /* FALLTHROUGH */ 1118 1119 case L_('+'): 1120 do 1121 { 1122 for (rs = string; rs <= string_end; ++rs) 1123 /* First match the prefix with the current pattern with the 1124 current pattern. */ 1125 if (FCT (list->str, string, rs, no_leading_period, 1126 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0 1127 /* This was successful. Now match the rest with the rest 1128 of the pattern. */ 1129 && (FCT (p, rs, string_end, 1130 rs == string 1131 ? no_leading_period 1132 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1133 flags & FNM_FILE_NAME 1134 ? flags : flags & ~FNM_PERIOD) == 0 1135 /* This didn't work. Try the whole pattern. */ 1136 || (rs != string 1137 && FCT (pattern - 1, rs, string_end, 1138 rs == string 1139 ? no_leading_period 1140 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1141 flags & FNM_FILE_NAME 1142 ? flags : flags & ~FNM_PERIOD) == 0))) 1143 /* It worked. Signal success. */ 1144 return 0; 1145 } 1146 while ((list = list->next) != NULL); 1147 1148 /* None of the patterns lead to a match. */ 1149 return FNM_NOMATCH; 1150 1151 case L_('?'): 1152 if (FCT (p, string, string_end, no_leading_period, flags) == 0) 1153 return 0; 1154 /* FALLTHROUGH */ 1155 1156 case L_('@'): 1157 do 1158 /* I cannot believe it but 'strcat' is actually acceptable 1159 here. Match the entire string with the prefix from the 1160 pattern list and the rest of the pattern following the 1161 pattern list. */ 1162 if (FCT (STRCAT (list->str, p), string, string_end, 1163 no_leading_period, 1164 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) 1165 /* It worked. Signal success. */ 1166 return 0; 1167 while ((list = list->next) != NULL); 1168 1169 /* None of the patterns lead to a match. */ 1170 return FNM_NOMATCH; 1171 1172 case L_('!'): 1173 for (rs = string; rs <= string_end; ++rs) 1174 { 1175 struct patternlist *runp; 1176 1177 for (runp = list; runp != NULL; runp = runp->next) 1178 if (FCT (runp->str, string, rs, no_leading_period, 1179 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0) 1180 break; 1181 1182 /* If none of the patterns matched see whether the rest does. */ 1183 if (runp == NULL 1184 && (FCT (p, rs, string_end, 1185 rs == string 1186 ? no_leading_period 1187 : rs[-1] == '/' && NO_LEADING_PERIOD (flags), 1188 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) 1189 == 0)) 1190 /* This is successful. */ 1191 return 0; 1192 } 1193 1194 /* None of the patterns together with the rest of the pattern 1195 lead to a match. */ 1196 return FNM_NOMATCH; 1197 1198 default: 1199 assert (! "Invalid extended matching operator"); 1200 break; 1201 } 1202 1203 return -1; 1204 } 1205 1206 1207 #undef FOLD 1208 #undef CHAR 1209 #undef UCHAR 1210 #undef INT 1211 #undef FCT 1212 #undef EXT 1213 #undef END 1214 #undef MEMPCPY 1215 #undef MEMCHR 1216 #undef STRLEN 1217 #undef STRCAT 1218 #undef L_ 1219 #undef BTOWC 1220