1 /* 2 * Copyright (C) the libgit2 contributors. All rights reserved. 3 * 4 * This file is part of libgit2, distributed under the GNU GPL v2 with 5 * a Linking Exception. For full terms see the included COPYING file. 6 * 7 * Do shell-style pattern matching for ?, \, [], and * characters. 8 * It is 8bit clean. 9 * 10 * Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. 11 * Rich $alz is now <rsalz@bbn.com>. 12 * 13 * Modified by Wayne Davison to special-case '/' matching, to make '**' 14 * work differently than '*', and to fix the character-class code. 15 * 16 * Imported from git.git. 17 */ 18 19 #include "wildmatch.h" 20 21 #define GIT_SPACE 0x01 22 #define GIT_DIGIT 0x02 23 #define GIT_ALPHA 0x04 24 #define GIT_GLOB_SPECIAL 0x08 25 #define GIT_REGEX_SPECIAL 0x10 26 #define GIT_PATHSPEC_MAGIC 0x20 27 #define GIT_CNTRL 0x40 28 #define GIT_PUNCT 0x80 29 30 enum { 31 S = GIT_SPACE, 32 A = GIT_ALPHA, 33 D = GIT_DIGIT, 34 G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ 35 R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */ 36 P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */ 37 X = GIT_CNTRL, 38 U = GIT_PUNCT, 39 Z = GIT_CNTRL | GIT_SPACE 40 }; 41 42 static const unsigned char sane_ctype[256] = { 43 X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */ 44 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */ 45 S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ 46 D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ 47 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ 48 A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */ 49 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ 50 A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */ 51 /* Nothing in the 128.. range */ 52 }; 53 54 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) 55 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) 56 57 typedef unsigned char uchar; 58 59 /* What character marks an inverted character class? */ 60 #define NEGATE_CLASS '!' 61 #define NEGATE_CLASS2 '^' 62 63 #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \ 64 && *(class) == *(litmatch) \ 65 && strncmp((char*)class, litmatch, len) == 0) 66 67 #if defined STDC_HEADERS || !defined isascii 68 # define ISASCII(c) 1 69 #else 70 # define ISASCII(c) isascii(c) 71 #endif 72 73 #ifdef isblank 74 # define ISBLANK(c) (ISASCII(c) && isblank(c)) 75 #else 76 # define ISBLANK(c) ((c) == ' ' || (c) == '\t') 77 #endif 78 79 #ifdef isgraph 80 # define ISGRAPH(c) (ISASCII(c) && isgraph(c)) 81 #else 82 # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c)) 83 #endif 84 85 #define ISPRINT(c) (ISASCII(c) && isprint(c)) 86 #define ISDIGIT(c) (ISASCII(c) && isdigit(c)) 87 #define ISALNUM(c) (ISASCII(c) && isalnum(c)) 88 #define ISALPHA(c) (ISASCII(c) && isalpha(c)) 89 #define ISCNTRL(c) (ISASCII(c) && iscntrl(c)) 90 #define ISLOWER(c) (ISASCII(c) && islower(c)) 91 #define ISPUNCT(c) (ISASCII(c) && ispunct(c)) 92 #define ISSPACE(c) (ISASCII(c) && isspace(c)) 93 #define ISUPPER(c) (ISASCII(c) && isupper(c)) 94 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c)) 95 96 /* Match pattern "p" against "text" */ 97 static int dowild(const uchar *p, const uchar *text, unsigned int flags) 98 { 99 uchar p_ch; 100 const uchar *pattern = p; 101 102 for ( ; (p_ch = *p) != '\0'; text++, p++) { 103 int matched, match_slash, negated; 104 uchar t_ch, prev_ch; 105 if ((t_ch = *text) == '\0' && p_ch != '*') 106 return WM_ABORT_ALL; 107 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) 108 t_ch = tolower(t_ch); 109 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) 110 p_ch = tolower(p_ch); 111 switch (p_ch) { 112 case '\\': 113 /* Literal match with following character. Note that the test 114 * in "default" handles the p[1] == '\0' failure case. */ 115 p_ch = *++p; 116 /* FALLTHROUGH */ 117 default: 118 if (t_ch != p_ch) 119 return WM_NOMATCH; 120 continue; 121 case '?': 122 /* Match anything but '/'. */ 123 if ((flags & WM_PATHNAME) && t_ch == '/') 124 return WM_NOMATCH; 125 continue; 126 case '*': 127 if (*++p == '*') { 128 const uchar *prev_p = p - 2; 129 while (*++p == '*') {} 130 if (!(flags & WM_PATHNAME)) 131 /* without WM_PATHNAME, '*' == '**' */ 132 match_slash = 1; 133 else if ((prev_p < pattern || *prev_p == '/') && 134 (*p == '\0' || *p == '/' || 135 (p[0] == '\\' && p[1] == '/'))) { 136 /* 137 * Assuming we already match 'foo/' and are at 138 * <star star slash>, just assume it matches 139 * nothing and go ahead match the rest of the 140 * pattern with the remaining string. This 141 * helps make foo/<*><*>/bar (<> because 142 * otherwise it breaks C comment syntax) match 143 * both foo/bar and foo/a/bar. 144 */ 145 if (p[0] == '/' && 146 dowild(p + 1, text, flags) == WM_MATCH) 147 return WM_MATCH; 148 match_slash = 1; 149 } else /* WM_PATHNAME is set */ 150 match_slash = 0; 151 } else 152 /* without WM_PATHNAME, '*' == '**' */ 153 match_slash = flags & WM_PATHNAME ? 0 : 1; 154 if (*p == '\0') { 155 /* Trailing "**" matches everything. Trailing "*" matches 156 * only if there are no more slash characters. */ 157 if (!match_slash) { 158 if (strchr((char*)text, '/') != NULL) 159 return WM_NOMATCH; 160 } 161 return WM_MATCH; 162 } else if (!match_slash && *p == '/') { 163 /* 164 * _one_ asterisk followed by a slash 165 * with WM_PATHNAME matches the next 166 * directory 167 */ 168 const char *slash = strchr((char*)text, '/'); 169 if (!slash) 170 return WM_NOMATCH; 171 text = (const uchar*)slash; 172 /* the slash is consumed by the top-level for loop */ 173 break; 174 } 175 while (1) { 176 if (t_ch == '\0') 177 break; 178 /* 179 * Try to advance faster when an asterisk is 180 * followed by a literal. We know in this case 181 * that the string before the literal 182 * must belong to "*". 183 * If match_slash is false, do not look past 184 * the first slash as it cannot belong to '*'. 185 */ 186 if (!is_glob_special(*p)) { 187 p_ch = *p; 188 if ((flags & WM_CASEFOLD) && ISUPPER(p_ch)) 189 p_ch = tolower(p_ch); 190 while ((t_ch = *text) != '\0' && 191 (match_slash || t_ch != '/')) { 192 if ((flags & WM_CASEFOLD) && ISUPPER(t_ch)) 193 t_ch = tolower(t_ch); 194 if (t_ch == p_ch) 195 break; 196 text++; 197 } 198 if (t_ch != p_ch) 199 return WM_NOMATCH; 200 } 201 if ((matched = dowild(p, text, flags)) != WM_NOMATCH) { 202 if (!match_slash || matched != WM_ABORT_TO_STARSTAR) 203 return matched; 204 } else if (!match_slash && t_ch == '/') 205 return WM_ABORT_TO_STARSTAR; 206 t_ch = *++text; 207 } 208 return WM_ABORT_ALL; 209 case '[': 210 p_ch = *++p; 211 #ifdef NEGATE_CLASS2 212 if (p_ch == NEGATE_CLASS2) 213 p_ch = NEGATE_CLASS; 214 #endif 215 /* Assign literal 1/0 because of "matched" comparison. */ 216 negated = p_ch == NEGATE_CLASS ? 1 : 0; 217 if (negated) { 218 /* Inverted character class. */ 219 p_ch = *++p; 220 } 221 prev_ch = 0; 222 matched = 0; 223 do { 224 if (!p_ch) 225 return WM_ABORT_ALL; 226 if (p_ch == '\\') { 227 p_ch = *++p; 228 if (!p_ch) 229 return WM_ABORT_ALL; 230 if (t_ch == p_ch) 231 matched = 1; 232 } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') { 233 p_ch = *++p; 234 if (p_ch == '\\') { 235 p_ch = *++p; 236 if (!p_ch) 237 return WM_ABORT_ALL; 238 } 239 if (t_ch <= p_ch && t_ch >= prev_ch) 240 matched = 1; 241 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) { 242 uchar t_ch_upper = toupper(t_ch); 243 if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch) 244 matched = 1; 245 } 246 p_ch = 0; /* This makes "prev_ch" get set to 0. */ 247 } else if (p_ch == '[' && p[1] == ':') { 248 const uchar *s; 249 int i; 250 for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/ 251 if (!p_ch) 252 return WM_ABORT_ALL; 253 i = (int)(p - s - 1); 254 if (i < 0 || p[-1] != ':') { 255 /* Didn't find ":]", so treat like a normal set. */ 256 p = s - 2; 257 p_ch = '['; 258 if (t_ch == p_ch) 259 matched = 1; 260 continue; 261 } 262 if (CC_EQ(s,i, "alnum")) { 263 if (ISALNUM(t_ch)) 264 matched = 1; 265 } else if (CC_EQ(s,i, "alpha")) { 266 if (ISALPHA(t_ch)) 267 matched = 1; 268 } else if (CC_EQ(s,i, "blank")) { 269 if (ISBLANK(t_ch)) 270 matched = 1; 271 } else if (CC_EQ(s,i, "cntrl")) { 272 if (ISCNTRL(t_ch)) 273 matched = 1; 274 } else if (CC_EQ(s,i, "digit")) { 275 if (ISDIGIT(t_ch)) 276 matched = 1; 277 } else if (CC_EQ(s,i, "graph")) { 278 if (ISGRAPH(t_ch)) 279 matched = 1; 280 } else if (CC_EQ(s,i, "lower")) { 281 if (ISLOWER(t_ch)) 282 matched = 1; 283 } else if (CC_EQ(s,i, "print")) { 284 if (ISPRINT(t_ch)) 285 matched = 1; 286 } else if (CC_EQ(s,i, "punct")) { 287 if (ISPUNCT(t_ch)) 288 matched = 1; 289 } else if (CC_EQ(s,i, "space")) { 290 if (ISSPACE(t_ch)) 291 matched = 1; 292 } else if (CC_EQ(s,i, "upper")) { 293 if (ISUPPER(t_ch)) 294 matched = 1; 295 else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) 296 matched = 1; 297 } else if (CC_EQ(s,i, "xdigit")) { 298 if (ISXDIGIT(t_ch)) 299 matched = 1; 300 } else /* malformed [:class:] string */ 301 return WM_ABORT_ALL; 302 p_ch = 0; /* This makes "prev_ch" get set to 0. */ 303 } else if (t_ch == p_ch) 304 matched = 1; 305 } while (prev_ch = p_ch, (p_ch = *++p) != ']'); 306 if (matched == negated || 307 ((flags & WM_PATHNAME) && t_ch == '/')) 308 return WM_NOMATCH; 309 continue; 310 } 311 } 312 313 return *text ? WM_NOMATCH : WM_MATCH; 314 } 315 316 /* Match the "pattern" against the "text" string. */ 317 int wildmatch(const char *pattern, const char *text, unsigned int flags) 318 { 319 return dowild((const uchar*)pattern, (const uchar*)text, flags); 320 } 321