1 /* $OpenBSD: rmatch.c,v 1.4 2024/02/20 10:36:23 claudio Exp $ */ 2 3 /* 4 * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * Copyright (c) 1989, 1993, 1994 21 * The Regents of the University of California. All rights reserved. 22 * 23 * This code is derived from software contributed to Berkeley by 24 * Guido van Rossum. 25 * 26 * Redistribution and use in source and binary forms, with or without 27 * modification, are permitted provided that the following conditions 28 * are met: 29 * 1. Redistributions of source code must retain the above copyright 30 * notice, this list of conditions and the following disclaimer. 31 * 2. Redistributions in binary form must reproduce the above copyright 32 * notice, this list of conditions and the following disclaimer in the 33 * documentation and/or other materials provided with the distribution. 34 * 3. Neither the name of the University nor the names of its contributors 35 * may be used to endorse or promote products derived from this software 36 * without specific prior written permission. 37 * 38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48 * SUCH DAMAGE. 49 */ 50 51 #include <ctype.h> 52 #include <stdio.h> 53 #include <string.h> 54 #include <limits.h> 55 56 #include "charclass.h" 57 #include "extern.h" 58 59 #define RANGE_MATCH 1 60 #define RANGE_NOMATCH 0 61 #define RANGE_ERROR (-1) 62 63 static int 64 classmatch(const char *pattern, char test, const char **ep) 65 { 66 const char *mismatch = pattern; 67 const struct cclass *cc; 68 const char *colon; 69 size_t len; 70 int rval = RANGE_NOMATCH; 71 72 if (*pattern++ != ':') { 73 *ep = mismatch; 74 return RANGE_ERROR; 75 } 76 if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') { 77 *ep = mismatch; 78 return RANGE_ERROR; 79 } 80 *ep = colon + 2; 81 len = (size_t)(colon - pattern); 82 83 for (cc = cclasses; cc->name != NULL; cc++) { 84 if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') { 85 if (cc->isctype((unsigned char)test)) 86 rval = RANGE_MATCH; 87 return rval; 88 } 89 } 90 91 /* invalid character class, treat as normal text */ 92 *ep = mismatch; 93 return RANGE_ERROR; 94 } 95 96 static int 97 rangematch(const char **pp, char test) 98 { 99 const char *pattern = *pp; 100 int negate, ok; 101 char c, c2; 102 103 /* 104 * A bracket expression starting with an unquoted circumflex 105 * character produces unspecified results (IEEE 1003.2-1992, 106 * 3.13.2). This implementation treats it like '!', for 107 * consistency with the regular expression syntax. 108 * J.T. Conklin (conklin@ngai.kaleida.com) 109 */ 110 if ((negate = (*pattern == '!' || *pattern == '^'))) 111 ++pattern; 112 113 /* 114 * A right bracket shall lose its special meaning and represent 115 * itself in a bracket expression if it occurs first in the list. 116 * -- POSIX.2 2.8.3.2 117 */ 118 ok = 0; 119 c = *pattern++; 120 do { 121 if (c == '[') { 122 switch (classmatch(pattern, test, &pattern)) { 123 case RANGE_MATCH: 124 ok = 1; 125 continue; 126 case RANGE_NOMATCH: 127 continue; 128 default: 129 /* invalid character class, treat literally. */ 130 break; 131 } 132 } 133 if (c == '\\') 134 c = *pattern++; 135 if (c == '\0') 136 return RANGE_ERROR; 137 /* patterns can not match on '/' */ 138 if (c == '/') 139 return RANGE_NOMATCH; 140 if (*pattern == '-' 141 && (c2 = *(pattern + 1)) != '\0' && c2 != ']') { 142 pattern += 2; 143 if (c2 == '\\') 144 c2 = *pattern++; 145 if (c2 == '\0') 146 return RANGE_ERROR; 147 if (c <= test && test <= c2) 148 ok = 1; 149 } else if (c == test) 150 ok = 1; 151 } while ((c = *pattern++) != ']'); 152 153 *pp = pattern; 154 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 155 } 156 157 /* 158 * Single character match, advances pattern as much as needed. 159 * Return 0 on match and !0 (aka 1) on mismatch. 160 * When matched pp is advanced to the end of the pattern matched. 161 */ 162 static int 163 matchchar(const char **pp, const char in) 164 { 165 const char *pattern = *pp; 166 char c; 167 int rv = 0; 168 169 switch (c = *pattern++) { 170 case '?': 171 if (in == '\0') 172 rv = 1; 173 if (in == '/') 174 rv = 1; 175 break; 176 case '[': 177 if (in == '\0') 178 rv = 1; 179 if (in == '/') 180 rv = 1; 181 if (rv == 1) 182 break; 183 184 switch (rangematch(&pattern, in)) { 185 case RANGE_ERROR: 186 /* not a good range, treat as normal text */ 187 goto normal; 188 case RANGE_MATCH: 189 break; 190 case RANGE_NOMATCH: 191 rv = 1; 192 } 193 break; 194 case '\\': 195 if ((c = *pattern++) == '\0') { 196 c = '\\'; 197 --pattern; 198 } 199 /* FALLTHROUGH */ 200 default: 201 normal: 202 if (c != in) 203 rv = 1; 204 break; 205 } 206 207 *pp = pattern; 208 return rv; 209 } 210 211 /* 212 * Do a substring match. If wild is set then the pattern started with a '*'. 213 * The match will go until '*', '/' or '\0' is encountered in pattern or 214 * the input string is consumed up to end. 215 * The pattern and string handles pp and ss are updated only on success. 216 */ 217 static int 218 matchsub(const char **pp, const char **ss, const char *end, int wild) 219 { 220 const char *pattern = *pp; 221 const char *p = pattern; 222 const char *string = *ss; 223 size_t matchlen; 224 225 /* first calculate how many characters the submatch will consume */ 226 for (matchlen = 0; *p != '\0'; matchlen++) { 227 if (p[0] == '*') 228 break; 229 /* '/' acts as barrier */ 230 if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) { 231 if (wild) { 232 /* match needs to match up to end of segment */ 233 if (string > end - matchlen) 234 return 1; 235 string = end - matchlen; 236 wild = 0; 237 } 238 break; 239 } 240 /* 241 * skip forward one character in pattern by doing a 242 * dummy lookup. 243 */ 244 matchchar(&p, ' '); 245 } 246 247 /* not enough char to match */ 248 if (string > end - matchlen) 249 return 1; 250 251 if (*p == '\0') { 252 if (wild) { 253 /* match needs to match up to end of segment */ 254 string = end - matchlen; 255 wild = 0; 256 } 257 } 258 259 while (*pattern != '\0' && *pattern != '*') { 260 /* eat possible escape char before '/' */ 261 if (pattern[0] == '\\' && pattern[1] == '/') 262 pattern++; 263 if (pattern[0] == '/') { 264 /* hit the barrier but we still have characters left */ 265 if (string < end) 266 return 1; 267 break; 268 } 269 270 /* check if there are still characters available to compare */ 271 if (string >= end) 272 return 1; 273 /* Compare one char at a time. */ 274 if (!matchchar(&pattern, *string++)) 275 continue; 276 if (wild) { 277 /* skip forward one char and restart match */ 278 string = ++*ss; 279 pattern = *pp; 280 /* can it still match? */ 281 if (string > end - matchlen) 282 return 1; 283 } else { 284 /* failed match */ 285 return 1; 286 } 287 } 288 289 *pp = pattern; 290 *ss = string; 291 return 0; 292 } 293 294 /* 295 * File matching with the addition of the special '**'. 296 * Returns 0 on match and !0 for strings that do not match pattern. 297 */ 298 int 299 rmatch(const char *pattern, const char *string, int leading_dir) 300 { 301 const char *segend, *segnext, *mismatch = NULL; 302 int wild, starstar; 303 304 while (*pattern && *string) { 305 306 /* handle leading '/' first */ 307 if (pattern[0] == '\\' && pattern[1] == '/') 308 pattern++; 309 if (*string == '/' && *pattern == '/') { 310 string++; 311 pattern++; 312 } 313 314 /* match to the next '/' in string */ 315 segend = strchr(string, '/'); 316 if (segend == NULL) 317 segend = strchr(string, '\0'); 318 319 while (*pattern) { 320 /* 321 * Check for '*' and '**'. For '*' reduce '*' and '?' 322 * sequences into n-'?' and trailing '*'. 323 * For '**' this optimisation can not be done 324 * since '**???/' will match 'a/aa/aaa/' but not 325 * 'a/aa/aa/' still additional '*' will be reduced. 326 */ 327 wild = 0; 328 starstar = 0; 329 for ( ; *pattern == '*' || *pattern == '?'; pattern++) { 330 if (pattern[0] == '*') { 331 if (pattern[1] == '*') { 332 starstar = 1; 333 pattern++; 334 } 335 wild = 1; 336 } else if (!starstar) { /* pattern[0] == '?' */ 337 if (string < segend && *string != '/') 338 string++; 339 else 340 /* no match possible */ 341 return 1; 342 } else 343 break; 344 } 345 346 /* pattern ends in '**' so it is a match */ 347 if (starstar && *pattern == '\0') 348 return 0; 349 350 if (starstar) { 351 segnext = segend; 352 mismatch = pattern; 353 } 354 355 while (string < segend) { 356 if (matchsub(&pattern, &string, segend, wild)) { 357 failed_match: 358 /* 359 * failed to match, if starstar retry 360 * with the next segment. 361 */ 362 if (mismatch) { 363 pattern = mismatch; 364 wild = 1; 365 string = segnext; 366 if (*string == '/') 367 string++; 368 segend = strchr(string, '/'); 369 if (!segend) 370 segend = strchr(string, 371 '\0'); 372 segnext = segend; 373 if (string < segend) 374 continue; 375 } 376 /* no match possible */ 377 return 1; 378 } 379 break; 380 } 381 382 /* at end of string segment, eat up any extra '*' */ 383 if (string >= segend && *pattern != '*') 384 break; 385 } 386 if (*string != '\0' && *string != '/') 387 goto failed_match; 388 if (*pattern != '\0' && *pattern != '/') 389 goto failed_match; 390 } 391 392 /* if both pattern and string are consumed it was a match */ 393 if (*pattern == '\0' && *string == '\0') 394 return 0; 395 /* if leading_dir is set then string can also be '/' for success */ 396 if (leading_dir && *pattern == '\0' && *string == '/') 397 return 0; 398 /* else failure */ 399 return 1; 400 } 401