1 /* $OpenBSD: rmatch.c,v 1.3 2022/12/26 19:16:02 jmc Exp $ */ 2 3 /* 4 * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19 /* 20 * Copyright (c) 1989, 1993, 1994 21 * The Regents of the University of California. All rights reserved. 22 * 23 * This code is derived from software contributed to Berkeley by 24 * Guido van Rossum. 25 * 26 * Redistribution and use in source and binary forms, with or without 27 * modification, are permitted provided that the following conditions 28 * are met: 29 * 1. Redistributions of source code must retain the above copyright 30 * notice, this list of conditions and the following disclaimer. 31 * 2. Redistributions in binary form must reproduce the above copyright 32 * notice, this list of conditions and the following disclaimer in the 33 * documentation and/or other materials provided with the distribution. 34 * 3. Neither the name of the University nor the names of its contributors 35 * may be used to endorse or promote products derived from this software 36 * without specific prior written permission. 37 * 38 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 39 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 40 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 41 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 42 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 43 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 44 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 45 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 46 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 47 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 48 * SUCH DAMAGE. 49 */ 50 51 #include <ctype.h> 52 #include <stdio.h> 53 #include <string.h> 54 #include <limits.h> 55 56 #include "charclass.h" 57 #include "extern.h" 58 59 #define RANGE_MATCH 1 60 #define RANGE_NOMATCH 0 61 #define RANGE_ERROR (-1) 62 63 static int 64 classmatch(const char *pattern, char test, const char **ep) 65 { 66 const char *mismatch = pattern; 67 const struct cclass *cc; 68 const char *colon; 69 size_t len; 70 int rval = RANGE_NOMATCH; 71 72 if (*pattern++ != ':') { 73 *ep = mismatch; 74 return RANGE_ERROR; 75 } 76 if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') { 77 *ep = mismatch; 78 return RANGE_ERROR; 79 } 80 *ep = colon + 2; 81 len = (size_t)(colon - pattern); 82 83 for (cc = cclasses; cc->name != NULL; cc++) { 84 if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') { 85 if (cc->isctype((unsigned char)test)) 86 rval = RANGE_MATCH; 87 return rval; 88 } 89 } 90 91 /* invalid character class, treat as normal text */ 92 *ep = mismatch; 93 return RANGE_ERROR; 94 } 95 96 static int 97 rangematch(const char **pp, char test) 98 { 99 const char *pattern = *pp; 100 int negate, ok; 101 char c, c2; 102 103 /* 104 * A bracket expression starting with an unquoted circumflex 105 * character produces unspecified results (IEEE 1003.2-1992, 106 * 3.13.2). This implementation treats it like '!', for 107 * consistency with the regular expression syntax. 108 * J.T. Conklin (conklin@ngai.kaleida.com) 109 */ 110 if ((negate = (*pattern == '!' || *pattern == '^'))) 111 ++pattern; 112 113 /* 114 * A right bracket shall lose its special meaning and represent 115 * itself in a bracket expression if it occurs first in the list. 116 * -- POSIX.2 2.8.3.2 117 */ 118 ok = 0; 119 c = *pattern++; 120 do { 121 if (c == '[') { 122 switch (classmatch(pattern, test, &pattern)) { 123 case RANGE_MATCH: 124 ok = 1; 125 continue; 126 case RANGE_NOMATCH: 127 continue; 128 default: 129 /* invalid character class, treat literally. */ 130 break; 131 } 132 } 133 if (c == '\\') 134 c = *pattern++; 135 if (c == '\0') 136 return RANGE_ERROR; 137 /* patterns can not match on '/' */ 138 if (c == '/') 139 return RANGE_NOMATCH; 140 if (*pattern == '-' 141 && (c2 = *(pattern + 1)) != '\0' && c2 != ']') { 142 pattern += 2; 143 if (c2 == '\\') 144 c2 = *pattern++; 145 if (c2 == '\0') 146 return RANGE_ERROR; 147 if (c <= test && test <= c2) 148 ok = 1; 149 } else if (c == test) 150 ok = 1; 151 } while ((c = *pattern++) != ']'); 152 153 *pp = pattern; 154 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 155 } 156 157 /* 158 * Single character match, advances pattern as much as needed. 159 * Return 0 on match and !0 (aka 1) on mismatch. 160 * When matched pp is advanced to the end of the pattern matched. 161 */ 162 static int 163 matchchar(const char **pp, const char in) 164 { 165 const char *pattern = *pp; 166 char c; 167 int rv = 0; 168 169 switch (c = *pattern++) { 170 case '?': 171 if (in == '\0') 172 rv = 1; 173 if (in == '/') 174 rv = 1; 175 break; 176 case '[': 177 if (in == '\0') 178 rv = 1; 179 if (in == '/') 180 rv = 1; 181 if (rv == 1) 182 break; 183 184 switch (rangematch(&pattern, in)) { 185 case RANGE_ERROR: 186 /* not a good range, treat as normal text */ 187 goto normal; 188 case RANGE_MATCH: 189 break; 190 case RANGE_NOMATCH: 191 rv = 1; 192 } 193 break; 194 case '\\': 195 if ((c = *pattern++) == '\0') { 196 c = '\\'; 197 --pattern; 198 } 199 /* FALLTHROUGH */ 200 default: 201 normal: 202 if (c != in) 203 rv = 1; 204 break; 205 } 206 207 *pp = pattern; 208 return rv; 209 } 210 211 /* 212 * Do a substring match. If wild is set then the pattern started with a '*'. 213 * The match will go until '*', '/' or '\0' is encountered in pattern or 214 * the input string is consumed up to end. 215 * The pattern and string handles pp and ss are updated only on success. 216 */ 217 static int 218 matchsub(const char **pp, const char **ss, const char *end, int wild) 219 { 220 const char *pattern = *pp; 221 const char *p = pattern; 222 const char *string = *ss; 223 size_t matchlen; 224 225 /* first calculate how many characters the submatch will consume */ 226 for (matchlen = 0; *p != '\0'; matchlen++) { 227 if (p[0] == '*') 228 break; 229 /* '/' acts as barrier */ 230 if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) { 231 if (wild) { 232 /* match needs to match up to end of segment */ 233 if (string > end - matchlen) 234 return 1; 235 string = end - matchlen; 236 wild = 0; 237 } 238 break; 239 } 240 /* 241 * skip forward one character in pattern by doing a 242 * dummy lookup. 243 */ 244 matchchar(&p, ' '); 245 } 246 247 /* not enough char to match */ 248 if (string > end - matchlen) 249 return 1; 250 251 if (*p == '\0') { 252 if (wild) { 253 /* match needs to match up to end of segment */ 254 string = end - matchlen; 255 wild = 0; 256 } 257 } 258 259 while (*pattern != '\0' && *pattern != '*') { 260 /* eat possible escape char before '/' */ 261 if (pattern[0] == '\\' && pattern[1] == '/') 262 pattern++; 263 if (pattern[0] == '/') 264 break; 265 266 /* check if there are still characters available to compare */ 267 if (string >= end) 268 return 1; 269 /* Compare one char at a time. */ 270 if (!matchchar(&pattern, *string++)) 271 continue; 272 if (wild) { 273 /* skip forward one char and restart match */ 274 string = ++*ss; 275 pattern = *pp; 276 /* can it still match? */ 277 if (string > end - matchlen) 278 return 1; 279 } else { 280 /* failed match */ 281 return 1; 282 } 283 } 284 285 *pp = pattern; 286 *ss = string; 287 return 0; 288 } 289 290 /* 291 * File matching with the addition of the special '**'. 292 * Returns 0 on match and !0 for strings that do not match pattern. 293 */ 294 int 295 rmatch(const char *pattern, const char *string, int leading_dir) 296 { 297 const char *segend, *segnext, *mismatch = NULL; 298 int wild, starstar; 299 300 while (*pattern && *string) { 301 302 /* handle leading '/' first */ 303 if (pattern[0] == '\\' && pattern[1] == '/') 304 pattern++; 305 if (*string == '/' && *pattern == '/') { 306 string++; 307 pattern++; 308 } 309 310 /* match to the next '/' in string */ 311 segend = strchr(string, '/'); 312 if (segend == NULL) 313 segend = strchr(string, '\0'); 314 315 while (*pattern) { 316 /* 317 * Check for '*' and '**'. For '*' reduce '*' and '?' 318 * sequences into n-'?' and trailing '*'. 319 * For '**' this optimisation can not be done 320 * since '**???/' will match 'a/aa/aaa/' but not 321 * 'a/aa/aa/' still additional '*' will be reduced. 322 */ 323 wild = 0; 324 starstar = 0; 325 for ( ; *pattern == '*' || *pattern == '?'; pattern++) { 326 if (pattern[0] == '*') { 327 if (pattern[1] == '*') { 328 starstar = 1; 329 pattern++; 330 } 331 wild = 1; 332 } else if (!starstar) { /* pattern[0] == '?' */ 333 if (string < segend && *string != '/') 334 string++; 335 else 336 /* no match possible */ 337 return 1; 338 } else 339 break; 340 } 341 342 /* pattern ends in '**' so it is a match */ 343 if (starstar && *pattern == '\0') 344 return 0; 345 346 if (starstar) { 347 segnext = segend; 348 mismatch = pattern; 349 } 350 351 while (string < segend) { 352 if (matchsub(&pattern, &string, segend, wild)) { 353 failed_match: 354 /* 355 * failed to match, if starstar retry 356 * with the next segment. 357 */ 358 if (mismatch) { 359 pattern = mismatch; 360 wild = 1; 361 string = segnext; 362 if (*string == '/') 363 string++; 364 segend = strchr(string, '/'); 365 if (!segend) 366 segend = strchr(string, 367 '\0'); 368 segnext = segend; 369 if (string < segend) 370 continue; 371 } 372 /* no match possible */ 373 return 1; 374 } 375 break; 376 } 377 378 /* at end of string segment, eat up any extra '*' */ 379 if (string >= segend && *pattern != '*') 380 break; 381 } 382 if (*string != '\0' && *string != '/') 383 goto failed_match; 384 if (*pattern != '\0' && *pattern != '/') 385 goto failed_match; 386 } 387 388 /* if both pattern and string are consumed it was a match */ 389 if (*pattern == '\0' && *string == '\0') 390 return 0; 391 /* if leading_dir is set then string can also be '/' for success */ 392 if (leading_dir && *pattern == '\0' && *string == '/') 393 return 0; 394 /* else failure */ 395 return 1; 396 } 397