1 /* $OpenBSD: str.c,v 1.31 2014/05/18 08:08:50 espie Exp $ */ 2 /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1988, 1989, 1990, 1993 6 * The Regents of the University of California. All rights reserved. 7 * Copyright (c) 1989 by Berkeley Softworks 8 * All rights reserved. 9 * 10 * This code is derived from software contributed to Berkeley by 11 * Adam de Boor. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #include <ctype.h> 39 #include <string.h> 40 #include "config.h" 41 #include "defines.h" 42 #include "str.h" 43 #include "memory.h" 44 #include "buf.h" 45 46 /* helpers for Str_Matchi */ 47 static bool range_match(char, const char **, const char *); 48 static bool star_match(const char *, const char *, const char *, const char *); 49 50 char * 51 Str_concati(const char *s1, const char *e1, const char *s2, const char *e2, 52 int sep) 53 { 54 size_t len1, len2; 55 char *result; 56 57 /* get the length of both strings */ 58 len1 = e1 - s1; 59 len2 = e2 - s2; 60 61 /* space for separator */ 62 if (sep) 63 len1++; 64 result = emalloc(len1 + len2 + 1); 65 66 /* copy first string into place */ 67 memcpy(result, s1, len1); 68 69 /* add separator character */ 70 if (sep) 71 result[len1-1] = sep; 72 73 /* copy second string plus EOS into place */ 74 memcpy(result + len1, s2, len2); 75 result[len1+len2] = '\0'; 76 return result; 77 } 78 79 /*- 80 * brk_string -- 81 * Fracture a string into an array of words (as delineated by tabs or 82 * spaces) taking quotation marks into account. Leading tabs/spaces 83 * are ignored. 84 * 85 * returns -- 86 * Pointer to the array of pointers to the words. To make life easier, 87 * the first word is always the value of the .MAKE variable. 88 */ 89 char ** 90 brk_string(const char *str, int *store_argc, char **buffer) 91 { 92 int argc; 93 char ch; 94 char inquote; 95 const char *p; 96 char *start, *t; 97 size_t len; 98 int argmax = 50; 99 size_t curlen = 0; 100 char **argv = ereallocarray(NULL, argmax + 1, sizeof(char *)); 101 102 /* skip leading space chars. */ 103 for (; *str == ' ' || *str == '\t'; ++str) 104 continue; 105 106 /* allocate room for a copy of the string */ 107 if ((len = strlen(str) + 1) > curlen) 108 *buffer = emalloc(curlen = len); 109 110 /* 111 * copy the string; at the same time, parse backslashes, 112 * quotes and build the argument list. 113 */ 114 argc = 0; 115 inquote = '\0'; 116 for (p = str, start = t = *buffer;; ++p) { 117 switch (ch = *p) { 118 case '"': 119 case '\'': 120 if (inquote) { 121 if (inquote == ch) 122 inquote = '\0'; 123 else 124 break; 125 } else { 126 inquote = ch; 127 /* Don't miss "" or '' */ 128 if (start == NULL && p[1] == inquote) { 129 start = t + 1; 130 break; 131 } 132 } 133 continue; 134 case ' ': 135 case '\t': 136 case '\n': 137 if (inquote) 138 break; 139 if (!start) 140 continue; 141 /* FALLTHROUGH */ 142 case '\0': 143 /* 144 * end of a token -- make sure there's enough argv 145 * space and save off a pointer. 146 */ 147 if (!start) 148 goto done; 149 150 *t++ = '\0'; 151 if (argc == argmax) { 152 argmax *= 2; /* ramp up fast */ 153 argv = ereallocarray(argv, 154 (argmax + 1), sizeof(char *)); 155 } 156 argv[argc++] = start; 157 start = NULL; 158 if (ch == '\n' || ch == '\0') 159 goto done; 160 continue; 161 case '\\': 162 switch (ch = *++p) { 163 case '\0': 164 case '\n': 165 /* hmmm; fix it up as best we can */ 166 ch = '\\'; 167 --p; 168 break; 169 case 'b': 170 ch = '\b'; 171 break; 172 case 'f': 173 ch = '\f'; 174 break; 175 case 'n': 176 ch = '\n'; 177 break; 178 case 'r': 179 ch = '\r'; 180 break; 181 case 't': 182 ch = '\t'; 183 break; 184 } 185 break; 186 } 187 if (!start) 188 start = t; 189 *t++ = ch; 190 } 191 done: 192 argv[argc] = NULL; 193 *store_argc = argc; 194 return argv; 195 } 196 197 198 const char * 199 iterate_words(const char **end) 200 { 201 const char *start, *p; 202 char state = 0; 203 start = *end; 204 205 while (ISSPACE(*start)) 206 start++; 207 if (*start == '\0') 208 return NULL; 209 210 for (p = start;; p++) 211 switch(*p) { 212 case '\\': 213 if (p[1] != '\0') 214 p++; 215 break; 216 case '\'': 217 case '"': 218 if (state == *p) 219 state = 0; 220 else if (state == 0) 221 state = *p; 222 break; 223 case ' ': 224 case '\t': 225 if (state != 0) 226 break; 227 /* FALLTHROUGH */ 228 case '\0': 229 *end = p; 230 return start; 231 default: 232 break; 233 } 234 } 235 236 static bool 237 star_match(const char *string, const char *estring, 238 const char *pattern, const char *epattern) 239 { 240 /* '*' matches any substring. We handle this by calling ourselves 241 * recursively for each postfix of string, until either we match or 242 * we reach the end of the string. */ 243 pattern++; 244 /* Skip over contiguous sequences of `?*', so that 245 * recursive calls only occur on `real' characters. */ 246 while (pattern != epattern && 247 (*pattern == '?' || *pattern == '*')) { 248 if (*pattern == '?') { 249 if (string == estring) 250 return false; 251 else 252 string++; 253 } 254 pattern++; 255 } 256 if (pattern == epattern) 257 return true; 258 for (; string != estring; string++) 259 if (Str_Matchi(string, estring, pattern, 260 epattern)) 261 return true; 262 return false; 263 } 264 265 static bool 266 range_match(char c, const char **ppat, const char *epattern) 267 { 268 if (*ppat == epattern) { 269 if (c == '[') 270 return true; 271 else 272 return false; 273 } 274 if (**ppat == '!' || **ppat == '^') { 275 (*ppat)++; 276 return !range_match(c, ppat, epattern); 277 } 278 for (;;) { 279 if (**ppat == '\\') { 280 if (++(*ppat) == epattern) 281 return false; 282 } 283 if (**ppat == c) 284 break; 285 if ((*ppat)[1] == '-') { 286 if (*ppat + 2 == epattern) 287 return false; 288 if (**ppat < c && c <= (*ppat)[2]) 289 break; 290 if ((*ppat)[2] <= c && c < **ppat) 291 break; 292 *ppat += 3; 293 } else 294 (*ppat)++; 295 /* The test for ']' is done at the end 296 * so that ']' can be used at the 297 * start of the range without '\' */ 298 if (*ppat == epattern || **ppat == ']') 299 return false; 300 } 301 /* Found matching character, skip over rest 302 * of class. */ 303 while (**ppat != ']') { 304 if (**ppat == '\\') 305 (*ppat)++; 306 /* A non-terminated character class 307 * is ok. */ 308 if (*ppat == epattern) 309 break; 310 (*ppat)++; 311 } 312 return true; 313 } 314 315 bool 316 Str_Matchi(const char *string, const char *estring, 317 const char *pattern, const char *epattern) 318 { 319 while (pattern != epattern) { 320 /* Check for a "*" as the next pattern character. */ 321 if (*pattern == '*') 322 return star_match(string, estring, pattern, epattern); 323 else if (string == estring) 324 return false; 325 /* Check for a "[" as the next pattern character. It is 326 * followed by a list of characters that are acceptable, or 327 * by a range (two characters separated by "-"). */ 328 else if (*pattern == '[') { 329 pattern++; 330 if (!range_match(*string, &pattern, epattern)) 331 return false; 332 333 } 334 /* '?' matches any single character, so shunt test. */ 335 else if (*pattern != '?') { 336 /* If the next pattern character is '\', just strip 337 * off the '\' so we do exact matching on the 338 * character that follows. */ 339 if (*pattern == '\\') { 340 if (++pattern == epattern) 341 return false; 342 } 343 /* There's no special character. Just make sure that 344 * the next characters of each string match. */ 345 if (*pattern != *string) 346 return false; 347 } 348 pattern++; 349 string++; 350 } 351 if (string == estring) 352 return true; 353 else 354 return false; 355 } 356 357 358 /*- 359 *----------------------------------------------------------------------- 360 * Str_SYSVMatch -- 361 * Check word against pattern for a match (% is wild), 362 * 363 * Results: 364 * Returns the beginning position of a match or null. The number 365 * of characters matched is returned in len. 366 *----------------------------------------------------------------------- 367 */ 368 const char * 369 Str_SYSVMatch(const char *word, const char *pattern, size_t *len) 370 { 371 const char *p = pattern; 372 const char *w = word; 373 const char *m; 374 375 if (*p == '\0') { 376 /* Null pattern is the whole string. */ 377 *len = strlen(w); 378 return w; 379 } 380 381 if ((m = strchr(p, '%')) != NULL) { 382 /* Check that the prefix matches. */ 383 for (; p != m && *w && *w == *p; w++, p++) 384 continue; 385 386 if (p != m) 387 return NULL; /* No match. */ 388 389 if (*++p == '\0') { 390 /* No more pattern, return the rest of the string. */ 391 *len = strlen(w); 392 return w; 393 } 394 } 395 396 m = w; 397 398 /* Find a matching tail. */ 399 do { 400 if (strcmp(p, w) == 0) { 401 *len = w - m; 402 return m; 403 } 404 } while (*w++ != '\0'); 405 406 return NULL; 407 } 408 409 410 /*- 411 *----------------------------------------------------------------------- 412 * Str_SYSVSubst -- 413 * Substitute '%' in the pattern with len characters from src. 414 * If the pattern does not contain a '%' prepend len characters 415 * from src. 416 * 417 * Side Effects: 418 * Adds result to buf 419 *----------------------------------------------------------------------- 420 */ 421 void 422 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len) 423 { 424 const char *m; 425 426 if ((m = strchr(pat, '%')) != NULL) { 427 /* Copy the prefix. */ 428 Buf_Addi(buf, pat, m); 429 /* Skip the %. */ 430 pat = m + 1; 431 } 432 433 /* Copy the pattern. */ 434 Buf_AddChars(buf, len, src); 435 436 /* Append the rest. */ 437 Buf_AddString(buf, pat); 438 } 439 440 char * 441 Str_dupi(const char *begin, const char *end) 442 { 443 char *s; 444 445 s = emalloc(end - begin + 1); 446 memcpy(s, begin, end - begin); 447 s[end-begin] = '\0'; 448 return s; 449 } 450 451 char * 452 escape_dupi(const char *begin, const char *end, const char *set) 453 { 454 char *s, *t; 455 456 t = s = emalloc(end - begin + 1); 457 while (begin != end) { 458 if (*begin == '\\') { 459 begin++; 460 if (begin == end) { 461 *t++ = '\\'; 462 break; 463 } 464 if (strchr(set, *begin) == NULL) 465 *t++ = '\\'; 466 } 467 *t++ = *begin++; 468 } 469 *t++ = '\0'; 470 return s; 471 } 472 473 char * 474 Str_rchri(const char *begin, const char *end, int c) 475 { 476 if (begin != end) 477 do { 478 if (*--end == c) 479 return (char *)end; 480 } while (end != begin); 481 return NULL; 482 } 483