1 /* $OpenPackages$ */ 2 /* $OpenBSD: str.c,v 1.25 2007/09/17 09:44:20 espie Exp $ */ 3 /* $NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $ */ 4 5 /*- 6 * Copyright (c) 1988, 1989, 1990, 1993 7 * The Regents of the University of California. All rights reserved. 8 * Copyright (c) 1989 by Berkeley Softworks 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * Adam de Boor. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39 #include <ctype.h> 40 #include <string.h> 41 #include "config.h" 42 #include "defines.h" 43 #include "str.h" 44 #include "memory.h" 45 #include "buf.h" 46 47 /* helpers for Str_Matchi */ 48 static bool range_match(char, const char **, const char *); 49 static bool star_match(const char *, const char *, const char *, const char *); 50 51 char * 52 Str_concati(const char *s1, const char *e1, const char *s2, const char *e2, 53 int sep) 54 { 55 size_t len1, len2; 56 char *result; 57 58 /* get the length of both strings */ 59 len1 = e1 - s1; 60 len2 = e2 - s2; 61 62 /* space for separator */ 63 if (sep) 64 len1++; 65 result = emalloc(len1 + len2 + 1); 66 67 /* copy first string into place */ 68 memcpy(result, s1, len1); 69 70 /* add separator character */ 71 if (sep) 72 result[len1-1] = sep; 73 74 /* copy second string plus EOS into place */ 75 memcpy(result + len1, s2, len2); 76 result[len1+len2] = '\0'; 77 return result; 78 } 79 80 /*- 81 * brk_string -- 82 * Fracture a string into an array of words (as delineated by tabs or 83 * spaces) taking quotation marks into account. Leading tabs/spaces 84 * are ignored. 85 * 86 * returns -- 87 * Pointer to the array of pointers to the words. To make life easier, 88 * the first word is always the value of the .MAKE variable. 89 */ 90 char ** 91 brk_string(const char *str, int *store_argc, char **buffer) 92 { 93 int argc; 94 char ch; 95 char inquote; 96 const char *p; 97 char *start, *t; 98 size_t len; 99 int argmax = 50; 100 size_t curlen = 0; 101 char **argv = emalloc((argmax + 1) * sizeof(char *)); 102 103 /* skip leading space chars. */ 104 for (; *str == ' ' || *str == '\t'; ++str) 105 continue; 106 107 /* allocate room for a copy of the string */ 108 if ((len = strlen(str) + 1) > curlen) 109 *buffer = emalloc(curlen = len); 110 111 /* 112 * copy the string; at the same time, parse backslashes, 113 * quotes and build the argument list. 114 */ 115 argc = 0; 116 inquote = '\0'; 117 for (p = str, start = t = *buffer;; ++p) { 118 switch (ch = *p) { 119 case '"': 120 case '\'': 121 if (inquote) { 122 if (inquote == ch) 123 inquote = '\0'; 124 else 125 break; 126 } else { 127 inquote = ch; 128 /* Don't miss "" or '' */ 129 if (start == NULL && p[1] == inquote) { 130 start = t + 1; 131 break; 132 } 133 } 134 continue; 135 case ' ': 136 case '\t': 137 case '\n': 138 if (inquote) 139 break; 140 if (!start) 141 continue; 142 /* FALLTHROUGH */ 143 case '\0': 144 /* 145 * end of a token -- make sure there's enough argv 146 * space and save off a pointer. 147 */ 148 if (!start) 149 goto done; 150 151 *t++ = '\0'; 152 if (argc == argmax) { 153 argmax *= 2; /* ramp up fast */ 154 argv = erealloc(argv, 155 (argmax + 1) * sizeof(char *)); 156 } 157 argv[argc++] = start; 158 start = NULL; 159 if (ch == '\n' || ch == '\0') 160 goto done; 161 continue; 162 case '\\': 163 switch (ch = *++p) { 164 case '\0': 165 case '\n': 166 /* hmmm; fix it up as best we can */ 167 ch = '\\'; 168 --p; 169 break; 170 case 'b': 171 ch = '\b'; 172 break; 173 case 'f': 174 ch = '\f'; 175 break; 176 case 'n': 177 ch = '\n'; 178 break; 179 case 'r': 180 ch = '\r'; 181 break; 182 case 't': 183 ch = '\t'; 184 break; 185 } 186 break; 187 } 188 if (!start) 189 start = t; 190 *t++ = ch; 191 } 192 done: 193 argv[argc] = NULL; 194 *store_argc = argc; 195 return argv; 196 } 197 198 199 const char * 200 iterate_words(const char **end) 201 { 202 const char *start, *p; 203 char state = 0; 204 start = *end; 205 206 while (isspace(*start)) 207 start++; 208 if (*start == '\0') 209 return NULL; 210 211 for (p = start;; p++) 212 switch(*p) { 213 case '\\': 214 if (p[1] != '\0') 215 p++; 216 break; 217 case '\'': 218 case '"': 219 if (state == *p) 220 state = 0; 221 else if (state == 0) 222 state = *p; 223 break; 224 case ' ': 225 case '\t': 226 if (state != 0) 227 break; 228 /* FALLTHROUGH */ 229 case '\0': 230 *end = p; 231 return start; 232 default: 233 break; 234 } 235 } 236 237 static bool 238 star_match(const char *string, const char *estring, 239 const char *pattern, const char *epattern) 240 { 241 /* '*' matches any substring. We handle this by calling ourselves 242 * recursively for each postfix of string, until either we match or 243 * we reach the end of the string. */ 244 pattern++; 245 /* Skip over contiguous sequences of `?*', so that 246 * recursive calls only occur on `real' characters. */ 247 while (pattern != epattern && 248 (*pattern == '?' || *pattern == '*')) { 249 if (*pattern == '?') { 250 if (string == estring) 251 return false; 252 else 253 string++; 254 } 255 pattern++; 256 } 257 if (pattern == epattern) 258 return true; 259 for (; string != estring; string++) 260 if (Str_Matchi(string, estring, pattern, 261 epattern)) 262 return true; 263 return false; 264 } 265 266 static bool 267 range_match(char c, const char **ppat, const char *epattern) 268 { 269 if (*ppat == epattern) { 270 if (c == '[') 271 return true; 272 else 273 return false; 274 } 275 if (**ppat == '!' || **ppat == '^') { 276 (*ppat)++; 277 return !range_match(c, ppat, epattern); 278 } 279 for (;;) { 280 if (**ppat == '\\') { 281 if (++(*ppat) == epattern) 282 return false; 283 } 284 if (**ppat == c) 285 break; 286 if ((*ppat)[1] == '-') { 287 if (*ppat + 2 == epattern) 288 return false; 289 if (**ppat < c && c <= (*ppat)[2]) 290 break; 291 if ((*ppat)[2] <= c && c < **ppat) 292 break; 293 *ppat += 3; 294 } else 295 (*ppat)++; 296 /* The test for ']' is done at the end 297 * so that ']' can be used at the 298 * start of the range without '\' */ 299 if (*ppat == epattern || **ppat == ']') 300 return false; 301 } 302 /* Found matching character, skip over rest 303 * of class. */ 304 while (**ppat != ']') { 305 if (**ppat == '\\') 306 (*ppat)++; 307 /* A non-terminated character class 308 * is ok. */ 309 if (*ppat == epattern) 310 break; 311 (*ppat)++; 312 } 313 return true; 314 } 315 316 bool 317 Str_Matchi(const char *string, const char *estring, 318 const char *pattern, const char *epattern) 319 { 320 while (pattern != epattern) { 321 /* Check for a "*" as the next pattern character. */ 322 if (*pattern == '*') 323 return star_match(string, estring, pattern, epattern); 324 else if (string == estring) 325 return false; 326 /* Check for a "[" as the next pattern character. It is 327 * followed by a list of characters that are acceptable, or 328 * by a range (two characters separated by "-"). */ 329 else if (*pattern == '[') { 330 pattern++; 331 if (!range_match(*string, &pattern, epattern)) 332 return false; 333 334 } 335 /* '?' matches any single character, so shunt test. */ 336 else if (*pattern != '?') { 337 /* If the next pattern character is '\', just strip 338 * off the '\' so we do exact matching on the 339 * character that follows. */ 340 if (*pattern == '\\') { 341 if (++pattern == epattern) 342 return false; 343 } 344 /* There's no special character. Just make sure that 345 * the next characters of each string match. */ 346 if (*pattern != *string) 347 return false; 348 } 349 pattern++; 350 string++; 351 } 352 if (string == estring) 353 return true; 354 else 355 return false; 356 } 357 358 359 /*- 360 *----------------------------------------------------------------------- 361 * Str_SYSVMatch -- 362 * Check word against pattern for a match (% is wild), 363 * 364 * Results: 365 * Returns the beginning position of a match or null. The number 366 * of characters matched is returned in len. 367 *----------------------------------------------------------------------- 368 */ 369 const char * 370 Str_SYSVMatch(const char *word, const char *pattern, size_t *len) 371 { 372 const char *p = pattern; 373 const char *w = word; 374 const char *m; 375 376 if (*p == '\0') { 377 /* Null pattern is the whole string. */ 378 *len = strlen(w); 379 return w; 380 } 381 382 if ((m = strchr(p, '%')) != NULL) { 383 /* Check that the prefix matches. */ 384 for (; p != m && *w && *w == *p; w++, p++) 385 continue; 386 387 if (p != m) 388 return NULL; /* No match. */ 389 390 if (*++p == '\0') { 391 /* No more pattern, return the rest of the string. */ 392 *len = strlen(w); 393 return w; 394 } 395 } 396 397 m = w; 398 399 /* Find a matching tail. */ 400 do { 401 if (strcmp(p, w) == 0) { 402 *len = w - m; 403 return m; 404 } 405 } while (*w++ != '\0'); 406 407 return NULL; 408 } 409 410 411 /*- 412 *----------------------------------------------------------------------- 413 * Str_SYSVSubst -- 414 * Substitute '%' in the pattern with len characters from src. 415 * If the pattern does not contain a '%' prepend len characters 416 * from src. 417 * 418 * Side Effects: 419 * Adds result to buf 420 *----------------------------------------------------------------------- 421 */ 422 void 423 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len) 424 { 425 const char *m; 426 427 if ((m = strchr(pat, '%')) != NULL) { 428 /* Copy the prefix. */ 429 Buf_Addi(buf, pat, m); 430 /* Skip the %. */ 431 pat = m + 1; 432 } 433 434 /* Copy the pattern. */ 435 Buf_AddChars(buf, len, src); 436 437 /* Append the rest. */ 438 Buf_AddString(buf, pat); 439 } 440 441 char * 442 Str_dupi(const char *begin, const char *end) 443 { 444 char *s; 445 446 s = emalloc(end - begin + 1); 447 memcpy(s, begin, end - begin); 448 s[end-begin] = '\0'; 449 return s; 450 } 451 452 char * 453 escape_dupi(const char *begin, const char *end, const char *set) 454 { 455 char *s, *t; 456 457 t = s = emalloc(end - begin + 1); 458 while (begin != end) { 459 if (*begin == '\\') { 460 begin++; 461 if (begin == end) { 462 *t++ = '\\'; 463 break; 464 } 465 if (strchr(set, *begin) == NULL) 466 *t++ = '\\'; 467 } 468 *t++ = *begin++; 469 } 470 *t++ = '\0'; 471 return s; 472 } 473 474 char * 475 Str_rchri(const char *begin, const char *end, int c) 476 { 477 if (begin != end) 478 do { 479 if (*--end == c) 480 return (char *)end; 481 } while (end != begin); 482 return NULL; 483 } 484