xref: /openbsd/usr.bin/make/str.c (revision 17df1aa7)
1 /*	$OpenPackages$ */
2 /*	$OpenBSD: str.c,v 1.25 2007/09/17 09:44:20 espie Exp $	*/
3 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
4 
5 /*-
6  * Copyright (c) 1988, 1989, 1990, 1993
7  *	The Regents of the University of California.  All rights reserved.
8  * Copyright (c) 1989 by Berkeley Softworks
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Adam de Boor.
13  *
14  * Redistribution and use in source and binary forms, with or without
15  * modification, are permitted provided that the following conditions
16  * are met:
17  * 1. Redistributions of source code must retain the above copyright
18  *    notice, this list of conditions and the following disclaimer.
19  * 2. Redistributions in binary form must reproduce the above copyright
20  *    notice, this list of conditions and the following disclaimer in the
21  *    documentation and/or other materials provided with the distribution.
22  * 3. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  */
38 
39 #include <ctype.h>
40 #include <string.h>
41 #include "config.h"
42 #include "defines.h"
43 #include "str.h"
44 #include "memory.h"
45 #include "buf.h"
46 
/*
 * Helpers for Str_Matchi: range_match handles a `[...]' character class,
 * star_match handles a `*' wildcard.
 */
static bool range_match(char, const char **, const char *);
static bool star_match(const char *, const char *, const char *, const char *);
50 
/*-
 * Str_concati --
 *	Concatenate the intervals [s1, e1) and [s2, e2) into a newly
 *	allocated, NUL-terminated string.  If sep is non-zero, it is
 *	inserted as a single character between the two parts.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result, *t;

	result = emalloc(len1 + seplen + len2 + 1);
	t = result;

	/* copy exactly the first interval: the byte at e1 is never read */
	memcpy(t, s1, len1);
	t += len1;

	/* add separator character */
	if (sep)
		*t++ = sep;

	/* copy second interval, then terminate */
	memcpy(t, s2, len2);
	t[len2] = '\0';
	return result;
}
79 
/*-
 * brk_string --
 *	Split a string into words separated by unquoted spaces or tabs.
 *	Leading spaces and tabs are skipped.  Single and double quotes
 *	group characters into one word and are removed from the result.
 *	A backslash escapes the next character; \b, \f, \n, \r and \t are
 *	translated to the corresponding control character, and a backslash
 *	right before a newline or the end of the string is kept literally.
 *	An unquoted newline ends the scan.
 *
 * returns --
 *	A NULL-terminated array of pointers to the words; the number of
 *	words is stored in *store_argc.  The words themselves live in a
 *	separate allocation whose address is stored in *buffer.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int nwords = 0;
	int capacity = 50;
	char quote = '\0';	/* active quote character, if any */
	char c;
	const char *in;
	char *word;		/* start of the word being built, or NULL */
	char *out;		/* next free byte in *buffer */
	char **argv = emalloc((capacity + 1) * sizeof(char *));

	/* skip leading space chars. */
	while (*str == ' ' || *str == '\t')
		str++;

	/* the copy never grows past the input, NUL included */
	*buffer = emalloc(strlen(str) + 1);

	in = str;
	word = out = *buffer;
	for (;; in++) {
		c = *in;
		switch (c) {
		case '"':
		case '\'':
			if (quote == '\0') {
				quote = c;
				/* Don't miss "" or '' */
				if (word == NULL && in[1] == quote) {
					word = out + 1;
					break;
				}
			} else if (quote == c) {
				quote = '\0';
			} else {
				/* the other kind of quote is plain text */
				break;
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (quote != '\0')
				break;
			if (word == NULL)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/* end of a word: terminate it and record it */
			if (word == NULL)
				goto done;

			*out++ = '\0';
			if (nwords == capacity) {
				capacity *= 2;	/* ramp up fast */
				argv = erealloc(argv,
				    (capacity + 1) * sizeof(char *));
			}
			argv[nwords++] = word;
			word = NULL;
			if (c == '\n' || c == '\0')
				goto done;
			continue;
		case '\\':
			c = *++in;
			switch (c) {
			case '\0':
			case '\n':
				/* dangling backslash: keep it, rescan next */
				c = '\\';
				--in;
				break;
			case 'b':
				c = '\b';
				break;
			case 'f':
				c = '\f';
				break;
			case 'n':
				c = '\n';
				break;
			case 'r':
				c = '\r';
				break;
			case 't':
				c = '\t';
				break;
			default:
				break;
			}
			break;
		default:
			break;
		}
		/* ordinary character: it belongs to the current word */
		if (word == NULL)
			word = out;
		*out++ = c;
	}
done:
	argv[nwords] = NULL;
	*store_argc = nwords;
	return argv;
}
197 
198 
/*-
 * iterate_words --
 *	Find the next word in the string at *end.  Leading whitespace is
 *	skipped; the word then runs up to the next space or tab that is
 *	neither inside single/double quotes nor preceded by a backslash,
 *	or up to the end of the string.
 *
 * returns --
 *	Pointer to the start of the word, with *end updated to point just
 *	past it, or NULL (leaving *end untouched) if no word is left.
 */
const char *
iterate_words(const char **end)
{
	const char	*start, *p;
	char	state = 0;	/* active quote character, 0 if none */

	start = *end;

	/* <ctype.h> functions need an unsigned char value, not a plain char */
	while (isspace((unsigned char)*start))
		start++;
	if (*start == '\0')
		return NULL;

	for (p = start;; p++)
		switch (*p) {
		case '\\':
			/* skip the escaped character, unless at the end */
			if (p[1] != '\0')
				p++;
			break;
		case '\'':
		case '"':
			if (state == *p)
				state = 0;
			else if (state == 0)
				state = *p;
			break;
		case ' ':
		case '\t':
			if (state != 0)
				break;
			/* FALLTHROUGH */
		case '\0':
			*end = p;
			return start;
		default:
			break;
		}
}
236 
/*-
 * range_match --
 *	Match character c against a character class.  On entry *ppat
 *	points just past the opening `['.  A leading `!' or `^' negates
 *	the class, `x-y' denotes a range (in either order), a backslash
 *	quotes the next character, and a `]' in first position is an
 *	ordinary member.  A `[' that ends the pattern only matches a
 *	literal `['.
 *
 * returns --
 *	true if c is accepted.  *ppat is then left on the closing `]', or
 *	on epattern if the class is not terminated; nothing at or beyond
 *	epattern is ever read.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	if (*ppat == epattern)
		return c == '[';
	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c) {
			/* Step over the matched character, so that a
			 * leading ']' is not taken for the terminator.  */
			(*ppat)++;
			break;
		}
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern) {
				/* Range without upper bound: the class is
				 * unterminated, consume all of it.  */
				*ppat = epattern;
				return false;
			}
			if ((**ppat < c && c <= (*ppat)[2]) ||
			    ((*ppat)[2] <= c && c < **ppat)) {
				*ppat += 3;
				break;
			}
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character, skip over rest
	 * of class.  A non-terminated character class
	 * is ok. */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			(*ppat)++;
			if (*ppat == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}

static bool star_match(const char *, const char *, const char *, const char *);

/*-
 * Str_Matchi --
 *	Match the interval [string, estring) against the shell-like
 *	pattern [pattern, epattern).  `*' matches any substring, `?' any
 *	single character, `[...]' a character class (see range_match) and
 *	a backslash quotes the next pattern character.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character.  */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		if (string == estring)
			return false;
		if (*pattern == '[') {
			/* Character class: list of acceptable characters
			 * or ranges.  */
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			if (pattern == epattern) {
				/* Unterminated class used up the pattern:
				 * there is no ']' left to step over.  */
				string++;
				break;
			}
		} else if (*pattern != '?') {
			/* '?' matches any single character; anything else
			 * must match exactly, after stripping a quoting
			 * '\'.  */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	return string == estring;
}

/*-
 * star_match --
 *	Handle a `*' in the pattern; pattern points at the `*'.
 *
 * returns --
 *	true if some suffix of [string, estring) matches what follows
 *	the `*'.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	/* '*' matches any substring.  We handle this by calling
	 * Str_Matchi recursively for each postfix of string, until
	 * either we match or we reach the end of the string.  */
	pattern++;
	/* Skip over contiguous sequences of `?*', so that
	 * recursive calls only occur on `real' characters.  */
	while (pattern != epattern &&
	    (*pattern == '?' || *pattern == '*')) {
		if (*pattern == '?') {
			/* each '?' uses up one character */
			if (string == estring)
				return false;
			string++;
		}
		pattern++;
	}
	if (pattern == epattern)
		return true;
	for (; string != estring; string++)
		if (Str_Matchi(string, estring, pattern, epattern))
			return true;
	return false;
}
357 
358 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match word against a System V style pattern, where a single `%'
 *	stands for any run of characters.  The text before the `%' must
 *	be a prefix of word and the text after it a suffix.  A pattern
 *	without `%' is matched as a suffix of word; an empty pattern
 *	matches the whole word.
 *
 * Results:
 *	Pointer to the part of word covered by the wildcard, with its
 *	length stored in *len, or NULL (*len untouched) if there is no
 *	match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent;
	const char *tail = pattern;	/* what must match the end of word */
	const char *stem = word;	/* where the wildcard part starts */
	size_t stemlen, taillen;

	if (*pattern == '\0') {
		/* Null pattern is the whole string.  */
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		/* Walk the prefix in both strings.  */
		while (tail != percent && *stem != '\0' && *stem == *tail) {
			stem++;
			tail++;
		}
		if (tail != percent)
			return NULL;	/* Prefix differs.  */

		/* Step over the %.  */
		tail++;
		if (*tail == '\0') {
			/* Nothing after the %: take the rest of the word. */
			*len = strlen(stem);
			return stem;
		}
	}

	/* The remaining pattern has to be a suffix of what is left.  */
	stemlen = strlen(stem);
	taillen = strlen(tail);
	if (taillen > stemlen)
		return NULL;
	if (strcmp(stem + (stemlen - taillen), tail) != 0)
		return NULL;

	*len = stemlen - taillen;
	return stem;
}
409 
410 
411 /*-
412  *-----------------------------------------------------------------------
413  * Str_SYSVSubst --
414  *	Substitute '%' in the pattern with len characters from src.
415  *	If the pattern does not contain a '%' prepend len characters
416  *	from src.
417  *
418  * Side Effects:
419  *	Adds result to buf
420  *-----------------------------------------------------------------------
421  */
422 void
423 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
424 {
425 	const char *m;
426 
427 	if ((m = strchr(pat, '%')) != NULL) {
428 		/* Copy the prefix.  */
429 		Buf_Addi(buf, pat, m);
430 		/* Skip the %.	*/
431 		pat = m + 1;
432 	}
433 
434 	/* Copy the pattern.  */
435 	Buf_AddChars(buf, len, src);
436 
437 	/* Append the rest.  */
438 	Buf_AddString(buf, pat);
439 }
440 
/*
 * Str_dupi --
 *	Copy the interval [begin, end) into a new NUL-terminated string
 *	obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t len = end - begin;
	char *copy = emalloc(len + 1);

	memcpy(copy, begin, len);
	copy[len] = '\0';
	return copy;
}
451 
/*
 * escape_dupi --
 *	Copy the interval [begin, end) into a new NUL-terminated string
 *	obtained from emalloc(), dropping each backslash that comes right
 *	before a character found in set.  Other backslashes, including one
 *	that ends the interval, are kept.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	/* the result is never longer than the input */
	char *result = emalloc(end - begin + 1);
	char *out = result;
	const char *in;

	for (in = begin; in != end; ) {
		if (*in == '\\') {
			in++;
			if (in == end) {
				/* lone trailing backslash */
				*out++ = '\\';
				break;
			}
			/* only characters from set lose their backslash */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in++;
	}
	*out = '\0';
	return result;
}
473 
/*
 * Str_rchri --
 *	Find the last occurrence of character c in the interval
 *	[begin, end).  Returns a pointer to it, or NULL if c does not
 *	occur (in particular if the interval is empty).
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	const char *p = end;

	/* scan backwards from the last character of the interval */
	while (p != begin) {
		p--;
		if (*p == c)
			return (char *)p;
	}
	return NULL;
}
484