xref: /openbsd/usr.bin/make/str.c (revision fc61954a)
1 /*	$OpenBSD: str.c,v 1.31 2014/05/18 08:08:50 espie Exp $	*/
2 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
3 
4 /*-
5  * Copyright (c) 1988, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 1989 by Berkeley Softworks
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Adam de Boor.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include <ctype.h>
39 #include <string.h>
40 #include "config.h"
41 #include "defines.h"
42 #include "str.h"
43 #include "memory.h"
44 #include "buf.h"
45 
46 /* helpers for Str_Matchi */
47 static bool range_match(char, const char **, const char *);
48 static bool star_match(const char *, const char *, const char *, const char *);
49 
/*
 * Str_concati --
 *	Build a freshly allocated, NUL-terminated string made of the
 *	interval [s1, e1), an optional separator character, and the
 *	interval [s2, e2).
 *
 *	sep is the separator character, or 0 for no separator.
 *
 * returns --
 *	The new string, obtained from emalloc().
 */
char *
Str_concati(const char *s1, const char *e1, const char *s2, const char *e2,
    int sep)
{
	size_t len1 = e1 - s1;
	size_t len2 = e2 - s2;
	size_t seplen = sep ? 1 : 0;
	char *result, *p;

	result = emalloc(len1 + seplen + len2 + 1);
	p = result;

	/* copy exactly the first interval; never read the byte at e1 */
	memcpy(p, s1, len1);
	p += len1;

	/* add separator character */
	if (sep)
		*p++ = sep;

	/* copy second interval and terminate */
	memcpy(p, s2, len2);
	p[len2] = '\0';
	return result;
}
78 
/*-
 * brk_string --
 *	Fracture a string into an array of words (as delineated by tabs,
 *	spaces or a newline) taking quotation marks and backslash escapes
 *	into account.  Leading tabs/spaces are ignored.
 *
 *	Single and double quotes group characters into one word and are
 *	themselves removed; inside one kind of quote the other kind is an
 *	ordinary character.  The escapes \b, \f, \n, \r and \t are
 *	translated; a backslash before any other character makes that
 *	character literal; a backslash directly before a newline or the
 *	end of the string is kept as a backslash.  Parsing stops at the
 *	first unquoted newline or at the end of the string.
 *
 *	*buffer is set to a block obtained from emalloc() that holds the
 *	text of the words; the returned pointers point into it.
 *	*store_argc receives the number of words.
 *
 * returns --
 *	Pointer to the NULL-terminated array of pointers to the words,
 *	starting at index 0.  Because the start of the first word is
 *	primed to the beginning of the buffer, an input containing no
 *	word at all still yields a single empty word.
 */
char **
brk_string(const char *str, int *store_argc, char **buffer)
{
	int argc;
	char ch;
	char inquote;
	const char *p;
	char *start, *t;
	int argmax = 50;
	char **argv = ereallocarray(NULL, argmax + 1, sizeof(char *));

	/* skip leading space chars. */
	while (*str == ' ' || *str == '\t')
		++str;

	/*
	 * allocate room for a copy of the string; the copy can never be
	 * longer than the input, since every stored byte consumes at
	 * least one input byte.
	 */
	*buffer = emalloc(strlen(str) + 1);

	/*
	 * copy the string; at the same time, parse backslashes,
	 * quotes and build the argument list.
	 * start is the beginning of the word being built (NULL between
	 * words), t is the next free byte in the copy.
	 */
	argc = 0;
	inquote = '\0';
	for (p = str, start = t = *buffer;; ++p) {
		switch (ch = *p) {
		case '"':
		case '\'':
			if (inquote) {
				if (inquote == ch)
					inquote = '\0';
				else
					break;	/* other quote: literal */
			} else {
				inquote = ch;
				/* Don't miss "" or '' */
				if (start == NULL && p[1] == inquote) {
					/*
					 * the quote stored below lands just
					 * before start, so the word is empty
					 */
					start = t + 1;
					break;
				}
			}
			continue;
		case ' ':
		case '\t':
		case '\n':
			if (inquote)
				break;
			if (!start)
				continue;
			/* FALLTHROUGH */
		case '\0':
			/*
			 * end of a token -- make sure there's enough argv
			 * space and save off a pointer.
			 */
			if (!start)
				goto done;

			*t++ = '\0';
			if (argc == argmax) {
				argmax *= 2;	/* ramp up fast */
				argv = ereallocarray(argv,
				    (argmax + 1), sizeof(char *));
			}
			argv[argc++] = start;
			start = NULL;
			if (ch == '\n' || ch == '\0')
				goto done;
			continue;
		case '\\':
			switch (ch = *++p) {
			case '\0':
			case '\n':
				/* hmmm; fix it up as best we can */
				ch = '\\';
				--p;
				break;
			case 'b':
				ch = '\b';
				break;
			case 'f':
				ch = '\f';
				break;
			case 'n':
				ch = '\n';
				break;
			case 'r':
				ch = '\r';
				break;
			case 't':
				ch = '\t';
				break;
			}
			break;
		}
		if (!start)
			start = t;
		*t++ = ch;
	}
done:
	argv[argc] = NULL;
	*store_argc = argc;
	return argv;
}
196 
197 
/*
 * iterate_words --
 *	Find the next word at or after *end.  Leading whitespace (as
 *	defined by ISSPACE) is skipped.  A word runs up to the next
 *	space or tab that is not inside single or double quotes, or up
 *	to the end of the string.  A backslash protects the character
 *	that follows it.
 *
 * returns --
 *	The start of the word, with *end updated to point just past it,
 *	or NULL (leaving *end untouched) when only whitespace remains.
 */
const char *
iterate_words(const char **end)
{
	const char *word = *end;
	const char *cursor;
	char quote = 0;		/* active quote character, 0 if none */

	while (ISSPACE(*word))
		word++;
	if (*word == '\0')
		return NULL;

	for (cursor = word;; cursor++) {
		char ch = *cursor;

		if (ch == '\\') {
			/* skip the escaped character, unless at the end */
			if (cursor[1] != '\0')
				cursor++;
		} else if (ch == '\'' || ch == '"') {
			if (quote == ch)
				quote = 0;
			else if (quote == 0)
				quote = ch;
		} else if (ch == '\0' ||
		    ((ch == ' ' || ch == '\t') && quote == 0)) {
			*end = cursor;
			return word;
		}
	}
}
235 
/*
 * star_match --
 *	Helper for Str_Matchi, entered with pattern pointing at a '*'.
 *	The '*' may absorb any (possibly empty) run of characters: once
 *	the wildcards are out of the way, the rest of the pattern is
 *	tried against successive suffixes of the string.
 */
static bool
star_match(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	const char *tail;

	/*
	 * Step over the '*' and any `?*' sequence that follows, so that
	 * the recursive calls below only start on `real' characters.
	 * Each '?' still has to consume one character of the string.
	 */
	for (pattern++; pattern != epattern; pattern++) {
		if (*pattern == '*')
			continue;
		if (*pattern != '?')
			break;
		if (string == estring)
			return false;
		string++;
	}

	/* nothing left after the wildcards: everything matches */
	if (pattern == epattern)
		return true;

	for (tail = string; tail != estring; tail++) {
		if (Str_Matchi(tail, estring, pattern, epattern))
			return true;
	}
	return false;
}
264 
/*
 * range_match --
 *	Helper for Str_Matchi: check character c against a character
 *	class.  *ppat points just after the opening '['; the pattern
 *	ends at epattern, which is never dereferenced.
 *
 *	A leading '!' or '^' negates the class.  '\' quotes the next
 *	character.  "x-y" stands for a range, in either order.  A ']'
 *	right at the start of the class is an ordinary member.  A '['
 *	that is the very last pattern character only matches a
 *	literal '['.
 *
 * returns --
 *	true if c is accepted.  For a non-negated class, *ppat is then
 *	left on the closing ']', or on epattern if the class is not
 *	terminated.  When no member matches, *ppat is left on the ']'
 *	or on epattern, except for a truncated range ("x-" at the very
 *	end), where it stays on the range start.
 */
static bool
range_match(char c, const char **ppat, const char *epattern)
{
	if (*ppat == epattern)
		return c == '[';
	if (**ppat == '!' || **ppat == '^') {
		(*ppat)++;
		return !range_match(c, ppat, epattern);
	}
	for (;;) {
		if (**ppat == '\\') {
			if (++(*ppat) == epattern)
				return false;
		}
		if (**ppat == c)
			break;
		/* only look for the '-' of a range inside the pattern */
		if (*ppat + 1 != epattern && (*ppat)[1] == '-') {
			if (*ppat + 2 == epattern)
				return false;
			if (**ppat < c && c <= (*ppat)[2])
				break;
			if ((*ppat)[2] <= c && c < **ppat)
				break;
			*ppat += 3;
		} else
			(*ppat)++;
		/* The test for ']' is done at the end
		 * so that ']' can be used at the
		 * start of the range without '\' */
		if (*ppat == epattern || **ppat == ']')
			return false;
	}
	/* Found matching character, skip over rest
	 * of class.  A non-terminated character class
	 * is ok. */
	while (*ppat != epattern && **ppat != ']') {
		if (**ppat == '\\') {
			(*ppat)++;
			if (*ppat == epattern)
				break;
		}
		(*ppat)++;
	}
	return true;
}
314 
/*
 * Str_Matchi --
 *	Shell-style pattern matching of the interval [string, estring)
 *	against the pattern [pattern, epattern).
 *
 *	'*' matches any run of characters, '?' any single character,
 *	"[...]" one character out of a class (see range_match), and
 *	'\' quotes the character that follows it.
 *
 * returns --
 *	true if the whole string matches the whole pattern.
 */
bool
Str_Matchi(const char *string, const char *estring,
    const char *pattern, const char *epattern)
{
	while (pattern != epattern) {
		/* Check for a "*" as the next pattern character.  */
		if (*pattern == '*')
			return star_match(string, estring, pattern, epattern);
		/* Anything else needs one character of the string.  */
		if (string == estring)
			return false;
		/* Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").  */
		if (*pattern == '[') {
			pattern++;
			if (!range_match(*string, &pattern, epattern))
				return false;
			/* A lone '[' or a non-terminated class leaves
			 * pattern on epattern: there is no ']' to step
			 * over, and going further would run past the end
			 * of the pattern.  */
			if (pattern == epattern) {
				string++;
				break;
			}
		}
		/* '?' matches any single character, so shunt test.  */
		else if (*pattern != '?') {
			/* If the next pattern character is '\', just strip
			 * off the '\' so we do exact matching on the
			 * character that follows.  */
			if (*pattern == '\\') {
				if (++pattern == epattern)
					return false;
			}
			/* There's no special character.  Just make sure that
			 * the next characters of each string match.  */
			if (*pattern != *string)
				return false;
		}
		pattern++;
		string++;
	}
	/* The pattern is used up: the string must be as well.  */
	return string == estring;
}
356 
357 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Check word against a System V style pattern, in which a single
 *	'%' stands for any run of characters.  Text before the '%' must
 *	match the start of the word, text after it must match the end
 *	of what remains.  A pattern without '%' must match the end of
 *	the word.  An empty pattern matches the whole word.
 *
 * Results:
 *	Returns the beginning position of the part covered by the '%'
 *	(the start of the word if there is no '%'), or NULL if there
 *	is no match.  The number of characters covered is returned
 *	in len; len is not written when there is no match.
 *-----------------------------------------------------------------------
 */
const char *
Str_SYSVMatch(const char *word, const char *pattern, size_t *len)
{
	const char *percent;
	size_t restlen, taillen;

	if (*pattern == '\0') {
		/* Null pattern is the whole string.  */
		*len = strlen(word);
		return word;
	}

	percent = strchr(pattern, '%');
	if (percent != NULL) {
		size_t prefixlen = (size_t)(percent - pattern);

		/* The word has to start with the pattern's prefix.  */
		if (strncmp(word, pattern, prefixlen) != 0)
			return NULL;
		word += prefixlen;
		pattern = percent + 1;

		if (*pattern == '\0') {
			/* Nothing after the %: it takes all the rest.  */
			*len = strlen(word);
			return word;
		}
	}

	/* What is left of the pattern must be the tail of the word.  */
	restlen = strlen(word);
	taillen = strlen(pattern);
	if (taillen > restlen ||
	    strcmp(word + (restlen - taillen), pattern) != 0)
		return NULL;

	*len = restlen - taillen;
	return word;
}
408 
409 
410 /*-
411  *-----------------------------------------------------------------------
412  * Str_SYSVSubst --
413  *	Substitute '%' in the pattern with len characters from src.
414  *	If the pattern does not contain a '%' prepend len characters
415  *	from src.
416  *
417  * Side Effects:
418  *	Adds result to buf
419  *-----------------------------------------------------------------------
420  */
421 void
422 Str_SYSVSubst(Buffer buf, const char *pat, const char *src, size_t len)
423 {
424 	const char *m;
425 
426 	if ((m = strchr(pat, '%')) != NULL) {
427 		/* Copy the prefix.  */
428 		Buf_Addi(buf, pat, m);
429 		/* Skip the %.	*/
430 		pat = m + 1;
431 	}
432 
433 	/* Copy the pattern.  */
434 	Buf_AddChars(buf, len, src);
435 
436 	/* Append the rest.  */
437 	Buf_AddString(buf, pat);
438 }
439 
/*
 * Str_dupi --
 *	Duplicate the interval [begin, end) into a freshly allocated,
 *	NUL-terminated string obtained from emalloc().
 */
char *
Str_dupi(const char *begin, const char *end)
{
	size_t len = end - begin;
	char *copy = emalloc(len + 1);

	memcpy(copy, begin, len);
	copy[len] = '\0';
	return copy;
}
450 
/*
 * escape_dupi --
 *	Duplicate the interval [begin, end) into a freshly allocated,
 *	NUL-terminated string obtained from emalloc(), removing the
 *	backslash in front of any character found in set.  A backslash
 *	in front of any other character, or at the very end of the
 *	interval, is kept.
 */
char *
escape_dupi(const char *begin, const char *end, const char *set)
{
	/* the result is never longer than the input interval */
	char *copy = emalloc(end - begin + 1);
	char *out = copy;
	const char *in;

	for (in = begin; in != end; in++) {
		if (*in == '\\') {
			if (++in == end) {
				/* trailing backslash: keep it */
				*out++ = '\\';
				break;
			}
			/* not an escapable character: keep the backslash */
			if (strchr(set, *in) == NULL)
				*out++ = '\\';
		}
		*out++ = *in;
	}
	*out = '\0';
	return copy;
}
472 
/*
 * Str_rchri --
 *	Find the last occurrence of character c in the interval
 *	[begin, end).
 *
 * returns --
 *	A pointer to that occurrence, or NULL if c does not occur.
 */
char *
Str_rchri(const char *begin, const char *end, int c)
{
	/* walk backwards from the end of the interval */
	while (end != begin) {
		--end;
		if (*end == c)
			return (char *)end;
	}
	return NULL;
}
483