xref: /openbsd/usr.bin/make/str.c (revision f2dfb0a4)
1 /*	$OpenBSD: str.c,v 1.6 1998/03/07 18:34:50 millert Exp $	*/
2 /*	$NetBSD: str.c,v 1.13 1996/11/06 17:59:23 christos Exp $	*/
3 
4 /*-
5  * Copyright (c) 1988, 1989, 1990, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * Copyright (c) 1989 by Berkeley Softworks
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Adam de Boor.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  */
41 
42 #ifndef lint
43 #if 0
44 static char     sccsid[] = "@(#)str.c	5.8 (Berkeley) 6/1/90";
45 #else
46 static char rcsid[] = "$OpenBSD: str.c,v 1.6 1998/03/07 18:34:50 millert Exp $";
47 #endif
48 #endif				/* not lint */
49 
50 #include "make.h"
51 
/*
 * State shared by the word-splitting code:
 *	argv	word vector returned by brk_string(); allocated with
 *		argmax + 1 slots, slot 0 is filled in by str_init().
 *	buffer	private copy of the string most recently split by
 *		brk_string(); the words in argv[1..] point into it.
 *	argmax	current capacity of argv (not counting the extra slot).
 *	curlen	number of bytes currently allocated for buffer.
 */
static char **argv, *buffer;
static int argmax, curlen;
54 
55 /*
56  * str_init --
57  *	Initialize the strings package
58  *
59  */
60 void
61 str_init()
62 {
63     char *p1;
64     argv = (char **)emalloc(((argmax = 50) + 1) * sizeof(char *));
65     argv[0] = Var_Value(".MAKE", VAR_GLOBAL, &p1);
66 }
67 
68 
69 /*
70  * str_end --
71  *	Cleanup the strings package
72  *
73  */
74 void
75 str_end()
76 {
77     if (argv) {
78 	if (argv[0])
79 	    free(argv[0]);
80 	free((Address) argv);
81     }
82     if (buffer)
83 	free(buffer);
84 }
85 
86 /*-
87  * str_concat --
88  *	concatenate the two strings, inserting a space or slash between them,
89  *	freeing them if requested.
90  *
91  * returns --
92  *	the resulting string in allocated space.
93  */
94 char *
95 str_concat(s1, s2, flags)
96 	char *s1, *s2;
97 	int flags;
98 {
99 	register int len1, len2;
100 	register char *result;
101 
102 	/* get the length of both strings */
103 	len1 = strlen(s1);
104 	len2 = strlen(s2);
105 
106 	/* allocate length plus separator plus EOS */
107 	result = emalloc((u_int)(len1 + len2 + 2));
108 
109 	/* copy first string into place */
110 	memcpy(result, s1, len1);
111 
112 	/* add separator character */
113 	if (flags & STR_ADDSPACE) {
114 		result[len1] = ' ';
115 		++len1;
116 	} else if (flags & STR_ADDSLASH) {
117 		result[len1] = '/';
118 		++len1;
119 	}
120 
121 	/* copy second string plus EOS into place */
122 	memcpy(result + len1, s2, len2 + 1);
123 
124 	/* free original strings */
125 	if (flags & STR_DOFREE) {
126 		(void)free(s1);
127 		(void)free(s2);
128 	}
129 	return(result);
130 }
131 
132 /*-
133  * brk_string --
134  *	Fracture a string into an array of words (as delineated by tabs or
135  *	spaces) taking quotation marks into account.  Leading tabs/spaces
136  *	are ignored.
137  *
138  * returns --
139  *	Pointer to the array of pointers to the words.  To make life easier,
140  *	the first word is always the value of the .MAKE variable.
141  */
142 char **
143 brk_string(str, store_argc, expand)
144 	register char *str;
145 	int *store_argc;
146 	Boolean expand;
147 {
148 	register int argc, ch;
149 	register char inquote, *p, *start, *t;
150 	int len;
151 
152 	/* skip leading space chars. */
153 	for (; *str == ' ' || *str == '\t'; ++str)
154 		continue;
155 
156 	/* allocate room for a copy of the string */
157 	if ((len = strlen(str) + 1) > curlen) {
158 		if (buffer)
159 		    free(buffer);
160 		buffer = emalloc(curlen = len);
161 	}
162 
163 	/*
164 	 * copy the string; at the same time, parse backslashes,
165 	 * quotes and build the argument list.
166 	 */
167 	argc = 1;
168 	inquote = '\0';
169 	for (p = str, start = t = buffer;; ++p) {
170 		switch(ch = *p) {
171 		case '"':
172 		case '\'':
173 			if (inquote) {
174 				if (inquote == ch)
175 					inquote = '\0';
176 				else
177 					break;
178 			} else {
179 				inquote = (char) ch;
180 				/* Don't miss "" or '' */
181 				if (start == NULL && p[1] == inquote) {
182 					start = t + 1;
183 					break;
184 				}
185 			}
186 			if (!expand) {
187 				if (!start)
188 					start = t;
189 				*t++ = ch;
190 			}
191 			continue;
192 		case ' ':
193 		case '\t':
194 		case '\n':
195 			if (inquote)
196 				break;
197 			if (!start)
198 				continue;
199 			/* FALLTHROUGH */
200 		case '\0':
201 			/*
202 			 * end of a token -- make sure there's enough argv
203 			 * space and save off a pointer.
204 			 */
205 			if (!start)
206 			    goto done;
207 
208 			*t++ = '\0';
209 			if (argc == argmax) {
210 				argmax *= 2;		/* ramp up fast */
211 				argv = (char **)erealloc(argv,
212 				    (argmax + 1) * sizeof(char *));
213 			}
214 			argv[argc++] = start;
215 			start = (char *)NULL;
216 			if (ch == '\n' || ch == '\0')
217 				goto done;
218 			continue;
219 		case '\\':
220 			if (!expand) {
221 				if (!start)
222 					start = t;
223 				*t++ = '\\';
224 				ch = *++p;
225 				break;
226 			}
227 
228 			switch (ch = *++p) {
229 			case '\0':
230 			case '\n':
231 				/* hmmm; fix it up as best we can */
232 				ch = '\\';
233 				--p;
234 				break;
235 			case 'b':
236 				ch = '\b';
237 				break;
238 			case 'f':
239 				ch = '\f';
240 				break;
241 			case 'n':
242 				ch = '\n';
243 				break;
244 			case 'r':
245 				ch = '\r';
246 				break;
247 			case 't':
248 				ch = '\t';
249 				break;
250 			}
251 			break;
252 		}
253 		if (!start)
254 			start = t;
255 		*t++ = (char) ch;
256 	}
257 done:	argv[argc] = (char *)NULL;
258 	*store_argc = argc;
259 	return(argv);
260 }
261 
/*
 * Str_FindSubstring -- Locate the first occurrence of one string inside
 * another.
 *
 * Results: A pointer to the first place in string where substring occurs,
 * or NULL if it does not occur.  Characters are compared exactly; nothing
 * in substring is treated as a wildcard.  An empty substring is never
 * found, so NULL is returned for it.
 *
 * Side effects: None.
 */
char *
Str_FindSubstring(string, substring)
	register char *string;		/* String to search. */
	char *substring;		/* Substring to find in string */
{
	char *hay, *needle;

	while (*string != '\0') {
		/* Cheap test first: only compare fully on a first-char hit. */
		if (*string == *substring) {
			hay = string;
			needle = substring;
			while (*needle != '\0' && *hay == *needle) {
				++hay;
				++needle;
			}
			/* Ran off the end of substring: everything matched. */
			if (*needle == '\0')
				return(string);
		}
		++string;
	}
	return((char *) NULL);
}
299 
/*
 * Str_Match --
 *
 * See if a particular string matches a particular pattern.
 *
 * Results: Non-zero is returned if string matches pattern, 0 otherwise. The
 * whole string has to be consumed by the pattern.  The following special
 * characters are recognized in the pattern:
 *	*	matches any sequence of characters, including none
 *	?	matches any single character
 *	[...]	matches any single character that is listed, or that lies
 *		in a range written as "x-y" (the ends may be given in
 *		either order); a class that is missing its closing ']' is
 *		treated as if it ended at the end of the pattern
 *	\x	matches the character x exactly, even if x is special
 *
 * Side effects: None.
 */
int
Str_Match(string, pattern)
	register char *string;		/* String */
	register char *pattern;		/* Pattern */
{
	char c2;

	for (;;) {
		/*
		 * See if we're at the end of both the pattern and the
		 * string. If so, we succeeded.  If we're at the end of the
		 * pattern but not at the end of the string, we failed.
		 */
		if (*pattern == 0)
			return(!*string);
		if (*string == 0 && *pattern != '*')
			return(0);
		/*
		 * Check for a "*" as the next pattern character.  It matches
		 * any substring.  We handle this by calling ourselves
		 * recursively for each postfix of string, until either we
		 * match or we reach the end of the string.
		 */
		if (*pattern == '*') {
			/*
			 * A run of stars means the same as one star.
			 * Skipping the whole run lets "**" match the empty
			 * remainder and avoids pointless nested recursion.
			 */
			while (*pattern == '*')
				++pattern;
			if (*pattern == 0)
				return(1);
			while (*string != 0) {
				if (Str_Match(string, pattern))
					return(1);
				++string;
			}
			/*
			 * What is left of the pattern needs at least one
			 * character, and there is none.
			 */
			return(0);
		}
		/*
		 * Check for a "?" as the next pattern character.  It matches
		 * any single character.
		 */
		if (*pattern == '?')
			goto thisCharOK;
		/*
		 * Check for a "[" as the next pattern character.  It is
		 * followed by a list of characters that are acceptable, or
		 * by a range (two characters separated by "-").
		 */
		if (*pattern == '[') {
			++pattern;
			for (;;) {
				/* ran out of alternatives: no match */
				if ((*pattern == ']') || (*pattern == 0))
					return(0);
				if (*pattern == *string)
					break;
				if (pattern[1] == '-') {
					c2 = pattern[2];
					if (c2 == 0)
						return(0);
					/* accept the range in either order */
					if ((*pattern <= *string) &&
					    (c2 >= *string))
						break;
					if ((*pattern >= *string) &&
					    (c2 <= *string))
						break;
					pattern += 2;
				}
				++pattern;
			}
			/* matched: skip the rest of the class */
			while ((*pattern != ']') && (*pattern != 0))
				++pattern;
			/*
			 * No closing ']': the pattern ends here.  Do not
			 * step past its terminating NUL; the match succeeds
			 * only if the character just accepted was the last
			 * one of the string.
			 */
			if (*pattern == 0)
				return(string[1] == 0);
			goto thisCharOK;
		}
		/*
		 * If the next pattern character is '\', just strip off the
		 * '\' so we do exact matching on the character that follows.
		 */
		if (*pattern == '\\') {
			++pattern;
			if (*pattern == 0)
				return(0);
		}
		/*
		 * There's no special character.  Just make sure that the
		 * next characters of each string match.
		 */
		if (*pattern != *string)
			return(0);
thisCharOK:	++pattern;
		++string;
	}
}
400 
401 
/*-
 *-----------------------------------------------------------------------
 * Str_SYSVMatch --
 *	Match a word against a System V style pattern in which a single
 *	'%' stands for an arbitrary run of characters.  Whatever precedes
 *	the '%' must begin the word and whatever follows it must end the
 *	word.  A pattern without '%' only has to match the end of the
 *	word; an empty pattern matches the whole word.
 *
 * Results:
 *	A pointer into word where the part covered by '%' begins (the
 *	start of the word when there is no '%'), or NULL if the word does
 *	not match.  On success *len is set to the length of that part.
 *
 * Side Effects:
 *	None
 *
 *-----------------------------------------------------------------------
 */
char *
Str_SYSVMatch(word, pattern, len)
    char	*word;		/* Word to examine */
    char	*pattern;	/* Pattern to examine against */
    int		*len;		/* Number of characters to substitute */
{
    char *tail = pattern;	/* part of the pattern still to be matched */
    char *rest = word;		/* part of the word still to be matched */
    char *percent;
    char *stem;

    /* Null pattern is the whole string */
    if (pattern[0] == '\0') {
	*len = strlen(word);
	return word;
    }

    percent = strchr(pattern, '%');
    if (percent != NULL) {
	/* the text in front of the % has to start the word */
	while (tail != percent && *rest != '\0' && *rest == *tail) {
	    rest++;
	    tail++;
	}
	if (tail != percent)
	    return NULL;	/* No match */

	/* step over the %; nothing after it means "rest of the word" */
	tail++;
	if (*tail == '\0') {
	    *len = strlen(rest);
	    return rest;
	}
    }

    /*
     * Find a matching tail: try every suffix of what is left of the
     * word, the empty one included, against the remaining pattern.
     */
    stem = rest;
    for (;;) {
	if (strcmp(tail, rest) == 0) {
	    *len = rest - stem;
	    return stem;
	}
	if (*rest == '\0')
	    break;
	rest++;
    }

    return NULL;
}
459 
460 
461 /*-
462  *-----------------------------------------------------------------------
463  * Str_SYSVSubst --
464  *	Substitute '%' on the pattern with len characters from src.
465  *	If the pattern does not contain a '%' prepend len characters
466  *	from src.
467  *
468  * Results:
469  *	None
470  *
471  * Side Effects:
472  *	Places result on buf
473  *
474  *-----------------------------------------------------------------------
475  */
476 void
477 Str_SYSVSubst(buf, pat, src, len)
478     Buffer buf;
479     char *pat;
480     char *src;
481     int   len;
482 {
483     char *m;
484 
485     if ((m = strchr(pat, '%')) != NULL) {
486 	/* Copy the prefix */
487 	Buf_AddBytes(buf, m - pat, (Byte *) pat);
488 	/* skip the % */
489 	pat = m + 1;
490     }
491 
492     /* Copy the pattern */
493     Buf_AddBytes(buf, len, (Byte *) src);
494 
495     /* append the rest */
496     Buf_AddBytes(buf, strlen(pat), (Byte *) pat);
497 }
498