xref: /openbsd/usr.bin/make/varmodifiers.c (revision 7e30afce)
1 /*	$OpenBSD: varmodifiers.c,v 1.50 2024/06/18 02:11:04 millert Exp $	*/
2 /*	$NetBSD: var.c,v 1.18 1997/03/18 19:24:46 christos Exp $	*/
3 
4 /*
5  * Copyright (c) 1999-2010 Marc Espie.
6  *
7  * Extensive code changes for the OpenBSD project.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
22  * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 /*
31  * Copyright (c) 1988, 1989, 1990, 1993
32  *	The Regents of the University of California.  All rights reserved.
33  * Copyright (c) 1989 by Berkeley Softworks
34  * All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Adam de Boor.
38  *
39  * Redistribution and use in source and binary forms, with or without
40  * modification, are permitted provided that the following conditions
41  * are met:
42  * 1. Redistributions of source code must retain the above copyright
43  *    notice, this list of conditions and the following disclaimer.
44  * 2. Redistributions in binary form must reproduce the above copyright
45  *    notice, this list of conditions and the following disclaimer in the
46  *    documentation and/or other materials provided with the distribution.
47  * 3. Neither the name of the University nor the names of its contributors
48  *    may be used to endorse or promote products derived from this software
49  *    without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
/* VarModifiers_Apply is mostly a constituent function of Var_Parse; it
 * is also called directly by Var_SubstVar.  */
66 
67 
68 #include <assert.h>
69 #include <ctype.h>
70 #include <sys/types.h>
71 #include <regex.h>
72 #include <stddef.h>
73 #include <stdio.h>
74 #include <stdlib.h>
75 #include <string.h>
76 #include "defines.h"
77 #include "buf.h"
78 #include "var.h"
79 #include "varmodifiers.h"
80 #include "varname.h"
81 #include "targ.h"
82 #include "error.h"
83 #include "str.h"
84 #include "cmd_exec.h"
85 #include "memory.h"
86 #include "gnode.h"
87 
88 
/* Var*Pattern flags (stored in VarPattern.flags / VarREPattern.flags) */
#define VAR_SUB_GLOBAL	(1 << 0)	/* Apply substitution globally */
#define VAR_SUB_ONE	(1 << 1)	/* Apply substitution to one word */
#define VAR_SUB_MATCHED (1 << 2)	/* There was a match */
#define VAR_MATCH_START (1 << 3)	/* Match at start of word */
#define VAR_MATCH_END	(1 << 4)	/* Match at end of word */

/* Modifiers flags; these continue the same bit numbering as above. */
#define VAR_EQUAL	(1 << 5)
#define VAR_MAY_EQUAL	(1 << 6)
#define VAR_ADD_EQUAL	(1 << 7)
#define VAR_BANG_EQUAL	(1 << 8)
101 
/* Argument record for the plain-substitution and System V modifiers. */
typedef struct {
	char	*lbuffer;	/* Start of the lhs allocation (what to free) */
	char	*lhs;		/* String to match; may point past lbuffer */
	size_t	 leftLen;	/* Length of lhs */
	char	*rhs;		/* Replacement string */
	size_t	 rightLen;	/* Length of rhs */
	int	 flags;		/* VAR_SUB_* and VAR_MATCH_* bits */
} VarPattern;
110 
111 static bool VarHead(struct Name *, bool, Buffer, void *);
112 static bool VarTail(struct Name *, bool, Buffer, void *);
113 static bool VarSuffix(struct Name *, bool, Buffer, void *);
114 static bool VarRoot(struct Name *, bool, Buffer, void *);
115 static bool VarMatch(struct Name *, bool, Buffer, void *);
116 static bool VarSYSVMatch(struct Name *, bool, Buffer, void *);
117 static bool VarNoMatch(struct Name *, bool, Buffer, void *);
118 
119 
120 static void VarREError(int, regex_t *, const char *);
121 static bool VarRESubstitute(struct Name *, bool, Buffer, void *);
122 static char *do_regex(const char *, const struct Name *, void *);
123 
/* Argument record for regular-expression substitution (VarRESubstitute). */
typedef struct {
	regex_t		 re;		/* Expression handed to regexec */
	int		 nsub;		/* Number of usable matches[] slots */
	regmatch_t	*matches;	/* Offsets filled in by regexec */
	char		*replace;	/* Replacement template (&, \N, \&, \\) */
	int		 flags;		/* VAR_SUB_* bits */
} VarREPattern;
131 
132 static bool VarSubstitute(struct Name *, bool, Buffer, void *);
133 static char *VarGetPattern(SymTable *, int, const char **, int, int,
134     size_t *, VarPattern *);
135 static char *VarQuote(const char *, const struct Name *, void *);
136 static char *VarModify(char *, bool (*)(struct Name *, bool, Buffer, void *), void *);
137 
138 static void *check_empty(const char **, SymTable *, bool, int);
139 static void *check_quote(const char **, SymTable *, bool, int);
140 static char *do_upper(const char *, const struct Name *, void *);
141 static char *do_lower(const char *, const struct Name *, void *);
142 static void *check_shcmd(const char **, SymTable *, bool, int);
143 static char *do_shcmd(const char *, const struct Name *, void *);
144 static void *get_stringarg(const char **, SymTable *, bool, int);
145 static void free_stringarg(void *);
146 static void *get_patternarg(const char **, SymTable *, bool, int);
147 static void *get_spatternarg(const char **, SymTable *, bool, int);
148 static void *common_get_patternarg(const char **, SymTable *, bool, int, bool);
149 static void free_patternarg(void *);
150 static void *get_sysvpattern(const char **, SymTable *, bool, int);
151 
152 static struct Name dummy;
153 static struct Name *dummy_arg = &dummy;
154 
155 static struct modifier {
156 	    void * (*getarg)(const char **, SymTable *, bool, int);
157 	    char * (*apply)(const char *, const struct Name *, void *);
158 	    bool (*word_apply)(struct Name *, bool, Buffer, void *);
159 	    void   (*freearg)(void *);
160 } *choose_mod[256],
161 	match_mod = {get_stringarg, NULL, VarMatch, free_stringarg},
162 	nomatch_mod = {get_stringarg, NULL, VarNoMatch, free_stringarg},
163 	subst_mod = {get_spatternarg, NULL, VarSubstitute, free_patternarg},
164 	resubst_mod = {get_patternarg, do_regex, NULL, free_patternarg},
165 	quote_mod = {check_quote, VarQuote, NULL , free},
166 	tail_mod = {check_empty, NULL, VarTail, NULL},
167 	head_mod = {check_empty, NULL, VarHead, NULL},
168 	suffix_mod = {check_empty, NULL, VarSuffix, NULL},
169 	root_mod = {check_empty, NULL, VarRoot, NULL},
170 	upper_mod = {check_empty, do_upper, NULL, NULL},
171 	lower_mod = {check_empty, do_lower, NULL, NULL},
172 	shcmd_mod = {check_shcmd, do_shcmd, NULL, NULL},
173 	sysv_mod = {get_sysvpattern, NULL, VarSYSVMatch, free_patternarg}
174 ;
175 
176 void
VarModifiers_Init(void)177 VarModifiers_Init(void)
178 {
179 	choose_mod['M'] = &match_mod;
180 	choose_mod['N'] = &nomatch_mod;
181 	choose_mod['S'] = &subst_mod;
182 	choose_mod['C'] = &resubst_mod;
183 	choose_mod['Q'] = &quote_mod;
184 	choose_mod['T'] = &tail_mod;
185 	choose_mod['H'] = &head_mod;
186 	choose_mod['E'] = &suffix_mod;
187 	choose_mod['R'] = &root_mod;
188 	choose_mod['U'] = &upper_mod;
189 	choose_mod['L'] = &lower_mod;
190 	choose_mod['s'] = &shcmd_mod;
191 }
192 
193 /* All modifiers handle addSpace (need to add a space before placing the
194  * next word into the buffer) and propagate it when necessary.
195  */
196 
197 /*-
198  *-----------------------------------------------------------------------
199  * VarHead --
200  *	Remove the tail of the given word and add the result to the given
201  *	buffer.
202  *-----------------------------------------------------------------------
203  */
204 static bool
VarHead(struct Name * word,bool addSpace,Buffer buf,void * dummy UNUSED)205 VarHead(struct Name *word, bool addSpace, Buffer buf, void *dummy UNUSED)
206 {
207 	const char	*slash;
208 
209 	slash = Str_rchri(word->s, word->e, '/');
210 	if (slash != NULL) {
211 		if (addSpace)
212 			Buf_AddSpace(buf);
213 		Buf_Addi(buf, word->s, slash);
214 	} else {
215 		/* If no directory part, give . (q.v. the POSIX standard).  */
216 		if (addSpace)
217 			Buf_AddString(buf, " .");
218 		else
219 			Buf_AddChar(buf, '.');
220 	}
221 	return true;
222 }
223 
224 /*-
225  *-----------------------------------------------------------------------
226  * VarTail --
227  *	Remove the head of the given word add the result to the given
228  *	buffer.
229  *-----------------------------------------------------------------------
230  */
231 static bool
VarTail(struct Name * word,bool addSpace,Buffer buf,void * dummy UNUSED)232 VarTail(struct Name *word, bool addSpace, Buffer buf, void *dummy UNUSED)
233 {
234 	const char	*slash;
235 
236 	if (addSpace)
237 		Buf_AddSpace(buf);
238 	slash = Str_rchri(word->s, word->e, '/');
239 	if (slash != NULL)
240 		Buf_Addi(buf, slash+1, word->e);
241 	else
242 		Buf_Addi(buf, word->s, word->e);
243 	return true;
244 }
245 
246 /*-
247  *-----------------------------------------------------------------------
248  * VarSuffix --
249  *	Add the suffix of the given word to the given buffer.
250  *-----------------------------------------------------------------------
251  */
252 static bool
VarSuffix(struct Name * word,bool addSpace,Buffer buf,void * dummy UNUSED)253 VarSuffix(struct Name *word, bool addSpace, Buffer buf, void *dummy UNUSED)
254 {
255 	const char	*dot;
256 
257 	dot = Str_rchri(word->s, word->e, '.');
258 	if (dot != NULL) {
259 		if (addSpace)
260 			Buf_AddSpace(buf);
261 		Buf_Addi(buf, dot+1, word->e);
262 		addSpace = true;
263 	}
264 	return addSpace;
265 }
266 
267 /*-
268  *-----------------------------------------------------------------------
269  * VarRoot --
270  *	Remove the suffix of the given word and add the result to the
271  *	buffer.
272  *-----------------------------------------------------------------------
273  */
274 static bool
VarRoot(struct Name * word,bool addSpace,Buffer buf,void * dummy UNUSED)275 VarRoot(struct Name *word, bool addSpace, Buffer buf, void *dummy UNUSED)
276 {
277 	const char	*dot;
278 
279 	if (addSpace)
280 		Buf_AddSpace(buf);
281 	dot = Str_rchri(word->s, word->e, '.');
282 	if (dot != NULL)
283 		Buf_Addi(buf, word->s, dot);
284 	else
285 		Buf_Addi(buf, word->s, word->e);
286 	return true;
287 }
288 
289 /*-
290  *-----------------------------------------------------------------------
291  * VarMatch --
292  *	Add the word to the buffer if it matches the given pattern.
293  *-----------------------------------------------------------------------
294  */
295 static bool
VarMatch(struct Name * word,bool addSpace,Buffer buf,void * pattern)296 VarMatch(struct Name *word, bool addSpace, Buffer buf, void *pattern)
297 {
298 	const char *pat = pattern;
299 
300 	if (Str_Matchi(word->s, word->e, pat, strchr(pat, '\0'))) {
301 		if (addSpace)
302 			Buf_AddSpace(buf);
303 		Buf_Addi(buf, word->s, word->e);
304 		return true;
305 	} else
306 		return addSpace;
307 }
308 
309 /*-
310  *-----------------------------------------------------------------------
311  * VarNoMatch --
312  *	Add the word to the buffer if it doesn't match the given pattern.
313  *-----------------------------------------------------------------------
314  */
315 static bool
VarNoMatch(struct Name * word,bool addSpace,Buffer buf,void * pattern)316 VarNoMatch(struct Name *word, bool addSpace, Buffer buf, void *pattern)
317 {
318 	const char *pat = pattern;
319 
320 	if (!Str_Matchi(word->s, word->e, pat, strchr(pat, '\0'))) {
321 		if (addSpace)
322 			Buf_AddSpace(buf);
323 		Buf_Addi(buf, word->s, word->e);
324 		return true;
325 	} else
326 		return addSpace;
327 }
328 
329 /*-
330  *-----------------------------------------------------------------------
331  * VarSYSVMatch --
332  *	Add the word to the buffer if it matches the given pattern.
333  *	Used to implement the System V % modifiers.
334  *-----------------------------------------------------------------------
335  */
336 static bool
VarSYSVMatch(struct Name * word,bool addSpace,Buffer buf,void * patp)337 VarSYSVMatch(struct Name *word, bool addSpace, Buffer buf, void *patp)
338 {
339 	size_t	len;
340 	const char	*ptr;
341 	VarPattern	*pat = patp;
342 
343 	if (*word->s != '\0') {
344 		if (addSpace)
345 			Buf_AddSpace(buf);
346 		if ((ptr = Str_SYSVMatch(word->s, pat->lhs, &len)) != NULL)
347 			Str_SYSVSubst(buf, pat->rhs, ptr, len);
348 		else
349 			Buf_Addi(buf, word->s, word->e);
350 		return true;
351 	} else
352 		return addSpace;
353 }
354 
355 void *
get_sysvpattern(const char ** p,SymTable * ctxt UNUSED,bool err,int endc)356 get_sysvpattern(const char **p, SymTable *ctxt UNUSED, bool err, int endc)
357 {
358 	VarPattern		*pattern;
359 	const char		*cp, *cp2;
360 	BUFFER buf, buf2;
361 	int cnt = 0;
362 	char startc = endc == ')' ? '(' : '{';
363 
364 	Buf_Init(&buf, 0);
365 	for (cp = *p;; cp++) {
366 		if (*cp == '=' && cnt == 0)
367 			break;
368 		if (*cp == '\0') {
369 			Buf_Destroy(&buf);
370 			return NULL;
371 		}
372 		if (*cp == startc)
373 			cnt++;
374 		else if (*cp == endc) {
375 			cnt--;
376 			if (cnt < 0) {
377 				Buf_Destroy(&buf);
378 				return NULL;
379 			}
380 		} else if (*cp == '$') {
381 			if (cp[1] == '$')
382 				cp++;
383 			else {
384 				size_t len;
385 				(void)Var_ParseBuffer(&buf, cp, ctxt, err,
386 				    &len);
387 				cp += len - 1;
388 				continue;
389 			}
390 		}
391 		Buf_AddChar(&buf, *cp);
392 	}
393 
394 	Buf_Init(&buf2, 0);
395 	for (cp2 = cp+1;; cp2++) {
396 		if (((*cp2 == ':' && cp2[1] != endc) || *cp2 == endc) &&
397 		    cnt == 0)
398 			break;
399 		if (*cp2 == '\0') {
400 			Buf_Destroy(&buf);
401 			Buf_Destroy(&buf2);
402 			return NULL;
403 		}
404 		if (*cp2 == startc)
405 			cnt++;
406 		else if (*cp2 == endc) {
407 			cnt--;
408 			if (cnt < 0) {
409 				Buf_Destroy(&buf);
410 				Buf_Destroy(&buf2);
411 				return NULL;
412 			}
413 		} else if (*cp2 == '$') {
414 			if (cp2[1] == '$')
415 				cp2++;
416 			else {
417 				size_t len;
418 				(void)Var_ParseBuffer(&buf2, cp2, ctxt, err,
419 				    &len);
420 				cp2 += len - 1;
421 				continue;
422 			}
423 		}
424 		Buf_AddChar(&buf2, *cp2);
425 	}
426 
427 	pattern = emalloc(sizeof(VarPattern));
428 	pattern->lbuffer = pattern->lhs = Buf_Retrieve(&buf);
429 	pattern->leftLen = Buf_Size(&buf);
430 	pattern->rhs = Buf_Retrieve(&buf2);
431 	pattern->rightLen = Buf_Size(&buf2);
432 	pattern->flags = 0;
433 	*p = cp2;
434 	return pattern;
435 }
436 
437 
438 /*-
439  *-----------------------------------------------------------------------
440  * VarSubstitute --
441  *	Perform a string-substitution on the given word, Adding the
442  *	result to the given buffer.
443  *-----------------------------------------------------------------------
444  */
445 static bool
VarSubstitute(struct Name * word,bool addSpace,Buffer buf,void * patternp)446 VarSubstitute(struct Name *word, bool addSpace, Buffer buf,
447     void *patternp) /* Pattern for substitution */
448 {
449     size_t	wordLen;    /* Length of word */
450     const char	*cp;	    /* General pointer */
451     VarPattern	*pattern = patternp;
452 
453     wordLen = word->e - word->s;
454     if ((pattern->flags & (VAR_SUB_ONE|VAR_SUB_MATCHED)) !=
455 	(VAR_SUB_ONE|VAR_SUB_MATCHED)) {
456 	/* Still substituting -- break it down into simple anchored cases
457 	 * and if none of them fits, perform the general substitution case.  */
458 	if ((pattern->flags & VAR_MATCH_START) &&
459 	    (strncmp(word->s, pattern->lhs, pattern->leftLen) == 0)) {
460 		/* Anchored at start and beginning of word matches pattern.  */
461 		if ((pattern->flags & VAR_MATCH_END) &&
462 		    (wordLen == pattern->leftLen)) {
463 			/* Also anchored at end and matches to the end (word
464 			 * is same length as pattern) add space and rhs only
465 			 * if rhs is non-null.	*/
466 			if (pattern->rightLen != 0) {
467 			    if (addSpace)
468 				Buf_AddSpace(buf);
469 			    addSpace = true;
470 			    Buf_AddChars(buf, pattern->rightLen,
471 					 pattern->rhs);
472 			}
473 			pattern->flags |= VAR_SUB_MATCHED;
474 		} else if (pattern->flags & VAR_MATCH_END) {
475 		    /* Doesn't match to end -- copy word wholesale.  */
476 		    goto nosub;
477 		} else {
478 		    /* Matches at start but need to copy in
479 		     * trailing characters.  */
480 		    if ((pattern->rightLen + wordLen - pattern->leftLen) != 0){
481 			if (addSpace)
482 			    Buf_AddSpace(buf);
483 			addSpace = true;
484 		    }
485 		    Buf_AddChars(buf, pattern->rightLen, pattern->rhs);
486 		    Buf_AddChars(buf, wordLen - pattern->leftLen,
487 				 word->s + pattern->leftLen);
488 		    pattern->flags |= VAR_SUB_MATCHED;
489 		}
490 	} else if (pattern->flags & VAR_MATCH_START) {
491 	    /* Had to match at start of word and didn't -- copy whole word.  */
492 	    goto nosub;
493 	} else if (pattern->flags & VAR_MATCH_END) {
494 	    /* Anchored at end, Find only place match could occur (leftLen
495 	     * characters from the end of the word) and see if it does. Note
496 	     * that because the $ will be left at the end of the lhs, we have
497 	     * to use strncmp.	*/
498 	    cp = word->s + (wordLen - pattern->leftLen);
499 	    if (cp >= word->s &&
500 		strncmp(cp, pattern->lhs, pattern->leftLen) == 0) {
501 		/* Match found. If we will place characters in the buffer,
502 		 * add a space before hand as indicated by addSpace, then
503 		 * stuff in the initial, unmatched part of the word followed
504 		 * by the right-hand-side.  */
505 		if (((cp - word->s) + pattern->rightLen) != 0) {
506 		    if (addSpace)
507 			Buf_AddSpace(buf);
508 		    addSpace = true;
509 		}
510 		Buf_Addi(buf, word->s, cp);
511 		Buf_AddChars(buf, pattern->rightLen, pattern->rhs);
512 		pattern->flags |= VAR_SUB_MATCHED;
513 	    } else {
514 		/* Had to match at end and didn't. Copy entire word.  */
515 		goto nosub;
516 	    }
517 	} else {
518 	    /* Pattern is unanchored: search for the pattern in the word using
519 	     * strstr, copying unmatched portions and the
520 	     * right-hand-side for each match found, handling non-global
521 	     * substitutions correctly, etc. When the loop is done, any
522 	     * remaining part of the word (word and wordLen are adjusted
523 	     * accordingly through the loop) is copied straight into the
524 	     * buffer.
525 	     * addSpace is set to false as soon as a space is added to the
526 	     * buffer.	*/
527 	    bool done;
528 	    size_t origSize;
529 
530 	    done = false;
531 	    origSize = Buf_Size(buf);
532 	    while (!done) {
533 		cp = strstr(word->s, pattern->lhs);
534 		if (cp != NULL) {
535 		    if (addSpace && (cp - word->s) + pattern->rightLen != 0){
536 			Buf_AddSpace(buf);
537 			addSpace = false;
538 		    }
539 		    Buf_Addi(buf, word->s, cp);
540 		    Buf_AddChars(buf, pattern->rightLen, pattern->rhs);
541 		    wordLen -= (cp - word->s) + pattern->leftLen;
542 		    word->s = cp + pattern->leftLen;
543 		    if (wordLen == 0 || (pattern->flags & VAR_SUB_GLOBAL) == 0)
544 			done = true;
545 		    pattern->flags |= VAR_SUB_MATCHED;
546 		} else
547 		    done = true;
548 	    }
549 	    if (wordLen != 0) {
550 		if (addSpace)
551 		    Buf_AddSpace(buf);
552 		Buf_AddChars(buf, wordLen, word->s);
553 	    }
554 	    /* If added characters to the buffer, need to add a space
555 	     * before we add any more. If we didn't add any, just return
556 	     * the previous value of addSpace.	*/
557 	    return Buf_Size(buf) != origSize || addSpace;
558 	}
559 	return addSpace;
560     }
561  nosub:
562     if (addSpace)
563 	Buf_AddSpace(buf);
564     Buf_AddChars(buf, wordLen, word->s);
565     return true;
566 }
567 
/*-
 *-----------------------------------------------------------------------
 * VarREError --
 *	Print the error caused by a regcomp or regexec call.
 *
 *	err is the code returned by that call, pat the expression it was
 *	applied to, and str a prefix printed before the message text.
 *-----------------------------------------------------------------------
 */
static void
VarREError(int err, regex_t *pat, const char *str)
{
	char	*errbuf;
	size_t	errlen;

	/* With no buffer, regerror only reports the size that is needed. */
	errlen = regerror(err, pat, NULL, 0);
	errbuf = emalloc(errlen);
	regerror(err, pat, errbuf, errlen);
	Error("%s: %s", str, errbuf);
	free(errbuf);
}
586 
587 /*-
588  *-----------------------------------------------------------------------
589  * VarRESubstitute --
590  *	Perform a regex substitution on the given word, placing the
591  *	result in the passed buffer.
592  *-----------------------------------------------------------------------
593  */
594 static bool
VarRESubstitute(struct Name * word,bool addSpace,Buffer buf,void * patternp)595 VarRESubstitute(struct Name *word, bool addSpace, Buffer buf, void *patternp)
596 {
597 	VarREPattern	*pat;
598 	int 		xrv;
599 	const char		*wp;
600 	char		*rp;
601 	int 		added;
602 
603 #define MAYBE_ADD_SPACE()		\
604 	if (addSpace && !added) 	\
605 		Buf_AddSpace(buf);	\
606 	added = 1
607 
608 	added = 0;
609 	wp = word->s;
610 	pat = patternp;
611 
612 	if ((pat->flags & (VAR_SUB_ONE|VAR_SUB_MATCHED)) ==
613 	    (VAR_SUB_ONE|VAR_SUB_MATCHED))
614 		xrv = REG_NOMATCH;
615 	else {
616 	tryagain:
617 		xrv = regexec(&pat->re, wp, pat->nsub, pat->matches, 0);
618 	}
619 
620 	switch (xrv) {
621 	case 0:
622 		pat->flags |= VAR_SUB_MATCHED;
623 		if (pat->matches[0].rm_so > 0) {
624 			MAYBE_ADD_SPACE();
625 			Buf_AddChars(buf, pat->matches[0].rm_so, wp);
626 		}
627 
628 		for (rp = pat->replace; *rp; rp++) {
629 			if (*rp == '\\' && (rp[1] == '&' || rp[1] == '\\')) {
630 				MAYBE_ADD_SPACE();
631 				Buf_AddChar(buf,rp[1]);
632 				rp++;
633 			}
634 			else if (*rp == '&' ||
635 			    (*rp == '\\' && ISDIGIT(rp[1]))) {
636 				int n;
637 				const char *subbuf;
638 				int sublen;
639 				char errstr[3];
640 
641 				if (*rp == '&') {
642 					n = 0;
643 					errstr[0] = '&';
644 					errstr[1] = '\0';
645 				} else {
646 					n = rp[1] - '0';
647 					errstr[0] = '\\';
648 					errstr[1] = rp[1];
649 					errstr[2] = '\0';
650 					rp++;
651 				}
652 
653 				if (n >= pat->nsub) {
654 					Error("No subexpression %s",
655 					    &errstr[0]);
656 					subbuf = "";
657 					sublen = 0;
658 				} else if (pat->matches[n].rm_so == -1 &&
659 				    pat->matches[n].rm_eo == -1) {
660 					Error("No match for subexpression %s",
661 					    &errstr[0]);
662 					subbuf = "";
663 					sublen = 0;
664 				} else {
665 					subbuf = wp + pat->matches[n].rm_so;
666 					sublen = pat->matches[n].rm_eo -
667 					    pat->matches[n].rm_so;
668 				}
669 
670 				if (sublen > 0) {
671 					MAYBE_ADD_SPACE();
672 					Buf_AddChars(buf, sublen, subbuf);
673 				}
674 			} else {
675 				MAYBE_ADD_SPACE();
676 				Buf_AddChar(buf, *rp);
677 			}
678 		}
679 		wp += pat->matches[0].rm_eo;
680 		if (pat->flags & VAR_SUB_GLOBAL) {
681 			/* like most modern tools, empty string matches
682 			 * should advance one char at a time...
683 			 */
684 			if (pat->matches[0].rm_eo == 0)  {
685 				if (*wp) {
686 					MAYBE_ADD_SPACE();
687 					Buf_AddChar(buf, *wp++);
688 				} else
689 					break;
690 			}
691 			goto tryagain;
692 		}
693 		if (*wp) {
694 			MAYBE_ADD_SPACE();
695 			Buf_AddString(buf, wp);
696 		}
697 		break;
698 	default:
699 		VarREError(xrv, &pat->re, "Unexpected regex error");
700 	       /* FALLTHROUGH */
701 	case REG_NOMATCH:
702 		if (*wp) {
703 			MAYBE_ADD_SPACE();
704 			Buf_AddString(buf, wp);
705 		}
706 		break;
707 	}
708 	return addSpace||added;
709 }
710 
711 /*-
712  *-----------------------------------------------------------------------
713  * VarModify --
714  *	Modify each of the words of the passed string using the given
715  *	function. Used to implement most modifiers.
716  *
717  * Results:
718  *	A string of all the words modified appropriately.
719  *-----------------------------------------------------------------------
720  */
721 static char *
VarModify(char * str,bool (* modProc)(struct Name *,bool,Buffer,void *),void * datum)722 VarModify(char *str, 		/* String whose words should be trimmed */
723 				/* Function to use to modify them */
724     bool (*modProc)(struct Name *, bool, Buffer, void *),
725     void *datum)		/* Datum to pass it */
726 {
727 	BUFFER	  buf;		/* Buffer for the new string */
728 	bool	  addSpace;	/* true if need to add a space to the
729 				     * buffer before adding the trimmed
730 				     * word */
731 	struct Name	  word;
732 
733 	Buf_Init(&buf, 0);
734 	addSpace = false;
735 
736 	word.e = str;
737 
738 	while ((word.s = iterate_words(&word.e)) != NULL) {
739 		char termc;
740 
741 		termc = *word.e;
742 		*((char *)(word.e)) = '\0';
743 		addSpace = (*modProc)(&word, addSpace, &buf, datum);
744 		*((char *)(word.e)) = termc;
745 	}
746 	return Buf_Retrieve(&buf);
747 }
748 
749 /*-
750  *-----------------------------------------------------------------------
751  * VarGetPattern --
752  *	Pass through the tstr looking for 1) escaped delimiters,
753  *	'$'s and backslashes (place the escaped character in
754  *	uninterpreted) and 2) unescaped $'s that aren't before
755  *	the delimiter (expand the variable substitution).
756  *	Return the expanded string or NULL if the delimiter was missing
757  *	If pattern is specified, handle escaped ampersands, and replace
758  *	unescaped ampersands with the lhs of the pattern.
759  *
760  * Results:
761  *	A string of all the words modified appropriately.
762  *	If length is specified, return the string length of the buffer
763  *-----------------------------------------------------------------------
764  */
765 static char *
VarGetPattern(SymTable * ctxt,int err,const char ** tstr,int delim1,int delim2,size_t * length,VarPattern * pattern)766 VarGetPattern(SymTable *ctxt, int err, const char **tstr, int delim1,
767     int delim2, size_t *length, VarPattern *pattern)
768 {
769 	const char	*cp;
770 	char	*result;
771 	BUFFER	buf;
772 	size_t	junk;
773 
774 	Buf_Init(&buf, 0);
775 	if (length == NULL)
776 		length = &junk;
777 
778 #define IS_A_MATCH(cp, delim1, delim2) \
779 	(cp[0] == '\\' && (cp[1] == delim1 || cp[1] == delim2 || \
780 	 cp[1] == '\\' || cp[1] == '$' || (pattern && cp[1] == '&')))
781 
782 	/*
783 	 * Skim through until the matching delimiter is found;
784 	 * pick up variable substitutions on the way. Also allow
785 	 * backslashes to quote the delimiter, $, and \, but don't
786 	 * touch other backslashes.
787 	 */
788 	for (cp = *tstr; *cp != '\0' && *cp != delim1 && *cp != delim2; cp++) {
789 		if (IS_A_MATCH(cp, delim1, delim2)) {
790 			Buf_AddChar(&buf, cp[1]);
791 			cp++;
792 		} else if (*cp == '$') {
793 			/* Allowed at end of pattern */
794 			if (cp[1] == delim1 || cp[1] == delim2)
795 				Buf_AddChar(&buf, *cp);
796 			else {
797 				size_t len;
798 
799 				/* If unescaped dollar sign not before the
800 				 * delimiter, assume it's a variable
801 				 * substitution and recurse.  */
802 				(void)Var_ParseBuffer(&buf, cp, ctxt, err,
803 				    &len);
804 				cp += len - 1;
805 			}
806 		} else if (pattern && *cp == '&')
807 			Buf_AddChars(&buf, pattern->leftLen, pattern->lhs);
808 		else
809 			Buf_AddChar(&buf, *cp);
810 	}
811 
812 	*length = Buf_Size(&buf);
813 	result = Buf_Retrieve(&buf);
814 
815 	if (*cp != delim1 && *cp != delim2) {
816 		*tstr = cp;
817 		*length = 0;
818 		free(result);
819 		return NULL;
820 	}
821 	else {
822 		*tstr = ++cp;
823 		return result;
824 	}
825 }
826 
827 /*-
828  *-----------------------------------------------------------------------
829  * VarQuote --
830  *	Quote shell meta-characters in the string
831  *
832  * Results:
833  *	The quoted string
834  *-----------------------------------------------------------------------
835  */
836 static char *
VarQuote(const char * str,const struct Name * n UNUSED,void * islistp)837 VarQuote(const char *str, const struct Name *n UNUSED, void *islistp)
838 {
839 	int *p = islistp;
840 	int islist = *p;
841 
842 	BUFFER	  buf;
843 	/* This should cover most shells :-( */
844 	static char meta[] = "\n \t'`\";&<>()|*?{}[]\\$!#^~";
845 	char *rep = meta;
846 	if (islist)
847 		rep += 3;
848 
849 	Buf_Init(&buf, MAKE_BSIZE);
850 	for (; *str; str++) {
851 		if (strchr(rep, *str) != NULL)
852 			Buf_AddChar(&buf, '\\');
853 		Buf_AddChar(&buf, *str);
854 	}
855 	return Buf_Retrieve(&buf);
856 }
857 
858 static void *
check_empty(const char ** p,SymTable * ctxt UNUSED,bool b UNUSED,int endc)859 check_empty(const char **p, SymTable *ctxt UNUSED, bool b UNUSED, int endc)
860 {
861 	dummy_arg->s = NULL;
862 	if ((*p)[1] == endc || (*p)[1] == ':') {
863 		(*p)++;
864 		return dummy_arg;
865 	} else
866 		return NULL;
867 }
868 
869 static void *
check_quote(const char ** p,SymTable * ctxt UNUSED,bool b UNUSED,int endc)870 check_quote(const char **p, SymTable *ctxt UNUSED, bool b UNUSED, int endc)
871 {
872 	int *qargs = emalloc(sizeof(int));
873 	*qargs = 0;
874 	if ((*p)[1] == 'L') {
875 		*qargs = 1;
876 		(*p)++;
877 	}
878 	if ((*p)[1] == endc || (*p)[1] == ':') {
879 		(*p)++;
880 		return qargs;
881 	} else  {
882 		free(qargs);
883 		return NULL;
884 	}
885 }
886 
887 static void *
check_shcmd(const char ** p,SymTable * ctxt UNUSED,bool b UNUSED,int endc)888 check_shcmd(const char **p, SymTable *ctxt UNUSED, bool b UNUSED, int endc)
889 {
890 	if ((*p)[1] == 'h' && ((*p)[2] == endc || (*p)[2] == ':')) {
891 		(*p)+=2;
892 		return dummy_arg;
893 	} else
894 		return NULL;
895 }
896 
897 
898 static char *
do_shcmd(const char * s,const struct Name * n UNUSED,void * arg UNUSED)899 do_shcmd(const char *s, const struct Name *n UNUSED, void *arg UNUSED)
900 {
901 	char *err;
902 	char *t;
903 
904 	t = Cmd_Exec(s, &err);
905 	if (err)
906 		Error(err, s);
907 	return t;
908 }
909 
910 static void *
get_stringarg(const char ** p,SymTable * ctxt UNUSED,bool b UNUSED,int endc)911 get_stringarg(const char **p, SymTable *ctxt UNUSED, bool b UNUSED, int endc)
912 {
913 	const char *cp;
914 	char *s;
915 
916 	for (cp = *p + 1; *cp != ':' && *cp != endc; cp++) {
917 		if (*cp == '\\') {
918 			if (cp[1] == ':' || cp[1] == endc || cp[1] == '\\')
919 				cp++;
920 		} else if (*cp == '\0')
921 			return NULL;
922 	}
923 	s = escape_dupi(*p+1, cp, ":)}");
924 	*p = cp;
925 	return s;
926 }
927 
/*
 * free_stringarg --
 *	Release an argument string returned by get_stringarg.
 */
static void
free_stringarg(void *arg)
{
	free(arg);
}
933 
934 static char *
do_upper(const char * s,const struct Name * n UNUSED,void * arg UNUSED)935 do_upper(const char *s, const struct Name *n UNUSED, void *arg UNUSED)
936 {
937 	size_t len, i;
938 	char *t;
939 
940 	len = strlen(s);
941 	t = emalloc(len+1);
942 	for (i = 0; i < len; i++)
943 		t[i] = TOUPPER(s[i]);
944 	t[len] = '\0';
945 	return t;
946 }
947 
948 static char *
do_lower(const char * s,const struct Name * n UNUSED,void * arg UNUSED)949 do_lower(const char *s, const struct Name *n UNUSED, void *arg UNUSED)
950 {
951 	size_t	len, i;
952 	char	*t;
953 
954 	len = strlen(s);
955 	t = emalloc(len+1);
956 	for (i = 0; i < len; i++)
957 		t[i] = TOLOWER(s[i]);
958 	t[len] = '\0';
959 	return t;
960 }
961 
962 static void *
get_patternarg(const char ** p,SymTable * ctxt,bool err,int endc)963 get_patternarg(const char **p, SymTable *ctxt, bool err, int endc)
964 {
965 	return common_get_patternarg(p, ctxt, err, endc, false);
966 }
967 
968 /* Extract anchors */
969 static void *
get_spatternarg(const char ** p,SymTable * ctxt,bool err,int endc)970 get_spatternarg(const char **p, SymTable *ctxt, bool err, int endc)
971 {
972 	return common_get_patternarg(p, ctxt, err, endc, true);
973 }
974 
975 static void *
common_get_patternarg(const char ** p,SymTable * ctxt,bool err,int endc,bool dosubst)976 common_get_patternarg(const char **p, SymTable *ctxt, bool err, int endc,
977     bool dosubst)
978 {
979 	VarPattern *pattern;
980 	char delim;
981 	const char *s;
982 
983 	pattern = emalloc(sizeof(VarPattern));
984 	pattern->flags = 0;
985 	s = *p;
986 
987 	delim = s[1];
988 	if (delim == '\0')
989 		return NULL;
990 	s += 2;
991 
992 	pattern->rhs = NULL;
993 	pattern->lhs = VarGetPattern(ctxt, err, &s, delim, delim,
994 	    &pattern->leftLen, NULL);
995 	pattern->lbuffer = pattern->lhs;
996 	if (pattern->lhs != NULL) {
997 		if (dosubst && pattern->leftLen > 0) {
998 			if (pattern->lhs[pattern->leftLen-1] == '$') {
999 				    pattern->leftLen--;
1000 				    pattern->flags |= VAR_MATCH_END;
1001 			}
1002 			if (pattern->lhs[0] == '^') {
1003 				    pattern->lhs++;
1004 				    pattern->leftLen--;
1005 				    pattern->flags |= VAR_MATCH_START;
1006 			}
1007 		}
1008 		pattern->rhs = VarGetPattern(ctxt, err, &s, delim, delim,
1009 		    &pattern->rightLen, dosubst ? pattern: NULL);
1010 		if (pattern->rhs != NULL) {
1011 			/* Check for global substitution. If 'g' after the
1012 			 * final delimiter, substitution is global and is
1013 			 * marked that way.  */
1014 			for (;; s++) {
1015 				switch (*s) {
1016 				case 'g':
1017 					pattern->flags |= VAR_SUB_GLOBAL;
1018 					continue;
1019 				case '1':
1020 					pattern->flags |= VAR_SUB_ONE;
1021 					continue;
1022 				}
1023 				break;
1024 			}
1025 			if (*s == endc || *s == ':') {
1026 				*p = s;
1027 				return pattern;
1028 			}
1029 		}
1030 	}
1031 	free_patternarg(pattern);
1032 	return NULL;
1033 }
1034 
1035 static void
free_patternarg(void * p)1036 free_patternarg(void *p)
1037 {
1038 	VarPattern *vp = p;
1039 
1040 	free(vp->lbuffer);
1041 	free(vp->rhs);
1042 	free(vp);
1043 }
1044 
1045 static char *
do_regex(const char * s,const struct Name * n UNUSED,void * arg)1046 do_regex(const char *s, const struct Name *n UNUSED, void *arg)
1047 {
1048 	VarREPattern p2;
1049 	VarPattern *p = arg;
1050 	int error;
1051 	char *result;
1052 
1053 	error = regcomp(&p2.re, p->lhs, REG_EXTENDED);
1054 	if (error) {
1055 		VarREError(error, &p2.re, "RE substitution error");
1056 		return var_Error;
1057 	}
1058 	p2.nsub = p2.re.re_nsub + 1;
1059 	p2.replace = p->rhs;
1060 	p2.flags = p->flags;
1061 	if (p2.nsub < 1)
1062 		p2.nsub = 1;
1063 	if (p2.nsub > 10)
1064 		p2.nsub = 10;
1065 	p2.matches = ereallocarray(NULL, p2.nsub, sizeof(regmatch_t));
1066 	result = VarModify((char *)s, VarRESubstitute, &p2);
1067 	regfree(&p2.re);
1068 	free(p2.matches);
1069 	return result;
1070 }
1071 
1072 char *
VarModifiers_Apply(char * str,const struct Name * name,SymTable * ctxt,bool err,bool * freePtr,const char ** pscan,int paren)1073 VarModifiers_Apply(char *str, const struct Name *name, SymTable *ctxt,
1074     bool err, bool *freePtr, const char **pscan, int paren)
1075 {
1076 	const char *tstr;
1077 	char endc = paren == '(' ? ')' : '}';
1078 	const char *start = *pscan;
1079 
1080 	tstr = start;
1081 	/*
1082 	 * Now we need to apply any modifiers the user wants applied.
1083 	 * These are:
1084 	 *		  :M<pattern>	words which match the given <pattern>.
1085 	 *				<pattern> is of the standard file
1086 	 *				wildcarding form.
1087 	 *		  :S<d><pat1><d><pat2><d>[g]
1088 	 *				Substitute <pat2> for <pat1> in the
1089 	 *				value
1090 	 *		  :C<d><pat1><d><pat2><d>[g]
1091 	 *				Substitute <pat2> for regex <pat1> in
1092 	 *				the value
1093 	 *		  :H		Substitute the head of each word
1094 	 *		  :T		Substitute the tail of each word
1095 	 *		  :E		Substitute the extension (minus '.') of
1096 	 *				each word
1097 	 *		  :R		Substitute the root of each word
1098 	 *				(pathname minus the suffix).
1099 	 *		  :lhs=rhs	Like :S, but the rhs goes to the end of
1100 	 *				the invocation.
1101 	 */
1102 
1103 	while (*tstr != endc && *tstr != '\0') {
1104 		struct modifier *mod;
1105 		void *arg;
1106 		char *newStr;
1107 
1108 		tstr++;
1109 		if (DEBUG(VAR)) {
1110 			if (str != NULL)
1111 				printf("Applying :%c to \"%s\"\n", *tstr, str);
1112 			else
1113 				printf("Applying :%c\n", *tstr);
1114 		}
1115 
1116 		mod = choose_mod[(unsigned char)*tstr];
1117 		arg = NULL;
1118 
1119 		if (mod != NULL)
1120 			arg = mod->getarg(&tstr, ctxt, err, endc);
1121 		if (arg == NULL) {
1122 			mod = &sysv_mod;
1123 			arg = mod->getarg(&tstr, ctxt, err, endc);
1124 		}
1125 		if (arg != NULL) {
1126 			if (str != NULL) {
1127 				if (mod->word_apply != NULL) {
1128 					newStr = VarModify(str,
1129 					    mod->word_apply, arg);
1130 					assert(mod->apply == NULL);
1131 				} else
1132 					newStr = mod->apply(str, name, arg);
1133 				if (*freePtr)
1134 					free(str);
1135 				str = newStr;
1136 				if (str != var_Error)
1137 					*freePtr = true;
1138 				else
1139 					*freePtr = false;
1140 			}
1141 			if (mod->freearg != NULL)
1142 				mod->freearg(arg);
1143 		} else {
1144 			Error("Bad modifier: %s", tstr);
1145 			/* Try skipping to end of var... */
1146 			while (*tstr != endc && *tstr != '\0')
1147 				tstr++;
1148 			if (str != NULL && *freePtr)
1149 				free(str);
1150 			str = var_Error;
1151 			*freePtr = false;
1152 			break;
1153 		}
1154 		if (DEBUG(VAR) && str != NULL)
1155 			printf("Result is \"%s\"\n", str);
1156 	}
1157 	if (*tstr == '\0')
1158 		Parse_Error(PARSE_FATAL, "Unclosed variable specification");
1159 	else
1160 		tstr++;
1161 
1162 	*pscan = tstr;
1163 	return str;
1164 }
1165 
1166 char *
Var_GetHead(char * s)1167 Var_GetHead(char *s)
1168 {
1169 	return VarModify(s, VarHead, NULL);
1170 }
1171 
1172 char *
Var_GetTail(char * s)1173 Var_GetTail(char *s)
1174 {
1175 	return VarModify(s, VarTail, NULL);
1176 }
1177