1 /*	$NetBSD: var.c,v 1.930 2021/04/19 22:22:27 rillig Exp $	*/
2 
3 /*
4  * Copyright (c) 1988, 1989, 1990, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Adam de Boor.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * Copyright (c) 1989 by Berkeley Softworks
37  * All rights reserved.
38  *
39  * This code is derived from software contributed to Berkeley by
40  * Adam de Boor.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. All advertising materials mentioning features or use of this software
51  *    must display the following acknowledgement:
52  *	This product includes software developed by the University of
53  *	California, Berkeley and its contributors.
54  * 4. Neither the name of the University nor the names of its contributors
55  *    may be used to endorse or promote products derived from this software
56  *    without specific prior written permission.
57  *
58  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
59  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
60  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
61  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
62  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
63  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
64  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
65  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
66  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
67  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
68  * SUCH DAMAGE.
69  */
70 
71 /*
72  * Handling of variables and the expressions formed from them.
73  *
74  * Variables are set using lines of the form VAR=value.  Both the variable
75  * name and the value can contain references to other variables, by using
76  * expressions like ${VAR}, ${VAR:Modifiers}, ${${VARNAME}} or ${VAR:${MODS}}.
77  *
78  * Interface:
79  *	Var_Init	Initialize this module.
80  *
81  *	Var_End		Clean up the module.
82  *
83  *	Var_Set
84  *	Var_SetExpand
85  *			Set the value of the variable, creating it if
86  *			necessary.
87  *
88  *	Var_Append
89  *	Var_AppendExpand
90  *			Append more characters to the variable, creating it if
91  *			necessary. A space is placed between the old value and
92  *			the new one.
93  *
94  *	Var_Exists
95  *	Var_ExistsExpand
96  *			See if a variable exists.
97  *
98  *	Var_Value	Return the unexpanded value of a variable, or NULL if
99  *			the variable is undefined.
100  *
101  *	Var_Subst	Substitute all variable expressions in a string.
102  *
103  *	Var_Parse	Parse a variable expression such as ${VAR:Mpattern}.
104  *
105  *	Var_Delete
106  *	Var_DeleteExpand
107  *			Delete a variable.
108  *
109  *	Var_ReexportVars
110  *			Export some or even all variables to the environment
111  *			of this process and its child processes.
112  *
113  *	Var_Export	Export the variable to the environment of this process
114  *			and its child processes.
115  *
116  *	Var_UnExport	Don't export the variable anymore.
117  *
118  * Debugging:
119  *	Var_Stats	Print out hashing statistics if in -dh mode.
120  *
121  *	Var_Dump	Print out all variables defined in the given scope.
122  *
123  * XXX: There's a lot of almost duplicate code in these functions that only
124  *  differs in subtle details that are not mentioned in the manual page.
125  */
126 
127 #include <sys/stat.h>
128 #include <sys/types.h>
129 #ifndef NO_REGEX
130 #include <regex.h>
131 #endif
132 
133 #include "make.h"
134 
135 #include <errno.h>
136 #ifdef HAVE_INTTYPES_H
137 #include <inttypes.h>
138 #elif defined(HAVE_STDINT_H)
139 #include <stdint.h>
140 #endif
141 #ifdef HAVE_LIMITS_H
142 #include <limits.h>
143 #endif
144 #include <time.h>
145 
146 #include "dir.h"
147 #include "job.h"
148 #include "metachar.h"
149 
150 /*	"@(#)var.c	8.3 (Berkeley) 3/19/94" */
151 MAKE_RCSID("$NetBSD: var.c,v 1.930 2021/04/19 22:22:27 rillig Exp $");
152 
153 /*
154  * Variables are defined using one of the VAR=value assignments.  Their
155  * value can be queried by expressions such as $V, ${VAR}, or with modifiers
156  * such as ${VAR:S,from,to,g:Q}.
157  *
158  * There are 3 kinds of variables: scope variables, environment variables,
159  * undefined variables.
160  *
161  * Scope variables are stored in a GNode.scope.  The only way to undefine
162  * a scope variable is using the .undef directive.  In particular, it must
163  * not be possible to undefine a variable during the evaluation of an
164  * expression, or Var.name might point nowhere.
165  *
166  * Environment variables are temporary.  They are returned by VarFind, and
167  * after using them, they must be freed using VarFreeEnv.
168  *
169  * Undefined variables occur during evaluation of variable expressions such
170  * as ${UNDEF:Ufallback} in Var_Parse and ApplyModifiers.
171  */
typedef struct Var {
	/*
	 * The name of the variable, once set, doesn't change anymore.
	 * For scope variables, it aliases the corresponding HashEntry name.
	 * For environment and undefined variables, it is allocated.
	 */
	FStr name;

	/* The unexpanded value of the variable. */
	Buffer val;

	/*
	 * The variable came from the command line.
	 * Set by Var_SetWithFlags for variables in SCOPE_CMDLINE; see
	 * ExistsInCmdline for how it shadows assignments in SCOPE_GLOBAL.
	 */
	bool fromCmd: 1;

	/*
	 * The variable comes from the environment.
	 * These variables are not registered in any GNode, therefore they
	 * must be freed as soon as they are not used anymore.
	 * See VarFreeEnv.
	 */
	bool fromEnv: 1;

	/*
	 * The variable value cannot be changed anymore, and the variable
	 * cannot be deleted.  Any attempts to do so are silently ignored,
	 * they are logged with -dv though.
	 *
	 * See VAR_SET_READONLY.
	 */
	bool readOnly: 1;

	/*
	 * The variable's value is currently being used by Var_Parse or
	 * Var_Subst.  This marker is used to avoid endless recursion.
	 */
	bool inUse: 1;

	/*
	 * The variable is exported to the environment, to be used by child
	 * processes.  Set by ExportVarPlain, cleared by UnexportVar.
	 */
	bool exported: 1;

	/*
	 * At the point where this variable was exported, it contained an
	 * unresolved reference to another variable.  Before any child
	 * process is started, it needs to be exported again, in the hope
	 * that the referenced variable can then be resolved.
	 * See Var_ReexportVars.
	 */
	bool reexport: 1;
} Var;
222 
/*
 * Tracks how much work Var_ReexportVars has to do before a child process
 * is started.
 *
 * Exporting variables is expensive and may leak memory, so skip it if we
 * can.
 *
 * To avoid the cost and the leaks, it might be worth encapsulating the
 * environment variables in a separate data structure called EnvVars.
 */
typedef enum VarExportedMode {
	/* No variable has been exported; there is nothing to re-export. */
	VAR_EXPORTED_NONE,
	/* Only the variables listed in MAKE_EXPORTED are re-exported. */
	VAR_EXPORTED_SOME,
	/* Every variable in SCOPE_GLOBAL is re-exported; see Var_Export. */
	VAR_EXPORTED_ALL
} VarExportedMode;
235 
/*
 * Selects what Var_UnExport removes from the set of exported variables;
 * computed by GetVarnamesToUnexport from the directive and its argument.
 */
typedef enum UnexportWhat {
	/* Unexport the variables given by name. */
	UNEXPORT_NAMED,
	/*
	 * Unexport all globals previously exported, but keep the environment
	 * inherited from the parent.
	 */
	UNEXPORT_ALL,
	/*
	 * Unexport all globals previously exported and clear the environment
	 * inherited from the parent.
	 */
	UNEXPORT_ENV
} UnexportWhat;
250 
/*
 * Flags for pattern matching in the :S and :C modifiers.
 * Each flag is a one-bit field named after the modifier character that
 * enables it.
 */
typedef struct PatternFlags {
	bool subGlobal: 1;	/* 'g': replace as often as possible */
	bool subOnce: 1;	/* '1': replace only once */
	bool anchorStart: 1;	/* '^': match only at start of word */
	bool anchorEnd: 1;	/* '$': match only at end of word */
} PatternFlags;
258 
/* SepBuf builds a string from words interleaved with separators. */
typedef struct SepBuf {
	/* The string built so far. */
	Buffer buf;
	/*
	 * NOTE(review): presumably true once a word has been added, so that
	 * the next word is preceded by 'sep' -- confirm against the SepBuf
	 * functions, which are not visible in this part of the file.
	 */
	bool needSep;
	/* Usually ' ', but see the ':ts' modifier. */
	char sep;
} SepBuf;
266 
267 
/*
 * This lets us tell if we have replaced the original environ
 * (which we cannot free).  ClearEnv stores the replacement environment
 * here; NULL means that environ has not been replaced yet.
 */
char **savedEnv = NULL;

/*
 * Special return value for Var_Parse, indicating a parse error.  It may be
 * caused by an undefined variable, a syntax error in a modifier or
 * something entirely different.
 *
 * NOTE(review): both var_Error and varUndefined are empty strings, so they
 * are presumably told apart by address rather than by content -- confirm.
 */
char var_Error[] = "";

/*
 * Special return value for Var_Parse, indicating an undefined variable in
 * a case where VARE_UNDEFERR is not set.  This undefined variable is
 * typically a dynamic variable such as ${.TARGET}, whose expansion needs to
 * be deferred until it is defined in an actual target.
 *
 * See VARE_EVAL_KEEP_UNDEF.
 */
static char varUndefined[] = "";

/*
 * Traditionally this make consumed $$ during := like any other expansion.
 * Other makes do not, and this make follows suit since 2016-01-09.
 *
 * This knob allows controlling the behavior:
 *	false to consume $$ during := assignment.
 *	true to preserve $$ during := assignment.
 *
 * The knob is updated by Var_SetWithFlags whenever the variable named
 * MAKE_SAVE_DOLLARS is assigned.
 */
#define MAKE_SAVE_DOLLARS ".MAKE.SAVE_DOLLARS"
static bool save_dollars = false;
301 
/*
 * A scope collects variable names and their values.
 *
 * The main scope is SCOPE_GLOBAL, which contains the variables that are set
 * in the makefiles.  SCOPE_INTERNAL acts as a fallback for SCOPE_GLOBAL and
 * contains some internal make variables.  These internal variables can thus
 * be overridden, they can also be restored by undefining the overriding
 * variable.
 *
 * SCOPE_CMDLINE contains variables from the command line arguments.  These
 * override variables from SCOPE_GLOBAL.
 *
 * There is no scope for environment variables, these are generated on-the-fly
 * whenever they are referenced.  If there were such a scope, each change to
 * environment variables would have to be reflected in that scope, which may
 * be simpler or more complex than the current implementation.
 *
 * Each target has its own scope, containing the 7 target-local variables
 * .TARGET, .ALLSRC, etc.  No other variables are in these scopes.
 *
 * See VarFindSubstring for the order in which the scopes are searched.
 */

GNode *SCOPE_CMDLINE;
GNode *SCOPE_GLOBAL;
GNode *SCOPE_INTERNAL;

/*
 * How many variables need to be considered by Var_ReexportVars.
 * Reset to VAR_EXPORTED_NONE when MAKE_EXPORTED is deleted, see Var_Delete.
 */
static VarExportedMode var_exportedVars = VAR_EXPORTED_NONE;

/*
 * Human-readable names of the evaluation modes.
 * NOTE(review): presumably indexed by VarEvalMode, in which case the order
 * must match that enum -- confirm against its declaration.
 */
static const char *VarEvalMode_Name[] = {
	"parse-only",
	"eval",
	"eval-defined",
	"eval-keep-dollar",
	"eval-keep-undefined",
	"eval-keep-dollar-and-undefined",
};
337 
338 
339 static Var *
VarNew(FStr name,const char * value,bool fromEnv,bool readOnly)340 VarNew(FStr name, const char *value, bool fromEnv, bool readOnly)
341 {
342 	size_t value_len = strlen(value);
343 	Var *var = bmake_malloc(sizeof *var);
344 	var->name = name;
345 	Buf_InitSize(&var->val, value_len + 1);
346 	Buf_AddBytes(&var->val, value, value_len);
347 	var->fromCmd = false;
348 	var->fromEnv = fromEnv;
349 	var->readOnly = readOnly;
350 	var->inUse = false;
351 	var->exported = false;
352 	var->reexport = false;
353 	return var;
354 }
355 
356 static Substring
CanonicalVarname(Substring name)357 CanonicalVarname(Substring name)
358 {
359 
360 	if (!(Substring_Length(name) > 0 && name.start[0] == '.'))
361 		return name;
362 
363 	if (Substring_Equals(name, ".ALLSRC"))
364 		return Substring_InitStr(ALLSRC);
365 	if (Substring_Equals(name, ".ARCHIVE"))
366 		return Substring_InitStr(ARCHIVE);
367 	if (Substring_Equals(name, ".IMPSRC"))
368 		return Substring_InitStr(IMPSRC);
369 	if (Substring_Equals(name, ".MEMBER"))
370 		return Substring_InitStr(MEMBER);
371 	if (Substring_Equals(name, ".OODATE"))
372 		return Substring_InitStr(OODATE);
373 	if (Substring_Equals(name, ".PREFIX"))
374 		return Substring_InitStr(PREFIX);
375 	if (Substring_Equals(name, ".TARGET"))
376 		return Substring_InitStr(TARGET);
377 
378 	if (Substring_Equals(name, ".SHELL") && shellPath == NULL)
379 		Shell_Init();
380 
381 	/* GNU make has an additional alias $^ == ${.ALLSRC}. */
382 
383 	return name;
384 }
385 
386 static Var *
GNode_FindVar(GNode * scope,Substring varname,unsigned int hash)387 GNode_FindVar(GNode *scope, Substring varname, unsigned int hash)
388 {
389 	return HashTable_FindValueBySubstringHash(&scope->vars, varname, hash);
390 }
391 
392 /*
393  * Find the variable in the scope, and maybe in other scopes as well.
394  *
395  * Input:
396  *	name		name to find, is not expanded any further
397  *	scope		scope in which to look first
398  *	elsewhere	true to look in other scopes as well
399  *
400  * Results:
401  *	The found variable, or NULL if the variable does not exist.
402  *	If the variable is an environment variable, it must be freed using
403  *	VarFreeEnv after use.
404  */
405 static Var *
VarFindSubstring(Substring name,GNode * scope,bool elsewhere)406 VarFindSubstring(Substring name, GNode *scope, bool elsewhere)
407 {
408 	Var *var;
409 	unsigned int nameHash;
410 
411 	/* Replace '.TARGET' with '@', likewise for other local variables. */
412 	name = CanonicalVarname(name);
413 	nameHash = Hash_Substring(name);
414 
415 	var = GNode_FindVar(scope, name, nameHash);
416 	if (!elsewhere)
417 		return var;
418 
419 	if (var == NULL && scope != SCOPE_CMDLINE)
420 		var = GNode_FindVar(SCOPE_CMDLINE, name, nameHash);
421 
422 	if (!opts.checkEnvFirst && var == NULL && scope != SCOPE_GLOBAL) {
423 		var = GNode_FindVar(SCOPE_GLOBAL, name, nameHash);
424 		if (var == NULL && scope != SCOPE_INTERNAL) {
425 			/* SCOPE_INTERNAL is subordinate to SCOPE_GLOBAL */
426 			var = GNode_FindVar(SCOPE_INTERNAL, name, nameHash);
427 		}
428 	}
429 
430 	if (var == NULL) {
431 		FStr envName;
432 		const char *envValue;
433 
434 		/*
435 		 * TODO: try setting an environment variable with the empty
436 		 *  name, which should be technically possible, just to see
437 		 *  how make reacts.  All .for loops should be broken then.
438 		 */
439 		envName = Substring_Str(name);
440 		envValue = getenv(envName.str);
441 		if (envValue != NULL)
442 			return VarNew(envName, envValue, true, false);
443 		FStr_Done(&envName);
444 
445 		if (opts.checkEnvFirst && scope != SCOPE_GLOBAL) {
446 			var = GNode_FindVar(SCOPE_GLOBAL, name, nameHash);
447 			if (var == NULL && scope != SCOPE_INTERNAL)
448 				var = GNode_FindVar(SCOPE_INTERNAL, name,
449 				    nameHash);
450 			return var;
451 		}
452 
453 		return NULL;
454 	}
455 
456 	return var;
457 }
458 
459 /* TODO: Replace these calls with VarFindSubstring, as far as possible. */
460 static Var *
VarFind(const char * name,GNode * scope,bool elsewhere)461 VarFind(const char *name, GNode *scope, bool elsewhere)
462 {
463 	return VarFindSubstring(Substring_InitStr(name), scope, elsewhere);
464 }
465 
466 /* If the variable is an environment variable, free it, including its value. */
467 static void
VarFreeEnv(Var * v)468 VarFreeEnv(Var *v)
469 {
470 	if (!v->fromEnv)
471 		return;
472 
473 	FStr_Done(&v->name);
474 	Buf_Done(&v->val);
475 	free(v);
476 }
477 
478 /* Add a new variable of the given name and value to the given scope. */
479 static Var *
VarAdd(const char * name,const char * value,GNode * scope,VarSetFlags flags)480 VarAdd(const char *name, const char *value, GNode *scope, VarSetFlags flags)
481 {
482 	HashEntry *he = HashTable_CreateEntry(&scope->vars, name, NULL);
483 	Var *v = VarNew(FStr_InitRefer(/* aliased to */ he->key), value,
484 	    false, (flags & VAR_SET_READONLY) != 0);
485 	HashEntry_Set(he, v);
486 	DEBUG3(VAR, "%s: %s = %s\n", scope->name, name, value);
487 	return v;
488 }
489 
490 /*
491  * Remove a variable from a scope, freeing all related memory as well.
492  * The variable name is kept as-is, it is not expanded.
493  */
494 void
Var_Delete(GNode * scope,const char * varname)495 Var_Delete(GNode *scope, const char *varname)
496 {
497 	HashEntry *he = HashTable_FindEntry(&scope->vars, varname);
498 	Var *v;
499 
500 	if (he == NULL) {
501 		DEBUG2(VAR, "%s:delete %s (not found)\n", scope->name, varname);
502 		return;
503 	}
504 
505 	DEBUG2(VAR, "%s:delete %s\n", scope->name, varname);
506 	v = he->value;
507 	if (v->exported)
508 		unsetenv(v->name.str);
509 	if (strcmp(v->name.str, MAKE_EXPORTED) == 0)
510 		var_exportedVars = VAR_EXPORTED_NONE;
511 	assert(v->name.freeIt == NULL);
512 	HashTable_DeleteEntry(&scope->vars, he);
513 	Buf_Done(&v->val);
514 	free(v);
515 }
516 
517 /*
518  * Remove a variable from a scope, freeing all related memory as well.
519  * The variable name is expanded once.
520  */
521 void
Var_DeleteExpand(GNode * scope,const char * name)522 Var_DeleteExpand(GNode *scope, const char *name)
523 {
524 	FStr varname = FStr_InitRefer(name);
525 
526 	if (strchr(varname.str, '$') != NULL) {
527 		char *expanded;
528 		(void)Var_Subst(varname.str, SCOPE_GLOBAL, VARE_WANTRES,
529 		    &expanded);
530 		/* TODO: handle errors */
531 		varname = FStr_InitOwn(expanded);
532 	}
533 
534 	Var_Delete(scope, varname.str);
535 	FStr_Done(&varname);
536 }
537 
538 /*
539  * Undefine one or more variables from the global scope.
540  * The argument is expanded exactly once and then split into words.
541  */
542 void
Var_Undef(const char * arg)543 Var_Undef(const char *arg)
544 {
545 	VarParseResult vpr;
546 	char *expanded;
547 	Words varnames;
548 	size_t i;
549 
550 	if (arg[0] == '\0') {
551 		Parse_Error(PARSE_FATAL,
552 		    "The .undef directive requires an argument");
553 		return;
554 	}
555 
556 	vpr = Var_Subst(arg, SCOPE_GLOBAL, VARE_WANTRES, &expanded);
557 	if (vpr != VPR_OK) {
558 		Parse_Error(PARSE_FATAL,
559 		    "Error in variable names to be undefined");
560 		return;
561 	}
562 
563 	varnames = Str_Words(expanded, false);
564 	if (varnames.len == 1 && varnames.words[0][0] == '\0')
565 		varnames.len = 0;
566 
567 	for (i = 0; i < varnames.len; i++) {
568 		const char *varname = varnames.words[i];
569 		Global_Delete(varname);
570 	}
571 
572 	Words_Free(varnames);
573 	free(expanded);
574 }
575 
/*
 * Decide whether a variable of the given name may be exported to the
 * environment.
 *
 * Not exported are:
 *	- the empty name, which is not a valid name for the environment,
 *	- internal variables, whose names start with '.',
 *	- misnamed variables, whose names start with '-',
 *	- the single-character variables '@', '%', '*' and '!', which should
 *	  only appear in local scope; exporting them could get Var_Subst
 *	  into a loop.
 */
static bool
MayExport(const char *name)
{
	switch (name[0]) {
	case '\0':
		/* Do not look at name[1], it is beyond the end of the name. */
		return false;
	case '.':
		return false;	/* skip internals */
	case '-':
		return false;	/* skip misnamed variables */
	case '@':
	case '%':
	case '*':
	case '!':
		/* Only the single-character names are reserved. */
		return name[1] != '\0';
	default:
		return true;
	}
}
600 
601 static bool
ExportVarEnv(Var * v)602 ExportVarEnv(Var *v)
603 {
604 	const char *name = v->name.str;
605 	char *val = v->val.data;
606 	char *expr;
607 
608 	if (v->exported && !v->reexport)
609 		return false;	/* nothing to do */
610 
611 	if (strchr(val, '$') == NULL) {
612 		if (!v->exported)
613 			setenv(name, val, 1);
614 		return true;
615 	}
616 
617 	if (v->inUse) {
618 		/*
619 		 * We recursed while exporting in a child.
620 		 * This isn't going to end well, just skip it.
621 		 */
622 		return false;
623 	}
624 
625 	/* XXX: name is injected without escaping it */
626 	expr = str_concat3("${", name, "}");
627 	(void)Var_Subst(expr, SCOPE_GLOBAL, VARE_WANTRES, &val);
628 	/* TODO: handle errors */
629 	setenv(name, val, 1);
630 	free(val);
631 	free(expr);
632 	return true;
633 }
634 
635 static bool
ExportVarPlain(Var * v)636 ExportVarPlain(Var *v)
637 {
638 	if (strchr(v->val.data, '$') == NULL) {
639 		setenv(v->name.str, v->val.data, 1);
640 		v->exported = true;
641 		v->reexport = false;
642 		return true;
643 	}
644 
645 	/*
646 	 * Flag the variable as something we need to re-export.
647 	 * No point actually exporting it now though,
648 	 * the child process can do it at the last minute.
649 	 * Avoid calling setenv more often than necessary since it can leak.
650 	 */
651 	v->exported = true;
652 	v->reexport = true;
653 	return true;
654 }
655 
656 static bool
ExportVarLiteral(Var * v)657 ExportVarLiteral(Var *v)
658 {
659 	if (v->exported && !v->reexport)
660 		return false;
661 
662 	if (!v->exported)
663 		setenv(v->name.str, v->val.data, 1);
664 
665 	return true;
666 }
667 
668 /*
669  * Mark a single variable to be exported later for subprocesses.
670  *
671  * Internal variables (those starting with '.') are not exported.
672  */
673 static bool
ExportVar(const char * name,VarExportMode mode)674 ExportVar(const char *name, VarExportMode mode)
675 {
676 	Var *v;
677 
678 	if (!MayExport(name))
679 		return false;
680 
681 	v = VarFind(name, SCOPE_GLOBAL, false);
682 	if (v == NULL)
683 		return false;
684 
685 	if (mode == VEM_ENV)
686 		return ExportVarEnv(v);
687 	else if (mode == VEM_PLAIN)
688 		return ExportVarPlain(v);
689 	else
690 		return ExportVarLiteral(v);
691 }
692 
693 /*
694  * Actually export the variables that have been marked as needing to be
695  * re-exported.
696  */
697 void
Var_ReexportVars(void)698 Var_ReexportVars(void)
699 {
700 	char *xvarnames;
701 
702 	/*
703 	 * Several make implementations support this sort of mechanism for
704 	 * tracking recursion - but each uses a different name.
705 	 * We allow the makefiles to update MAKELEVEL and ensure
706 	 * children see a correctly incremented value.
707 	 */
708 	char tmp[21];
709 	snprintf(tmp, sizeof tmp, "%d", makelevel + 1);
710 	setenv(MAKE_LEVEL_ENV, tmp, 1);
711 
712 	if (var_exportedVars == VAR_EXPORTED_NONE)
713 		return;
714 
715 	if (var_exportedVars == VAR_EXPORTED_ALL) {
716 		HashIter hi;
717 
718 		/* Ouch! Exporting all variables at once is crazy. */
719 		HashIter_Init(&hi, &SCOPE_GLOBAL->vars);
720 		while (HashIter_Next(&hi) != NULL) {
721 			Var *var = hi.entry->value;
722 			ExportVar(var->name.str, VEM_ENV);
723 		}
724 		return;
725 	}
726 
727 	(void)Var_Subst("${" MAKE_EXPORTED ":O:u}", SCOPE_GLOBAL, VARE_WANTRES,
728 	    &xvarnames);
729 	/* TODO: handle errors */
730 	if (xvarnames[0] != '\0') {
731 		Words varnames = Str_Words(xvarnames, false);
732 		size_t i;
733 
734 		for (i = 0; i < varnames.len; i++)
735 			ExportVar(varnames.words[i], VEM_ENV);
736 		Words_Free(varnames);
737 	}
738 	free(xvarnames);
739 }
740 
741 static void
ExportVars(const char * varnames,bool isExport,VarExportMode mode)742 ExportVars(const char *varnames, bool isExport, VarExportMode mode)
743 /* TODO: try to combine the parameters 'isExport' and 'mode'. */
744 {
745 	Words words = Str_Words(varnames, false);
746 	size_t i;
747 
748 	if (words.len == 1 && words.words[0][0] == '\0')
749 		words.len = 0;
750 
751 	for (i = 0; i < words.len; i++) {
752 		const char *varname = words.words[i];
753 		if (!ExportVar(varname, mode))
754 			continue;
755 
756 		if (var_exportedVars == VAR_EXPORTED_NONE)
757 			var_exportedVars = VAR_EXPORTED_SOME;
758 
759 		if (isExport && mode == VEM_PLAIN)
760 			Global_Append(MAKE_EXPORTED, varname);
761 	}
762 	Words_Free(words);
763 }
764 
765 static void
ExportVarsExpand(const char * uvarnames,bool isExport,VarExportMode mode)766 ExportVarsExpand(const char *uvarnames, bool isExport, VarExportMode mode)
767 {
768 	char *xvarnames;
769 
770 	(void)Var_Subst(uvarnames, SCOPE_GLOBAL, VARE_WANTRES, &xvarnames);
771 	/* TODO: handle errors */
772 	ExportVars(xvarnames, isExport, mode);
773 	free(xvarnames);
774 }
775 
776 /* Export the named variables, or all variables. */
777 void
Var_Export(VarExportMode mode,const char * varnames)778 Var_Export(VarExportMode mode, const char *varnames)
779 {
780 	if (mode == VEM_PLAIN && varnames[0] == '\0') {
781 		var_exportedVars = VAR_EXPORTED_ALL; /* use with caution! */
782 		return;
783 	}
784 
785 	ExportVarsExpand(varnames, true, mode);
786 }
787 
788 void
Var_ExportVars(const char * varnames)789 Var_ExportVars(const char *varnames)
790 {
791 	ExportVarsExpand(varnames, false, VEM_PLAIN);
792 }
793 
794 
795 extern char **environ;
796 
797 static void
ClearEnv(void)798 ClearEnv(void)
799 {
800 	const char *cp;
801 	char **newenv;
802 
803 	cp = getenv(MAKE_LEVEL_ENV);	/* we should preserve this */
804 	if (environ == savedEnv) {
805 		/* we have been here before! */
806 		newenv = bmake_realloc(environ, 2 * sizeof(char *));
807 	} else {
808 		if (savedEnv != NULL) {
809 			free(savedEnv);
810 			savedEnv = NULL;
811 		}
812 		newenv = bmake_malloc(2 * sizeof(char *));
813 	}
814 
815 	/* Note: we cannot safely free() the original environ. */
816 	environ = savedEnv = newenv;
817 	newenv[0] = NULL;
818 	newenv[1] = NULL;
819 	if (cp != NULL && *cp != '\0')
820 		setenv(MAKE_LEVEL_ENV, cp, 1);
821 }
822 
823 static void
GetVarnamesToUnexport(bool isEnv,const char * arg,FStr * out_varnames,UnexportWhat * out_what)824 GetVarnamesToUnexport(bool isEnv, const char *arg,
825 		      FStr *out_varnames, UnexportWhat *out_what)
826 {
827 	UnexportWhat what;
828 	FStr varnames = FStr_InitRefer("");
829 
830 	if (isEnv) {
831 		if (arg[0] != '\0') {
832 			Parse_Error(PARSE_FATAL,
833 			    "The directive .unexport-env does not take "
834 			    "arguments");
835 			/* continue anyway */
836 		}
837 		what = UNEXPORT_ENV;
838 
839 	} else {
840 		what = arg[0] != '\0' ? UNEXPORT_NAMED : UNEXPORT_ALL;
841 		if (what == UNEXPORT_NAMED)
842 			varnames = FStr_InitRefer(arg);
843 	}
844 
845 	if (what != UNEXPORT_NAMED) {
846 		char *expanded;
847 		/* Using .MAKE.EXPORTED */
848 		(void)Var_Subst("${" MAKE_EXPORTED ":O:u}", SCOPE_GLOBAL,
849 		    VARE_WANTRES, &expanded);
850 		/* TODO: handle errors */
851 		varnames = FStr_InitOwn(expanded);
852 	}
853 
854 	*out_varnames = varnames;
855 	*out_what = what;
856 }
857 
858 static void
UnexportVar(const char * varname,UnexportWhat what)859 UnexportVar(const char *varname, UnexportWhat what)
860 {
861 	Var *v = VarFind(varname, SCOPE_GLOBAL, false);
862 	if (v == NULL) {
863 		DEBUG1(VAR, "Not unexporting \"%s\" (not found)\n", varname);
864 		return;
865 	}
866 
867 	DEBUG1(VAR, "Unexporting \"%s\"\n", varname);
868 	if (what != UNEXPORT_ENV && v->exported && !v->reexport)
869 		unsetenv(v->name.str);
870 	v->exported = false;
871 	v->reexport = false;
872 
873 	if (what == UNEXPORT_NAMED) {
874 		/* Remove the variable names from .MAKE.EXPORTED. */
875 		/* XXX: v->name is injected without escaping it */
876 		char *expr = str_concat3("${" MAKE_EXPORTED ":N",
877 		    v->name.str, "}");
878 		char *cp;
879 		(void)Var_Subst(expr, SCOPE_GLOBAL, VARE_WANTRES, &cp);
880 		/* TODO: handle errors */
881 		Global_Set(MAKE_EXPORTED, cp);
882 		free(cp);
883 		free(expr);
884 	}
885 }
886 
887 static void
UnexportVars(FStr * varnames,UnexportWhat what)888 UnexportVars(FStr *varnames, UnexportWhat what)
889 {
890 	size_t i;
891 	Words words;
892 
893 	if (what == UNEXPORT_ENV)
894 		ClearEnv();
895 
896 	words = Str_Words(varnames->str, false);
897 	for (i = 0; i < words.len; i++) {
898 		const char *varname = words.words[i];
899 		UnexportVar(varname, what);
900 	}
901 	Words_Free(words);
902 
903 	if (what != UNEXPORT_NAMED)
904 		Global_Delete(MAKE_EXPORTED);
905 }
906 
907 /*
908  * This is called when .unexport[-env] is seen.
909  *
910  * str must have the form "unexport[-env] varname...".
911  */
912 void
Var_UnExport(bool isEnv,const char * arg)913 Var_UnExport(bool isEnv, const char *arg)
914 {
915 	UnexportWhat what;
916 	FStr varnames;
917 
918 	GetVarnamesToUnexport(isEnv, arg, &varnames, &what);
919 	UnexportVars(&varnames, what);
920 	FStr_Done(&varnames);
921 }
922 
923 /*
924  * When there is a variable of the same name in the command line scope, the
925  * global variable would not be visible anywhere.  Therefore there is no
926  * point in setting it at all.
927  *
928  * See 'scope == SCOPE_CMDLINE' in Var_SetWithFlags.
929  */
930 static bool
ExistsInCmdline(const char * name,const char * val)931 ExistsInCmdline(const char *name, const char *val)
932 {
933 	Var *v;
934 
935 	v = VarFind(name, SCOPE_CMDLINE, false);
936 	if (v == NULL)
937 		return false;
938 
939 	if (v->fromCmd) {
940 		DEBUG3(VAR, "%s: %s = %s ignored!\n",
941 		    SCOPE_GLOBAL->name, name, val);
942 		return true;
943 	}
944 
945 	VarFreeEnv(v);
946 	return false;
947 }
948 
949 /* Set the variable to the value; the name is not expanded. */
950 void
Var_SetWithFlags(GNode * scope,const char * name,const char * val,VarSetFlags flags)951 Var_SetWithFlags(GNode *scope, const char *name, const char *val,
952 		 VarSetFlags flags)
953 {
954 	Var *v;
955 
956 	assert(val != NULL);
957 	if (name[0] == '\0') {
958 		DEBUG0(VAR, "SetVar: variable name is empty - ignored\n");
959 		return;
960 	}
961 
962 	if (scope == SCOPE_GLOBAL && ExistsInCmdline(name, val))
963 		return;
964 
965 	/*
966 	 * Only look for a variable in the given scope since anything set
967 	 * here will override anything in a lower scope, so there's not much
968 	 * point in searching them all.
969 	 */
970 	v = VarFind(name, scope, false);
971 	if (v == NULL) {
972 		if (scope == SCOPE_CMDLINE && !(flags & VAR_SET_NO_EXPORT)) {
973 			/*
974 			 * This var would normally prevent the same name being
975 			 * added to SCOPE_GLOBAL, so delete it from there if
976 			 * needed. Otherwise -V name may show the wrong value.
977 			 *
978 			 * See ExistsInCmdline.
979 			 */
980 			Var_Delete(SCOPE_GLOBAL, name);
981 		}
982 		v = VarAdd(name, val, scope, flags);
983 	} else {
984 		if (v->readOnly && !(flags & VAR_SET_READONLY)) {
985 			DEBUG3(VAR, "%s: %s = %s ignored (read-only)\n",
986 			    scope->name, name, val);
987 			return;
988 		}
989 		Buf_Empty(&v->val);
990 		Buf_AddStr(&v->val, val);
991 
992 		DEBUG3(VAR, "%s: %s = %s\n", scope->name, name, val);
993 		if (v->exported)
994 			ExportVar(name, VEM_PLAIN);
995 	}
996 
997 	/*
998 	 * Any variables given on the command line are automatically exported
999 	 * to the environment (as per POSIX standard), except for internals.
1000 	 */
1001 	if (scope == SCOPE_CMDLINE && !(flags & VAR_SET_NO_EXPORT) &&
1002 	    name[0] != '.') {
1003 		v->fromCmd = true;
1004 
1005 		/*
1006 		 * If requested, don't export these in the environment
1007 		 * individually.  We still put them in MAKEOVERRIDES so
1008 		 * that the command-line settings continue to override
1009 		 * Makefile settings.
1010 		 */
1011 		if (!opts.varNoExportEnv)
1012 			setenv(name, val, 1);
1013 		/* XXX: What about .MAKE.EXPORTED? */
1014 		/* XXX: Why not just mark the variable for needing export,
1015 		 *  as in ExportVarPlain? */
1016 
1017 		Global_Append(MAKEOVERRIDES, name);
1018 	}
1019 
1020 	if (name[0] == '.' && strcmp(name, MAKE_SAVE_DOLLARS) == 0)
1021 		save_dollars = ParseBoolean(val, save_dollars);
1022 
1023 	if (v != NULL)
1024 		VarFreeEnv(v);
1025 }
1026 
1027 /* See Var_Set for documentation. */
1028 void
Var_SetExpandWithFlags(GNode * scope,const char * name,const char * val,VarSetFlags flags)1029 Var_SetExpandWithFlags(GNode *scope, const char *name, const char *val,
1030 		       VarSetFlags flags)
1031 {
1032 	const char *unexpanded_name = name;
1033 	FStr varname = FStr_InitRefer(name);
1034 
1035 	assert(val != NULL);
1036 
1037 	if (strchr(varname.str, '$') != NULL) {
1038 		char *expanded;
1039 		(void)Var_Subst(varname.str, scope, VARE_WANTRES, &expanded);
1040 		/* TODO: handle errors */
1041 		varname = FStr_InitOwn(expanded);
1042 	}
1043 
1044 	if (varname.str[0] == '\0') {
1045 		DEBUG2(VAR,
1046 		    "Var_SetExpand: variable name \"%s\" expands "
1047 		    "to empty string, with value \"%s\" - ignored\n",
1048 		    unexpanded_name, val);
1049 	} else
1050 		Var_SetWithFlags(scope, varname.str, val, flags);
1051 
1052 	FStr_Done(&varname);
1053 }
1054 
1055 void
Var_Set(GNode * scope,const char * name,const char * val)1056 Var_Set(GNode *scope, const char *name, const char *val)
1057 {
1058 	Var_SetWithFlags(scope, name, val, VAR_SET_NONE);
1059 }
1060 
1061 /*
1062  * Set the variable name to the value val in the given scope.
1063  *
1064  * If the variable doesn't yet exist, it is created.
1065  * Otherwise the new value overwrites and replaces the old value.
1066  *
1067  * Input:
1068  *	name		name of the variable to set, is expanded once
1069  *	val		value to give to the variable
1070  *	scope		scope in which to set it
1071  */
1072 void
Var_SetExpand(GNode * scope,const char * name,const char * val)1073 Var_SetExpand(GNode *scope, const char *name, const char *val)
1074 {
1075 	Var_SetExpandWithFlags(scope, name, val, VAR_SET_NONE);
1076 }
1077 
1078 void
Global_Set(const char * name,const char * value)1079 Global_Set(const char *name, const char *value)
1080 {
1081 	Var_Set(SCOPE_GLOBAL, name, value);
1082 }
1083 
1084 void
Global_SetExpand(const char * name,const char * value)1085 Global_SetExpand(const char *name, const char *value)
1086 {
1087 	Var_SetExpand(SCOPE_GLOBAL, name, value);
1088 }
1089 
1090 void
Global_Delete(const char * name)1091 Global_Delete(const char *name)
1092 {
1093 	Var_Delete(SCOPE_GLOBAL, name);
1094 }
1095 
1096 /*
1097  * Append the value to the named variable.
1098  *
1099  * If the variable doesn't exist, it is created.  Otherwise a single space
1100  * and the given value are appended.
1101  */
1102 void
Var_Append(GNode * scope,const char * name,const char * val)1103 Var_Append(GNode *scope, const char *name, const char *val)
1104 {
1105 	Var *v;
1106 
1107 	v = VarFind(name, scope, scope == SCOPE_GLOBAL);
1108 
1109 	if (v == NULL) {
1110 		Var_SetWithFlags(scope, name, val, VAR_SET_NONE);
1111 	} else if (v->readOnly) {
1112 		DEBUG1(VAR, "Ignoring append to %s since it is read-only\n",
1113 		    name);
1114 	} else if (scope == SCOPE_CMDLINE || !v->fromCmd) {
1115 		Buf_AddByte(&v->val, ' ');
1116 		Buf_AddStr(&v->val, val);
1117 
1118 		DEBUG3(VAR, "%s: %s = %s\n", scope->name, name, v->val.data);
1119 
1120 		if (v->fromEnv) {
1121 			/*
1122 			 * If the original variable came from the environment,
1123 			 * we have to install it in the global scope (we
1124 			 * could place it in the environment, but then we
1125 			 * should provide a way to export other variables...)
1126 			 */
1127 			v->fromEnv = false;
1128 			/*
1129 			 * This is the only place where a variable is
1130 			 * created whose v->name is not the same as
1131 			 * scope->vars->key.
1132 			 */
1133 			HashTable_Set(&scope->vars, name, v);
1134 		}
1135 	}
1136 }
1137 
1138 /*
1139  * The variable of the given name has the given value appended to it in the
1140  * given scope.
1141  *
1142  * If the variable doesn't exist, it is created. Otherwise the strings are
1143  * concatenated, with a space in between.
1144  *
1145  * Input:
1146  *	name		name of the variable to modify, is expanded once
1147  *	val		string to append to it
1148  *	scope		scope in which this should occur
1149  *
1150  * Notes:
1151  *	Only if the variable is being sought in the global scope is the
1152  *	environment searched.
1153  *	XXX: Knows its calling circumstances in that if called with scope
1154  *	an actual target, it will only search that scope since only
1155  *	a local variable could be being appended to. This is actually
1156  *	a big win and must be tolerated.
1157  */
1158 void
Var_AppendExpand(GNode * scope,const char * name,const char * val)1159 Var_AppendExpand(GNode *scope, const char *name, const char *val)
1160 {
1161 	FStr xname = FStr_InitRefer(name);
1162 
1163 	assert(val != NULL);
1164 
1165 	if (strchr(name, '$') != NULL) {
1166 		char *expanded;
1167 		(void)Var_Subst(name, scope, VARE_WANTRES, &expanded);
1168 		/* TODO: handle errors */
1169 		xname = FStr_InitOwn(expanded);
1170 		if (expanded[0] == '\0') {
1171 			DEBUG2(VAR,
1172 			    "Var_AppendExpand: variable name \"%s\" expands "
1173 			    "to empty string, with value \"%s\" - ignored\n",
1174 			    name, val);
1175 			FStr_Done(&xname);
1176 			return;
1177 		}
1178 	}
1179 
1180 	Var_Append(scope, xname.str, val);
1181 
1182 	FStr_Done(&xname);
1183 }
1184 
1185 void
Global_Append(const char * name,const char * value)1186 Global_Append(const char *name, const char *value)
1187 {
1188 	Var_Append(SCOPE_GLOBAL, name, value);
1189 }
1190 
1191 bool
Var_Exists(GNode * scope,const char * name)1192 Var_Exists(GNode *scope, const char *name)
1193 {
1194 	Var *v = VarFind(name, scope, true);
1195 	if (v == NULL)
1196 		return false;
1197 
1198 	VarFreeEnv(v);
1199 	return true;
1200 }
1201 
1202 /*
1203  * See if the given variable exists, in the given scope or in other
1204  * fallback scopes.
1205  *
1206  * Input:
1207  *	name		Variable to find, is expanded once
1208  *	scope		Scope in which to start search
1209  */
1210 bool
Var_ExistsExpand(GNode * scope,const char * name)1211 Var_ExistsExpand(GNode *scope, const char *name)
1212 {
1213 	FStr varname = FStr_InitRefer(name);
1214 	bool exists;
1215 
1216 	if (strchr(varname.str, '$') != NULL) {
1217 		char *expanded;
1218 		(void)Var_Subst(varname.str, scope, VARE_WANTRES, &expanded);
1219 		/* TODO: handle errors */
1220 		varname = FStr_InitOwn(expanded);
1221 	}
1222 
1223 	exists = Var_Exists(scope, varname.str);
1224 	FStr_Done(&varname);
1225 	return exists;
1226 }
1227 
1228 /*
1229  * Return the unexpanded value of the given variable in the given scope,
1230  * or the usual scopes.
1231  *
1232  * Input:
1233  *	name		name to find, is not expanded any further
1234  *	scope		scope in which to search for it
1235  *
1236  * Results:
1237  *	The value if the variable exists, NULL if it doesn't.
1238  *	The value is valid until the next modification to any variable.
1239  */
1240 FStr
Var_Value(GNode * scope,const char * name)1241 Var_Value(GNode *scope, const char *name)
1242 {
1243 	Var *v = VarFind(name, scope, true);
1244 	char *value;
1245 
1246 	if (v == NULL)
1247 		return FStr_InitRefer(NULL);
1248 
1249 	if (!v->fromEnv)
1250 		return FStr_InitRefer(v->val.data);
1251 
1252 	/* Since environment variables are short-lived, free it now. */
1253 	FStr_Done(&v->name);
1254 	value = Buf_DoneData(&v->val);
1255 	free(v);
1256 	return FStr_InitOwn(value);
1257 }
1258 
1259 /*
1260  * Return the unexpanded variable value from this node, without trying to look
1261  * up the variable in any other scope.
1262  */
1263 const char *
GNode_ValueDirect(GNode * gn,const char * name)1264 GNode_ValueDirect(GNode *gn, const char *name)
1265 {
1266 	Var *v = VarFind(name, gn, false);
1267 	return v != NULL ? v->val.data : NULL;
1268 }
1269 
1270 static VarEvalMode
VarEvalMode_WithoutKeepDollar(VarEvalMode emode)1271 VarEvalMode_WithoutKeepDollar(VarEvalMode emode)
1272 {
1273 	if (emode == VARE_KEEP_DOLLAR_UNDEF)
1274 		return VARE_EVAL_KEEP_UNDEF;
1275 	if (emode == VARE_EVAL_KEEP_DOLLAR)
1276 		return VARE_WANTRES;
1277 	return emode;
1278 }
1279 
1280 static VarEvalMode
VarEvalMode_UndefOk(VarEvalMode emode)1281 VarEvalMode_UndefOk(VarEvalMode emode)
1282 {
1283 	return emode == VARE_UNDEFERR ? VARE_WANTRES : emode;
1284 }
1285 
1286 static bool
VarEvalMode_ShouldEval(VarEvalMode emode)1287 VarEvalMode_ShouldEval(VarEvalMode emode)
1288 {
1289 	return emode != VARE_PARSE_ONLY;
1290 }
1291 
1292 static bool
VarEvalMode_ShouldKeepUndef(VarEvalMode emode)1293 VarEvalMode_ShouldKeepUndef(VarEvalMode emode)
1294 {
1295 	return emode == VARE_EVAL_KEEP_UNDEF ||
1296 	       emode == VARE_KEEP_DOLLAR_UNDEF;
1297 }
1298 
1299 static bool
VarEvalMode_ShouldKeepDollar(VarEvalMode emode)1300 VarEvalMode_ShouldKeepDollar(VarEvalMode emode)
1301 {
1302 	return emode == VARE_EVAL_KEEP_DOLLAR ||
1303 	       emode == VARE_KEEP_DOLLAR_UNDEF;
1304 }
1305 
1306 
1307 static void
SepBuf_Init(SepBuf * buf,char sep)1308 SepBuf_Init(SepBuf *buf, char sep)
1309 {
1310 	Buf_InitSize(&buf->buf, 32);
1311 	buf->needSep = false;
1312 	buf->sep = sep;
1313 }
1314 
1315 static void
SepBuf_Sep(SepBuf * buf)1316 SepBuf_Sep(SepBuf *buf)
1317 {
1318 	buf->needSep = true;
1319 }
1320 
1321 static void
SepBuf_AddBytes(SepBuf * buf,const char * mem,size_t mem_size)1322 SepBuf_AddBytes(SepBuf *buf, const char *mem, size_t mem_size)
1323 {
1324 	if (mem_size == 0)
1325 		return;
1326 	if (buf->needSep && buf->sep != '\0') {
1327 		Buf_AddByte(&buf->buf, buf->sep);
1328 		buf->needSep = false;
1329 	}
1330 	Buf_AddBytes(&buf->buf, mem, mem_size);
1331 }
1332 
1333 static void
SepBuf_AddBytesBetween(SepBuf * buf,const char * start,const char * end)1334 SepBuf_AddBytesBetween(SepBuf *buf, const char *start, const char *end)
1335 {
1336 	SepBuf_AddBytes(buf, start, (size_t)(end - start));
1337 }
1338 
1339 static void
SepBuf_AddStr(SepBuf * buf,const char * str)1340 SepBuf_AddStr(SepBuf *buf, const char *str)
1341 {
1342 	SepBuf_AddBytes(buf, str, strlen(str));
1343 }
1344 
1345 static void
SepBuf_AddSubstring(SepBuf * buf,Substring sub)1346 SepBuf_AddSubstring(SepBuf *buf, Substring sub)
1347 {
1348 	SepBuf_AddBytesBetween(buf, sub.start, sub.end);
1349 }
1350 
1351 static char *
SepBuf_DoneData(SepBuf * buf)1352 SepBuf_DoneData(SepBuf *buf)
1353 {
1354 	return Buf_DoneData(&buf->buf);
1355 }
1356 
1357 
1358 /*
1359  * This callback for ModifyWords gets a single word from a variable expression
1360  * and typically adds a modification of this word to the buffer. It may also
1361  * do nothing or add several words.
1362  *
1363  * For example, when evaluating the modifier ':M*b' in ${:Ua b c:M*b}, the
1364  * callback is called 3 times, once for "a", "b" and "c".
1365  *
1366  * Some ModifyWord functions assume that they are always passed a
1367  * null-terminated substring, which is currently guaranteed but may change in
1368  * the future.
1369  */
1370 typedef void (*ModifyWordProc)(Substring word, SepBuf *buf, void *data);
1371 
1372 
1373 /*
1374  * Callback for ModifyWords to implement the :H modifier.
1375  * Add the dirname of the given word to the buffer.
1376  */
1377 /*ARGSUSED*/
1378 static void
ModifyWord_Head(Substring word,SepBuf * buf,void * dummy MAKE_ATTR_UNUSED)1379 ModifyWord_Head(Substring word, SepBuf *buf, void *dummy MAKE_ATTR_UNUSED)
1380 {
1381 	SepBuf_AddSubstring(buf, Substring_Dirname(word));
1382 }
1383 
1384 /*
1385  * Callback for ModifyWords to implement the :T modifier.
1386  * Add the basename of the given word to the buffer.
1387  */
1388 /*ARGSUSED*/
1389 static void
ModifyWord_Tail(Substring word,SepBuf * buf,void * dummy MAKE_ATTR_UNUSED)1390 ModifyWord_Tail(Substring word, SepBuf *buf, void *dummy MAKE_ATTR_UNUSED)
1391 {
1392 	SepBuf_AddSubstring(buf, Substring_Basename(word));
1393 }
1394 
1395 /*
1396  * Callback for ModifyWords to implement the :E modifier.
1397  * Add the filename suffix of the given word to the buffer, if it exists.
1398  */
1399 /*ARGSUSED*/
1400 static void
ModifyWord_Suffix(Substring word,SepBuf * buf,void * dummy MAKE_ATTR_UNUSED)1401 ModifyWord_Suffix(Substring word, SepBuf *buf, void *dummy MAKE_ATTR_UNUSED)
1402 {
1403 	const char *lastDot = Substring_LastIndex(word, '.');
1404 	if (lastDot != NULL)
1405 		SepBuf_AddBytesBetween(buf, lastDot + 1, word.end);
1406 }
1407 
1408 /*
1409  * Callback for ModifyWords to implement the :R modifier.
1410  * Add the filename without extension of the given word to the buffer.
1411  */
1412 /*ARGSUSED*/
1413 static void
ModifyWord_Root(Substring word,SepBuf * buf,void * dummy MAKE_ATTR_UNUSED)1414 ModifyWord_Root(Substring word, SepBuf *buf, void *dummy MAKE_ATTR_UNUSED)
1415 {
1416 	const char *lastDot, *end;
1417 
1418 	lastDot = Substring_LastIndex(word, '.');
1419 	end = lastDot != NULL ? lastDot : word.end;
1420 	SepBuf_AddBytesBetween(buf, word.start, end);
1421 }
1422 
1423 /*
1424  * Callback for ModifyWords to implement the :M modifier.
1425  * Place the word in the buffer if it matches the given pattern.
1426  */
1427 static void
ModifyWord_Match(Substring word,SepBuf * buf,void * data)1428 ModifyWord_Match(Substring word, SepBuf *buf, void *data)
1429 {
1430 	const char *pattern = data;
1431 
1432 	assert(word.end[0] == '\0');	/* assume null-terminated word */
1433 	if (Str_Match(word.start, pattern))
1434 		SepBuf_AddSubstring(buf, word);
1435 }
1436 
1437 /*
1438  * Callback for ModifyWords to implement the :N modifier.
1439  * Place the word in the buffer if it doesn't match the given pattern.
1440  */
1441 static void
ModifyWord_NoMatch(Substring word,SepBuf * buf,void * data)1442 ModifyWord_NoMatch(Substring word, SepBuf *buf, void *data)
1443 {
1444 	const char *pattern = data;
1445 
1446 	assert(word.end[0] == '\0');	/* assume null-terminated word */
1447 	if (!Str_Match(word.start, pattern))
1448 		SepBuf_AddSubstring(buf, word);
1449 }
1450 
#ifdef SYSVVARSUB
/* Arguments for ModifyWord_SysVSubst, passed via the data pointer. */
struct ModifyWord_SysVSubstArgs {
	GNode *scope;		/* scope for expanding '$' in rhs */
	Substring lhsPrefix;	/* the word must start with this */
	bool lhsPercent;	/* whether '%' in rhs gets replaced */
	Substring lhsSuffix;	/* the word must end with this */
	const char *rhs;	/* replacement, expanded for each word */
};

/*
 * Callback for ModifyWords to implement the :%.from=%.to modifier.
 *
 * Empty words are skipped.  A word that does not have both the required
 * prefix and the required suffix is added to the buffer unchanged.
 * Otherwise the expanded rhs is added; if lhsPercent is set, the first '%'
 * in it is replaced with the part of the word between prefix and suffix.
 * If lhsPercent is not set, that part of the word is added in front of the
 * rhs instead.
 */
static void
ModifyWord_SysVSubst(Substring word, SepBuf *buf, void *data)
{
	const struct ModifyWord_SysVSubstArgs *args = data;
	size_t prefixLen, suffixLen;
	FStr rhs;
	const char *percent;

	if (Substring_IsEmpty(word))
		return;

	if (!Substring_HasPrefix(word, args->lhsPrefix))
		goto no_match;
	if (!Substring_HasSuffix(word, args->lhsSuffix))
		goto no_match;

	/*
	 * The prefix and the suffix are tested independently of each other,
	 * so in a short word they may overlap, as in the word "a" for the
	 * pattern "a%a".  Such a word does not match.  Without this check,
	 * the range between prefix and suffix would have a negative length,
	 * which would lead to an out-of-bounds read when copying it.
	 */
	prefixLen = Substring_Length(args->lhsPrefix);
	suffixLen = Substring_Length(args->lhsSuffix);
	if (prefixLen + suffixLen > Substring_Length(word))
		goto no_match;

	rhs = FStr_InitRefer(args->rhs);
	if (strchr(rhs.str, '$') != NULL) {
		char *rhsExp;
		(void)Var_Subst(args->rhs, args->scope, VARE_WANTRES, &rhsExp);
		/* TODO: handle errors */
		rhs = FStr_InitOwn(rhsExp);
	}

	percent = args->lhsPercent ? strchr(rhs.str, '%') : NULL;

	if (percent != NULL)
		SepBuf_AddBytesBetween(buf, rhs.str, percent);
	if (percent != NULL || !args->lhsPercent)
		SepBuf_AddBytesBetween(buf,
		    word.start + prefixLen,
		    word.end - suffixLen);
	SepBuf_AddStr(buf, percent != NULL ? percent + 1 : rhs.str);

	FStr_Done(&rhs);
	return;

no_match:
	SepBuf_AddSubstring(buf, word);
}
#endif
1501 
1502 
1503 struct ModifyWord_SubstArgs {
1504 	Substring lhs;
1505 	Substring rhs;
1506 	PatternFlags pflags;
1507 	bool matched;
1508 };
1509 
1510 static const char *
Substring_Find(Substring haystack,Substring needle)1511 Substring_Find(Substring haystack, Substring needle)
1512 {
1513 	size_t len, needleLen, i;
1514 
1515 	len = Substring_Length(haystack);
1516 	needleLen = Substring_Length(needle);
1517 	for (i = 0; i + needleLen <= len; i++)
1518 		if (memcmp(haystack.start + i, needle.start, needleLen) == 0)
1519 			return haystack.start + i;
1520 	return NULL;
1521 }
1522 
1523 /*
1524  * Callback for ModifyWords to implement the :S,from,to, modifier.
1525  * Perform a string substitution on the given word.
1526  */
1527 static void
ModifyWord_Subst(Substring word,SepBuf * buf,void * data)1528 ModifyWord_Subst(Substring word, SepBuf *buf, void *data)
1529 {
1530 	struct ModifyWord_SubstArgs *args = data;
1531 	size_t wordLen, lhsLen;
1532 	const char *wordEnd, *match;
1533 
1534 	wordLen = Substring_Length(word);
1535 	wordEnd = word.end;
1536 	if (args->pflags.subOnce && args->matched)
1537 		goto nosub;
1538 
1539 	lhsLen = Substring_Length(args->lhs);
1540 	if (args->pflags.anchorStart) {
1541 		if (wordLen < lhsLen ||
1542 		    memcmp(word.start, args->lhs.start, lhsLen) != 0)
1543 			goto nosub;
1544 
1545 		if (args->pflags.anchorEnd && wordLen != lhsLen)
1546 			goto nosub;
1547 
1548 		/* :S,^prefix,replacement, or :S,^whole$,replacement, */
1549 		SepBuf_AddSubstring(buf, args->rhs);
1550 		SepBuf_AddBytesBetween(buf, word.start + lhsLen, wordEnd);
1551 		args->matched = true;
1552 		return;
1553 	}
1554 
1555 	if (args->pflags.anchorEnd) {
1556 		if (wordLen < lhsLen)
1557 			goto nosub;
1558 		if (memcmp(wordEnd - lhsLen, args->lhs.start, lhsLen) != 0)
1559 			goto nosub;
1560 
1561 		/* :S,suffix$,replacement, */
1562 		SepBuf_AddBytesBetween(buf, word.start, wordEnd - lhsLen);
1563 		SepBuf_AddSubstring(buf, args->rhs);
1564 		args->matched = true;
1565 		return;
1566 	}
1567 
1568 	if (Substring_IsEmpty(args->lhs))
1569 		goto nosub;
1570 
1571 	/* unanchored case, may match more than once */
1572 	while ((match = Substring_Find(word, args->lhs)) != NULL) {
1573 		SepBuf_AddBytesBetween(buf, word.start, match);
1574 		SepBuf_AddSubstring(buf, args->rhs);
1575 		args->matched = true;
1576 		word.start = match + lhsLen;
1577 		if (Substring_IsEmpty(word) || !args->pflags.subGlobal)
1578 			break;
1579 	}
1580 nosub:
1581 	SepBuf_AddSubstring(buf, word);
1582 }
1583 
1584 #ifndef NO_REGEX
/*
 * Report a failed regcomp or regexec call via Error, in the form
 * "<str>: <message from regerror>".
 */
static void
VarREError(int reerr, const regex_t *pat, const char *str)
{
	/* The first call only determines the required buffer size. */
	size_t msgSize = regerror(reerr, pat, NULL, 0);
	char *msg = bmake_malloc(msgSize);

	regerror(reerr, pat, msg, msgSize);
	Error("%s: %s", str, msg);
	free(msg);
}
1595 
1596 struct ModifyWord_SubstRegexArgs {
1597 	regex_t re;
1598 	size_t nsub;
1599 	const char *replace;
1600 	PatternFlags pflags;
1601 	bool matched;
1602 };
1603 
1604 /*
1605  * Callback for ModifyWords to implement the :C/from/to/ modifier.
1606  * Perform a regex substitution on the given word.
1607  */
1608 static void
ModifyWord_SubstRegex(Substring word,SepBuf * buf,void * data)1609 ModifyWord_SubstRegex(Substring word, SepBuf *buf, void *data)
1610 {
1611 	struct ModifyWord_SubstRegexArgs *args = data;
1612 	int xrv;
1613 	const char *wp;
1614 	const char *rp;
1615 	int flags = 0;
1616 	regmatch_t m[10];
1617 
1618 	assert(word.end[0] == '\0');	/* assume null-terminated word */
1619 	wp = word.start;
1620 	if (args->pflags.subOnce && args->matched)
1621 		goto nosub;
1622 
1623 tryagain:
1624 	xrv = regexec(&args->re, wp, args->nsub, m, flags);
1625 
1626 	switch (xrv) {
1627 	case 0:
1628 		args->matched = true;
1629 		SepBuf_AddBytes(buf, wp, (size_t)m[0].rm_so);
1630 
1631 		/*
1632 		 * Replacement of regular expressions is not specified by
1633 		 * POSIX, therefore implement it here.
1634 		 */
1635 
1636 		for (rp = args->replace; *rp != '\0'; rp++) {
1637 			if (*rp == '\\' && (rp[1] == '&' || rp[1] == '\\')) {
1638 				SepBuf_AddBytes(buf, rp + 1, 1);
1639 				rp++;
1640 				continue;
1641 			}
1642 
1643 			if (*rp == '&') {
1644 				SepBuf_AddBytesBetween(buf,
1645 				    wp + m[0].rm_so, wp + m[0].rm_eo);
1646 				continue;
1647 			}
1648 
1649 			if (*rp != '\\' || !ch_isdigit(rp[1])) {
1650 				SepBuf_AddBytes(buf, rp, 1);
1651 				continue;
1652 			}
1653 
1654 			{	/* \0 to \9 backreference */
1655 				size_t n = (size_t)(rp[1] - '0');
1656 				rp++;
1657 
1658 				if (n >= args->nsub) {
1659 					Error("No subexpression \\%u",
1660 					    (unsigned)n);
1661 				} else if (m[n].rm_so == -1) {
1662 					Error(
1663 					    "No match for subexpression \\%u",
1664 					    (unsigned)n);
1665 				} else {
1666 					SepBuf_AddBytesBetween(buf,
1667 					    wp + m[n].rm_so, wp + m[n].rm_eo);
1668 				}
1669 			}
1670 		}
1671 
1672 		wp += m[0].rm_eo;
1673 		if (args->pflags.subGlobal) {
1674 			flags |= REG_NOTBOL;
1675 			if (m[0].rm_so == 0 && m[0].rm_eo == 0) {
1676 				SepBuf_AddBytes(buf, wp, 1);
1677 				wp++;
1678 			}
1679 			if (*wp != '\0')
1680 				goto tryagain;
1681 		}
1682 		if (*wp != '\0')
1683 			SepBuf_AddStr(buf, wp);
1684 		break;
1685 	default:
1686 		VarREError(xrv, &args->re, "Unexpected regex error");
1687 		/* FALLTHROUGH */
1688 	case REG_NOMATCH:
1689 	nosub:
1690 		SepBuf_AddStr(buf, wp);
1691 		break;
1692 	}
1693 }
1694 #endif
1695 
1696 
1697 struct ModifyWord_LoopArgs {
1698 	GNode *scope;
1699 	const char *var;	/* name of the temporary variable */
1700 	const char *body;	/* string to expand */
1701 	VarEvalMode emode;
1702 };
1703 
1704 /* Callback for ModifyWords to implement the :@var@...@ modifier of ODE make. */
1705 static void
ModifyWord_Loop(Substring word,SepBuf * buf,void * data)1706 ModifyWord_Loop(Substring word, SepBuf *buf, void *data)
1707 {
1708 	const struct ModifyWord_LoopArgs *args;
1709 	char *s;
1710 
1711 	if (Substring_IsEmpty(word))
1712 		return;
1713 
1714 	args = data;
1715 	assert(word.end[0] == '\0');	/* assume null-terminated word */
1716 	Var_SetWithFlags(args->scope, args->var, word.start,
1717 	    VAR_SET_NO_EXPORT);
1718 	(void)Var_Subst(args->body, args->scope, args->emode, &s);
1719 	/* TODO: handle errors */
1720 
1721 	assert(word.end[0] == '\0');	/* assume null-terminated word */
1722 	DEBUG4(VAR, "ModifyWord_Loop: "
1723 		    "in \"%s\", replace \"%s\" with \"%s\" to \"%s\"\n",
1724 	    word.start, args->var, args->body, s);
1725 
1726 	if (s[0] == '\n' || Buf_EndsWith(&buf->buf, '\n'))
1727 		buf->needSep = false;
1728 	SepBuf_AddStr(buf, s);
1729 	free(s);
1730 }
1731 
1732 
1733 /*
1734  * The :[first..last] modifier selects words from the expression.
1735  * It can also reverse the words.
1736  */
1737 static char *
VarSelectWords(const char * str,int first,int last,char sep,bool oneBigWord)1738 VarSelectWords(const char *str, int first, int last,
1739 	       char sep, bool oneBigWord)
1740 {
1741 	Words words;
1742 	int len, start, end, step;
1743 	int i;
1744 
1745 	SepBuf buf;
1746 	SepBuf_Init(&buf, sep);
1747 
1748 	if (oneBigWord) {
1749 		/* fake what Str_Words() would do if there were only one word */
1750 		words.len = 1;
1751 		words.words = bmake_malloc(
1752 		    (words.len + 1) * sizeof(words.words[0]));
1753 		words.freeIt = bmake_strdup(str);
1754 		words.words[0] = words.freeIt;
1755 		words.words[1] = NULL;
1756 	} else {
1757 		words = Str_Words(str, false);
1758 	}
1759 
1760 	/*
1761 	 * Now sanitize the given range.  If first or last are negative,
1762 	 * convert them to the positive equivalents (-1 gets converted to len,
1763 	 * -2 gets converted to (len - 1), etc.).
1764 	 */
1765 	len = (int)words.len;
1766 	if (first < 0)
1767 		first += len + 1;
1768 	if (last < 0)
1769 		last += len + 1;
1770 
1771 	/* We avoid scanning more of the list than we need to. */
1772 	if (first > last) {
1773 		start = (first > len ? len : first) - 1;
1774 		end = last < 1 ? 0 : last - 1;
1775 		step = -1;
1776 	} else {
1777 		start = first < 1 ? 0 : first - 1;
1778 		end = last > len ? len : last;
1779 		step = 1;
1780 	}
1781 
1782 	for (i = start; (step < 0) == (i >= end); i += step) {
1783 		SepBuf_AddStr(&buf, words.words[i]);
1784 		SepBuf_Sep(&buf);
1785 	}
1786 
1787 	Words_Free(words);
1788 
1789 	return SepBuf_DoneData(&buf);
1790 }
1791 
1792 
1793 /*
1794  * Callback for ModifyWords to implement the :tA modifier.
1795  * Replace each word with the result of realpath() if successful.
1796  */
1797 /*ARGSUSED*/
1798 static void
ModifyWord_Realpath(Substring word,SepBuf * buf,void * data MAKE_ATTR_UNUSED)1799 ModifyWord_Realpath(Substring word, SepBuf *buf, void *data MAKE_ATTR_UNUSED)
1800 {
1801 	struct stat st;
1802 	char rbuf[MAXPATHLEN];
1803 	const char *rp;
1804 
1805 	assert(word.end[0] == '\0');	/* assume null-terminated word */
1806 	rp = cached_realpath(word.start, rbuf);
1807 	if (rp != NULL && *rp == '/' && stat(rp, &st) == 0)
1808 		SepBuf_AddStr(buf, rp);
1809 	else
1810 		SepBuf_AddSubstring(buf, word);
1811 }
1812 
1813 
1814 static char *
Words_JoinFree(Words words)1815 Words_JoinFree(Words words)
1816 {
1817 	Buffer buf;
1818 	size_t i;
1819 
1820 	Buf_Init(&buf);
1821 
1822 	for (i = 0; i < words.len; i++) {
1823 		if (i != 0) {
1824 			/* XXX: Use ch->sep instead of ' ', for consistency. */
1825 			Buf_AddByte(&buf, ' ');
1826 		}
1827 		Buf_AddStr(&buf, words.words[i]);
1828 	}
1829 
1830 	Words_Free(words);
1831 
1832 	return Buf_DoneData(&buf);
1833 }
1834 
1835 /* Remove adjacent duplicate words. */
1836 static char *
VarUniq(const char * str)1837 VarUniq(const char *str)
1838 {
1839 	Words words = Str_Words(str, false);
1840 
1841 	if (words.len > 1) {
1842 		size_t i, j;
1843 		for (j = 0, i = 1; i < words.len; i++)
1844 			if (strcmp(words.words[i], words.words[j]) != 0 &&
1845 			    (++j != i))
1846 				words.words[j] = words.words[i];
1847 		words.len = j + 1;
1848 	}
1849 
1850 	return Words_JoinFree(words);
1851 }
1852 
1853 
1854 /*
1855  * Quote shell meta-characters and space characters in the string.
1856  * If quoteDollar is set, also quote and double any '$' characters.
1857  */
1858 static void
VarQuote(const char * str,bool quoteDollar,LazyBuf * buf)1859 VarQuote(const char *str, bool quoteDollar, LazyBuf *buf)
1860 {
1861 	const char *p;
1862 
1863 	LazyBuf_Init(buf, str);
1864 	for (p = str; *p != '\0'; p++) {
1865 		if (*p == '\n') {
1866 			const char *newline = Shell_GetNewline();
1867 			if (newline == NULL)
1868 				newline = "\\\n";
1869 			LazyBuf_AddStr(buf, newline);
1870 			continue;
1871 		}
1872 		if (ch_isspace(*p) || is_shell_metachar((unsigned char)*p))
1873 			LazyBuf_Add(buf, '\\');
1874 		LazyBuf_Add(buf, *p);
1875 		if (quoteDollar && *p == '$')
1876 			LazyBuf_AddStr(buf, "\\$");
1877 	}
1878 }
1879 
/*
 * Compute the 32-bit hash of the given string, using the MurmurHash3
 * algorithm.  The result is a newly allocated string of 8 hex digits,
 * starting with the least significant digit.
 */
static char *
VarHash(const char *str)
{
	static const char hexdigits[16] = "0123456789abcdef";
	const unsigned char *p = (const unsigned char *)str;
	const size_t total = strlen(str);
	size_t remaining = total;

	uint32_t h = 0x971e137bU;
	uint32_t c1 = 0x95543787U;
	uint32_t c2 = 0x2ad7eb25U;

	char *digits;
	size_t i;

	while (remaining != 0) {
		uint32_t k = 0;

		if (remaining >= 4) {
			/* a full block of 4 bytes, least significant first */
			k = (uint32_t)p[0] |
			    ((uint32_t)p[1] << 8) |
			    ((uint32_t)p[2] << 16) |
			    ((uint32_t)p[3] << 24);
			p += 4;
			remaining -= 4;
		} else {
			/* the final, partial block of 1 to 3 bytes */
			if (remaining == 3)
				k |= (uint32_t)p[2] << 16;
			if (remaining >= 2)
				k |= (uint32_t)p[1] << 8;
			k |= (uint32_t)p[0];
			remaining = 0;
		}

		c1 = c1 * 5 + 0x7b7d159cU;
		c2 = c2 * 5 + 0x6bce6396U;
		k *= c1;
		k = (k << 11) ^ (k >> 21);
		k *= c2;
		h = (h << 13) ^ (h >> 19);
		h = h * 5 + 0x52dce729U;
		h ^= k;
	}

	/* final mixing */
	h ^= (uint32_t)total;
	h *= 0x85ebca6bU;
	h ^= h >> 13;
	h *= 0xc2b2ae35U;
	h ^= h >> 16;

	digits = bmake_malloc(9);
	for (i = 0; i < 8; i++)
		digits[i] = hexdigits[(h >> (4 * i)) & 0x0f];
	digits[8] = '\0';
	return digits;
}
1943 
/*
 * Format the given time according to fmt and return the result as a newly
 * allocated string.
 *
 * Input:
 *	fmt		format for strftime; an empty format means "%c"
 *	zulu		if set, use gmtime (UTC) instead of localtime
 *	tim		the time to format; 0 means the current time
 *
 * If the time cannot be converted to broken-down time, the result is an
 * empty string.  If the formatted text does not fit into the internal
 * buffer, the result is whatever strftime left in the buffer.
 */
static char *
VarStrftime(const char *fmt, bool zulu, time_t tim)
{
	/*
	 * The buffer starts out filled with zeros, so it holds a properly
	 * terminated string and no uninitialized bytes even if strftime
	 * fails; in that case strftime leaves the contents unspecified.
	 */
	char buf[BUFSIZ] = "";
	const struct tm *tm;

	if (tim == 0)
		time(&tim);
	if (*fmt == '\0')
		fmt = "%c";

	/* gmtime and localtime return NULL for unrepresentable times. */
	tm = zulu ? gmtime(&tim) : localtime(&tim);
	if (tm != NULL)
		strftime(buf, sizeof buf, fmt, tm);

	buf[sizeof buf - 1] = '\0';
	return bmake_strdup(buf);
}
1958 
1959 /*
1960  * The ApplyModifier functions take an expression that is being evaluated.
1961  * Their task is to apply a single modifier to the expression.  This involves
1962  * parsing the modifier, evaluating it and finally updating the value of the
1963  * expression.
1964  *
1965  * Parsing the modifier
1966  *
1967  * If parsing succeeds, the parsing position *pp is updated to point to the
1968  * first character following the modifier, which typically is either ':' or
1969  * ch->endc.  The modifier doesn't have to check for this delimiter character,
1970  * this is done by ApplyModifiers.
1971  *
1972  * XXX: As of 2020-11-15, some modifiers such as :S, :C, :P, :L do not
1973  * need to be followed by a ':' or endc; this was an unintended mistake.
1974  *
1975  * If parsing fails because of a missing delimiter (as in the :S, :C or :@
1976  * modifiers), return AMR_CLEANUP.
1977  *
1978  * If parsing fails because the modifier is unknown, return AMR_UNKNOWN to
1979  * try the SysV modifier ${VAR:from=to} as fallback.  This should only be
1980  * done as long as there have been no side effects from evaluating nested
1981  * variables, to avoid evaluating them more than once.  In this case, the
1982  * parsing position may or may not be updated.  (XXX: Why not? The original
1983  * parsing position is well-known in ApplyModifiers.)
1984  *
1985  * If parsing fails and the SysV modifier ${VAR:from=to} should not be used
1986  * as a fallback, either issue an error message using Error or Parse_Error
1987  * and then return AMR_CLEANUP, or return AMR_BAD for the default error
1988  * message.  Both of these return values will stop processing the variable
1989  * expression.  (XXX: As of 2020-08-23, evaluation of the whole string
1990  * continues nevertheless after skipping a few bytes, which essentially is
1991  * undefined behavior.  Not in the sense of C, but still the resulting string
1992  * is garbage.)
1993  *
1994  * Evaluating the modifier
1995  *
1996  * After parsing, the modifier is evaluated.  The side effects from evaluating
1997  * nested variable expressions in the modifier text often already happen
1998  * during parsing though.  For most modifiers this doesn't matter since their
 * only noticeable effect is that they update the value of the expression.
2000  * Some modifiers such as ':sh' or '::=' have noticeable side effects though.
2001  *
2002  * Evaluating the modifier usually takes the current value of the variable
2003  * expression from ch->expr->value, or the variable name from ch->var->name
2004  * and stores the result back in expr->value via Expr_SetValueOwn or
2005  * Expr_SetValueRefer.
2006  *
2007  * If evaluating fails (as of 2020-08-23), an error message is printed using
2008  * Error.  This function has no side-effects, it really just prints the error
2009  * message.  Processing the expression continues as if everything were ok.
2010  * XXX: This should be fixed by adding proper error handling to Var_Subst,
2011  * Var_Parse, ApplyModifiers and ModifyWords.
2012  *
2013  * Housekeeping
2014  *
2015  * Some modifiers such as :D and :U turn undefined expressions into defined
2016  * expressions (see Expr_Define).
2017  *
2018  * Some modifiers need to free some memory.
2019  */
2020 
/*
 * Tracks whether a variable expression is based on a defined variable.
 * Expr_Define performs the transition from DEF_UNDEF to DEF_DEFINED.
 */
typedef enum ExprDefined {
	/* The variable expression is based on a regular, defined variable. */
	DEF_REGULAR,
	/* The variable expression is based on an undefined variable. */
	DEF_UNDEF,
	/*
	 * The variable expression started as an undefined expression, but one
	 * of the modifiers (such as ':D' or ':U') has turned the expression
	 * from undefined to defined.
	 */
	DEF_DEFINED
} ExprDefined;
2033 
/*
 * Human-readable names of the ExprDefined constants, listed in the same
 * order as the enum so that a constant can serve as the index.
 * NOTE(review): presumably used for debug output; confirm at the callers.
 */
static const char *const ExprDefined_Name[] = {
	"regular",
	"undefined",
	"defined"
};
2039 
/*
 * Qualifier for struct members that are not meant to change after the
 * struct has been initialized.  It expands to 'const' if the compiler
 * announces C99 or later via __STDC_VERSION__, and to nothing otherwise.
 */
#if __STDC_VERSION__ >= 199901L
#define const_member const
#else
#define const_member /* no const possible */
#endif
2045 
/* A variable expression such as $@ or ${VAR:Mpattern:Q}. */
typedef struct Expr {
	/* Name of the variable; ':L' and ':P' take it as their input. */
	const char *name;
	/* Current value; replaced via Expr_SetValue and its wrappers. */
	FStr value;
	/* Evaluation mode; Expr_ShouldEval is derived from it. */
	VarEvalMode const_member emode;
	/* Scope that is passed on when nested expressions are parsed. */
	GNode *const_member scope;
	/* Whether the expression is based on a defined variable. */
	ExprDefined defined;
} Expr;
2054 
/*
 * The status of applying a chain of modifiers to an expression.
 *
 * The modifiers of an expression are broken into chains of modifiers,
 * starting a new nested chain whenever an indirect modifier starts.  There
 * are at most 2 nesting levels: the outer one for the direct modifiers, and
 * the inner one for the indirect modifiers.
 *
 * For example, the expression ${VAR:M*:${IND1}:${IND2}:O:u} has 3 chains of
 * modifiers:
 *
 *	Chain 1 starts with the single modifier ':M*'.
 *	  Chain 2 starts with all modifiers from ${IND1}.
 *	  Chain 2 ends at the ':' between ${IND1} and ${IND2}.
 *	  Chain 3 starts with all modifiers from ${IND2}.
 *	  Chain 3 ends at the ':' after ${IND2}.
 *	Chain 1 continues with the 2 modifiers ':O' and ':u'.
 *	Chain 1 ends at the final '}' of the expression.
 *
 * After such a chain ends, its properties no longer have any effect.
 *
 * It may or may not have been intended that 'defined' has scope Expr while
 * 'sep' and 'oneBigWord' have smaller scope.
 *
 * See varmod-indirect.mk.
 */
typedef struct ModChain {
	/* The expression whose value the modifiers of this chain operate on. */
	Expr *expr;
	/* '\0' or '{' or '(' */
	char const_member startc;
	/* '\0' or '}' or ')' */
	char const_member endc;
	/* Word separator in expansions (see the :ts modifier). */
	char sep;
	/*
	 * True if some modifiers that otherwise split the variable value
	 * into words, like :S and :C, treat the variable value as a single
	 * big word, possibly containing spaces.
	 */
	bool oneBigWord;
} ModChain;
2096 
2097 static void
Expr_Define(Expr * expr)2098 Expr_Define(Expr *expr)
2099 {
2100 	if (expr->defined == DEF_UNDEF)
2101 		expr->defined = DEF_DEFINED;
2102 }
2103 
2104 static void
Expr_SetValue(Expr * expr,FStr value)2105 Expr_SetValue(Expr *expr, FStr value)
2106 {
2107 	FStr_Done(&expr->value);
2108 	expr->value = value;
2109 }
2110 
2111 static void
Expr_SetValueOwn(Expr * expr,char * value)2112 Expr_SetValueOwn(Expr *expr, char *value)
2113 {
2114 	Expr_SetValue(expr, FStr_InitOwn(value));
2115 }
2116 
2117 static void
Expr_SetValueRefer(Expr * expr,const char * value)2118 Expr_SetValueRefer(Expr *expr, const char *value)
2119 {
2120 	Expr_SetValue(expr, FStr_InitRefer(value));
2121 }
2122 
2123 static bool
Expr_ShouldEval(const Expr * expr)2124 Expr_ShouldEval(const Expr *expr)
2125 {
2126 	return VarEvalMode_ShouldEval(expr->emode);
2127 }
2128 
2129 static bool
ModChain_ShouldEval(const ModChain * ch)2130 ModChain_ShouldEval(const ModChain *ch)
2131 {
2132 	return Expr_ShouldEval(ch->expr);
2133 }
2134 
2135 
/* The outcome of an ApplyModifier_* function. */
typedef enum ApplyModifierResult {
	/* Continue parsing */
	AMR_OK,
	/* Not a match, try other modifiers as well. */
	AMR_UNKNOWN,
	/* Error out with "Bad modifier" message. */
	AMR_BAD,
	/* Error out without the standard error message. */
	AMR_CLEANUP
} ApplyModifierResult;
2146 
/*
 * Tell whether p starts with a backslash escape that is recognized inside a
 * modifier part.  The delimiter, '$' and '\' can always be escaped; '&' can
 * only be escaped if subst is given.  All other backslashes are ordinary
 * text.
 */
static bool
IsEscapedModifierPart(const char *p, char delim,
		      struct ModifyWord_SubstArgs *subst)
{
	char escaped;

	if (*p != '\\')
		return false;

	escaped = p[1];
	if (escaped == delim)
		return true;

	switch (escaped) {
	case '\\':
	case '$':
		return true;
	case '&':
		return subst != NULL;
	default:
		return false;
	}
}
2161 
/*
 * Parse a part of a modifier up to the next unescaped delimiter and collect
 * the text in 'part'.  See ParseModifierPart for the simpler variant without
 * the two ':S'-specific parameters.
 *
 * On success, *pp points right after the delimiter and VPR_OK is returned.
 * If the delimiter is missing, an error is printed, *pp points to the end of
 * the string, 'part' is released and VPR_ERR is returned.
 */
static VarParseResult
ParseModifierPartSubst(
    const char **pp,
    char delim,
    VarEvalMode emode,
    ModChain *ch,
    LazyBuf *part,
    /* For the first part of the :S modifier, sets out_pflags->anchorEnd
     * if the last character of the pattern is a $. */
    PatternFlags *out_pflags,
    /* For the second part of the :S modifier, allow ampersands to be
     * escaped and replace unescaped ampersands with subst->lhs. */
    struct ModifyWord_SubstArgs *subst
)
{
	const char *p;

	p = *pp;
	LazyBuf_Init(part, p);

	/*
	 * Skim through until the matching delimiter is found; pick up
	 * variable expressions on the way.
	 */
	while (*p != '\0' && *p != delim) {
		const char *varstart;

		/* A recognized escape adds only the escaped character. */
		if (IsEscapedModifierPart(p, delim, subst)) {
			LazyBuf_Add(part, p[1]);
			p += 2;
			continue;
		}

		if (*p != '$') {	/* Unescaped, simple text */
			if (subst != NULL && *p == '&')
				LazyBuf_AddSubstring(part, subst->lhs);
			else
				LazyBuf_Add(part, *p);
			p++;
			continue;
		}

		if (p[1] == delim) {	/* Unescaped $ at end of pattern */
			/* Either report the anchor or keep the '$' as text. */
			if (out_pflags != NULL)
				out_pflags->anchorEnd = true;
			else
				LazyBuf_Add(part, *p);
			p++;
			continue;
		}

		if (VarEvalMode_ShouldEval(emode)) {
			/* Nested variable, evaluated */
			const char *nested_p = p;
			FStr nested_val;

			(void)Var_Parse(&nested_p, ch->expr->scope,
			    VarEvalMode_WithoutKeepDollar(emode), &nested_val);
			/* TODO: handle errors */
			LazyBuf_AddStr(part, nested_val.str);
			FStr_Done(&nested_val);
			/* Continue right after the nested expression. */
			p += nested_p - p;
			continue;
		}

		/*
		 * XXX: This whole block is very similar to Var_Parse without
		 * VARE_WANTRES.  There may be subtle edge cases
		 * though that are not yet covered in the unit tests and that
		 * are parsed differently, depending on whether they are
		 * evaluated or not.
		 *
		 * This subtle difference is not documented in the manual
		 * page, neither is the difference between parsing :D and
		 * :M documented. No code should ever depend on these
		 * details, but who knows.
		 */

		varstart = p;	/* Nested variable, only parsed */
		if (p[1] == '(' || p[1] == '{') {
			/*
			 * Find the end of this variable reference
			 * and suck it in without further ado.
			 * It will be interpreted later.
			 */
			char startc = p[1];
			int endc = startc == '(' ? ')' : '}';
			int depth = 1;

			/*
			 * Count nested braces or parentheses of the same
			 * kind, ignoring those preceded by a backslash.
			 */
			for (p += 2; *p != '\0' && depth > 0; p++) {
				if (p[-1] != '\\') {
					if (*p == startc)
						depth++;
					if (*p == endc)
						depth--;
				}
			}
			LazyBuf_AddBytesBetween(part, varstart, p);
		} else {
			/* A lone '$' followed by a single character. */
			LazyBuf_Add(part, *varstart);
			p++;
		}
	}

	if (*p != delim) {
		*pp = p;
		Error("Unfinished modifier for \"%s\" ('%c' missing)",
		    ch->expr->name, delim);
		LazyBuf_Done(part);
		return VPR_ERR;
	}

	/* Skip the delimiter. */
	*pp = p + 1;

	{
		Substring sub = LazyBuf_Get(part);
		DEBUG2(VAR, "Modifier part: \"%.*s\"\n",
		    (int)Substring_Length(sub), sub.start);
	}

	return VPR_OK;
}
2285 
/*
 * Parse a part of a modifier such as the "from" and "to" in :S/from/to/ or
 * the "var" or "replacement ${var}" in :@var@replacement ${var}@, up to and
 * including the next unescaped delimiter.  The delimiter, as well as the
 * backslash or the dollar, can be escaped with a backslash.
 *
 * The parsed (and possibly expanded) text is collected in 'part'; the
 * delimiter is not included in it.  Return VPR_OK if the delimiter was
 * found, in which case the parsing position pp points right after the
 * delimiter.  Otherwise return the error result of ParseModifierPartSubst,
 * which has already released 'part' in that case.
 */
static VarParseResult
ParseModifierPart(
    /* The parsing position, updated upon return */
    const char **pp,
    /* Parsing stops at this delimiter */
    char delim,
    /* Mode for evaluating nested variables. */
    VarEvalMode emode,
    ModChain *ch,
    LazyBuf *part
)
{
	/* No ':S'-specific handling of a trailing '$' or of '&'. */
	return ParseModifierPartSubst(pp, delim, emode, ch, part, NULL, NULL);
}
2311 
2312 MAKE_INLINE bool
IsDelimiter(char c,const ModChain * ch)2313 IsDelimiter(char c, const ModChain *ch)
2314 {
2315 	return c == ':' || c == ch->endc;
2316 }
2317 
2318 /* Test whether mod starts with modname, followed by a delimiter. */
2319 MAKE_INLINE bool
ModMatch(const char * mod,const char * modname,const ModChain * ch)2320 ModMatch(const char *mod, const char *modname, const ModChain *ch)
2321 {
2322 	size_t n = strlen(modname);
2323 	return strncmp(mod, modname, n) == 0 && IsDelimiter(mod[n], ch);
2324 }
2325 
2326 /* Test whether mod starts with modname, followed by a delimiter or '='. */
2327 MAKE_INLINE bool
ModMatchEq(const char * mod,const char * modname,const ModChain * ch)2328 ModMatchEq(const char *mod, const char *modname, const ModChain *ch)
2329 {
2330 	size_t n = strlen(modname);
2331 	return strncmp(mod, modname, n) == 0 &&
2332 	       (IsDelimiter(mod[n], ch) || mod[n] == '=');
2333 }
2334 
/*
 * Parse an int at *pp, with the base determined by the prefix as in
 * strtol with base 0.
 *
 * On success, store the number in *out_num, advance *pp to the first
 * character after the number and return true.  If there is no number or
 * it does not fit into an int, return false and leave *pp and *out_num
 * unchanged.
 */
static bool
TryParseIntBase0(const char **pp, int *out_num)
{
	const char *start = *pp;
	char *end;
	long num;
	bool overflow;

	errno = 0;
	num = strtol(start, &end, 0);
	overflow = errno == ERANGE && (num == LONG_MAX || num == LONG_MIN);

	if (end == start || overflow || num < INT_MIN || num > INT_MAX)
		return false;

	*out_num = (int)num;
	*pp = end;
	return true;
}
2355 
/*
 * Parse a decimal size at *pp, which must start with a digit.
 *
 * On success, store the number in *out_num, advance *pp to the first
 * character after the number and return true.  Otherwise return false and
 * leave *pp and *out_num unchanged.
 */
static bool
TryParseSize(const char **pp, size_t *out_num)
{
	const char *start = *pp;
	char *end;
	unsigned long num;

	if (!ch_isdigit(*start))
		return false;

	errno = 0;
	num = strtoul(start, &end, 10);
	if (errno == ERANGE && num == ULONG_MAX)
		return false;
	if (num > SIZE_MAX)
		return false;

	*out_num = (size_t)num;
	*pp = end;
	return true;
}
2376 
/*
 * Parse a character code in the given base at *pp, which must start with
 * an alphanumeric character.
 *
 * On success, store the character in *out_ch, advance *pp to the first
 * character after the number and return true.  If the number is out of
 * range for an unsigned char, return false and leave *pp and *out_ch
 * unchanged.
 */
static bool
TryParseChar(const char **pp, int base, char *out_ch)
{
	const char *start = *pp;
	char *end;
	unsigned long num;

	if (!ch_isalnum(*start))
		return false;

	errno = 0;
	num = strtoul(start, &end, base);
	if (errno == ERANGE && num == ULONG_MAX)
		return false;
	if (num > UCHAR_MAX)
		return false;

	*out_ch = (char)num;
	*pp = end;
	return true;
}
2397 
2398 /*
2399  * Modify each word of the expression using the given function and place the
2400  * result back in the expression.
2401  */
2402 static void
ModifyWords(ModChain * ch,ModifyWordProc modifyWord,void * modifyWord_args,bool oneBigWord)2403 ModifyWords(ModChain *ch,
2404 	    ModifyWordProc modifyWord, void *modifyWord_args,
2405 	    bool oneBigWord)
2406 {
2407 	Expr *expr = ch->expr;
2408 	const char *val = expr->value.str;
2409 	SepBuf result;
2410 	SubstringWords words;
2411 	size_t i;
2412 	Substring word;
2413 
2414 	if (oneBigWord) {
2415 		SepBuf_Init(&result, ch->sep);
2416 		/* XXX: performance: Substring_InitStr calls strlen */
2417 		word = Substring_InitStr(val);
2418 		modifyWord(word, &result, modifyWord_args);
2419 		goto done;
2420 	}
2421 
2422 	words = Substring_Words(val, false);
2423 
2424 	DEBUG2(VAR, "ModifyWords: split \"%s\" into %u words\n",
2425 	    val, (unsigned)words.len);
2426 
2427 	SepBuf_Init(&result, ch->sep);
2428 	for (i = 0; i < words.len; i++) {
2429 		modifyWord(words.words[i], &result, modifyWord_args);
2430 		if (result.buf.len > 0)
2431 			SepBuf_Sep(&result);
2432 	}
2433 
2434 	SubstringWords_Free(words);
2435 
2436 done:
2437 	Expr_SetValueOwn(expr, SepBuf_DoneData(&result));
2438 }
2439 
2440 /* :@var@...${var}...@ */
2441 static ApplyModifierResult
ApplyModifier_Loop(const char ** pp,ModChain * ch)2442 ApplyModifier_Loop(const char **pp, ModChain *ch)
2443 {
2444 	Expr *expr = ch->expr;
2445 	struct ModifyWord_LoopArgs args;
2446 	char prev_sep;
2447 	VarParseResult res;
2448 	LazyBuf tvarBuf, strBuf;
2449 	FStr tvar, str;
2450 
2451 	args.scope = expr->scope;
2452 
2453 	(*pp)++;		/* Skip the first '@' */
2454 	res = ParseModifierPart(pp, '@', VARE_PARSE_ONLY, ch, &tvarBuf);
2455 	if (res != VPR_OK)
2456 		return AMR_CLEANUP;
2457 	tvar = LazyBuf_DoneGet(&tvarBuf);
2458 	args.var = tvar.str;
2459 	if (strchr(args.var, '$') != NULL) {
2460 		Parse_Error(PARSE_FATAL,
2461 		    "In the :@ modifier of \"%s\", the variable name \"%s\" "
2462 		    "must not contain a dollar.",
2463 		    expr->name, args.var);
2464 		return AMR_CLEANUP;
2465 	}
2466 
2467 	res = ParseModifierPart(pp, '@', VARE_PARSE_ONLY, ch, &strBuf);
2468 	if (res != VPR_OK)
2469 		return AMR_CLEANUP;
2470 	str = LazyBuf_DoneGet(&strBuf);
2471 	args.body = str.str;
2472 
2473 	if (!Expr_ShouldEval(expr))
2474 		goto done;
2475 
2476 	args.emode = VarEvalMode_WithoutKeepDollar(expr->emode);
2477 	prev_sep = ch->sep;
2478 	ch->sep = ' ';		/* XXX: should be ch->sep for consistency */
2479 	ModifyWords(ch, ModifyWord_Loop, &args, ch->oneBigWord);
2480 	ch->sep = prev_sep;
2481 	/* XXX: Consider restoring the previous value instead of deleting. */
2482 	Var_Delete(expr->scope, args.var);
2483 
2484 done:
2485 	FStr_Done(&tvar);
2486 	FStr_Done(&str);
2487 	return AMR_OK;
2488 }
2489 
2490 /* :Ddefined or :Uundefined */
2491 static ApplyModifierResult
ApplyModifier_Defined(const char ** pp,ModChain * ch)2492 ApplyModifier_Defined(const char **pp, ModChain *ch)
2493 {
2494 	Expr *expr = ch->expr;
2495 	LazyBuf buf;
2496 	const char *p;
2497 
2498 	VarEvalMode emode = VARE_PARSE_ONLY;
2499 	if (Expr_ShouldEval(expr))
2500 		if ((**pp == 'D') == (expr->defined == DEF_REGULAR))
2501 			emode = expr->emode;
2502 
2503 	p = *pp + 1;
2504 	LazyBuf_Init(&buf, p);
2505 	while (!IsDelimiter(*p, ch) && *p != '\0') {
2506 
2507 		/* XXX: This code is similar to the one in Var_Parse.
2508 		 * See if the code can be merged.
2509 		 * See also ApplyModifier_Match and ParseModifierPart. */
2510 
2511 		/* Escaped delimiter or other special character */
2512 		/* See Buf_AddEscaped in for.c. */
2513 		if (*p == '\\') {
2514 			char c = p[1];
2515 			if (IsDelimiter(c, ch) || c == '$' || c == '\\') {
2516 				LazyBuf_Add(&buf, c);
2517 				p += 2;
2518 				continue;
2519 			}
2520 		}
2521 
2522 		/* Nested variable expression */
2523 		if (*p == '$') {
2524 			FStr nested_val;
2525 
2526 			(void)Var_Parse(&p, expr->scope, emode, &nested_val);
2527 			/* TODO: handle errors */
2528 			if (Expr_ShouldEval(expr))
2529 				LazyBuf_AddStr(&buf, nested_val.str);
2530 			FStr_Done(&nested_val);
2531 			continue;
2532 		}
2533 
2534 		/* Ordinary text */
2535 		LazyBuf_Add(&buf, *p);
2536 		p++;
2537 	}
2538 	*pp = p;
2539 
2540 	Expr_Define(expr);
2541 
2542 	if (VarEvalMode_ShouldEval(emode))
2543 		Expr_SetValue(expr, Substring_Str(LazyBuf_Get(&buf)));
2544 	else
2545 		LazyBuf_Done(&buf);
2546 
2547 	return AMR_OK;
2548 }
2549 
2550 /* :L */
2551 static ApplyModifierResult
ApplyModifier_Literal(const char ** pp,ModChain * ch)2552 ApplyModifier_Literal(const char **pp, ModChain *ch)
2553 {
2554 	Expr *expr = ch->expr;
2555 
2556 	(*pp)++;
2557 
2558 	if (Expr_ShouldEval(expr)) {
2559 		Expr_Define(expr);
2560 		Expr_SetValueOwn(expr, bmake_strdup(expr->name));
2561 	}
2562 
2563 	return AMR_OK;
2564 }
2565 
/*
 * Parse a decimal timestamp at *pp, which must start with a digit.
 *
 * On success, store the timestamp in *out_time, advance *pp to the first
 * character after the number and return true.  Otherwise return false and
 * leave *pp and *out_time unchanged.
 */
static bool
TryParseTime(const char **pp, time_t *out_time)
{
	const char *start = *pp;
	char *end;
	unsigned long num;

	if (!ch_isdigit(*start))
		return false;

	errno = 0;
	num = strtoul(start, &end, 10);
	if (errno == ERANGE && num == ULONG_MAX)
		return false;

	*out_time = (time_t)num;	/* ignore possible truncation for now */
	*pp = end;
	return true;
}
2584 
2585 /* :gmtime */
2586 static ApplyModifierResult
ApplyModifier_Gmtime(const char ** pp,ModChain * ch)2587 ApplyModifier_Gmtime(const char **pp, ModChain *ch)
2588 {
2589 	time_t utc;
2590 
2591 	const char *mod = *pp;
2592 	if (!ModMatchEq(mod, "gmtime", ch))
2593 		return AMR_UNKNOWN;
2594 
2595 	if (mod[6] == '=') {
2596 		const char *p = mod + 7;
2597 		if (!TryParseTime(&p, &utc)) {
2598 			Parse_Error(PARSE_FATAL,
2599 			    "Invalid time value: %s", mod + 7);
2600 			return AMR_CLEANUP;
2601 		}
2602 		*pp = p;
2603 	} else {
2604 		utc = 0;
2605 		*pp = mod + 6;
2606 	}
2607 
2608 	if (ModChain_ShouldEval(ch))
2609 		Expr_SetValueOwn(ch->expr,
2610 		    VarStrftime(ch->expr->value.str, true, utc));
2611 
2612 	return AMR_OK;
2613 }
2614 
2615 /* :localtime */
2616 static ApplyModifierResult
ApplyModifier_Localtime(const char ** pp,ModChain * ch)2617 ApplyModifier_Localtime(const char **pp, ModChain *ch)
2618 {
2619 	time_t utc;
2620 
2621 	const char *mod = *pp;
2622 	if (!ModMatchEq(mod, "localtime", ch))
2623 		return AMR_UNKNOWN;
2624 
2625 	if (mod[9] == '=') {
2626 		const char *p = mod + 10;
2627 		if (!TryParseTime(&p, &utc)) {
2628 			Parse_Error(PARSE_FATAL,
2629 			    "Invalid time value: %s", mod + 10);
2630 			return AMR_CLEANUP;
2631 		}
2632 		*pp = p;
2633 	} else {
2634 		utc = 0;
2635 		*pp = mod + 9;
2636 	}
2637 
2638 	if (ModChain_ShouldEval(ch))
2639 		Expr_SetValueOwn(ch->expr,
2640 		    VarStrftime(ch->expr->value.str, false, utc));
2641 
2642 	return AMR_OK;
2643 }
2644 
2645 /* :hash */
2646 static ApplyModifierResult
ApplyModifier_Hash(const char ** pp,ModChain * ch)2647 ApplyModifier_Hash(const char **pp, ModChain *ch)
2648 {
2649 	if (!ModMatch(*pp, "hash", ch))
2650 		return AMR_UNKNOWN;
2651 	*pp += 4;
2652 
2653 	if (ModChain_ShouldEval(ch))
2654 		Expr_SetValueOwn(ch->expr, VarHash(ch->expr->value.str));
2655 
2656 	return AMR_OK;
2657 }
2658 
2659 /* :P */
2660 static ApplyModifierResult
ApplyModifier_Path(const char ** pp,ModChain * ch)2661 ApplyModifier_Path(const char **pp, ModChain *ch)
2662 {
2663 	Expr *expr = ch->expr;
2664 	GNode *gn;
2665 	char *path;
2666 
2667 	(*pp)++;
2668 
2669 	if (!ModChain_ShouldEval(ch))
2670 		return AMR_OK;
2671 
2672 	Expr_Define(expr);
2673 
2674 	gn = Targ_FindNode(expr->name);
2675 	if (gn == NULL || gn->type & OP_NOPATH) {
2676 		path = NULL;
2677 	} else if (gn->path != NULL) {
2678 		path = bmake_strdup(gn->path);
2679 	} else {
2680 		SearchPath *searchPath = Suff_FindPath(gn);
2681 		path = Dir_FindFile(expr->name, searchPath);
2682 	}
2683 	if (path == NULL)
2684 		path = bmake_strdup(expr->name);
2685 	Expr_SetValueOwn(expr, path);
2686 
2687 	return AMR_OK;
2688 }
2689 
2690 /* :!cmd! */
2691 static ApplyModifierResult
ApplyModifier_ShellCommand(const char ** pp,ModChain * ch)2692 ApplyModifier_ShellCommand(const char **pp, ModChain *ch)
2693 {
2694 	Expr *expr = ch->expr;
2695 	const char *errfmt;
2696 	VarParseResult res;
2697 	LazyBuf cmdBuf;
2698 	FStr cmd;
2699 
2700 	(*pp)++;
2701 	res = ParseModifierPart(pp, '!', expr->emode, ch, &cmdBuf);
2702 	if (res != VPR_OK)
2703 		return AMR_CLEANUP;
2704 	cmd = LazyBuf_DoneGet(&cmdBuf);
2705 
2706 
2707 	errfmt = NULL;
2708 	if (Expr_ShouldEval(expr))
2709 		Expr_SetValueOwn(expr, Cmd_Exec(cmd.str, &errfmt));
2710 	else
2711 		Expr_SetValueRefer(expr, "");
2712 	if (errfmt != NULL)
2713 		Error(errfmt, cmd.str); /* XXX: why still return AMR_OK? */
2714 	FStr_Done(&cmd);
2715 	Expr_Define(expr);
2716 
2717 	return AMR_OK;
2718 }
2719 
2720 /*
2721  * The :range modifier generates an integer sequence as long as the words.
2722  * The :range=7 modifier generates an integer sequence from 1 to 7.
2723  */
2724 static ApplyModifierResult
ApplyModifier_Range(const char ** pp,ModChain * ch)2725 ApplyModifier_Range(const char **pp, ModChain *ch)
2726 {
2727 	size_t n;
2728 	Buffer buf;
2729 	size_t i;
2730 
2731 	const char *mod = *pp;
2732 	if (!ModMatchEq(mod, "range", ch))
2733 		return AMR_UNKNOWN;
2734 
2735 	if (mod[5] == '=') {
2736 		const char *p = mod + 6;
2737 		if (!TryParseSize(&p, &n)) {
2738 			Parse_Error(PARSE_FATAL,
2739 			    "Invalid number \"%s\" for ':range' modifier",
2740 			    mod + 6);
2741 			return AMR_CLEANUP;
2742 		}
2743 		*pp = p;
2744 	} else {
2745 		n = 0;
2746 		*pp = mod + 5;
2747 	}
2748 
2749 	if (!ModChain_ShouldEval(ch))
2750 		return AMR_OK;
2751 
2752 	if (n == 0) {
2753 		Words words = Str_Words(ch->expr->value.str, false);
2754 		n = words.len;
2755 		Words_Free(words);
2756 	}
2757 
2758 	Buf_Init(&buf);
2759 
2760 	for (i = 0; i < n; i++) {
2761 		if (i != 0) {
2762 			/* XXX: Use ch->sep instead of ' ', for consistency. */
2763 			Buf_AddByte(&buf, ' ');
2764 		}
2765 		Buf_AddInt(&buf, 1 + (int)i);
2766 	}
2767 
2768 	Expr_SetValueOwn(ch->expr, Buf_DoneData(&buf));
2769 	return AMR_OK;
2770 }
2771 
2772 /* Parse a ':M' or ':N' modifier. */
2773 static void
ParseModifier_Match(const char ** pp,const ModChain * ch,char ** out_pattern)2774 ParseModifier_Match(const char **pp, const ModChain *ch,
2775 		    char **out_pattern)
2776 {
2777 	const char *mod = *pp;
2778 	Expr *expr = ch->expr;
2779 	bool copy = false;	/* pattern should be, or has been, copied */
2780 	bool needSubst = false;
2781 	const char *endpat;
2782 	char *pattern;
2783 
2784 	/*
2785 	 * In the loop below, ignore ':' unless we are at (or back to) the
2786 	 * original brace level.
2787 	 * XXX: This will likely not work right if $() and ${} are intermixed.
2788 	 */
2789 	/*
2790 	 * XXX: This code is similar to the one in Var_Parse.
2791 	 * See if the code can be merged.
2792 	 * See also ApplyModifier_Defined.
2793 	 */
2794 	int nest = 0;
2795 	const char *p;
2796 	for (p = mod + 1; *p != '\0' && !(*p == ':' && nest == 0); p++) {
2797 		if (*p == '\\' &&
2798 		    (IsDelimiter(p[1], ch) || p[1] == ch->startc)) {
2799 			if (!needSubst)
2800 				copy = true;
2801 			p++;
2802 			continue;
2803 		}
2804 		if (*p == '$')
2805 			needSubst = true;
2806 		if (*p == '(' || *p == '{')
2807 			nest++;
2808 		if (*p == ')' || *p == '}') {
2809 			nest--;
2810 			if (nest < 0)
2811 				break;
2812 		}
2813 	}
2814 	*pp = p;
2815 	endpat = p;
2816 
2817 	if (copy) {
2818 		char *dst;
2819 		const char *src;
2820 
2821 		/* Compress the \:'s out of the pattern. */
2822 		pattern = bmake_malloc((size_t)(endpat - (mod + 1)) + 1);
2823 		dst = pattern;
2824 		src = mod + 1;
2825 		for (; src < endpat; src++, dst++) {
2826 			if (src[0] == '\\' && src + 1 < endpat &&
2827 			    /* XXX: ch->startc is missing here; see above */
2828 			    IsDelimiter(src[1], ch))
2829 				src++;
2830 			*dst = *src;
2831 		}
2832 		*dst = '\0';
2833 	} else {
2834 		pattern = bmake_strsedup(mod + 1, endpat);
2835 	}
2836 
2837 	if (needSubst) {
2838 		char *old_pattern = pattern;
2839 		(void)Var_Subst(pattern, expr->scope, expr->emode, &pattern);
2840 		/* TODO: handle errors */
2841 		free(old_pattern);
2842 	}
2843 
2844 	DEBUG2(VAR, "Pattern for ':%c' is \"%s\"\n", mod[0], pattern);
2845 
2846 	*out_pattern = pattern;
2847 }
2848 
2849 /* :Mpattern or :Npattern */
2850 static ApplyModifierResult
ApplyModifier_Match(const char ** pp,ModChain * ch)2851 ApplyModifier_Match(const char **pp, ModChain *ch)
2852 {
2853 	const char mod = **pp;
2854 	char *pattern;
2855 
2856 	ParseModifier_Match(pp, ch, &pattern);
2857 
2858 	if (ModChain_ShouldEval(ch)) {
2859 		ModifyWordProc modifyWord =
2860 		    mod == 'M' ? ModifyWord_Match : ModifyWord_NoMatch;
2861 		ModifyWords(ch, modifyWord, pattern, ch->oneBigWord);
2862 	}
2863 
2864 	free(pattern);
2865 	return AMR_OK;
2866 }
2867 
2868 static void
ParsePatternFlags(const char ** pp,PatternFlags * pflags,bool * oneBigWord)2869 ParsePatternFlags(const char **pp, PatternFlags *pflags, bool *oneBigWord)
2870 {
2871 	for (;; (*pp)++) {
2872 		if (**pp == 'g')
2873 			pflags->subGlobal = true;
2874 		else if (**pp == '1')
2875 			pflags->subOnce = true;
2876 		else if (**pp == 'W')
2877 			*oneBigWord = true;
2878 		else
2879 			break;
2880 	}
2881 }
2882 
/* Return pattern flags in which all four initialized members are false. */
MAKE_INLINE PatternFlags
PatternFlags_None(void)
{
	PatternFlags pflags = { false, false, false, false };
	return pflags;
}
2889 
2890 /* :S,from,to, */
2891 static ApplyModifierResult
ApplyModifier_Subst(const char ** pp,ModChain * ch)2892 ApplyModifier_Subst(const char **pp, ModChain *ch)
2893 {
2894 	struct ModifyWord_SubstArgs args;
2895 	bool oneBigWord;
2896 	VarParseResult res;
2897 	LazyBuf lhsBuf, rhsBuf;
2898 
2899 	char delim = (*pp)[1];
2900 	if (delim == '\0') {
2901 		Error("Missing delimiter for modifier ':S'");
2902 		(*pp)++;
2903 		return AMR_CLEANUP;
2904 	}
2905 
2906 	*pp += 2;
2907 
2908 	args.pflags = PatternFlags_None();
2909 	args.matched = false;
2910 
2911 	if (**pp == '^') {
2912 		args.pflags.anchorStart = true;
2913 		(*pp)++;
2914 	}
2915 
2916 	res = ParseModifierPartSubst(pp, delim, ch->expr->emode, ch, &lhsBuf,
2917 	    &args.pflags, NULL);
2918 	if (res != VPR_OK)
2919 		return AMR_CLEANUP;
2920 	args.lhs = LazyBuf_Get(&lhsBuf);
2921 
2922 	res = ParseModifierPartSubst(pp, delim, ch->expr->emode, ch, &rhsBuf,
2923 	    NULL, &args);
2924 	if (res != VPR_OK) {
2925 		LazyBuf_Done(&lhsBuf);
2926 		return AMR_CLEANUP;
2927 	}
2928 	args.rhs = LazyBuf_Get(&rhsBuf);
2929 
2930 	oneBigWord = ch->oneBigWord;
2931 	ParsePatternFlags(pp, &args.pflags, &oneBigWord);
2932 
2933 	ModifyWords(ch, ModifyWord_Subst, &args, oneBigWord);
2934 
2935 	LazyBuf_Done(&lhsBuf);
2936 	LazyBuf_Done(&rhsBuf);
2937 	return AMR_OK;
2938 }
2939 
2940 #ifndef NO_REGEX
2941 
2942 /* :C,from,to, */
2943 static ApplyModifierResult
ApplyModifier_Regex(const char ** pp,ModChain * ch)2944 ApplyModifier_Regex(const char **pp, ModChain *ch)
2945 {
2946 	struct ModifyWord_SubstRegexArgs args;
2947 	bool oneBigWord;
2948 	int error;
2949 	VarParseResult res;
2950 	LazyBuf reBuf, replaceBuf;
2951 	FStr re, replace;
2952 
2953 	char delim = (*pp)[1];
2954 	if (delim == '\0') {
2955 		Error("Missing delimiter for :C modifier");
2956 		(*pp)++;
2957 		return AMR_CLEANUP;
2958 	}
2959 
2960 	*pp += 2;
2961 
2962 	res = ParseModifierPart(pp, delim, ch->expr->emode, ch, &reBuf);
2963 	if (res != VPR_OK)
2964 		return AMR_CLEANUP;
2965 	re = LazyBuf_DoneGet(&reBuf);
2966 
2967 	res = ParseModifierPart(pp, delim, ch->expr->emode, ch, &replaceBuf);
2968 	if (res != VPR_OK) {
2969 		FStr_Done(&re);
2970 		return AMR_CLEANUP;
2971 	}
2972 	replace = LazyBuf_DoneGet(&replaceBuf);
2973 	args.replace = replace.str;
2974 
2975 	args.pflags = PatternFlags_None();
2976 	args.matched = false;
2977 	oneBigWord = ch->oneBigWord;
2978 	ParsePatternFlags(pp, &args.pflags, &oneBigWord);
2979 
2980 	if (!ModChain_ShouldEval(ch)) {
2981 		FStr_Done(&replace);
2982 		FStr_Done(&re);
2983 		return AMR_OK;
2984 	}
2985 
2986 	error = regcomp(&args.re, re.str, REG_EXTENDED);
2987 	if (error != 0) {
2988 		VarREError(error, &args.re, "Regex compilation error");
2989 		FStr_Done(&replace);
2990 		FStr_Done(&re);
2991 		return AMR_CLEANUP;
2992 	}
2993 
2994 	args.nsub = args.re.re_nsub + 1;
2995 	if (args.nsub > 10)
2996 		args.nsub = 10;
2997 
2998 	ModifyWords(ch, ModifyWord_SubstRegex, &args, oneBigWord);
2999 
3000 	regfree(&args.re);
3001 	FStr_Done(&replace);
3002 	FStr_Done(&re);
3003 	return AMR_OK;
3004 }
3005 
3006 #endif
3007 
3008 /* :Q, :q */
3009 static ApplyModifierResult
ApplyModifier_Quote(const char ** pp,ModChain * ch)3010 ApplyModifier_Quote(const char **pp, ModChain *ch)
3011 {
3012 	LazyBuf buf;
3013 	bool quoteDollar;
3014 
3015 	quoteDollar = **pp == 'q';
3016 	if (!IsDelimiter((*pp)[1], ch))
3017 		return AMR_UNKNOWN;
3018 	(*pp)++;
3019 
3020 	if (!ModChain_ShouldEval(ch))
3021 		return AMR_OK;
3022 
3023 	VarQuote(ch->expr->value.str, quoteDollar, &buf);
3024 	if (buf.data != NULL)
3025 		Expr_SetValue(ch->expr, LazyBuf_DoneGet(&buf));
3026 	else
3027 		LazyBuf_Done(&buf);
3028 
3029 	return AMR_OK;
3030 }
3031 
/*
 * Callback for ModifyWords that adds each word unmodified to the buffer.
 * ApplyModifier_ToSep uses it to join the words again after the separator
 * has changed.  The data argument is unused.
 */
/*ARGSUSED*/
static void
ModifyWord_Copy(Substring word, SepBuf *buf, void *data MAKE_ATTR_UNUSED)
{
	SepBuf_AddSubstring(buf, word);
}
3038 
3039 /* :ts<separator> */
3040 static ApplyModifierResult
ApplyModifier_ToSep(const char ** pp,ModChain * ch)3041 ApplyModifier_ToSep(const char **pp, ModChain *ch)
3042 {
3043 	const char *sep = *pp + 2;
3044 
3045 	/*
3046 	 * Even in parse-only mode, proceed as normal since there is
3047 	 * neither any observable side effect nor a performance penalty.
3048 	 * Checking for wantRes for every single piece of code in here
3049 	 * would make the code in this function too hard to read.
3050 	 */
3051 
3052 	/* ":ts<any><endc>" or ":ts<any>:" */
3053 	if (sep[0] != ch->endc && IsDelimiter(sep[1], ch)) {
3054 		*pp = sep + 1;
3055 		ch->sep = sep[0];
3056 		goto ok;
3057 	}
3058 
3059 	/* ":ts<endc>" or ":ts:" */
3060 	if (IsDelimiter(sep[0], ch)) {
3061 		*pp = sep;
3062 		ch->sep = '\0';	/* no separator */
3063 		goto ok;
3064 	}
3065 
3066 	/* ":ts<unrecognised><unrecognised>". */
3067 	if (sep[0] != '\\') {
3068 		(*pp)++;	/* just for backwards compatibility */
3069 		return AMR_BAD;
3070 	}
3071 
3072 	/* ":ts\n" */
3073 	if (sep[1] == 'n') {
3074 		*pp = sep + 2;
3075 		ch->sep = '\n';
3076 		goto ok;
3077 	}
3078 
3079 	/* ":ts\t" */
3080 	if (sep[1] == 't') {
3081 		*pp = sep + 2;
3082 		ch->sep = '\t';
3083 		goto ok;
3084 	}
3085 
3086 	/* ":ts\x40" or ":ts\100" */
3087 	{
3088 		const char *p = sep + 1;
3089 		int base = 8;	/* assume octal */
3090 
3091 		if (sep[1] == 'x') {
3092 			base = 16;
3093 			p++;
3094 		} else if (!ch_isdigit(sep[1])) {
3095 			(*pp)++;	/* just for backwards compatibility */
3096 			return AMR_BAD;	/* ":ts<backslash><unrecognised>". */
3097 		}
3098 
3099 		if (!TryParseChar(&p, base, &ch->sep)) {
3100 			Parse_Error(PARSE_FATAL,
3101 			    "Invalid character number: %s", p);
3102 			return AMR_CLEANUP;
3103 		}
3104 		if (!IsDelimiter(*p, ch)) {
3105 			(*pp)++;	/* just for backwards compatibility */
3106 			return AMR_BAD;
3107 		}
3108 
3109 		*pp = p;
3110 	}
3111 
3112 ok:
3113 	ModifyWords(ch, ModifyWord_Copy, NULL, ch->oneBigWord);
3114 	return AMR_OK;
3115 }
3116 
/*
 * Return a newly allocated copy of str in which each character, including
 * the terminating '\0', has been passed through ch_toupper.
 */
static char *
str_toupper(const char *str)
{
	size_t len = strlen(str);
	char *upper = bmake_malloc(len + 1);
	size_t i;

	for (i = 0; i <= len; i++)
		upper[i] = ch_toupper(str[i]);

	return upper;
}
3130 
/*
 * Return a copy of str, obtained from bmake_malloc, in which every
 * character has been passed through ch_tolower.
 */
static char *
str_tolower(const char *str)
{
	size_t remaining = strlen(str) + 1;	/* includes the '\0' */
	char *lower = bmake_malloc(remaining);
	const char *src = str;
	char *dst = lower;

	/* The terminating '\0' is passed through ch_tolower as well. */
	while (remaining-- > 0)
		*dst++ = ch_tolower(*src++);

	return lower;
}
3144 
3145 /* :tA, :tu, :tl, :ts<separator>, etc. */
3146 static ApplyModifierResult
ApplyModifier_To(const char ** pp,ModChain * ch)3147 ApplyModifier_To(const char **pp, ModChain *ch)
3148 {
3149 	Expr *expr = ch->expr;
3150 	const char *mod = *pp;
3151 	assert(mod[0] == 't');
3152 
3153 	if (IsDelimiter(mod[1], ch) || mod[1] == '\0') {
3154 		*pp = mod + 1;
3155 		return AMR_BAD;	/* Found ":t<endc>" or ":t:". */
3156 	}
3157 
3158 	if (mod[1] == 's')
3159 		return ApplyModifier_ToSep(pp, ch);
3160 
3161 	if (!IsDelimiter(mod[2], ch)) {			/* :t<unrecognized> */
3162 		*pp = mod + 1;
3163 		return AMR_BAD;
3164 	}
3165 
3166 	if (mod[1] == 'A') {				/* :tA */
3167 		*pp = mod + 2;
3168 		ModifyWords(ch, ModifyWord_Realpath, NULL, ch->oneBigWord);
3169 		return AMR_OK;
3170 	}
3171 
3172 	if (mod[1] == 'u') {				/* :tu */
3173 		*pp = mod + 2;
3174 		if (ModChain_ShouldEval(ch))
3175 			Expr_SetValueOwn(expr, str_toupper(expr->value.str));
3176 		return AMR_OK;
3177 	}
3178 
3179 	if (mod[1] == 'l') {				/* :tl */
3180 		*pp = mod + 2;
3181 		if (ModChain_ShouldEval(ch))
3182 			Expr_SetValueOwn(expr, str_tolower(expr->value.str));
3183 		return AMR_OK;
3184 	}
3185 
3186 	if (mod[1] == 'W' || mod[1] == 'w') {		/* :tW, :tw */
3187 		*pp = mod + 2;
3188 		ch->oneBigWord = mod[1] == 'W';
3189 		return AMR_OK;
3190 	}
3191 
3192 	/* Found ":t<unrecognised>:" or ":t<unrecognised><endc>". */
3193 	*pp = mod + 1;		/* XXX: unnecessary but observable */
3194 	return AMR_BAD;
3195 }
3196 
3197 /* :[#], :[1], :[-1..1], etc. */
3198 static ApplyModifierResult
ApplyModifier_Words(const char ** pp,ModChain * ch)3199 ApplyModifier_Words(const char **pp, ModChain *ch)
3200 {
3201 	Expr *expr = ch->expr;
3202 	const char *estr;
3203 	int first, last;
3204 	VarParseResult res;
3205 	const char *p;
3206 	LazyBuf estrBuf;
3207 	FStr festr;
3208 
3209 	(*pp)++;		/* skip the '[' */
3210 	res = ParseModifierPart(pp, ']', expr->emode, ch, &estrBuf);
3211 	if (res != VPR_OK)
3212 		return AMR_CLEANUP;
3213 	festr = LazyBuf_DoneGet(&estrBuf);
3214 	estr = festr.str;
3215 
3216 	if (!IsDelimiter(**pp, ch))
3217 		goto bad_modifier;		/* Found junk after ']' */
3218 
3219 	if (!ModChain_ShouldEval(ch))
3220 		goto ok;
3221 
3222 	if (estr[0] == '\0')
3223 		goto bad_modifier;			/* Found ":[]". */
3224 
3225 	if (estr[0] == '#' && estr[1] == '\0') {	/* Found ":[#]" */
3226 		if (ch->oneBigWord) {
3227 			Expr_SetValueRefer(expr, "1");
3228 		} else {
3229 			Buffer buf;
3230 
3231 			Words words = Str_Words(expr->value.str, false);
3232 			size_t ac = words.len;
3233 			Words_Free(words);
3234 
3235 			/* 3 digits + '\0' is usually enough */
3236 			Buf_InitSize(&buf, 4);
3237 			Buf_AddInt(&buf, (int)ac);
3238 			Expr_SetValueOwn(expr, Buf_DoneData(&buf));
3239 		}
3240 		goto ok;
3241 	}
3242 
3243 	if (estr[0] == '*' && estr[1] == '\0') {	/* Found ":[*]" */
3244 		ch->oneBigWord = true;
3245 		goto ok;
3246 	}
3247 
3248 	if (estr[0] == '@' && estr[1] == '\0') {	/* Found ":[@]" */
3249 		ch->oneBigWord = false;
3250 		goto ok;
3251 	}
3252 
3253 	/*
3254 	 * We expect estr to contain a single integer for :[N], or two
3255 	 * integers separated by ".." for :[start..end].
3256 	 */
3257 	p = estr;
3258 	if (!TryParseIntBase0(&p, &first))
3259 		goto bad_modifier;	/* Found junk instead of a number */
3260 
3261 	if (p[0] == '\0') {		/* Found only one integer in :[N] */
3262 		last = first;
3263 	} else if (p[0] == '.' && p[1] == '.' && p[2] != '\0') {
3264 		/* Expecting another integer after ".." */
3265 		p += 2;
3266 		if (!TryParseIntBase0(&p, &last) || *p != '\0')
3267 			goto bad_modifier; /* Found junk after ".." */
3268 	} else
3269 		goto bad_modifier;	/* Found junk instead of ".." */
3270 
3271 	/*
3272 	 * Now first and last are properly filled in, but we still have to
3273 	 * check for 0 as a special case.
3274 	 */
3275 	if (first == 0 && last == 0) {
3276 		/* ":[0]" or perhaps ":[0..0]" */
3277 		ch->oneBigWord = true;
3278 		goto ok;
3279 	}
3280 
3281 	/* ":[0..N]" or ":[N..0]" */
3282 	if (first == 0 || last == 0)
3283 		goto bad_modifier;
3284 
3285 	/* Normal case: select the words described by first and last. */
3286 	Expr_SetValueOwn(expr,
3287 	    VarSelectWords(expr->value.str, first, last,
3288 	        ch->sep, ch->oneBigWord));
3289 
3290 ok:
3291 	FStr_Done(&festr);
3292 	return AMR_OK;
3293 
3294 bad_modifier:
3295 	FStr_Done(&festr);
3296 	return AMR_BAD;
3297 }
3298 
/*
 * Comparison callback in the shape expected by qsort: a and b each point
 * to a string pointer; the strings are ordered ascending by strcmp.
 */
static int
str_cmp_asc(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcmp(*lhs, *rhs);
}
3304 
/*
 * Comparison callback in the shape expected by qsort: a and b each point
 * to a string pointer; the strings are ordered descending by strcmp.
 */
static int
str_cmp_desc(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	/* Swapping the operands reverses the order. */
	return strcmp(*rhs, *lhs);
}
3310 
/*
 * Shuffle the n string pointers in strs in place (Fisher-Yates), taking
 * the random indices from random().
 *
 * Arrays with fewer than 2 elements are left untouched and random() is
 * not called for them.
 */
static void
ShuffleStrings(char **strs, size_t n)
{
	size_t i;

	/*
	 * Count down from n rather than from n - 1: for n == 0, the
	 * unsigned expression n - 1 would wrap around to SIZE_MAX, leading
	 * to a modulo by zero and out-of-bounds accesses.
	 *
	 * Here, i is the number of elements that are still unshuffled; the
	 * last of them is swapped with a randomly chosen one among them.
	 */
	for (i = n; i > 1; i--) {
		size_t rndidx = (size_t)random() % i;
		char *t = strs[i - 1];
		strs[i - 1] = strs[rndidx];
		strs[rndidx] = t;
	}
}
3323 
3324 /* :O (order ascending) or :Or (order descending) or :Ox (shuffle) */
3325 static ApplyModifierResult
ApplyModifier_Order(const char ** pp,ModChain * ch)3326 ApplyModifier_Order(const char **pp, ModChain *ch)
3327 {
3328 	const char *mod = (*pp)++;	/* skip past the 'O' in any case */
3329 	Words words;
3330 	enum SortMode {
3331 		ASC, DESC, SHUFFLE
3332 	} mode;
3333 
3334 	if (IsDelimiter(mod[1], ch)) {
3335 		mode = ASC;
3336 	} else if ((mod[1] == 'r' || mod[1] == 'x') &&
3337 	    IsDelimiter(mod[2], ch)) {
3338 		(*pp)++;
3339 		mode = mod[1] == 'r' ? DESC : SHUFFLE;
3340 	} else
3341 		return AMR_BAD;
3342 
3343 	if (!ModChain_ShouldEval(ch))
3344 		return AMR_OK;
3345 
3346 	words = Str_Words(ch->expr->value.str, false);
3347 	if (mode == SHUFFLE)
3348 		ShuffleStrings(words.words, words.len);
3349 	else
3350 		qsort(words.words, words.len, sizeof words.words[0],
3351 		    mode == ASC ? str_cmp_asc : str_cmp_desc);
3352 	Expr_SetValueOwn(ch->expr, Words_JoinFree(words));
3353 
3354 	return AMR_OK;
3355 }
3356 
3357 /* :? then : else */
3358 static ApplyModifierResult
ApplyModifier_IfElse(const char ** pp,ModChain * ch)3359 ApplyModifier_IfElse(const char **pp, ModChain *ch)
3360 {
3361 	Expr *expr = ch->expr;
3362 	VarParseResult res;
3363 	LazyBuf buf;
3364 	FStr then_expr, else_expr;
3365 
3366 	bool value = false;
3367 	VarEvalMode then_emode = VARE_PARSE_ONLY;
3368 	VarEvalMode else_emode = VARE_PARSE_ONLY;
3369 
3370 	int cond_rc = COND_PARSE;	/* anything other than COND_INVALID */
3371 	if (Expr_ShouldEval(expr)) {
3372 		cond_rc = Cond_EvalCondition(expr->name, &value);
3373 		if (cond_rc != COND_INVALID && value)
3374 			then_emode = expr->emode;
3375 		if (cond_rc != COND_INVALID && !value)
3376 			else_emode = expr->emode;
3377 	}
3378 
3379 	(*pp)++;			/* skip past the '?' */
3380 	res = ParseModifierPart(pp, ':', then_emode, ch, &buf);
3381 	if (res != VPR_OK)
3382 		return AMR_CLEANUP;
3383 	then_expr = LazyBuf_DoneGet(&buf);
3384 
3385 	res = ParseModifierPart(pp, ch->endc, else_emode, ch, &buf);
3386 	if (res != VPR_OK) {
3387 		FStr_Done(&then_expr);
3388 		return AMR_CLEANUP;
3389 	}
3390 	else_expr = LazyBuf_DoneGet(&buf);
3391 
3392 	(*pp)--;		/* Go back to the ch->endc. */
3393 
3394 	if (cond_rc == COND_INVALID) {
3395 		Error("Bad conditional expression '%s' in '%s?%s:%s'",
3396 		    expr->name, expr->name, then_expr.str, else_expr.str);
3397 		return AMR_CLEANUP;
3398 	}
3399 
3400 	if (!ModChain_ShouldEval(ch)) {
3401 		FStr_Done(&then_expr);
3402 		FStr_Done(&else_expr);
3403 	} else if (value) {
3404 		Expr_SetValue(expr, then_expr);
3405 		FStr_Done(&else_expr);
3406 	} else {
3407 		FStr_Done(&then_expr);
3408 		Expr_SetValue(expr, else_expr);
3409 	}
3410 	Expr_Define(expr);
3411 	return AMR_OK;
3412 }
3413 
3414 /*
3415  * The ::= modifiers are special in that they do not read the variable value
3416  * but instead assign to that variable.  They always expand to an empty
3417  * string.
3418  *
3419  * Their main purpose is in supporting .for loops that generate shell commands
3420  * since an ordinary variable assignment at that point would terminate the
3421  * dependency group for these targets.  For example:
3422  *
3423  * list-targets: .USE
3424  * .for i in ${.TARGET} ${.TARGET:R}.gz
3425  *	@${t::=$i}
3426  *	@echo 'The target is ${t:T}.'
3427  * .endfor
3428  *
3429  *	  ::=<str>	Assigns <str> as the new value of variable.
3430  *	  ::?=<str>	Assigns <str> as value of variable if
3431  *			it was not already set.
3432  *	  ::+=<str>	Appends <str> to variable.
3433  *	  ::!=<cmd>	Assigns output of <cmd> as the new value of
3434  *			variable.
3435  */
3436 static ApplyModifierResult
ApplyModifier_Assign(const char ** pp,ModChain * ch)3437 ApplyModifier_Assign(const char **pp, ModChain *ch)
3438 {
3439 	Expr *expr = ch->expr;
3440 	GNode *scope;
3441 	FStr val;
3442 	VarParseResult res;
3443 	LazyBuf buf;
3444 
3445 	const char *mod = *pp;
3446 	const char *op = mod + 1;
3447 
3448 	if (op[0] == '=')
3449 		goto ok;
3450 	if ((op[0] == '!' || op[0] == '+' || op[0] == '?') && op[1] == '=')
3451 		goto ok;
3452 	return AMR_UNKNOWN;	/* "::<unrecognised>" */
3453 
3454 ok:
3455 	if (expr->name[0] == '\0') {
3456 		*pp = mod + 1;
3457 		return AMR_BAD;
3458 	}
3459 
3460 	switch (op[0]) {
3461 	case '+':
3462 	case '?':
3463 	case '!':
3464 		*pp = mod + 3;
3465 		break;
3466 	default:
3467 		*pp = mod + 2;
3468 		break;
3469 	}
3470 
3471 	res = ParseModifierPart(pp, ch->endc, expr->emode, ch, &buf);
3472 	if (res != VPR_OK)
3473 		return AMR_CLEANUP;
3474 	val = LazyBuf_DoneGet(&buf);
3475 
3476 	(*pp)--;		/* Go back to the ch->endc. */
3477 
3478 	if (!Expr_ShouldEval(expr))
3479 		goto done;
3480 
3481 	scope = expr->scope;	/* scope where v belongs */
3482 	if (expr->defined == DEF_REGULAR && expr->scope != SCOPE_GLOBAL) {
3483 		Var *gv = VarFind(expr->name, expr->scope, false);
3484 		if (gv == NULL)
3485 			scope = SCOPE_GLOBAL;
3486 		else
3487 			VarFreeEnv(gv);
3488 	}
3489 
3490 	switch (op[0]) {
3491 	case '+':
3492 		Var_Append(scope, expr->name, val.str);
3493 		break;
3494 	case '!': {
3495 		const char *errfmt;
3496 		char *cmd_output = Cmd_Exec(val.str, &errfmt);
3497 		if (errfmt != NULL)
3498 			Error(errfmt, val.str);
3499 		else
3500 			Var_Set(scope, expr->name, cmd_output);
3501 		free(cmd_output);
3502 		break;
3503 	}
3504 	case '?':
3505 		if (expr->defined == DEF_REGULAR)
3506 			break;
3507 		/* FALLTHROUGH */
3508 	default:
3509 		Var_Set(scope, expr->name, val.str);
3510 		break;
3511 	}
3512 	Expr_SetValueRefer(expr, "");
3513 
3514 done:
3515 	FStr_Done(&val);
3516 	return AMR_OK;
3517 }
3518 
3519 /*
3520  * :_=...
3521  * remember current value
3522  */
3523 static ApplyModifierResult
ApplyModifier_Remember(const char ** pp,ModChain * ch)3524 ApplyModifier_Remember(const char **pp, ModChain *ch)
3525 {
3526 	Expr *expr = ch->expr;
3527 	const char *mod = *pp;
3528 	FStr name;
3529 
3530 	if (!ModMatchEq(mod, "_", ch))
3531 		return AMR_UNKNOWN;
3532 
3533 	name = FStr_InitRefer("_");
3534 	if (mod[1] == '=') {
3535 		/*
3536 		 * XXX: This ad-hoc call to strcspn deviates from the usual
3537 		 * behavior defined in ParseModifierPart.  This creates an
3538 		 * unnecessary, undocumented inconsistency in make.
3539 		 */
3540 		const char *arg = mod + 2;
3541 		size_t argLen = strcspn(arg, ":)}");
3542 		*pp = arg + argLen;
3543 		name = FStr_InitOwn(bmake_strldup(arg, argLen));
3544 	} else
3545 		*pp = mod + 1;
3546 
3547 	if (Expr_ShouldEval(expr))
3548 		Var_Set(expr->scope, name.str, expr->value.str);
3549 	FStr_Done(&name);
3550 
3551 	return AMR_OK;
3552 }
3553 
3554 /*
3555  * Apply the given function to each word of the variable value,
3556  * for a single-letter modifier such as :H, :T.
3557  */
3558 static ApplyModifierResult
ApplyModifier_WordFunc(const char ** pp,ModChain * ch,ModifyWordProc modifyWord)3559 ApplyModifier_WordFunc(const char **pp, ModChain *ch,
3560 		       ModifyWordProc modifyWord)
3561 {
3562 	if (!IsDelimiter((*pp)[1], ch))
3563 		return AMR_UNKNOWN;
3564 	(*pp)++;
3565 
3566 	if (ModChain_ShouldEval(ch))
3567 		ModifyWords(ch, modifyWord, NULL, ch->oneBigWord);
3568 
3569 	return AMR_OK;
3570 }
3571 
3572 static ApplyModifierResult
ApplyModifier_Unique(const char ** pp,ModChain * ch)3573 ApplyModifier_Unique(const char **pp, ModChain *ch)
3574 {
3575 	if (!IsDelimiter((*pp)[1], ch))
3576 		return AMR_UNKNOWN;
3577 	(*pp)++;
3578 
3579 	if (ModChain_ShouldEval(ch))
3580 		Expr_SetValueOwn(ch->expr, VarUniq(ch->expr->value.str));
3581 
3582 	return AMR_OK;
3583 }
3584 
#ifdef SYSVVARSUB
/*
 * :from=to
 *
 * The SysV-style substitution; it lasts until the end of the variable
 * expression.
 *
 * Returns AMR_UNKNOWN, without consuming anything, if the text up to the
 * matching ch->endc does not contain a '='.  Returns AMR_CLEANUP if either
 * side cannot be parsed.  Otherwise returns AMR_OK, with *pp pointing at
 * ch->endc.
 */
static ApplyModifierResult
ApplyModifier_SysV(const char **pp, ModChain *ch)
{
	Expr *expr = ch->expr;
	VarParseResult res;
	LazyBuf lhsBuf, rhsBuf;
	FStr rhs;
	struct ModifyWord_SysVSubstArgs args;
	Substring lhs;
	const char *lhsSuffix;

	const char *mod = *pp;
	bool eqFound = false;

	/*
	 * First we make a pass through the string trying to verify it is a
	 * SysV-make-style translation. It must be: <lhs>=<rhs>
	 */
	int depth = 1;
	const char *p = mod;
	while (*p != '\0' && depth > 0) {
		if (*p == '=') {	/* XXX: should also test depth == 1 */
			eqFound = true;
			/* continue looking for ch->endc */
		} else if (*p == ch->endc)
			depth--;
		else if (*p == ch->startc)
			depth++;
		if (depth > 0)
			p++;
	}
	if (*p != ch->endc || !eqFound)
		return AMR_UNKNOWN;

	res = ParseModifierPart(pp, '=', expr->emode, ch, &lhsBuf);
	if (res != VPR_OK)
		return AMR_CLEANUP;

	/* The SysV modifier lasts until the end of the variable expression. */
	res = ParseModifierPart(pp, ch->endc, expr->emode, ch, &rhsBuf);
	if (res != VPR_OK) {
		LazyBuf_Done(&lhsBuf);
		return AMR_CLEANUP;
	}
	rhs = LazyBuf_DoneGet(&rhsBuf);

	(*pp)--;		/* Go back to the ch->endc. */

	/* Do not turn an empty expression into non-empty. */
	if (lhsBuf.len == 0 && expr->value.str[0] == '\0')
		goto done;

	/* Split the left-hand side at the first '%', if there is any. */
	lhs = LazyBuf_Get(&lhsBuf);
	lhsSuffix = Substring_SkipFirst(lhs, '%');

	args.scope = expr->scope;
	args.lhsPrefix = Substring_Init(lhs.start,
	    lhsSuffix != lhs.start ? lhsSuffix - 1 : lhs.start);
	args.lhsPercent = lhsSuffix != lhs.start;
	args.lhsSuffix = Substring_Init(lhsSuffix, lhs.end);
	args.rhs = rhs.str;

	ModifyWords(ch, ModifyWord_SysVSubst, &args, ch->oneBigWord);

done:
	LazyBuf_Done(&lhsBuf);
	/* The right-hand side is no longer referenced; do not leak it. */
	FStr_Done(&rhs);
	return AMR_OK;
}
#endif
3656 
#ifdef SUNSHCMD
/*
 * :sh
 *
 * When evaluating, pass the expression value to Cmd_Exec and use its
 * result as the new value.  Anything other than exactly "sh" followed by
 * a delimiter yields AMR_UNKNOWN, without consuming anything.
 */
static ApplyModifierResult
ApplyModifier_SunShell(const char **pp, ModChain *ch)
{
	Expr *expr = ch->expr;
	const char *mod = *pp;
	const char *errfmt;
	char *output;

	if (mod[1] != 'h' || !IsDelimiter(mod[2], ch))
		return AMR_UNKNOWN;
	*pp = mod + 2;

	if (!Expr_ShouldEval(expr))
		return AMR_OK;

	output = Cmd_Exec(expr->value.str, &errfmt);
	if (errfmt != NULL)
		Error(errfmt, expr->value.str);
	/* The output becomes the value, even after a reported error. */
	Expr_SetValueOwn(expr, output);

	return AMR_OK;
}
#endif
3679 
3680 static void
LogBeforeApply(const ModChain * ch,const char * mod)3681 LogBeforeApply(const ModChain *ch, const char *mod)
3682 {
3683 	const Expr *expr = ch->expr;
3684 	bool is_single_char = mod[0] != '\0' && IsDelimiter(mod[1], ch);
3685 
3686 	/*
3687 	 * At this point, only the first character of the modifier can
3688 	 * be used since the end of the modifier is not yet known.
3689 	 */
3690 
3691 	if (!Expr_ShouldEval(expr)) {
3692 		debug_printf("Parsing modifier ${%s:%c%s}\n",
3693 		    expr->name, mod[0], is_single_char ? "" : "...");
3694 		return;
3695 	}
3696 
3697 	if ((expr->emode == VARE_WANTRES || expr->emode == VARE_UNDEFERR) &&
3698 	    expr->defined == DEF_REGULAR) {
3699 		debug_printf(
3700 		    "Evaluating modifier ${%s:%c%s} on value \"%s\"\n",
3701 		    expr->name, mod[0], is_single_char ? "" : "...",
3702 		    expr->value.str);
3703 		return;
3704 	}
3705 
3706 	debug_printf(
3707 	    "Evaluating modifier ${%s:%c%s} on value \"%s\" (%s, %s)\n",
3708 	    expr->name, mod[0], is_single_char ? "" : "...", expr->value.str,
3709 	    VarEvalMode_Name[expr->emode], ExprDefined_Name[expr->defined]);
3710 }
3711 
3712 static void
LogAfterApply(const ModChain * ch,const char * p,const char * mod)3713 LogAfterApply(const ModChain *ch, const char *p, const char *mod)
3714 {
3715 	const Expr *expr = ch->expr;
3716 	const char *value = expr->value.str;
3717 	const char *quot = value == var_Error ? "" : "\"";
3718 
3719 	if ((expr->emode == VARE_WANTRES || expr->emode == VARE_UNDEFERR) &&
3720 	    expr->defined == DEF_REGULAR) {
3721 
3722 		debug_printf("Result of ${%s:%.*s} is %s%s%s\n",
3723 		    expr->name, (int)(p - mod), mod,
3724 		    quot, value == var_Error ? "error" : value, quot);
3725 		return;
3726 	}
3727 
3728 	debug_printf("Result of ${%s:%.*s} is %s%s%s (%s, %s)\n",
3729 	    expr->name, (int)(p - mod), mod,
3730 	    quot, value == var_Error ? "error" : value, quot,
3731 	    VarEvalMode_Name[expr->emode],
3732 	    ExprDefined_Name[expr->defined]);
3733 }
3734 
3735 static ApplyModifierResult
ApplyModifier(const char ** pp,ModChain * ch)3736 ApplyModifier(const char **pp, ModChain *ch)
3737 {
3738 	switch (**pp) {
3739 	case '!':
3740 		return ApplyModifier_ShellCommand(pp, ch);
3741 	case ':':
3742 		return ApplyModifier_Assign(pp, ch);
3743 	case '?':
3744 		return ApplyModifier_IfElse(pp, ch);
3745 	case '@':
3746 		return ApplyModifier_Loop(pp, ch);
3747 	case '[':
3748 		return ApplyModifier_Words(pp, ch);
3749 	case '_':
3750 		return ApplyModifier_Remember(pp, ch);
3751 #ifndef NO_REGEX
3752 	case 'C':
3753 		return ApplyModifier_Regex(pp, ch);
3754 #endif
3755 	case 'D':
3756 		return ApplyModifier_Defined(pp, ch);
3757 	case 'E':
3758 		return ApplyModifier_WordFunc(pp, ch, ModifyWord_Suffix);
3759 	case 'g':
3760 		return ApplyModifier_Gmtime(pp, ch);
3761 	case 'H':
3762 		return ApplyModifier_WordFunc(pp, ch, ModifyWord_Head);
3763 	case 'h':
3764 		return ApplyModifier_Hash(pp, ch);
3765 	case 'L':
3766 		return ApplyModifier_Literal(pp, ch);
3767 	case 'l':
3768 		return ApplyModifier_Localtime(pp, ch);
3769 	case 'M':
3770 	case 'N':
3771 		return ApplyModifier_Match(pp, ch);
3772 	case 'O':
3773 		return ApplyModifier_Order(pp, ch);
3774 	case 'P':
3775 		return ApplyModifier_Path(pp, ch);
3776 	case 'Q':
3777 	case 'q':
3778 		return ApplyModifier_Quote(pp, ch);
3779 	case 'R':
3780 		return ApplyModifier_WordFunc(pp, ch, ModifyWord_Root);
3781 	case 'r':
3782 		return ApplyModifier_Range(pp, ch);
3783 	case 'S':
3784 		return ApplyModifier_Subst(pp, ch);
3785 #ifdef SUNSHCMD
3786 	case 's':
3787 		return ApplyModifier_SunShell(pp, ch);
3788 #endif
3789 	case 'T':
3790 		return ApplyModifier_WordFunc(pp, ch, ModifyWord_Tail);
3791 	case 't':
3792 		return ApplyModifier_To(pp, ch);
3793 	case 'U':
3794 		return ApplyModifier_Defined(pp, ch);
3795 	case 'u':
3796 		return ApplyModifier_Unique(pp, ch);
3797 	default:
3798 		return AMR_UNKNOWN;
3799 	}
3800 }
3801 
3802 static void ApplyModifiers(Expr *, const char **, char, char);
3803 
3804 typedef enum ApplyModifiersIndirectResult {
3805 	/* The indirect modifiers have been applied successfully. */
3806 	AMIR_CONTINUE,
3807 	/* Fall back to the SysV modifier. */
3808 	AMIR_SYSV,
3809 	/* Error out. */
3810 	AMIR_OUT
3811 } ApplyModifiersIndirectResult;
3812 
3813 /*
3814  * While expanding a variable expression, expand and apply indirect modifiers,
3815  * such as in ${VAR:${M_indirect}}.
3816  *
3817  * All indirect modifiers of a group must come from a single variable
3818  * expression.  ${VAR:${M1}} is valid but ${VAR:${M1}${M2}} is not.
3819  *
3820  * Multiple groups of indirect modifiers can be chained by separating them
3821  * with colons.  ${VAR:${M1}:${M2}} contains 2 indirect modifiers.
3822  *
3823  * If the variable expression is not followed by ch->endc or ':', fall
3824  * back to trying the SysV modifier, such as in ${VAR:${FROM}=${TO}}.
3825  */
3826 static ApplyModifiersIndirectResult
ApplyModifiersIndirect(ModChain * ch,const char ** pp)3827 ApplyModifiersIndirect(ModChain *ch, const char **pp)
3828 {
3829 	Expr *expr = ch->expr;
3830 	const char *p = *pp;
3831 	FStr mods;
3832 
3833 	(void)Var_Parse(&p, expr->scope, expr->emode, &mods);
3834 	/* TODO: handle errors */
3835 
3836 	if (mods.str[0] != '\0' && *p != '\0' && !IsDelimiter(*p, ch)) {
3837 		FStr_Done(&mods);
3838 		return AMIR_SYSV;
3839 	}
3840 
3841 	DEBUG3(VAR, "Indirect modifier \"%s\" from \"%.*s\"\n",
3842 	    mods.str, (int)(p - *pp), *pp);
3843 
3844 	if (mods.str[0] != '\0') {
3845 		const char *modsp = mods.str;
3846 		ApplyModifiers(expr, &modsp, '\0', '\0');
3847 		if (expr->value.str == var_Error || *modsp != '\0') {
3848 			FStr_Done(&mods);
3849 			*pp = p;
3850 			return AMIR_OUT;	/* error already reported */
3851 		}
3852 	}
3853 	FStr_Done(&mods);
3854 
3855 	if (*p == ':')
3856 		p++;
3857 	else if (*p == '\0' && ch->endc != '\0') {
3858 		Error("Unclosed variable expression after indirect "
3859 		      "modifier, expecting '%c' for variable \"%s\"",
3860 		    ch->endc, expr->name);
3861 		*pp = p;
3862 		return AMIR_OUT;
3863 	}
3864 
3865 	*pp = p;
3866 	return AMIR_CONTINUE;
3867 }
3868 
3869 static ApplyModifierResult
ApplySingleModifier(const char ** pp,ModChain * ch)3870 ApplySingleModifier(const char **pp, ModChain *ch)
3871 {
3872 	ApplyModifierResult res;
3873 	const char *mod = *pp;
3874 	const char *p = *pp;
3875 
3876 	if (DEBUG(VAR))
3877 		LogBeforeApply(ch, mod);
3878 
3879 	res = ApplyModifier(&p, ch);
3880 
3881 #ifdef SYSVVARSUB
3882 	if (res == AMR_UNKNOWN) {
3883 		assert(p == mod);
3884 		res = ApplyModifier_SysV(&p, ch);
3885 	}
3886 #endif
3887 
3888 	if (res == AMR_UNKNOWN) {
3889 		/*
3890 		 * Guess the end of the current modifier.
3891 		 * XXX: Skipping the rest of the modifier hides
3892 		 * errors and leads to wrong results.
3893 		 * Parsing should rather stop here.
3894 		 */
3895 		for (p++; !IsDelimiter(*p, ch) && *p != '\0'; p++)
3896 			continue;
3897 		Parse_Error(PARSE_FATAL, "Unknown modifier \"%.*s\"",
3898 		    (int)(p - mod), mod);
3899 		Expr_SetValueRefer(ch->expr, var_Error);
3900 	}
3901 	if (res == AMR_CLEANUP || res == AMR_BAD) {
3902 		*pp = p;
3903 		return res;
3904 	}
3905 
3906 	if (DEBUG(VAR))
3907 		LogAfterApply(ch, p, mod);
3908 
3909 	if (*p == '\0' && ch->endc != '\0') {
3910 		Error(
3911 		    "Unclosed variable expression, expecting '%c' for "
3912 		    "modifier \"%.*s\" of variable \"%s\" with value \"%s\"",
3913 		    ch->endc,
3914 		    (int)(p - mod), mod,
3915 		    ch->expr->name, ch->expr->value.str);
3916 	} else if (*p == ':') {
3917 		p++;
3918 	} else if (opts.strict && *p != '\0' && *p != ch->endc) {
3919 		Parse_Error(PARSE_FATAL,
3920 		    "Missing delimiter ':' after modifier \"%.*s\"",
3921 		    (int)(p - mod), mod);
3922 		/*
3923 		 * TODO: propagate parse error to the enclosing
3924 		 * expression
3925 		 */
3926 	}
3927 	*pp = p;
3928 	return AMR_OK;
3929 }
3930 
/*
 * Construct a ModChain from its 5 members, as a compound literal when
 * compiling as C99 or later, otherwise via an inline function.
 */
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
MAKE_INLINE ModChain
ModChain_Literal(Expr *expr, char startc, char endc, char sep, bool oneBigWord)
{
	ModChain chain;

	chain.expr = expr;
	chain.startc = startc;
	chain.endc = endc;
	chain.sep = sep;
	chain.oneBigWord = oneBigWord;
	return chain;
}
#else
#define ModChain_Literal(expr, startc, endc, sep, oneBigWord) \
	(ModChain) { expr, startc, endc, sep, oneBigWord }
#endif
3947 
3948 /* Apply any modifiers (such as :Mpattern or :@var@loop@ or :Q or ::=value). */
3949 static void
ApplyModifiers(Expr * expr,const char ** pp,char startc,char endc)3950 ApplyModifiers(
3951     Expr *expr,
3952     const char **pp,	/* the parsing position, updated upon return */
3953     char startc,	/* '(' or '{'; or '\0' for indirect modifiers */
3954     char endc		/* ')' or '}'; or '\0' for indirect modifiers */
3955 )
3956 {
3957 	ModChain ch = ModChain_Literal(expr, startc, endc, ' ', false);
3958 	const char *p;
3959 	const char *mod;
3960 
3961 	assert(startc == '(' || startc == '{' || startc == '\0');
3962 	assert(endc == ')' || endc == '}' || endc == '\0');
3963 	assert(expr->value.str != NULL);
3964 
3965 	p = *pp;
3966 
3967 	if (*p == '\0' && endc != '\0') {
3968 		Error(
3969 		    "Unclosed variable expression (expecting '%c') for \"%s\"",
3970 		    ch.endc, expr->name);
3971 		goto cleanup;
3972 	}
3973 
3974 	while (*p != '\0' && *p != endc) {
3975 		ApplyModifierResult res;
3976 
3977 		if (*p == '$') {
3978 			ApplyModifiersIndirectResult amir =
3979 			    ApplyModifiersIndirect(&ch, &p);
3980 			if (amir == AMIR_CONTINUE)
3981 				continue;
3982 			if (amir == AMIR_OUT)
3983 				break;
3984 			/*
3985 			 * It's neither '${VAR}:' nor '${VAR}}'.  Try to parse
3986 			 * it as a SysV modifier, as that is the only modifier
3987 			 * that can start with '$'.
3988 			 */
3989 		}
3990 
3991 		mod = p;
3992 
3993 		res = ApplySingleModifier(&p, &ch);
3994 		if (res == AMR_CLEANUP)
3995 			goto cleanup;
3996 		if (res == AMR_BAD)
3997 			goto bad_modifier;
3998 	}
3999 
4000 	*pp = p;
4001 	assert(expr->value.str != NULL); /* Use var_Error or varUndefined. */
4002 	return;
4003 
4004 bad_modifier:
4005 	/* XXX: The modifier end is only guessed. */
4006 	Error("Bad modifier \":%.*s\" for variable \"%s\"",
4007 	    (int)strcspn(mod, ":)}"), mod, expr->name);
4008 
4009 cleanup:
4010 	/*
4011 	 * TODO: Use p + strlen(p) instead, to stop parsing immediately.
4012 	 *
4013 	 * In the unit tests, this generates a few unterminated strings in the
4014 	 * shell commands though.  Instead of producing these unfinished
4015 	 * strings, commands with evaluation errors should not be run at all.
4016 	 *
4017 	 * To make that happen, Var_Subst must report the actual errors
4018 	 * instead of returning VPR_OK unconditionally.
4019 	 */
4020 	*pp = p;
4021 	Expr_SetValueRefer(expr, var_Error);
4022 }
4023 
4024 /*
4025  * Only 4 of the 7 local variables are treated specially as they are the only
4026  * ones that will be set when dynamic sources are expanded.
4027  */
4028 static bool
VarnameIsDynamic(Substring varname)4029 VarnameIsDynamic(Substring varname)
4030 {
4031 	const char *name;
4032 	size_t len;
4033 
4034 	name = varname.start;
4035 	len = Substring_Length(varname);
4036 	if (len == 1 || (len == 2 && (name[1] == 'F' || name[1] == 'D'))) {
4037 		switch (name[0]) {
4038 		case '@':
4039 		case '%':
4040 		case '*':
4041 		case '!':
4042 			return true;
4043 		}
4044 		return false;
4045 	}
4046 
4047 	if ((len == 7 || len == 8) && name[0] == '.' && ch_isupper(name[1])) {
4048 		return Substring_Equals(varname, ".TARGET") ||
4049 		       Substring_Equals(varname, ".ARCHIVE") ||
4050 		       Substring_Equals(varname, ".PREFIX") ||
4051 		       Substring_Equals(varname, ".MEMBER");
4052 	}
4053 
4054 	return false;
4055 }
4056 
4057 static const char *
UndefinedShortVarValue(char varname,const GNode * scope)4058 UndefinedShortVarValue(char varname, const GNode *scope)
4059 {
4060 	if (scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL) {
4061 		/*
4062 		 * If substituting a local variable in a non-local scope,
4063 		 * assume it's for dynamic source stuff. We have to handle
4064 		 * this specially and return the longhand for the variable
4065 		 * with the dollar sign escaped so it makes it back to the
4066 		 * caller. Only four of the local variables are treated
4067 		 * specially as they are the only four that will be set
4068 		 * when dynamic sources are expanded.
4069 		 */
4070 		switch (varname) {
4071 		case '@':
4072 			return "$(.TARGET)";
4073 		case '%':
4074 			return "$(.MEMBER)";
4075 		case '*':
4076 			return "$(.PREFIX)";
4077 		case '!':
4078 			return "$(.ARCHIVE)";
4079 		}
4080 	}
4081 	return NULL;
4082 }
4083 
4084 /*
4085  * Parse a variable name, until the end character or a colon, whichever
4086  * comes first.
4087  */
4088 static void
ParseVarname(const char ** pp,char startc,char endc,GNode * scope,VarEvalMode emode,LazyBuf * buf)4089 ParseVarname(const char **pp, char startc, char endc,
4090 	     GNode *scope, VarEvalMode emode,
4091 	     LazyBuf *buf)
4092 {
4093 	const char *p = *pp;
4094 	int depth = 0;		/* Track depth so we can spot parse errors. */
4095 
4096 	LazyBuf_Init(buf, p);
4097 
4098 	while (*p != '\0') {
4099 		if ((*p == endc || *p == ':') && depth == 0)
4100 			break;
4101 		if (*p == startc)
4102 			depth++;
4103 		if (*p == endc)
4104 			depth--;
4105 
4106 		/* A variable inside a variable, expand. */
4107 		if (*p == '$') {
4108 			FStr nested_val;
4109 			(void)Var_Parse(&p, scope, emode, &nested_val);
4110 			/* TODO: handle errors */
4111 			LazyBuf_AddStr(buf, nested_val.str);
4112 			FStr_Done(&nested_val);
4113 		} else {
4114 			LazyBuf_Add(buf, *p);
4115 			p++;
4116 		}
4117 	}
4118 	*pp = p;
4119 }
4120 
4121 static VarParseResult
ValidShortVarname(char varname,const char * start)4122 ValidShortVarname(char varname, const char *start)
4123 {
4124 	if (varname != '$' && varname != ':' && varname != '}' &&
4125 	    varname != ')' && varname != '\0')
4126 		return VPR_OK;
4127 
4128 	if (!opts.strict)
4129 		return VPR_ERR;	/* XXX: Missing error message */
4130 
4131 	if (varname == '$')
4132 		Parse_Error(PARSE_FATAL,
4133 		    "To escape a dollar, use \\$, not $$, at \"%s\"", start);
4134 	else if (varname == '\0')
4135 		Parse_Error(PARSE_FATAL, "Dollar followed by nothing");
4136 	else
4137 		Parse_Error(PARSE_FATAL,
4138 		    "Invalid variable name '%c', at \"%s\"", varname, start);
4139 
4140 	return VPR_ERR;
4141 }
4142 
4143 /*
4144  * Parse a single-character variable name such as in $V or $@.
4145  * Return whether to continue parsing.
4146  */
4147 static bool
ParseVarnameShort(char varname,const char ** pp,GNode * scope,VarEvalMode emode,VarParseResult * out_false_res,const char ** out_false_val,Var ** out_true_var)4148 ParseVarnameShort(char varname, const char **pp, GNode *scope,
4149 		  VarEvalMode emode,
4150 		  VarParseResult *out_false_res, const char **out_false_val,
4151 		  Var **out_true_var)
4152 {
4153 	char name[2];
4154 	Var *v;
4155 	VarParseResult vpr;
4156 
4157 	vpr = ValidShortVarname(varname, *pp);
4158 	if (vpr != VPR_OK) {
4159 		(*pp)++;
4160 		*out_false_res = vpr;
4161 		*out_false_val = var_Error;
4162 		return false;
4163 	}
4164 
4165 	name[0] = varname;
4166 	name[1] = '\0';
4167 	v = VarFind(name, scope, true);
4168 	if (v == NULL) {
4169 		const char *val;
4170 		*pp += 2;
4171 
4172 		val = UndefinedShortVarValue(varname, scope);
4173 		if (val == NULL)
4174 			val = emode == VARE_UNDEFERR
4175 			    ? var_Error : varUndefined;
4176 
4177 		if (opts.strict && val == var_Error) {
4178 			Parse_Error(PARSE_FATAL,
4179 			    "Variable \"%s\" is undefined", name);
4180 			*out_false_res = VPR_ERR;
4181 			*out_false_val = val;
4182 			return false;
4183 		}
4184 
4185 		/*
4186 		 * XXX: This looks completely wrong.
4187 		 *
4188 		 * If undefined expressions are not allowed, this should
4189 		 * rather be VPR_ERR instead of VPR_UNDEF, together with an
4190 		 * error message.
4191 		 *
4192 		 * If undefined expressions are allowed, this should rather
4193 		 * be VPR_UNDEF instead of VPR_OK.
4194 		 */
4195 		*out_false_res = emode == VARE_UNDEFERR
4196 		    ? VPR_UNDEF : VPR_OK;
4197 		*out_false_val = val;
4198 		return false;
4199 	}
4200 
4201 	*out_true_var = v;
4202 	return true;
4203 }
4204 
4205 /* Find variables like @F or <D. */
4206 static Var *
FindLocalLegacyVar(Substring varname,GNode * scope,const char ** out_extraModifiers)4207 FindLocalLegacyVar(Substring varname, GNode *scope,
4208 		   const char **out_extraModifiers)
4209 {
4210 	Var *v;
4211 
4212 	/* Only resolve these variables if scope is a "real" target. */
4213 	if (scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL)
4214 		return NULL;
4215 
4216 	if (Substring_Length(varname) != 2)
4217 		return NULL;
4218 	if (varname.start[1] != 'F' && varname.start[1] != 'D')
4219 		return NULL;
4220 	if (strchr("@%?*!<>", varname.start[0]) == NULL)
4221 		return NULL;
4222 
4223 	v = VarFindSubstring(Substring_Sub(varname, 0, 1), scope, false);
4224 	if (v == NULL)
4225 		return NULL;
4226 
4227 	*out_extraModifiers = varname.start[1] == 'D' ? "H:" : "T:";
4228 	return v;
4229 }
4230 
4231 static VarParseResult
EvalUndefined(bool dynamic,const char * start,const char * p,Substring varname,VarEvalMode emode,FStr * out_val)4232 EvalUndefined(bool dynamic, const char *start, const char *p,
4233 	      Substring varname, VarEvalMode emode, FStr *out_val)
4234 {
4235 	if (dynamic) {
4236 		*out_val = FStr_InitOwn(bmake_strsedup(start, p));
4237 		return VPR_OK;
4238 	}
4239 
4240 	if (emode == VARE_UNDEFERR && opts.strict) {
4241 		Parse_Error(PARSE_FATAL,
4242 		    "Variable \"%.*s\" is undefined",
4243 		    (int)Substring_Length(varname), varname.start);
4244 		*out_val = FStr_InitRefer(var_Error);
4245 		return VPR_ERR;
4246 	}
4247 
4248 	if (emode == VARE_UNDEFERR) {
4249 		*out_val = FStr_InitRefer(var_Error);
4250 		return VPR_UNDEF;	/* XXX: Should be VPR_ERR instead. */
4251 	}
4252 
4253 	*out_val = FStr_InitRefer(varUndefined);
4254 	return VPR_OK;
4255 }
4256 
4257 /*
4258  * Parse a long variable name enclosed in braces or parentheses such as $(VAR)
4259  * or ${VAR}, up to the closing brace or parenthesis, or in the case of
4260  * ${VAR:Modifiers}, up to the ':' that starts the modifiers.
4261  * Return whether to continue parsing.
4262  */
4263 static bool
ParseVarnameLong(const char ** pp,char startc,GNode * scope,VarEvalMode emode,const char ** out_false_pp,VarParseResult * out_false_res,FStr * out_false_val,char * out_true_endc,Var ** out_true_v,bool * out_true_haveModifier,const char ** out_true_extraModifiers,bool * out_true_dynamic,ExprDefined * out_true_exprDefined)4264 ParseVarnameLong(
4265 	const char **pp,
4266 	char startc,
4267 	GNode *scope,
4268 	VarEvalMode emode,
4269 
4270 	const char **out_false_pp,
4271 	VarParseResult *out_false_res,
4272 	FStr *out_false_val,
4273 
4274 	char *out_true_endc,
4275 	Var **out_true_v,
4276 	bool *out_true_haveModifier,
4277 	const char **out_true_extraModifiers,
4278 	bool *out_true_dynamic,
4279 	ExprDefined *out_true_exprDefined
4280 )
4281 {
4282 	LazyBuf varname;
4283 	Var *v;
4284 	bool haveModifier;
4285 	bool dynamic = false;
4286 
4287 	const char *p = *pp;
4288 	const char *const start = p;
4289 	char endc = startc == '(' ? ')' : '}';
4290 
4291 	p += 2;			/* skip "${" or "$(" or "y(" */
4292 	ParseVarname(&p, startc, endc, scope, emode, &varname);
4293 
4294 	if (*p == ':') {
4295 		haveModifier = true;
4296 	} else if (*p == endc) {
4297 		haveModifier = false;
4298 	} else {
4299 		Substring name = LazyBuf_Get(&varname);
4300 		Parse_Error(PARSE_FATAL, "Unclosed variable \"%.*s\"",
4301 		    (int)Substring_Length(name), name.start);
4302 		LazyBuf_Done(&varname);
4303 		*out_false_pp = p;
4304 		*out_false_val = FStr_InitRefer(var_Error);
4305 		*out_false_res = VPR_ERR;
4306 		return false;
4307 	}
4308 
4309 	v = VarFindSubstring(LazyBuf_Get(&varname), scope, true);
4310 
4311 	/* At this point, p points just after the variable name,
4312 	 * either at ':' or at endc. */
4313 
4314 	if (v == NULL) {
4315 		v = FindLocalLegacyVar(LazyBuf_Get(&varname), scope,
4316 		    out_true_extraModifiers);
4317 	}
4318 
4319 	if (v == NULL) {
4320 		/*
4321 		 * Defer expansion of dynamic variables if they appear in
4322 		 * non-local scope since they are not defined there.
4323 		 */
4324 		dynamic = VarnameIsDynamic(LazyBuf_Get(&varname)) &&
4325 			  (scope == SCOPE_CMDLINE || scope == SCOPE_GLOBAL);
4326 
4327 		if (!haveModifier) {
4328 			p++;	/* skip endc */
4329 			*out_false_pp = p;
4330 			*out_false_res = EvalUndefined(dynamic, start, p,
4331 			    LazyBuf_Get(&varname), emode, out_false_val);
4332 			return false;
4333 		}
4334 
4335 		/*
4336 		 * The variable expression is based on an undefined variable.
4337 		 * Nevertheless it needs a Var, for modifiers that access the
4338 		 * variable name, such as :L or :?.
4339 		 *
4340 		 * Most modifiers leave this expression in the "undefined"
4341 		 * state (VES_UNDEF), only a few modifiers like :D, :U, :L,
4342 		 * :P turn this undefined expression into a defined
4343 		 * expression (VES_DEF).
4344 		 *
4345 		 * In the end, after applying all modifiers, if the expression
4346 		 * is still undefined, Var_Parse will return an empty string
4347 		 * instead of the actually computed value.
4348 		 */
4349 		v = VarNew(LazyBuf_DoneGet(&varname), "", false, false);
4350 		*out_true_exprDefined = DEF_UNDEF;
4351 	} else
4352 		LazyBuf_Done(&varname);
4353 
4354 	*pp = p;
4355 	*out_true_endc = endc;
4356 	*out_true_v = v;
4357 	*out_true_haveModifier = haveModifier;
4358 	*out_true_dynamic = dynamic;
4359 	return true;
4360 }
4361 
4362 /* Free the environment variable now since we own it. */
4363 static void
FreeEnvVar(Var * v,FStr * inout_val)4364 FreeEnvVar(Var *v, FStr *inout_val)
4365 {
4366 	char *varValue = Buf_DoneData(&v->val);
4367 	if (inout_val->str == varValue)
4368 		inout_val->freeIt = varValue;
4369 	else
4370 		free(varValue);
4371 
4372 	FStr_Done(&v->name);
4373 	free(v);
4374 }
4375 
/*
 * Expr_Literal produces an Expr from its name, value, evaluation mode,
 * scope and definedness.
 *
 * From C99 on, it is a macro that expands to a brace-enclosed initializer,
 * which makes it usable only as the initializer of a declaration; the
 * positional form assumes that the members of Expr are declared in the
 * order of the arguments.  For older compilers, it is an inline function
 * that fills in the members one by one.
 */
#if __STDC_VERSION__ >= 199901L
#define Expr_Literal(name, value, emode, scope, defined) \
	{ name, value, emode, scope, defined }
#else
MAKE_INLINE Expr
Expr_Literal(const char *name, FStr value,
	     VarEvalMode emode, GNode *scope, ExprDefined defined)
{
	Expr expr;

	/* What the expression is. */
	expr.name = name;
	expr.value = value;
	expr.defined = defined;

	/* How and where the expression is evaluated. */
	expr.emode = emode;
	expr.scope = scope;

	return expr;
}
#endif
4394 
4395 /*
4396  * Expressions of the form ${:U...} with a trivial value are often generated
4397  * by .for loops and are boring, therefore parse and evaluate them in a fast
4398  * lane without debug logging.
4399  */
4400 static bool
Var_Parse_FastLane(const char ** pp,VarEvalMode emode,FStr * out_value)4401 Var_Parse_FastLane(const char **pp, VarEvalMode emode, FStr *out_value)
4402 {
4403 	const char *p;
4404 
4405 	p = *pp;
4406 	if (!(p[0] == '$' && p[1] == '{' && p[2] == ':' && p[3] == 'U'))
4407 		return false;
4408 
4409 	p += 4;
4410 	while (*p != '$' && *p != '{' && *p != ':' && *p != '\\' &&
4411 	       *p != '}' && *p != '\0')
4412 		p++;
4413 	if (*p != '}')
4414 		return false;
4415 
4416 	if (emode == VARE_PARSE_ONLY)
4417 		*out_value = FStr_InitRefer("");
4418 	else
4419 		*out_value = FStr_InitOwn(bmake_strsedup(*pp + 4, p));
4420 	*pp = p + 1;
4421 	return true;
4422 }
4423 
4424 /*
4425  * Given the start of a variable expression (such as $v, $(VAR),
4426  * ${VAR:Mpattern}), extract the variable name and value, and the modifiers,
4427  * if any.  While doing that, apply the modifiers to the value of the
4428  * expression, forming its final value.  A few of the modifiers such as :!cmd!
4429  * or ::= have side effects.
4430  *
4431  * Input:
4432  *	*pp		The string to parse.
4433  *			When parsing a condition in ParseEmptyArg, it may also
4434  *			point to the "y" of "empty(VARNAME:Modifiers)", which
4435  *			is syntactically the same.
4436  *	scope		The scope for finding variables
4437  *	emode		Controls the exact details of parsing and evaluation
4438  *
4439  * Output:
4440  *	*pp		The position where to continue parsing.
4441  *			TODO: After a parse error, the value of *pp is
4442  *			unspecified.  It may not have been updated at all,
4443  *			point to some random character in the string, to the
4444  *			location of the parse error, or at the end of the
4445  *			string.
4446  *	*out_val	The value of the variable expression, never NULL.
4447  *	*out_val	var_Error if there was a parse error.
4448  *	*out_val	var_Error if the base variable of the expression was
4449  *			undefined, emode is VARE_UNDEFERR, and none of
4450  *			the modifiers turned the undefined expression into a
4451  *			defined expression.
4452  *			XXX: It is not guaranteed that an error message has
4453  *			been printed.
4454  *	*out_val	varUndefined if the base variable of the expression
4455  *			was undefined, emode was not VARE_UNDEFERR,
4456  *			and none of the modifiers turned the undefined
4457  *			expression into a defined expression.
4458  *			XXX: It is not guaranteed that an error message has
4459  *			been printed.
4460  */
4461 VarParseResult
Var_Parse(const char ** pp,GNode * scope,VarEvalMode emode,FStr * out_val)4462 Var_Parse(const char **pp, GNode *scope, VarEvalMode emode, FStr *out_val)
4463 {
4464 	const char *p = *pp;
4465 	const char *const start = p;
4466 	/* true if have modifiers for the variable. */
4467 	bool haveModifier;
4468 	/* Starting character if variable in parens or braces. */
4469 	char startc;
4470 	/* Ending character if variable in parens or braces. */
4471 	char endc;
4472 	/*
4473 	 * true if the variable is local and we're expanding it in a
4474 	 * non-local scope. This is done to support dynamic sources.
4475 	 * The result is just the expression, unaltered.
4476 	 */
4477 	bool dynamic;
4478 	const char *extramodifiers;
4479 	Var *v;
4480 	Expr expr = Expr_Literal(NULL, FStr_InitRefer(NULL), emode,
4481 	    scope, DEF_REGULAR);
4482 
4483 	if (Var_Parse_FastLane(pp, emode, out_val))
4484 		return VPR_OK;
4485 
4486 	DEBUG2(VAR, "Var_Parse: %s (%s)\n", start, VarEvalMode_Name[emode]);
4487 
4488 	*out_val = FStr_InitRefer(NULL);
4489 	extramodifiers = NULL;	/* extra modifiers to apply first */
4490 	dynamic = false;
4491 
4492 	/*
4493 	 * Appease GCC, which thinks that the variable might not be
4494 	 * initialized.
4495 	 */
4496 	endc = '\0';
4497 
4498 	startc = p[1];
4499 	if (startc != '(' && startc != '{') {
4500 		VarParseResult res;
4501 		if (!ParseVarnameShort(startc, pp, scope, emode, &res,
4502 		    &out_val->str, &v))
4503 			return res;
4504 		haveModifier = false;
4505 		p++;
4506 	} else {
4507 		VarParseResult res;
4508 		if (!ParseVarnameLong(&p, startc, scope, emode,
4509 		    pp, &res, out_val,
4510 		    &endc, &v, &haveModifier, &extramodifiers,
4511 		    &dynamic, &expr.defined))
4512 			return res;
4513 	}
4514 
4515 	expr.name = v->name.str;
4516 	if (v->inUse)
4517 		Fatal("Variable %s is recursive.", v->name.str);
4518 
4519 	/*
4520 	 * XXX: This assignment creates an alias to the current value of the
4521 	 * variable.  This means that as long as the value of the expression
4522 	 * stays the same, the value of the variable must not change.
4523 	 * Using the '::=' modifier, it could be possible to do exactly this.
4524 	 * At the bottom of this function, the resulting value is compared to
4525 	 * the then-current value of the variable.  This might also invoke
4526 	 * undefined behavior.
4527 	 */
4528 	expr.value = FStr_InitRefer(v->val.data);
4529 
4530 	/*
4531 	 * Before applying any modifiers, expand any nested expressions from
4532 	 * the variable value.
4533 	 */
4534 	if (strchr(expr.value.str, '$') != NULL &&
4535 	    VarEvalMode_ShouldEval(emode)) {
4536 		char *expanded;
4537 		VarEvalMode nested_emode = emode;
4538 		if (opts.strict)
4539 			nested_emode = VarEvalMode_UndefOk(nested_emode);
4540 		v->inUse = true;
4541 		(void)Var_Subst(expr.value.str, scope, nested_emode,
4542 		    &expanded);
4543 		v->inUse = false;
4544 		/* TODO: handle errors */
4545 		Expr_SetValueOwn(&expr, expanded);
4546 	}
4547 
4548 	if (extramodifiers != NULL) {
4549 		const char *em = extramodifiers;
4550 		ApplyModifiers(&expr, &em, '\0', '\0');
4551 	}
4552 
4553 	if (haveModifier) {
4554 		p++;	/* Skip initial colon. */
4555 		ApplyModifiers(&expr, &p, startc, endc);
4556 	}
4557 
4558 	if (*p != '\0')		/* Skip past endc if possible. */
4559 		p++;
4560 
4561 	*pp = p;
4562 
4563 	if (v->fromEnv) {
4564 		FreeEnvVar(v, &expr.value);
4565 
4566 	} else if (expr.defined != DEF_REGULAR) {
4567 		if (expr.defined == DEF_UNDEF) {
4568 			if (dynamic) {
4569 				Expr_SetValueOwn(&expr,
4570 				    bmake_strsedup(start, p));
4571 			} else {
4572 				/*
4573 				 * The expression is still undefined,
4574 				 * therefore discard the actual value and
4575 				 * return an error marker instead.
4576 				 */
4577 				Expr_SetValueRefer(&expr,
4578 				    emode == VARE_UNDEFERR
4579 					? var_Error : varUndefined);
4580 			}
4581 		}
4582 		/* XXX: This is not standard memory management. */
4583 		if (expr.value.str != v->val.data)
4584 			Buf_Done(&v->val);
4585 		FStr_Done(&v->name);
4586 		free(v);
4587 	}
4588 	*out_val = expr.value;
4589 	return VPR_OK;		/* XXX: Is not correct in all cases */
4590 }
4591 
4592 static void
VarSubstDollarDollar(const char ** pp,Buffer * res,VarEvalMode emode)4593 VarSubstDollarDollar(const char **pp, Buffer *res, VarEvalMode emode)
4594 {
4595 	/* A dollar sign may be escaped with another dollar sign. */
4596 	if (save_dollars && VarEvalMode_ShouldKeepDollar(emode))
4597 		Buf_AddByte(res, '$');
4598 	Buf_AddByte(res, '$');
4599 	*pp += 2;
4600 }
4601 
4602 static void
VarSubstExpr(const char ** pp,Buffer * buf,GNode * scope,VarEvalMode emode,bool * inout_errorReported)4603 VarSubstExpr(const char **pp, Buffer *buf, GNode *scope,
4604 	     VarEvalMode emode, bool *inout_errorReported)
4605 {
4606 	const char *p = *pp;
4607 	const char *nested_p = p;
4608 	FStr val;
4609 
4610 	(void)Var_Parse(&nested_p, scope, emode, &val);
4611 	/* TODO: handle errors */
4612 
4613 	if (val.str == var_Error || val.str == varUndefined) {
4614 		if (!VarEvalMode_ShouldKeepUndef(emode)) {
4615 			p = nested_p;
4616 		} else if (emode == VARE_UNDEFERR || val.str == var_Error) {
4617 
4618 			/*
4619 			 * XXX: This condition is wrong.  If val == var_Error,
4620 			 * this doesn't necessarily mean there was an undefined
4621 			 * variable.  It could equally well be a parse error;
4622 			 * see unit-tests/varmod-order.exp.
4623 			 */
4624 
4625 			/*
4626 			 * If variable is undefined, complain and skip the
4627 			 * variable. The complaint will stop us from doing
4628 			 * anything when the file is parsed.
4629 			 */
4630 			if (!*inout_errorReported) {
4631 				Parse_Error(PARSE_FATAL,
4632 				    "Undefined variable \"%.*s\"",
4633 				    (int)(size_t)(nested_p - p), p);
4634 			}
4635 			p = nested_p;
4636 			*inout_errorReported = true;
4637 		} else {
4638 			/* Copy the initial '$' of the undefined expression,
4639 			 * thereby deferring expansion of the expression, but
4640 			 * expand nested expressions if already possible.
4641 			 * See unit-tests/varparse-undef-partial.mk. */
4642 			Buf_AddByte(buf, *p);
4643 			p++;
4644 		}
4645 	} else {
4646 		p = nested_p;
4647 		Buf_AddStr(buf, val.str);
4648 	}
4649 
4650 	FStr_Done(&val);
4651 
4652 	*pp = p;
4653 }
4654 
4655 /*
4656  * Skip as many characters as possible -- either to the end of the string
4657  * or to the next dollar sign (variable expression).
4658  */
4659 static void
VarSubstPlain(const char ** pp,Buffer * res)4660 VarSubstPlain(const char **pp, Buffer *res)
4661 {
4662 	const char *p = *pp;
4663 	const char *start = p;
4664 
4665 	for (p++; *p != '$' && *p != '\0'; p++)
4666 		continue;
4667 	Buf_AddBytesBetween(res, start, p);
4668 	*pp = p;
4669 }
4670 
4671 /*
4672  * Expand all variable expressions like $V, ${VAR}, $(VAR:Modifiers) in the
4673  * given string.
4674  *
4675  * Input:
4676  *	str		The string in which the variable expressions are
4677  *			expanded.
4678  *	scope		The scope in which to start searching for
4679  *			variables.  The other scopes are searched as well.
4680  *	emode		The mode for parsing or evaluating subexpressions.
4681  */
4682 VarParseResult
Var_Subst(const char * str,GNode * scope,VarEvalMode emode,char ** out_res)4683 Var_Subst(const char *str, GNode *scope, VarEvalMode emode, char **out_res)
4684 {
4685 	const char *p = str;
4686 	Buffer res;
4687 
4688 	/* Set true if an error has already been reported,
4689 	 * to prevent a plethora of messages when recursing */
4690 	/* XXX: Why is the 'static' necessary here? */
4691 	static bool errorReported;
4692 
4693 	Buf_Init(&res);
4694 	errorReported = false;
4695 
4696 	while (*p != '\0') {
4697 		if (p[0] == '$' && p[1] == '$')
4698 			VarSubstDollarDollar(&p, &res, emode);
4699 		else if (p[0] == '$')
4700 			VarSubstExpr(&p, &res, scope, emode, &errorReported);
4701 		else
4702 			VarSubstPlain(&p, &res);
4703 	}
4704 
4705 	*out_res = Buf_DoneDataCompact(&res);
4706 	return VPR_OK;
4707 }
4708 
4709 /* Initialize the variables module. */
4710 void
Var_Init(void)4711 Var_Init(void)
4712 {
4713 	SCOPE_INTERNAL = GNode_New("Internal");
4714 	SCOPE_GLOBAL = GNode_New("Global");
4715 	SCOPE_CMDLINE = GNode_New("Command");
4716 }
4717 
/*
 * Clean up the variables module.  Currently this only reports the
 * statistics via Var_Stats; nothing is freed here.
 */
void
Var_End(void)
{

	Var_Stats();
}
4724 
4725 void
Var_Stats(void)4726 Var_Stats(void)
4727 {
4728 	HashTable_DebugStats(&SCOPE_GLOBAL->vars, "Global variables");
4729 }
4730 
4731 /* Print all variables in a scope, sorted by name. */
4732 void
Var_Dump(GNode * scope)4733 Var_Dump(GNode *scope)
4734 {
4735 	Vector /* of const char * */ vec;
4736 	HashIter hi;
4737 	size_t i;
4738 	const char **varnames;
4739 
4740 	Vector_Init(&vec, sizeof(const char *));
4741 
4742 	HashIter_Init(&hi, &scope->vars);
4743 	while (HashIter_Next(&hi) != NULL)
4744 		*(const char **)Vector_Push(&vec) = hi.entry->key;
4745 	varnames = vec.items;
4746 
4747 	qsort(varnames, vec.len, sizeof varnames[0], str_cmp_asc);
4748 
4749 	for (i = 0; i < vec.len; i++) {
4750 		const char *varname = varnames[i];
4751 		Var *var = HashTable_FindValue(&scope->vars, varname);
4752 		debug_printf("%-16s = %s\n", varname, var->val.data);
4753 	}
4754 
4755 	Vector_Done(&vec);
4756 }
4757