1 /*
2  * main.c -- Code generator and main program for gawk.
3  */
4 
5 /*
6  * Copyright (C) 1986, 1988, 1989, 1991-2021,
7  * the Free Software Foundation, Inc.
8  *
9  * This file is part of GAWK, the GNU implementation of the
10  * AWK Programming Language.
11  *
12  * GAWK is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 3 of the License, or
15  * (at your option) any later version.
16  *
17  * GAWK is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
25  */
26 
27 /* FIX THIS BEFORE EVERY RELEASE: */
28 #define UPDATE_YEAR	2021
29 
30 #include "awk.h"
31 #include "getopt.h"
32 
33 #ifdef HAVE_MCHECK_H
34 #include <mcheck.h>
35 #endif
36 
37 #ifdef HAVE_LIBSIGSEGV
38 #include <sigsegv.h>
39 #else
40 typedef void *stackoverflow_context_t;
41 /* the argument to this macro is purposely not used */
42 #define sigsegv_install_handler(catchsegv) signal(SIGSEGV, catchsig)
43 /* define as 0 rather than empty so that (void) cast on it works */
44 #define stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE) 0
45 #endif
46 
47 #define DEFAULT_PROFILE		"awkprof.out"	/* where to put profile */
48 #define DEFAULT_VARFILE		"awkvars.out"	/* where to put vars */
49 #define DEFAULT_PREC		53
50 #define DEFAULT_ROUNDMODE	"N"		/* round to nearest */
51 
52 static const char *varfile = DEFAULT_VARFILE;
53 const char *command_file = NULL;	/* debugger commands */
54 
55 static void usage(int exitval, FILE *fp) ATTRIBUTE_NORETURN;
56 static void copyleft(void) ATTRIBUTE_NORETURN;
57 static void cmdline_fs(char *str);
58 static void init_args(int argc0, int argc, const char *argv0, char **argv);
59 static void init_vars(void);
60 static NODE *load_environ(void);
61 static NODE *load_procinfo(void);
62 static void catchsig(int sig);
63 #ifdef HAVE_LIBSIGSEGV
64 static int catchsegv(void *fault_address, int serious);
65 static void catchstackoverflow(int emergency, stackoverflow_context_t scp);
66 #endif
67 static void nostalgia(void) ATTRIBUTE_NORETURN;
68 static void version(void) ATTRIBUTE_NORETURN;
69 static void init_fds(void);
70 static void init_groupset(void);
71 static void save_argv(int, char **);
72 static const char *platform_name();
73 
74 /* These nodes store all the special variables AWK uses */
75 NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node;
76 NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node;
77 NODE *FNR_node, *FPAT_node, *FS_node, *IGNORECASE_node, *LINT_node;
78 NODE *NF_node, *NR_node, *OFMT_node, *OFS_node, *ORS_node, *PROCINFO_node;
79 NODE *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;
80 NODE *PREC_node, *ROUNDMODE_node;
81 NODE *TEXTDOMAIN_node;
82 
83 long NF;
84 long NR;
85 long FNR;
86 int BINMODE;
87 bool IGNORECASE;
88 char *OFS;
89 char *ORS;
90 char *OFMT;
91 char *TEXTDOMAIN;
92 
93 /*
94  * CONVFMT is a convenience pointer for the current number to string format.
95  * We must supply an initial value to avoid recursion problems of
96  *	set_CONVFMT -> fmt_index -> force_string: gets NULL CONVFMT
97  * Fun, fun, fun, fun.
98  */
99 const char *CONVFMT = "%.6g";
100 
101 NODE *Nnull_string;		/* The global null string */
102 
103 #if defined(HAVE_LOCALE_H)
104 struct lconv loc;		/* current locale */
105 static void init_locale(struct lconv *l);
106 #endif /* defined(HAVE_LOCALE_H) */
107 
108 /* The name the program was invoked under, for error messages */
109 const char *myname;
110 
111 /* A block of AWK code to be run */
112 INSTRUCTION *code_block = NULL;
113 
114 char **d_argv;			/* saved argv for debugger restarting */
115 /*
116  * List of rules and functions with first and last instruction (source_line)
117  * information; used for profiling and debugging.
118  */
119 INSTRUCTION *rule_list;
120 
121 int exit_val = EXIT_SUCCESS;		/* exit value */
122 
123 #if defined(YYDEBUG) || defined(GAWKDEBUG)
124 extern int yydebug;
125 #endif
126 
127 SRCFILE *srcfiles; /* source files */
128 
/*
 * One variable pre-assignment remembered from the command line:
 * the kind of request plus the text that came with it.
 */
enum assign_type {
	PRE_ASSIGN = 1,
	PRE_ASSIGN_FS
};

struct pre_assign {
	enum assign_type type;
	char *val;
};
136 
/* pre-assignments requested via -v or -F */
static struct pre_assign *preassigns = NULL;

/*
 * Index of the most recently stored entry in preassigns[]; it is -1
 * while the list is empty, so the number of entries is numassigns + 1.
 */
static long numassigns = -1;

/* true for --exec */
static bool disallow_var_assigns = false;

static void add_preassign(enum assign_type type, char *val);
static void parse_args(int argc, char **argv);
static void set_locale_stuff(void);

/*
 * When true, main() treats the command line as having no program text
 * and prints the usage message.
 */
static bool stopped_early = false;
147 
148 enum do_flag_values do_flags = DO_FLAG_NONE;
149 bool do_itrace = false;			/* provide simple instruction trace */
150 bool do_optimize = true;		/* apply default optimizations */
151 static int do_nostalgia = false;	/* provide a blast from the past */
152 static int do_binary = false;		/* hands off my data! */
153 static int do_version = false;		/* print version info */
154 static const char *locale = "";		/* default value to setlocale */
155 static const char *locale_dir = LOCALEDIR;	/* default locale dir */
156 
157 int use_lc_numeric = false;	/* obey locale for decimal point */
158 
159 int gawk_mb_cur_max;		/* MB_CUR_MAX value, see comment in main() */
160 
161 FILE *output_fp;		/* default gawk output, can be redirected in the debugger */
162 bool output_is_tty = false;	/* control flushing of output */
163 
164 /* default format for strftime(), available via PROCINFO */
165 const char def_strftime_format[] = "%a %b %e %H:%M:%S %Z %Y";
166 
167 extern const char *version_string;
168 
169 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
170 GETGROUPS_T *groupset;		/* current group set */
171 int ngroups;			/* size of said set */
172 #endif
173 
174 void (*lintfunc)(const char *mesg, ...) = r_warning;
175 
176 /* Sorted by long option name! */
177 static const struct option optab[] = {
178 	{ "assign",		required_argument,	NULL,	'v' },
179 	{ "bignum",		no_argument,		NULL,	'M' },
180 	{ "characters-as-bytes", no_argument,		& do_binary,	 'b' },
181 	{ "copyright",		no_argument,		NULL,	'C' },
182 	{ "debug",		optional_argument,	NULL,	'D' },
183 	{ "dump-variables",	optional_argument,	NULL,	'd' },
184 	{ "exec",		required_argument,	NULL,	'E' },
185 	{ "field-separator",	required_argument,	NULL,	'F' },
186 	{ "file",		required_argument,	NULL,	'f' },
187 	{ "gen-pot",		no_argument,		NULL,	'g' },
188 	{ "help",		no_argument,		NULL,	'h' },
189 	{ "include",		required_argument,	NULL,	'i' },
190 	{ "lint",		optional_argument,	NULL,	'L' },
191 	{ "lint-old",		no_argument,		NULL,	't' },
192 	{ "load",		required_argument,	NULL,	'l' },
193 #if defined(LOCALEDEBUG)
194 	{ "locale",		required_argument,	NULL,	'Z' },
195 #endif
196 	{ "non-decimal-data",	no_argument,		NULL,	'n' },
197 	{ "no-optimize",	no_argument,		NULL,	's' },
198 	{ "nostalgia",		no_argument,		& do_nostalgia,	1 },
199 	{ "optimize",		no_argument,		NULL,	'O' },
200 #if defined(YYDEBUG) || defined(GAWKDEBUG)
201 	{ "parsedebug",		no_argument,		NULL,	'Y' },
202 #endif
203 	{ "posix",		no_argument,		NULL,	'P' },
204 	{ "pretty-print",	optional_argument,	NULL,	'o' },
205 	{ "profile",		optional_argument,	NULL,	'p' },
206 	{ "re-interval",	no_argument,		NULL,	'r' },
207 	{ "sandbox",		no_argument,		NULL, 	'S' },
208 	{ "source",		required_argument,	NULL,	'e' },
209 	{ "trace",		no_argument,		NULL,	'I' },
210 	{ "traditional",	no_argument,		NULL,	'c' },
211 	{ "use-lc-numeric",	no_argument,		& use_lc_numeric, 1 },
212 	{ "version",		no_argument,		& do_version, 'V' },
213 	{ NULL, 0, NULL, '\0' }
214 };
215 
216 /* main --- process args, parse program, run it, clean up */
217 
218 int
main(int argc,char ** argv)219 main(int argc, char **argv)
220 {
221 	int i;
222 	char *extra_stack;
223 	bool have_srcfile = false;
224 	SRCFILE *s;
225 	char *cp;
226 #if defined(LOCALEDEBUG)
227 	const char *initial_locale;
228 #endif
229 
230 	/* do these checks early */
231 	if (getenv("TIDYMEM") != NULL)
232 		do_flags |= DO_TIDY_MEM;
233 
234 #ifdef HAVE_MCHECK_H
235 #ifdef HAVE_MTRACE
236 	if (do_tidy_mem)
237 		mtrace();
238 #endif /* HAVE_MTRACE */
239 #endif /* HAVE_MCHECK_H */
240 
241 	myname = gawk_name(argv[0]);
242 	os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
243 
244 	if (argc < 2)
245 		usage(EXIT_FAILURE, stderr);
246 
247 	if ((cp = getenv("GAWK_LOCALE_DIR")) != NULL)
248 		locale_dir = cp;
249 
250 #if defined(F_GETFL) && defined(O_APPEND)
251 	// 1/2018: This is needed on modern BSD systems so that the
252 	// inplace tests pass. I think it's a bug in those kernels
253 	// but let's just work around it anyway.
254 	int flags = fcntl(fileno(stderr), F_GETFL, NULL);
255 	if (flags >= 0 && (flags & O_APPEND) == 0) {
256 		flags |= O_APPEND;
257 		(void) fcntl(fileno(stderr), F_SETFL, flags);
258 	}
259 #endif
260 
261 #if defined(LOCALEDEBUG)
262 	initial_locale = locale;
263 #endif
264 	set_locale_stuff();
265 
266 	(void) signal(SIGFPE, catchsig);
267 #ifdef SIGBUS
268 	(void) signal(SIGBUS, catchsig);
269 #endif
270 
271 	/*
272 	 * Ignore SIGPIPE so that writes to pipes that fail don't
273 	 * kill the process but instead return -1 and set errno.
274 	 * That lets us print a fatal message instead of dieing suddenly.
275 	 *
276 	 * Note that this requires ignoring EPIPE when writing and
277 	 * flushing stdout/stderr in other parts of the program. E.g.,
278 	 *
279 	 * 	gawk 'BEGIN { print "hi" }' | exit
280 	 *
281 	 * should not give us "broken pipe" messages --- mainly because
282 	 * it did not do so in the past and people would complain.
283 	 */
284 	ignore_sigpipe();
285 
286 	(void) sigsegv_install_handler(catchsegv);
287 #define STACK_SIZE (16*1024)
288 	emalloc(extra_stack, char *, STACK_SIZE, "main");
289 	(void) stackoverflow_install_handler(catchstackoverflow, extra_stack, STACK_SIZE);
290 #undef STACK_SIZE
291 
292 	/* initialize the null string */
293 	Nnull_string = make_string("", 0);
294 
295 	/* Robustness: check that file descriptors 0, 1, 2 are open */
296 	init_fds();
297 
298 	/* init array handling. */
299 	array_init();
300 
301 	/* init the symbol tables */
302 	init_symbol_table();
303 
304 	output_fp = stdout;
305 
306 	/* initialize global (main) execution context */
307 	push_context(new_context());
308 
309 	parse_args(argc, argv);
310 
311 #if defined(LOCALEDEBUG)
312 	if (locale != initial_locale)
313 		set_locale_stuff();
314 #endif
315 
316 	/*
317 	 * In glibc, MB_CUR_MAX is actually a function.  This value is
318 	 * tested *a lot* in many speed-critical places in gawk. Caching
319 	 * this value once makes a speed difference.
320 	 */
321 	gawk_mb_cur_max = MB_CUR_MAX;
322 
323 	/* init the cache for checking bytes if they're characters */
324 	init_btowc_cache();
325 
326 	/* set up the single byte case table */
327 	if (gawk_mb_cur_max == 1)
328 		load_casetable();
329 
330 	if (do_nostalgia)
331 		nostalgia();
332 
333 	/* check for POSIXLY_CORRECT environment variable */
334 	if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
335 		do_flags |= DO_POSIX;
336 		if (do_lint)
337 			lintwarn(
338 	_("environment variable `POSIXLY_CORRECT' set: turning on `--posix'"));
339 	}
340 
341 	// Checks for conflicting command-line arguments.
342 	if (do_posix) {
343 		use_lc_numeric = true;
344 		if (do_traditional)	/* both on command line */
345 			warning(_("`--posix' overrides `--traditional'"));
346 		else
347 			do_flags |= DO_TRADITIONAL;
348 			/*
349 			 * POSIX compliance also implies
350 			 * no GNU extensions either.
351 			 */
352 	}
353 
354 	if (do_traditional && do_non_decimal_data) {
355 		do_flags &= ~DO_NON_DEC_DATA;
356 		warning(_("`--posix'/`--traditional' overrides `--non-decimal-data'"));
357 	}
358 
359 	if (do_binary) {
360 		if (do_posix)
361 			warning(_("`--posix' overrides `--characters-as-bytes'"));
362 		else {
363 			gawk_mb_cur_max = 1;	/* hands off my data! */
364 #if defined(LC_ALL)
365 			setlocale(LC_ALL, "C");
366 #endif
367 		}
368 	}
369 
370 	if (do_lint && os_is_setuid())
371 		lintwarn(_("running %s setuid root may be a security problem"), myname);
372 
373 	if (do_debug)	/* Need to register the debugger pre-exec hook before any other */
374 		init_debug();
375 
376 #ifdef HAVE_MPFR
377 	/* Set up MPFR defaults, and register pre-exec hook to process arithmetic opcodes */
378 	if (do_mpfr)
379 		init_mpfr(DEFAULT_PREC, DEFAULT_ROUNDMODE);
380 #endif
381 
382 	/* load group set */
383 	init_groupset();
384 
385 #ifdef HAVE_MPFR
386 	if (do_mpfr) {
387 		mpz_init(Nnull_string->mpg_i);
388 		Nnull_string->flags = (MALLOC|STRCUR|STRING|MPZN|NUMCUR|NUMBER);
389 	} else
390 #endif
391 	{
392 		Nnull_string->numbr = 0.0;
393 		Nnull_string->flags = (MALLOC|STRCUR|STRING|NUMCUR|NUMBER);
394 	}
395 
396 	/*
397 	 * Tell the regex routines how they should work.
398 	 * Do this before initializing variables, since
399 	 * they could want to do a regexp compile.
400 	 */
401 	resetup();
402 
403 	/* Set up the special variables */
404 	init_vars();
405 
406 	/* Set up the field variables */
407 	init_fields();
408 
409 	/* Now process the pre-assignments */
410 	int dash_v_errs = 0;	// bad stuff for -v
411 	for (i = 0; i <= numassigns; i++) {
412 		if (preassigns[i].type == PRE_ASSIGN)
413 			dash_v_errs += (arg_assign(preassigns[i].val, true) == false);
414 		else	/* PRE_ASSIGN_FS */
415 			cmdline_fs(preassigns[i].val);
416 		efree(preassigns[i].val);
417 	}
418 
419 	if (preassigns != NULL)
420 		efree(preassigns);
421 
422 	if ((BINMODE & BINMODE_INPUT) != 0)
423 		if (os_setbinmode(fileno(stdin), O_BINARY) == -1)
424 			fatal(_("cannot set binary mode on stdin: %s"), strerror(errno));
425 	if ((BINMODE & BINMODE_OUTPUT) != 0) {
426 		if (os_setbinmode(fileno(stdout), O_BINARY) == -1)
427 			fatal(_("cannot set binary mode on stdout: %s"), strerror(errno));
428 		if (os_setbinmode(fileno(stderr), O_BINARY) == -1)
429 			fatal(_("cannot set binary mode on stderr: %s"), strerror(errno));
430 	}
431 
432 #ifdef GAWKDEBUG
433 	setbuf(stdout, (char *) NULL);	/* make debugging easier */
434 #endif
435 	if (os_isatty(fileno(stdout)))
436 		output_is_tty = true;
437 
438 	/* initialize API before loading extension libraries */
439 	init_ext_api();
440 
441 	/* load extension libs */
442 	for (s = srcfiles->next; s != srcfiles; s = s->next) {
443 		if (s->stype == SRC_EXTLIB)
444 			load_ext(s->fullpath);
445 		else if (s->stype != SRC_INC)
446 			have_srcfile = true;
447 	}
448 
449 	/* do version check after extensions are loaded to get extension info */
450 	if (do_version)
451 		version();
452 
453 	/* No -f or --source options, use next arg */
454 	if (! have_srcfile) {
455 		if (optind > argc - 1 || stopped_early) /* no args left or no program */
456 			usage(EXIT_FAILURE, stderr);
457 		(void) add_srcfile(SRC_CMDLINE, argv[optind], srcfiles, NULL, NULL);
458 		optind++;
459 	}
460 
461 	/* Select the interpreter routine */
462 	init_interpret();
463 
464 	init_args(optind, argc,
465 			do_posix ? argv[0] : myname,
466 			argv);
467 
468 #if defined(LC_NUMERIC)
469 	/*
470 	 * FRAGILE!  CAREFUL!
471 	 * Pre-initing the variables with arg_assign() can change the
472 	 * locale.  Force it to C before parsing the program.
473 	 */
474 	setlocale(LC_NUMERIC, "C");
475 #endif
476 	/* Read in the program */
477 	if (parse_program(& code_block, false) != 0 || dash_v_errs > 0)
478 		exit(EXIT_FAILURE);
479 
480 	if (do_intl)
481 		exit(EXIT_SUCCESS);
482 
483 	set_current_namespace(awk_namespace);
484 
485 	install_builtins();
486 
487 	if (do_lint)
488 		shadow_funcs();
489 
490 	if (do_lint && code_block->nexti->opcode == Op_atexit)
491 		lintwarn(_("no program text at all!"));
492 
493 	load_symbols();
494 
495 	if (do_profile)
496 		init_profiling_signals();
497 
498 #if defined(LC_NUMERIC)
499 	/*
500 	 * See comment above about using locale's decimal point.
501 	 *
502 	 * 10/2005:
503 	 * Bitter experience teaches us that most people the world over
504 	 * use period as the decimal point, not whatever their locale
505 	 * uses.  Thus, only use the locale's decimal point if being
506 	 * posixly anal-retentive.
507 	 *
508 	 * 7/2007:
509 	 * Be a little bit kinder. Allow the --use-lc-numeric option
510 	 * to also use the local decimal point. This avoids the draconian
511 	 * strictness of POSIX mode if someone just wants to parse their
512 	 * data using the local decimal point.
513 	 */
514 	if (use_lc_numeric)
515 		setlocale(LC_NUMERIC, locale);
516 #endif
517 
518 	init_io();
519 	output_fp = stdout;
520 
521 	if (do_debug)
522 		debug_prog(code_block);
523 	else if (do_pretty_print && ! do_profile)
524 		;	/* run pretty printer only. */
525 	else
526 		interpret(code_block);
527 
528 	if (do_pretty_print) {
529 		set_current_namespace(awk_namespace);
530 		dump_prog(code_block);
531 		dump_funcs();
532 	}
533 
534 	if (do_dump_vars)
535 		dump_vars(varfile);
536 
537 #ifdef HAVE_MPFR
538 	if (do_mpfr)
539 		cleanup_mpfr();
540 #endif
541 
542 	if (do_tidy_mem)
543 		release_all_vars();
544 
545 	/* keep valgrind happier */
546 	if (extra_stack)
547 		efree(extra_stack);
548 
549 	final_exit(exit_val);
550 	return exit_val;	/* to suppress warnings */
551 }
552 
553 /* add_preassign --- add one element to preassigns */
554 
555 static void
add_preassign(enum assign_type type,char * val)556 add_preassign(enum assign_type type, char *val)
557 {
558 	static long alloc_assigns;		/* for how many are allocated */
559 
560 #define INIT_SRC 4
561 
562 	++numassigns;
563 
564 	if (preassigns == NULL) {
565 		emalloc(preassigns, struct pre_assign *,
566 			INIT_SRC * sizeof(struct pre_assign), "add_preassign");
567 		alloc_assigns = INIT_SRC;
568 	} else if (numassigns >= alloc_assigns) {
569 		alloc_assigns *= 2;
570 		erealloc(preassigns, struct pre_assign *,
571 			alloc_assigns * sizeof(struct pre_assign), "add_preassigns");
572 	}
573 	preassigns[numassigns].type = type;
574 	preassigns[numassigns].val = estrdup(val, strlen(val));
575 
576 #undef INIT_SRC
577 }
578 
579 /* usage --- print usage information and exit */
580 
581 static void
usage(int exitval,FILE * fp)582 usage(int exitval, FILE *fp)
583 {
584 	/* Not factoring out common stuff makes it easier to translate. */
585 	fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"),
586 		myname);
587 	fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"),
588 			myname, quote, quote);
589 
590 	/* GNU long options info. This is too many options. */
591 
592 	fputs(_("POSIX options:\t\tGNU long options: (standard)\n"), fp);
593 	fputs(_("\t-f progfile\t\t--file=progfile\n"), fp);
594 	fputs(_("\t-F fs\t\t\t--field-separator=fs\n"), fp);
595 	fputs(_("\t-v var=val\t\t--assign=var=val\n"), fp);
596 	fputs(_("Short options:\t\tGNU long options: (extensions)\n"), fp);
597 	fputs(_("\t-b\t\t\t--characters-as-bytes\n"), fp);
598 	fputs(_("\t-c\t\t\t--traditional\n"), fp);
599 	fputs(_("\t-C\t\t\t--copyright\n"), fp);
600 	fputs(_("\t-d[file]\t\t--dump-variables[=file]\n"), fp);
601 	fputs(_("\t-D[file]\t\t--debug[=file]\n"), fp);
602 	fputs(_("\t-e 'program-text'\t--source='program-text'\n"), fp);
603 	fputs(_("\t-E file\t\t\t--exec=file\n"), fp);
604 	fputs(_("\t-g\t\t\t--gen-pot\n"), fp);
605 	fputs(_("\t-h\t\t\t--help\n"), fp);
606 	fputs(_("\t-i includefile\t\t--include=includefile\n"), fp);
607 	fputs(_("\t-I\t\t\t--trace\n"), fp);
608 	fputs(_("\t-l library\t\t--load=library\n"), fp);
609 	/*
610 	 * TRANSLATORS: the "fatal", "invalid" and "no-ext" here are literal
611 	 * values, they should not be translated. Thanks.
612 	 */
613 	fputs(_("\t-L[fatal|invalid|no-ext]\t--lint[=fatal|invalid|no-ext]\n"), fp);
614 	fputs(_("\t-M\t\t\t--bignum\n"), fp);
615 	fputs(_("\t-N\t\t\t--use-lc-numeric\n"), fp);
616 	fputs(_("\t-n\t\t\t--non-decimal-data\n"), fp);
617 	fputs(_("\t-o[file]\t\t--pretty-print[=file]\n"), fp);
618 	fputs(_("\t-O\t\t\t--optimize\n"), fp);
619 	fputs(_("\t-p[file]\t\t--profile[=file]\n"), fp);
620 	fputs(_("\t-P\t\t\t--posix\n"), fp);
621 	fputs(_("\t-r\t\t\t--re-interval\n"), fp);
622 	fputs(_("\t-s\t\t\t--no-optimize\n"), fp);
623 	fputs(_("\t-S\t\t\t--sandbox\n"), fp);
624 	fputs(_("\t-t\t\t\t--lint-old\n"), fp);
625 	fputs(_("\t-V\t\t\t--version\n"), fp);
626 #ifdef NOSTALGIA
627 	fputs(_("\t-W nostalgia\t\t--nostalgia\n"), fp);
628 #endif
629 #ifdef GAWKDEBUG
630 	fputs(_("\t-Y\t\t\t--parsedebug\n"), fp);
631 #endif
632 #ifdef GAWKDEBUG
633 	fputs(_("\t-Z locale-name\t\t--locale=locale-name\n"), fp);
634 #endif
635 
636 	/* This is one string to make things easier on translators. */
637 	/* TRANSLATORS: --help output (end)
638 	   no-wrap */
639 	fputs(_("\nTo report bugs, see node `Bugs' in `gawk.info'\n\
640 which is section `Reporting Problems and Bugs' in the\n\
641 printed version.  This same information may be found at\n\
642 https://www.gnu.org/software/gawk/manual/html_node/Bugs.html.\n\
643 PLEASE do NOT try to report bugs by posting in comp.lang.awk,\n\
644 or by using a web forum such as Stack Overflow.\n\n"), fp);
645 
646 	/* ditto */
647 	fputs(_("gawk is a pattern scanning and processing language.\n\
648 By default it reads standard input and writes standard output.\n\n"), fp);
649 
650 	/* ditto */
651 	fprintf(fp, _("Examples:\n\t%s '{ sum += $1 }; END { print sum }' file\n\
652 \t%s -F: '{ print $1 }' /etc/passwd\n"), myname, myname);
653 
654 	fflush(fp);
655 
656 	if (ferror(fp)) {
657 #ifdef __MINGW32__
658 		if (errno == 0 || errno == EINVAL)
659 			w32_maybe_set_errno();
660 #endif
661 		/* don't warn about stdout/stderr if EPIPE, but do error exit */
662 		if (errno == EPIPE)
663 			die_via_sigpipe();
664 
665 		if (fp == stdout)
666 			warning(_("error writing standard output: %s"), strerror(errno));
667 		else if (fp == stderr)
668 			warning(_("error writing standard error: %s"), strerror(errno));
669 
670 		// some other problem than SIGPIPE
671 		exit(EXIT_FAILURE);
672 	}
673 
674 	exit(exitval);
675 }
676 
677 /* copyleft --- print out the short GNU copyright information */
678 
679 static void
copyleft()680 copyleft()
681 {
682 	static const char blurb_part1[] =
683 	  N_("Copyright (C) 1989, 1991-%d Free Software Foundation.\n\
684 \n\
685 This program is free software; you can redistribute it and/or modify\n\
686 it under the terms of the GNU General Public License as published by\n\
687 the Free Software Foundation; either version 3 of the License, or\n\
688 (at your option) any later version.\n\
689 \n");
690 	static const char blurb_part2[] =
691 	  N_("This program is distributed in the hope that it will be useful,\n\
692 but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
693 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
694 GNU General Public License for more details.\n\
695 \n");
696 	static const char blurb_part3[] =
697 	  N_("You should have received a copy of the GNU General Public License\n\
698 along with this program. If not, see http://www.gnu.org/licenses/.\n");
699 
700 	/* multiple blurbs are needed for some brain dead compilers. */
701 	printf(_(blurb_part1), UPDATE_YEAR);	/* Last update year */
702 	fputs(_(blurb_part2), stdout);
703 	fputs(_(blurb_part3), stdout);
704 	fflush(stdout);
705 
706 	if (ferror(stdout)) {
707 #ifdef __MINGW32__
708 		if (errno == 0 || errno == EINVAL)
709 			w32_maybe_set_errno();
710 #endif
711 		/* don't warn about stdout if EPIPE, but do error exit */
712 		if (errno != EPIPE)
713 			warning(_("error writing standard output: %s"), strerror(errno));
714 		exit(EXIT_FAILURE);
715 	}
716 
717 	exit(EXIT_SUCCESS);
718 }
719 
720 /* cmdline_fs --- set FS from the command line */
721 
722 static void
cmdline_fs(char * str)723 cmdline_fs(char *str)
724 {
725 	NODE **tmp;
726 
727 	tmp = &FS_node->var_value;
728 	unref(*tmp);
729 	/*
730 	 * Only if in full compatibility mode check for the stupid special
731 	 * case so -F\t works as documented in awk book even though the shell
732 	 * hands us -Ft.  Bleah!
733 	 *
734 	 * Thankfully, POSIX didn't propagate this "feature".
735 	 */
736 	if (str[0] == 't' && str[1] == '\0') {
737 		if (do_lint)
738 			lintwarn(_("-Ft does not set FS to tab in POSIX awk"));
739 		if (do_traditional && ! do_posix)
740 			str[0] = '\t';
741 	}
742 
743 	*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
744 	set_FS();
745 }
746 
747 /* init_args --- set up ARGV from stuff on the command line */
748 
749 static void
init_args(int argc0,int argc,const char * argv0,char ** argv)750 init_args(int argc0, int argc, const char *argv0, char **argv)
751 {
752 	int i, j;
753 	NODE *sub, *val;
754 	NODE *shadow_node = NULL;
755 
756 	ARGV_node = install_symbol(estrdup("ARGV", 4), Node_var_array);
757 	sub = make_number(0.0);
758 	val = make_string(argv0, strlen(argv0));
759 	val->flags |= USER_INPUT;
760 	assoc_set(ARGV_node, sub, val);
761 
762 	if (do_sandbox) {
763 		shadow_node = make_array();
764 		sub = make_string(argv0, strlen(argv0));
765 		val = make_number(0.0);
766 		assoc_set(shadow_node, sub, val);
767 	}
768 
769 
770 	for (i = argc0, j = 1; i < argc; i++, j++) {
771 		sub = make_number((AWKNUM) j);
772 		val = make_string(argv[i], strlen(argv[i]));
773 		val->flags |= USER_INPUT;
774 		assoc_set(ARGV_node, sub, val);
775 
776 		if (do_sandbox) {
777 			sub = make_string(argv[i], strlen(argv[i]));
778 			val = make_number(0.0);
779 			assoc_set(shadow_node, sub, val);
780 		}
781 	}
782 
783 	ARGC_node = install_symbol(estrdup("ARGC", 4), Node_var);
784 	ARGC_node->var_value = make_number((AWKNUM) j);
785 
786 	if (do_sandbox)
787 		init_argv_array(ARGV_node, shadow_node);
788 }
789 
790 
791 /*
792  * Set all the special variables to their initial values.
793  * Note that some of the variables that have set_FOO routines should
794  * *N*O*T* have those routines called upon initialization, and thus
795  * they have NULL entries in that field. This is notably true of FS
796  * and IGNORECASE.
797  */
798 
799 struct varinit {
800 	NODE **spec;
801 	const char *name;
802 	const char *strval;
803 	AWKNUM numval;
804 	Func_ptr update;
805 	Func_ptr assign;
806 	bool do_assign;
807 	int flags;
808 #define NO_INSTALL	0x01
809 #define NON_STANDARD	0x02
810 #define NOT_OFF_LIMITS	0x04	/* may be accessed by extension function */
811 };
812 
813 static const struct varinit varinit[] = {
814 {NULL,		"ARGC",		NULL,	0,  NULL, NULL,	false, NO_INSTALL },
815 {&ARGIND_node,	"ARGIND",	NULL,	0,  NULL, NULL,	false, NON_STANDARD },
816 {NULL,		"ARGV",		NULL,	0,  NULL, NULL,	false, NO_INSTALL },
817 {&BINMODE_node,	"BINMODE",	NULL,	0,  NULL, set_BINMODE,	false, NON_STANDARD },
818 {&CONVFMT_node,	"CONVFMT",	"%.6g",	0,  NULL, set_CONVFMT,true, 	0 },
819 {NULL,		"ENVIRON",	NULL,	0,  NULL, NULL,	false, NO_INSTALL },
820 {&ERRNO_node,	"ERRNO",	"",	0,  NULL, NULL,	false, NON_STANDARD },
821 {&FIELDWIDTHS_node, "FIELDWIDTHS", "",	0,  NULL, set_FIELDWIDTHS,	false, NON_STANDARD },
822 {&FILENAME_node, "FILENAME",	"",	0,  NULL, NULL,	false, 0 },
823 {&FNR_node,	"FNR",		NULL,	0,  update_FNR, set_FNR,	true, 0 },
824 {&FS_node,	"FS",		" ",	0,  NULL, set_FS,	false, 0 },
825 {&FPAT_node,	"FPAT",		"[^[:space:]]+", 0,  NULL, set_FPAT,	false, NON_STANDARD },
826 {&IGNORECASE_node, "IGNORECASE", NULL,	0,  NULL, set_IGNORECASE,	false, NON_STANDARD },
827 {&LINT_node,	"LINT",		NULL,	0,  NULL, set_LINT,	false, NON_STANDARD },
828 {&PREC_node,	"PREC",		NULL,	DEFAULT_PREC,	NULL,	set_PREC,	false,	NON_STANDARD},
829 {&NF_node,	"NF",		NULL,	-1, update_NF, set_NF,	false, 0 },
830 {&NR_node,	"NR",		NULL,	0,  update_NR, set_NR,	true, 0 },
831 {&OFMT_node,	"OFMT",		"%.6g",	0,  NULL, set_OFMT,	true, 0 },
832 {&OFS_node,	"OFS",		" ",	0,  NULL, set_OFS,	true, 0 },
833 {&ORS_node,	"ORS",		"\n",	0,  NULL, set_ORS,	true, 0 },
834 {NULL,		"PROCINFO",	NULL,	0,  NULL, NULL,	false, NO_INSTALL | NON_STANDARD | NOT_OFF_LIMITS },
835 {&RLENGTH_node, "RLENGTH",	NULL,	0,  NULL, NULL,	false, 0 },
836 {&ROUNDMODE_node, "ROUNDMODE",	DEFAULT_ROUNDMODE,	0,  NULL, set_ROUNDMODE,	false, NON_STANDARD },
837 {&RS_node,	"RS",		"\n",	0,  NULL, set_RS,	true, 0 },
838 {&RSTART_node,	"RSTART",	NULL,	0,  NULL, NULL,	false, 0 },
839 {&RT_node,	"RT",		"",	0,  NULL, NULL,	false, NON_STANDARD },
840 {&SUBSEP_node,	"SUBSEP",	"\034",	0,  NULL, set_SUBSEP,	true, 0 },
841 {&TEXTDOMAIN_node,	"TEXTDOMAIN",	"messages",	0,  NULL, set_TEXTDOMAIN,	true, NON_STANDARD },
842 {0,		NULL,		NULL,	0,  NULL, NULL,	false, 0 },
843 };
844 
845 /* init_vars --- actually initialize everything in the symbol table */
846 
847 static void
init_vars()848 init_vars()
849 {
850 	const struct varinit *vp;
851 	NODE *n;
852 
853 	for (vp = varinit; vp->name != NULL; vp++) {
854 		if ((vp->flags & NO_INSTALL) != 0)
855 			continue;
856 		n = *(vp->spec) = install_symbol(estrdup(vp->name, strlen(vp->name)), Node_var);
857 		if (vp->strval != NULL)
858 			n->var_value = make_string(vp->strval, strlen(vp->strval));
859 		else
860 			n->var_value = make_number(vp->numval);
861 		n->var_assign = (Func_ptr) vp->assign;
862 		n->var_update = (Func_ptr) vp->update;
863 		if (vp->do_assign)
864 			(*(vp->assign))();
865 	}
866 
867 	/* Load PROCINFO and ENVIRON */
868 	if (! do_traditional)
869 		load_procinfo();
870 	load_environ();
871 }
872 
873 /* path_environ --- put path variable into environment if not already there */
874 
875 static void
path_environ(const char * pname,const char * dflt)876 path_environ(const char *pname, const char *dflt)
877 {
878 	const char *val;
879 	NODE **aptr;
880 	NODE *tmp;
881 
882 	tmp = make_string(pname, strlen(pname));
883 	/*
884 	 * On VMS, environ[] only holds a subset of what getenv() can
885 	 * find, so look AWKPATH up before resorting to default path.
886 	 */
887 	val = getenv(pname);
888 	if (val == NULL || *val == '\0')
889 		val = dflt;
890 	aptr = assoc_lookup(ENVIRON_node, tmp);
891 	/*
892 	 * If original value was the empty string, set it to
893 	 * the default value.
894 	 */
895 	if ((*aptr)->stlen == 0) {
896 		unref(*aptr);
897 		*aptr = make_string(val, strlen(val));
898 	}
899 
900 	unref(tmp);
901 }
902 
903 /* load_environ --- populate the ENVIRON array */
904 
905 static NODE *
load_environ()906 load_environ()
907 {
908 #if ! (defined(VMS) && defined(__DECC))
909 	extern char **environ;
910 #endif
911 	char *var, *val;
912 	int i;
913 	NODE *sub, *newval;
914 	static bool been_here = false;
915 
916 	if (been_here)
917 		return ENVIRON_node;
918 
919 	been_here = true;
920 
921 	ENVIRON_node = install_symbol(estrdup("ENVIRON", 7), Node_var_array);
922 	for (i = 0; environ[i] != NULL; i++) {
923 		static char nullstr[] = "";
924 
925 		var = environ[i];
926 		val = strchr(var, '=');
927 		if (val != NULL)
928 			*val++ = '\0';
929 		else
930 			val = nullstr;
931 		sub = make_string(var, strlen(var));
932 		newval = make_string(val, strlen(val));
933 		newval->flags |= USER_INPUT;
934 		assoc_set(ENVIRON_node, sub, newval);
935 
936 		/* restore '=' so that system() gets a valid environment */
937 		if (val != nullstr)
938 			*--val = '=';
939 	}
940 	/*
941 	 * Put AWKPATH and AWKLIBPATH into ENVIRON if not already there.
942 	 * This allows querying it from within awk programs.
943 	 *
944 	 * October 2014:
945 	 * If their values are "", override with the default values;
946 	 * since 2.10 AWKPATH used default value if environment's
947 	 * value was "".
948 	 */
949 	path_environ("AWKPATH", defpath);
950 	path_environ("AWKLIBPATH", deflibpath);
951 
952 	/* set up array functions */
953 	init_env_array(ENVIRON_node);
954 
955 	return ENVIRON_node;
956 }
957 
958 /* load_procinfo_argv --- populate PROCINFO["argv"] */
959 
960 static void
load_procinfo_argv()961 load_procinfo_argv()
962 {
963 	NODE *sub;
964 	NODE *val;
965 	NODE *argv_array;
966 	int i;
967 
968 	// build the sub-array first
969 	getnode(argv_array);
970  	memset(argv_array, '\0', sizeof(NODE));  /* valgrind wants this */
971 	null_array(argv_array);
972 	argv_array->parent_array = PROCINFO_node;
973 	argv_array->vname = estrdup("argv", 4);
974 	for (i = 0; d_argv[i] != NULL; i++) {
975 		sub = make_number(i);
976 		val = make_string(d_argv[i], strlen(d_argv[i]));
977 		assoc_set(argv_array, sub, val);
978 	}
979 
980 	// hook it into PROCINFO
981 	sub = make_string("argv", 4);
982 	assoc_set(PROCINFO_node, sub, argv_array);
983 
984 }
985 
986 /* load_procinfo --- populate the PROCINFO array */
987 
988 static NODE *
load_procinfo()989 load_procinfo()
990 {
991 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
992 	int i;
993 #endif
994 #if (defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0) || defined(HAVE_MPFR)
995 	char name[100];
996 #endif
997 	AWKNUM value;
998 	static bool been_here = false;
999 
1000 	if (been_here)
1001 		return PROCINFO_node;
1002 
1003 	been_here = true;
1004 
1005 	PROCINFO_node = install_symbol(estrdup("PROCINFO", 8), Node_var_array);
1006 
1007 	update_PROCINFO_str("version", VERSION);
1008 	update_PROCINFO_str("strftime", def_strftime_format);
1009 	update_PROCINFO_str("platform", platform_name());
1010 
1011 #ifdef HAVE_MPFR
1012 	sprintf(name, "GNU MPFR %s", mpfr_get_version());
1013 	update_PROCINFO_str("mpfr_version", name);
1014 	sprintf(name, "GNU MP %s", gmp_version);
1015 	update_PROCINFO_str("gmp_version", name);
1016 	update_PROCINFO_num("prec_max", MPFR_PREC_MAX);
1017 	update_PROCINFO_num("prec_min", MPFR_PREC_MIN);
1018 #endif
1019 
1020 #ifdef DYNAMIC
1021 	update_PROCINFO_num("api_major", GAWK_API_MAJOR_VERSION);
1022 	update_PROCINFO_num("api_minor", GAWK_API_MINOR_VERSION);
1023 #endif
1024 
1025 #ifdef GETPGRP_VOID
1026 #define getpgrp_arg() /* nothing */
1027 #else
1028 #define getpgrp_arg() getpid()
1029 #endif
1030 
1031 	value = getpgrp(getpgrp_arg());
1032 	update_PROCINFO_num("pgrpid", value);
1033 
1034 	/*
1035 	 * Could put a lot of this into a table, but then there's
1036 	 * portability problems declaring all the functions. So just
1037 	 * do it the slow and stupid way. Sigh.
1038 	 */
1039 
1040 	value = getpid();
1041 	update_PROCINFO_num("pid", value);
1042 
1043 	value = getppid();
1044 	update_PROCINFO_num("ppid", value);
1045 
1046 	value = getuid();
1047 	update_PROCINFO_num("uid", value);
1048 
1049 	value = geteuid();
1050 	update_PROCINFO_num("euid", value);
1051 
1052 	value = getgid();
1053 	update_PROCINFO_num("gid", value);
1054 
1055 	value = getegid();
1056 	update_PROCINFO_num("egid", value);
1057 
1058 	update_PROCINFO_str("FS", current_field_sep_str());
1059 
1060 #if defined (HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
1061 	for (i = 0; i < ngroups; i++) {
1062 		sprintf(name, "group%d", i + 1);
1063 		value = groupset[i];
1064 		update_PROCINFO_num(name, value);
1065 	}
1066 	if (groupset) {
1067 		efree(groupset);
1068 		groupset = NULL;
1069 	}
1070 #endif
1071 	load_procinfo_argv();
1072 	return PROCINFO_node;
1073 }
1074 
1075 /* is_std_var --- return true if a variable is a standard variable */
1076 
1077 int
is_std_var(const char * var)1078 is_std_var(const char *var)
1079 {
1080 	const struct varinit *vp;
1081 
1082 	for (vp = varinit; vp->name != NULL; vp++) {
1083 		if (strcmp(vp->name, var) == 0) {
1084 			if ((do_traditional || do_posix) && (vp->flags & NON_STANDARD) != 0)
1085 				return false;
1086 
1087 			return true;
1088 		}
1089 	}
1090 
1091 	return false;
1092 }
1093 
1094 /*
1095  * is_off_limits_var --- return true if a variable is off limits
1096  * 			to extension functions
1097  */
1098 
1099 int
is_off_limits_var(const char * var)1100 is_off_limits_var(const char *var)
1101 {
1102 	const struct varinit *vp;
1103 
1104 	for (vp = varinit; vp->name != NULL; vp++) {
1105 		if (strcmp(vp->name, var) == 0)
1106 			return ((vp->flags & NOT_OFF_LIMITS) == 0);
1107 	}
1108 
1109 	return false;
1110 }
1111 
1112 /* get_spec_varname --- return the name of a special variable
1113 	with the given assign or update routine.
1114 */
1115 
1116 const char *
get_spec_varname(Func_ptr fptr)1117 get_spec_varname(Func_ptr fptr)
1118 {
1119 	const struct varinit *vp;
1120 
1121 	if (! fptr)
1122 		return NULL;
1123 	for (vp = varinit; vp->name != NULL; vp++) {
1124 		if (vp->assign == fptr || vp->update == fptr)
1125 			return vp->name;
1126 	}
1127 	return NULL;
1128 }
1129 
1130 
1131 /* arg_assign --- process a command-line assignment */
1132 
1133 int
arg_assign(char * arg,bool initing)1134 arg_assign(char *arg, bool initing)
1135 {
1136 	char *cp, *cp2;
1137 	bool badvar;
1138 	NODE *var;
1139 	NODE *it;
1140 	NODE **lhs;
1141 	long save_FNR;
1142 
1143 	if (! initing && disallow_var_assigns)
1144 		return false;	/* --exec */
1145 
1146 	cp = strchr(arg, '=');
1147 
1148 	if (cp == NULL) {
1149 		if (! initing)
1150 			return false;	/* This is file name, not assignment. */
1151 
1152 		fprintf(stderr,
1153 			_("%s: `%s' argument to `-v' not in `var=value' form\n\n"),
1154 			myname, arg);
1155 		usage(EXIT_FAILURE, stderr);
1156 	}
1157 
1158 	*cp++ = '\0';
1159 
1160 	/* avoid false source indications in a fatal message */
1161 	source = NULL;
1162 	sourceline = 0;
1163 	save_FNR = FNR;
1164 	FNR = 0;
1165 
1166 	/* first check that the variable name has valid syntax */
1167 	badvar = false;
1168 	if (! is_letter((unsigned char) arg[0]))
1169 		badvar = true;
1170 	else
1171 		for (cp2 = arg+1; *cp2; cp2++)
1172 			if (! is_identchar((unsigned char) *cp2) && *cp2 != ':') {
1173 				badvar = true;
1174 				break;
1175 			}
1176 
1177 	if (badvar) {
1178 		if (initing)
1179 			fatal(_("`%s' is not a legal variable name"), arg);
1180 
1181 		if (do_lint)
1182 			lintwarn(_("`%s' is not a variable name, looking for file `%s=%s'"),
1183 				arg, arg, cp);
1184 
1185 		goto done;
1186 	}
1187 
1188 	// Assigning a string or typed regex
1189 
1190 	if (! validate_qualified_name(arg)) {
1191 		badvar = true;
1192 		goto done;
1193 	}
1194 
1195 	if (check_special(arg) >= 0)
1196 		fatal(_("cannot use gawk builtin `%s' as variable name"), arg);
1197 
1198 	if (! initing) {
1199 		var = lookup(arg);
1200 		if (var != NULL && var->type == Node_func)
1201 			fatal(_("cannot use function `%s' as variable name"), arg);
1202 	}
1203 
1204 	cp2 = cp + strlen(cp) - 1;	// end char
1205 	if (! do_traditional
1206 	    && strlen(cp) >= 3		// '@/' doesn't do it.
1207 	    && cp[0] == '@' && cp[1] == '/' && *cp2 == '/') {
1208 		// typed regex
1209 		size_t len = strlen(cp) - 3;
1210 
1211 		ezalloc(cp2, char *, len + 1, "arg_assign");
1212 		memcpy(cp2, cp + 2, len);
1213 
1214 		it = make_typed_regex(cp2, len);
1215 		// fall through to variable setup
1216 	} else {
1217 		// string assignment
1218 
1219 		// POSIX disallows any newlines inside strings
1220 		// The scanner handles that for program files.
1221 		// We have to check here for strings passed to -v.
1222 		if (do_posix && strchr(cp, '\n') != NULL)
1223 			fatal(_("POSIX does not allow physical newlines in string values"));
1224 
1225 		/*
1226 		 * BWK awk expands escapes inside assignments.
1227 		 * This makes sense, so we do it too.
1228 		 * In addition, remove \-<newline> as in scanning.
1229 		 */
1230 		it = make_str_node(cp, strlen(cp), SCAN | ELIDE_BACK_NL);
1231 		it->flags |= USER_INPUT;
1232 #ifdef LC_NUMERIC
1233 		/*
1234 		 * See comment above about locale decimal point.
1235 		 */
1236 		if (do_posix)
1237 			setlocale(LC_NUMERIC, "C");
1238 #endif /* LC_NUMERIC */
1239 		(void) force_number(it);
1240 #ifdef LC_NUMERIC
1241 		if (do_posix)
1242 			setlocale(LC_NUMERIC, locale);
1243 #endif /* LC_NUMERIC */
1244 	}
1245 
1246 	/*
1247 	 * since we are restoring the original text of ARGV later,
1248 	 * need to copy the variable name part if we don't want
1249 	 * name like v=abc instead of just v in var->vname
1250 	 */
1251 
1252 	cp2 = estrdup(arg, cp - arg);	/* var name */
1253 
1254 	var = variable(0, cp2, Node_var);
1255 	if (var == NULL)	/* error */
1256 		final_exit(EXIT_FATAL);
1257 
1258 	if (var->type == Node_var && var->var_update)
1259 		var->var_update();
1260 	lhs = get_lhs(var, false);
1261 	unref(*lhs);
1262 	*lhs = it;
1263 	/* check for set_FOO() routine */
1264 	if (var->type == Node_var && var->var_assign)
1265 		var->var_assign();
1266 
1267 done:
1268 	if (! initing)
1269 		*--cp = '=';	/* restore original text of ARGV */
1270 	FNR = save_FNR;
1271 	return ! badvar;
1272 }
1273 
1274 /* catchsig --- catch signals */
1275 
1276 static void
catchsig(int sig)1277 catchsig(int sig)
1278 {
1279 	if (sig == SIGFPE) {
1280 		fatal(_("floating point exception"));
1281 	} else if (sig == SIGSEGV
1282 #ifdef SIGBUS
1283 	        || sig == SIGBUS
1284 #endif
1285 	) {
1286 		if (errcount > 0)	// assume a syntax error corrupted our data structures
1287 			exit(EXIT_FATAL);
1288 
1289 		set_loc(__FILE__, __LINE__);
1290 		msg(_("fatal error: internal error"));
1291 		/* fatal won't abort() if not compiled for debugging */
1292 		// GLIBC 2.27 doesn't necessarily flush on abort. Sigh.
1293 		fflush(NULL);
1294 		abort();
1295 	} else
1296 		cant_happen();
1297 	/* NOTREACHED */
1298 }
1299 
1300 #ifdef HAVE_LIBSIGSEGV
1301 /* catchsegv --- for use with libsigsegv */
1302 
1303 static int
catchsegv(void * fault_address,int serious)1304 catchsegv(void *fault_address, int serious)
1305 {
1306 	if (errcount > 0)	// assume a syntax error corrupted our data structures
1307 		exit(EXIT_FATAL);
1308 
1309 	set_loc(__FILE__, __LINE__);
1310 	msg(_("fatal error: internal error: segfault"));
1311 	fflush(NULL);
1312 	abort();
1313 	/*NOTREACHED*/
1314 	return 0;
1315 }
1316 
1317 /* catchstackoverflow --- for use with libsigsegv */
1318 
1319 static void
catchstackoverflow(int emergency,stackoverflow_context_t scp)1320 catchstackoverflow(int emergency, stackoverflow_context_t scp)
1321 {
1322 	set_loc(__FILE__, __LINE__);
1323 	msg(_("fatal error: internal error: stack overflow"));
1324 	fflush(NULL);
1325 	abort();
1326 	/*NOTREACHED*/
1327 	return;
1328 }
1329 #endif /* HAVE_LIBSIGSEGV */
1330 
/* nostalgia --- print the famous error message and die */

static void
nostalgia()
{
	/*
	 * N.B.: The message is deliberately left out of gettext.
	 * So there.
	 */
	static const char famous[] = "awk: bailing out near line 1\n";

	fputs(famous, stderr);
	fflush(stderr);
	abort();
}
1344 
1345 /* version --- print version message */
1346 
1347 static void
version()1348 version()
1349 {
1350 	printf("%s", version_string);
1351 #ifdef DYNAMIC
1352 	printf(", API: %d.%d", GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION);
1353 #endif
1354 #ifdef HAVE_MPFR
1355 	printf(" (GNU MPFR %s, GNU MP %s)", mpfr_get_version(), gmp_version);
1356 #endif
1357 	printf("\n");
1358 	print_ext_versions();
1359 
1360 	/*
1361 	 * Per GNU coding standards, print copyright info,
1362 	 * then exit successfully, do nothing else.
1363 	 */
1364 	copyleft();
1365 	exit(EXIT_SUCCESS);
1366 }
1367 
/* init_fds --- check for 0, 1, 2, open on /dev/null if possible */

/*
 * For each of the three standard descriptors on which fstat() fails,
 * /dev/null is opened using the mode listed in opposite_mode: "w" for
 * descriptor 0 and "r" for descriptors 1 and 2.
 *
 * The two optional lint warnings are both guarded by a plain #ifdef
 * on MAKE_A_HEROIC_EFFORT, so that they are always enabled or
 * disabled together, however the macro happens to be defined.
 */

static void
init_fds()
{
	struct stat sbuf;
	int fd;
	int newfd;
	char const *const opposite_mode[] = {"w", "r", "r"};

	/* maybe no stderr, don't bother with error mesg */
	for (fd = 0; fd <= 2; fd++) {
		if (fstat(fd, &sbuf) < 0) {
#ifdef MAKE_A_HEROIC_EFFORT
			if (do_lint)
				lintwarn(_("no pre-opened fd %d"), fd);
#endif
			newfd = devopen("/dev/null", opposite_mode[fd]);
			/* turn off some compiler warnings "set but not used" */
			(void) newfd;
#ifdef MAKE_A_HEROIC_EFFORT
			if (do_lint && newfd < 0)
				lintwarn(_("could not pre-open /dev/null for fd %d"), fd);
#endif
		}
	}
}
1395 
/* init_groupset --- initialize groupset */

/*
 * Fills in groupset and ngroups using getgroups().  On an error, or
 * if there are no groups, nothing is reported; after a failure of
 * the second getgroups() call, groupset is NULL and ngroups is 0.
 * Compiles to an empty function if getgroups() is not usable.
 */

static void
init_groupset()
{
#if defined(HAVE_GETGROUPS) && defined(NGROUPS_MAX) && NGROUPS_MAX > 0
#ifdef GETGROUPS_NOT_STANDARD
	/* For systems that aren't standards conformant, use old way. */
	ngroups = NGROUPS_MAX;
#else
	/*
	 * Called with 0 for both args, getgroups() returns the
	 * total number of groups.
	 */
	ngroups = getgroups(0, NULL);
#endif
	/* If an error or no groups, just give up and get on with life. */
	if (ngroups <= 0)
		return;

	/* fill in groups */
	emalloc(groupset, GETGROUPS_T *, ngroups * sizeof(GETGROUPS_T), "init_groupset");

	ngroups = getgroups(ngroups, groupset);
	if (ngroups != -1)
		return;

	/* same thing here, give up but keep going */
	efree(groupset);
	groupset = NULL;
	ngroups = 0;
#endif
}
1428 
1429 /* estrdup --- duplicate a string */
1430 
1431 char *
estrdup(const char * str,size_t len)1432 estrdup(const char *str, size_t len)
1433 {
1434 	char *s;
1435 	emalloc(s, char *, len + 1, "estrdup");
1436 	memcpy(s, str, len);
1437 	s[len] = '\0';
1438 	return s;
1439 }
1440 
1441 #if defined(HAVE_LOCALE_H)
1442 
/* init_locale --- initialize locale info. */

/*
 * On some operating systems, the pointers in the struct returned
 * by localeconv() can become dangling pointers after a call to
 * setlocale().  So the struct is copied, and each string member
 * listed below is then replaced by a separate copy.
 *
 * Thanks to KIMURA Koichi <kimura.koichi@canon.co.jp>.
 */

static void
init_locale(struct lconv *l)
{
	struct lconv *t = localeconv();

/* give *l its own copy of one string member of *t */
#define COPY_LCONV_STR(member) \
	(l->member = estrdup(t->member, strlen(t->member)))

	*l = *t;
	COPY_LCONV_STR(thousands_sep);
	COPY_LCONV_STR(decimal_point);
	COPY_LCONV_STR(grouping);
	COPY_LCONV_STR(int_curr_symbol);
	COPY_LCONV_STR(currency_symbol);
	COPY_LCONV_STR(mon_decimal_point);
	COPY_LCONV_STR(mon_thousands_sep);
	COPY_LCONV_STR(mon_grouping);
	COPY_LCONV_STR(positive_sign);
	COPY_LCONV_STR(negative_sign);

#undef COPY_LCONV_STR
}
1471 #endif /* LOCALE_H */
1472 
1473 /* save_argv --- save argv array */
1474 
1475 static void
save_argv(int argc,char ** argv)1476 save_argv(int argc, char **argv)
1477 {
1478 	int i;
1479 
1480 	emalloc(d_argv, char **, (argc + 1) * sizeof(char *), "save_argv");
1481 	for (i = 0; i < argc; i++)
1482 		d_argv[i] = estrdup(argv[i], strlen(argv[i]));
1483 	d_argv[argc] = NULL;
1484 }
1485 
1486 /*
1487  * update_global_values --- make sure the symbol table has correct values.
1488  * Called from the grammar before dumping values.
1489  *
1490  * Also called when accessing through SYMTAB, and from api_sym_lookup().
1491  */
1492 
1493 void
update_global_values()1494 update_global_values()
1495 {
1496 	const struct varinit *vp;
1497 
1498 	for (vp = varinit; vp->name; vp++) {
1499 		if (vp->update != NULL)
1500 			vp->update();
1501 	}
1502 }
1503 
/* getenv_long --- read a long value (>= 0) from an environment var. */

/*
 * Returns -1 if NAME is not set or if its value does not begin with
 * a digit.  Otherwise the leading run of decimal digits is converted
 * and whatever follows it is ignored.
 *
 * The conversion is left to strtol(), which returns LONG_MAX (and sets
 * errno to ERANGE) for a value that does not fit in a long.  Adding
 * the digits up by hand would overflow a signed long in that case,
 * which is undefined behavior.  The isdigit() test is still needed,
 * since strtol() by itself would accept leading white space and a sign.
 */

long
getenv_long(const char *name)
{
	const char *val = getenv(name);

	if (val == NULL || ! isdigit((unsigned char) *val))
		return -1;

	return strtol(val, NULL, 10);
}
1518 
1519 /* parse_args --- do the getopt_long thing */
1520 
1521 static void
parse_args(int argc,char ** argv)1522 parse_args(int argc, char **argv)
1523 {
1524 	/*
1525 	 * The + on the front tells GNU getopt not to rearrange argv.
1526 	 */
1527 	const char *optlist = "+F:f:v:W;bcCd::D::e:E:ghi:Il:L::nNo::Op::MPrSstVYZ:";
1528 	int old_optind;
1529 	int c;
1530 	char *scan;
1531 	char *src;
1532 
1533 	/* we do error messages ourselves on invalid options */
1534 	opterr = false;
1535 
1536 	/* copy argv before getopt gets to it; used to restart the debugger */
1537 	save_argv(argc, argv);
1538 
1539 	/* option processing. ready, set, go! */
1540 	for (optopt = 0, old_optind = 1;
1541 	     (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
1542 	     optopt = 0, old_optind = optind) {
1543 		if (do_posix)
1544 			opterr = true;
1545 
1546 		switch (c) {
1547 		case 'F':
1548 			add_preassign(PRE_ASSIGN_FS, optarg);
1549 			break;
1550 
1551 		case 'E':
1552 			disallow_var_assigns = true;
1553 			/* fall through */
1554 		case 'f':
1555 			/*
1556 			 * Allow multiple -f options.
1557 			 * This makes function libraries real easy.
1558 			 * Most of the magic is in the scanner.
1559 			 *
1560 			 * The following is to allow for whitespace at the end
1561 			 * of a #! /bin/gawk line in an executable file
1562 			 */
1563 			scan = optarg;
1564 			if (argv[optind-1] != optarg)
1565 				while (isspace((unsigned char) *scan))
1566 					scan++;
1567 			src = (*scan == '\0' ? argv[optind++] : optarg);
1568 			(void) add_srcfile((src && src[0] == '-' && src[1] == '\0') ?
1569 					SRC_STDIN : SRC_FILE,
1570 					src, srcfiles, NULL, NULL);
1571 
1572 			break;
1573 
1574 		case 'v':
1575 			add_preassign(PRE_ASSIGN, optarg);
1576 			break;
1577 
1578 		case 'b':
1579 			do_binary = true;
1580 			break;
1581 
1582 		case 'c':
1583 			do_flags |= DO_TRADITIONAL;
1584 			break;
1585 
1586 		case 'C':
1587 			copyleft();
1588 			break;
1589 
1590 		case 'd':
1591 			do_flags |= DO_DUMP_VARS;
1592 			if (optarg != NULL && optarg[0] != '\0')
1593 				varfile = optarg;
1594 			break;
1595 
1596 		case 'D':
1597 			do_flags |= DO_DEBUG;
1598 			if (optarg != NULL && optarg[0] != '\0')
1599 				command_file = optarg;
1600 			break;
1601 
1602 		case 'e':
1603 			if (optarg[0] == '\0')
1604 				warning(_("empty argument to `-e/--source' ignored"));
1605 			else
1606 				(void) add_srcfile(SRC_CMDLINE, optarg, srcfiles, NULL, NULL);
1607 			break;
1608 
1609 		case 'g':
1610 			do_flags |= DO_INTL;
1611 			break;
1612 
1613 		case 'h':
1614 			/* write usage to stdout, per GNU coding stds */
1615 			usage(EXIT_SUCCESS, stdout);
1616 			break;
1617 
1618 		case 'i':
1619 			(void) add_srcfile(SRC_INC, optarg, srcfiles, NULL, NULL);
1620 			break;
1621 
1622 		case 'I':
1623 			do_itrace = true;
1624 			break;
1625 
1626 		case 'l':
1627 			(void) add_srcfile(SRC_EXTLIB, optarg, srcfiles, NULL, NULL);
1628 			break;
1629 
1630 #ifndef NO_LINT
1631 		case 'L':
1632 			do_flags |= (DO_LINT_ALL|DO_LINT_EXTENSIONS);
1633 			if (optarg != NULL) {
1634 				if (strcmp(optarg, "fatal") == 0)
1635 					lintfunc = r_fatal;
1636 				else if (strcmp(optarg, "invalid") == 0) {
1637 					do_flags &= ~DO_LINT_ALL;
1638 					do_flags |= DO_LINT_INVALID;
1639 				}
1640 				else if (strcmp(optarg, "no-ext") == 0) {
1641 					do_flags &= ~DO_LINT_EXTENSIONS;
1642 				}
1643 			}
1644 			break;
1645 
1646 		case 't':
1647 			do_flags |= DO_LINT_OLD;
1648 			break;
1649 #else
1650 		case 'L':
1651 		case 't':
1652 			break;
1653 #endif
1654 
1655 		case 'n':
1656 			do_flags |= DO_NON_DEC_DATA;
1657 			break;
1658 
1659 		case 'N':
1660 			use_lc_numeric = true;
1661 			break;
1662 
1663 		case 'O':
1664 			do_optimize = true;
1665 			break;
1666 
1667 		case 'p':
1668 			if (do_pretty_print)
1669 				warning(_("`--profile' overrides `--pretty-print'"));
1670 			do_flags |= DO_PROFILE;
1671 			/* fall through */
1672 		case 'o':
1673 			if (c == 'o' && do_profile)
1674 				warning(_("`--profile' overrides `--pretty-print'"));
1675 			do_flags |= DO_PRETTY_PRINT;
1676 			if (optarg != NULL)
1677 				set_prof_file(optarg);
1678 			else
1679 				set_prof_file(DEFAULT_PROFILE);
1680 			break;
1681 
1682 		case 'M':
1683 #ifdef HAVE_MPFR
1684 			do_flags |= DO_MPFR;
1685 #else
1686 			warning(_("-M ignored: MPFR/GMP support not compiled in"));
1687 #endif
1688 			break;
1689 
1690 		case 'P':
1691 			do_flags |= DO_POSIX;
1692 			break;
1693 
1694 		case 'r':
1695 			do_flags |= DO_INTERVALS;
1696  			break;
1697 
1698 		case 's':
1699 			do_optimize = false;
1700 			break;
1701 
1702 		case 'S':
1703 			do_flags |= DO_SANDBOX;
1704 			break;
1705 
1706 		case 'V':
1707 			do_version = true;
1708 			break;
1709 
1710 		case 'W':       /* gawk specific options - now in getopt_long */
1711 			fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"),
1712 				argv[0], optarg);
1713 			break;
1714 
1715 		case 0:
1716 			/*
1717 			 * getopt_long found an option that sets a variable
1718 			 * instead of returning a letter. Do nothing, just
1719 			 * cycle around for the next one.
1720 			 */
1721 			break;
1722 
1723 		case 'Y':
1724 		case 'Z':
1725 #if defined(YYDEBUG) || defined(GAWKDEBUG)
1726 			if (c == 'Y') {
1727 				yydebug = 2;
1728 				break;
1729 			}
1730 #endif
1731 #if defined(LOCALEDEBUG)
1732 			if (c == 'Z') {
1733 				locale = optarg;
1734 				break;
1735 			}
1736 #endif
1737 			/* if not debugging, fall through */
1738 		case '?':
1739 		default:
1740 			/*
1741 			 * If not posix, an unrecognized option stops argument
1742 			 * processing so that it can go into ARGV for the awk
1743 			 * program to see. This makes use of ``#! /bin/gawk -f''
1744 			 * easier.
1745 			 *
1746 			 * However, it's never simple. If optopt is set,
1747 			 * an option that requires an argument didn't get the
1748 			 * argument. We care because if opterr is 0, then
1749 			 * getopt_long won't print the error message for us.
1750 			 */
1751 			if (! do_posix
1752 			    && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
1753 				/*
1754 				 * can't just do optind--. In case of an
1755 				 * option with >= 2 letters, getopt_long
1756 				 * won't have incremented optind.
1757 				 */
1758 				optind = old_optind;
1759 				stopped_early = true;
1760 				goto out;
1761 			} else if (optopt != '\0') {
1762 				/* Use POSIX required message format */
1763 				fprintf(stderr,
1764 					_("%s: option requires an argument -- %c\n"),
1765 					myname, optopt);
1766 				usage(EXIT_FAILURE, stderr);
1767 			}
1768 			/* else
1769 				let getopt print error message for us */
1770 			break;
1771 		}
1772 		if (c == 'E')	/* --exec ends option processing */
1773 			break;
1774 	}
1775 out:
1776 	do_optimize = (do_optimize && ! do_pretty_print);
1777 
1778 	return;
1779 }
1780 
1781 /* set_locale_stuff --- setup the locale stuff */
1782 
1783 static void
set_locale_stuff(void)1784 set_locale_stuff(void)
1785 {
1786 #if defined(LC_CTYPE)
1787 	setlocale(LC_CTYPE, locale);
1788 #endif
1789 #if defined(LC_COLLATE)
1790 	setlocale(LC_COLLATE, locale);
1791 #endif
1792 #if defined(LC_MESSAGES)
1793 	setlocale(LC_MESSAGES, locale);
1794 #endif
1795 #if defined(LC_NUMERIC) && defined(HAVE_LOCALE_H)
1796 	/*
1797 	 * Force the issue here.  According to POSIX 2001, decimal
1798 	 * point is used for parsing source code and for command-line
1799 	 * assignments and the locale value for processing input,
1800 	 * number to string conversion, and printing output.
1801 	 *
1802 	 * 10/2005 --- see below also; we now only use the locale's
1803 	 * decimal point if do_posix in effect.
1804 	 *
1805 	 * 9/2007:
1806 	 * This is a mess. We need to get the locale's numeric info for
1807 	 * the thousands separator for the %'d flag.
1808 	 */
1809 	setlocale(LC_NUMERIC, locale);
1810 	init_locale(& loc);
1811 	setlocale(LC_NUMERIC, "C");
1812 #endif
1813 #if defined(LC_TIME)
1814 	setlocale(LC_TIME, locale);
1815 #endif
1816 
1817 	/* These must be done after calling setlocale */
1818 	(void) bindtextdomain(PACKAGE, locale_dir);
1819 	(void) textdomain(PACKAGE);
1820 }
1821 
/* platform_name --- return the platform name */

/*
 * The name is chosen at compile time from the predefined macros
 * tested below; "posix" is used when none of them is defined.
 */

static const char *
platform_name()
{
	const char *name;

	// Cygwin and Mac OS X count as POSIX
#if defined(__VMS)
	name = "vms";
#elif defined(__MINGW32__)
	name = "mingw";
#elif defined(__DJGPP__)
	name = "djgpp";
#elif defined(__EMX__)
	name = "os2";
#elif defined(USE_EBCDIC)
	name = "os390";
#else
	name = "posix";
#endif

	return name;
}
1842 
1843 /* set_current_namespace --- set current_namespace and handle memory management */
1844 
1845 void
set_current_namespace(const char * new_namespace)1846 set_current_namespace(const char *new_namespace)
1847 {
1848 	if (current_namespace != awk_namespace)
1849 		efree((void *) current_namespace);
1850 
1851 	current_namespace = new_namespace;
1852 }
1853