1 /*
2  * main.c -- Expression tree constructors and main program for gawk.
3  */
4 
5 /*
6  * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc.
7  *
8  * This file is part of GAWK, the GNU implementation of the
9  * AWK Programming Language.
10  *
11  * GAWK is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * GAWK is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
24  */
25 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
26    Last change: Feb. 16, 2001 by okabe katsuyuki */
27 
28 #include "awk.h"
29 #include "getopt.h"
30 #include "patchlevel.h"
31 
/*
 * Forward declarations for the routines defined later in this file.
 * Parameter lists are wrapped in P(()); the macro itself is defined
 * elsewhere (presumably in awk.h, for pre-ANSI compilers -- confirm).
 */
static void usage P((int exitval, FILE *fp));	/* print help text, then exit(exitval) */
static void copyleft P((void));			/* print the copyright notice, then exit(0) */
static void cmdline_fs P((char *str));		/* handle -F: install str as FS */
static void init_args P((int argc0, int argc, char *argv0, char **argv));	/* build ARGV and ARGC */
static void init_vars P((void));		/* install the variables listed in varinit[] */
static void pre_assign P((char *v));		/* handle -v var=value */
RETSIGTYPE catchsig P((int sig, int code));	/* signal handler installed by main() */
static void nostalgia P((void));		/* handle --nostalgia: print a message and abort() */
static void version P((void));			/* handle --version */
41 
/* These nodes store all the special variables AWK uses */
NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node;
NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node;
NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node;
NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node;

/*
 * C-level globals named after AWK special variables.  Nothing in this
 * file assigns them directly; presumably the set_FOO routines named in
 * the varinit[] table keep them in step with the nodes above -- confirm
 * against the files that define those routines.
 */
long NF;
long NR;
long FNR;
int IGNORECASE;
char *OFS;
char *ORS;
char *OFMT;

/*
 * CONVFMT is a convenience pointer for the current number to string format.
 * We must supply an initial value to avoid recursion problems of
 *	set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT
 * Fun, fun, fun, fun.
 */
char *CONVFMT = "%.6g";

int errcount = 0;		/* error counter, used by yyerror() */

NODE *Nnull_string;		/* The global null string; set up early in main() */

/* The name the program was invoked under, for error messages */
const char *myname;

/* A block of AWK code to be run before running the program */
NODE *begin_block = NULL;

/* A block of AWK code to be run after the last input file */
NODE *end_block = NULL;

int exiting = FALSE;		/* Was an "exit" statement executed? */
int exit_val = 0;		/* optional exit value */

#if defined(YYDEBUG) || defined(DEBUG)
extern int yydebug;
#endif

/*
 * srcfiles is allocated in main() with one slot per command-line word.
 * numfiles is the index of the last slot in use: it starts at -1 and is
 * incremented before each store, so -1 means "no program source yet".
 */
struct src *srcfiles = NULL;	/* source file name(s) */
long numfiles = -1;		/* how many source files */

/* Option flags; most are set directly by getopt_long() through optab[]. */
int do_traditional = FALSE;	/* no gnu extensions, add traditional weirdnesses */
int do_posix = FALSE;		/* turn off gnu and unix extensions */
int do_lint = FALSE;		/* provide warnings about questionable stuff */
int do_lint_old = FALSE;	/* warn about stuff not in V7 awk */
int do_nostalgia = FALSE;	/* provide a blast from the past */
int do_intervals = FALSE;	/* allow {...,...} in regexps */

int save_memory = FALSE;	/* set by the --memory option; not read in this file */

int in_begin_rule = FALSE;	/* we're in a BEGIN rule */
int in_end_rule = FALSE;	/* we're in an END rule */

int output_is_tty = FALSE;	/* control flushing of output */

#ifdef __TURBOC__
/* NOTE(review): presumably the Turbo C run-time stack size -- confirm */
int _stklen = 0x8000;
#endif

extern char *version_string;	/* current version, for printing */

/* The parse tree is stored here.  */
NODE *expression_value;
109 
/*
 * optab --- long options handed to getopt_long() by main().
 *
 * Entries whose third member is the address of an int flag are the
 * "sets a variable instead of returning a letter" kind described at
 * `case 0' in main(): the flag receives the value in the last column.
 * Entries with NULL there carry a letter in the last column, and
 * main()'s switch has a case for each such letter.
 *
 * The table ends with an all-zero entry.
 */
static struct option optab[] = {
	{ "compat",		no_argument,		& do_traditional,	1 },
	{ "traditional",	no_argument,		& do_traditional,	1 },
	{ "lint",		no_argument,		& do_lint,	1 },
	{ "lint-old",		no_argument,		& do_lint_old,	1 },
	{ "posix",		no_argument,		& do_posix,	1 },
	{ "nostalgia",		no_argument,		& do_nostalgia,	1 },
	{ "copyleft",		no_argument,		NULL,		'C' },
	{ "copyright",		no_argument,		NULL,		'C' },
	{ "field-separator",	required_argument,	NULL,		'F' },
	{ "file",		required_argument,	NULL,		'f' },
	{ "re-interval",		no_argument,	& do_intervals,		1 },
	{ "source",		required_argument,	NULL,		's' },
	{ "assign",		required_argument,	NULL,		'v' },
	{ "version",		no_argument,		NULL,		'V' },
	{ "usage",		no_argument,		NULL,		'u' },
	{ "help",		no_argument,		NULL,		'u' },
	/* multi-byte extension: select the character type, see `case 'T'' in main() */
	{ "ctype",		required_argument,	NULL,		'T' },
	{ "memory",		no_argument,		& save_memory,  1 },
#ifdef DEBUG
	{ "parsedebug",		no_argument,		NULL,		'D' },
#endif
	{ NULL, 0, NULL, '\0' }
};
134 
135 /* main --- process args, parse program, run it, clean up */
136 
137 int
main(argc,argv)138 main(argc, argv)
139 int argc;
140 char **argv;
141 {
142 	int c;
143 	char *scan;
144 	/* the + on the front tells GNU getopt not to rearrange argv */
145 	const char *optlist = "+F:f:v:W;m:";
146 	int stopped_early = FALSE;
147 	int old_optind;
148 	extern int optind;
149 	extern int opterr;
150 	extern char *optarg;
151 
152 	setlocale(LC_CTYPE, "");
153 	setlocale(LC_COLLATE, "");
154 
155 	(void) signal(SIGFPE,  (RETSIGTYPE (*) P((int))) catchsig);
156 	(void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig);
157 #ifdef SIGBUS
158 	(void) signal(SIGBUS,  (RETSIGTYPE (*) P((int))) catchsig);
159 #endif
160 #if defined(MSDOS) && !defined(DJGPP)
161 	(void) signal(SIGINT,  (RETSIGTYPE (*) P((int))) catchsig);
162 #endif
163 
164 #ifdef __human68k__
165 	if (!isatty(fileno(stdout)))
166 		fmode(stdout, _IOBIN);
167 	if (!isatty(fileno(stderr)))
168 		fmode(stderr, _IOBIN);
169 #endif
170 	myname = gawk_name(argv[0]);
171         argv[0] = (char *) myname;
172 	os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */
173 	if (myname[0] == 'j' || myname[0] == 'J') {
174 		mbcinit(MBCTYPE_DEFAULT);
175 	} else if (!strncasecmp(myname, "euc", 3)) {
176 		mbcinit(MBCTYPE_EUC);
177 	} else if (!strncasecmp(myname, "sjis", 4)) {
178 		mbcinit(MBCTYPE_SJIS);
179 	} else if (!strncasecmp(myname, "utf8", 4)) {
180 		mbcinit(MBCTYPE_UTF8);
181 	} else if (getenv("GAWKMB_ENABLE")) {
182 		mbcinit(MBCTYPE_DEFAULT);
183 	}
184 
185 	/* remove sccs gunk */
186 	if (strncmp(version_string, "@(#)", 4) == 0)
187 		version_string += 4;
188 
189 	if (argc < 2)
190 		usage(1, stderr);
191 
192 	/* initialize the null string */
193 	Nnull_string = make_string("", 0);
194 	Nnull_string->numbr = 0.0;
195 	Nnull_string->type = Node_val;
196 	Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER);
197 
198 	/*
199 	 * Tell the regex routines how they should work.
200 	 * Do this before initializing variables, since
201 	 * they could want to do a regexp compile.
202 	 */
203 	resetup();
204 
205 	/* Set up the special variables */
206 	/*
207 	 * Note that this must be done BEFORE arg parsing else -F
208 	 * breaks horribly.
209 	 */
210 	init_vars();
211 
212 	/* Set up the field variables */
213 	/*
214 	 * Do this before arg parsing so that `-v NF=blah' won't
215 	 * break anything.
216 	 */
217 	init_fields();
218 
219 	/* worst case */
220 	emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main");
221 	memset(srcfiles, '\0', argc * sizeof(struct src));
222 
223 	/* we do error messages ourselves on invalid options */
224 	opterr = FALSE;
225 
226 	/* option processing. ready, set, go! */
227 	for (optopt = 0, old_optind = 1;
228 	     (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF;
229 	     optopt = 0, old_optind = optind) {
230 		if (do_posix)
231 			opterr = TRUE;
232 
233 		switch (c) {
234 		case 'F':
235 			cmdline_fs(optarg);
236 			break;
237 
238 		case 'f':
239 			/*
240 			 * a la MKS awk, allow multiple -f options.
241 			 * this makes function libraries real easy.
242 			 * most of the magic is in the scanner.
243 			 *
244 			 * The following is to allow for whitespace at the end
245 			 * of a #! /bin/gawk line in an executable file
246 			 */
247 			scan = optarg;
248 			while (ISSPACE((unsigned char) *scan))
249 				scan++;
250 
251 			++numfiles;
252 			srcfiles[numfiles].stype = SOURCEFILE;
253 			if (*scan == '\0')
254 				srcfiles[numfiles].val = argv[optind++];
255 			else
256 				srcfiles[numfiles].val = optarg;
257 			break;
258 
259 		case 'v':
260 			pre_assign(optarg);
261 			break;
262 
263 		case 'm':
264 			/*
265 			 * Research awk extension.
266 			 *	-mf nnn		set # fields, gawk ignores
267 			 *	-mr nnn		set record length, ditto
268 			 */
269 			if (do_lint)
270 				warning("-m[fr] option irrelevant in gawk");
271 			if (optarg[0] != 'r' && optarg[0] != 'f')
272 				warning("-m option usage: `-m[fr] nnn'");
273 			if (optarg[1] == '\0')
274 				optind++;
275 			break;
276 
277 		case 'W':       /* gawk specific options - now in getopt_long */
278 			fprintf(stderr, "%s: option `-W %s' unrecognized, ignored\n",
279 				argv[0], optarg);
280 			break;
281 
282 		/* These can only come from long form options */
283 		case 'C':
284 			copyleft();
285 			break;
286 
287 		case 's':
288 			if (optarg[0] == '\0')
289 				warning("empty argument to --source ignored");
290 			else {
291 				srcfiles[++numfiles].stype = CMDLINE;
292 				srcfiles[numfiles].val = optarg;
293 			}
294 			break;
295 
296 		case 'u':
297 			usage(0, stdout);	/* per coding stds */
298 			break;
299 
300 		case 'V':
301 			version();
302 			break;
303 
304 #ifdef DEBUG
305 		case 'D':
306 			yydebug = 2;
307 			break;
308 #endif
309 
310 		case 0:
311 			/*
312 			 * getopt_long found an option that sets a variable
313 			 * instead of returning a letter. Do nothing, just
314 			 * cycle around for the next one.
315 			 */
316 			break;
317 
318 		case 'T':
319 			if (strcasecmp(optarg, "ASCII") == 0)
320 				mbcinit(MBCTYPE_ASCII);
321 			else if (strcasecmp(optarg, "EUC") == 0)
322 				mbcinit(MBCTYPE_EUC);
323 			else if (strcasecmp(optarg, "SJIS") == 0)
324 				mbcinit(MBCTYPE_SJIS);
325 			else if (strcasecmp(optarg, "UTF8") == 0)
326 				mbcinit(MBCTYPE_UTF8);
327 			else
328 				warning("unknown argument to --ctype ignored");
329 			break;
330 
331 		case '?':
332 		default:
333 			/*
334 			 * New behavior.  If not posix, an unrecognized
335 			 * option stops argument processing so that it can
336 			 * go into ARGV for the awk program to see. This
337 			 * makes use of ``#! /bin/gawk -f'' easier.
338 			 *
339 			 * However, it's never simple. If optopt is set,
340 			 * an option that requires an argument didn't get the
341 			 * argument. We care because if opterr is 0, then
342 			 * getopt_long won't print the error message for us.
343 			 */
344 			if (! do_posix
345 			    && (optopt == '\0' || strchr(optlist, optopt) == NULL)) {
346 				/*
347 				 * can't just do optind--. In case of an
348 				 * option with >= 2 letters, getopt_long
349 				 * won't have incremented optind.
350 				 */
351 				optind = old_optind;
352 				stopped_early = TRUE;
353 				goto out;
354 			} else if (optopt != '\0')
355 				/* Use 1003.2 required message format */
356 				fprintf(stderr,
357 				"%s: option requires an argument -- %c\n",
358 					myname, optopt);
359 			/* else
360 				let getopt print error message for us */
361 			break;
362 		}
363 	}
364 out:
365 
366 	if (do_nostalgia)
367 		nostalgia();
368 
369 	/* check for POSIXLY_CORRECT environment variable */
370 	if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) {
371 		do_posix = TRUE;
372 		if (do_lint)
373 			warning(
374 	"environment variable `POSIXLY_CORRECT' set: turning on --posix");
375 	}
376 
377 	if (do_posix) {
378 		if (do_traditional)	/* both on command line */
379 			warning("--posix overrides --traditional");
380 		else
381 			do_traditional = TRUE;
382 			/*
383 			 * POSIX compliance also implies
384 			 * no GNU extensions either.
385 			 */
386 	}
387 
388 	/*
389 	 * Tell the regex routines how they should work.
390 	 * Do this again, after argument processing, since do_posix
391 	 * and do_traditional are now paid attention to by resetup().
392 	 */
393 	if (do_traditional || do_posix || do_intervals) {
394 		resetup();
395 
396 		/* now handle RS and FS. have to be careful with FS */
397 		set_RS();
398 		if (using_fieldwidths()) {
399 			set_FS();
400 			set_FIELDWIDTHS();
401 		} else
402 			set_FS();
403 	}
404 
405 #ifdef DEBUG
406 	setbuf(stdout, (char *) NULL);	/* make debugging easier */
407 #endif
408 	if (isatty(fileno(stdout)))
409 		output_is_tty = TRUE;
410 	/* No -f or --source options, use next arg */
411 	if (numfiles == -1) {
412 		if (optind > argc - 1 || stopped_early) /* no args left or no program */
413 			usage(1, stderr);
414 		srcfiles[++numfiles].stype = CMDLINE;
415 		srcfiles[numfiles].val = argv[optind];
416 		optind++;
417 	}
418 
419 	init_args(optind, argc, (char *) myname, argv);
420 	(void) tokexpand();
421 
422 	/* Read in the program */
423 	if (yyparse() != 0 || errcount != 0)
424 		exit(1);
425 	/* recover any space from C based alloca */
426 #ifdef C_ALLOCA
427 	(void) alloca(0);
428 #endif
429 
430 	if (do_lint && begin_block == NULL && expression_value == NULL
431 	     && end_block == NULL)
432 		warning("no program");
433 
434 	if (begin_block != NULL) {
435 		in_begin_rule = TRUE;
436 		(void) interpret(begin_block);
437 	}
438 	in_begin_rule = FALSE;
439 	if (! exiting && (expression_value != NULL || end_block != NULL))
440 		do_input();
441 	if (end_block != NULL) {
442 		in_end_rule = TRUE;
443 		(void) interpret(end_block);
444 	}
445 	in_end_rule = FALSE;
446 	if (close_io() != 0 && exit_val == 0)
447 		exit_val = 1;
448 	exit(exit_val);		/* more portable */
449 	return exit_val;	/* to suppress warnings */
450 }
451 
452 /* usage --- print usage information and exit */
453 
454 static void
usage(exitval,fp)455 usage(exitval, fp)
456 int exitval;
457 FILE *fp;
458 {
459 	char *opt1 = " -f progfile [--]";
460 	char *regops = " [POSIX or GNU style options]";
461 
462 	fprintf(fp, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n",
463 		myname, regops, opt1, myname, regops, quote, quote);
464 
465 	/* GNU long options info. Gack. */
466 	fputs("POSIX options:\t\tGNU long options:\n", fp);
467 	fputs("\t-f progfile\t\t--file=progfile\n", fp);
468 	fputs("\t-F fs\t\t\t--field-separator=fs\n", fp);
469 	fputs("\t-v var=val\t\t--assign=var=val\n", fp);
470 	fputs("\t-m[fr] val\n", fp);
471 	fputs("\t-W compat\t\t--compat\n", fp);
472 	fputs("\t-W copyleft\t\t--copyleft\n", fp);
473 	fputs("\t-W copyright\t\t--copyright\n", fp);
474 	fputs("\t-W help\t\t\t--help\n", fp);
475 	fputs("\t-W lint\t\t\t--lint\n", fp);
476 	fputs("\t-W lint-old\t\t--lint-old\n", fp);
477 #ifdef NOSTALGIA
478 	fputs("\t-W nostalgia\t\t--nostalgia\n", fp);
479 #endif
480 #ifdef DEBUG
481 	fputs("\t-W parsedebug\t\t--parsedebug\n", fp);
482 #endif
483 	fputs("\t-W posix\t\t--posix\n", fp);
484 	fputs("\t-W re-interval\t\t--re-interval\n", fp);
485 	fputs("\t-W source=program-text\t--source=program-text\n", fp);
486 	fputs("\t-W traditional\t\t--traditional\n", fp);
487 	fputs("\t-W usage\t\t--usage\n", fp);
488 	fputs("\t-W version\t\t--version\n", fp);
489  	fputs("\t-W ctype=ASCII\t\t--ctype=ASCII\n", stderr);
490  	fputs("\t-W ctype=EUC\t\t--ctype=EUC\n", stderr);
491  	fputs("\t-W ctype=SJIS\t\t--ctype=SJIS\n", stderr);
492  	fputs("\t-W ctype=UTF8\t\t--ctype=UTF8\n", stderr);
493  	fputs("\t-W memory\t\t--memory\n", stderr);
494 	fputs("\nTo report bugs, see node `Bugs' in `gawk.info', which\n", fp);
495 	fputs("is section `Reporting Problems and Bugs' in the\n", fp);
496 	fputs("printed version.\n", fp);
497  	fputs("\nReport multi-byte extension version bugs to HGC02147@nifty.ne.jp\n", fp);
498 	exit(exitval);
499 }
500 
/*
 * copyleft --- print out the short GNU copyright information
 *
 * Writes the notice to stdout, flushes it and exits with status 0.
 */

static void
copyleft()
{
	/*
	 * The notice is kept as three separate literals rather than one:
	 * some compilers cannot cope with a single string that long.
	 */
	static char *notice[] = {
"Copyright (C) 1989, 1991-2000 Free Software Foundation.\n\
\n\
This program is free software; you can redistribute it and/or modify\n\
it under the terms of the GNU General Public License as published by\n\
the Free Software Foundation; either version 2 of the License, or\n\
(at your option) any later version.\n\
\n",
"This program is distributed in the hope that it will be useful,\n\
but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
GNU General Public License for more details.\n\
\n",
"You should have received a copy of the GNU General Public License\n\
along with this program; if not, write to the Free Software\n\
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.\n"
	};
	size_t part;

	for (part = 0; part < sizeof(notice) / sizeof(notice[0]); part++)
		fputs(notice[part], stdout);
	fflush(stdout);
	exit(0);
}
532 
533 /* cmdline_fs --- set FS from the command line */
534 
535 static void
cmdline_fs(str)536 cmdline_fs(str)
537 char *str;
538 {
539 	register NODE **tmp;
540 
541 	tmp = get_lhs(FS_node, (Func_ptr *) 0);
542 	unref(*tmp);
543 	/*
544 	 * Only if in full compatibility mode check for the stupid special
545 	 * case so -F\t works as documented in awk book even though the shell
546 	 * hands us -Ft.  Bleah!
547 	 *
548 	 * Thankfully, Posix didn't propogate this "feature".
549 	 */
550 	if (str[0] == 't' && str[1] == '\0') {
551 		if (do_lint)
552 			warning("-Ft does not set FS to tab in POSIX awk");
553 		if (do_traditional && ! do_posix)
554 			str[0] = '\t';
555 	}
556 	*tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */
557 	set_FS();
558 }
559 
560 /* init_args --- set up ARGV from stuff on the command line */
561 
562 static void
init_args(argc0,argc,argv0,argv)563 init_args(argc0, argc, argv0, argv)
564 int argc0, argc;
565 char *argv0;
566 char **argv;
567 {
568 	int i, j;
569 	NODE **aptr;
570 
571 	ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL));
572 	aptr = assoc_lookup(ARGV_node, tmp_number(0.0));
573 	*aptr = make_string(argv0, strlen(argv0));
574 	(*aptr)->flags |= MAYBE_NUM;
575 	for (i = argc0, j = 1; i < argc; i++) {
576 		aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j));
577 		*aptr = make_string(argv[i], strlen(argv[i]));
578 		(*aptr)->flags |= MAYBE_NUM;
579 		j++;
580 	}
581 	ARGC_node = install("ARGC",
582 			node(make_number((AWKNUM) j), Node_var, (NODE *) NULL));
583 }
584 
/*
 * Set all the special variables to their initial values.
 * Note that some of the variables that have set_FOO routines should
 * *N*O*T* have those routines called upon initialization, and thus
 * they have NULL entries in that field. This is notably true of FS
 * and IGNORECASE.
 *
 * init_vars() walks this table; the field comments below describe how
 * it uses each member.
 */
struct varinit {
	NODE **spec;		/* where to store the node returned by install() */
	const char *name;	/* variable name; NULL marks the end of the table */
	NODETYPE type;		/* node type passed to node() */
	const char *strval;	/* initial string value; NULL means use numval */
	AWKNUM numval;		/* initial numeric value, used only if strval is NULL */
	Func_ptr assign;	/* called right after installation, if not NULL */
};
static struct varinit varinit[] = {
{&CONVFMT_node,	"CONVFMT",	Node_CONVFMT,		"%.6g",	0,  set_CONVFMT },
{&NF_node,	"NF",		Node_NF,		NULL,	-1, set_NF },
{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS,	"",	0,  NULL },
{&NR_node,	"NR",		Node_NR,		NULL,	0,  set_NR },
{&FNR_node,	"FNR",		Node_FNR,		NULL,	0,  set_FNR },
{&FS_node,	"FS",		Node_FS,		" ",	0,  NULL },
{&RS_node,	"RS",		Node_RS,		"\n",	0,  set_RS },
{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE,	NULL,	0,  NULL },
{&FILENAME_node, "FILENAME",	Node_var,		"",	0,  NULL },
{&OFS_node,	"OFS",		Node_OFS,		" ",	0,  set_OFS },
{&ORS_node,	"ORS",		Node_ORS,		"\n",	0,  set_ORS },
{&OFMT_node,	"OFMT",		Node_OFMT,		"%.6g",	0,  set_OFMT },
{&RLENGTH_node, "RLENGTH",	Node_var,		NULL,	0,  NULL },
{&RSTART_node,	"RSTART",	Node_var,		NULL,	0,  NULL },
{&SUBSEP_node,	"SUBSEP",	Node_var,		"\034",	0,  NULL },
{&ARGIND_node,	"ARGIND",	Node_var,		NULL,	0,  NULL },
{&ERRNO_node,	"ERRNO",	Node_var,		NULL,	0,  NULL },
{&RT_node,	"RT",		Node_var,		"",	0,  NULL },
{0,		NULL,		Node_illegal,		NULL,	0,  NULL },
};
621 
622 /* init_vars --- actually initialize everything in the symbol table */
623 
624 static void
init_vars()625 init_vars()
626 {
627 	register struct varinit *vp;
628 
629 	for (vp = varinit; vp->name; vp++) {
630 		*(vp->spec) = install((char *) vp->name,
631 		  node(vp->strval == NULL ? make_number(vp->numval)
632 				: make_string((char *) vp->strval,
633 					strlen(vp->strval)),
634 		       vp->type, (NODE *) NULL));
635 		(*(vp->spec))->flags |= SCALAR;
636 		if (vp->assign)
637 			(*(vp->assign))();
638 	}
639 }
640 
641 /* load_environ --- populate the ENVIRON array */
642 
643 void
load_environ()644 load_environ()
645 {
646 #if ! (defined(MSDOS) && !defined(__TURBOC__) && !defined(__GO32__)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC))
647 	extern char **environ;
648 #endif
649 	register char *var, *val, *cp;
650 	NODE **aptr;
651 	register int i;
652 
653 	ENVIRON_node = install("ENVIRON",
654 			node(Nnull_string, Node_var, (NODE *) NULL));
655 	for (i = 0; environ[i] != NULL; i++) {
656 		static char nullstr[] = "";
657 
658 		var = environ[i];
659 		val = strchr(var, '=');
660 		if (val != NULL)
661 			*val++ = '\0';
662 		else
663 			val = nullstr;
664 		aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var)));
665 		*aptr = make_string(val, strlen(val));
666 		(*aptr)->flags |= (MAYBE_NUM|SCALAR);
667 
668 		/* restore '=' so that system() gets a valid environment */
669 		if (val != nullstr)
670 			*--val = '=';
671 	}
672 	/*
673 	 * Put AWKPATH into ENVIRON if it's not there.
674 	 * This allows querying it from outside gawk.
675 	 */
676 	if ((cp = getenv("AWKPATH")) == NULL) {
677 		aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7));
678 		*aptr = make_string(defpath, strlen(defpath));
679 		(*aptr)->flags |= SCALAR;
680 	}
681 }
682 
683 /* arg_assign --- process a command-line assignment */
684 
685 char *
arg_assign(arg)686 arg_assign(arg)
687 char *arg;
688 {
689 	char *cp, *cp2;
690 	int badvar;
691 	Func_ptr after_assign = NULL;
692 	NODE *var;
693 	NODE *it;
694 	NODE **lhs;
695 
696 	cp = strchr(arg, '=');
697 	if (cp != NULL) {
698 		*cp++ = '\0';
699 		/* first check that the variable name has valid syntax */
700 		badvar = FALSE;
701 		if (! isalpha((unsigned char)arg[0]) && arg[0] != '_'
702 		    && !ismbchar(arg[0]))
703 			badvar = TRUE;
704 		else
705 			for (cp2 = arg; *cp2; cp2++) {
706 				if (ismbchar(*cp2)) {
707 					size_t n = mbclen(*cp2) - 1;
708 					while (n-- > 0)
709 						if (! *++cp2) {
710 							badvar = TRUE;
711 							break;
712 						}
713 					if (badvar == TRUE)
714 						break;
715 					continue;
716 				}
717 				if (! isalnum((unsigned char)*cp2) && *cp2 != '_') {
718 					badvar = TRUE;
719 					break;
720 				}
721 			}
722 		if (badvar) {
723 			if (do_lint)
724 				warning("illegal name `%s' in variable assignment", arg);
725 			*--cp = '=';	/* restore original text of ARGV */
726 			return NULL;
727 		}
728 
729 		/*
730 		 * Recent versions of nawk expand escapes inside assignments.
731 		 * This makes sense, so we do it too.
732 		 */
733 		it = make_str_node(cp, strlen(cp), SCAN);
734 		it->flags |= (MAYBE_NUM|SCALAR);
735 		var = variable(arg, FALSE, Node_var);
736 		lhs = get_lhs(var, &after_assign);
737 		unref(*lhs);
738 		*lhs = it;
739 		if (after_assign != NULL)
740 			(*after_assign)();
741 		*--cp = '=';	/* restore original text of ARGV */
742 	}
743 	return cp;
744 }
745 
746 /* pre_assign --- handle -v, print a message and die if a problem */
747 
748 static void
pre_assign(v)749 pre_assign(v)
750 char *v;
751 {
752 	if (arg_assign(v) == NULL) {
753 		fprintf(stderr,
754 			"%s: `%s' argument to `-v' not in `var=value' form\n",
755 				myname, v);
756 		usage(1, stderr);
757 	}
758 }
759 
760 /* catchsig --- catch signals */
761 
762 RETSIGTYPE
catchsig(sig,code)763 catchsig(sig, code)
764 int sig, code;
765 {
766 #ifdef lint
767 	code = 0; sig = code; code = sig;
768 #endif
769 #if defined(MSDOS) && !defined(DJGPP)
770 	if (sig == SIGINT) {
771 		exit(1);
772 	}
773 #endif
774 	if (sig == SIGFPE) {
775 #if defined(_MSC_VER) && (_MSC_VER >= 700)
776 		_fpreset();
777 #endif
778 		fatal("floating point exception");
779 	} else if (sig == SIGSEGV
780 #ifdef SIGBUS
781 	        || sig == SIGBUS
782 #endif
783 	) {
784 		set_loc(__FILE__, __LINE__);
785 		msg("fatal error: internal error");
786 		/* fatal won't abort() if not compiled for debugging */
787 		abort();
788 	} else
789 		cant_happen();
790 	/* NOTREACHED */
791 }
792 
/*
 * nostalgia --- print the famous error message and die
 *
 * The message goes to stderr; the program then ends via abort().
 */

static void
nostalgia()
{
	fputs("awk: bailing out near line 1\n", stderr);
	abort();
}
801 
802 /* version --- print version message */
803 
804 static void
version()805 version()
806 {
807 	printf("%s.%d + multi-byte extension 1.15\n", version_string, PATCHLEVEL);
808 	/*
809 	 * Per GNU coding standards, print copyright info,
810 	 * then exit successfully, do nothing else.
811 	 */
812 	copyleft();
813 	exit(0);
814 }
815