xref: /openbsd/usr.bin/awk/main.c (revision fb60ec6a)
/*	$OpenBSD: main.c,v 1.72 2024/08/03 21:12:16 millert Exp $	*/
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
256ab05f83Stholo 
/* Version string; main() prints it as "awk <version>" for -V, --version and -d. */
const char	*version = "version 20240728";
276ab05f83Stholo 
286ab05f83Stholo #define DEBUG
296ab05f83Stholo #include <stdio.h>
306ab05f83Stholo #include <ctype.h>
31b2698ba9Smillert #include <locale.h>
326ab05f83Stholo #include <stdlib.h>
336ab05f83Stholo #include <string.h>
346ab05f83Stholo #include <signal.h>
358ce597b3Sdoug #include <unistd.h>
366ab05f83Stholo #include "awk.h"
376ab05f83Stholo 
extern	char	*__progname;	/* program name; main() copies it into cmdname */
extern	char	**environ;	/* passed to envinit() by main() unless -safe is given */
extern	int	nfields;

int	dbg	= 0;		/* debug level from -d[n]; main() forces it to 1 when n parses as 0 */
Awkfloat	srand_seed = 1;
char	*cmdname;	/* program name for error messages; main() sets it from __progname */
extern	FILE	*yyin;	/* lex input file */
char	*lexprog;	/* points to program argument if it exists */
extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
/* main() moves this to COMPILING before yyparse() and to RUNNING before run() */
enum compile_states	compile_time = ERROR_PRINTING;

static char	**pfile;	/* program filenames from -f's */
static size_t	maxpfile;	/* allocated slots in pfile; main() grows it by 20 at a time */
static size_t	npfile;		/* number of filenames */
static size_t	curpfile;	/* index into pfile of the file pgetc() is reading */

bool	CSV = false;		/* true for csv input */
bool	safe = false;		/* true => "safe" mode */
bool	do_posix = false;	/* true => POSIX mode */

size_t	awk_mb_cur_max = 1;	/* main() overwrites this with MB_CUR_MAX after setlocale() */
60c35264f9Smillert 
fpecatch(int n,siginfo_t * si,void * uc)614edce374Smillert static noreturn void fpecatch(int n
626685ce51Smillert #ifdef SA_SIGINFO
636685ce51Smillert 	, siginfo_t *si, void *uc
646685ce51Smillert #endif
656685ce51Smillert )
666685ce51Smillert {
676685ce51Smillert 	extern Node *curnode;
686685ce51Smillert #ifdef SA_SIGINFO
69e109dc98Smillert 	const char *mesg = NULL;
70e109dc98Smillert 
71e109dc98Smillert 	switch (si->si_code) {
72e109dc98Smillert 	case FPE_INTDIV:
73e109dc98Smillert 		mesg = "Integer divide by zero";
74e109dc98Smillert 		break;
75e109dc98Smillert 	case FPE_INTOVF:
76e109dc98Smillert 		mesg = "Integer overflow";
77e109dc98Smillert 		break;
78e109dc98Smillert 	case FPE_FLTDIV:
79e109dc98Smillert 		mesg = "Floating point divide by zero";
80e109dc98Smillert 		break;
81e109dc98Smillert 	case FPE_FLTOVF:
82e109dc98Smillert 		mesg = "Floating point overflow";
83e109dc98Smillert 		break;
84e109dc98Smillert 	case FPE_FLTUND:
85e109dc98Smillert 		mesg = "Floating point underflow";
86e109dc98Smillert 		break;
87e109dc98Smillert 	case FPE_FLTRES:
88e109dc98Smillert 		mesg = "Floating point inexact result";
89e109dc98Smillert 		break;
90e109dc98Smillert 	case FPE_FLTINV:
91e109dc98Smillert 		mesg = "Invalid Floating point operation";
92e109dc98Smillert 		break;
93e109dc98Smillert 	case FPE_FLTSUB:
94e109dc98Smillert 		mesg = "Subscript out of range";
95e109dc98Smillert 		break;
96e109dc98Smillert 	case 0:
97e109dc98Smillert 	default:
98e109dc98Smillert 		mesg = "Unknown error";
99e109dc98Smillert 		break;
100e109dc98Smillert 	}
1016685ce51Smillert #endif
1026685ce51Smillert 	dprintf(STDERR_FILENO, "floating point exception%s%s\n",
1036685ce51Smillert #ifdef SA_SIGINFO
104e109dc98Smillert 		": ", mesg
1056685ce51Smillert #else
1066685ce51Smillert 		"", ""
1076685ce51Smillert #endif
1086685ce51Smillert 	    );
1096685ce51Smillert 
1106685ce51Smillert 	if (compile_time != 2 && NR && *NR > 0) {
1116685ce51Smillert 		dprintf(STDERR_FILENO, " input record number %d", (int) (*FNR));
1126685ce51Smillert 		if (strcmp(*FILENAME, "-") != 0) {
1136685ce51Smillert 			dprintf(STDERR_FILENO, ", file %s", *FILENAME);
1146685ce51Smillert 		}
1156685ce51Smillert 		dprintf(STDERR_FILENO, "\n");
1166685ce51Smillert 	}
1176685ce51Smillert 	if (compile_time != 2 && curnode) {
1186685ce51Smillert 		dprintf(STDERR_FILENO, " source line number %d", curnode->lineno);
1196685ce51Smillert 	} else if (compile_time != 2 && lineno) {
1206685ce51Smillert 		dprintf(STDERR_FILENO, " source line number %d", lineno);
1216685ce51Smillert 	}
1226685ce51Smillert 	if (compile_time == 1 && cursource() != NULL) {
1236685ce51Smillert 		dprintf(STDERR_FILENO, " source file %s", cursource());
1246685ce51Smillert 	}
1256685ce51Smillert 	dprintf(STDERR_FILENO, "\n");
1266685ce51Smillert 	if (dbg > 1)		/* core dump if serious debugging on */
1276685ce51Smillert 		abort();
1286685ce51Smillert 	_exit(2);
1296685ce51Smillert }
1306685ce51Smillert 
/*
 * Translate a -F argument into the field separator to use.
 * Historical wart: the exact string "t" means a tab character.
 * Any other argument is returned unchanged (same pointer).
 */
static const char *
setfs(char *p)
{
	return (strcmp(p, "t") == 0) ? "\t" : p;
}
1396685ce51Smillert 
/*
 * Fetch the argument of the single-letter option at (*argv)[1].
 *
 * If text follows the option letter ("-fsomething") a pointer to that
 * text is returned and argc/argv are left alone.  Otherwise
 * ("-f something") argc/argv are advanced by one and the following
 * word is returned; when there is no following word, FATAL() is
 * called with msg.
 *
 * The caller must already have checked that (*argv)[1] begins with
 * '-' followed by an option letter.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *attached = &(*argv)[1][2];

	if (*attached != '\0')		/* arg is -fsomething */
		return attached;

	/* arg is -f something: step over the option word itself */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
1526685ce51Smillert 
main(int argc,char * argv[])1536ab05f83Stholo int main(int argc, char *argv[])
1546ab05f83Stholo {
1559a69093aSmillert 	const char *fs = NULL;
1566685ce51Smillert 	char *fn, *vn;
1576ab05f83Stholo 
1584c8cf207Smillert 	setlocale(LC_CTYPE, "");
1592682ef6bSderaadt 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
160c35264f9Smillert 	awk_mb_cur_max = MB_CUR_MAX;
1616ab0fc03Stb 	cmdname = __progname;
162f642db95Smillert 
1638ce597b3Sdoug 	if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) {
1648ce597b3Sdoug 		fprintf(stderr, "%s: pledge: incorrect arguments\n",
1658ce597b3Sdoug 		    cmdname);
1668ce597b3Sdoug 		exit(1);
1678ce597b3Sdoug 	}
1688ce597b3Sdoug 
1696ab05f83Stholo 	if (argc == 1) {
17014c49119Sjmc 		fprintf(stderr, "usage: %s [-safe] [-V] [-d[n]] "
17114c49119Sjmc 		    "[-f fs | --csv] [-v var=value]\n"
172f642db95Smillert 		    "\t   [prog | -f progfile] file ...\n", cmdname);
173f642db95Smillert 		return 1;
1746ab05f83Stholo 	}
1756685ce51Smillert #ifdef SA_SIGINFO
1766685ce51Smillert 	{
1776685ce51Smillert 		struct sigaction sa;
1786685ce51Smillert 		sa.sa_sigaction = fpecatch;
1796685ce51Smillert 		sa.sa_flags = SA_SIGINFO;
1806685ce51Smillert 		sigemptyset(&sa.sa_mask);
1816685ce51Smillert 		(void)sigaction(SIGFPE, &sa, NULL);
1826685ce51Smillert 	}
1836685ce51Smillert #else
1846685ce51Smillert 	(void)signal(SIGFPE, fpecatch);
1856685ce51Smillert #endif
186000399a4Smillert 
187ef789615Smillert 	do_posix = (getenv("POSIXLY_CORRECT") != NULL);
188ef789615Smillert 
1896ab05f83Stholo 	yyin = NULL;
1906ab05f83Stholo 	symtab = makesymtab(NSYMTAB);
1916ab05f83Stholo 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
192d322136cSmillert 		if (strcmp(argv[1], "--version") == 0) {
193d322136cSmillert 			printf("awk %s\n", version);
194d322136cSmillert 			return 0;
195d322136cSmillert 		}
19607edfa4aSkstailey 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
1976ab05f83Stholo 			argc--;
1986ab05f83Stholo 			argv++;
1996ab05f83Stholo 			break;
2006ab05f83Stholo 		}
201a886e62eSmillert 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
202a886e62eSmillert 			CSV = true;
203a886e62eSmillert 			argc--;
204a886e62eSmillert 			argv++;
205a886e62eSmillert 			continue;
206a886e62eSmillert 		}
2076ab05f83Stholo 		switch (argv[1][1]) {
20807edfa4aSkstailey 		case 's':
20907edfa4aSkstailey 			if (strcmp(argv[1], "-safe") == 0)
210f81b289fSmillert 				safe = true;
21107edfa4aSkstailey 			break;
2126ab05f83Stholo 		case 'f':	/* next argument is program filename */
2136685ce51Smillert 			fn = getarg(&argc, &argv, "no program filename");
2146685ce51Smillert 			if (npfile >= maxpfile) {
2156685ce51Smillert 				maxpfile += 20;
216a886e62eSmillert 				pfile = (char **) reallocarray(pfile, maxpfile, sizeof(*pfile));
2176685ce51Smillert 				if (pfile == NULL)
2186685ce51Smillert 					FATAL("error allocating space for -f options");
219000399a4Smillert  			}
2206685ce51Smillert 			pfile[npfile++] = fn;
2216ab05f83Stholo  			break;
2226ab05f83Stholo 		case 'F':	/* set field separator */
2236685ce51Smillert 			fs = setfs(getarg(&argc, &argv, "no field separator"));
2246ab05f83Stholo 			break;
2256ab05f83Stholo 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
2266685ce51Smillert 			vn = getarg(&argc, &argv, "no variable name");
2276685ce51Smillert 			if (isclvar(vn))
2286685ce51Smillert 				setclvar(vn);
229000399a4Smillert 			else
2306685ce51Smillert 				FATAL("invalid -v option argument: %s", vn);
2316ab05f83Stholo 			break;
2326ab05f83Stholo 		case 'd':
2336ab05f83Stholo 			dbg = atoi(&argv[1][2]);
2346ab05f83Stholo 			if (dbg == 0)
2356ab05f83Stholo 				dbg = 1;
2366ab05f83Stholo 			printf("awk %s\n", version);
2376ab05f83Stholo 			break;
238f642db95Smillert 		case 'V':
239a4fa8700Smillert 			printf("awk %s\n", version);
240d322136cSmillert 			return 0;
2416ab05f83Stholo 		default:
2427b11b857Smillert 			WARNING("unknown option %s ignored", argv[1]);
2436ab05f83Stholo 			break;
2446ab05f83Stholo 		}
2456ab05f83Stholo 		argc--;
2466ab05f83Stholo 		argv++;
2476ab05f83Stholo 	}
2488ce597b3Sdoug 
2498ce597b3Sdoug 	if (safe) {
2508ce597b3Sdoug 		if (pledge("stdio rpath", NULL) == -1) {
2518ce597b3Sdoug 			fprintf(stderr, "%s: pledge: incorrect arguments\n",
2528ce597b3Sdoug 			    cmdname);
2538ce597b3Sdoug 			exit(1);
2548ce597b3Sdoug 		}
2558ce597b3Sdoug 	}
2568ce597b3Sdoug 
257ce6cba49Smillert 	if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))
258ce6cba49Smillert 		WARNING("danger: don't set FS when --csv is in effect");
259ce6cba49Smillert 
2606ab05f83Stholo 	/* argv[1] is now the first argument */
2616ab05f83Stholo 	if (npfile == 0) {	/* no -f; first argument is program */
2626ab05f83Stholo 		if (argc <= 1) {
2636ab05f83Stholo 			if (dbg)
2646ab05f83Stholo 				exit(0);
2657b11b857Smillert 			FATAL("no program given");
2666ab05f83Stholo 		}
267115bd590Smillert 		DPRINTF("program = |%s|\n", argv[1]);
2686ab05f83Stholo 		lexprog = argv[1];
2696ab05f83Stholo 		argc--;
2706ab05f83Stholo 		argv++;
2716ab05f83Stholo 	}
2726ab05f83Stholo 	recinit(recsize);
2736ab05f83Stholo 	syminit();
274f81b289fSmillert 	compile_time = COMPILING;
2756ab05f83Stholo 	argv[0] = cmdname;	/* put prog name at front of arglist */
276115bd590Smillert 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
2776ab05f83Stholo 	arginit(argc, argv);
27807edfa4aSkstailey 	if (!safe)
2796ab05f83Stholo 		envinit(environ);
2806ab05f83Stholo 	yyparse();
2816685ce51Smillert #if 0
2826685ce51Smillert 	// Doing this would comply with POSIX, but is not compatible with
2836685ce51Smillert 	// other awks and with what most users expect. So comment it out.
28423cb51abSmillert 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
2856685ce51Smillert #endif
2866ab05f83Stholo 	if (fs)
28707edfa4aSkstailey 		*FS = qstring(fs, '\0');
288115bd590Smillert 	DPRINTF("errorflag=%d\n", errorflag);
2896ab05f83Stholo 	if (errorflag == 0) {
290f81b289fSmillert 		compile_time = RUNNING;
2916ab05f83Stholo 		run(winner);
2926ab05f83Stholo 	} else
2936ab05f83Stholo 		bracecheck();
2946ab05f83Stholo 	return(errorflag);
2956ab05f83Stholo }
2966ab05f83Stholo 
pgetc(void)2976ab05f83Stholo int pgetc(void)		/* get 1 character from awk program */
2986ab05f83Stholo {
2996ab05f83Stholo 	int c;
3006ab05f83Stholo 
3016ab05f83Stholo 	for (;;) {
3026ab05f83Stholo 		if (yyin == NULL) {
3036ab05f83Stholo 			if (curpfile >= npfile)
3046ab05f83Stholo 				return EOF;
30507edfa4aSkstailey 			if (strcmp(pfile[curpfile], "-") == 0)
3066ab05f83Stholo 				yyin = stdin;
30707edfa4aSkstailey 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
3087b11b857Smillert 				FATAL("can't open file %s", pfile[curpfile]);
309271018d0Smillert 			lineno = 1;
3106ab05f83Stholo 		}
3116ab05f83Stholo 		if ((c = getc(yyin)) != EOF)
3126ab05f83Stholo 			return c;
3136ab05f83Stholo 		if (yyin != stdin)
3146ab05f83Stholo 			fclose(yyin);
3156ab05f83Stholo 		yyin = NULL;
3166ab05f83Stholo 		curpfile++;
3176ab05f83Stholo 	}
3186ab05f83Stholo }
319271018d0Smillert 
cursource(void)320271018d0Smillert char *cursource(void)	/* current source file name */
321271018d0Smillert {
322271018d0Smillert 	if (npfile > 0)
3235df2889fSmillert 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
324271018d0Smillert 	else
325271018d0Smillert 		return NULL;
326271018d0Smillert }
327