xref: /openbsd/usr.bin/awk/main.c (revision 510d2225)
1 /*	$OpenBSD: main.c,v 1.67 2023/11/28 20:54:38 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 const char	*version = "version 20231127";	/* printed by -V, --version and -d */
27 
28 #define DEBUG
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <locale.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <signal.h>
35 #include <unistd.h>
36 #include "awk.h"
37 
38 extern	char	*__progname;
39 extern	char	**environ;
40 extern	int	nfields;
41 
42 int	dbg	= 0;	/* debug level from -d[n]; above 1 fpecatch() calls abort() */
43 Awkfloat	srand_seed = 1;	/* NOTE(review): presumably the srand() seed; not used in the visible part of this file */
44 char	*cmdname;	/* program name for error messages; main() sets it from __progname */
45 extern	FILE	*yyin;	/* lex input file */
46 char	*lexprog;	/* points to program argument if it exists */
47 extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
48 enum compile_states	compile_time = ERROR_PRINTING;	/* main() moves this to COMPILING, then RUNNING */
49 
50 static char	**pfile;	/* program filenames from -f's */
51 static size_t	maxpfile;	/* allocated slots in pfile (main() grows it 20 at a time) */
52 static size_t	npfile;		/* number of filenames */
53 static size_t	curpfile;	/* index into pfile of the file pgetc() is reading */
54 
55 bool	CSV = false;		/* true for csv input */
56 bool	safe = false;		/* true => "safe" mode (-safe): tighter pledge, no envinit() */
57 bool	do_posix = false;	/* true => POSIX mode (POSIXLY_CORRECT is set) */
58 
59 size_t	awk_mb_cur_max = 1;	/* main() sets this to MB_CUR_MAX after setlocale() */
60 
61 static noreturn void fpecatch(int n
62 #ifdef SA_SIGINFO
63 	, siginfo_t *si, void *uc
64 #endif
65 )
66 {
67 	extern Node *curnode;
68 #ifdef SA_SIGINFO
69 	static const char *emsg[] = {
70 		[0] = "Unknown error",
71 		[FPE_INTDIV] = "Integer divide by zero",
72 		[FPE_INTOVF] = "Integer overflow",
73 		[FPE_FLTDIV] = "Floating point divide by zero",
74 		[FPE_FLTOVF] = "Floating point overflow",
75 		[FPE_FLTUND] = "Floating point underflow",
76 		[FPE_FLTRES] = "Floating point inexact result",
77 		[FPE_FLTINV] = "Invalid Floating point operation",
78 		[FPE_FLTSUB] = "Subscript out of range",
79 	};
80 #endif
81 	dprintf(STDERR_FILENO, "floating point exception%s%s\n",
82 #ifdef SA_SIGINFO
83 		": ", (size_t)si->si_code < sizeof(emsg) / sizeof(emsg[0]) &&
84 		emsg[si->si_code] ? emsg[si->si_code] : emsg[0]
85 #else
86 		"", ""
87 #endif
88 	    );
89 
90 	if (compile_time != 2 && NR && *NR > 0) {
91 		dprintf(STDERR_FILENO, " input record number %d", (int) (*FNR));
92 		if (strcmp(*FILENAME, "-") != 0) {
93 			dprintf(STDERR_FILENO, ", file %s", *FILENAME);
94 		}
95 		dprintf(STDERR_FILENO, "\n");
96 	}
97 	if (compile_time != 2 && curnode) {
98 		dprintf(STDERR_FILENO, " source line number %d", curnode->lineno);
99 	} else if (compile_time != 2 && lineno) {
100 		dprintf(STDERR_FILENO, " source line number %d", lineno);
101 	}
102 	if (compile_time == 1 && cursource() != NULL) {
103 		dprintf(STDERR_FILENO, " source file %s", cursource());
104 	}
105 	dprintf(STDERR_FILENO, "\n");
106 	if (dbg > 1)		/* core dump if serious debugging on */
107 		abort();
108 	_exit(2);
109 }
110 
/*
 * Translate a -F argument into the field separator to use.
 * The argument is returned unchanged except for one historical wart:
 * the single letter "t" stands for a tab.
 */
static const char *
setfs(char *p)
{
	return (strcmp(p, "t") == 0) ? "\t" : p;
}
119 
/*
 * Fetch the value of the single-letter option sitting in (*argv)[1].
 *
 * If text follows the option letter ("-fprog") that text is the value
 * and argc/argv are left alone.  Otherwise ("-f prog") argc/argv are
 * advanced by one word and the following word is the value; FATAL(msg)
 * is raised when there is no such word.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *attached = &(*argv)[1][2];

	if (*attached != '\0')
		return attached;

	/* value is the next word on the command line */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
132 
133 int main(int argc, char *argv[])
134 {
135 	const char *fs = NULL;
136 	char *fn, *vn;
137 
138 	setlocale(LC_CTYPE, "");
139 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
140 	awk_mb_cur_max = MB_CUR_MAX;
141 	cmdname = __progname;
142 
143 	if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) {
144 		fprintf(stderr, "%s: pledge: incorrect arguments\n",
145 		    cmdname);
146 		exit(1);
147 	}
148 
149 	if (argc == 1) {
150 		fprintf(stderr, "usage: %s [-safe] [-V] [-d[n]] "
151 		    "[-f fs | --csv] [-v var=value]\n"
152 		    "\t   [prog | -f progfile] file ...\n", cmdname);
153 		return 1;
154 	}
155 #ifdef SA_SIGINFO
156 	{
157 		struct sigaction sa;
158 		sa.sa_sigaction = fpecatch;
159 		sa.sa_flags = SA_SIGINFO;
160 		sigemptyset(&sa.sa_mask);
161 		(void)sigaction(SIGFPE, &sa, NULL);
162 	}
163 #else
164 	(void)signal(SIGFPE, fpecatch);
165 #endif
166 
167 	do_posix = (getenv("POSIXLY_CORRECT") != NULL);
168 
169 	yyin = NULL;
170 	symtab = makesymtab(NSYMTAB);
171 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
172 		if (strcmp(argv[1], "--version") == 0) {
173 			printf("awk %s\n", version);
174 			return 0;
175 		}
176 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
177 			argc--;
178 			argv++;
179 			break;
180 		}
181 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
182 			CSV = true;
183 			if (fs)
184 				WARNING("danger: don't set FS when --csv is in effect");
185 			argc--;
186 			argv++;
187 			continue;
188 		}
189 		switch (argv[1][1]) {
190 		case 's':
191 			if (strcmp(argv[1], "-safe") == 0)
192 				safe = true;
193 			break;
194 		case 'f':	/* next argument is program filename */
195 			fn = getarg(&argc, &argv, "no program filename");
196 			if (npfile >= maxpfile) {
197 				maxpfile += 20;
198 				pfile = (char **) reallocarray(pfile, maxpfile, sizeof(*pfile));
199 				if (pfile == NULL)
200 					FATAL("error allocating space for -f options");
201  			}
202 			pfile[npfile++] = fn;
203  			break;
204 		case 'F':	/* set field separator */
205 			fs = setfs(getarg(&argc, &argv, "no field separator"));
206 			if (CSV)
207 				WARNING("danger: don't set FS when --csv is in effect");
208 			break;
209 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
210 			vn = getarg(&argc, &argv, "no variable name");
211 			if (isclvar(vn))
212 				setclvar(vn);
213 			else
214 				FATAL("invalid -v option argument: %s", vn);
215 			break;
216 		case 'd':
217 			dbg = atoi(&argv[1][2]);
218 			if (dbg == 0)
219 				dbg = 1;
220 			printf("awk %s\n", version);
221 			break;
222 		case 'V':
223 			printf("awk %s\n", version);
224 			return 0;
225 		default:
226 			WARNING("unknown option %s ignored", argv[1]);
227 			break;
228 		}
229 		argc--;
230 		argv++;
231 	}
232 
233 	if (safe) {
234 		if (pledge("stdio rpath", NULL) == -1) {
235 			fprintf(stderr, "%s: pledge: incorrect arguments\n",
236 			    cmdname);
237 			exit(1);
238 		}
239 	}
240 
241 	/* argv[1] is now the first argument */
242 	if (npfile == 0) {	/* no -f; first argument is program */
243 		if (argc <= 1) {
244 			if (dbg)
245 				exit(0);
246 			FATAL("no program given");
247 		}
248 		DPRINTF("program = |%s|\n", argv[1]);
249 		lexprog = argv[1];
250 		argc--;
251 		argv++;
252 	}
253 	recinit(recsize);
254 	syminit();
255 	compile_time = COMPILING;
256 	argv[0] = cmdname;	/* put prog name at front of arglist */
257 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
258 	arginit(argc, argv);
259 	if (!safe)
260 		envinit(environ);
261 	yyparse();
262 #if 0
263 	// Doing this would comply with POSIX, but is not compatible with
264 	// other awks and with what most users expect. So comment it out.
265 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
266 #endif
267 	if (fs)
268 		*FS = qstring(fs, '\0');
269 	DPRINTF("errorflag=%d\n", errorflag);
270 	if (errorflag == 0) {
271 		compile_time = RUNNING;
272 		run(winner);
273 	} else
274 		bracecheck();
275 	return(errorflag);
276 }
277 
278 int pgetc(void)		/* get 1 character from awk program */
279 {
280 	int c;
281 
282 	for (;;) {
283 		if (yyin == NULL) {
284 			if (curpfile >= npfile)
285 				return EOF;
286 			if (strcmp(pfile[curpfile], "-") == 0)
287 				yyin = stdin;
288 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
289 				FATAL("can't open file %s", pfile[curpfile]);
290 			lineno = 1;
291 		}
292 		if ((c = getc(yyin)) != EOF)
293 			return c;
294 		if (yyin != stdin)
295 			fclose(yyin);
296 		yyin = NULL;
297 		curpfile++;
298 	}
299 }
300 
301 char *cursource(void)	/* current source file name */
302 {
303 	if (npfile > 0)
304 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
305 	else
306 		return NULL;
307 }
308