xref: /openbsd/usr.bin/awk/main.c (revision fb60ec6a)
1 /*	$OpenBSD: main.c,v 1.72 2024/08/03 21:12:16 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 const char	*version = "version 20240728";
27 
28 #define DEBUG
29 #include <stdio.h>
30 #include <ctype.h>
31 #include <locale.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <signal.h>
35 #include <unistd.h>
36 #include "awk.h"
37 
38 extern	char	*__progname;
39 extern	char	**environ;
40 extern	int	nfields;
41 
42 int	dbg	= 0;
43 Awkfloat	srand_seed = 1;
44 char	*cmdname;	/* gets argv[0] for error messages */
45 extern	FILE	*yyin;	/* lex input file */
46 char	*lexprog;	/* points to program argument if it exists */
47 extern	int errorflag;	/* non-zero if any syntax errors; set by yyerror */
48 enum compile_states	compile_time = ERROR_PRINTING;
49 
50 static char	**pfile;	/* program filenames from -f's */
51 static size_t	maxpfile;	/* max program filename */
52 static size_t	npfile;		/* number of filenames */
53 static size_t	curpfile;	/* current filename */
54 
55 bool	CSV = false;		/* true for csv input */
56 bool	safe = false;		/* true => "safe" mode */
57 bool	do_posix = false;	/* true => POSIX mode */
58 
59 size_t	awk_mb_cur_max = 1;
60 
fpecatch(int n,siginfo_t * si,void * uc)61 static noreturn void fpecatch(int n
62 #ifdef SA_SIGINFO
63 	, siginfo_t *si, void *uc
64 #endif
65 )
66 {
67 	extern Node *curnode;
68 #ifdef SA_SIGINFO
69 	const char *mesg = NULL;
70 
71 	switch (si->si_code) {
72 	case FPE_INTDIV:
73 		mesg = "Integer divide by zero";
74 		break;
75 	case FPE_INTOVF:
76 		mesg = "Integer overflow";
77 		break;
78 	case FPE_FLTDIV:
79 		mesg = "Floating point divide by zero";
80 		break;
81 	case FPE_FLTOVF:
82 		mesg = "Floating point overflow";
83 		break;
84 	case FPE_FLTUND:
85 		mesg = "Floating point underflow";
86 		break;
87 	case FPE_FLTRES:
88 		mesg = "Floating point inexact result";
89 		break;
90 	case FPE_FLTINV:
91 		mesg = "Invalid Floating point operation";
92 		break;
93 	case FPE_FLTSUB:
94 		mesg = "Subscript out of range";
95 		break;
96 	case 0:
97 	default:
98 		mesg = "Unknown error";
99 		break;
100 	}
101 #endif
102 	dprintf(STDERR_FILENO, "floating point exception%s%s\n",
103 #ifdef SA_SIGINFO
104 		": ", mesg
105 #else
106 		"", ""
107 #endif
108 	    );
109 
110 	if (compile_time != 2 && NR && *NR > 0) {
111 		dprintf(STDERR_FILENO, " input record number %d", (int) (*FNR));
112 		if (strcmp(*FILENAME, "-") != 0) {
113 			dprintf(STDERR_FILENO, ", file %s", *FILENAME);
114 		}
115 		dprintf(STDERR_FILENO, "\n");
116 	}
117 	if (compile_time != 2 && curnode) {
118 		dprintf(STDERR_FILENO, " source line number %d", curnode->lineno);
119 	} else if (compile_time != 2 && lineno) {
120 		dprintf(STDERR_FILENO, " source line number %d", lineno);
121 	}
122 	if (compile_time == 1 && cursource() != NULL) {
123 		dprintf(STDERR_FILENO, " source file %s", cursource());
124 	}
125 	dprintf(STDERR_FILENO, "\n");
126 	if (dbg > 1)		/* core dump if serious debugging on */
127 		abort();
128 	_exit(2);
129 }
130 
/*
 * setfs: turn a -F option argument into the field separator to use.
 *
 * The one-letter string "t" is shorthand for a tab and yields "\t";
 * every other argument is returned unchanged (the same pointer that
 * was passed in).
 */
static const char *
setfs(char *p)
{
	const char *sep = p;

	if (strcmp(p, "t") == 0)	/* wart: t=>\t */
		sep = "\t";
	return sep;
}
139 
/*
 * getarg: fetch the value belonging to the option word at (*argv)[1].
 *
 * If text follows the option letter in the same word ("-fsomething"),
 * a pointer to that text is returned and *argc / *argv are untouched.
 * Otherwise ("-f something") *argc is decremented and *argv advanced by
 * one, and the following word is returned; when no such word is left,
 * msg is reported through FATAL().
 *
 * The caller must already have verified that (*argv)[1] holds at least
 * a '-' and an option letter.
 */
static char *
getarg(int *argc, char ***argv, const char *msg)
{
	char *attached = &(*argv)[1][2];

	/* arg is -fsomething */
	if (*attached != '\0')
		return attached;

	/* arg is -f something: step over the option word itself */
	--*argc;
	++*argv;
	if (*argc <= 1)
		FATAL("%s", msg);
	return (*argv)[1];
}
152 
main(int argc,char * argv[])153 int main(int argc, char *argv[])
154 {
155 	const char *fs = NULL;
156 	char *fn, *vn;
157 
158 	setlocale(LC_CTYPE, "");
159 	setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */
160 	awk_mb_cur_max = MB_CUR_MAX;
161 	cmdname = __progname;
162 
163 	if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) {
164 		fprintf(stderr, "%s: pledge: incorrect arguments\n",
165 		    cmdname);
166 		exit(1);
167 	}
168 
169 	if (argc == 1) {
170 		fprintf(stderr, "usage: %s [-safe] [-V] [-d[n]] "
171 		    "[-f fs | --csv] [-v var=value]\n"
172 		    "\t   [prog | -f progfile] file ...\n", cmdname);
173 		return 1;
174 	}
175 #ifdef SA_SIGINFO
176 	{
177 		struct sigaction sa;
178 		sa.sa_sigaction = fpecatch;
179 		sa.sa_flags = SA_SIGINFO;
180 		sigemptyset(&sa.sa_mask);
181 		(void)sigaction(SIGFPE, &sa, NULL);
182 	}
183 #else
184 	(void)signal(SIGFPE, fpecatch);
185 #endif
186 
187 	do_posix = (getenv("POSIXLY_CORRECT") != NULL);
188 
189 	yyin = NULL;
190 	symtab = makesymtab(NSYMTAB);
191 	while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') {
192 		if (strcmp(argv[1], "--version") == 0) {
193 			printf("awk %s\n", version);
194 			return 0;
195 		}
196 		if (strcmp(argv[1], "--") == 0) {	/* explicit end of args */
197 			argc--;
198 			argv++;
199 			break;
200 		}
201 		if (strcmp(argv[1], "--csv") == 0) {	/* turn on csv input processing */
202 			CSV = true;
203 			argc--;
204 			argv++;
205 			continue;
206 		}
207 		switch (argv[1][1]) {
208 		case 's':
209 			if (strcmp(argv[1], "-safe") == 0)
210 				safe = true;
211 			break;
212 		case 'f':	/* next argument is program filename */
213 			fn = getarg(&argc, &argv, "no program filename");
214 			if (npfile >= maxpfile) {
215 				maxpfile += 20;
216 				pfile = (char **) reallocarray(pfile, maxpfile, sizeof(*pfile));
217 				if (pfile == NULL)
218 					FATAL("error allocating space for -f options");
219  			}
220 			pfile[npfile++] = fn;
221  			break;
222 		case 'F':	/* set field separator */
223 			fs = setfs(getarg(&argc, &argv, "no field separator"));
224 			break;
225 		case 'v':	/* -v a=1 to be done NOW.  one -v for each */
226 			vn = getarg(&argc, &argv, "no variable name");
227 			if (isclvar(vn))
228 				setclvar(vn);
229 			else
230 				FATAL("invalid -v option argument: %s", vn);
231 			break;
232 		case 'd':
233 			dbg = atoi(&argv[1][2]);
234 			if (dbg == 0)
235 				dbg = 1;
236 			printf("awk %s\n", version);
237 			break;
238 		case 'V':
239 			printf("awk %s\n", version);
240 			return 0;
241 		default:
242 			WARNING("unknown option %s ignored", argv[1]);
243 			break;
244 		}
245 		argc--;
246 		argv++;
247 	}
248 
249 	if (safe) {
250 		if (pledge("stdio rpath", NULL) == -1) {
251 			fprintf(stderr, "%s: pledge: incorrect arguments\n",
252 			    cmdname);
253 			exit(1);
254 		}
255 	}
256 
257 	if (CSV && (fs != NULL || lookup("FS", symtab) != NULL))
258 		WARNING("danger: don't set FS when --csv is in effect");
259 
260 	/* argv[1] is now the first argument */
261 	if (npfile == 0) {	/* no -f; first argument is program */
262 		if (argc <= 1) {
263 			if (dbg)
264 				exit(0);
265 			FATAL("no program given");
266 		}
267 		DPRINTF("program = |%s|\n", argv[1]);
268 		lexprog = argv[1];
269 		argc--;
270 		argv++;
271 	}
272 	recinit(recsize);
273 	syminit();
274 	compile_time = COMPILING;
275 	argv[0] = cmdname;	/* put prog name at front of arglist */
276 	DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]);
277 	arginit(argc, argv);
278 	if (!safe)
279 		envinit(environ);
280 	yyparse();
281 #if 0
282 	// Doing this would comply with POSIX, but is not compatible with
283 	// other awks and with what most users expect. So comment it out.
284 	setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */
285 #endif
286 	if (fs)
287 		*FS = qstring(fs, '\0');
288 	DPRINTF("errorflag=%d\n", errorflag);
289 	if (errorflag == 0) {
290 		compile_time = RUNNING;
291 		run(winner);
292 	} else
293 		bracecheck();
294 	return(errorflag);
295 }
296 
pgetc(void)297 int pgetc(void)		/* get 1 character from awk program */
298 {
299 	int c;
300 
301 	for (;;) {
302 		if (yyin == NULL) {
303 			if (curpfile >= npfile)
304 				return EOF;
305 			if (strcmp(pfile[curpfile], "-") == 0)
306 				yyin = stdin;
307 			else if ((yyin = fopen(pfile[curpfile], "r")) == NULL)
308 				FATAL("can't open file %s", pfile[curpfile]);
309 			lineno = 1;
310 		}
311 		if ((c = getc(yyin)) != EOF)
312 			return c;
313 		if (yyin != stdin)
314 			fclose(yyin);
315 		yyin = NULL;
316 		curpfile++;
317 	}
318 }
319 
cursource(void)320 char *cursource(void)	/* current source file name */
321 {
322 	if (npfile > 0)
323 		return pfile[curpfile < npfile ? curpfile : curpfile - 1];
324 	else
325 		return NULL;
326 }
327