xref: /netbsd/external/historical/nawk/dist/run.c (revision f9be4c11)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28 
29 #define DEBUG
30 #include <stdio.h>
31 #include <ctype.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <fcntl.h>
35 #include <setjmp.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <string.h>
39 #include <stdlib.h>
40 #include <time.h>
41 #include <sys/types.h>
42 #include <sys/wait.h>
43 #include "awk.h"
44 #include "awkgram.h"
45 
static void stdinit(void);
static void flush_all(void);

/*
 * tempfree(x): give cell x back to the pool of temporaries through tfree(),
 * but only when istemp(x) holds.  The macro is the variant that is compiled;
 * the function in the #else arm is a debugging alternative that also warns
 * when an OCELL cell carries a csub outside the CUNK..CFREE range.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
61 
62 /* do we really need these? */
63 /* #ifdef _NFILE */
64 /* #ifndef FOPEN_MAX */
65 /* #define FOPEN_MAX _NFILE */
66 /* #endif */
67 /* #endif */
68 /*  */
69 /* #ifndef	FOPEN_MAX */
70 /* #define	FOPEN_MAX	40 */	/* max number of open files */
71 /* #endif */
72 /*  */
73 /* #ifndef RAND_MAX */
74 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
75 /* #endif */
76 
77 jmp_buf env;
78 extern	int	pairstack[];
79 extern	Awkfloat	srand_seed;
80 
81 Node	*winner = NULL;	/* root of parse tree */
82 Cell	*tmps;		/* free temporary cells for execution */
83 
84 static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
85 Cell	*True	= &truecell;
86 static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
87 Cell	*False	= &falsecell;
88 static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
89 Cell	*jbreak	= &breakcell;
90 static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
91 Cell	*jcont	= &contcell;
92 static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
93 Cell	*jnext	= &nextcell;
94 static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
95 Cell	*jnextfile	= &nextfilecell;
96 static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
97 Cell	*jexit	= &exitcell;
98 static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
99 Cell	*jret	= &retcell;
100 static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
101 
102 Node	*curnode = NULL;	/* the node being executed, for debugging */
103 
104 /* buffer memory management */
/*
 * adjbuf: buffer memory management.  Grow *pbuf with realloc() so that it
 * holds at least minlen bytes; a buffer that is already large enough is
 * left untouched.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return   0 for realloc failure, !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *newbuf;
	int leftover, offset;

	if (minlen <= *psiz)
		return 1;	/* nothing to do */

	leftover = quantum ? minlen % quantum : 0;
	/* remember where the movable pointer sits relative to the buffer */
	offset = pbptr ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (leftover)
		minlen += quantum - leftover;
	newbuf = realloc(*pbuf, minlen);
	dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, newbuf) );
	if (newbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = newbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = newbuf + offset;	/* re-anchor in the new buffer */
	return 1;
}
138 
/*
 * run: execution of parse tree starts here.  Calls stdinit(), executes the
 * tree rooted at a (discarding the result), then calls closeall().
 */
void run(Node *a)
{
	stdinit();
	(void) execute(a);
	closeall();
}
146 
execute(Node * u)147 Cell *execute(Node *u)	/* execute a node of the parse tree */
148 {
149 	Cell *(*proc)(Node **, int);
150 	Cell *x;
151 	Node *a;
152 
153 	if (u == NULL)
154 		return(True);
155 	for (a = u; ; a = a->nnext) {
156 		curnode = a;
157 		if (isvalue(a)) {
158 			x = (Cell *) (a->narg[0]);
159 			if (isfld(x) && !donefld)
160 				fldbld();
161 			else if (isrec(x) && !donerec)
162 				recbld();
163 			return(x);
164 		}
165 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
166 			FATAL("illegal statement");
167 		proc = proctab[a->nobj-FIRSTTOKEN];
168 		x = (*proc)(a->narg, a->nobj);
169 		if (isfld(x) && !donefld)
170 			fldbld();
171 		else if (isrec(x) && !donerec)
172 			recbld();
173 		if (isexpr(a))
174 			return(x);
175 		if (isjump(x))
176 			return(x);
177 		if (a->nnext == NULL)
178 			return(x);
179 		tempfree(x);
180 	}
181 }
182 
183 
program(Node ** a,int n)184 Cell *program(Node **a, int n)	/* execute an awk program */
185 {				/* a[0] = BEGIN, a[1] = body, a[2] = END */
186 	Cell *x;
187 
188 	if (setjmp(env) != 0)
189 		goto ex;
190 	if (a[0]) {		/* BEGIN */
191 		x = execute(a[0]);
192 		if (isexit(x))
193 			return(True);
194 		if (isjump(x))
195 			FATAL("illegal break, continue, next or nextfile from BEGIN");
196 		tempfree(x);
197 	}
198 	if (a[1] || a[2])
199 		while (getrec(&record, &recsize, true) > 0) {
200 			x = execute(a[1]);
201 			if (isexit(x))
202 				break;
203 			tempfree(x);
204 		}
205   ex:
206 	if (setjmp(env) != 0)	/* handles exit within END */
207 		goto ex1;
208 	if (a[2]) {		/* END */
209 		x = execute(a[2]);
210 		if (isbreak(x) || isnext(x) || iscont(x))
211 			FATAL("illegal break, continue, next or nextfile from END");
212 		tempfree(x);
213 	}
214   ex1:
215 	return(True);
216 }
217 
218 struct Frame {	/* stack frame for awk function calls */
219 	int nargs;	/* number of arguments in this call */
220 	Cell *fcncell;	/* pointer to Cell for function */
221 	Cell **args;	/* pointer to array of arguments after execute */
222 	Cell *retval;	/* return value */
223 };
224 
225 #define	NARGS	50	/* max args in a call */
226 
227 struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
228 int	nframe = 0;		/* number of frames allocated */
229 struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
230 
call(Node ** a,int n)231 Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
232 {
233 	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
234 	int i, ncall, ndef;
235 	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
236 	Node *x;
237 	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
238 	Cell *y, *z, *fcn;
239 	char *s;
240 
241 	fcn = execute(a[0]);	/* the function itself */
242 	s = fcn->nval;
243 	if (!isfcn(fcn))
244 		FATAL("calling undefined function %s", s);
245 	if (frame == NULL) {
246 		frp = frame = calloc(nframe += 100, sizeof(*frame));
247 		if (frame == NULL)
248 			FATAL("out of space for stack frames calling %s", s);
249 	}
250 	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
251 		ncall++;
252 	ndef = (int) fcn->fval;			/* args in defn */
253 	   dprintf( ("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)) );
254 	if (ncall > ndef)
255 		WARNING("function %s called with %d args, uses only %d",
256 			s, ncall, ndef);
257 	if (ncall + ndef > NARGS)
258 		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
259 	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
260 		   dprintf( ("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)) );
261 		y = execute(x);
262 		oargs[i] = y;
263 		   dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
264 			   i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
265 		if (isfcn(y))
266 			FATAL("can't use function %s as argument in %s", y->nval, s);
267 		if (isarr(y))
268 			args[i] = y;	/* arrays by ref */
269 		else
270 			args[i] = copycell(y);
271 		tempfree(y);
272 	}
273 	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
274 		args[i] = gettemp();
275 		*args[i] = newcopycell;
276 	}
277 	frp++;	/* now ok to up frame */
278 	if (frp >= frame + nframe) {
279 		int dfp = frp - frame;	/* old index */
280 		frame = realloc(frame, (nframe += 100) * sizeof(*frame));
281 		if (frame == NULL)
282 			FATAL("out of space for stack frames in %s", s);
283 		frp = frame + dfp;
284 	}
285 	frp->fcncell = fcn;
286 	frp->args = args;
287 	frp->nargs = ndef;	/* number defined with (excess are locals) */
288 	frp->retval = gettemp();
289 
290 	   dprintf( ("start exec of %s, frp=%d\n", s, (int) (frp-frame)) );
291 	y = execute((Node *)(fcn->sval));	/* execute body */
292 	   dprintf( ("finished exec of %s, frp=%d\n", s, (int) (frp-frame)) );
293 
294 	for (i = 0; i < ndef; i++) {
295 		Cell *t = frp->args[i];
296 		if (isarr(t)) {
297 			if (t->csub == CCOPY) {
298 				if (i >= ncall) {
299 					freesymtab(t);
300 					t->csub = CTEMP;
301 					tempfree(t);
302 				} else {
303 					oargs[i]->tval = t->tval;
304 					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
305 					oargs[i]->sval = t->sval;
306 					tempfree(t);
307 				}
308 			}
309 		} else if (t != y) {	/* kludge to prevent freeing twice */
310 			t->csub = CTEMP;
311 			tempfree(t);
312 		} else if (t == y && t->csub == CCOPY) {
313 			t->csub = CTEMP;
314 			tempfree(t);
315 			freed = 1;
316 		}
317 	}
318 	tempfree(fcn);
319 	if (isexit(y) || isnext(y))
320 		return y;
321 	if (freed == 0) {
322 		tempfree(y);	/* don't free twice! */
323 	}
324 	z = frp->retval;			/* return value */
325 	   dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
326 	frp--;
327 	return(z);
328 }
329 
/*
 * copycell: make a copy of a cell in a temp.
 *
 * The copy drops the CON, FLD and REC flags (copy is not constant or
 * field), shares x's nval pointer and fval, and is marked CCOPY.  A string
 * cell gets its own duplicate of sval made with tostring(); otherwise the
 * copy is flagged DONTFREE.
 */
Cell *copycell(Cell *x)
{
	Cell *dup = gettemp();

	dup->tval = x->tval & ~(CON|FLD|REC);
	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	dup->fval = x->fval;
	if (isstr(x) /* || x->ctype == OCELL */) {
		dup->sval = tostring(x->sval);
		dup->tval &= ~DONTFREE;
	} else {
		dup->tval |= DONTFREE;
	}
	return dup;
}
348 
arg(Node ** a,int n)349 Cell *arg(Node **a, int n)	/* nth argument of a function */
350 {
351 
352 	n = ptoi(a[0]);	/* argument number, counting from 0 */
353 	   dprintf( ("arg(%d), frp->nargs=%d\n", n, frp->nargs) );
354 	if (n+1 > frp->nargs)
355 		FATAL("argument #%d of function %s was not supplied",
356 			n+1, frp->fcncell->nval);
357 	return frp->args[n];
358 }
359 
jump(Node ** a,int n)360 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
361 {
362 	Cell *y;
363 
364 	switch (n) {
365 	case EXIT:
366 		if (a[0] != NULL) {
367 			y = execute(a[0]);
368 			errorflag = (int) getfval(y);
369 			tempfree(y);
370 		}
371 		longjmp(env, 1);
372 	case RETURN:
373 		if (a[0] != NULL) {
374 			y = execute(a[0]);
375 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
376 				setsval(frp->retval, getsval(y));
377 				frp->retval->fval = getfval(y);
378 				frp->retval->tval |= NUM;
379 			}
380 			else if (y->tval & STR)
381 				setsval(frp->retval, getsval(y));
382 			else if (y->tval & NUM)
383 				setfval(frp->retval, getfval(y));
384 			else		/* can't happen */
385 				FATAL("bad type variable %d", y->tval);
386 			tempfree(y);
387 		}
388 		return(jret);
389 	case NEXT:
390 		return(jnext);
391 	case NEXTFILE:
392 		nextfile();
393 		return(jnextfile);
394 	case BREAK:
395 		return(jbreak);
396 	case CONTINUE:
397 		return(jcont);
398 	default:	/* can't happen */
399 		FATAL("illegal jump type %d", n);
400 	}
401 	return 0;	/* not reached */
402 }
403 
awkgetline(Node ** a,int n)404 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
405 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
406 	Cell *r, *x;
407 	extern Cell **fldtab;
408 	FILE *fp;
409 	char *buf;
410 	int bufsize = recsize;
411 	int mode;
412 	bool newflag;
413 
414 	if ((buf = malloc(bufsize)) == NULL)
415 		FATAL("out of memory in getline");
416 
417 	fflush(stdout);	/* in case someone is waiting for a prompt */
418 	r = gettemp();
419 	if (a[1] != NULL) {		/* getline < file */
420 		x = execute(a[2]);		/* filename */
421 		mode = ptoi(a[1]);
422 		if (mode == '|')		/* input pipe */
423 			mode = LE;	/* arbitrary flag */
424 		fp = openfile(mode, getsval(x), &newflag);
425 		tempfree(x);
426 		if (fp == NULL)
427 			n = -1;
428 		else
429 			n = readrec(&buf, &bufsize, fp, newflag);
430 		if (n <= 0) {
431 			;
432 		} else if (a[0] != NULL) {	/* getline var <file */
433 			x = execute(a[0]);
434 			setsval(x, buf);
435 			check_number(x);
436 			tempfree(x);
437 		} else {			/* getline <file */
438 			setsval(fldtab[0], buf);
439 			check_number(fldtab[0]);
440 		}
441 	} else {			/* bare getline; use current input */
442 		if (a[0] == NULL)	/* getline */
443 			n = getrec(&record, &recsize, true);
444 		else {			/* getline var */
445 			n = getrec(&buf, &bufsize, false);
446 			x = execute(a[0]);
447 			setsval(x, buf);
448 			check_number(x);
449 			tempfree(x);
450 		}
451 	}
452 	setfval(r, (Awkfloat) n);
453 	free(buf);
454 	return r;
455 }
456 
getnf(Node ** a,int n)457 Cell *getnf(Node **a, int n)	/* get NF */
458 {
459 	if (!donefld)
460 		fldbld();
461 	return (Cell *) a[0];
462 }
463 
464 static char *
makearraystring(Node * p,const char * func)465 makearraystring(Node *p, const char *func)
466 {
467 	char *buf;
468 	int bufsz = recsize;
469 	size_t blen, seplen;
470 
471 	if ((buf = malloc(bufsz)) == NULL) {
472 		FATAL("%s: out of memory", func);
473 	}
474 
475 	blen = 0;
476 	buf[blen] = '\0';
477 	seplen = strlen(getsval(subseploc));
478 
479 	for (; p; p = p->nnext) {
480 		Cell *x = execute(p);	/* expr */
481 		char *s = getsval(x);
482 		size_t nsub = p->nnext ? seplen : 0;
483 		size_t slen = strlen(s);
484 		size_t tlen = blen + slen + nsub;
485 
486 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
487 			FATAL("%s: out of memory %s[%s...]",
488 			    func, x->nval, buf);
489 		}
490 		memcpy(buf + blen, s, slen);
491 		if (nsub) {
492 			memcpy(buf + blen + slen, *SUBSEP, nsub);
493 		}
494 		buf[tlen] = '\0';
495 		blen = tlen;
496 		tempfree(x);
497 	}
498 	return buf;
499 }
500 
array(Node ** a,int n)501 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
502 {
503 	Cell *x, *z;
504 	char *buf;
505 
506 	x = execute(a[0]);	/* Cell* for symbol table */
507 	buf = makearraystring(a[1], __func__);
508 	if (!isarr(x)) {
509 		   dprintf( ("making %s into an array\n", NN(x->nval)) );
510 		if (freeable(x))
511 			xfree(x->sval);
512 		x->tval &= ~(STR|NUM|DONTFREE);
513 		x->tval |= ARR;
514 		x->sval = (char *) makesymtab(NSYMTAB);
515 	}
516 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
517 	z->ctype = OCELL;
518 	z->csub = CVAR;
519 	tempfree(x);
520 	free(buf);
521 	return(z);
522 }
523 
awkdelete(Node ** a,int n)524 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
525 {
526 	Cell *x;
527 
528 	x = execute(a[0]);	/* Cell* for symbol table */
529 	if (x == symtabloc) {
530 		FATAL("cannot delete SYMTAB or its elements");
531 	}
532 	if (!isarr(x))
533 		return True;
534 	if (a[1] == NULL) {	/* delete the elements, not the table */
535 		freesymtab(x);
536 		x->tval &= ~STR;
537 		x->tval |= ARR;
538 		x->sval = (char *) makesymtab(NSYMTAB);
539 	} else {
540 		char *buf = makearraystring(a[1], __func__);
541 		freeelem(x, buf);
542 		free(buf);
543 	}
544 	tempfree(x);
545 	return True;
546 }
547 
intest(Node ** a,int n)548 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
549 {
550 	Cell *ap, *k;
551 	char *buf;
552 
553 	ap = execute(a[1]);	/* array name */
554 	if (!isarr(ap)) {
555 		   dprintf( ("making %s into an array\n", ap->nval) );
556 		if (freeable(ap))
557 			xfree(ap->sval);
558 		ap->tval &= ~(STR|NUM|DONTFREE);
559 		ap->tval |= ARR;
560 		ap->sval = (char *) makesymtab(NSYMTAB);
561 	}
562 	buf = makearraystring(a[0], __func__);
563 	k = lookup(buf, (Array *) ap->sval);
564 	tempfree(ap);
565 	free(buf);
566 	if (k == NULL)
567 		return(False);
568 	else
569 		return(True);
570 }
571 
572 
matchop(Node ** a,int n)573 Cell *matchop(Node **a, int n)	/* ~ and match() */
574 {
575 	Cell *x, *y;
576 	char *s, *t;
577 	int i;
578 	fa *pfa;
579 	int (*mf)(fa *, const char *) = match, mode = 0;
580 
581 	if (n == MATCHFCN) {
582 		mf = pmatch;
583 		mode = 1;
584 	}
585 	x = execute(a[1]);	/* a[1] = target text */
586 	s = getsval(x);
587 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
588 		i = (*mf)((fa *) a[2], s);
589 	else {
590 		y = execute(a[2]);	/* a[2] = regular expr */
591 		t = getsval(y);
592 		pfa = makedfa(t, mode);
593 		i = (*mf)(pfa, s);
594 		tempfree(y);
595 	}
596 	tempfree(x);
597 	if (n == MATCHFCN) {
598 		int start = patbeg - s + 1;
599 		if (patlen < 0)
600 			start = 0;
601 		setfval(rstartloc, (Awkfloat) start);
602 		setfval(rlengthloc, (Awkfloat) patlen);
603 		x = gettemp();
604 		x->tval = NUM;
605 		x->fval = start;
606 		return x;
607 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
608 		return(True);
609 	else
610 		return(False);
611 }
612 
613 
boolop(Node ** a,int n)614 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
615 {
616 	Cell *x, *y;
617 	int i;
618 
619 	x = execute(a[0]);
620 	i = istrue(x);
621 	tempfree(x);
622 	switch (n) {
623 	case BOR:
624 		if (i) return(True);
625 		y = execute(a[1]);
626 		i = istrue(y);
627 		tempfree(y);
628 		if (i) return(True);
629 		else return(False);
630 	case AND:
631 		if ( !i ) return(False);
632 		y = execute(a[1]);
633 		i = istrue(y);
634 		tempfree(y);
635 		if (i) return(True);
636 		else return(False);
637 	case NOT:
638 		if (i) return(False);
639 		else return(True);
640 	default:	/* can't happen */
641 		FATAL("unknown boolean operator %d", n);
642 	}
643 	return 0;	/*NOTREACHED*/
644 }
645 
relop(Node ** a,int n)646 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
647 {
648 	int i;
649 	Cell *x, *y;
650 	Awkfloat j;
651 
652 	x = execute(a[0]);
653 	y = execute(a[1]);
654 	if (x->tval&NUM && y->tval&NUM) {
655 		j = x->fval - y->fval;
656 		i = j<0? -1: (j>0? 1: 0);
657 	} else {
658 		i = strcmp(getsval(x), getsval(y));
659 	}
660 	tempfree(x);
661 	tempfree(y);
662 	switch (n) {
663 	case LT:	if (i<0) return(True);
664 			else return(False);
665 	case LE:	if (i<=0) return(True);
666 			else return(False);
667 	case NE:	if (i!=0) return(True);
668 			else return(False);
669 	case EQ:	if (i == 0) return(True);
670 			else return(False);
671 	case GE:	if (i>=0) return(True);
672 			else return(False);
673 	case GT:	if (i>0) return(True);
674 			else return(False);
675 	default:	/* can't happen */
676 		FATAL("unknown relational operator %d", n);
677 	}
678 	return 0;	/*NOTREACHED*/
679 }
680 
/*
 * tfree: free a tempcell.  Releases the cell's string when freeable(a)
 * says so, then pushes the cell onto the front of the tmps free list.
 * Finding the cell already at the head of that list is treated as fatal
 * corruption.
 */
void tfree(Cell *a)
{
	if (freeable(a)) {
		   dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
		xfree(a->sval);
	}
	if (tmps == a)
		FATAL("tempcell list is curdled");
	a->cnext = tmps;
	tmps = a;
}
692 
gettemp(void)693 Cell *gettemp(void)	/* get a tempcell */
694 {	int i;
695 	Cell *x;
696 
697 	if (!tmps) {
698 		tmps = calloc(100, sizeof(*tmps));
699 		if (!tmps)
700 			FATAL("out of space for temporaries");
701 		for (i = 1; i < 100; i++)
702 			tmps[i-1].cnext = &tmps[i];
703 		tmps[i-1].cnext = NULL;
704 	}
705 	x = tmps;
706 	tmps = x->cnext;
707 	*x = tempcell;
708 	return(x);
709 }
710 
indirect(Node ** a,int n)711 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
712 {
713 	Awkfloat val;
714 	Cell *x;
715 	int m;
716 	char *s;
717 
718 	x = execute(a[0]);
719 	val = getfval(x);	/* freebsd: defend against super large field numbers */
720 	if ((Awkfloat)INT_MAX < val)
721 		FATAL("trying to access out of range field %s", x->nval);
722 	m = (int) val;
723 	if (m == 0 && !is_number(s = getsval(x)))	/* suspicion! */
724 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
725 		/* BUG: can x->nval ever be null??? */
726 	tempfree(x);
727 	x = fieldadr(m);
728 	x->ctype = OCELL;	/* BUG?  why are these needed? */
729 	x->csub = CFLD;
730 	return(x);
731 }
732 
substr(Node ** a,int nnn)733 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
734 {
735 	int k, m, n;
736 	char *s;
737 	int temp;
738 	Cell *x, *y, *z = NULL;
739 
740 	x = execute(a[0]);
741 	y = execute(a[1]);
742 	if (a[2] != NULL)
743 		z = execute(a[2]);
744 	s = getsval(x);
745 	k = strlen(s) + 1;
746 	if (k <= 1) {
747 		tempfree(x);
748 		tempfree(y);
749 		if (a[2] != NULL) {
750 			tempfree(z);
751 		}
752 		x = gettemp();
753 		setsval(x, "");
754 		return(x);
755 	}
756 	m = (int) getfval(y);
757 	if (m <= 0)
758 		m = 1;
759 	else if (m > k)
760 		m = k;
761 	tempfree(y);
762 	if (a[2] != NULL) {
763 		n = (int) getfval(z);
764 		tempfree(z);
765 	} else
766 		n = k - 1;
767 	if (n < 0)
768 		n = 0;
769 	else if (n > k - m)
770 		n = k - m;
771 	   dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
772 	y = gettemp();
773 	temp = s[n+m-1];	/* with thanks to John Linderman */
774 	s[n+m-1] = '\0';
775 	setsval(y, s + m - 1);
776 	s[n+m-1] = temp;
777 	tempfree(x);
778 	return(y);
779 }
780 
sindex(Node ** a,int nnn)781 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
782 {
783 	Cell *x, *y, *z;
784 	char *s1, *s2, *p1, *p2, *q;
785 	Awkfloat v = 0.0;
786 
787 	x = execute(a[0]);
788 	s1 = getsval(x);
789 	y = execute(a[1]);
790 	s2 = getsval(y);
791 
792 	z = gettemp();
793 	for (p1 = s1; *p1 != '\0'; p1++) {
794 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
795 			continue;
796 		if (*p2 == '\0') {
797 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
798 			break;
799 		}
800 	}
801 	tempfree(x);
802 	tempfree(y);
803 	setfval(z, v);
804 	return(z);
805 }
806 
807 #define	MAXNUMSIZE	50
808 
format(char ** pbuf,int * pbufsize,const char * s,Node * a)809 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
810 {
811 	char *fmt;
812 	char *p, *t;
813 	const char *os;
814 	Cell *x;
815 	int flag = 0, n;
816 	int fmtwd; /* format width */
817 	int fmtsz = recsize;
818 	char *buf = *pbuf;
819 	int bufsize = *pbufsize;
820 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
821 #define BUFSZ(a)   (bufsize - ((a) - buf))
822 
823 	static bool first = true;
824 	static bool have_a_format = false;
825 
826 	if (first) {
827 		char xbuf[100];
828 
829 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
830 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
831 		first = false;
832 	}
833 
834 	os = s;
835 	p = buf;
836 	if ((fmt = malloc(fmtsz)) == NULL)
837 		FATAL("out of memory in format()");
838 	while (*s) {
839 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
840 		if (*s != '%') {
841 			*p++ = *s++;
842 			continue;
843 		}
844 		if (*(s+1) == '%') {
845 			*p++ = '%';
846 			s += 2;
847 			continue;
848 		}
849 		/* have to be real careful in case this is a huge number, eg, %100000d */
850 		fmtwd = atoi(s+1);
851 		if (fmtwd < 0)
852 			fmtwd = -fmtwd;
853 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
854 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
855 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
856 				FATAL("format item %.30s... ran format() out of memory", os);
857 			/* Ignore size specifiers */
858 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
859 				t--;
860 				continue;
861 			}
862 			if (isalpha((uschar)*s))
863 				break;
864 			if (*s == '$') {
865 				FATAL("'$' not permitted in awk formats");
866 			}
867 			if (*s == '*') {
868 				if (a == NULL) {
869 					FATAL("not enough args in printf(%s)", os);
870 				}
871 				x = execute(a);
872 				a = a->nnext;
873 				snprintf(t - 1, FMTSZ(t - 1),
874 				    "%d", fmtwd=(int) getfval(x));
875 				if (fmtwd < 0)
876 					fmtwd = -fmtwd;
877 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
878 				t = fmt + strlen(fmt);
879 				tempfree(x);
880 			}
881 		}
882 		*t = '\0';
883 		if (fmtwd < 0)
884 			fmtwd = -fmtwd;
885 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
886 		switch (*s) {
887 		case 'a': case 'A':
888 			if (have_a_format)
889 				flag = *s;
890 			else
891 				flag = 'f';
892 			break;
893 		case 'f': case 'e': case 'g': case 'E': case 'G':
894 			flag = 'f';
895 			break;
896 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
897 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
898 			*(t-1) = 'j';
899 			*t = *s;
900 			*++t = '\0';
901 			break;
902 		case 's':
903 			flag = 's';
904 			break;
905 		case 'c':
906 			flag = 'c';
907 			break;
908 		default:
909 			WARNING("weird printf conversion %s", fmt);
910 			flag = '?';
911 			break;
912 		}
913 		if (a == NULL)
914 			FATAL("not enough args in printf(%s)", os);
915 		x = execute(a);
916 		a = a->nnext;
917 		n = MAXNUMSIZE;
918 		if (fmtwd > n)
919 			n = fmtwd;
920 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
921 		switch (flag) {
922 		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
923 			t = getsval(x);
924 			n = strlen(t);
925 			if (fmtwd > n)
926 				n = fmtwd;
927 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
928 			p += strlen(p);
929 			snprintf(p, BUFSZ(p), "%s", t);
930 			break;
931 		case 'a':
932 		case 'A':
933 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
934 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
935 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
936 		case 's':
937 			t = getsval(x);
938 			n = strlen(t);
939 			if (fmtwd > n)
940 				n = fmtwd;
941 			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
942 				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
943 			snprintf(p, BUFSZ(p), fmt, t);
944 			break;
945 		case 'c':
946 			if (isnum(x)) {
947 				if ((int)getfval(x))
948 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
949 				else {
950 					*p++ = '\0'; /* explicit null byte */
951 					*p = '\0';   /* next output will start here */
952 				}
953 			} else
954 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
955 			break;
956 		default:
957 			FATAL("can't happen: bad conversion %c in format()", flag);
958 		}
959 		tempfree(x);
960 		p += strlen(p);
961 		s++;
962 	}
963 	*p = '\0';
964 	free(fmt);
965 	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
966 		execute(a);
967 	*pbuf = buf;
968 	*pbufsize = bufsize;
969 	return p - buf;
970 }
971 
awksprintf(Node ** a,int n)972 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
973 {
974 	Cell *x;
975 	Node *y;
976 	char *buf;
977 	int bufsz=3*recsize;
978 
979 	if ((buf = malloc(bufsz)) == NULL)
980 		FATAL("out of memory in awksprintf");
981 	y = a[0]->nnext;
982 	x = execute(a[0]);
983 	if (format(&buf, &bufsz, getsval(x), y) == -1)
984 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
985 	tempfree(x);
986 	x = gettemp();
987 	x->sval = buf;
988 	x->tval = STR;
989 	return(x);
990 }
991 
awkprintf(Node ** a,int n)992 Cell *awkprintf(Node **a, int n)		/* printf */
993 {	/* a[0] is list of args, starting with format string */
994 	/* a[1] is redirection operator, a[2] is redirection file */
995 	FILE *fp;
996 	Cell *x;
997 	Node *y;
998 	char *buf;
999 	int len;
1000 	int bufsz=3*recsize;
1001 
1002 	if ((buf = malloc(bufsz)) == NULL)
1003 		FATAL("out of memory in awkprintf");
1004 	y = a[0]->nnext;
1005 	x = execute(a[0]);
1006 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1007 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1008 	tempfree(x);
1009 	if (a[1] == NULL) {
1010 		/* fputs(buf, stdout); */
1011 		fwrite(buf, len, 1, stdout);
1012 		if (ferror(stdout))
1013 			FATAL("write error on stdout");
1014 	} else {
1015 		fp = redirect(ptoi(a[1]), a[2]);
1016 		/* fputs(buf, fp); */
1017 		fwrite(buf, len, 1, fp);
1018 		fflush(fp);
1019 		if (ferror(fp))
1020 			FATAL("write error on %s", filename(fp));
1021 	}
1022 	free(buf);
1023 	return(True);
1024 }
1025 
arith(Node ** a,int n)1026 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1027 {
1028 	Awkfloat i, j = 0;
1029 	double v;
1030 	Cell *x, *y, *z;
1031 
1032 	x = execute(a[0]);
1033 	i = getfval(x);
1034 	tempfree(x);
1035 	if (n != UMINUS && n != UPLUS) {
1036 		y = execute(a[1]);
1037 		j = getfval(y);
1038 		tempfree(y);
1039 	}
1040 	z = gettemp();
1041 	switch (n) {
1042 	case ADD:
1043 		i += j;
1044 		break;
1045 	case MINUS:
1046 		i -= j;
1047 		break;
1048 	case MULT:
1049 		i *= j;
1050 		break;
1051 	case DIVIDE:
1052 		if (j == 0)
1053 			FATAL("division by zero");
1054 		i /= j;
1055 		break;
1056 	case MOD:
1057 		if (j == 0)
1058 			FATAL("division by zero in mod");
1059 		modf(i/j, &v);
1060 		i = i - j * v;
1061 		break;
1062 	case UMINUS:
1063 		i = -i;
1064 		break;
1065 	case UPLUS: /* handled by getfval(), above */
1066 		break;
1067 	case POWER:
1068 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1069 			i = ipow(i, (int) j);
1070 		else
1071 			i = errcheck(pow(i, j), "pow");
1072 		break;
1073 	default:	/* can't happen */
1074 		FATAL("illegal arithmetic operator %d", n);
1075 	}
1076 	setfval(z, i);
1077 	return(z);
1078 }
1079 
/*
 * ipow: x**n.  ought to be done by pow, but isn't always.
 *
 * Binary exponentiation, scanning the bits of n from the most significant
 * one downwards: the accumulator is squared at every step and additionally
 * multiplied by x when the bit is set.  Any n <= 0 yields 1.
 */
double ipow(double x, int n)
{
	double acc = 1;
	int bit;

	if (n <= 0)
		return 1;
	/* locate the highest power of two that does not exceed n */
	for (bit = 1; bit <= n / 2; bit *= 2)
		continue;
	for ( ; bit > 0; bit /= 2) {
		if (n & bit)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1092 
incrdecr(Node ** a,int n)1093 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1094 {
1095 	Cell *x, *z;
1096 	int k;
1097 	Awkfloat xf;
1098 
1099 	x = execute(a[0]);
1100 	xf = getfval(x);
1101 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1102 	if (n == PREINCR || n == PREDECR) {
1103 		setfval(x, xf + k);
1104 		return(x);
1105 	}
1106 	z = gettemp();
1107 	setfval(z, xf);
1108 	setfval(x, xf + k);
1109 	tempfree(x);
1110 	return(z);
1111 }
1112 
assign(Node ** a,int n)1113 Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1114 {		/* this is subtle; don't muck with it. */
1115 	Cell *x, *y;
1116 	Awkfloat xf, yf;
1117 	double v;
1118 
1119 	y = execute(a[1]);
1120 	x = execute(a[0]);
1121 	if (n == ASSIGN) {	/* ordinary assignment */
1122 		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1123 			;	/* self-assignment: leave alone unless it's a field or NF */
1124 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1125 			setsval(x, getsval(y));
1126 			x->fval = getfval(y);
1127 			x->tval |= NUM;
1128 		}
1129 		else if (isstr(y))
1130 			setsval(x, getsval(y));
1131 		else if (isnum(y))
1132 			setfval(x, getfval(y));
1133 		else
1134 			funnyvar(y, "read value of");
1135 		tempfree(y);
1136 		return(x);
1137 	}
1138 	xf = getfval(x);
1139 	yf = getfval(y);
1140 	switch (n) {
1141 	case ADDEQ:
1142 		xf += yf;
1143 		break;
1144 	case SUBEQ:
1145 		xf -= yf;
1146 		break;
1147 	case MULTEQ:
1148 		xf *= yf;
1149 		break;
1150 	case DIVEQ:
1151 		if (yf == 0)
1152 			FATAL("division by zero in /=");
1153 		xf /= yf;
1154 		break;
1155 	case MODEQ:
1156 		if (yf == 0)
1157 			FATAL("division by zero in %%=");
1158 		modf(xf/yf, &v);
1159 		xf = xf - yf * v;
1160 		break;
1161 	case POWEQ:
1162 		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1163 			xf = ipow(xf, (int) yf);
1164 		else
1165 			xf = errcheck(pow(xf, yf), "pow");
1166 		break;
1167 	default:
1168 		FATAL("illegal assignment operator %d", n);
1169 		break;
1170 	}
1171 	tempfree(y);
1172 	setfval(x, xf);
1173 	return(x);
1174 }
1175 
cat(Node ** a,int q)1176 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1177 {
1178 	Cell *x, *y, *z;
1179 	int n1, n2;
1180 	char *s = NULL;
1181 	int ssz = 0;
1182 
1183 	x = execute(a[0]);
1184 	n1 = strlen(getsval(x));
1185 
1186 	y = execute(a[1]);
1187 	n2 = strlen(getsval(y));
1188 
1189 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat");
1190 	memcpy(s, x->sval, n1);
1191 	memcpy(s + n1, y->sval, n2);
1192 	s[n1 + n2] = '\0';
1193 
1194 	tempfree(x);
1195 	tempfree(y);
1196 
1197 	z = gettemp();
1198 	z->sval = s;
1199 	z->tval = STR;
1200 
1201 	return(z);
1202 }
1203 
pastat(Node ** a,int n)1204 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1205 {
1206 	Cell *x;
1207 
1208 	if (a[0] == NULL)
1209 		x = execute(a[1]);
1210 	else {
1211 		x = execute(a[0]);
1212 		if (istrue(x)) {
1213 			tempfree(x);
1214 			x = execute(a[1]);
1215 		}
1216 	}
1217 	return x;
1218 }
1219 
dopa2(Node ** a,int n)1220 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1221 {
1222 	Cell *x;
1223 	int pair;
1224 
1225 	pair = ptoi(a[3]);
1226 	if (pairstack[pair] == 0) {
1227 		x = execute(a[0]);
1228 		if (istrue(x))
1229 			pairstack[pair] = 1;
1230 		tempfree(x);
1231 	}
1232 	if (pairstack[pair] == 1) {
1233 		x = execute(a[1]);
1234 		if (istrue(x))
1235 			pairstack[pair] = 0;
1236 		tempfree(x);
1237 		x = execute(a[2]);
1238 		return(x);
1239 	}
1240 	return(False);
1241 }
1242 
split(Node ** a,int nnn)1243 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1244 {
1245 	Cell *x = NULL, *y, *ap;
1246 	const char *s, *origs, *t;
1247 	const char *fs = NULL;
1248 	char *origfs = NULL;
1249 	int sep;
1250 	char temp, num[50];
1251 	int n, tempstat, arg3type;
1252 
1253 	y = execute(a[0]);	/* source string */
1254 	origs = s = strdup(getsval(y));
1255 	arg3type = ptoi(a[3]);
1256 	if (a[2] == NULL)		/* fs string */
1257 		fs = getsval(fsloc);
1258 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
1259 		x = execute(a[2]);
1260 		fs = origfs = strdup(getsval(x));
1261 		tempfree(x);
1262 	} else if (arg3type == REGEXPR)
1263 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1264 	else
1265 		FATAL("illegal type of split");
1266 	sep = *fs;
1267 	ap = execute(a[1]);	/* array name */
1268 	freesymtab(ap);
1269 	   dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1270 	ap->tval &= ~STR;
1271 	ap->tval |= ARR;
1272 	ap->sval = (char *) makesymtab(NSYMTAB);
1273 
1274 	n = 0;
1275         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1276 		/* split(s, a, //); have to arrange that it looks like empty sep */
1277 		arg3type = 0;
1278 		fs = "";
1279 		sep = 0;
1280 	}
1281 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1282 		fa *pfa;
1283 		if (arg3type == REGEXPR) {	/* it's ready already */
1284 			pfa = (fa *) a[2];
1285 		} else {
1286 			pfa = makedfa(fs, 1);
1287 		}
1288 		if (nematch(pfa,s)) {
1289 			tempstat = pfa->initstat;
1290 			pfa->initstat = 2;
1291 			do {
1292 				n++;
1293 				snprintf(num, sizeof(num), "%d", n);
1294 				temp = *patbeg;
1295 				setptr(patbeg, '\0');
1296 				if (is_number(s))
1297 					setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1298 				else
1299 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1300 				setptr(patbeg, temp);
1301 				s = patbeg + patlen;
1302 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1303 					n++;
1304 					snprintf(num, sizeof(num), "%d", n);
1305 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1306 					pfa->initstat = tempstat;
1307 					goto spdone;
1308 				}
1309 			} while (nematch(pfa,s));
1310 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1311 							/* cf gsub and refldbld */
1312 		}
1313 		n++;
1314 		snprintf(num, sizeof(num), "%d", n);
1315 		if (is_number(s))
1316 			setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1317 		else
1318 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1319   spdone:
1320 		pfa = NULL;
1321 	} else if (sep == ' ') {
1322 		for (n = 0; ; ) {
1323 #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1324 			while (ISWS(*s))
1325 				s++;
1326 			if (*s == '\0')
1327 				break;
1328 			n++;
1329 			t = s;
1330 			do
1331 				s++;
1332 			while (*s != '\0' && !ISWS(*s));
1333 			temp = *s;
1334 			setptr(s, '\0');
1335 			snprintf(num, sizeof(num), "%d", n);
1336 			if (is_number(t))
1337 				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1338 			else
1339 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1340 			setptr(s, temp);
1341 			if (*s != '\0')
1342 				s++;
1343 		}
1344 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1345 		for (n = 0; *s != '\0'; s++) {
1346 			char buf[2];
1347 			n++;
1348 			snprintf(num, sizeof(num), "%d", n);
1349 			buf[0] = *s;
1350 			buf[1] = '\0';
1351 			if (isdigit((uschar)buf[0]))
1352 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1353 			else
1354 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1355 		}
1356 	} else if (*s != '\0') {
1357 		for (;;) {
1358 			n++;
1359 			t = s;
1360 			while (*s != sep && *s != '\n' && *s != '\0')
1361 				s++;
1362 			temp = *s;
1363 			setptr(s, '\0');
1364 			snprintf(num, sizeof(num), "%d", n);
1365 			if (is_number(t))
1366 				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1367 			else
1368 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1369 			setptr(s, temp);
1370 			if (*s++ == '\0')
1371 				break;
1372 		}
1373 	}
1374 	tempfree(ap);
1375 	tempfree(y);
1376 	xfree(origs);
1377 	xfree(origfs);
1378 	x = gettemp();
1379 	x->tval = NUM;
1380 	x->fval = n;
1381 	return(x);
1382 }
1383 
condexpr(Node ** a,int n)1384 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1385 {
1386 	Cell *x;
1387 
1388 	x = execute(a[0]);
1389 	if (istrue(x)) {
1390 		tempfree(x);
1391 		x = execute(a[1]);
1392 	} else {
1393 		tempfree(x);
1394 		x = execute(a[2]);
1395 	}
1396 	return(x);
1397 }
1398 
ifstat(Node ** a,int n)1399 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1400 {
1401 	Cell *x;
1402 
1403 	x = execute(a[0]);
1404 	if (istrue(x)) {
1405 		tempfree(x);
1406 		x = execute(a[1]);
1407 	} else if (a[2] != NULL) {
1408 		tempfree(x);
1409 		x = execute(a[2]);
1410 	}
1411 	return(x);
1412 }
1413 
whilestat(Node ** a,int n)1414 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1415 {
1416 	Cell *x;
1417 
1418 	for (;;) {
1419 		x = execute(a[0]);
1420 		if (!istrue(x))
1421 			return(x);
1422 		tempfree(x);
1423 		x = execute(a[1]);
1424 		if (isbreak(x)) {
1425 			x = True;
1426 			return(x);
1427 		}
1428 		if (isnext(x) || isexit(x) || isret(x))
1429 			return(x);
1430 		tempfree(x);
1431 	}
1432 }
1433 
dostat(Node ** a,int n)1434 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1435 {
1436 	Cell *x;
1437 
1438 	for (;;) {
1439 		x = execute(a[0]);
1440 		if (isbreak(x))
1441 			return True;
1442 		if (isnext(x) || isexit(x) || isret(x))
1443 			return(x);
1444 		tempfree(x);
1445 		x = execute(a[1]);
1446 		if (!istrue(x))
1447 			return(x);
1448 		tempfree(x);
1449 	}
1450 }
1451 
forstat(Node ** a,int n)1452 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1453 {
1454 	Cell *x;
1455 
1456 	x = execute(a[0]);
1457 	tempfree(x);
1458 	for (;;) {
1459 		if (a[1]!=NULL) {
1460 			x = execute(a[1]);
1461 			if (!istrue(x)) return(x);
1462 			else tempfree(x);
1463 		}
1464 		x = execute(a[3]);
1465 		if (isbreak(x))		/* turn off break */
1466 			return True;
1467 		if (isnext(x) || isexit(x) || isret(x))
1468 			return(x);
1469 		tempfree(x);
1470 		x = execute(a[2]);
1471 		tempfree(x);
1472 	}
1473 }
1474 
instat(Node ** a,int n)1475 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1476 {
1477 	Cell *x, *vp, *arrayp, *cp, *ncp;
1478 	Array *tp;
1479 	int i;
1480 
1481 	vp = execute(a[0]);
1482 	arrayp = execute(a[1]);
1483 	if (!isarr(arrayp)) {
1484 		return True;
1485 	}
1486 	tp = (Array *) arrayp->sval;
1487 	tempfree(arrayp);
1488 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1489 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1490 			setsval(vp, cp->nval);
1491 			ncp = cp->cnext;
1492 			x = execute(a[2]);
1493 			if (isbreak(x)) {
1494 				tempfree(vp);
1495 				return True;
1496 			}
1497 			if (isnext(x) || isexit(x) || isret(x)) {
1498 				tempfree(vp);
1499 				return(x);
1500 			}
1501 			tempfree(x);
1502 		}
1503 	}
1504 	return True;
1505 }
1506 
nawk_convert(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1507 static char *nawk_convert(const char *s, int (*fun_c)(int),
1508     wint_t (*fun_wc)(wint_t))
1509 {
1510 	char *buf      = NULL;
1511 	char *pbuf     = NULL;
1512 	const char *ps = NULL;
1513 	size_t n       = 0;
1514 	mbstate_t mbs, mbs2;
1515 	wchar_t wc;
1516 	size_t sz = MB_CUR_MAX;
1517 
1518 	if (sz == 1) {
1519 		buf = tostring(s);
1520 
1521 		for (pbuf = buf; *pbuf; pbuf++)
1522 			*pbuf = fun_c((uschar)*pbuf);
1523 
1524 		return buf;
1525 	} else {
1526 		/* upper/lower character may be shorter/longer */
1527 		buf = tostringN(s, strlen(s) * sz + 1);
1528 
1529 		memset(&mbs,  0, sizeof(mbs));
1530 		memset(&mbs2, 0, sizeof(mbs2));
1531 
1532 		ps   = s;
1533 		pbuf = buf;
1534 		while (n = mbrtowc(&wc, ps, sz, &mbs),
1535 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1536 		{
1537 			ps += n;
1538 
1539 			n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
1540 			if (n == (size_t)-1)
1541 				FATAL("illegal wide character %s", s);
1542 
1543 			pbuf += n;
1544 		}
1545 
1546 		*pbuf = '\0';
1547 
1548 		if (n)
1549 			FATAL("illegal byte sequence %s", s);
1550 
1551 		return buf;
1552 	}
1553 }
1554 
/* nawk_toupper: newly allocated upper-case copy of s (see nawk_convert). */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
1559 
/* nawk_tolower: newly allocated lower-case copy of s (see nawk_convert). */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
1564 
bltin(Node ** a,int n)1565 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1566 {
1567 	Cell *x, *y;
1568 	Awkfloat u;
1569 	int t, sz;
1570 	Awkfloat tmp;
1571 	char *buf, *fmt;
1572 	Node *nextarg;
1573 	FILE *fp;
1574 	int status = 0;
1575 	time_t tv;
1576 	struct tm *tm;
1577 
1578 	t = ptoi(a[0]);
1579 	x = execute(a[1]);
1580 	nextarg = a[1]->nnext;
1581 	switch (t) {
1582 	case FLENGTH:
1583 		if (isarr(x))
1584 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1585 		else
1586 			u = strlen(getsval(x));
1587 		break;
1588 	case FLOG:
1589 		u = errcheck(log(getfval(x)), "log"); break;
1590 	case FINT:
1591 		modf(getfval(x), &u); break;
1592 	case FEXP:
1593 		u = errcheck(exp(getfval(x)), "exp"); break;
1594 	case FSQRT:
1595 		u = errcheck(sqrt(getfval(x)), "sqrt"); break;
1596 	case FSIN:
1597 		u = sin(getfval(x)); break;
1598 	case FCOS:
1599 		u = cos(getfval(x)); break;
1600 	case FATAN:
1601 		if (nextarg == NULL) {
1602 			WARNING("atan2 requires two arguments; returning 1.0");
1603 			u = 1.0;
1604 		} else {
1605 			y = execute(a[1]->nnext);
1606 			u = atan2(getfval(x), getfval(y));
1607 			tempfree(y);
1608 			nextarg = nextarg->nnext;
1609 		}
1610 		break;
1611 	case FCOMPL:
1612 		u = ~((int)getfval(x));
1613 		break;
1614 	case FAND:
1615 		if (nextarg == 0) {
1616 			WARNING("and requires two arguments; returning 0");
1617 			u = 0;
1618 			break;
1619 		}
1620 		y = execute(a[1]->nnext);
1621 		u = ((int)getfval(x)) & ((int)getfval(y));
1622 		tempfree(y);
1623 		nextarg = nextarg->nnext;
1624 		break;
1625 	case FFOR:
1626 		if (nextarg == 0) {
1627 			WARNING("or requires two arguments; returning 0");
1628 			u = 0;
1629 			break;
1630 		}
1631 		y = execute(a[1]->nnext);
1632 		u = ((int)getfval(x)) | ((int)getfval(y));
1633 		tempfree(y);
1634 		nextarg = nextarg->nnext;
1635 		break;
1636 	case FXOR:
1637 		if (nextarg == 0) {
1638 			WARNING("xor requires two arguments; returning 0");
1639 			u = 0;
1640 			break;
1641 		}
1642 		y = execute(a[1]->nnext);
1643 		u = ((int)getfval(x)) ^ ((int)getfval(y));
1644 		tempfree(y);
1645 		nextarg = nextarg->nnext;
1646 		break;
1647 	case FLSHIFT:
1648 		if (nextarg == 0) {
1649 			WARNING("lshift requires two arguments; returning 0");
1650 			u = 0;
1651 			break;
1652 		}
1653 		y = execute(a[1]->nnext);
1654 		u = ((int)getfval(x)) << ((int)getfval(y));
1655 		tempfree(y);
1656 		nextarg = nextarg->nnext;
1657 		break;
1658 	case FRSHIFT:
1659 		if (nextarg == 0) {
1660 			WARNING("rshift requires two arguments; returning 0");
1661 			u = 0;
1662 			break;
1663 		}
1664 		y = execute(a[1]->nnext);
1665 		u = ((int)getfval(x)) >> ((int)getfval(y));
1666 		tempfree(y);
1667 		nextarg = nextarg->nnext;
1668 		break;
1669 	case FSYSTEM:
1670 		fflush(stdout);		/* in case something is buffered already */
1671 		status = system(getsval(x));
1672 		u = status;
1673 		if (status != -1) {
1674 			if (WIFEXITED(status)) {
1675 				u = WEXITSTATUS(status);
1676 			} else if (WIFSIGNALED(status)) {
1677 				u = WTERMSIG(status) + 256;
1678 #ifdef WCOREDUMP
1679 				if (WCOREDUMP(status))
1680 					u += 256;
1681 #endif
1682 			} else	/* something else?!? */
1683 				u = 0;
1684 		}
1685 		break;
1686 	case FRAND:
1687 		/* random() returns numbers in [0..2^31-1]
1688 		 * in order to get a number in [0, 1), divide it by 2^31
1689 		 */
1690 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1691 		break;
1692 	case FSRAND:
1693 		if (isrec(x))	/* no argument provided */
1694 			u = time((time_t *)0);
1695 		else
1696 			u = getfval(x);
1697 		tmp = u;
1698 		srandom((unsigned long) u);
1699 		u = srand_seed;
1700 		srand_seed = tmp;
1701 		break;
1702 	case FTOUPPER:
1703 	case FTOLOWER:
1704 		if (t == FTOUPPER)
1705 			buf = nawk_toupper(getsval(x));
1706 		else
1707 			buf = nawk_tolower(getsval(x));
1708 		tempfree(x);
1709 		x = gettemp();
1710 		setsval(x, buf);
1711 		free(buf);
1712 		return x;
1713 	case FFLUSH:
1714 		if (isrec(x) || strlen(getsval(x)) == 0) {
1715 			flush_all();	/* fflush() or fflush("") -> all */
1716 			u = 0;
1717 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1718 			u = EOF;
1719 		else
1720 			u = fflush(fp);
1721 		break;
1722 	case FSYSTIME:
1723 		u = time((time_t *) 0);
1724 		break;
1725 	case FSTRFTIME:
1726 		/* strftime([format [,timestamp]]) */
1727 		if (nextarg) {
1728 			y = execute(nextarg);
1729 			nextarg = nextarg->nnext;
1730 			tv = (time_t) getfval(y);
1731 			tempfree(y);
1732 		} else
1733 			tv = time((time_t *) 0);
1734 		tm = localtime(&tv);
1735 		if (tm == NULL)
1736 			FATAL("bad time %ld", (long)tv);
1737 
1738 		if (isrec(x)) {
1739 			/* format argument not provided, use default */
1740 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1741 		} else
1742 			fmt = tostring(getsval(x));
1743 
1744 		sz = 32;
1745 		buf = NULL;
1746 		do {
1747 			if ((buf = realloc(buf, (sz *= 2))) == NULL)
1748 				FATAL("out of memory in strftime");
1749 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
1750 
1751 		y = gettemp();
1752 		setsval(y, buf);
1753 		free(fmt);
1754 		free(buf);
1755 
1756 		return y;
1757 	default:	/* can't happen */
1758 		FATAL("illegal function type %d", t);
1759 		break;
1760 	}
1761 	tempfree(x);
1762 	x = gettemp();
1763 	setfval(x, u);
1764 	if (nextarg != NULL) {
1765 		WARNING("warning: function has too many arguments");
1766 		for ( ; nextarg; nextarg = nextarg->nnext)
1767 			execute(nextarg);
1768 	}
1769 	return(x);
1770 }
1771 
printstat(Node ** a,int n)1772 Cell *printstat(Node **a, int n)	/* print a[0] */
1773 {
1774 	Node *x;
1775 	Cell *y;
1776 	FILE *fp;
1777 
1778 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
1779 		fp = stdout;
1780 	else
1781 		fp = redirect(ptoi(a[1]), a[2]);
1782 	for (x = a[0]; x != NULL; x = x->nnext) {
1783 		y = execute(x);
1784 		fputs(getpssval(y), fp);
1785 		tempfree(y);
1786 		if (x->nnext == NULL)
1787 			fputs(getsval(orsloc), fp);
1788 		else
1789 			fputs(getsval(ofsloc), fp);
1790 	}
1791 	if (a[1] != NULL)
1792 		fflush(fp);
1793 	if (ferror(fp))
1794 		FATAL("write error on %s", filename(fp));
1795 	return(True);
1796 }
1797 
nullproc(Node ** a,int n)1798 Cell *nullproc(Node **a, int n)
1799 {
1800 	return 0;
1801 }
1802 
1803 
/*
 * redirect: evaluate the redirection target b to a name and obtain the
 * matching stream from openfile() for redirection operator a.  Failure
 * to open is fatal.
 */
FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
{
	Cell *target = execute(b);
	char *path = getsval(target);
	FILE *stream = openfile(a, path, NULL);

	if (stream == NULL)
		FATAL("can't open file %s", path);
	tempfree(target);
	return stream;
}
1818 
/* One slot of the open-stream table. */
struct files {
	FILE	*fp;		/* the stream; NULL marks a free slot */
	const char	*fname;	/* name the stream was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself, allocated by stdinit() */

size_t nfiles;		/* number of slots in the table */
1826 
stdinit(void)1827 static void stdinit(void)	/* in case stdin, etc., are not constants */
1828 {
1829 	nfiles = FOPEN_MAX;
1830 	files = calloc(nfiles, sizeof(*files));
1831 	if (files == NULL)
1832 		FATAL("can't allocate file memory for %zu files", nfiles);
1833         files[0].fp = stdin;
1834 	files[0].fname = "/dev/stdin";
1835 	files[0].mode = LT;
1836         files[1].fp = stdout;
1837 	files[1].fname = "/dev/stdout";
1838 	files[1].mode = GT;
1839         files[2].fp = stderr;
1840 	files[2].fname = "/dev/stderr";
1841 	files[2].mode = GT;
1842 }
1843 
openfile(int a,const char * us,bool * pnewflag)1844 FILE *openfile(int a, const char *us, bool *pnewflag)
1845 {
1846 	const char *s = us;
1847 	size_t i;
1848 	int m;
1849 	FILE *fp = NULL;
1850 
1851 	if (*s == '\0')
1852 		FATAL("null file name in print or getline");
1853 	for (i = 0; i < nfiles; i++)
1854 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1855 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1856 		     a == FFLUSH)) {
1857 			if (pnewflag)
1858 				*pnewflag = false;
1859 			return files[i].fp;
1860 		}
1861 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1862 		return NULL;
1863 
1864 	for (i = 0; i < nfiles; i++)
1865 		if (files[i].fp == NULL)
1866 			break;
1867 	if (i >= nfiles) {
1868 		struct files *nf;
1869 		size_t nnf = nfiles + FOPEN_MAX;
1870 		nf = realloc(files, nnf * sizeof(*nf));
1871 		if (nf == NULL)
1872 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1873 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1874 		nfiles = nnf;
1875 		files = nf;
1876 	}
1877 	fflush(stdout);	/* force a semblance of order */
1878 	m = a;
1879 	if (a == GT) {
1880 		fp = fopen(s, "w");
1881 	} else if (a == APPEND) {
1882 		fp = fopen(s, "a");
1883 		m = GT;	/* so can mix > and >> */
1884 	} else if (a == '|') {	/* output pipe */
1885 		fp = popen(s, "w");
1886 	} else if (a == LE) {	/* input pipe */
1887 		fp = popen(s, "r");
1888 	} else if (a == LT) {	/* getline <file */
1889 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1890 	} else	/* can't happen */
1891 		FATAL("illegal redirection %d", a);
1892 	if (fp != NULL) {
1893 		files[i].fname = tostring(s);
1894 		files[i].fp = fp;
1895 		files[i].mode = m;
1896 		if (pnewflag)
1897 			*pnewflag = true;
1898 		if (fp != stdin && fp != stdout && fp != stderr)
1899 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1900 	}
1901 	return fp;
1902 }
1903 
filename(FILE * fp)1904 const char *filename(FILE *fp)
1905 {
1906 	size_t i;
1907 
1908 	for (i = 0; i < nfiles; i++)
1909 		if (fp == files[i].fp)
1910 			return files[i].fname;
1911 	return "???";
1912 }
1913 
closefile(Node ** a,int n)1914  Cell *closefile(Node **a, int n)
1915  {
1916  	Cell *x;
1917 	size_t i;
1918 	bool stat;
1919 
1920  	x = execute(a[0]);
1921  	getsval(x);
1922 	stat = true;
1923  	for (i = 0; i < nfiles; i++) {
1924 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1925 			continue;
1926 		if (ferror(files[i].fp))
1927 			FATAL("i/o error occurred on %s", files[i].fname);
1928 		if (files[i].mode == '|' || files[i].mode == LE)
1929 			stat = pclose(files[i].fp) == -1;
1930 		else
1931 			stat = fclose(files[i].fp) == EOF;
1932 		if (stat)
1933 			FATAL("i/o error occurred closing %s", files[i].fname);
1934 		if (i > 2)	/* don't do /dev/std... */
1935 			xfree(files[i].fname);
1936 		files[i].fname = NULL;	/* watch out for ref thru this */
1937 		files[i].fp = NULL;
1938  	}
1939  	tempfree(x);
1940  	x = gettemp();
1941 	setfval(x, (Awkfloat) (stat ? -1 : 0));
1942  	return(x);
1943  }
1944 
closeall(void)1945 void closeall(void)
1946 {
1947 	size_t i;
1948 	bool stat = false;
1949 
1950 	for (i = 0; i < nfiles; i++) {
1951 		if (! files[i].fp)
1952 			continue;
1953 		if (ferror(files[i].fp))
1954 			FATAL( "i/o error occurred on %s", files[i].fname );
1955 		if (files[i].mode == '|' || files[i].mode == LE)
1956 			stat = pclose(files[i].fp) == -1;
1957 		else
1958 			stat = fclose(files[i].fp) == EOF;
1959 		if (stat)
1960 			FATAL( "i/o error occurred while closing %s", files[i].fname );
1961 	}
1962 }
1963 
flush_all(void)1964 static void flush_all(void)
1965 {
1966 	size_t i;
1967 
1968 	for (i = 0; i < nfiles; i++)
1969 		if (files[i].fp)
1970 			fflush(files[i].fp);
1971 }
1972 
/* forward declaration: backsub() is called by the substitution routines before its definition */
void backsub(char **pb_ptr, const char **sptr_ptr);
1974 
/*
 * sub: replace the first match of a regular expression in the target
 * string a[3] with the replacement a[2].
 *
 * The regular expression is a[1]: used directly as a compiled automaton
 * when a[0] is NULL, otherwise evaluated to a string and compiled with
 * makedfa().  In the replacement, '&' stands for the matched text and
 * backslash sequences are interpreted by backsub().  The result is built
 * in a scratch buffer that adjbuf() grows on demand, then stored back
 * into the target cell.  Returns True if a substitution was made, False
 * otherwise.
 */
Cell *sub(Node **a, int nnn)	/* substitute command */
{
	const char *sptr, *q;
	Cell *x, *y, *result;
	char *t, *buf, *pb;
	fa *pfa;
	int bufsz = recsize;

	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in sub");
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	result = False;
	if (pmatch(pfa, t)) {
		/* copy the part of the target that precedes the match */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
		pb = buf;
		while (sptr < patbeg)
			*pb++ = *sptr++;
		/* expand the replacement text */
		sptr = getsval(y);
		while (*sptr != '\0') {
			/* keep a few bytes of headroom for whatever is appended next */
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
			if (*sptr == '\\') {
				backsub(&pb, &sptr);
			} else if (*sptr == '&') {
				/* '&' inserts the matched text */
				sptr++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
				for (q = patbeg; q < patbeg+patlen; )
					*pb++ = *q++;
			} else
				*pb++ = *sptr++;
		}
		*pb = '\0';
		if (pb > buf + bufsz)
			FATAL("sub result1 %.30s too big; can't happen", buf);
		/* append the remainder of the target, unless the match ran up to its end */
		sptr = patbeg + patlen;
		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
			while ((*pb++ = *sptr++) != '\0')
				continue;
		}
		if (pb > buf + bufsz)
			FATAL("sub result2 %.30s too big; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy */
		result = True;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return result;
}
2034 
/*
 * gsub: replace every match of a regular expression in the target string
 * a[3] with the replacement a[2].
 *
 * The regular expression is a[1]: used directly as a compiled automaton
 * when a[0] is NULL, otherwise evaluated to a string and compiled with
 * makedfa().  In the replacement, '&' stands for the matched text and
 * backslash sequences are interpreted by backsub().  While scanning, the
 * automaton's initstat is temporarily set to 2 and restored afterwards.
 * mflag suppresses replacing an empty match directly after a non-empty
 * one.  The rebuilt string is stored back into the target cell.  Returns
 * a temporary numeric cell holding the number of substitutions made.
 */
Cell *gsub(Node **a, int nnn)	/* global substitute */
{
	Cell *x, *y;
	char *rptr, *pb;
	const char *q, *t, *sptr;
	char *buf;
	fa *pfa;
	int mflag, tempstat, num;
	int bufsz = recsize;

	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in gsub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	if (pmatch(pfa, t)) {
		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		do {
			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					sptr = rptr;
					while (*sptr != '\0') {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == '\0')	/* at end */
					goto done;
				/* step over one source character so the scan makes progress */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gsub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the text between the previous position and the match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* then the expanded replacement */
				sptr = rptr;
				while (*sptr != '\0') {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				t = patbeg + patlen;
				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
					goto done;
				if (pb > buf + bufsz)
					FATAL("gsub result1 %.30s too big; can't happen", buf);
				mflag = 1;
			}
		} while (pmatch(pfa,t));
		/* no further match: copy the tail, including its terminator */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
		while ((*pb++ = *sptr++) != '\0')
			continue;
		/* terminate only if there is room; otherwise the tail copy must already have done so */
	done:	if (pb < buf + bufsz)
			*pb = '\0';
		else if (*(pb-1) != '\0')
			FATAL("gsub result2 %.30s truncated; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	x = gettemp();
	x->tval = NUM;
	x->fval = num;
	free(buf);
	return(x);
}
2136 
gensub(Node ** a,int nnn)2137 Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2138 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2139 {
2140 	Cell *x, *y, *res, *h;
2141 	char *rptr;
2142 	const char *sptr;
2143 	char *buf, *pb;
2144 	const char *t, *q;
2145 	fa *pfa;
2146 	int mflag, tempstat, num, whichm;
2147 	int bufsz = recsize;
2148 
2149 	if ((buf = malloc(bufsz)) == NULL)
2150 		FATAL("out of memory in gensub");
2151 	mflag = 0;	/* if mflag == 0, can replace empty string */
2152 	num = 0;
2153 	x = execute(a[4]);	/* source string */
2154 	t = getsval(x);
2155 	res = copycell(x);	/* target string - initially copy of source */
2156 	res->csub = CTEMP;	/* result values are temporary */
2157 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2158 		pfa = (fa *) a[1];	/* regular expression */
2159 	else {
2160 		y = execute(a[1]);
2161 		pfa = makedfa(getsval(y), 1);
2162 		tempfree(y);
2163 	}
2164 	y = execute(a[2]);	/* replacement string */
2165 	h = execute(a[3]);	/* which matches should be replaced */
2166 	sptr = getsval(h);
2167 	if (sptr[0] == 'g' || sptr[0] == 'G')
2168 		whichm = -1;
2169 	else {
2170 		/*
2171 		 * The specified number is index of replacement, starting
2172 		 * from 1. GNU awk treats index lower than 0 same as
2173 		 * 1, we do same for compatibility.
2174 		 */
2175 		whichm = (int) getfval(h) - 1;
2176 		if (whichm < 0)
2177 			whichm = 0;
2178 	}
2179 	tempfree(h);
2180 
2181 	if (pmatch(pfa, t)) {
2182 		char *sl;
2183 
2184 		tempstat = pfa->initstat;
2185 		pfa->initstat = 2;
2186 		pb = buf;
2187 		rptr = getsval(y);
2188 		/*
2189 		 * XXX if there are any backreferences in subst string,
2190 		 * complain now.
2191 		 */
2192 		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2193 			if (strchr("0123456789", sl[1])) {
2194 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2195 			}
2196 		}
2197 
2198 		do {
2199 			if (whichm >= 0 && whichm != num) {
2200 				num++;
2201 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2202 
2203 				/* copy the part of string up to and including
2204 				 * match to output buffer */
2205 				while (t < patbeg + patlen)
2206 					*pb++ = *t++;
2207 				continue;
2208 			}
2209 
2210 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2211 				if (mflag == 0) {	/* can replace empty */
2212 					num++;
2213 					sptr = rptr;
2214 					while (*sptr != 0) {
2215 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2216 						if (*sptr == '\\') {
2217 							backsub(&pb, &sptr);
2218 						} else if (*sptr == '&') {
2219 							sptr++;
2220 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2221 							for (q = patbeg; q < patbeg+patlen; )
2222 								*pb++ = *q++;
2223 						} else
2224 							*pb++ = *sptr++;
2225 					}
2226 				}
2227 				if (*t == 0)	/* at end */
2228 					goto done;
2229 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2230 				*pb++ = *t++;
2231 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2232 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2233 				mflag = 0;
2234 			}
2235 			else {	/* matched nonempty string */
2236 				num++;
2237 				sptr = t;
2238 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2239 				while (sptr < patbeg)
2240 					*pb++ = *sptr++;
2241 				sptr = rptr;
2242 				while (*sptr != 0) {
2243 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2244 					if (*sptr == '\\') {
2245 						backsub(&pb, &sptr);
2246 					} else if (*sptr == '&') {
2247 						sptr++;
2248 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2249 						for (q = patbeg; q < patbeg+patlen; )
2250 							*pb++ = *q++;
2251 					} else
2252 						*pb++ = *sptr++;
2253 				}
2254 				t = patbeg + patlen;
2255 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2256 					goto done;
2257 				if (pb > buf + bufsz)
2258 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2259 				mflag = 1;
2260 			}
2261 		} while (pmatch(pfa,t));
2262 		sptr = t;
2263 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2264 		while ((*pb++ = *sptr++) != 0)
2265 			;
2266 	done:	if (pb > buf + bufsz)
2267 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2268 		*pb = '\0';
2269 		setsval(res, buf);
2270 		pfa->initstat = tempstat;
2271 	}
2272 	tempfree(x);
2273 	tempfree(y);
2274 	free(buf);
2275 	return(res);
2276 }
2277 
/*
 * backsub: translate one backslash sequence of a substitution replacement.
 * On entry (*sptr_ptr)[0] is a backslash; on return both the output
 * pointer *pb_ptr and the input pointer *sptr_ptr have been advanced.
 *
 *   \\\&  ->  \&  (four input characters consumed)
 *   \\&   ->  \   (two consumed; the '&' is left for the caller)
 *   \\x   ->  \   when POSIXLY_CORRECT is set in the environment,
 *             \\  otherwise (two consumed either way)
 *   \&    ->  &   (a literal ampersand)
 *   \x    ->  \   (only the backslash is consumed)
 *
 * The environment is consulted once, on the first call.
 */
void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
{						/* sptr[0] == '\\' */
	static bool env_checked = false;
	static bool posix_mode = false;
	const char *in = *sptr_ptr;
	char *out = *pb_ptr;

	if (!env_checked) {
		posix_mode = (getenv("POSIXLY_CORRECT") != NULL);
		env_checked = true;
	}

	if (in[1] == '&') {			/* literal & */
		*out++ = '&';
		in += 2;
	} else if (in[1] != '\\') {		/* literal \ */
		*out++ = *in++;
	} else if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
		out[0] = '\\';
		out[1] = '&';
		out += 2;
		in += 4;
	} else if (in[2] == '&') {		/* \\& -> \ + matched */
		*out++ = '\\';
		in += 2;
	} else {
		/* \\x: POSIX mode keeps one backslash, default keeps both */
		if (!posix_mode)
			*out++ = '\\';
		*out++ = '\\';
		in += 2;
	}

	*sptr_ptr = in;
	*pb_ptr = out;
}
2314