xref: /openbsd/usr.bin/awk/run.c (revision 097a140d)
1 /*	$OpenBSD: run.c,v 1.69 2020/12/09 20:00:11 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define DEBUG
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <fcntl.h>
33 #include <setjmp.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <string.h>
37 #include <stdlib.h>
38 #include <time.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41 #include "awk.h"
42 #include "awkgram.tab.h"
43 
44 static void stdinit(void);
45 static void flush_all(void);
46 
/*
 * tempfree(x): release cell x through tfree(), but only when istemp(x)
 * holds; any other cell is left alone.
 * The macro below is the variant that is compiled in.  The function after
 * #else is disabled; it performs the same release and additionally warns
 * when an OCELL carries a csub outside the range CUNK..CFREE.
 */
47 #if 1
48 #define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
49 #else
50 void tempfree(Cell *p) {
	/* sanity check on the cell's subtype before releasing it */
51 	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
52 		WARNING("bad csub %d in Cell %d %s",
53 			p->csub, p->ctype, p->sval);
54 	}
55 	if (istemp(p))
56 		tfree(p);
57 }
58 #endif
59 
60 /* do we really need these? */
61 /* #ifdef _NFILE */
62 /* #ifndef FOPEN_MAX */
63 /* #define FOPEN_MAX _NFILE */
64 /* #endif */
65 /* #endif */
66 /*  */
67 /* #ifndef	FOPEN_MAX */
68 /* #define	FOPEN_MAX	40 */	/* max number of open files */
69 /* #endif */
70 /*  */
71 /* #ifndef RAND_MAX */
72 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
73 /* #endif */
74 
75 jmp_buf env;	/* setjmp() context armed in program(); jump() longjmps to it for exit */
76 extern	int	pairstack[];
77 extern	Awkfloat	srand_seed;
78 
79 Node	*winner = NULL;	/* root of parse tree */
80 Cell	*tmps;		/* head of the free list of temporary cells; see gettemp() and tfree() */
81 
/*
 * Statically allocated result cells, published through the pointers that
 * follow each one.  True and False are what the boolean-valued operators in
 * this file return; the j* cells are the values jump() returns for the
 * corresponding control-flow statements.
 */
82 static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
83 Cell	*True	= &truecell;
84 static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
85 Cell	*False	= &falsecell;
86 static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
87 Cell	*jbreak	= &breakcell;
88 static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
89 Cell	*jcont	= &contcell;
90 static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
91 Cell	*jnext	= &nextcell;
92 static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
93 Cell	*jnextfile	= &nextfilecell;
94 static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
95 Cell	*jexit	= &exitcell;
96 static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
97 Cell	*jret	= &retcell;
/* template that gettemp() copies into every temporary cell it hands out */
98 static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
99 
100 Node	*curnode = NULL;	/* the node being executed, for debugging; updated by execute() */
101 
102 /* buffer memory management */
/*
 * adjbuf - make sure a heap buffer can hold at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; 0 means "do not round"
 * pbptr:   address of movable pointer into buffer, or NULL if none
 * whatrtn: name of the calling routine if failure should cause a fatal
 *          error; NULL if failure should be reported to the caller
 *
 * Returns 0 for allocation failure, non-zero for success.  On success
 * *pbuf and *psiz describe the (possibly moved) buffer and *pbptr, when
 * supplied, keeps the same offset into it.  On failure nothing is changed.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;	/* already large enough */

	rminlen = quantum ? minlen % quantum : 0;
	boff = pbptr ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (rminlen) {
		/* the rounded size must still be representable as an int */
		if (minlen > INT_MAX - (quantum - rminlen)) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		minlen += quantum - rminlen;
	}
	tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn may legitimately be NULL, so never hand it to %s directly */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;	/* re-anchor the caller's cursor in the new block */
	return 1;
}
136 
137 void run(Node *a)	/* execution of parse tree starts here */
138 {
139 
140 	stdinit();
141 	execute(a);
142 	closeall();
143 }
144 
145 Cell *execute(Node *u)	/* execute a node of the parse tree */
146 {
147 	Cell *(*proc)(Node **, int);
148 	Cell *x;
149 	Node *a;
150 
151 	if (u == NULL)
152 		return(True);
153 	for (a = u; ; a = a->nnext) {
154 		curnode = a;
155 		if (isvalue(a)) {
156 			x = (Cell *) (a->narg[0]);
157 			if (isfld(x) && !donefld)
158 				fldbld();
159 			else if (isrec(x) && !donerec)
160 				recbld();
161 			return(x);
162 		}
163 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
164 			FATAL("illegal statement");
165 		proc = proctab[a->nobj-FIRSTTOKEN];
166 		x = (*proc)(a->narg, a->nobj);
167 		if (isfld(x) && !donefld)
168 			fldbld();
169 		else if (isrec(x) && !donerec)
170 			recbld();
171 		if (isexpr(a))
172 			return(x);
173 		if (isjump(x))
174 			return(x);
175 		if (a->nnext == NULL)
176 			return(x);
177 		tempfree(x);
178 	}
179 }
180 
181 
182 Cell *program(Node **a, int n)	/* execute an awk program */
183 {				/* a[0] = BEGIN, a[1] = body, a[2] = END */
184 	Cell *x;
185 
186 	if (setjmp(env) != 0)
187 		goto ex;
188 	if (a[0]) {		/* BEGIN */
189 		x = execute(a[0]);
190 		if (isexit(x))
191 			return(True);
192 		if (isjump(x))
193 			FATAL("illegal break, continue, next or nextfile from BEGIN");
194 		tempfree(x);
195 	}
196 	if (a[1] || a[2])
197 		while (getrec(&record, &recsize, true) > 0) {
198 			x = execute(a[1]);
199 			if (isexit(x))
200 				break;
201 			tempfree(x);
202 		}
203   ex:
204 	if (setjmp(env) != 0)	/* handles exit within END */
205 		goto ex1;
206 	if (a[2]) {		/* END */
207 		x = execute(a[2]);
208 		if (isbreak(x) || isnext(x) || iscont(x))
209 			FATAL("illegal break, continue, next or nextfile from END");
210 		tempfree(x);
211 	}
212   ex1:
213 	return(True);
214 }
215 
/*
 * One Frame describes a single active call of a user-defined awk function.
 * call() fills it in before running the function body; arg() and the
 * RETURN case of jump() reach the current one through frp.
 */
216 struct Frame {	/* stack frame for awk function calls */
217 	int nargs;	/* number of parameters in the function definition (call() stores ndef here) */
218 	Cell *fcncell;	/* pointer to Cell for function */
219 	Cell **args;	/* pointer to array of arguments after execute */
220 	Cell *retval;	/* return value */
221 };
222 
223 #define	NARGS	50	/* max args in a call; call() checks ncall + ndef against it */
224 
225 struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
226 int	nframe = 0;		/* number of frames allocated; grown 100 at a time by call() */
227 struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
228 
/*
 * call - invoke a user-defined awk function.
 *
 * a[0] evaluates to the Cell of the function; a[1] is the list of argument
 * expressions, chained through nnext.  The function Cell's fval is read as
 * the number of parameters in the definition and its sval as the body to
 * execute.
 *
 * Arrays are passed by reference; every other argument is copied with
 * copycell().  Parameters the caller did not supply get a fresh temporary
 * initialised from newcopycell.  A new Frame is pushed for the duration of
 * the body and popped before the return value (frp->retval) is returned.
 * When the body result is an exit or next cell, that cell is returned
 * instead and the frame is not popped.
 */
229 Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
230 {
231 	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
232 	int i, ncall, ndef;
233 	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
234 	Node *x;
235 	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
236 	Cell *y, *z, *fcn;
237 	char *s;
238 
239 	fcn = execute(a[0]);	/* the function itself */
240 	s = fcn->nval;
241 	if (!isfcn(fcn))
242 		FATAL("calling undefined function %s", s);
	/* first call ever: allocate the initial block of frames */
243 	if (frame == NULL) {
244 		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
245 		if (frame == NULL)
246 			FATAL("out of space for stack frames calling %s", s);
247 	}
248 	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
249 		ncall++;
250 	ndef = (int) fcn->fval;			/* args in defn */
251 	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
252 	if (ncall > ndef)
253 		WARNING("function %s called with %d args, uses only %d",
254 			s, ncall, ndef);
255 	if (ncall + ndef > NARGS)
256 		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	/* oargs[] remembers the caller's cells; args[] is what the body will see */
257 	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
258 		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
259 		y = execute(x);
260 		oargs[i] = y;
261 		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
262 			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
263 		if (isfcn(y))
264 			FATAL("can't use function %s as argument in %s", y->nval, s);
265 		if (isarr(y))
266 			args[i] = y;	/* arrays by ref */
267 		else
268 			args[i] = copycell(y);
269 		tempfree(y);
270 	}
271 	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
272 		args[i] = gettemp();
273 		*args[i] = newcopycell;
274 	}
275 	frp++;	/* now ok to up frame */
276 	if (frp >= frame + nframe) {
277 		int dfp = frp - frame;	/* old index */
278 		frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
279 		if (frame == NULL)
280 			FATAL("out of space for stack frames in %s", s);
		/* the array may have moved, so rebuild frp from the saved index */
281 		frp = frame + dfp;
282 	}
283 	frp->fcncell = fcn;
284 	frp->args = args;
285 	frp->nargs = ndef;	/* number defined with (excess are locals) */
286 	frp->retval = gettemp();
287 
288 	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
289 	y = execute((Node *)(fcn->sval));	/* execute body */
290 	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
291 
	/* release the parameter cells now that the body has finished */
292 	for (i = 0; i < ndef; i++) {
293 		Cell *t = frp->args[i];
294 		if (isarr(t)) {
295 			if (t->csub == CCOPY) {
				/* a scalar copy that the body turned into an array */
296 				if (i >= ncall) {
					/* caller supplied nothing here: the array is local, discard it */
297 					freesymtab(t);
298 					t->csub = CTEMP;
299 					tempfree(t);
300 				} else {
					/* hand the new array over to the caller's original cell */
301 					oargs[i]->tval = t->tval;
302 					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
303 					oargs[i]->sval = t->sval;
304 					tempfree(t);
305 				}
306 			}
307 		} else if (t != y) {	/* kludge to prevent freeing twice */
308 			t->csub = CTEMP;
309 			tempfree(t);
310 		} else if (t == y && t->csub == CCOPY) {
			/* the body's result is this very parameter cell: free it once, here */
311 			t->csub = CTEMP;
312 			tempfree(t);
313 			freed = 1;
314 		}
315 	}
316 	tempfree(fcn);
	/* NOTE(review): this early return leaves frp incremented */
317 	if (isexit(y) || isnext(y))
318 		return y;
319 	if (freed == 0) {
320 		tempfree(y);	/* don't free twice! */
321 	}
322 	z = frp->retval;			/* return value */
323 	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
324 	frp--;
325 	return(z);
326 }
327 
328 Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
329 {
330 	Cell *y;
331 
332 	/* copy is not constant or field */
333 
334 	y = gettemp();
335 	y->tval = x->tval & ~(CON|FLD|REC);
336 	y->csub = CCOPY;	/* prevents freeing until call is over */
337 	y->nval = x->nval;	/* BUG? */
338 	if (isstr(x) /* || x->ctype == OCELL */) {
339 		y->sval = tostring(x->sval);
340 		y->tval &= ~DONTFREE;
341 	} else
342 		y->tval |= DONTFREE;
343 	y->fval = x->fval;
344 	return y;
345 }
346 
347 Cell *arg(Node **a, int n)	/* nth argument of a function */
348 {
349 
350 	n = ptoi(a[0]);	/* argument number, counting from 0 */
351 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352 	if (n+1 > frp->nargs)
353 		FATAL("argument #%d of function %s was not supplied",
354 			n+1, frp->fcncell->nval);
355 	return frp->args[n];
356 }
357 
358 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
359 {
360 	Cell *y;
361 
362 	switch (n) {
363 	case EXIT:
364 		if (a[0] != NULL) {
365 			y = execute(a[0]);
366 			errorflag = (int) getfval(y);
367 			tempfree(y);
368 		}
369 		longjmp(env, 1);
370 	case RETURN:
371 		if (a[0] != NULL) {
372 			y = execute(a[0]);
373 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 				setsval(frp->retval, getsval(y));
375 				frp->retval->fval = getfval(y);
376 				frp->retval->tval |= NUM;
377 			}
378 			else if (y->tval & STR)
379 				setsval(frp->retval, getsval(y));
380 			else if (y->tval & NUM)
381 				setfval(frp->retval, getfval(y));
382 			else		/* can't happen */
383 				FATAL("bad type variable %d", y->tval);
384 			tempfree(y);
385 		}
386 		return(jret);
387 	case NEXT:
388 		return(jnext);
389 	case NEXTFILE:
390 		nextfile();
391 		return(jnextfile);
392 	case BREAK:
393 		return(jbreak);
394 	case CONTINUE:
395 		return(jcont);
396 	default:	/* can't happen */
397 		FATAL("illegal jump type %d", n);
398 	}
399 	return 0;	/* not reached */
400 }
401 
402 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
403 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
404 	Cell *r, *x;
405 	extern Cell **fldtab;
406 	FILE *fp;
407 	char *buf;
408 	int bufsize = recsize;
409 	int mode;
410 	bool newflag;
411 	double result;
412 
413 	if ((buf = (char *) malloc(bufsize)) == NULL)
414 		FATAL("out of memory in getline");
415 
416 	fflush(stdout);	/* in case someone is waiting for a prompt */
417 	r = gettemp();
418 	if (a[1] != NULL) {		/* getline < file */
419 		x = execute(a[2]);		/* filename */
420 		mode = ptoi(a[1]);
421 		if (mode == '|')		/* input pipe */
422 			mode = LE;	/* arbitrary flag */
423 		fp = openfile(mode, getsval(x), &newflag);
424 		tempfree(x);
425 		if (fp == NULL)
426 			n = -1;
427 		else
428 			n = readrec(&buf, &bufsize, fp, newflag);
429 		if (n <= 0) {
430 			;
431 		} else if (a[0] != NULL) {	/* getline var <file */
432 			x = execute(a[0]);
433 			setsval(x, buf);
434 			if (is_number(x->sval, & result)) {
435 				x->fval = result;
436 				x->tval |= NUM;
437 			}
438 			tempfree(x);
439 		} else {			/* getline <file */
440 			setsval(fldtab[0], buf);
441 			if (is_number(fldtab[0]->sval, & result)) {
442 				fldtab[0]->fval = result;
443 				fldtab[0]->tval |= NUM;
444 			}
445 		}
446 	} else {			/* bare getline; use current input */
447 		if (a[0] == NULL)	/* getline */
448 			n = getrec(&record, &recsize, true);
449 		else {			/* getline var */
450 			n = getrec(&buf, &bufsize, false);
451 			x = execute(a[0]);
452 			setsval(x, buf);
453 			if (is_number(x->sval, & result)) {
454 				x->fval = result;
455 				x->tval |= NUM;
456 			}
457 			tempfree(x);
458 		}
459 	}
460 	setfval(r, (Awkfloat) n);
461 	free(buf);
462 	return r;
463 }
464 
465 Cell *getnf(Node **a, int n)	/* get NF */
466 {
467 	if (!donefld)
468 		fldbld();
469 	return (Cell *) a[0];
470 }
471 
472 static char *
473 makearraystring(Node *p, const char *func)
474 {
475 	char *buf;
476 	int bufsz = recsize;
477 	size_t blen;
478 
479 	if ((buf = (char *) malloc(bufsz)) == NULL) {
480 		FATAL("%s: out of memory", func);
481 	}
482 
483 	blen = 0;
484 	buf[blen] = '\0';
485 
486 	for (; p; p = p->nnext) {
487 		Cell *x = execute(p);	/* expr */
488 		char *s = getsval(x);
489 		size_t seplen = strlen(getsval(subseploc));
490 		size_t nsub = p->nnext ? seplen : 0;
491 		size_t slen = strlen(s);
492 		size_t tlen = blen + slen + nsub;
493 
494 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
495 			FATAL("%s: out of memory %s[%s...]",
496 			    func, x->nval, buf);
497 		}
498 		memcpy(buf + blen, s, slen);
499 		if (nsub) {
500 			memcpy(buf + blen + slen, *SUBSEP, nsub);
501 		}
502 		buf[tlen] = '\0';
503 		blen = tlen;
504 		tempfree(x);
505 	}
506 	return buf;
507 }
508 
509 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
510 {
511 	Cell *x, *z;
512 	char *buf;
513 
514 	x = execute(a[0]);	/* Cell* for symbol table */
515 	buf = makearraystring(a[1], __func__);
516 	if (!isarr(x)) {
517 		DPRINTF("making %s into an array\n", NN(x->nval));
518 		if (freeable(x))
519 			xfree(x->sval);
520 		x->tval &= ~(STR|NUM|DONTFREE);
521 		x->tval |= ARR;
522 		x->sval = (char *) makesymtab(NSYMTAB);
523 	}
524 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
525 	z->ctype = OCELL;
526 	z->csub = CVAR;
527 	tempfree(x);
528 	free(buf);
529 	return(z);
530 }
531 
532 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
533 {
534 	Cell *x;
535 
536 	x = execute(a[0]);	/* Cell* for symbol table */
537 	if (x == symtabloc) {
538 		FATAL("cannot delete SYMTAB or its elements");
539 	}
540 	if (!isarr(x))
541 		return True;
542 	if (a[1] == NULL) {	/* delete the elements, not the table */
543 		freesymtab(x);
544 		x->tval &= ~STR;
545 		x->tval |= ARR;
546 		x->sval = (char *) makesymtab(NSYMTAB);
547 	} else {
548 		char *buf = makearraystring(a[1], __func__);
549 		freeelem(x, buf);
550 		free(buf);
551 	}
552 	tempfree(x);
553 	return True;
554 }
555 
556 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
557 {
558 	Cell *ap, *k;
559 	char *buf;
560 
561 	ap = execute(a[1]);	/* array name */
562 	if (!isarr(ap)) {
563 		DPRINTF("making %s into an array\n", ap->nval);
564 		if (freeable(ap))
565 			xfree(ap->sval);
566 		ap->tval &= ~(STR|NUM|DONTFREE);
567 		ap->tval |= ARR;
568 		ap->sval = (char *) makesymtab(NSYMTAB);
569 	}
570 	buf = makearraystring(a[0], __func__);
571 	k = lookup(buf, (Array *) ap->sval);
572 	tempfree(ap);
573 	free(buf);
574 	if (k == NULL)
575 		return(False);
576 	else
577 		return(True);
578 }
579 
580 
581 Cell *matchop(Node **a, int n)	/* ~ and match() */
582 {
583 	Cell *x, *y;
584 	char *s, *t;
585 	int i;
586 	fa *pfa;
587 	int (*mf)(fa *, const char *) = match, mode = 0;
588 
589 	if (n == MATCHFCN) {
590 		mf = pmatch;
591 		mode = 1;
592 	}
593 	x = execute(a[1]);	/* a[1] = target text */
594 	s = getsval(x);
595 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
596 		i = (*mf)((fa *) a[2], s);
597 	else {
598 		y = execute(a[2]);	/* a[2] = regular expr */
599 		t = getsval(y);
600 		pfa = makedfa(t, mode);
601 		i = (*mf)(pfa, s);
602 		tempfree(y);
603 	}
604 	tempfree(x);
605 	if (n == MATCHFCN) {
606 		int start = patbeg - s + 1;
607 		if (patlen < 0)
608 			start = 0;
609 		setfval(rstartloc, (Awkfloat) start);
610 		setfval(rlengthloc, (Awkfloat) patlen);
611 		x = gettemp();
612 		x->tval = NUM;
613 		x->fval = start;
614 		return x;
615 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
616 		return(True);
617 	else
618 		return(False);
619 }
620 
621 
622 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
623 {
624 	Cell *x, *y;
625 	int i;
626 
627 	x = execute(a[0]);
628 	i = istrue(x);
629 	tempfree(x);
630 	switch (n) {
631 	case BOR:
632 		if (i) return(True);
633 		y = execute(a[1]);
634 		i = istrue(y);
635 		tempfree(y);
636 		if (i) return(True);
637 		else return(False);
638 	case AND:
639 		if ( !i ) return(False);
640 		y = execute(a[1]);
641 		i = istrue(y);
642 		tempfree(y);
643 		if (i) return(True);
644 		else return(False);
645 	case NOT:
646 		if (i) return(False);
647 		else return(True);
648 	default:	/* can't happen */
649 		FATAL("unknown boolean operator %d", n);
650 	}
651 	return 0;	/*NOTREACHED*/
652 }
653 
654 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
655 {
656 	int i;
657 	Cell *x, *y;
658 	Awkfloat j;
659 
660 	x = execute(a[0]);
661 	y = execute(a[1]);
662 	if (x->tval&NUM && y->tval&NUM) {
663 		j = x->fval - y->fval;
664 		i = j<0? -1: (j>0? 1: 0);
665 	} else {
666 		i = strcmp(getsval(x), getsval(y));
667 	}
668 	tempfree(x);
669 	tempfree(y);
670 	switch (n) {
671 	case LT:	if (i<0) return(True);
672 			else return(False);
673 	case LE:	if (i<=0) return(True);
674 			else return(False);
675 	case NE:	if (i!=0) return(True);
676 			else return(False);
677 	case EQ:	if (i == 0) return(True);
678 			else return(False);
679 	case GE:	if (i>=0) return(True);
680 			else return(False);
681 	case GT:	if (i>0) return(True);
682 			else return(False);
683 	default:	/* can't happen */
684 		FATAL("unknown relational operator %d", n);
685 	}
686 	return 0;	/*NOTREACHED*/
687 }
688 
689 void tfree(Cell *a)	/* free a tempcell */
690 {
691 	if (freeable(a)) {
692 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
693 		xfree(a->sval);
694 	}
695 	if (a == tmps)
696 		FATAL("tempcell list is curdled");
697 	a->cnext = tmps;
698 	tmps = a;
699 }
700 
701 Cell *gettemp(void)	/* get a tempcell */
702 {	int i;
703 	Cell *x;
704 
705 	if (!tmps) {
706 		tmps = (Cell *) calloc(100, sizeof(*tmps));
707 		if (!tmps)
708 			FATAL("out of space for temporaries");
709 		for (i = 1; i < 100; i++)
710 			tmps[i-1].cnext = &tmps[i];
711 		tmps[i-1].cnext = NULL;
712 	}
713 	x = tmps;
714 	tmps = x->cnext;
715 	*x = tempcell;
716 	return(x);
717 }
718 
719 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
720 {
721 	Awkfloat val;
722 	Cell *x;
723 	int m;
724 	char *s;
725 
726 	x = execute(a[0]);
727 	val = getfval(x);	/* freebsd: defend against super large field numbers */
728 	if ((Awkfloat)INT_MAX < val)
729 		FATAL("trying to access out of range field %s", x->nval);
730 	m = (int) val;
731 	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
732 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
733 		/* BUG: can x->nval ever be null??? */
734 	tempfree(x);
735 	x = fieldadr(m);
736 	x->ctype = OCELL;	/* BUG?  why are these needed? */
737 	x->csub = CFLD;
738 	return(x);
739 }
740 
741 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
742 {
743 	int k, m, n;
744 	char *s;
745 	int temp;
746 	Cell *x, *y, *z = NULL;
747 
748 	x = execute(a[0]);
749 	y = execute(a[1]);
750 	if (a[2] != NULL)
751 		z = execute(a[2]);
752 	s = getsval(x);
753 	k = strlen(s) + 1;
754 	if (k <= 1) {
755 		tempfree(x);
756 		tempfree(y);
757 		if (a[2] != NULL) {
758 			tempfree(z);
759 		}
760 		x = gettemp();
761 		setsval(x, "");
762 		return(x);
763 	}
764 	m = (int) getfval(y);
765 	if (m <= 0)
766 		m = 1;
767 	else if (m > k)
768 		m = k;
769 	tempfree(y);
770 	if (a[2] != NULL) {
771 		n = (int) getfval(z);
772 		tempfree(z);
773 	} else
774 		n = k - 1;
775 	if (n < 0)
776 		n = 0;
777 	else if (n > k - m)
778 		n = k - m;
779 	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
780 	y = gettemp();
781 	temp = s[n+m-1];	/* with thanks to John Linderman */
782 	s[n+m-1] = '\0';
783 	setsval(y, s + m - 1);
784 	s[n+m-1] = temp;
785 	tempfree(x);
786 	return(y);
787 }
788 
789 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
790 {
791 	Cell *x, *y, *z;
792 	char *s1, *s2, *p1, *p2, *q;
793 	Awkfloat v = 0.0;
794 
795 	x = execute(a[0]);
796 	s1 = getsval(x);
797 	y = execute(a[1]);
798 	s2 = getsval(y);
799 
800 	z = gettemp();
801 	for (p1 = s1; *p1 != '\0'; p1++) {
802 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
803 			continue;
804 		if (*p2 == '\0') {
805 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
806 			break;
807 		}
808 	}
809 	tempfree(x);
810 	tempfree(y);
811 	setfval(z, v);
812 	return(z);
813 }
814 
815 #define	MAXNUMSIZE	50
816 
817 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
818 {
819 	char *fmt;
820 	char *p, *t;
821 	const char *os;
822 	Cell *x;
823 	int flag = 0, n;
824 	int fmtwd; /* format width */
825 	int fmtsz = recsize;
826 	char *buf = *pbuf;
827 	int bufsize = *pbufsize;
828 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
829 #define BUFSZ(a)   (bufsize - ((a) - buf))
830 
831 	static bool first = true;
832 	static bool have_a_format = false;
833 
834 	if (first) {
835 		char xbuf[100];
836 
837 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
838 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
839 		first = false;
840 	}
841 
842 	os = s;
843 	p = buf;
844 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
845 		FATAL("out of memory in format()");
846 	while (*s) {
847 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
848 		if (*s != '%') {
849 			*p++ = *s++;
850 			continue;
851 		}
852 		if (*(s+1) == '%') {
853 			*p++ = '%';
854 			s += 2;
855 			continue;
856 		}
857 		/* have to be real careful in case this is a huge number, eg, %100000d */
858 		fmtwd = atoi(s+1);
859 		if (fmtwd < 0)
860 			fmtwd = -fmtwd;
861 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
862 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
863 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
864 				FATAL("format item %.30s... ran format() out of memory", os);
865 			/* Ignore size specifiers */
866 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
867 				t--;
868 				continue;
869 			}
870 			if (isalpha((uschar)*s))
871 				break;
872 			if (*s == '$') {
873 				FATAL("'$' not permitted in awk formats");
874 			}
875 			if (*s == '*') {
876 				if (a == NULL) {
877 					FATAL("not enough args in printf(%s)", os);
878 				}
879 				x = execute(a);
880 				a = a->nnext;
881 				snprintf(t - 1, FMTSZ(t - 1),
882 				    "%d", fmtwd=(int) getfval(x));
883 				if (fmtwd < 0)
884 					fmtwd = -fmtwd;
885 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
886 				t = fmt + strlen(fmt);
887 				tempfree(x);
888 			}
889 		}
890 		*t = '\0';
891 		if (fmtwd < 0)
892 			fmtwd = -fmtwd;
893 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
894 		switch (*s) {
895 		case 'a': case 'A':
896 			if (have_a_format)
897 				flag = *s;
898 			else
899 				flag = 'f';
900 			break;
901 		case 'f': case 'e': case 'g': case 'E': case 'G':
902 			flag = 'f';
903 			break;
904 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
905 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
906 			*(t-1) = 'j';
907 			*t = *s;
908 			*++t = '\0';
909 			break;
910 		case 's':
911 			flag = 's';
912 			break;
913 		case 'c':
914 			flag = 'c';
915 			break;
916 		default:
917 			WARNING("weird printf conversion %s", fmt);
918 			flag = '?';
919 			break;
920 		}
921 		if (a == NULL)
922 			FATAL("not enough args in printf(%s)", os);
923 		x = execute(a);
924 		a = a->nnext;
925 		n = MAXNUMSIZE;
926 		if (fmtwd > n)
927 			n = fmtwd;
928 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
929 		switch (flag) {
930 		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
931 			t = getsval(x);
932 			n = strlen(t);
933 			if (fmtwd > n)
934 				n = fmtwd;
935 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
936 			p += strlen(p);
937 			snprintf(p, BUFSZ(p), "%s", t);
938 			break;
939 		case 'a':
940 		case 'A':
941 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
942 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
943 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
944 		case 's':
945 			t = getsval(x);
946 			n = strlen(t);
947 			if (fmtwd > n)
948 				n = fmtwd;
949 			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
950 				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
951 			snprintf(p, BUFSZ(p), fmt, t);
952 			break;
953 		case 'c':
954 			if (isnum(x)) {
955 				if ((int)getfval(x))
956 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
957 				else {
958 					*p++ = '\0'; /* explicit null byte */
959 					*p = '\0';   /* next output will start here */
960 				}
961 			} else
962 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
963 			break;
964 		default:
965 			FATAL("can't happen: bad conversion %c in format()", flag);
966 		}
967 		tempfree(x);
968 		p += strlen(p);
969 		s++;
970 	}
971 	*p = '\0';
972 	free(fmt);
973 	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
974 		execute(a);
975 	*pbuf = buf;
976 	*pbufsize = bufsize;
977 	return p - buf;
978 }
979 
980 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
981 {
982 	Cell *x;
983 	Node *y;
984 	char *buf;
985 	int bufsz=3*recsize;
986 
987 	if ((buf = (char *) malloc(bufsz)) == NULL)
988 		FATAL("out of memory in awksprintf");
989 	y = a[0]->nnext;
990 	x = execute(a[0]);
991 	if (format(&buf, &bufsz, getsval(x), y) == -1)
992 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
993 	tempfree(x);
994 	x = gettemp();
995 	x->sval = buf;
996 	x->tval = STR;
997 	return(x);
998 }
999 
1000 Cell *awkprintf(Node **a, int n)		/* printf */
1001 {	/* a[0] is list of args, starting with format string */
1002 	/* a[1] is redirection operator, a[2] is redirection file */
1003 	FILE *fp;
1004 	Cell *x;
1005 	Node *y;
1006 	char *buf;
1007 	int len;
1008 	int bufsz=3*recsize;
1009 
1010 	if ((buf = (char *) malloc(bufsz)) == NULL)
1011 		FATAL("out of memory in awkprintf");
1012 	y = a[0]->nnext;
1013 	x = execute(a[0]);
1014 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1015 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1016 	tempfree(x);
1017 	if (a[1] == NULL) {
1018 		/* fputs(buf, stdout); */
1019 		fwrite(buf, len, 1, stdout);
1020 		if (ferror(stdout))
1021 			FATAL("write error on stdout");
1022 	} else {
1023 		fp = redirect(ptoi(a[1]), a[2]);
1024 		/* fputs(buf, fp); */
1025 		fwrite(buf, len, 1, fp);
1026 		fflush(fp);
1027 		if (ferror(fp))
1028 			FATAL("write error on %s", filename(fp));
1029 	}
1030 	free(buf);
1031 	return(True);
1032 }
1033 
1034 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1035 {
1036 	Awkfloat i, j = 0;
1037 	double v;
1038 	Cell *x, *y, *z;
1039 
1040 	x = execute(a[0]);
1041 	i = getfval(x);
1042 	tempfree(x);
1043 	if (n != UMINUS && n != UPLUS) {
1044 		y = execute(a[1]);
1045 		j = getfval(y);
1046 		tempfree(y);
1047 	}
1048 	z = gettemp();
1049 	switch (n) {
1050 	case ADD:
1051 		i += j;
1052 		break;
1053 	case MINUS:
1054 		i -= j;
1055 		break;
1056 	case MULT:
1057 		i *= j;
1058 		break;
1059 	case DIVIDE:
1060 		if (j == 0)
1061 			FATAL("division by zero");
1062 		i /= j;
1063 		break;
1064 	case MOD:
1065 		if (j == 0)
1066 			FATAL("division by zero in mod");
1067 		modf(i/j, &v);
1068 		i = i - j * v;
1069 		break;
1070 	case UMINUS:
1071 		i = -i;
1072 		break;
1073 	case UPLUS: /* handled by getfval(), above */
1074 		break;
1075 	case POWER:
1076 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1077 			i = ipow(i, (int) j);
1078                else {
1079 			errno = 0;
1080 			i = errcheck(pow(i, j), "pow");
1081                }
1082 		break;
1083 	default:	/* can't happen */
1084 		FATAL("illegal arithmetic operator %d", n);
1085 	}
1086 	setfval(z, i);
1087 	return(z);
1088 }
1089 
/*
 * ipow - x**n.  ought to be done by pow, but isn't always.
 *
 * Returns 1 for n <= 0.  Otherwise the exponent's bits are processed from
 * the most significant one down, squaring the accumulator at every step
 * and multiplying in x for every set bit.  The multiplications happen in
 * the same order as in the halving recursion v = ipow(x, n/2);
 * result = [x *] v * v.
 */
double ipow(double x, int n)
{
	double acc = 1;
	unsigned int mask;

	if (n <= 0)
		return 1;

	/* find the highest power of two that does not exceed n */
	mask = 1;
	while (mask <= (unsigned int) n / 2)
		mask <<= 1;

	for ( ; mask != 0; mask >>= 1) {
		if ((unsigned int) n & mask)
			acc = x * acc * acc;
		else
			acc = acc * acc;
	}
	return acc;
}
1102 
1103 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1104 {
1105 	Cell *x, *z;
1106 	int k;
1107 	Awkfloat xf;
1108 
1109 	x = execute(a[0]);
1110 	xf = getfval(x);
1111 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1112 	if (n == PREINCR || n == PREDECR) {
1113 		setfval(x, xf + k);
1114 		return(x);
1115 	}
1116 	z = gettemp();
1117 	setfval(z, xf);
1118 	setfval(x, xf + k);
1119 	tempfree(x);
1120 	return(z);
1121 }
1122 
1123 Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1124 {		/* this is subtle; don't muck with it. */
1125 	Cell *x, *y;
1126 	Awkfloat xf, yf;
1127 	double v;
1128 
1129 	y = execute(a[1]);
1130 	x = execute(a[0]);
1131 	if (n == ASSIGN) {	/* ordinary assignment */
1132 		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1133 			;	/* self-assignment: leave alone unless it's a field or NF */
1134 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1135 			setsval(x, getsval(y));
1136 			x->fval = getfval(y);
1137 			x->tval |= NUM;
1138 		}
1139 		else if (isstr(y))
1140 			setsval(x, getsval(y));
1141 		else if (isnum(y))
1142 			setfval(x, getfval(y));
1143 		else
1144 			funnyvar(y, "read value of");
1145 		tempfree(y);
1146 		return(x);
1147 	}
1148 	xf = getfval(x);
1149 	yf = getfval(y);
1150 	switch (n) {
1151 	case ADDEQ:
1152 		xf += yf;
1153 		break;
1154 	case SUBEQ:
1155 		xf -= yf;
1156 		break;
1157 	case MULTEQ:
1158 		xf *= yf;
1159 		break;
1160 	case DIVEQ:
1161 		if (yf == 0)
1162 			FATAL("division by zero in /=");
1163 		xf /= yf;
1164 		break;
1165 	case MODEQ:
1166 		if (yf == 0)
1167 			FATAL("division by zero in %%=");
1168 		modf(xf/yf, &v);
1169 		xf = xf - yf * v;
1170 		break;
1171 	case POWEQ:
1172 		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1173 			xf = ipow(xf, (int) yf);
1174                else {
1175 			errno = 0;
1176 			xf = errcheck(pow(xf, yf), "pow");
1177                }
1178 		break;
1179 	default:
1180 		FATAL("illegal assignment operator %d", n);
1181 		break;
1182 	}
1183 	tempfree(y);
1184 	setfval(x, xf);
1185 	return(x);
1186 }
1187 
1188 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1189 {
1190 	Cell *x, *y, *z;
1191 	int n1, n2;
1192 	char *s = NULL;
1193 	int ssz = 0;
1194 
1195 	x = execute(a[0]);
1196 	n1 = strlen(getsval(x));
1197 	adjbuf(&s, &ssz, n1, recsize, 0, "cat1");
1198 	memcpy(s, x->sval, n1);
1199 
1200 	y = execute(a[1]);
1201 	n2 = strlen(getsval(y));
1202 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1203 	memcpy(s + n1, y->sval, n2);
1204 	s[n1 + n2] = '\0';
1205 
1206 	tempfree(x);
1207 	tempfree(y);
1208 
1209 	z = gettemp();
1210 	z->sval = s;
1211 	z->tval = STR;
1212 
1213 	return(z);
1214 }
1215 
1216 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1217 {
1218 	Cell *x;
1219 
1220 	if (a[0] == NULL)
1221 		x = execute(a[1]);
1222 	else {
1223 		x = execute(a[0]);
1224 		if (istrue(x)) {
1225 			tempfree(x);
1226 			x = execute(a[1]);
1227 		}
1228 	}
1229 	return x;
1230 }
1231 
1232 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1233 {
1234 	Cell *x;
1235 	int pair;
1236 
1237 	pair = ptoi(a[3]);
1238 	if (pairstack[pair] == 0) {
1239 		x = execute(a[0]);
1240 		if (istrue(x))
1241 			pairstack[pair] = 1;
1242 		tempfree(x);
1243 	}
1244 	if (pairstack[pair] == 1) {
1245 		x = execute(a[1]);
1246 		if (istrue(x))
1247 			pairstack[pair] = 0;
1248 		tempfree(x);
1249 		x = execute(a[2]);
1250 		return(x);
1251 	}
1252 	return(False);
1253 }
1254 
/*
 * split(): break the string a[0] into pieces and store them in the array
 * a[1] under the keys "1", "2", ...  a[2] is the optional separator and
 * a[3] says whether it is a STRING or a REGEXPR; when a[2] is NULL the
 * value of fsloc is used.  The array's previous contents are discarded.
 * Returns a temporary numeric Cell holding the number of pieces.
 *
 * Four strategies are used, picked in this order:
 *   - regular expression (separator longer than one char, or REGEXPR),
 *   - a single blank: runs of blank/tab/newline separate the pieces,
 *   - empty separator: one array element per character,
 *   - any other single character (a newline also ends a piece).
 * Pieces that is_number() accepts are stored as STR|NUM, others as STR.
 */
1255 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1256 {
1257 	Cell *x = NULL, *y, *ap;
1258 	const char *s, *origs, *t;
1259 	const char *fs = NULL;
1260 	char *origfs = NULL;
1261 	int sep;
1262 	char temp, num[50];
1263 	int n, tempstat, arg3type;
1264 	double result;
1265 
1266 	y = execute(a[0]);	/* source string */
	/* work on a private copy: pieces are terminated in place below */
1267 	origs = s = strdup(getsval(y));
1268 	if (s == NULL)
1269 		FATAL("out of space in split");
1270 	arg3type = ptoi(a[3]);
1271 	if (a[2] == NULL)		/* fs string */
1272 		fs = getsval(fsloc);
1273 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
1274 		x = execute(a[2]);
1275 		fs = origfs = strdup(getsval(x));
1276 		if (fs == NULL)
1277 			FATAL("out of space in split");
1278 		tempfree(x);
1279 	} else if (arg3type == REGEXPR)
1280 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1281 	else
1282 		FATAL("illegal type of split");
1283 	sep = *fs;
1284 	ap = execute(a[1]);	/* array name */
	/* discard whatever the array held and give it a fresh symbol table */
1285 	freesymtab(ap);
1286 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1287 	ap->tval &= ~STR;
1288 	ap->tval |= ARR;
1289 	ap->sval = (char *) makesymtab(NSYMTAB);
1290 
1291 	n = 0;
1292         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1293 		/* split(s, a, //); have to arrange that it looks like empty sep */
1294 		arg3type = 0;
1295 		fs = "";
1296 		sep = 0;
1297 	}
1298 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1299 		fa *pfa;
1300 		if (arg3type == REGEXPR) {	/* it's ready already */
1301 			pfa = (fa *) a[2];
1302 		} else {
1303 			pfa = makedfa(fs, 1);
1304 		}
1305 		if (nematch(pfa,s)) {
			/* initstat is forced to 2 while scanning and restored afterwards */
1306 			tempstat = pfa->initstat;
1307 			pfa->initstat = 2;
1308 			do {
1309 				n++;
1310 				snprintf(num, sizeof(num), "%d", n);
				/* cut the piece off at the match, store it, then put the byte back */
1311 				temp = *patbeg;
1312 				setptr(patbeg, '\0');
1313 				if (is_number(s, & result))
1314 					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1315 				else
1316 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1317 				setptr(patbeg, temp);
1318 				s = patbeg + patlen;
				/* separator reached the end of the string: add a final empty piece */
1319 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1320 					n++;
1321 					snprintf(num, sizeof(num), "%d", n);
1322 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1323 					pfa->initstat = tempstat;
1324 					goto spdone;
1325 				}
1326 			} while (nematch(pfa,s));
1327 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1328 							/* cf gsub and refldbld */
1329 		}
		/* whatever follows the last separator is the final piece */
1330 		n++;
1331 		snprintf(num, sizeof(num), "%d", n);
1332 		if (is_number(s, & result))
1333 			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1334 		else
1335 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1336   spdone:
1337 		pfa = NULL;
1338 	} else if (sep == ' ') {
1339 		for (n = 0; ; ) {
1340 #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
			/* skip leading whitespace, then collect one run of non-whitespace */
1341 			while (ISWS(*s))
1342 				s++;
1343 			if (*s == '\0')
1344 				break;
1345 			n++;
1346 			t = s;
1347 			do
1348 				s++;
1349 			while (*s != '\0' && !ISWS(*s));
1350 			temp = *s;
1351 			setptr(s, '\0');
1352 			snprintf(num, sizeof(num), "%d", n);
1353 			if (is_number(t, & result))
1354 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1355 			else
1356 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1357 			setptr(s, temp);
1358 			if (*s != '\0')
1359 				s++;
1360 		}
1361 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1362 		for (n = 0; *s != '\0'; s++) {
1363 			char buf[2];
1364 			n++;
1365 			snprintf(num, sizeof(num), "%d", n);
1366 			buf[0] = *s;
1367 			buf[1] = '\0';
			/* a single digit is stored with its numeric value as well */
1368 			if (isdigit((uschar)buf[0]))
1369 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1370 			else
1371 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1372 		}
1373 	} else if (*s != '\0') {
1374 		for (;;) {
1375 			n++;
1376 			t = s;
			/* a piece ends at the separator, at a newline, or at the end of the string */
1377 			while (*s != sep && *s != '\n' && *s != '\0')
1378 				s++;
1379 			temp = *s;
1380 			setptr(s, '\0');
1381 			snprintf(num, sizeof(num), "%d", n);
1382 			if (is_number(t, & result))
1383 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1384 			else
1385 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1386 			setptr(s, temp);
1387 			if (*s++ == '\0')
1388 				break;
1389 		}
1390 	}
1391 	tempfree(ap);
1392 	tempfree(y);
	/* release the private copies made with strdup() above */
1393 	xfree(origs);
1394 	xfree(origfs);
1395 	x = gettemp();
1396 	x->tval = NUM;
1397 	x->fval = n;
1398 	return(x);
1399 }
1400 
1401 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1402 {
1403 	Cell *x;
1404 
1405 	x = execute(a[0]);
1406 	if (istrue(x)) {
1407 		tempfree(x);
1408 		x = execute(a[1]);
1409 	} else {
1410 		tempfree(x);
1411 		x = execute(a[2]);
1412 	}
1413 	return(x);
1414 }
1415 
1416 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1417 {
1418 	Cell *x;
1419 
1420 	x = execute(a[0]);
1421 	if (istrue(x)) {
1422 		tempfree(x);
1423 		x = execute(a[1]);
1424 	} else if (a[2] != NULL) {
1425 		tempfree(x);
1426 		x = execute(a[2]);
1427 	}
1428 	return(x);
1429 }
1430 
1431 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1432 {
1433 	Cell *x;
1434 
1435 	for (;;) {
1436 		x = execute(a[0]);
1437 		if (!istrue(x))
1438 			return(x);
1439 		tempfree(x);
1440 		x = execute(a[1]);
1441 		if (isbreak(x)) {
1442 			x = True;
1443 			return(x);
1444 		}
1445 		if (isnext(x) || isexit(x) || isret(x))
1446 			return(x);
1447 		tempfree(x);
1448 	}
1449 }
1450 
1451 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1452 {
1453 	Cell *x;
1454 
1455 	for (;;) {
1456 		x = execute(a[0]);
1457 		if (isbreak(x))
1458 			return True;
1459 		if (isnext(x) || isexit(x) || isret(x))
1460 			return(x);
1461 		tempfree(x);
1462 		x = execute(a[1]);
1463 		if (!istrue(x))
1464 			return(x);
1465 		tempfree(x);
1466 	}
1467 }
1468 
1469 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1470 {
1471 	Cell *x;
1472 
1473 	x = execute(a[0]);
1474 	tempfree(x);
1475 	for (;;) {
1476 		if (a[1]!=NULL) {
1477 			x = execute(a[1]);
1478 			if (!istrue(x)) return(x);
1479 			else tempfree(x);
1480 		}
1481 		x = execute(a[3]);
1482 		if (isbreak(x))		/* turn off break */
1483 			return True;
1484 		if (isnext(x) || isexit(x) || isret(x))
1485 			return(x);
1486 		tempfree(x);
1487 		x = execute(a[2]);
1488 		tempfree(x);
1489 	}
1490 }
1491 
1492 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1493 {
1494 	Cell *x, *vp, *arrayp, *cp, *ncp;
1495 	Array *tp;
1496 	int i;
1497 
1498 	vp = execute(a[0]);
1499 	arrayp = execute(a[1]);
1500 	if (!isarr(arrayp)) {
1501 		return True;
1502 	}
1503 	tp = (Array *) arrayp->sval;
1504 	tempfree(arrayp);
1505 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1506 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1507 			setsval(vp, cp->nval);
1508 			ncp = cp->cnext;
1509 			x = execute(a[2]);
1510 			if (isbreak(x)) {
1511 				tempfree(vp);
1512 				return True;
1513 			}
1514 			if (isnext(x) || isexit(x) || isret(x)) {
1515 				tempfree(vp);
1516 				return(x);
1517 			}
1518 			tempfree(x);
1519 		}
1520 	}
1521 	return True;
1522 }
1523 
1524 static char *nawk_convert(const char *s, int (*fun_c)(int),
1525     wint_t (*fun_wc)(wint_t))
1526 {
1527 	char *buf      = NULL;
1528 	char *pbuf     = NULL;
1529 	const char *ps = NULL;
1530 	size_t n       = 0;
1531 	wchar_t wc;
1532 	size_t sz = MB_CUR_MAX;
1533 
1534 	if (sz == 1) {
1535 		buf = tostring(s);
1536 
1537 		for (pbuf = buf; *pbuf; pbuf++)
1538 			*pbuf = fun_c((uschar)*pbuf);
1539 
1540 		return buf;
1541 	} else {
1542 		/* upper/lower character may be shorter/longer */
1543 		buf = tostringN(s, strlen(s) * sz + 1);
1544 
1545 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1546 		/*
1547 		 * Reset internal state here too.
1548 		 * Assign result to avoid a compiler warning. (Casting to void
1549 		 * doesn't work.)
1550 		 * Increment said variable to avoid a different warning.
1551 		 */
1552 		int unused = wctomb(NULL, L'\0');
1553 		unused++;
1554 
1555 		ps   = s;
1556 		pbuf = buf;
1557 		while (n = mbtowc(&wc, ps, sz),
1558 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1559 		{
1560 			ps += n;
1561 
1562 			n = wctomb(pbuf, fun_wc(wc));
1563 			if (n == (size_t)-1)
1564 				FATAL("illegal wide character %s", s);
1565 
1566 			pbuf += n;
1567 		}
1568 
1569 		*pbuf = '\0';
1570 
1571 		if (n)
1572 			FATAL("illegal byte sequence %s", s);
1573 
1574 		return buf;
1575 	}
1576 }
1577 
#ifdef __DJGPP__
/*
 * Replacement wide-character case mappers, compiled only for DJGPP.
 * Values 0..255 are delegated to the narrow toupper()/tolower(); every
 * other value is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return tolower(wc & 0xFF);
}
#endif
1595 
/* Upper-case copy of s, produced by nawk_convert(); the caller frees it. */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
1600 
/* Lower-case copy of s, produced by nawk_convert(); the caller frees it. */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
1605 
1606 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1607 {
1608 	Cell *x, *y;
1609 	Awkfloat u;
1610 	int t, sz;
1611 	Awkfloat tmp;
1612 	char *buf, *fmt;
1613 	Node *nextarg;
1614 	FILE *fp;
1615 	int status = 0;
1616 	time_t tv;
1617 	struct tm *tm, tmbuf;
1618 
1619 	t = ptoi(a[0]);
1620 	x = execute(a[1]);
1621 	nextarg = a[1]->nnext;
1622 	switch (t) {
1623 	case FLENGTH:
1624 		if (isarr(x))
1625 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1626 		else
1627 			u = strlen(getsval(x));
1628 		break;
1629 	case FLOG:
1630 		errno = 0;
1631 		u = errcheck(log(getfval(x)), "log");
1632 		break;
1633 	case FINT:
1634 		modf(getfval(x), &u); break;
1635 	case FEXP:
1636 		errno = 0;
1637 		u = errcheck(exp(getfval(x)), "exp");
1638 		break;
1639 	case FSQRT:
1640 		errno = 0;
1641 		u = errcheck(sqrt(getfval(x)), "sqrt");
1642 		break;
1643 	case FSIN:
1644 		u = sin(getfval(x)); break;
1645 	case FCOS:
1646 		u = cos(getfval(x)); break;
1647 	case FATAN:
1648 		if (nextarg == NULL) {
1649 			WARNING("atan2 requires two arguments; returning 1.0");
1650 			u = 1.0;
1651 		} else {
1652 			y = execute(a[1]->nnext);
1653 			u = atan2(getfval(x), getfval(y));
1654 			tempfree(y);
1655 			nextarg = nextarg->nnext;
1656 		}
1657 		break;
1658 	case FCOMPL:
1659 		u = ~((int)getfval(x));
1660 		break;
1661 	case FAND:
1662 		if (nextarg == 0) {
1663 			WARNING("and requires two arguments; returning 0");
1664 			u = 0;
1665 			break;
1666 		}
1667 		y = execute(a[1]->nnext);
1668 		u = ((int)getfval(x)) & ((int)getfval(y));
1669 		tempfree(y);
1670 		nextarg = nextarg->nnext;
1671 		break;
1672 	case FFOR:
1673 		if (nextarg == 0) {
1674 			WARNING("or requires two arguments; returning 0");
1675 			u = 0;
1676 			break;
1677 		}
1678 		y = execute(a[1]->nnext);
1679 		u = ((int)getfval(x)) | ((int)getfval(y));
1680 		tempfree(y);
1681 		nextarg = nextarg->nnext;
1682 		break;
1683 	case FXOR:
1684 		if (nextarg == 0) {
1685 			WARNING("xor requires two arguments; returning 0");
1686 			u = 0;
1687 			break;
1688 		}
1689 		y = execute(a[1]->nnext);
1690 		u = ((int)getfval(x)) ^ ((int)getfval(y));
1691 		tempfree(y);
1692 		nextarg = nextarg->nnext;
1693 		break;
1694 	case FLSHIFT:
1695 		if (nextarg == 0) {
1696 			WARNING("lshift requires two arguments; returning 0");
1697 			u = 0;
1698 			break;
1699 		}
1700 		y = execute(a[1]->nnext);
1701 		u = ((int)getfval(x)) << ((int)getfval(y));
1702 		tempfree(y);
1703 		nextarg = nextarg->nnext;
1704 		break;
1705 	case FRSHIFT:
1706 		if (nextarg == 0) {
1707 			WARNING("rshift requires two arguments; returning 0");
1708 			u = 0;
1709 			break;
1710 		}
1711 		y = execute(a[1]->nnext);
1712 		u = ((int)getfval(x)) >> ((int)getfval(y));
1713 		tempfree(y);
1714 		nextarg = nextarg->nnext;
1715 		break;
1716 	case FSYSTEM:
1717 		fflush(stdout);		/* in case something is buffered already */
1718 		status = system(getsval(x));
1719 		u = status;
1720 		if (status != -1) {
1721 			if (WIFEXITED(status)) {
1722 				u = WEXITSTATUS(status);
1723 			} else if (WIFSIGNALED(status)) {
1724 				u = WTERMSIG(status) + 256;
1725 #ifdef WCOREDUMP
1726 				if (WCOREDUMP(status))
1727 					u += 256;
1728 #endif
1729 			} else	/* something else?!? */
1730 				u = 0;
1731 		}
1732 		break;
1733 	case FRAND:
1734 		/* random() returns numbers in [0..2^31-1]
1735 		 * in order to get a number in [0, 1), divide it by 2^31
1736 		 */
1737 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1738 		break;
1739 	case FSRAND:
1740 		if (isrec(x)) {		/* no argument provided */
1741 			u = time(NULL);
1742 			tmp = u;
1743 			srandom((unsigned int) u);
1744 		} else {
1745 			u = getfval(x);
1746 			tmp = u;
1747 			srandom_deterministic((unsigned int) u);
1748 		}
1749 		u = srand_seed;
1750 		srand_seed = tmp;
1751 		break;
1752 	case FTOUPPER:
1753 	case FTOLOWER:
1754 		if (t == FTOUPPER)
1755 			buf = nawk_toupper(getsval(x));
1756 		else
1757 			buf = nawk_tolower(getsval(x));
1758 		tempfree(x);
1759 		x = gettemp();
1760 		setsval(x, buf);
1761 		free(buf);
1762 		return x;
1763 	case FFLUSH:
1764 		if (isrec(x) || strlen(getsval(x)) == 0) {
1765 			flush_all();	/* fflush() or fflush("") -> all */
1766 			u = 0;
1767 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1768 			u = EOF;
1769 		else
1770 			u = fflush(fp);
1771 		break;
1772 	case FMKTIME:
1773 		memset(&tmbuf, 0, sizeof(tmbuf));
1774 		tm = &tmbuf;
1775 		t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
1776 		    &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
1777 		    &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
1778 		switch (t) {
1779 		case 6:
1780 			tm->tm_isdst = -1;	/* let mktime figure it out */
1781 			/* FALLTHROUGH */
1782 		case 7:
1783 			tm->tm_year -= 1900;
1784 			tm->tm_mon--;
1785 			u = mktime(tm);
1786 			break;
1787 		default:
1788 			u = -1;
1789 			break;
1790 		}
1791 		break;
1792 	case FSYSTIME:
1793 		u = time((time_t *) 0);
1794 		break;
1795 	case FSTRFTIME:
1796 		/* strftime([format [,timestamp]]) */
1797 		if (nextarg) {
1798 			y = execute(nextarg);
1799 			nextarg = nextarg->nnext;
1800 			tv = (time_t) getfval(y);
1801 			tempfree(y);
1802 		} else
1803 			tv = time((time_t *) 0);
1804 		tm = localtime(&tv);
1805 		if (tm == NULL)
1806 			FATAL("bad time %ld", (long)tv);
1807 
1808 		if (isrec(x)) {
1809 			/* format argument not provided, use default */
1810 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1811 		} else
1812 			fmt = tostring(getsval(x));
1813 
1814 		sz = 32;
1815 		buf = NULL;
1816 		do {
1817 			if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
1818 				FATAL("out of memory in strftime");
1819 			sz *= 2;
1820 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
1821 
1822 		y = gettemp();
1823 		setsval(y, buf);
1824 		free(fmt);
1825 		free(buf);
1826 
1827 		return y;
1828 	default:	/* can't happen */
1829 		FATAL("illegal function type %d", t);
1830 		break;
1831 	}
1832 	tempfree(x);
1833 	x = gettemp();
1834 	setfval(x, u);
1835 	if (nextarg != NULL) {
1836 		WARNING("warning: function has too many arguments");
1837 		for ( ; nextarg; nextarg = nextarg->nnext)
1838 			execute(nextarg);
1839 	}
1840 	return(x);
1841 }
1842 
1843 Cell *printstat(Node **a, int n)	/* print a[0] */
1844 {
1845 	Node *x;
1846 	Cell *y;
1847 	FILE *fp;
1848 
1849 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
1850 		fp = stdout;
1851 	else
1852 		fp = redirect(ptoi(a[1]), a[2]);
1853 	for (x = a[0]; x != NULL; x = x->nnext) {
1854 		y = execute(x);
1855 		fputs(getpssval(y), fp);
1856 		tempfree(y);
1857 		if (x->nnext == NULL)
1858 			fputs(getsval(orsloc), fp);
1859 		else
1860 			fputs(getsval(ofsloc), fp);
1861 	}
1862 	if (a[1] != NULL)
1863 		fflush(fp);
1864 	if (ferror(fp))
1865 		FATAL("write error on %s", filename(fp));
1866 	return(True);
1867 }
1868 
1869 Cell *nullproc(Node **a, int n)
1870 {
1871 	return 0;
1872 }
1873 
1874 
1875 FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
1876 {
1877 	FILE *fp;
1878 	Cell *x;
1879 	char *fname;
1880 
1881 	x = execute(b);
1882 	fname = getsval(x);
1883 	fp = openfile(a, fname, NULL);
1884 	if (fp == NULL)
1885 		FATAL("can't open file %s", fname);
1886 	tempfree(x);
1887 	return fp;
1888 }
1889 
/* One entry of the table of streams opened through openfile(). */
struct files {
	FILE	*fp;		/* the stream; NULL marks a free slot */
	const char	*fname;	/* name it was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself */

size_t nfiles;		/* number of slots in files */
1897 
1898 static void stdinit(void)	/* in case stdin, etc., are not constants */
1899 {
1900 	nfiles = FOPEN_MAX;
1901 	files = (struct files *) calloc(nfiles, sizeof(*files));
1902 	if (files == NULL)
1903 		FATAL("can't allocate file memory for %zu files", nfiles);
1904         files[0].fp = stdin;
1905 	files[0].fname = "/dev/stdin";
1906 	files[0].mode = LT;
1907         files[1].fp = stdout;
1908 	files[1].fname = "/dev/stdout";
1909 	files[1].mode = GT;
1910         files[2].fp = stderr;
1911 	files[2].fname = "/dev/stderr";
1912 	files[2].mode = GT;
1913 }
1914 
1915 FILE *openfile(int a, const char *us, bool *pnewflag)
1916 {
1917 	const char *s = us;
1918 	size_t i;
1919 	int m;
1920 	FILE *fp = NULL;
1921 
1922 	if (*s == '\0')
1923 		FATAL("null file name in print or getline");
1924 	for (i = 0; i < nfiles; i++)
1925 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1926 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1927 		     a == FFLUSH)) {
1928 			if (pnewflag)
1929 				*pnewflag = false;
1930 			return files[i].fp;
1931 		}
1932 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1933 		return NULL;
1934 
1935 	for (i = 0; i < nfiles; i++)
1936 		if (files[i].fp == NULL)
1937 			break;
1938 	if (i >= nfiles) {
1939 		struct files *nf;
1940 		size_t nnf = nfiles + FOPEN_MAX;
1941 		nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
1942 		if (nf == NULL)
1943 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1944 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1945 		nfiles = nnf;
1946 		files = nf;
1947 	}
1948 	fflush(stdout);	/* force a semblance of order */
1949 	m = a;
1950 	if (a == GT) {
1951 		fp = fopen(s, "w");
1952 	} else if (a == APPEND) {
1953 		fp = fopen(s, "a");
1954 		m = GT;	/* so can mix > and >> */
1955 	} else if (a == '|') {	/* output pipe */
1956 		fp = popen(s, "w");
1957 	} else if (a == LE) {	/* input pipe */
1958 		fp = popen(s, "r");
1959 	} else if (a == LT) {	/* getline <file */
1960 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1961 	} else	/* can't happen */
1962 		FATAL("illegal redirection %d", a);
1963 	if (fp != NULL) {
1964 		files[i].fname = tostring(s);
1965 		files[i].fp = fp;
1966 		files[i].mode = m;
1967 		if (pnewflag)
1968 			*pnewflag = true;
1969 		if (fp != stdin && fp != stdout && fp != stderr)
1970 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1971 	}
1972 	return fp;
1973 }
1974 
1975 const char *filename(FILE *fp)
1976 {
1977 	size_t i;
1978 
1979 	for (i = 0; i < nfiles; i++)
1980 		if (fp == files[i].fp)
1981 			return files[i].fname;
1982 	return "???";
1983 }
1984 
1985  Cell *closefile(Node **a, int n)
1986  {
1987  	Cell *x;
1988 	size_t i;
1989 	bool stat;
1990 
1991  	x = execute(a[0]);
1992  	getsval(x);
1993 	stat = true;
1994  	for (i = 0; i < nfiles; i++) {
1995 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1996 			continue;
1997 		if (ferror(files[i].fp))
1998 			FATAL("i/o error occurred on %s", files[i].fname);
1999 		if (files[i].fp == stdin || files[i].fp == stdout ||
2000 		    files[i].fp == stderr)
2001 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2002 		else if (files[i].mode == '|' || files[i].mode == LE)
2003 			stat = pclose(files[i].fp) == -1;
2004 		else
2005 			stat = fclose(files[i].fp) == EOF;
2006 		if (stat)
2007 			FATAL("i/o error occurred closing %s", files[i].fname);
2008 		if (i > 2)	/* don't do /dev/std... */
2009 			xfree(files[i].fname);
2010 		files[i].fname = NULL;	/* watch out for ref thru this */
2011 		files[i].fp = NULL;
2012 		break;
2013  	}
2014  	tempfree(x);
2015  	x = gettemp();
2016 	setfval(x, (Awkfloat) (stat ? -1 : 0));
2017  	return(x);
2018  }
2019 
2020 void closeall(void)
2021 {
2022 	size_t i;
2023 	bool stat = false;
2024 
2025 	for (i = 0; i < nfiles; i++) {
2026 		if (! files[i].fp)
2027 			continue;
2028 		if (ferror(files[i].fp))
2029 			FATAL( "i/o error occurred on %s", files[i].fname );
2030 		if (files[i].fp == stdin)
2031 			continue;
2032 		if (files[i].mode == '|' || files[i].mode == LE)
2033 			stat = pclose(files[i].fp) == -1;
2034 		else if (files[i].fp == stdout || files[i].fp == stderr)
2035 			stat = fflush(files[i].fp) == EOF;
2036 		else
2037 			stat = fclose(files[i].fp) == EOF;
2038 		if (stat)
2039 			FATAL( "i/o error occurred while closing %s", files[i].fname );
2040 	}
2041 }
2042 
2043 static void flush_all(void)
2044 {
2045 	size_t i;
2046 
2047 	for (i = 0; i < nfiles; i++)
2048 		if (files[i].fp)
2049 			fflush(files[i].fp);
2050 }
2051 
2052 void backsub(char **pb_ptr, const char **sptr_ptr);
2053 
/*
 * sub(): replace the first match of a regular expression in the string
 * a[3] (which is updated in place via setsval).  a[1] is the pattern:
 * an already-compiled fa when a[0] is NULL, otherwise an expression whose
 * string value is compiled with makedfa().  a[2] is the replacement text;
 * an unescaped '&' in it stands for the matched text and backslash
 * sequences are handled by backsub().  Returns True when a substitution
 * was made, False otherwise.
 */
2054 Cell *sub(Node **a, int nnn)	/* substitute command */
2055 {
2056 	const char *sptr, *q;
2057 	Cell *x, *y, *result;
2058 	char *t, *buf, *pb;
2059 	fa *pfa;
2060 	int bufsz = recsize;
2061 
2062 	if ((buf = (char *) malloc(bufsz)) == NULL)
2063 		FATAL("out of memory in sub");
2064 	x = execute(a[3]);	/* target string */
2065 	t = getsval(x);
2066 	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
2067 		pfa = (fa *) a[1];	/* regular expression */
2068 	else {
2069 		y = execute(a[1]);
2070 		pfa = makedfa(getsval(y), 1);
2071 		tempfree(y);
2072 	}
2073 	y = execute(a[2]);	/* replacement string */
2074 	result = False;
2075 	if (pmatch(pfa, t)) {
		/* copy the text that precedes the match */
2076 		sptr = t;
2077 		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
2078 		pb = buf;
2079 		while (sptr < patbeg)
2080 			*pb++ = *sptr++;
		/* expand the replacement; adjbuf() is given &pb so it can follow a moved buffer */
2081 		sptr = getsval(y);
2082 		while (*sptr != '\0') {
2083 			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
2084 			if (*sptr == '\\') {
2085 				backsub(&pb, &sptr);
2086 			} else if (*sptr == '&') {
				/* '&' inserts the matched text */
2087 				sptr++;
2088 				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
2089 				for (q = patbeg; q < patbeg+patlen; )
2090 					*pb++ = *q++;
2091 			} else
2092 				*pb++ = *sptr++;
2093 		}
2094 		*pb = '\0';
2095 		if (pb > buf + bufsz)
2096 			FATAL("sub result1 %.30s too big; can't happen", buf);
		/* append whatever follows the match, unless the match reached the end of the string */
2097 		sptr = patbeg + patlen;
2098 		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
2099 			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
2100 			while ((*pb++ = *sptr++) != '\0')
2101 				continue;
2102 		}
2103 		if (pb > buf + bufsz)
2104 			FATAL("sub result2 %.30s too big; can't happen", buf);
2105 		setsval(x, buf);	/* BUG: should be able to avoid copy */
2106 		result = True;
2107 	}
2108 	tempfree(x);
2109 	tempfree(y);
2110 	free(buf);
2111 	return result;
2112 }
2113 
/*
 * gsub(): replace every match of a regular expression in the string a[3]
 * (updated in place via setsval).  a[0]/a[1] select the pattern and a[2]
 * is the replacement, exactly as in sub().  Returns a temporary numeric
 * Cell holding the number of substitutions made.
 *
 * mflag records whether the previous step replaced a nonempty match; an
 * empty match found right after one is copied over without replacement.
 */
2114 Cell *gsub(Node **a, int nnn)	/* global substitute */
2115 {
2116 	Cell *x, *y;
2117 	char *rptr, *pb;
2118 	const char *q, *t, *sptr;
2119 	char *buf;
2120 	fa *pfa;
2121 	int mflag, tempstat, num;
2122 	int bufsz = recsize;
2123 
2124 	if ((buf = (char *) malloc(bufsz)) == NULL)
2125 		FATAL("out of memory in gsub");
2126 	mflag = 0;	/* if mflag == 0, can replace empty string */
2127 	num = 0;
2128 	x = execute(a[3]);	/* target string */
2129 	t = getsval(x);
2130 	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
2131 		pfa = (fa *) a[1];	/* regular expression */
2132 	else {
2133 		y = execute(a[1]);
2134 		pfa = makedfa(getsval(y), 1);
2135 		tempfree(y);
2136 	}
2137 	y = execute(a[2]);	/* replacement string */
2138 	if (pmatch(pfa, t)) {
		/* initstat is forced to 2 for the scan and restored at the end */
2139 		tempstat = pfa->initstat;
2140 		pfa->initstat = 2;
2141 		pb = buf;
2142 		rptr = getsval(y);
2143 		do {
2144 			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
2145 				if (mflag == 0) {	/* can replace empty */
2146 					num++;
2147 					sptr = rptr;
2148 					while (*sptr != '\0') {
2149 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2150 						if (*sptr == '\\') {
2151 							backsub(&pb, &sptr);
2152 						} else if (*sptr == '&') {
2153 							sptr++;
2154 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2155 							for (q = patbeg; q < patbeg+patlen; )
2156 								*pb++ = *q++;
2157 						} else
2158 							*pb++ = *sptr++;
2159 					}
2160 				}
2161 				if (*t == '\0')	/* at end */
2162 					goto done;
				/* copy one source character so the scan moves forward */
2163 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
2164 				*pb++ = *t++;
2165 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2166 					FATAL("gsub result0 %.30s too big; can't happen", buf);
2167 				mflag = 0;
2168 			}
2169 			else {	/* matched nonempty string */
2170 				num++;
				/* copy the text between the previous position and the match */
2171 				sptr = t;
2172 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2173 				while (sptr < patbeg)
2174 					*pb++ = *sptr++;
				/* then the expanded replacement */
2175 				sptr = rptr;
2176 				while (*sptr != '\0') {
2177 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2178 					if (*sptr == '\\') {
2179 						backsub(&pb, &sptr);
2180 					} else if (*sptr == '&') {
2181 						sptr++;
2182 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2183 						for (q = patbeg; q < patbeg+patlen; )
2184 							*pb++ = *q++;
2185 					} else
2186 						*pb++ = *sptr++;
2187 				}
2188 				t = patbeg + patlen;
2189 				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
2190 					goto done;
2191 				if (pb > buf + bufsz)
2192 					FATAL("gsub result1 %.30s too big; can't happen", buf);
2193 				mflag = 1;
2194 			}
2195 		} while (pmatch(pfa,t));
		/* no further match: append the remainder, including its terminator */
2196 		sptr = t;
2197 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2198 		while ((*pb++ = *sptr++) != '\0')
2199 			continue;
2200 	done:	if (pb < buf + bufsz)
2201 			*pb = '\0';
2202 		else if (*(pb-1) != '\0')
2203 			FATAL("gsub result2 %.30s truncated; can't happen", buf);
2204 		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
2205 		pfa->initstat = tempstat;
2206 	}
2207 	tempfree(x);
2208 	tempfree(y);
2209 	x = gettemp();
2210 	x->tval = NUM;
2211 	x->fval = num;
2212 	free(buf);
2213 	return(x);
2214 }
2215 
/*
 * gensub(): like gsub(), but the source string a[4] is left untouched and
 * the substituted text is returned in a copy of its Cell (marked CTEMP).
 * a[0]/a[1] select the pattern as in sub(), a[2] is the replacement and
 * a[3] says which match to replace: a value starting with 'g' or 'G'
 * means every match, otherwise it is a 1-based match index (values below
 * 1 are treated as 1).  A replacement containing a backslash followed by
 * a digit is rejected with a fatal error.
 */
2216 Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2217 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2218 {
2219 	Cell *x, *y, *res, *h;
2220 	char *rptr;
2221 	const char *sptr;
2222 	char *buf, *pb;
2223 	const char *t, *q;
2224 	fa *pfa;
2225 	int mflag, tempstat, num, whichm;
2226 	int bufsz = recsize;
2227 
2228 	if ((buf = malloc(bufsz)) == NULL)
2229 		FATAL("out of memory in gensub");
2230 	mflag = 0;	/* if mflag == 0, can replace empty string */
2231 	num = 0;
2232 	x = execute(a[4]);	/* source string */
2233 	t = getsval(x);
2234 	res = copycell(x);	/* target string - initially copy of source */
2235 	res->csub = CTEMP;	/* result values are temporary */
2236 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2237 		pfa = (fa *) a[1];	/* regular expression */
2238 	else {
2239 		y = execute(a[1]);
2240 		pfa = makedfa(getsval(y), 1);
2241 		tempfree(y);
2242 	}
2243 	y = execute(a[2]);	/* replacement string */
2244 	h = execute(a[3]);	/* which matches should be replaced */
2245 	sptr = getsval(h);
	/* whichm: -1 = replace every match, otherwise zero-based index of the one to replace */
2246 	if (sptr[0] == 'g' || sptr[0] == 'G')
2247 		whichm = -1;
2248 	else {
2249 		/*
2250 		 * The specified number is index of replacement, starting
2251 		 * from 1. GNU awk treats index lower than 0 same as
2252 		 * 1, we do same for compatibility.
2253 		 */
2254 		whichm = (int) getfval(h) - 1;
2255 		if (whichm < 0)
2256 			whichm = 0;
2257 	}
2258 	tempfree(h);
2259 
2260 	if (pmatch(pfa, t)) {
2261 		char *sl;
2262 
		/* initstat is forced to 2 for the scan and restored at the end */
2263 		tempstat = pfa->initstat;
2264 		pfa->initstat = 2;
2265 		pb = buf;
2266 		rptr = getsval(y);
2267 		/*
2268 		 * XXX if there are any backreferences in subst string,
2269 		 * complain now.
2270 		 */
2271 		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2272 			if (strchr("0123456789", sl[1])) {
2273 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2274 			}
2275 		}
2276 
2277 		do {
			/* not the selected match: count it and copy it through unchanged */
2278 			if (whichm >= 0 && whichm != num) {
2279 				num++;
2280 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2281 
2282 				/* copy the part of string up to and including
2283 				 * match to output buffer */
2284 				while (t < patbeg + patlen)
2285 					*pb++ = *t++;
2286 				continue;
2287 			}
2288 
2289 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2290 				if (mflag == 0) {	/* can replace empty */
2291 					num++;
2292 					sptr = rptr;
2293 					while (*sptr != 0) {
2294 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2295 						if (*sptr == '\\') {
2296 							backsub(&pb, &sptr);
2297 						} else if (*sptr == '&') {
2298 							sptr++;
2299 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2300 							for (q = patbeg; q < patbeg+patlen; )
2301 								*pb++ = *q++;
2302 						} else
2303 							*pb++ = *sptr++;
2304 					}
2305 				}
2306 				if (*t == 0)	/* at end */
2307 					goto done;
				/* copy one source character so the scan moves forward */
2308 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2309 				*pb++ = *t++;
2310 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2311 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2312 				mflag = 0;
2313 			}
2314 			else {	/* matched nonempty string */
2315 				num++;
				/* copy the text before the match, then the expanded replacement */
2316 				sptr = t;
2317 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2318 				while (sptr < patbeg)
2319 					*pb++ = *sptr++;
2320 				sptr = rptr;
2321 				while (*sptr != 0) {
2322 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2323 					if (*sptr == '\\') {
2324 						backsub(&pb, &sptr);
2325 					} else if (*sptr == '&') {
2326 						sptr++;
2327 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2328 						for (q = patbeg; q < patbeg+patlen; )
2329 							*pb++ = *q++;
2330 					} else
2331 						*pb++ = *sptr++;
2332 				}
2333 				t = patbeg + patlen;
2334 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2335 					goto done;
2336 				if (pb > buf + bufsz)
2337 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2338 				mflag = 1;
2339 			}
2340 		} while (pmatch(pfa,t));
		/* no further match: append the remainder, including its terminator */
2341 		sptr = t;
2342 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2343 		while ((*pb++ = *sptr++) != 0)
2344 			;
2345 	done:	if (pb > buf + bufsz)
2346 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2347 		*pb = '\0';
2348 		setsval(res, buf);
2349 		pfa->initstat = tempstat;
2350 	}
2351 	tempfree(x);
2352 	tempfree(y);
2353 	free(buf);
2354 	return(res);
2355 }
2356 
2357 void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
2358 {						/* sptr[0] == '\\' */
2359 	char *pb = *pb_ptr;
2360 	const char *sptr = *sptr_ptr;
2361 
2362 	if (sptr[1] == '\\') {
2363 		if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2364 			*pb++ = '\\';
2365 			*pb++ = '&';
2366 			sptr += 4;
2367 		} else if (sptr[2] == '&') {	/* \\& -> \ + matched */
2368 			*pb++ = '\\';
2369 			sptr += 2;
2370 		} else if (do_posix) {		/* \\x -> \x */
2371 			sptr++;
2372 			*pb++ = *sptr++;
2373 		} else {			/* \\x -> \\x */
2374 			*pb++ = *sptr++;
2375 			*pb++ = *sptr++;
2376 		}
2377 	} else if (sptr[1] == '&') {	/* literal & */
2378 		sptr++;
2379 		*pb++ = *sptr++;
2380 	} else				/* literal \ */
2381 		*pb++ = *sptr++;
2382 
2383 	*pb_ptr = pb;
2384 	*sptr_ptr = sptr;
2385 }
2386