xref: /openbsd/usr.bin/awk/run.c (revision d89ec533)
1 /*	$OpenBSD: run.c,v 1.70 2021/11/01 18:28:24 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define DEBUG
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <wchar.h>
31 #include <wctype.h>
32 #include <fcntl.h>
33 #include <setjmp.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <string.h>
37 #include <stdlib.h>
38 #include <time.h>
39 #include <sys/types.h>
40 #include <sys/wait.h>
41 #include "awk.h"
42 #include "awkgram.tab.h"
43 
/* Helpers private to this file; their definitions are not in this part of the source. */
static void stdinit(void);
static void flush_all(void);

/*
 * tempfree(x): hand cell x to tfree() only when istemp(x) holds, so it can
 * be applied to any Cell pointer.  The macro form is the one compiled in;
 * note that it evaluates its argument twice.  The function form in the
 * #else branch additionally warns about an OCELL whose csub lies outside
 * the CUNK..CFREE range.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p) {
	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p))
		tfree(p);
}
#endif
59 
60 /* do we really need these? */
61 /* #ifdef _NFILE */
62 /* #ifndef FOPEN_MAX */
63 /* #define FOPEN_MAX _NFILE */
64 /* #endif */
65 /* #endif */
66 /*  */
67 /* #ifndef	FOPEN_MAX */
68 /* #define	FOPEN_MAX	40 */	/* max number of open files */
69 /* #endif */
70 /*  */
71 /* #ifndef RAND_MAX */
72 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
73 /* #endif */
74 
jmp_buf env;			/* set by program(); jump() longjmps here on exit */
extern	int	pairstack[];
extern	Awkfloat	srand_seed;

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Shared, statically allocated result cells.  Routines in this file return
 * pointers to them rather than allocating: True/False for boolean results,
 * and the j* cells as the values produced by jump().
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
Cell	*jret	= &retcell;
/* template that gettemp() copies into every temporary cell it hands out */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
101 
102 /* buffer memory management */
/*
 * adjbuf - buffer memory management: make sure a heap buffer can hold at
 * least minlen bytes, growing it with realloc() when it is too small.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum; when non-zero the new size is rounded up
 *          to the next multiple of it
 * pbptr:   address of movable pointer into buffer, or 0 if none; it is
 *          re-based onto the new buffer after a successful grow
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return   0 for realloc failure, !=0 for success
 *
 * Nothing is touched when the buffer is already large enough.
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	if (minlen > *psiz) {
		char *tbuf;
		int rminlen = quantum ? minlen % quantum : 0;
		int boff = pbptr ? *pbptr - *pbuf : 0;
		/* round up to next multiple of quantum */
		if (rminlen)
			minlen += quantum - rminlen;
		tbuf = (char *) realloc(*pbuf, minlen);
		/* whatrtn may legitimately be absent, so never hand it raw to %s */
		DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
		if (tbuf == NULL) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		*pbuf = tbuf;
		*psiz = minlen;
		if (pbptr)
			*pbptr = tbuf + boff;
	}
	return 1;
}
136 
137 void run(Node *a)	/* execution of parse tree starts here */
138 {
139 
140 	stdinit();
141 	execute(a);
142 	closeall();
143 }
144 
145 Cell *execute(Node *u)	/* execute a node of the parse tree */
146 {
147 	Cell *(*proc)(Node **, int);
148 	Cell *x;
149 	Node *a;
150 
151 	if (u == NULL)
152 		return(True);
153 	for (a = u; ; a = a->nnext) {
154 		curnode = a;
155 		if (isvalue(a)) {
156 			x = (Cell *) (a->narg[0]);
157 			if (isfld(x) && !donefld)
158 				fldbld();
159 			else if (isrec(x) && !donerec)
160 				recbld();
161 			return(x);
162 		}
163 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
164 			FATAL("illegal statement");
165 		proc = proctab[a->nobj-FIRSTTOKEN];
166 		x = (*proc)(a->narg, a->nobj);
167 		if (isfld(x) && !donefld)
168 			fldbld();
169 		else if (isrec(x) && !donerec)
170 			recbld();
171 		if (isexpr(a))
172 			return(x);
173 		if (isjump(x))
174 			return(x);
175 		if (a->nnext == NULL)
176 			return(x);
177 		tempfree(x);
178 	}
179 }
180 
181 
182 Cell *program(Node **a, int n)	/* execute an awk program */
183 {				/* a[0] = BEGIN, a[1] = body, a[2] = END */
184 	Cell *x;
185 
186 	if (setjmp(env) != 0)
187 		goto ex;
188 	if (a[0]) {		/* BEGIN */
189 		x = execute(a[0]);
190 		if (isexit(x))
191 			return(True);
192 		if (isjump(x))
193 			FATAL("illegal break, continue, next or nextfile from BEGIN");
194 		tempfree(x);
195 	}
196 	if (a[1] || a[2])
197 		while (getrec(&record, &recsize, true) > 0) {
198 			x = execute(a[1]);
199 			if (isexit(x))
200 				break;
201 			tempfree(x);
202 		}
203   ex:
204 	if (setjmp(env) != 0)	/* handles exit within END */
205 		goto ex1;
206 	if (a[2]) {		/* END */
207 		x = execute(a[2]);
208 		if (isbreak(x) || isnext(x) || iscont(x))
209 			FATAL("illegal break, continue, next or nextfile from END");
210 		tempfree(x);
211 	}
212   ex1:
213 	return(True);
214 }
215 
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

#define	NARGS	50	/* max args in a call */

/* The frame stack is allocated on first use by call() and grown there, 100 frames at a time. */
struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
228 
/*
 * call - execute a call to a user-defined function.
 * a[0] evaluates to the function's Cell, a[1] is the list of argument
 * expressions.
 *
 * Steps, in order:
 *   1. evaluate the function cell; FATAL unless it is a function
 *   2. allocate the frame stack on first use
 *   3. evaluate every call argument: arrays are passed by reference,
 *      everything else through copycell()
 *   4. pad missing parameters (they serve as locals) with empty CCOPY temps
 *   5. push a frame, growing the frame stack when it is full
 *   6. execute the body, whose tree hangs off fcn->sval
 *   7. release the argument cells, copying array-ness back to the caller
 *      for a scalar argument that the function turned into an array
 *   8. pop the frame and return its retval cell
 *
 * When the body's result is an exit or next jump, that cell is returned
 * directly and the frame is not popped here.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	/* template for parameters the caller did not supply */
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	if (frame == NULL) {
		/* first call ever: create the frame stack */
		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* bounds the indices used on args[] and oargs[] below */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for the copy-back after the body */
		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		int dfp = frp - frame;	/* old index */
		frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;	/* re-base onto the possibly moved stack */
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
	y = execute((Node *)(fcn->sval));	/* execute body */
	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));

	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				/* a copied or padded cell that the body turned into an array */
				if (i >= ncall) {
					/* it was a local: discard the table and the cell */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* it was a supplied scalar: the caller's cell takes over the array */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			/* the body's result is this very parameter cell: free it once, here */
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
	frp--;
	return(z);
}
327 
328 Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
329 {
330 	Cell *y;
331 
332 	/* copy is not constant or field */
333 
334 	y = gettemp();
335 	y->tval = x->tval & ~(CON|FLD|REC);
336 	y->csub = CCOPY;	/* prevents freeing until call is over */
337 	y->nval = x->nval;	/* BUG? */
338 	if (isstr(x) /* || x->ctype == OCELL */) {
339 		y->sval = tostring(x->sval);
340 		y->tval &= ~DONTFREE;
341 	} else
342 		y->tval |= DONTFREE;
343 	y->fval = x->fval;
344 	return y;
345 }
346 
347 Cell *arg(Node **a, int n)	/* nth argument of a function */
348 {
349 
350 	n = ptoi(a[0]);	/* argument number, counting from 0 */
351 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
352 	if (n+1 > frp->nargs)
353 		FATAL("argument #%d of function %s was not supplied",
354 			n+1, frp->fcncell->nval);
355 	return frp->args[n];
356 }
357 
358 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
359 {
360 	Cell *y;
361 
362 	switch (n) {
363 	case EXIT:
364 		if (a[0] != NULL) {
365 			y = execute(a[0]);
366 			errorflag = (int) getfval(y);
367 			tempfree(y);
368 		}
369 		longjmp(env, 1);
370 	case RETURN:
371 		if (a[0] != NULL) {
372 			y = execute(a[0]);
373 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 				setsval(frp->retval, getsval(y));
375 				frp->retval->fval = getfval(y);
376 				frp->retval->tval |= NUM;
377 			}
378 			else if (y->tval & STR)
379 				setsval(frp->retval, getsval(y));
380 			else if (y->tval & NUM)
381 				setfval(frp->retval, getfval(y));
382 			else		/* can't happen */
383 				FATAL("bad type variable %d", y->tval);
384 			tempfree(y);
385 		}
386 		return(jret);
387 	case NEXT:
388 		return(jnext);
389 	case NEXTFILE:
390 		nextfile();
391 		return(jnextfile);
392 	case BREAK:
393 		return(jbreak);
394 	case CONTINUE:
395 		return(jcont);
396 	default:	/* can't happen */
397 		FATAL("illegal jump type %d", n);
398 	}
399 	return 0;	/* not reached */
400 }
401 
402 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
403 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
404 	Cell *r, *x;
405 	extern Cell **fldtab;
406 	FILE *fp;
407 	char *buf;
408 	int bufsize = recsize;
409 	int mode;
410 	bool newflag;
411 	double result;
412 
413 	if ((buf = (char *) malloc(bufsize)) == NULL)
414 		FATAL("out of memory in getline");
415 
416 	fflush(stdout);	/* in case someone is waiting for a prompt */
417 	r = gettemp();
418 	if (a[1] != NULL) {		/* getline < file */
419 		x = execute(a[2]);		/* filename */
420 		mode = ptoi(a[1]);
421 		if (mode == '|')		/* input pipe */
422 			mode = LE;	/* arbitrary flag */
423 		fp = openfile(mode, getsval(x), &newflag);
424 		tempfree(x);
425 		if (fp == NULL)
426 			n = -1;
427 		else
428 			n = readrec(&buf, &bufsize, fp, newflag);
429 		if (n <= 0) {
430 			;
431 		} else if (a[0] != NULL) {	/* getline var <file */
432 			x = execute(a[0]);
433 			setsval(x, buf);
434 			if (is_number(x->sval, & result)) {
435 				x->fval = result;
436 				x->tval |= NUM;
437 			}
438 			tempfree(x);
439 		} else {			/* getline <file */
440 			setsval(fldtab[0], buf);
441 			if (is_number(fldtab[0]->sval, & result)) {
442 				fldtab[0]->fval = result;
443 				fldtab[0]->tval |= NUM;
444 			}
445 		}
446 	} else {			/* bare getline; use current input */
447 		if (a[0] == NULL)	/* getline */
448 			n = getrec(&record, &recsize, true);
449 		else {			/* getline var */
450 			n = getrec(&buf, &bufsize, false);
451 			if (n > 0) {
452 				x = execute(a[0]);
453 				setsval(x, buf);
454 				if (is_number(x->sval, & result)) {
455 					x->fval = result;
456 					x->tval |= NUM;
457 				}
458 				tempfree(x);
459 			}
460 		}
461 	}
462 	setfval(r, (Awkfloat) n);
463 	free(buf);
464 	return r;
465 }
466 
467 Cell *getnf(Node **a, int n)	/* get NF */
468 {
469 	if (!donefld)
470 		fldbld();
471 	return (Cell *) a[0];
472 }
473 
474 static char *
475 makearraystring(Node *p, const char *func)
476 {
477 	char *buf;
478 	int bufsz = recsize;
479 	size_t blen;
480 
481 	if ((buf = (char *) malloc(bufsz)) == NULL) {
482 		FATAL("%s: out of memory", func);
483 	}
484 
485 	blen = 0;
486 	buf[blen] = '\0';
487 
488 	for (; p; p = p->nnext) {
489 		Cell *x = execute(p);	/* expr */
490 		char *s = getsval(x);
491 		size_t seplen = strlen(getsval(subseploc));
492 		size_t nsub = p->nnext ? seplen : 0;
493 		size_t slen = strlen(s);
494 		size_t tlen = blen + slen + nsub;
495 
496 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
497 			FATAL("%s: out of memory %s[%s...]",
498 			    func, x->nval, buf);
499 		}
500 		memcpy(buf + blen, s, slen);
501 		if (nsub) {
502 			memcpy(buf + blen + slen, *SUBSEP, nsub);
503 		}
504 		buf[tlen] = '\0';
505 		blen = tlen;
506 		tempfree(x);
507 	}
508 	return buf;
509 }
510 
511 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
512 {
513 	Cell *x, *z;
514 	char *buf;
515 
516 	x = execute(a[0]);	/* Cell* for symbol table */
517 	buf = makearraystring(a[1], __func__);
518 	if (!isarr(x)) {
519 		DPRINTF("making %s into an array\n", NN(x->nval));
520 		if (freeable(x))
521 			xfree(x->sval);
522 		x->tval &= ~(STR|NUM|DONTFREE);
523 		x->tval |= ARR;
524 		x->sval = (char *) makesymtab(NSYMTAB);
525 	}
526 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
527 	z->ctype = OCELL;
528 	z->csub = CVAR;
529 	tempfree(x);
530 	free(buf);
531 	return(z);
532 }
533 
534 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
535 {
536 	Cell *x;
537 
538 	x = execute(a[0]);	/* Cell* for symbol table */
539 	if (x == symtabloc) {
540 		FATAL("cannot delete SYMTAB or its elements");
541 	}
542 	if (!isarr(x))
543 		return True;
544 	if (a[1] == NULL) {	/* delete the elements, not the table */
545 		freesymtab(x);
546 		x->tval &= ~STR;
547 		x->tval |= ARR;
548 		x->sval = (char *) makesymtab(NSYMTAB);
549 	} else {
550 		char *buf = makearraystring(a[1], __func__);
551 		freeelem(x, buf);
552 		free(buf);
553 	}
554 	tempfree(x);
555 	return True;
556 }
557 
558 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
559 {
560 	Cell *ap, *k;
561 	char *buf;
562 
563 	ap = execute(a[1]);	/* array name */
564 	if (!isarr(ap)) {
565 		DPRINTF("making %s into an array\n", ap->nval);
566 		if (freeable(ap))
567 			xfree(ap->sval);
568 		ap->tval &= ~(STR|NUM|DONTFREE);
569 		ap->tval |= ARR;
570 		ap->sval = (char *) makesymtab(NSYMTAB);
571 	}
572 	buf = makearraystring(a[0], __func__);
573 	k = lookup(buf, (Array *) ap->sval);
574 	tempfree(ap);
575 	free(buf);
576 	if (k == NULL)
577 		return(False);
578 	else
579 		return(True);
580 }
581 
582 
583 Cell *matchop(Node **a, int n)	/* ~ and match() */
584 {
585 	Cell *x, *y;
586 	char *s, *t;
587 	int i;
588 	fa *pfa;
589 	int (*mf)(fa *, const char *) = match, mode = 0;
590 
591 	if (n == MATCHFCN) {
592 		mf = pmatch;
593 		mode = 1;
594 	}
595 	x = execute(a[1]);	/* a[1] = target text */
596 	s = getsval(x);
597 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
598 		i = (*mf)((fa *) a[2], s);
599 	else {
600 		y = execute(a[2]);	/* a[2] = regular expr */
601 		t = getsval(y);
602 		pfa = makedfa(t, mode);
603 		i = (*mf)(pfa, s);
604 		tempfree(y);
605 	}
606 	tempfree(x);
607 	if (n == MATCHFCN) {
608 		int start = patbeg - s + 1;
609 		if (patlen < 0)
610 			start = 0;
611 		setfval(rstartloc, (Awkfloat) start);
612 		setfval(rlengthloc, (Awkfloat) patlen);
613 		x = gettemp();
614 		x->tval = NUM;
615 		x->fval = start;
616 		return x;
617 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
618 		return(True);
619 	else
620 		return(False);
621 }
622 
623 
624 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
625 {
626 	Cell *x, *y;
627 	int i;
628 
629 	x = execute(a[0]);
630 	i = istrue(x);
631 	tempfree(x);
632 	switch (n) {
633 	case BOR:
634 		if (i) return(True);
635 		y = execute(a[1]);
636 		i = istrue(y);
637 		tempfree(y);
638 		if (i) return(True);
639 		else return(False);
640 	case AND:
641 		if ( !i ) return(False);
642 		y = execute(a[1]);
643 		i = istrue(y);
644 		tempfree(y);
645 		if (i) return(True);
646 		else return(False);
647 	case NOT:
648 		if (i) return(False);
649 		else return(True);
650 	default:	/* can't happen */
651 		FATAL("unknown boolean operator %d", n);
652 	}
653 	return 0;	/*NOTREACHED*/
654 }
655 
656 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
657 {
658 	int i;
659 	Cell *x, *y;
660 	Awkfloat j;
661 
662 	x = execute(a[0]);
663 	y = execute(a[1]);
664 	if (x->tval&NUM && y->tval&NUM) {
665 		j = x->fval - y->fval;
666 		i = j<0? -1: (j>0? 1: 0);
667 	} else {
668 		i = strcmp(getsval(x), getsval(y));
669 	}
670 	tempfree(x);
671 	tempfree(y);
672 	switch (n) {
673 	case LT:	if (i<0) return(True);
674 			else return(False);
675 	case LE:	if (i<=0) return(True);
676 			else return(False);
677 	case NE:	if (i!=0) return(True);
678 			else return(False);
679 	case EQ:	if (i == 0) return(True);
680 			else return(False);
681 	case GE:	if (i>=0) return(True);
682 			else return(False);
683 	case GT:	if (i>0) return(True);
684 			else return(False);
685 	default:	/* can't happen */
686 		FATAL("unknown relational operator %d", n);
687 	}
688 	return 0;	/*NOTREACHED*/
689 }
690 
691 void tfree(Cell *a)	/* free a tempcell */
692 {
693 	if (freeable(a)) {
694 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
695 		xfree(a->sval);
696 	}
697 	if (a == tmps)
698 		FATAL("tempcell list is curdled");
699 	a->cnext = tmps;
700 	tmps = a;
701 }
702 
703 Cell *gettemp(void)	/* get a tempcell */
704 {	int i;
705 	Cell *x;
706 
707 	if (!tmps) {
708 		tmps = (Cell *) calloc(100, sizeof(*tmps));
709 		if (!tmps)
710 			FATAL("out of space for temporaries");
711 		for (i = 1; i < 100; i++)
712 			tmps[i-1].cnext = &tmps[i];
713 		tmps[i-1].cnext = NULL;
714 	}
715 	x = tmps;
716 	tmps = x->cnext;
717 	*x = tempcell;
718 	return(x);
719 }
720 
721 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
722 {
723 	Awkfloat val;
724 	Cell *x;
725 	int m;
726 	char *s;
727 
728 	x = execute(a[0]);
729 	val = getfval(x);	/* freebsd: defend against super large field numbers */
730 	if ((Awkfloat)INT_MAX < val)
731 		FATAL("trying to access out of range field %s", x->nval);
732 	m = (int) val;
733 	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
734 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
735 		/* BUG: can x->nval ever be null??? */
736 	tempfree(x);
737 	x = fieldadr(m);
738 	x->ctype = OCELL;	/* BUG?  why are these needed? */
739 	x->csub = CFLD;
740 	return(x);
741 }
742 
743 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
744 {
745 	int k, m, n;
746 	char *s;
747 	int temp;
748 	Cell *x, *y, *z = NULL;
749 
750 	x = execute(a[0]);
751 	y = execute(a[1]);
752 	if (a[2] != NULL)
753 		z = execute(a[2]);
754 	s = getsval(x);
755 	k = strlen(s) + 1;
756 	if (k <= 1) {
757 		tempfree(x);
758 		tempfree(y);
759 		if (a[2] != NULL) {
760 			tempfree(z);
761 		}
762 		x = gettemp();
763 		setsval(x, "");
764 		return(x);
765 	}
766 	m = (int) getfval(y);
767 	if (m <= 0)
768 		m = 1;
769 	else if (m > k)
770 		m = k;
771 	tempfree(y);
772 	if (a[2] != NULL) {
773 		n = (int) getfval(z);
774 		tempfree(z);
775 	} else
776 		n = k - 1;
777 	if (n < 0)
778 		n = 0;
779 	else if (n > k - m)
780 		n = k - m;
781 	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
782 	y = gettemp();
783 	temp = s[n+m-1];	/* with thanks to John Linderman */
784 	s[n+m-1] = '\0';
785 	setsval(y, s + m - 1);
786 	s[n+m-1] = temp;
787 	tempfree(x);
788 	return(y);
789 }
790 
791 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
792 {
793 	Cell *x, *y, *z;
794 	char *s1, *s2, *p1, *p2, *q;
795 	Awkfloat v = 0.0;
796 
797 	x = execute(a[0]);
798 	s1 = getsval(x);
799 	y = execute(a[1]);
800 	s2 = getsval(y);
801 
802 	z = gettemp();
803 	for (p1 = s1; *p1 != '\0'; p1++) {
804 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
805 			continue;
806 		if (*p2 == '\0') {
807 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
808 			break;
809 		}
810 	}
811 	tempfree(x);
812 	tempfree(y);
813 	setfval(z, v);
814 	return(z);
815 }
816 
817 #define	MAXNUMSIZE	50
818 
819 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
820 {
821 	char *fmt;
822 	char *p, *t;
823 	const char *os;
824 	Cell *x;
825 	int flag = 0, n;
826 	int fmtwd; /* format width */
827 	int fmtsz = recsize;
828 	char *buf = *pbuf;
829 	int bufsize = *pbufsize;
830 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
831 #define BUFSZ(a)   (bufsize - ((a) - buf))
832 
833 	static bool first = true;
834 	static bool have_a_format = false;
835 
836 	if (first) {
837 		char xbuf[100];
838 
839 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
840 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
841 		first = false;
842 	}
843 
844 	os = s;
845 	p = buf;
846 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
847 		FATAL("out of memory in format()");
848 	while (*s) {
849 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
850 		if (*s != '%') {
851 			*p++ = *s++;
852 			continue;
853 		}
854 		if (*(s+1) == '%') {
855 			*p++ = '%';
856 			s += 2;
857 			continue;
858 		}
859 		/* have to be real careful in case this is a huge number, eg, %100000d */
860 		fmtwd = atoi(s+1);
861 		if (fmtwd < 0)
862 			fmtwd = -fmtwd;
863 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
864 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
865 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
866 				FATAL("format item %.30s... ran format() out of memory", os);
867 			/* Ignore size specifiers */
868 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
869 				t--;
870 				continue;
871 			}
872 			if (isalpha((uschar)*s))
873 				break;
874 			if (*s == '$') {
875 				FATAL("'$' not permitted in awk formats");
876 			}
877 			if (*s == '*') {
878 				if (a == NULL) {
879 					FATAL("not enough args in printf(%s)", os);
880 				}
881 				x = execute(a);
882 				a = a->nnext;
883 				snprintf(t - 1, FMTSZ(t - 1),
884 				    "%d", fmtwd=(int) getfval(x));
885 				if (fmtwd < 0)
886 					fmtwd = -fmtwd;
887 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
888 				t = fmt + strlen(fmt);
889 				tempfree(x);
890 			}
891 		}
892 		*t = '\0';
893 		if (fmtwd < 0)
894 			fmtwd = -fmtwd;
895 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
896 		switch (*s) {
897 		case 'a': case 'A':
898 			if (have_a_format)
899 				flag = *s;
900 			else
901 				flag = 'f';
902 			break;
903 		case 'f': case 'e': case 'g': case 'E': case 'G':
904 			flag = 'f';
905 			break;
906 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
907 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
908 			*(t-1) = 'j';
909 			*t = *s;
910 			*++t = '\0';
911 			break;
912 		case 's':
913 			flag = 's';
914 			break;
915 		case 'c':
916 			flag = 'c';
917 			break;
918 		default:
919 			WARNING("weird printf conversion %s", fmt);
920 			flag = '?';
921 			break;
922 		}
923 		if (a == NULL)
924 			FATAL("not enough args in printf(%s)", os);
925 		x = execute(a);
926 		a = a->nnext;
927 		n = MAXNUMSIZE;
928 		if (fmtwd > n)
929 			n = fmtwd;
930 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
931 		switch (flag) {
932 		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
933 			t = getsval(x);
934 			n = strlen(t);
935 			if (fmtwd > n)
936 				n = fmtwd;
937 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
938 			p += strlen(p);
939 			snprintf(p, BUFSZ(p), "%s", t);
940 			break;
941 		case 'a':
942 		case 'A':
943 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
944 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
945 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
946 		case 's':
947 			t = getsval(x);
948 			n = strlen(t);
949 			if (fmtwd > n)
950 				n = fmtwd;
951 			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
952 				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
953 			snprintf(p, BUFSZ(p), fmt, t);
954 			break;
955 		case 'c':
956 			if (isnum(x)) {
957 				if ((int)getfval(x))
958 					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
959 				else {
960 					*p++ = '\0'; /* explicit null byte */
961 					*p = '\0';   /* next output will start here */
962 				}
963 			} else
964 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
965 			break;
966 		default:
967 			FATAL("can't happen: bad conversion %c in format()", flag);
968 		}
969 		tempfree(x);
970 		p += strlen(p);
971 		s++;
972 	}
973 	*p = '\0';
974 	free(fmt);
975 	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
976 		execute(a);
977 	*pbuf = buf;
978 	*pbufsize = bufsize;
979 	return p - buf;
980 }
981 
982 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
983 {
984 	Cell *x;
985 	Node *y;
986 	char *buf;
987 	int bufsz=3*recsize;
988 
989 	if ((buf = (char *) malloc(bufsz)) == NULL)
990 		FATAL("out of memory in awksprintf");
991 	y = a[0]->nnext;
992 	x = execute(a[0]);
993 	if (format(&buf, &bufsz, getsval(x), y) == -1)
994 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
995 	tempfree(x);
996 	x = gettemp();
997 	x->sval = buf;
998 	x->tval = STR;
999 	return(x);
1000 }
1001 
1002 Cell *awkprintf(Node **a, int n)		/* printf */
1003 {	/* a[0] is list of args, starting with format string */
1004 	/* a[1] is redirection operator, a[2] is redirection file */
1005 	FILE *fp;
1006 	Cell *x;
1007 	Node *y;
1008 	char *buf;
1009 	int len;
1010 	int bufsz=3*recsize;
1011 
1012 	if ((buf = (char *) malloc(bufsz)) == NULL)
1013 		FATAL("out of memory in awkprintf");
1014 	y = a[0]->nnext;
1015 	x = execute(a[0]);
1016 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1017 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1018 	tempfree(x);
1019 	if (a[1] == NULL) {
1020 		/* fputs(buf, stdout); */
1021 		fwrite(buf, len, 1, stdout);
1022 		if (ferror(stdout))
1023 			FATAL("write error on stdout");
1024 	} else {
1025 		fp = redirect(ptoi(a[1]), a[2]);
1026 		/* fputs(buf, fp); */
1027 		fwrite(buf, len, 1, fp);
1028 		fflush(fp);
1029 		if (ferror(fp))
1030 			FATAL("write error on %s", filename(fp));
1031 	}
1032 	free(buf);
1033 	return(True);
1034 }
1035 
1036 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1037 {
1038 	Awkfloat i, j = 0;
1039 	double v;
1040 	Cell *x, *y, *z;
1041 
1042 	x = execute(a[0]);
1043 	i = getfval(x);
1044 	tempfree(x);
1045 	if (n != UMINUS && n != UPLUS) {
1046 		y = execute(a[1]);
1047 		j = getfval(y);
1048 		tempfree(y);
1049 	}
1050 	z = gettemp();
1051 	switch (n) {
1052 	case ADD:
1053 		i += j;
1054 		break;
1055 	case MINUS:
1056 		i -= j;
1057 		break;
1058 	case MULT:
1059 		i *= j;
1060 		break;
1061 	case DIVIDE:
1062 		if (j == 0)
1063 			FATAL("division by zero");
1064 		i /= j;
1065 		break;
1066 	case MOD:
1067 		if (j == 0)
1068 			FATAL("division by zero in mod");
1069 		modf(i/j, &v);
1070 		i = i - j * v;
1071 		break;
1072 	case UMINUS:
1073 		i = -i;
1074 		break;
1075 	case UPLUS: /* handled by getfval(), above */
1076 		break;
1077 	case POWER:
1078 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1079 			i = ipow(i, (int) j);
1080                else {
1081 			errno = 0;
1082 			i = errcheck(pow(i, j), "pow");
1083                }
1084 		break;
1085 	default:	/* can't happen */
1086 		FATAL("illegal arithmetic operator %d", n);
1087 	}
1088 	setfval(z, i);
1089 	return(z);
1090 }
1091 
/*
 * ipow - x**n by repeated squaring.  ought to be done by pow, but isn't
 * always.  Any n <= 0 yields 1.
 */
double ipow(double x, int n)
{
	double half;

	if (n <= 0)
		return 1;
	half = ipow(x, n / 2);
	/* an odd n needs one extra factor of x */
	return (n % 2 != 0) ? x * half * half : half * half;
}
1104 
1105 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1106 {
1107 	Cell *x, *z;
1108 	int k;
1109 	Awkfloat xf;
1110 
1111 	x = execute(a[0]);
1112 	xf = getfval(x);
1113 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1114 	if (n == PREINCR || n == PREDECR) {
1115 		setfval(x, xf + k);
1116 		return(x);
1117 	}
1118 	z = gettemp();
1119 	setfval(z, xf);
1120 	setfval(x, xf + k);
1121 	tempfree(x);
1122 	return(z);
1123 }
1124 
1125 Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1126 {		/* this is subtle; don't muck with it. */
1127 	Cell *x, *y;
1128 	Awkfloat xf, yf;
1129 	double v;
1130 
1131 	y = execute(a[1]);
1132 	x = execute(a[0]);
1133 	if (n == ASSIGN) {	/* ordinary assignment */
1134 		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1135 			;	/* self-assignment: leave alone unless it's a field or NF */
1136 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1137 			setsval(x, getsval(y));
1138 			x->fval = getfval(y);
1139 			x->tval |= NUM;
1140 		}
1141 		else if (isstr(y))
1142 			setsval(x, getsval(y));
1143 		else if (isnum(y))
1144 			setfval(x, getfval(y));
1145 		else
1146 			funnyvar(y, "read value of");
1147 		tempfree(y);
1148 		return(x);
1149 	}
1150 	xf = getfval(x);
1151 	yf = getfval(y);
1152 	switch (n) {
1153 	case ADDEQ:
1154 		xf += yf;
1155 		break;
1156 	case SUBEQ:
1157 		xf -= yf;
1158 		break;
1159 	case MULTEQ:
1160 		xf *= yf;
1161 		break;
1162 	case DIVEQ:
1163 		if (yf == 0)
1164 			FATAL("division by zero in /=");
1165 		xf /= yf;
1166 		break;
1167 	case MODEQ:
1168 		if (yf == 0)
1169 			FATAL("division by zero in %%=");
1170 		modf(xf/yf, &v);
1171 		xf = xf - yf * v;
1172 		break;
1173 	case POWEQ:
1174 		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1175 			xf = ipow(xf, (int) yf);
1176                else {
1177 			errno = 0;
1178 			xf = errcheck(pow(xf, yf), "pow");
1179                }
1180 		break;
1181 	default:
1182 		FATAL("illegal assignment operator %d", n);
1183 		break;
1184 	}
1185 	tempfree(y);
1186 	setfval(x, xf);
1187 	return(x);
1188 }
1189 
1190 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1191 {
1192 	Cell *x, *y, *z;
1193 	int n1, n2;
1194 	char *s = NULL;
1195 	int ssz = 0;
1196 
1197 	x = execute(a[0]);
1198 	n1 = strlen(getsval(x));
1199 	adjbuf(&s, &ssz, n1, recsize, 0, "cat1");
1200 	memcpy(s, x->sval, n1);
1201 
1202 	y = execute(a[1]);
1203 	n2 = strlen(getsval(y));
1204 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1205 	memcpy(s + n1, y->sval, n2);
1206 	s[n1 + n2] = '\0';
1207 
1208 	tempfree(x);
1209 	tempfree(y);
1210 
1211 	z = gettemp();
1212 	z->sval = s;
1213 	z->tval = STR;
1214 
1215 	return(z);
1216 }
1217 
1218 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1219 {
1220 	Cell *x;
1221 
1222 	if (a[0] == NULL)
1223 		x = execute(a[1]);
1224 	else {
1225 		x = execute(a[0]);
1226 		if (istrue(x)) {
1227 			tempfree(x);
1228 			x = execute(a[1]);
1229 		}
1230 	}
1231 	return x;
1232 }
1233 
1234 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1235 {
1236 	Cell *x;
1237 	int pair;
1238 
1239 	pair = ptoi(a[3]);
1240 	if (pairstack[pair] == 0) {
1241 		x = execute(a[0]);
1242 		if (istrue(x))
1243 			pairstack[pair] = 1;
1244 		tempfree(x);
1245 	}
1246 	if (pairstack[pair] == 1) {
1247 		x = execute(a[1]);
1248 		if (istrue(x))
1249 			pairstack[pair] = 0;
1250 		tempfree(x);
1251 		x = execute(a[2]);
1252 		return(x);
1253 	}
1254 	return(False);
1255 }
1256 
/*
 * split: implements split(a[0], a[1], a[2]).
 *   a[0]  source string expression
 *   a[1]  array to fill; its old contents are discarded
 *   a[2]  separator: NULL (use FS), a string expression, or a compiled
 *         regular expression, as indicated by the type code in a[3]
 * Elements are stored under the keys "1", "2", ...; pieces that
 * is_number() accepts are stored as STR|NUM, the rest as STR.
 * Returns a temporary NUM cell holding the number of elements stored.
 */
Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
{
	Cell *x = NULL, *y, *ap;
	const char *s, *origs, *t;
	const char *fs = NULL;
	char *origfs = NULL;
	int sep;
	char temp, num[50];
	int n, tempstat, arg3type;
	double result;

	y = execute(a[0]);	/* source string */
	/* work on a private copy; origs keeps the start so it can be freed */
	origs = s = strdup(getsval(y));
	if (s == NULL)
		FATAL("out of space in split");
	arg3type = ptoi(a[3]);
	if (a[2] == NULL)		/* fs string */
		fs = getsval(fsloc);
	else if (arg3type == STRING) {	/* split(str,arr,"string") */
		x = execute(a[2]);
		fs = origfs = strdup(getsval(x));
		if (fs == NULL)
			FATAL("out of space in split");
		tempfree(x);
	} else if (arg3type == REGEXPR)
		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
	else
		FATAL("illegal type of split");
	sep = *fs;	/* first character of the separator */
	ap = execute(a[1]);	/* array name */
	/* throw away the old contents and install a fresh, empty table */
	freesymtab(ap);
	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
	ap->tval &= ~STR;
	ap->tval |= ARR;
	ap->sval = (char *) makesymtab(NSYMTAB);

	n = 0;
        if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
		/* split(s, a, //); have to arrange that it looks like empty sep */
		arg3type = 0;
		fs = "";
		sep = 0;
	}
	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
		fa *pfa;
		if (arg3type == REGEXPR) {	/* it's ready already */
			pfa = (fa *) a[2];
		} else {
			pfa = makedfa(fs, 1);
		}
		if (nematch(pfa,s)) {
			tempstat = pfa->initstat;
			pfa->initstat = 2;
			do {
				n++;
				snprintf(num, sizeof(num), "%d", n);
				/* cut the string at the match, store the piece, then restore */
				temp = *patbeg;
				setptr(patbeg, '\0');
				if (is_number(s, & result))
					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
				else
					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
				setptr(patbeg, temp);
				s = patbeg + patlen;	/* continue after the separator */
				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
					/* separator reached the end: store a final empty element */
					n++;
					snprintf(num, sizeof(num), "%d", n);
					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
					pfa->initstat = tempstat;
					goto spdone;
				}
			} while (nematch(pfa,s));
			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
							/* cf gsub and refldbld */
		}
		/* whatever follows the last separator is the final element */
		n++;
		snprintf(num, sizeof(num), "%d", n);
		if (is_number(s, & result))
			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
		else
			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
  spdone:
		pfa = NULL;
	} else if (sep == ' ') {
		/* blank separator: elements are runs of non-whitespace */
		for (n = 0; ; ) {
#define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
			while (ISWS(*s))
				s++;
			if (*s == '\0')
				break;
			n++;
			t = s;
			do
				s++;
			while (*s != '\0' && !ISWS(*s));
			/* terminate the word in place, store it, then restore */
			temp = *s;
			setptr(s, '\0');
			snprintf(num, sizeof(num), "%d", n);
			if (is_number(t, & result))
				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
			setptr(s, temp);
			if (*s != '\0')
				s++;
		}
	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
		for (n = 0; *s != '\0'; s++) {
			char buf[2];
			n++;
			snprintf(num, sizeof(num), "%d", n);
			buf[0] = *s;
			buf[1] = '\0';
			/* a single digit is stored with its numeric value as well */
			if (isdigit((uschar)buf[0]))
				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
		}
	} else if (*s != '\0') {
		/* single-character separator; a newline also ends an element */
		for (;;) {
			n++;
			t = s;
			while (*s != sep && *s != '\n' && *s != '\0')
				s++;
			temp = *s;
			setptr(s, '\0');
			snprintf(num, sizeof(num), "%d", n);
			if (is_number(t, & result))
				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
			else
				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
			setptr(s, temp);
			if (*s++ == '\0')
				break;
		}
	}
	tempfree(ap);
	tempfree(y);
	xfree(origs);
	xfree(origfs);
	/* the result is the element count */
	x = gettemp();
	x->tval = NUM;
	x->fval = n;
	return(x);
}
1402 
1403 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1404 {
1405 	Cell *x;
1406 
1407 	x = execute(a[0]);
1408 	if (istrue(x)) {
1409 		tempfree(x);
1410 		x = execute(a[1]);
1411 	} else {
1412 		tempfree(x);
1413 		x = execute(a[2]);
1414 	}
1415 	return(x);
1416 }
1417 
1418 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1419 {
1420 	Cell *x;
1421 
1422 	x = execute(a[0]);
1423 	if (istrue(x)) {
1424 		tempfree(x);
1425 		x = execute(a[1]);
1426 	} else if (a[2] != NULL) {
1427 		tempfree(x);
1428 		x = execute(a[2]);
1429 	}
1430 	return(x);
1431 }
1432 
1433 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1434 {
1435 	Cell *x;
1436 
1437 	for (;;) {
1438 		x = execute(a[0]);
1439 		if (!istrue(x))
1440 			return(x);
1441 		tempfree(x);
1442 		x = execute(a[1]);
1443 		if (isbreak(x)) {
1444 			x = True;
1445 			return(x);
1446 		}
1447 		if (isnext(x) || isexit(x) || isret(x))
1448 			return(x);
1449 		tempfree(x);
1450 	}
1451 }
1452 
1453 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1454 {
1455 	Cell *x;
1456 
1457 	for (;;) {
1458 		x = execute(a[0]);
1459 		if (isbreak(x))
1460 			return True;
1461 		if (isnext(x) || isexit(x) || isret(x))
1462 			return(x);
1463 		tempfree(x);
1464 		x = execute(a[1]);
1465 		if (!istrue(x))
1466 			return(x);
1467 		tempfree(x);
1468 	}
1469 }
1470 
1471 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1472 {
1473 	Cell *x;
1474 
1475 	x = execute(a[0]);
1476 	tempfree(x);
1477 	for (;;) {
1478 		if (a[1]!=NULL) {
1479 			x = execute(a[1]);
1480 			if (!istrue(x)) return(x);
1481 			else tempfree(x);
1482 		}
1483 		x = execute(a[3]);
1484 		if (isbreak(x))		/* turn off break */
1485 			return True;
1486 		if (isnext(x) || isexit(x) || isret(x))
1487 			return(x);
1488 		tempfree(x);
1489 		x = execute(a[2]);
1490 		tempfree(x);
1491 	}
1492 }
1493 
1494 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1495 {
1496 	Cell *x, *vp, *arrayp, *cp, *ncp;
1497 	Array *tp;
1498 	int i;
1499 
1500 	vp = execute(a[0]);
1501 	arrayp = execute(a[1]);
1502 	if (!isarr(arrayp)) {
1503 		return True;
1504 	}
1505 	tp = (Array *) arrayp->sval;
1506 	tempfree(arrayp);
1507 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1508 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1509 			setsval(vp, cp->nval);
1510 			ncp = cp->cnext;
1511 			x = execute(a[2]);
1512 			if (isbreak(x)) {
1513 				tempfree(vp);
1514 				return True;
1515 			}
1516 			if (isnext(x) || isexit(x) || isret(x)) {
1517 				tempfree(vp);
1518 				return(x);
1519 			}
1520 			tempfree(x);
1521 		}
1522 	}
1523 	return True;
1524 }
1525 
1526 static char *nawk_convert(const char *s, int (*fun_c)(int),
1527     wint_t (*fun_wc)(wint_t))
1528 {
1529 	char *buf      = NULL;
1530 	char *pbuf     = NULL;
1531 	const char *ps = NULL;
1532 	size_t n       = 0;
1533 	wchar_t wc;
1534 	size_t sz = MB_CUR_MAX;
1535 
1536 	if (sz == 1) {
1537 		buf = tostring(s);
1538 
1539 		for (pbuf = buf; *pbuf; pbuf++)
1540 			*pbuf = fun_c((uschar)*pbuf);
1541 
1542 		return buf;
1543 	} else {
1544 		/* upper/lower character may be shorter/longer */
1545 		buf = tostringN(s, strlen(s) * sz + 1);
1546 
1547 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
1548 		/*
1549 		 * Reset internal state here too.
1550 		 * Assign result to avoid a compiler warning. (Casting to void
1551 		 * doesn't work.)
1552 		 * Increment said variable to avoid a different warning.
1553 		 */
1554 		int unused = wctomb(NULL, L'\0');
1555 		unused++;
1556 
1557 		ps   = s;
1558 		pbuf = buf;
1559 		while (n = mbtowc(&wc, ps, sz),
1560 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1561 		{
1562 			ps += n;
1563 
1564 			n = wctomb(pbuf, fun_wc(wc));
1565 			if (n == (size_t)-1)
1566 				FATAL("illegal wide character %s", s);
1567 
1568 			pbuf += n;
1569 		}
1570 
1571 		*pbuf = '\0';
1572 
1573 		if (n)
1574 			FATAL("illegal byte sequence %s", s);
1575 
1576 		return buf;
1577 	}
1578 }
1579 
#ifdef __DJGPP__
/*
 * Replacement towupper() for DJGPP builds: values in 0..255 go through
 * toupper(); anything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return toupper(wc & 0xFF);
}

/*
 * Replacement towlower() for DJGPP builds: values in 0..255 go through
 * tolower(); anything else is returned unchanged.
 */
static wint_t towlower(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return tolower(wc & 0xFF);
}
#endif
1597 
/*
 * nawk_toupper: upper-case conversion of s, done by nawk_convert() with
 * toupper() as the byte mapper and towupper() as the wide-character
 * mapper.  Returns whatever nawk_convert() returns.
 */
static char *nawk_toupper(const char *s)
{
	return nawk_convert(s, toupper, towupper);
}
1602 
/*
 * nawk_tolower: lower-case conversion of s, done by nawk_convert() with
 * tolower() as the byte mapper and towlower() as the wide-character
 * mapper.  Returns whatever nawk_convert() returns.
 */
static char *nawk_tolower(const char *s)
{
	return nawk_convert(s, tolower, towlower);
}
1607 
1608 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1609 {
1610 	Cell *x, *y;
1611 	Awkfloat u;
1612 	int t, sz;
1613 	Awkfloat tmp;
1614 	char *buf, *fmt;
1615 	Node *nextarg;
1616 	FILE *fp;
1617 	int status = 0;
1618 	time_t tv;
1619 	struct tm *tm, tmbuf;
1620 
1621 	t = ptoi(a[0]);
1622 	x = execute(a[1]);
1623 	nextarg = a[1]->nnext;
1624 	switch (t) {
1625 	case FLENGTH:
1626 		if (isarr(x))
1627 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1628 		else
1629 			u = strlen(getsval(x));
1630 		break;
1631 	case FLOG:
1632 		errno = 0;
1633 		u = errcheck(log(getfval(x)), "log");
1634 		break;
1635 	case FINT:
1636 		modf(getfval(x), &u); break;
1637 	case FEXP:
1638 		errno = 0;
1639 		u = errcheck(exp(getfval(x)), "exp");
1640 		break;
1641 	case FSQRT:
1642 		errno = 0;
1643 		u = errcheck(sqrt(getfval(x)), "sqrt");
1644 		break;
1645 	case FSIN:
1646 		u = sin(getfval(x)); break;
1647 	case FCOS:
1648 		u = cos(getfval(x)); break;
1649 	case FATAN:
1650 		if (nextarg == NULL) {
1651 			WARNING("atan2 requires two arguments; returning 1.0");
1652 			u = 1.0;
1653 		} else {
1654 			y = execute(a[1]->nnext);
1655 			u = atan2(getfval(x), getfval(y));
1656 			tempfree(y);
1657 			nextarg = nextarg->nnext;
1658 		}
1659 		break;
1660 	case FCOMPL:
1661 		u = ~((int)getfval(x));
1662 		break;
1663 	case FAND:
1664 		if (nextarg == 0) {
1665 			WARNING("and requires two arguments; returning 0");
1666 			u = 0;
1667 			break;
1668 		}
1669 		y = execute(a[1]->nnext);
1670 		u = ((int)getfval(x)) & ((int)getfval(y));
1671 		tempfree(y);
1672 		nextarg = nextarg->nnext;
1673 		break;
1674 	case FFOR:
1675 		if (nextarg == 0) {
1676 			WARNING("or requires two arguments; returning 0");
1677 			u = 0;
1678 			break;
1679 		}
1680 		y = execute(a[1]->nnext);
1681 		u = ((int)getfval(x)) | ((int)getfval(y));
1682 		tempfree(y);
1683 		nextarg = nextarg->nnext;
1684 		break;
1685 	case FXOR:
1686 		if (nextarg == 0) {
1687 			WARNING("xor requires two arguments; returning 0");
1688 			u = 0;
1689 			break;
1690 		}
1691 		y = execute(a[1]->nnext);
1692 		u = ((int)getfval(x)) ^ ((int)getfval(y));
1693 		tempfree(y);
1694 		nextarg = nextarg->nnext;
1695 		break;
1696 	case FLSHIFT:
1697 		if (nextarg == 0) {
1698 			WARNING("lshift requires two arguments; returning 0");
1699 			u = 0;
1700 			break;
1701 		}
1702 		y = execute(a[1]->nnext);
1703 		u = ((int)getfval(x)) << ((int)getfval(y));
1704 		tempfree(y);
1705 		nextarg = nextarg->nnext;
1706 		break;
1707 	case FRSHIFT:
1708 		if (nextarg == 0) {
1709 			WARNING("rshift requires two arguments; returning 0");
1710 			u = 0;
1711 			break;
1712 		}
1713 		y = execute(a[1]->nnext);
1714 		u = ((int)getfval(x)) >> ((int)getfval(y));
1715 		tempfree(y);
1716 		nextarg = nextarg->nnext;
1717 		break;
1718 	case FSYSTEM:
1719 		fflush(stdout);		/* in case something is buffered already */
1720 		status = system(getsval(x));
1721 		u = status;
1722 		if (status != -1) {
1723 			if (WIFEXITED(status)) {
1724 				u = WEXITSTATUS(status);
1725 			} else if (WIFSIGNALED(status)) {
1726 				u = WTERMSIG(status) + 256;
1727 #ifdef WCOREDUMP
1728 				if (WCOREDUMP(status))
1729 					u += 256;
1730 #endif
1731 			} else	/* something else?!? */
1732 				u = 0;
1733 		}
1734 		break;
1735 	case FRAND:
1736 		/* random() returns numbers in [0..2^31-1]
1737 		 * in order to get a number in [0, 1), divide it by 2^31
1738 		 */
1739 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
1740 		break;
1741 	case FSRAND:
1742 		if (isrec(x)) {		/* no argument provided */
1743 			u = time(NULL);
1744 			tmp = u;
1745 			srandom((unsigned int) u);
1746 		} else {
1747 			u = getfval(x);
1748 			tmp = u;
1749 			srandom_deterministic((unsigned int) u);
1750 		}
1751 		u = srand_seed;
1752 		srand_seed = tmp;
1753 		break;
1754 	case FTOUPPER:
1755 	case FTOLOWER:
1756 		if (t == FTOUPPER)
1757 			buf = nawk_toupper(getsval(x));
1758 		else
1759 			buf = nawk_tolower(getsval(x));
1760 		tempfree(x);
1761 		x = gettemp();
1762 		setsval(x, buf);
1763 		free(buf);
1764 		return x;
1765 	case FFLUSH:
1766 		if (isrec(x) || strlen(getsval(x)) == 0) {
1767 			flush_all();	/* fflush() or fflush("") -> all */
1768 			u = 0;
1769 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1770 			u = EOF;
1771 		else
1772 			u = fflush(fp);
1773 		break;
1774 	case FMKTIME:
1775 		memset(&tmbuf, 0, sizeof(tmbuf));
1776 		tm = &tmbuf;
1777 		t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
1778 		    &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
1779 		    &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
1780 		switch (t) {
1781 		case 6:
1782 			tm->tm_isdst = -1;	/* let mktime figure it out */
1783 			/* FALLTHROUGH */
1784 		case 7:
1785 			tm->tm_year -= 1900;
1786 			tm->tm_mon--;
1787 			u = mktime(tm);
1788 			break;
1789 		default:
1790 			u = -1;
1791 			break;
1792 		}
1793 		break;
1794 	case FSYSTIME:
1795 		u = time((time_t *) 0);
1796 		break;
1797 	case FSTRFTIME:
1798 		/* strftime([format [,timestamp]]) */
1799 		if (nextarg) {
1800 			y = execute(nextarg);
1801 			nextarg = nextarg->nnext;
1802 			tv = (time_t) getfval(y);
1803 			tempfree(y);
1804 		} else
1805 			tv = time((time_t *) 0);
1806 		tm = localtime(&tv);
1807 		if (tm == NULL)
1808 			FATAL("bad time %ld", (long)tv);
1809 
1810 		if (isrec(x)) {
1811 			/* format argument not provided, use default */
1812 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1813 		} else
1814 			fmt = tostring(getsval(x));
1815 
1816 		sz = 32;
1817 		buf = NULL;
1818 		do {
1819 			if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
1820 				FATAL("out of memory in strftime");
1821 			sz *= 2;
1822 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
1823 
1824 		y = gettemp();
1825 		setsval(y, buf);
1826 		free(fmt);
1827 		free(buf);
1828 
1829 		return y;
1830 	default:	/* can't happen */
1831 		FATAL("illegal function type %d", t);
1832 		break;
1833 	}
1834 	tempfree(x);
1835 	x = gettemp();
1836 	setfval(x, u);
1837 	if (nextarg != NULL) {
1838 		WARNING("warning: function has too many arguments");
1839 		for ( ; nextarg; nextarg = nextarg->nnext)
1840 			execute(nextarg);
1841 	}
1842 	return(x);
1843 }
1844 
1845 Cell *printstat(Node **a, int n)	/* print a[0] */
1846 {
1847 	Node *x;
1848 	Cell *y;
1849 	FILE *fp;
1850 
1851 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
1852 		fp = stdout;
1853 	else
1854 		fp = redirect(ptoi(a[1]), a[2]);
1855 	for (x = a[0]; x != NULL; x = x->nnext) {
1856 		y = execute(x);
1857 		fputs(getpssval(y), fp);
1858 		tempfree(y);
1859 		if (x->nnext == NULL)
1860 			fputs(getsval(orsloc), fp);
1861 		else
1862 			fputs(getsval(ofsloc), fp);
1863 	}
1864 	if (a[1] != NULL)
1865 		fflush(fp);
1866 	if (ferror(fp))
1867 		FATAL("write error on %s", filename(fp));
1868 	return(True);
1869 }
1870 
1871 Cell *nullproc(Node **a, int n)
1872 {
1873 	return 0;
1874 }
1875 
1876 
1877 FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
1878 {
1879 	FILE *fp;
1880 	Cell *x;
1881 	char *fname;
1882 
1883 	x = execute(b);
1884 	fname = getsval(x);
1885 	fp = openfile(a, fname, NULL);
1886 	if (fp == NULL)
1887 		FATAL("can't open file %s", fname);
1888 	tempfree(x);
1889 	return fp;
1890 }
1891 
/*
 * Table of the streams awk has open, one entry per file or pipe.
 * openfile() treats an entry whose fp is NULL as a free slot.
 */
struct files {
	FILE	*fp;		/* the open stream, or NULL */
	const char	*fname;	/* name the stream was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
} *files;

size_t nfiles;	/* number of entries allocated in files[] */
1899 
1900 static void stdinit(void)	/* in case stdin, etc., are not constants */
1901 {
1902 	nfiles = FOPEN_MAX;
1903 	files = (struct files *) calloc(nfiles, sizeof(*files));
1904 	if (files == NULL)
1905 		FATAL("can't allocate file memory for %zu files", nfiles);
1906         files[0].fp = stdin;
1907 	files[0].fname = "/dev/stdin";
1908 	files[0].mode = LT;
1909         files[1].fp = stdout;
1910 	files[1].fname = "/dev/stdout";
1911 	files[1].mode = GT;
1912         files[2].fp = stderr;
1913 	files[2].fname = "/dev/stderr";
1914 	files[2].mode = GT;
1915 }
1916 
1917 FILE *openfile(int a, const char *us, bool *pnewflag)
1918 {
1919 	const char *s = us;
1920 	size_t i;
1921 	int m;
1922 	FILE *fp = NULL;
1923 
1924 	if (*s == '\0')
1925 		FATAL("null file name in print or getline");
1926 	for (i = 0; i < nfiles; i++)
1927 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1928 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1929 		     a == FFLUSH)) {
1930 			if (pnewflag)
1931 				*pnewflag = false;
1932 			return files[i].fp;
1933 		}
1934 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1935 		return NULL;
1936 
1937 	for (i = 0; i < nfiles; i++)
1938 		if (files[i].fp == NULL)
1939 			break;
1940 	if (i >= nfiles) {
1941 		struct files *nf;
1942 		size_t nnf = nfiles + FOPEN_MAX;
1943 		nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
1944 		if (nf == NULL)
1945 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1946 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1947 		nfiles = nnf;
1948 		files = nf;
1949 	}
1950 	fflush(stdout);	/* force a semblance of order */
1951 	m = a;
1952 	if (a == GT) {
1953 		fp = fopen(s, "w");
1954 	} else if (a == APPEND) {
1955 		fp = fopen(s, "a");
1956 		m = GT;	/* so can mix > and >> */
1957 	} else if (a == '|') {	/* output pipe */
1958 		fp = popen(s, "w");
1959 	} else if (a == LE) {	/* input pipe */
1960 		fp = popen(s, "r");
1961 	} else if (a == LT) {	/* getline <file */
1962 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1963 	} else	/* can't happen */
1964 		FATAL("illegal redirection %d", a);
1965 	if (fp != NULL) {
1966 		files[i].fname = tostring(s);
1967 		files[i].fp = fp;
1968 		files[i].mode = m;
1969 		if (pnewflag)
1970 			*pnewflag = true;
1971 		if (fp != stdin && fp != stdout && fp != stderr)
1972 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
1973 	}
1974 	return fp;
1975 }
1976 
1977 const char *filename(FILE *fp)
1978 {
1979 	size_t i;
1980 
1981 	for (i = 0; i < nfiles; i++)
1982 		if (fp == files[i].fp)
1983 			return files[i].fname;
1984 	return "???";
1985 }
1986 
1987  Cell *closefile(Node **a, int n)
1988  {
1989  	Cell *x;
1990 	size_t i;
1991 	bool stat;
1992 
1993  	x = execute(a[0]);
1994  	getsval(x);
1995 	stat = true;
1996  	for (i = 0; i < nfiles; i++) {
1997 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
1998 			continue;
1999 		if (ferror(files[i].fp))
2000 			FATAL("i/o error occurred on %s", files[i].fname);
2001 		if (files[i].fp == stdin || files[i].fp == stdout ||
2002 		    files[i].fp == stderr)
2003 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2004 		else if (files[i].mode == '|' || files[i].mode == LE)
2005 			stat = pclose(files[i].fp) == -1;
2006 		else
2007 			stat = fclose(files[i].fp) == EOF;
2008 		if (stat)
2009 			FATAL("i/o error occurred closing %s", files[i].fname);
2010 		if (i > 2)	/* don't do /dev/std... */
2011 			xfree(files[i].fname);
2012 		files[i].fname = NULL;	/* watch out for ref thru this */
2013 		files[i].fp = NULL;
2014 		break;
2015  	}
2016  	tempfree(x);
2017  	x = gettemp();
2018 	setfval(x, (Awkfloat) (stat ? -1 : 0));
2019  	return(x);
2020  }
2021 
2022 void closeall(void)
2023 {
2024 	size_t i;
2025 	bool stat = false;
2026 
2027 	for (i = 0; i < nfiles; i++) {
2028 		if (! files[i].fp)
2029 			continue;
2030 		if (ferror(files[i].fp))
2031 			FATAL( "i/o error occurred on %s", files[i].fname );
2032 		if (files[i].fp == stdin)
2033 			continue;
2034 		if (files[i].mode == '|' || files[i].mode == LE)
2035 			stat = pclose(files[i].fp) == -1;
2036 		else if (files[i].fp == stdout || files[i].fp == stderr)
2037 			stat = fflush(files[i].fp) == EOF;
2038 		else
2039 			stat = fclose(files[i].fp) == EOF;
2040 		if (stat)
2041 			FATAL( "i/o error occurred while closing %s", files[i].fname );
2042 	}
2043 }
2044 
2045 static void flush_all(void)
2046 {
2047 	size_t i;
2048 
2049 	for (i = 0; i < nfiles; i++)
2050 		if (files[i].fp)
2051 			fflush(files[i].fp);
2052 }
2053 
2054 void backsub(char **pb_ptr, const char **sptr_ptr);
2055 
/*
 * sub: the sub() built-in; replaces the first match only.
 *   a[0]  NULL when a[1] is an already-compiled regular expression,
 *         otherwise a[1] is an expression whose string value is compiled
 *   a[2]  replacement string expression
 *   a[3]  target string expression, updated with setsval() on a match
 * In the replacement, '&' stands for the matched text and backslash
 * sequences are handled by backsub().
 * Returns True when a substitution was made, False otherwise.
 */
Cell *sub(Node **a, int nnn)	/* substitute command */
{
	const char *sptr, *q;
	Cell *x, *y, *result;
	char *t, *buf, *pb;
	fa *pfa;
	int bufsz = recsize;

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in sub");
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	result = False;
	if (pmatch(pfa, t)) {
		/* copy the text that precedes the match */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
		pb = buf;
		while (sptr < patbeg)
			*pb++ = *sptr++;
		/* expand the replacement string into the buffer */
		sptr = getsval(y);
		while (*sptr != '\0') {
			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
			if (*sptr == '\\') {
				backsub(&pb, &sptr);
			} else if (*sptr == '&') {
				/* '&' inserts the matched text */
				sptr++;
				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
				for (q = patbeg; q < patbeg+patlen; )
					*pb++ = *q++;
			} else
				*pb++ = *sptr++;
		}
		*pb = '\0';
		if (pb > buf + bufsz)
			FATAL("sub result1 %.30s too big; can't happen", buf);
		/* append whatever follows the match, including the terminator */
		sptr = patbeg + patlen;
		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
			while ((*pb++ = *sptr++) != '\0')
				continue;
		}
		if (pb > buf + bufsz)
			FATAL("sub result2 %.30s too big; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy */
		result = True;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return result;
}
2115 
/*
 * gsub: the gsub() built-in; replaces every match.
 *   a[0]  NULL when a[1] is an already-compiled regular expression,
 *         otherwise a[1] is an expression whose string value is compiled
 *   a[2]  replacement string expression
 *   a[3]  target string expression, updated with setsval() on a match
 * In the replacement, '&' stands for the matched text and backslash
 * sequences are handled by backsub().
 * Returns a temporary NUM cell holding the number of substitutions.
 */
Cell *gsub(Node **a, int nnn)	/* global substitute */
{
	Cell *x, *y;
	char *rptr, *pb;
	const char *q, *t, *sptr;
	char *buf;
	fa *pfa;
	int mflag, tempstat, num;
	int bufsz = recsize;

	if ((buf = (char *) malloc(bufsz)) == NULL)
		FATAL("out of memory in gsub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;
	x = execute(a[3]);	/* target string */
	t = getsval(x);
	if (a[0] == NULL)	/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	if (pmatch(pfa, t)) {
		/* save initstat and force it to 2 while scanning; restored below */
		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		do {
			if (patlen == 0 && *patbeg != '\0') {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					sptr = rptr;
					while (*sptr != '\0') {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == '\0')	/* at end */
					goto done;
				/* copy one source character so the scan moves forward */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gsub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the text between the previous position and the match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* then the expanded replacement */
				sptr = rptr;
				while (*sptr != '\0') {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				t = patbeg + patlen;	/* resume after the match */
				if (patlen == 0 || *t == '\0' || *(t-1) == '\0')
					goto done;
				if (pb > buf + bufsz)
					FATAL("gsub result1 %.30s too big; can't happen", buf);
				mflag = 1;
			}
		} while (pmatch(pfa,t));
		/* no more matches: append the remainder, including the terminator */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
		while ((*pb++ = *sptr++) != '\0')
			continue;
	done:	if (pb < buf + bufsz)
			*pb = '\0';
		else if (*(pb-1) != '\0')
			FATAL("gsub result2 %.30s truncated; can't happen", buf);
		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	x = gettemp();
	x->tval = NUM;
	x->fval = num;
	free(buf);
	return(x);
}
2217 
/*
 * gensub: the gensub() built-in.
 *   a[0]  NULL when a[1] is an already-compiled regular expression,
 *         otherwise a[1] is an expression whose string value is compiled
 *   a[2]  replacement string expression
 *   a[3]  which match to replace: a string starting with 'g' or 'G'
 *         selects every match, otherwise the numeric value is a 1-based
 *         match index (values below 1 are treated as 1)
 *   a[4]  source string expression; it is not modified
 * Returns a CTEMP copy of the source cell, given the substituted text
 * when the pattern matched.  A replacement string that contains a
 * backslash followed by a digit is FATAL.
 */
Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
	Cell *x, *y, *res, *h;
	char *rptr;
	const char *sptr;
	char *buf, *pb;
	const char *t, *q;
	fa *pfa;
	int mflag, tempstat, num, whichm;
	int bufsz = recsize;

	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in gensub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;
	x = execute(a[4]);	/* source string */
	t = getsval(x);
	res = copycell(x);	/* target string - initially copy of source */
	res->csub = CTEMP;	/* result values are temporary */
	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	h = execute(a[3]);	/* which matches should be replaced */
	sptr = getsval(h);
	if (sptr[0] == 'g' || sptr[0] == 'G')
		whichm = -1;	/* -1 means replace every match */
	else {
		/*
		 * The specified number is index of replacement, starting
		 * from 1. GNU awk treats index lower than 0 same as
		 * 1, we do same for compatibility.
		 */
		whichm = (int) getfval(h) - 1;
		if (whichm < 0)
			whichm = 0;
	}
	tempfree(h);

	if (pmatch(pfa, t)) {
		char *sl;

		/* save initstat and force it to 2 while scanning; restored below */
		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		/*
		 * XXX if there are any backreferences in subst string,
		 * complain now.
		 */
		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
			if (strchr("0123456789", sl[1])) {
				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
			}
		}

		do {
			if (whichm >= 0 && whichm != num) {
				/* not the selected match: count it and copy it through */
				num++;
				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");

				/* copy the part of string up to and including
				 * match to output buffer */
				while (t < patbeg + patlen)
					*pb++ = *t++;
				/*
				 * NOTE(review): when the match is empty and starts
				 * at t, nothing is copied and t does not move before
				 * pmatch() runs again - confirm this cannot loop
				 * forever.
				 */
				continue;
			}

			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					sptr = rptr;
					while (*sptr != 0) {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == 0)	/* at end */
					goto done;
				/* copy one source character so the scan moves forward */
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gensub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				/* copy the text between the previous position and the match */
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				/* then the expanded replacement */
				sptr = rptr;
				while (*sptr != 0) {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				t = patbeg + patlen;	/* resume after the match */
				if (patlen == 0 || *t == 0 || *(t-1) == 0)
					goto done;
				if (pb > buf + bufsz)
					FATAL("gensub result1 %.30s too big; can't happen", buf);
				mflag = 1;
			}
		} while (pmatch(pfa,t));
		/* no more matches: append the remainder, including the terminator */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
		while ((*pb++ = *sptr++) != 0)
			;
	done:	if (pb > buf + bufsz)
			FATAL("gensub result2 %.30s too big; can't happen", buf);
		*pb = '\0';
		setsval(res, buf);
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return(res);
}
2358 
2359 void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
2360 {						/* sptr[0] == '\\' */
2361 	char *pb = *pb_ptr;
2362 	const char *sptr = *sptr_ptr;
2363 
2364 	if (sptr[1] == '\\') {
2365 		if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2366 			*pb++ = '\\';
2367 			*pb++ = '&';
2368 			sptr += 4;
2369 		} else if (sptr[2] == '&') {	/* \\& -> \ + matched */
2370 			*pb++ = '\\';
2371 			sptr += 2;
2372 		} else if (do_posix) {		/* \\x -> \x */
2373 			sptr++;
2374 			*pb++ = *sptr++;
2375 		} else {			/* \\x -> \\x */
2376 			*pb++ = *sptr++;
2377 			*pb++ = *sptr++;
2378 		}
2379 	} else if (sptr[1] == '&') {	/* literal & */
2380 		sptr++;
2381 		*pb++ = *sptr++;
2382 	} else				/* literal \ */
2383 		*pb++ = *sptr++;
2384 
2385 	*pb_ptr = pb;
2386 	*sptr_ptr = sptr;
2387 }
2388