xref: /minix/external/historical/nawk/dist/run.c (revision 0a6a1f1d)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 #if HAVE_NBTOOL_CONFIG_H
26 #include "nbtool_config.h"
27 #endif
28 
29 #define DEBUG
30 #include <stdio.h>
31 #include <ctype.h>
32 #include <wchar.h>
33 #include <wctype.h>
34 #include <setjmp.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <time.h>
40 #include <stdint.h>
41 #include "awk.h"
42 #include "awkgram.h"
43 
/*
 * tempfree(x): hand cell x to tfree() only when istemp(x) holds, so that
 * callers can invoke it unconditionally on any Cell they are done with.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)

/* Defined outside this file; run() calls it before executing the parse tree. */
void stdinit(void);
47 
48 /*
49 #undef tempfree
50 
51 void tempfree(Cell *p) {
52 	if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) {
53 		WARNING("bad csub %d in Cell %d %s",
54 			p->csub, p->ctype, p->sval);
55 	}
56 	if (istemp(p))
57 		tfree(p);
58 }
59 */
60 
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /*  */
68 /* #ifndef	FOPEN_MAX */
69 /* #define	FOPEN_MAX	40 */	/* max number of open files */
70 /* #endif */
71 /*  */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
74 /* #endif */
75 
jmp_buf env;		/* armed by setjmp() in program(); jump() longjmps to it for EXIT */
extern	int	pairstack[];	/* defined elsewhere */
extern	unsigned int srand_seed;	/* defined elsewhere */

Node	*winner = NULL;	/* root of parse tree */
Cell	*tmps;		/* free temporary cells for execution */

/*
 * Statically allocated sentinel cells and the public pointers to them.
 * True/False are the results returned by the boolean, relational and match
 * operators in this file.  jbreak, jcont, jnext, jnextfile and jret are the
 * values jump() returns for the corresponding statements.
 */
static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL };
Cell	*True	= &truecell;
static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL };
Cell	*False	= &falsecell;
static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL };
Cell	*jbreak	= &breakcell;
static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL };
Cell	*jcont	= &contcell;
static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL };
Cell	*jnext	= &nextcell;
static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL};
Cell	*jnextfile	= &nextfilecell;
static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL };
Cell	*jexit	= &exitcell;
static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL };
Cell	*jret	= &retcell;
/* template that gettemp() copies into every temporary cell it hands out */
static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL };

Node	*curnode = NULL;	/* the node being executed, for debugging */
102 
103 /* buffer memory management */
int adjbuf(uschar **pbuf, int *psiz, int minlen, int quantum, uschar **pbptr,
	const char *whatrtn)
/* Grow a realloc()-managed buffer so that it holds at least minlen bytes.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 means no rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error,
 *          or NULL to have the failure reported through the return value
 *
 * return   0 for realloc failure, !=0 for success
 *
 * Nothing is touched when the buffer is already large enough.  On growth,
 * *pbuf and *psiz are updated and *pbptr (if given) is re-based so that it
 * keeps the same offset inside the new buffer.
 */
{
	uschar *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;

	rminlen = quantum ? minlen % quantum : 0;
	/* remember the offset now: *pbuf may move in realloc() */
	boff = pbptr ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (rminlen)
		minlen += quantum - rminlen;
	tbuf = realloc(*pbuf, minlen);
	/* whatrtn may legitimately be NULL, so guard it for %s; %p needs void * */
	dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void *) *pbuf, (void *) tbuf) );
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;
	return 1;
}
137 
/*
 * run: entry point for executing a parse tree.
 * Calls stdinit(), executes the tree rooted at a (the resulting Cell is
 * discarded), then calls closeall().
 */
void run(Node *a)
{
	stdinit();
	(void) execute(a);
	closeall();
}
144 
execute(Node * u)145 Cell *execute(Node *u)	/* execute a node of the parse tree */
146 {
147 	Cell *(*proc)(Node **, int);
148 	Cell *x;
149 	Node *a;
150 
151 	if (u == NULL)
152 		return(True);
153 	for (a = u; ; a = a->nnext) {
154 		curnode = a;
155 		if (isvalue(a)) {
156 			x = (Cell *) (a->narg[0]);
157 			if (isfld(x) && !donefld)
158 				fldbld();
159 			else if (isrec(x) && !donerec)
160 				recbld();
161 			return(x);
162 		}
163 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
164 			FATAL("illegal statement");
165 		proc = proctab[a->nobj-FIRSTTOKEN];
166 		x = (*proc)(a->narg, a->nobj);
167 		if (isfld(x) && !donefld)
168 			fldbld();
169 		else if (isrec(x) && !donerec)
170 			recbld();
171 		if (isexpr(a))
172 			return(x);
173 		if (isjump(x))
174 			return(x);
175 		if (a->nnext == NULL)
176 			return(x);
177 		tempfree(x);
178 	}
179 }
180 
181 
program(Node ** a,int n)182 Cell *program(Node **a, int n)	/* execute an awk program */
183 {				/* a[0] = BEGIN, a[1] = body, a[2] = END */
184 	Cell *x;
185 
186 	if (setjmp(env) != 0)
187 		goto ex;
188 	if (a[0]) {		/* BEGIN */
189 		x = execute(a[0]);
190 		if (isexit(x))
191 			return(True);
192 		if (isjump(x))
193 			FATAL("illegal break, continue, next or nextfile from BEGIN");
194 		tempfree(x);
195 	}
196 	if (a[1] || a[2])
197 		while (getrec(&record, &recsize, 1) > 0) {
198 			x = execute(a[1]);
199 			if (isexit(x))
200 				break;
201 			tempfree(x);
202 		}
203   ex:
204 	if (setjmp(env) != 0)	/* handles exit within END */
205 		goto ex1;
206 	if (a[2]) {		/* END */
207 		x = execute(a[2]);
208 		if (isbreak(x) || isnext(x) || iscont(x))
209 			FATAL("illegal break, continue, next or nextfile from END");
210 		tempfree(x);
211 	}
212   ex1:
213 	return(True);
214 }
215 
/*
 * One Frame is pushed by call() for every active awk function call and is
 * read by arg() (argument lookup) and jump() (storing the RETURN value).
 */
struct Frame {	/* stack frame for awk function calls */
	int nargs;	/* number of arguments in this call */
	Cell *fcncell;	/* pointer to Cell for function */
	Cell **args;	/* pointer to array of arguments after execute */
	Cell *retval;	/* return value */
};

/* call() issues FATAL when call-site args plus declared params exceed this */
#define	NARGS	50	/* max args in a call */

/* The frame stack is allocated on first use by call() and grown 100 frames at a time. */
struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
int	nframe = 0;		/* number of frames allocated */
struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
228 
/*
 * call: invoke a user-defined awk function.
 *
 * a[0] evaluates to the function's Cell and a[1] is the list of argument
 * expressions.  The function cell's fval holds the number of declared
 * parameters and its sval holds the body (cast to Node *).
 *
 * Steps: evaluate the arguments (arrays are passed by reference, everything
 * else through copycell()), pad missing parameters with fresh CCOPY temps,
 * push a Frame, execute the body, release the per-call cells, and return
 * the frame's retval cell.  If the body produced an exit or next jump, that
 * jump cell is returned instead and the frame is not popped here.
 */
Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
{
	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL };
	int i, ncall, ndef;
	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
	Node *x;
	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
	Cell *y, *z, *fcn;
	char *s;

	fcn = execute(a[0]);	/* the function itself */
	s = fcn->nval;
	if (!isfcn(fcn))
		FATAL("calling undefined function %s", s);
	/* first call ever: allocate the initial frame stack */
	if (frame == NULL) {
		frp = frame = calloc(nframe += 100, sizeof(*frp));
		if (frame == NULL)
			FATAL("out of space for stack frames calling %s", s);
	}
	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
		ncall++;
	ndef = (int) fcn->fval;			/* args in defn */
	   dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (frp-frame)) );
	if (ncall > ndef)
		WARNING("function %s called with %d args, uses only %d",
			s, ncall, ndef);
	/* keeps every index used below inside args[] and oargs[] */
	if (ncall + ndef > NARGS)
		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
		   dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (frp-frame)) );
		y = execute(x);
		oargs[i] = y;	/* remember the caller's cell for the array fix-up below */
		   dprintf( ("args[%d]: %s %f <%s>, t=%o\n",
			   i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) );
		if (isfcn(y))
			FATAL("can't use function %s as argument in %s", y->nval, s);
		if (isarr(y))
			args[i] = y;	/* arrays by ref */
		else
			args[i] = copycell(y);
		tempfree(y);
	}
	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
		args[i] = gettemp();
		*args[i] = newcopycell;
	}
	frp++;	/* now ok to up frame */
	if (frp >= frame + nframe) {
		int dfp = frp - frame;	/* old index */
		frame = realloc(frame, (nframe += 100) * sizeof(*frame));
		if (frame == NULL)
			FATAL("out of space for stack frames in %s", s);
		frp = frame + dfp;	/* re-derive frp: realloc may have moved the stack */
	}
	frp->fcncell = fcn;
	frp->args = args;
	frp->nargs = ndef;	/* number defined with (excess are locals) */
	frp->retval = gettemp();

	   dprintf( ("start exec of %s, fp=%d\n", s, (int) (frp-frame)) );
	y = execute((Node *)(fcn->sval));	/* execute body */
	   dprintf( ("finished exec of %s, fp=%d\n", s, (int) (frp-frame)) );

	/* release the per-call parameter cells */
	for (i = 0; i < ndef; i++) {
		Cell *t = frp->args[i];
		if (isarr(t)) {
			if (t->csub == CCOPY) {
				/* a scalar copy was turned into an array inside the function */
				if (i >= ncall) {
					/* it was a local: discard the table */
					freesymtab(t);
					t->csub = CTEMP;
					tempfree(t);
				} else {
					/* it was passed in: hand the table back to the caller's cell */
					oargs[i]->tval = t->tval;
					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
					oargs[i]->sval = t->sval;
					tempfree(t);
				}
			}
		} else if (t != y) {	/* kludge to prevent freeing twice */
			t->csub = CTEMP;
			tempfree(t);
		} else if (t == y && t->csub == CCOPY) {
			t->csub = CTEMP;
			tempfree(t);
			freed = 1;
		}
	}
	tempfree(fcn);
	/* exit/next results are returned as-is; note frp is not decremented on this path */
	if (isexit(y) || isnext(y))
		return y;
	if (freed == 0) {
		tempfree(y);	/* don't free twice! */
	}
	z = frp->retval;			/* return value */
	   dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) );
	frp--;
	return(z);
}
327 
/*
 * copycell: make a copy of cell x in a fresh temporary.
 *
 * The copy drops the CON, FLD and REC type bits and is tagged CCOPY.  A
 * string value is duplicated with tostring() and the copy is marked as
 * owning it (DONTFREE cleared); otherwise DONTFREE is set.  nval and fval
 * are copied as-is.
 */
Cell *copycell(Cell *x)
{
	Cell *dup = gettemp();

	dup->csub = CCOPY;	/* prevents freeing until call is over */
	dup->nval = x->nval;	/* BUG? */
	dup->fval = x->fval;

	/* copy is not constant or field */
	dup->tval = x->tval & ~(CON|FLD|REC);
	if (isstr(x) /* || x->ctype == OCELL */) {
		dup->sval = tostring(x->sval);
		dup->tval &= ~DONTFREE;
	} else {
		dup->tval |= DONTFREE;
	}
	return dup;
}
346 
arg(Node ** a,int n)347 Cell *arg(Node **a, int n)	/* nth argument of a function */
348 {
349 
350 	n = ptoi(a[0]);	/* argument number, counting from 0 */
351 	   dprintf( ("arg(%d), fp->nargs=%d\n", n, frp->nargs) );
352 	if (n+1 > frp->nargs)
353 		FATAL("argument #%d of function %s was not supplied",
354 			n+1, frp->fcncell->nval);
355 	return frp->args[n];
356 }
357 
jump(Node ** a,int n)358 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
359 {
360 	Cell *y;
361 
362 	switch (n) {
363 	case EXIT:
364 		if (a[0] != NULL) {
365 			y = execute(a[0]);
366 			errorflag = (int) getfval(y);
367 			tempfree(y);
368 		}
369 		longjmp(env, 1);
370 	case RETURN:
371 		if (a[0] != NULL) {
372 			y = execute(a[0]);
373 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
374 				setsval(frp->retval, getsval(y));
375 				frp->retval->fval = getfval(y);
376 				frp->retval->tval |= NUM;
377 			}
378 			else if (y->tval & STR)
379 				setsval(frp->retval, getsval(y));
380 			else if (y->tval & NUM)
381 				setfval(frp->retval, getfval(y));
382 			else		/* can't happen */
383 				FATAL("bad type variable %d", y->tval);
384 			tempfree(y);
385 		}
386 		return(jret);
387 	case NEXT:
388 		return(jnext);
389 	case NEXTFILE:
390 		nextfile();
391 		return(jnextfile);
392 	case BREAK:
393 		return(jbreak);
394 	case CONTINUE:
395 		return(jcont);
396 	default:	/* can't happen */
397 		FATAL("illegal jump type %d", n);
398 	}
399 	return 0;	/* not reached */
400 }
401 
awkgetline(Node ** a,int n)402 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
403 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
404 	Cell *r, *x;
405 	extern Cell **fldtab;
406 	FILE *fp;
407 	uschar *buf;
408 	int bufsize = recsize;
409 	int mode, newflag;
410 
411 	if ((buf = malloc(bufsize)) == NULL)
412 		FATAL("out of memory in getline");
413 
414 	fflush(stdout);	/* in case someone is waiting for a prompt */
415 	r = gettemp();
416 	if (a[1] != NULL) {		/* getline < file */
417 		x = execute(a[2]);		/* filename */
418 		mode = ptoi(a[1]);
419 		if (mode == '|')		/* input pipe */
420 			mode = LE;	/* arbitrary flag */
421 		fp = openfile(mode, getsval(x), &newflag);
422 		tempfree(x);
423 		if (fp == NULL)
424 			n = -1;
425 		else
426 			n = readrec(&buf, &bufsize, fp, newflag);
427 		if (n <= 0) {
428 			;
429 		} else if (a[0] != NULL) {	/* getline var <file */
430 			x = execute(a[0]);
431 			setsval(x, buf);
432 			tempfree(x);
433 		} else {			/* getline <file */
434 			setsval(fldtab[0], buf);
435 			if (is_number(fldtab[0]->sval)) {
436 				fldtab[0]->fval = atof(fldtab[0]->sval);
437 				fldtab[0]->tval |= NUM;
438 			}
439 		}
440 	} else {			/* bare getline; use current input */
441 		if (a[0] == NULL)	/* getline */
442 			n = getrec(&record, &recsize, 1);
443 		else {			/* getline var */
444 			n = getrec(&buf, &bufsize, 0);
445 			x = execute(a[0]);
446 			setsval(x, buf);
447 			tempfree(x);
448 		}
449 	}
450 	setfval(r, (Awkfloat) n);
451 	free(buf);
452 	return r;
453 }
454 
getnf(Node ** a,int n)455 Cell *getnf(Node **a, int n)	/* get NF */
456 {
457 	if (donefld == 0)
458 		fldbld();
459 	return (Cell *) a[0];
460 }
461 
array(Node ** a,int n)462 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
463 {
464 	Cell *x, *y, *z;
465 	char *s;
466 	Node *np;
467 	uschar *buf;
468 	int bufsz = recsize;
469 	int nsub = strlen(*SUBSEP);
470 
471 	if ((buf = malloc(bufsz)) == NULL)
472 		FATAL("out of memory in array");
473 
474 	x = execute(a[0]);	/* Cell* for symbol table */
475 	buf[0] = 0;
476 	for (np = a[1]; np; np = np->nnext) {
477 		y = execute(np);	/* subscript */
478 		s = getsval(y);
479 		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array"))
480 			FATAL("out of memory for %s[%s...]", x->nval, buf);
481 		strlcat(buf, s, bufsz);
482 		if (np->nnext)
483 			strlcat(buf, *SUBSEP, bufsz);
484 		tempfree(y);
485 	}
486 	if (!isarr(x)) {
487 		   dprintf( ("making %s into an array\n", NN(x->nval)) );
488 		if (freeable(x))
489 			xfree(x->sval);
490 		x->tval &= ~(STR|NUM|DONTFREE);
491 		x->tval |= ARR;
492 		x->sval = (char *) makesymtab(NSYMTAB);
493 	}
494 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
495 	z->ctype = OCELL;
496 	z->csub = CVAR;
497 	tempfree(x);
498 	free(buf);
499 	return(z);
500 }
501 
awkdelete(Node ** a,int n)502 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
503 {
504 	Cell *x, *y;
505 	Node *np;
506 	uschar *s;
507 	int nsub = strlen(*SUBSEP);
508 
509 	x = execute(a[0]);	/* Cell* for symbol table */
510 	if (!isarr(x))
511 		return True;
512 	if (a[1] == 0) {	/* delete the elements, not the table */
513 		freesymtab(x);
514 		x->tval &= ~STR;
515 		x->tval |= ARR;
516 		x->sval = (char *) makesymtab(NSYMTAB);
517 	} else {
518 		int bufsz = recsize;
519 		uschar *buf;
520 		if ((buf = malloc(bufsz)) == NULL)
521 			FATAL("out of memory in adelete");
522 		buf[0] = 0;
523 		for (np = a[1]; np; np = np->nnext) {
524 			y = execute(np);	/* subscript */
525 			s = getsval(y);
526 			if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete"))
527 				FATAL("out of memory deleting %s[%s...]", x->nval, buf);
528 			strlcat(buf, s, bufsz);
529 			if (np->nnext)
530 				strlcat(buf, *SUBSEP, bufsz);
531 			tempfree(y);
532 		}
533 		freeelem(x, buf);
534 		free(buf);
535 	}
536 	tempfree(x);
537 	return True;
538 }
539 
intest(Node ** a,int n)540 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
541 {
542 	Cell *x, *ap, *k;
543 	Node *p;
544 	uschar *buf;
545 	char *s;
546 	int bufsz = recsize;
547 	int nsub = strlen(*SUBSEP);
548 
549 	ap = execute(a[1]);	/* array name */
550 	if (!isarr(ap)) {
551 		   dprintf( ("making %s into an array\n", ap->nval) );
552 		if (freeable(ap))
553 			xfree(ap->sval);
554 		ap->tval &= ~(STR|NUM|DONTFREE);
555 		ap->tval |= ARR;
556 		ap->sval = (char *) makesymtab(NSYMTAB);
557 	}
558 	if ((buf = malloc(bufsz)) == NULL) {
559 		FATAL("out of memory in intest");
560 	}
561 	buf[0] = 0;
562 	for (p = a[0]; p; p = p->nnext) {
563 		x = execute(p);	/* expr */
564 		s = getsval(x);
565 		if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest"))
566 			FATAL("out of memory deleting %s[%s...]", x->nval, buf);
567 		strcat(buf, s);
568 		tempfree(x);
569 		if (p->nnext)
570 			strcat(buf, *SUBSEP);
571 	}
572 	k = lookup(buf, (Array *) ap->sval);
573 	tempfree(ap);
574 	free(buf);
575 	if (k == NULL)
576 		return(False);
577 	else
578 		return(True);
579 }
580 
581 
matchop(Node ** a,int n)582 Cell *matchop(Node **a, int n)	/* ~ and match() */
583 {
584 	Cell *x, *y;
585 	uschar *s;
586 	char *t;
587 	int i;
588 	fa *pfa;
589 	int (*mf)(fa *, const char *) = match, mode = 0;
590 
591 	if (n == MATCHFCN) {
592 		mf = pmatch;
593 		mode = 1;
594 	}
595 	x = execute(a[1]);	/* a[1] = target text */
596 	s = getsval(x);
597 	if (a[0] == 0)		/* a[1] == 0: already-compiled reg expr */
598 		i = (*mf)((fa *) a[2], s);
599 	else {
600 		y = execute(a[2]);	/* a[2] = regular expr */
601 		t = getsval(y);
602 		pfa = makedfa(t, mode);
603 		i = (*mf)(pfa, s);
604 		tempfree(y);
605 	}
606 	tempfree(x);
607 	if (n == MATCHFCN) {
608 		int start = patbeg - s + 1;
609 		if (patlen < 0)
610 			start = 0;
611 		setfval(rstartloc, (Awkfloat) start);
612 		setfval(rlengthloc, (Awkfloat) patlen);
613 		x = gettemp();
614 		x->tval = NUM;
615 		x->fval = start;
616 		return x;
617 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
618 		return(True);
619 	else
620 		return(False);
621 }
622 
623 
boolop(Node ** a,int n)624 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
625 {
626 	Cell *x, *y;
627 	int i;
628 
629 	x = execute(a[0]);
630 	i = istrue(x);
631 	tempfree(x);
632 	switch (n) {
633 	case BOR:
634 		if (i) return(True);
635 		y = execute(a[1]);
636 		i = istrue(y);
637 		tempfree(y);
638 		if (i) return(True);
639 		else return(False);
640 	case AND:
641 		if ( !i ) return(False);
642 		y = execute(a[1]);
643 		i = istrue(y);
644 		tempfree(y);
645 		if (i) return(True);
646 		else return(False);
647 	case NOT:
648 		if (i) return(False);
649 		else return(True);
650 	default:	/* can't happen */
651 		FATAL("unknown boolean operator %d", n);
652 	}
653 	return 0;	/*NOTREACHED*/
654 }
655 
relop(Node ** a,int n)656 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
657 {
658 	int i;
659 	Cell *x, *y;
660 	Awkfloat j;
661 
662 	x = execute(a[0]);
663 	y = execute(a[1]);
664 	if (x->tval&NUM && y->tval&NUM) {
665 		j = x->fval - y->fval;
666 		i = j<0? -1: (j>0? 1: 0);
667 	} else {
668 		i = strcmp(getsval(x), getsval(y));
669 	}
670 	tempfree(x);
671 	tempfree(y);
672 	switch (n) {
673 	case LT:	if (i<0) return(True);
674 			else return(False);
675 	case LE:	if (i<=0) return(True);
676 			else return(False);
677 	case NE:	if (i!=0) return(True);
678 			else return(False);
679 	case EQ:	if (i == 0) return(True);
680 			else return(False);
681 	case GE:	if (i>=0) return(True);
682 			else return(False);
683 	case GT:	if (i>0) return(True);
684 			else return(False);
685 	default:	/* can't happen */
686 		FATAL("unknown relational operator %d", n);
687 	}
688 	return 0;	/*NOTREACHED*/
689 }
690 
/*
 * tfree: free a tempcell.
 * Releases the cell's string value if it is freeable, then pushes the cell
 * onto the front of the tmps free list.  FATAL if the cell is already at
 * the head of that list.
 */
void tfree(Cell *a)
{
	if (freeable(a)) {
		   dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) );
		xfree(a->sval);
	}
	if (tmps == a) {
		FATAL("tempcell list is curdled");
	}
	a->cnext = tmps;
	tmps = a;
}
702 
gettemp(void)703 Cell *gettemp(void)	/* get a tempcell */
704 {	int i;
705 	Cell *x;
706 
707 	if (!tmps) {
708 		tmps = calloc(100, sizeof(*tmps));
709 		if (!tmps)
710 			FATAL("out of space for temporaries");
711 		for(i = 1; i < 100; i++)
712 			tmps[i-1].cnext = &tmps[i];
713 		tmps[i-1].cnext = 0;
714 	}
715 	x = tmps;
716 	tmps = x->cnext;
717 	*x = tempcell;
718 	return(x);
719 }
720 
indirect(Node ** a,int n)721 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
722 {
723 	Awkfloat val;
724 	Cell *x;
725 	int m;
726 	char *s;
727 
728 	x = execute(a[0]);
729 	val = getfval(x);	/* freebsd: defend against super large field numbers */
730 	if ((Awkfloat)INT_MAX < val)
731 		FATAL("trying to access out of range field %s", x->nval);
732 	m = (int) val;
733 	if (m == 0 && !is_number(s = getsval(x)))	/* suspicion! */
734 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
735 		/* BUG: can x->nval ever be null??? */
736 	tempfree(x);
737 	x = fieldadr(m);
738 	x->ctype = OCELL;	/* BUG?  why are these needed? */
739 	x->csub = CFLD;
740 	return(x);
741 }
742 
substr(Node ** a,int nnn)743 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
744 {
745 	int k, m, n;
746 	char *s;
747 	int temp;
748 	Cell *x, *y, *z = 0;
749 
750 	x = execute(a[0]);
751 	y = execute(a[1]);
752 	if (a[2] != 0)
753 		z = execute(a[2]);
754 	s = getsval(x);
755 	k = strlen(s) + 1;
756 	if (k <= 1) {
757 		tempfree(x);
758 		tempfree(y);
759 		if (a[2] != 0) {
760 			tempfree(z);
761 		}
762 		x = gettemp();
763 		setsval(x, "");
764 		return(x);
765 	}
766 	m = (int) getfval(y);
767 	if (m <= 0)
768 		m = 1;
769 	else if (m > k)
770 		m = k;
771 	tempfree(y);
772 	if (a[2] != 0) {
773 		n = (int) getfval(z);
774 		tempfree(z);
775 	} else
776 		n = k - 1;
777 	if (n < 0)
778 		n = 0;
779 	else if (n > k - m)
780 		n = k - m;
781 	   dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) );
782 	y = gettemp();
783 	temp = s[n+m-1];	/* with thanks to John Linderman */
784 	s[n+m-1] = '\0';
785 	setsval(y, s + m - 1);
786 	s[n+m-1] = temp;
787 	tempfree(x);
788 	return(y);
789 }
790 
sindex(Node ** a,int nnn)791 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
792 {
793 	Cell *x, *y, *z;
794 	char *s1, *s2, *p1, *p2, *q;
795 	Awkfloat v = 0.0;
796 
797 	x = execute(a[0]);
798 	s1 = getsval(x);
799 	y = execute(a[1]);
800 	s2 = getsval(y);
801 
802 	z = gettemp();
803 	for (p1 = s1; *p1 != '\0'; p1++) {
804 		for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++)
805 			;
806 		if (*p2 == '\0') {
807 			v = (Awkfloat) (p1 - s1 + 1);	/* origin 1 */
808 			break;
809 		}
810 	}
811 	tempfree(x);
812 	tempfree(y);
813 	setfval(z, v);
814 	return(z);
815 }
816 
/* headroom (in bytes) kept available in the output/format buffers for one numeric conversion */
#define	MAXNUMSIZE	50

/*
 * format: printf-like conversions.
 *
 * pbuf/pbufsize: address of the output buffer and its size; both are
 *                updated on return because the buffer may be reallocated.
 * s:             the format string.
 * a:             list of argument nodes, consumed one per conversion (and
 *                one more per '*' width/precision).
 *
 * Each conversion specification is copied into a scratch buffer (fmt),
 * possibly adjusted, and then applied with snprintf() to the next
 * argument.  Arguments left over after the format is exhausted are still
 * evaluated.  Returns the number of bytes produced (p - buf); no path in
 * this function returns -1.
 */
int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
{
	uschar *fmt, *p, *t;
	const char *os;
	Cell *x;
	int flag = 0, n;
	int fmtwd; /* format width */
	int fmtsz = recsize;
	uschar *buf = *pbuf;
	int bufsize = *pbufsize;
/* bytes remaining in fmt / buf from position a to the end of the buffer */
#define FMTSZ(a)   (fmtsz - ((a) - fmt))
#define BUFSZ(a)   (bufsize - ((a) - buf))

	os = s;		/* keep the start of the format for error messages */
	p = buf;	/* p is the write position in the output buffer */
	if ((fmt = malloc(fmtsz)) == NULL)
		FATAL("out of memory in format()");
	while (*s) {
		/* keep at least MAXNUMSIZE+1 bytes available past p */
		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
		if (*s != '%') {	/* ordinary character: copy through */
			*p++ = *s++;
			continue;
		}
		if (*(s+1) == '%') {	/* "%%" produces a single '%' */
			*p++ = '%';
			s += 2;
			continue;
		}
		/* have to be real careful in case this is a huge number, eg, %100000d */
		fmtwd = atoi(s+1);
		if (fmtwd < 0)
			fmtwd = -fmtwd;
		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
		/* copy the conversion spec into fmt, up to and including its conversion letter */
		for (t = fmt; (*t++ = *s) != '\0'; s++) {
			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
				FATAL("format item %.30s... ran format() out of memory", os);
			/* length modifiers are not accepted from the awk program */
			if (*s == 'l' || *s == 'h' || *s == 'L')
				goto weird;
			if (isalpha((uschar)*s))
				break;	/* the ansi panoply */
			if (*s == '*') {
				/* replace the '*' in fmt with the decimal value of the next argument */
				if (a == NULL)
					FATAL("not enough args in printf("
					    "\"%.30s\")", os);
				x = execute(a);
				a = a->nnext;
				snprintf(t - 1, FMTSZ(t - 1),
				    "%d", fmtwd=(int) getfval(x));
				if (fmtwd < 0)
					fmtwd = -fmtwd;
				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
				t = fmt + strlen(fmt);
				tempfree(x);
			}
		}
		*t = '\0';
		if (fmtwd < 0)
			fmtwd = -fmtwd;
		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");

		/* classify the conversion; flag selects how the argument is passed below */
		switch (*s) {
		case 'f': case 'e': case 'g': case 'E': case 'G':
			flag = 'f';
			break;
		case 'd': case 'i':
			flag = 'd';
			if(*(s-1) == 'l') break;
			/* rewrite the trailing letter as "jd" to match the intmax_t argument */
			*(t-1) = 'j';
			*t = 'd';
			*++t = '\0';
			break;
		case 'o': case 'x': case 'X': case 'u':
			flag = *(s-1) == 'l' ? 'd' : 'u';
			/* insert 'j' before the letter to match the (u)intmax_t argument */
			*(t-1) = 'j';
			*t = *s;
			*++t = '\0';
			break;
		case 's':
			flag = 's';
			break;
		case 'c':
			flag = 'c';
			break;
		default:
		weird:
			WARNING("weird printf conversion %s", fmt);
			flag = '?';
			break;
		}
		if (a == NULL)
			FATAL("not enough args in printf(%s)", os);
		x = execute(a);
		a = a->nnext;
		n = MAXNUMSIZE;
		if (fmtwd > n)
			n = fmtwd;
		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
		switch (flag) {
		case '?':	snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
			t = getsval(x);
			n = strlen(t);
			if (fmtwd > n)
				n = fmtwd;
			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
			p += strlen(p);
			snprintf(p, BUFSZ(p), "%s", t);
			break;
		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
		case 's':
			t = getsval(x);
			n = strlen(t);
			if (fmtwd > n)
				n = fmtwd;
			if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
				FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t);
			snprintf(p, BUFSZ(p), fmt, t);
			break;
		case 'c':
			/* numeric argument: character code; string argument: its first character */
			if (isnum(x)) {
				if (getfval(x))
					snprintf(p, BUFSZ(p), fmt, (int) getfval(x));
				else {
					*p++ = '\0'; /* explicit null byte */
					*p = '\0';   /* next output will start here */
				}
			} else
				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
			break;
		default:
			FATAL("can't happen: bad conversion %c in format()", flag);
		}
		tempfree(x);
		p += strlen(p);	/* step over what snprintf just wrote */
		s++;		/* step over the conversion letter */
	}
	*p = '\0';
	free(fmt);
	for ( ; a; a = a->nnext)		/* evaluate any remaining args */
		execute(a);
	/* publish the (possibly reallocated) buffer back to the caller */
	*pbuf = buf;
	*pbufsize = bufsize;
	return p - buf;
}
964 
awksprintf(Node ** a,int n)965 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
966 {
967 	Cell *x;
968 	Node *y;
969 	char *buf;
970 	int bufsz=3*recsize;
971 
972 	if ((buf = malloc(bufsz)) == NULL)
973 		FATAL("out of memory in awksprintf");
974 	y = a[0]->nnext;
975 	x = execute(a[0]);
976 	if (format(&buf, &bufsz, getsval(x), y) == -1)
977 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
978 	tempfree(x);
979 	x = gettemp();
980 	x->sval = buf;
981 	x->tval = STR;
982 	return(x);
983 }
984 
awkprintf(Node ** a,int n)985 Cell *awkprintf(Node **a, int n)		/* printf */
986 {	/* a[0] is list of args, starting with format string */
987 	/* a[1] is redirection operator, a[2] is redirection file */
988 	FILE *fp;
989 	Cell *x;
990 	Node *y;
991 	char *buf;
992 	int len;
993 	int bufsz=3*recsize;
994 
995 	if ((buf = malloc(bufsz)) == NULL)
996 		FATAL("out of memory in awkprintf");
997 	y = a[0]->nnext;
998 	x = execute(a[0]);
999 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1000 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1001 	tempfree(x);
1002 	if (a[1] == NULL) {
1003 		/* fputs(buf, stdout); */
1004 		fwrite(buf, len, 1, stdout);
1005 		if (ferror(stdout))
1006 			FATAL("write error on stdout");
1007 	} else {
1008 		fp = redirect(ptoi(a[1]), a[2]);
1009 		/* fputs(buf, fp); */
1010 		fwrite(buf, len, 1, fp);
1011 		fflush(fp);
1012 		if (ferror(fp))
1013 			FATAL("write error on %s", filename(fp));
1014 	}
1015 	free(buf);
1016 	return(True);
1017 }
1018 
arith(Node ** a,int n)1019 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1020 {
1021 	Awkfloat i, j = 0;
1022 	double v;
1023 	Cell *x, *y, *z;
1024 
1025 	x = execute(a[0]);
1026 	i = getfval(x);
1027 	tempfree(x);
1028 	if (n != UMINUS) {
1029 		y = execute(a[1]);
1030 		j = getfval(y);
1031 		tempfree(y);
1032 	}
1033 	z = gettemp();
1034 	switch (n) {
1035 	case ADD:
1036 		i += j;
1037 		break;
1038 	case MINUS:
1039 		i -= j;
1040 		break;
1041 	case MULT:
1042 		i *= j;
1043 		break;
1044 	case DIVIDE:
1045 		if (j == 0)
1046 			FATAL("division by zero");
1047 		i /= j;
1048 		break;
1049 	case MOD:
1050 		if (j == 0)
1051 			FATAL("division by zero in mod");
1052 		modf(i/j, &v);
1053 		i = i - j * v;
1054 		break;
1055 	case UMINUS:
1056 		i = -i;
1057 		break;
1058 	case POWER:
1059 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1060 			i = ipow(i, (int) j);
1061 		else
1062 			i = errcheck(pow(i, j), "pow");
1063 		break;
1064 	default:	/* can't happen */
1065 		FATAL("illegal arithmetic operator %d", n);
1066 	}
1067 	setfval(z, i);
1068 	return(z);
1069 }
1070 
/*
 * ipow: x**n by repeated squaring.  ought to be done by pow, but isn't always.
 *
 * Returns 1 for n <= 0.  The bits of n are consumed from the most
 * significant downwards, which performs the multiplications in the same
 * order as the halving recursion r(n) = [x *] r(n/2) * r(n/2).
 */
double ipow(double x, int n)
{
	unsigned int un, mask;
	double r;

	if (n <= 0)
		return 1;

	/* locate the highest set bit of n */
	un = (unsigned int) n;
	mask = 1;
	while (mask <= un / 2)
		mask <<= 1;

	r = 1;
	for ( ; mask != 0; mask >>= 1) {
		if (un & mask)
			r = x * r * r;
		else
			r = r * r;
	}
	return r;
}
1083 
incrdecr(Node ** a,int n)1084 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1085 {
1086 	Cell *x, *z;
1087 	int k;
1088 	Awkfloat xf;
1089 
1090 	x = execute(a[0]);
1091 	xf = getfval(x);
1092 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1093 	if (n == PREINCR || n == PREDECR) {
1094 		setfval(x, xf + k);
1095 		return(x);
1096 	}
1097 	z = gettemp();
1098 	setfval(z, xf);
1099 	setfval(x, xf + k);
1100 	tempfree(x);
1101 	return(z);
1102 }
1103 
/*
 * assign: a[0] = a[1], a[0] += a[1], etc.
 *
 * n selects plain assignment (ASSIGN) or one of the compound operators.
 * The right-hand side a[1] is evaluated before the left-hand side a[0].
 * Returns the left-hand cell after it has been updated.
 */
Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
{		/* this is subtle; don't muck with it. */
	Cell *x, *y;
	Awkfloat xf, yf;
	double v;

	y = execute(a[1]);
	x = execute(a[0]);
	if (n == ASSIGN) {	/* ordinary assignment */
		if (x == y && !(x->tval & (FLD|REC)))	/* self-assignment: */
			;		/* leave alone unless it's a field */
		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
			/* source has both flags set: carry string and number across */
			setsval(x, getsval(y));
			x->fval = getfval(y);
			x->tval |= NUM;
		}
		else if (isstr(y))
			setsval(x, getsval(y));
		else if (isnum(y))
			setfval(x, getfval(y));
		else
			funnyvar(y, "read value of");
		tempfree(y);
		return(x);
	}
	/* compound assignment: purely numeric from here on */
	xf = getfval(x);
	yf = getfval(y);
	switch (n) {
	case ADDEQ:
		xf += yf;
		break;
	case SUBEQ:
		xf -= yf;
		break;
	case MULTEQ:
		xf *= yf;
		break;
	case DIVEQ:
		if (yf == 0)
			FATAL("division by zero in /=");
		xf /= yf;
		break;
	case MODEQ:
		if (yf == 0)
			FATAL("division by zero in %%=");
		/* remainder = xf - yf * (integer part of xf / yf) */
		modf(xf/yf, &v);
		xf = xf - yf * v;
		break;
	case POWEQ:
		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
			xf = ipow(xf, (int) yf);
		else
			xf = errcheck(pow(xf, yf), "pow");
		break;
	default:
		FATAL("illegal assignment operator %d", n);
		break;
	}
	tempfree(y);
	setfval(x, xf);
	return(x);
}
1166 
cat(Node ** a,int q)1167 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1168 {
1169 	Cell *x, *y, *z;
1170 	int n1, n2;
1171 	char *s;
1172 
1173 	x = execute(a[0]);
1174 	y = execute(a[1]);
1175 	getsval(x);
1176 	getsval(y);
1177 	n1 = strlen(x->sval);
1178 	n2 = strlen(y->sval);
1179 	s = malloc(n1 + n2 + 1);
1180 	if (s == NULL)
1181 		FATAL("out of space concatenating %.15s... and %.15s...",
1182 			x->sval, y->sval);
1183 	strcpy(s, x->sval);
1184 	strcpy(s+n1, y->sval);
1185 	tempfree(x);
1186 	tempfree(y);
1187 	z = gettemp();
1188 	z->sval = s;
1189 	z->tval = STR;
1190 	return(z);
1191 }
1192 
pastat(Node ** a,int n)1193 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1194 {
1195 	Cell *x;
1196 
1197 	if (a[0] == 0)
1198 		x = execute(a[1]);
1199 	else {
1200 		x = execute(a[0]);
1201 		if (istrue(x)) {
1202 			tempfree(x);
1203 			x = execute(a[1]);
1204 		}
1205 	}
1206 	return x;
1207 }
1208 
dopa2(Node ** a,int n)1209 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1210 {
1211 	Cell *x;
1212 	int pair;
1213 
1214 	pair = ptoi(a[3]);
1215 	if (pairstack[pair] == 0) {
1216 		x = execute(a[0]);
1217 		if (istrue(x))
1218 			pairstack[pair] = 1;
1219 		tempfree(x);
1220 	}
1221 	if (pairstack[pair] == 1) {
1222 		x = execute(a[1]);
1223 		if (istrue(x))
1224 			pairstack[pair] = 0;
1225 		tempfree(x);
1226 		x = execute(a[2]);
1227 		return(x);
1228 	}
1229 	return(False);
1230 }
1231 
1232 static char regexpr[] = "(regexpr)";
split(Node ** a,int nnn)1233 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1234 {
1235 	Cell *x = 0, *y, *ap;
1236 	char *s, *origs;
1237 	int sep;
1238 	char *t, temp, num[50], *fs = 0;
1239 	int n, tempstat, arg3type;
1240 
1241 	y = execute(a[0]);	/* source string */
1242 	origs = s = strdup(getsval(y));
1243 	arg3type = ptoi(a[3]);
1244 	if (a[2] == 0)		/* fs string */
1245 		fs = *FS;
1246 	else if (arg3type == STRING) {	/* split(str,arr,"string") */
1247 		x = execute(a[2]);
1248 		fs = getsval(x);
1249 	} else if (arg3type == REGEXPR)
1250 		fs = regexpr;	/* split(str,arr,/regexpr/) */
1251 	else
1252 		FATAL("illegal type of split");
1253 	sep = *fs;
1254 	ap = execute(a[1]);	/* array name */
1255 	freesymtab(ap);
1256 	   dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) );
1257 	ap->tval &= ~STR;
1258 	ap->tval |= ARR;
1259 	ap->sval = (char *) makesymtab(NSYMTAB);
1260 
1261 	n = 0;
1262         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1263 		/* split(s, a, //); have to arrange that it looks like empty sep */
1264 		arg3type = 0;
1265 		fs = EMPTY;
1266 		sep = 0;
1267 	}
1268 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1269 		fa *pfa;
1270 		if (arg3type == REGEXPR) {	/* it's ready already */
1271 			pfa = (fa *) a[2];
1272 		} else {
1273 			pfa = makedfa(fs, 1);
1274 		}
1275 		if (nematch(pfa,s)) {
1276 			tempstat = pfa->initstat;
1277 			pfa->initstat = 2;
1278 			do {
1279 				n++;
1280 				snprintf(num, sizeof(num), "%d", n);
1281 				temp = *patbeg;
1282 				*patbeg = '\0';
1283 				if (is_number(s))
1284 					setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1285 				else
1286 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1287 				*patbeg = temp;
1288 				s = patbeg + patlen;
1289 				if (*(patbeg+patlen-1) == 0 || *s == 0) {
1290 					n++;
1291 					snprintf(num, sizeof(num), "%d", n);
1292 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1293 					pfa->initstat = tempstat;
1294 					goto spdone;
1295 				}
1296 			} while (nematch(pfa,s));
1297 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1298 							/* cf gsub and refldbld */
1299 		}
1300 		n++;
1301 		snprintf(num, sizeof(num), "%d", n);
1302 		if (is_number(s))
1303 			setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval);
1304 		else
1305 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1306   spdone:
1307 		pfa = NULL;
1308 	} else if (sep == ' ') {
1309 		for (n = 0; ; ) {
1310 			while (*s == ' ' || *s == '\t' || *s == '\n')
1311 				s++;
1312 			if (*s == 0)
1313 				break;
1314 			n++;
1315 			t = s;
1316 			do
1317 				s++;
1318 			while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0');
1319 			temp = *s;
1320 			*s = '\0';
1321 			snprintf(num, sizeof(num), "%d", n);
1322 			if (is_number(t))
1323 				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1324 			else
1325 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1326 			*s = temp;
1327 			if (*s != 0)
1328 				s++;
1329 		}
1330 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1331 		for (n = 0; *s != 0; s++) {
1332 			char buf[2];
1333 			n++;
1334 			snprintf(num, sizeof(num), "%d", n);
1335 			buf[0] = *s;
1336 			buf[1] = 0;
1337 			if (isdigit((uschar)buf[0]))
1338 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1339 			else
1340 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1341 		}
1342 	} else if (*s != 0) {
1343 		for (;;) {
1344 			n++;
1345 			t = s;
1346 			while (*s != sep && *s != '\n' && *s != '\0')
1347 				s++;
1348 			temp = *s;
1349 			*s = '\0';
1350 			snprintf(num, sizeof(num), "%d", n);
1351 			if (is_number(t))
1352 				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
1353 			else
1354 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1355 			*s = temp;
1356 			if (*s++ == 0)
1357 				break;
1358 		}
1359 	}
1360 	tempfree(ap);
1361 	tempfree(y);
1362 	free(origs);
1363 	if (a[2] != 0 && arg3type == STRING) {
1364 		tempfree(x);
1365 	}
1366 	x = gettemp();
1367 	x->tval = NUM;
1368 	x->fval = n;
1369 	return(x);
1370 }
1371 
condexpr(Node ** a,int n)1372 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1373 {
1374 	Cell *x;
1375 
1376 	x = execute(a[0]);
1377 	if (istrue(x)) {
1378 		tempfree(x);
1379 		x = execute(a[1]);
1380 	} else {
1381 		tempfree(x);
1382 		x = execute(a[2]);
1383 	}
1384 	return(x);
1385 }
1386 
ifstat(Node ** a,int n)1387 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1388 {
1389 	Cell *x;
1390 
1391 	x = execute(a[0]);
1392 	if (istrue(x)) {
1393 		tempfree(x);
1394 		x = execute(a[1]);
1395 	} else if (a[2] != 0) {
1396 		tempfree(x);
1397 		x = execute(a[2]);
1398 	}
1399 	return(x);
1400 }
1401 
whilestat(Node ** a,int n)1402 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1403 {
1404 	Cell *x;
1405 
1406 	for (;;) {
1407 		x = execute(a[0]);
1408 		if (!istrue(x))
1409 			return(x);
1410 		tempfree(x);
1411 		x = execute(a[1]);
1412 		if (isbreak(x)) {
1413 			x = True;
1414 			return(x);
1415 		}
1416 		if (isnext(x) || isexit(x) || isret(x))
1417 			return(x);
1418 		tempfree(x);
1419 	}
1420 }
1421 
dostat(Node ** a,int n)1422 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1423 {
1424 	Cell *x;
1425 
1426 	for (;;) {
1427 		x = execute(a[0]);
1428 		if (isbreak(x))
1429 			return True;
1430 		if (isnext(x) || isexit(x) || isret(x))
1431 			return(x);
1432 		tempfree(x);
1433 		x = execute(a[1]);
1434 		if (!istrue(x))
1435 			return(x);
1436 		tempfree(x);
1437 	}
1438 }
1439 
forstat(Node ** a,int n)1440 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1441 {
1442 	Cell *x;
1443 
1444 	x = execute(a[0]);
1445 	tempfree(x);
1446 	for (;;) {
1447 		if (a[1]!=0) {
1448 			x = execute(a[1]);
1449 			if (!istrue(x)) return(x);
1450 			else tempfree(x);
1451 		}
1452 		x = execute(a[3]);
1453 		if (isbreak(x))		/* turn off break */
1454 			return True;
1455 		if (isnext(x) || isexit(x) || isret(x))
1456 			return(x);
1457 		tempfree(x);
1458 		x = execute(a[2]);
1459 		tempfree(x);
1460 	}
1461 }
1462 
instat(Node ** a,int n)1463 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1464 {
1465 	Cell *x, *vp, *arrayp, *cp, *ncp;
1466 	Array *tp;
1467 	int i;
1468 
1469 	vp = execute(a[0]);
1470 	arrayp = execute(a[1]);
1471 	if (!isarr(arrayp)) {
1472 		return True;
1473 	}
1474 	tp = (Array *) arrayp->sval;
1475 	tempfree(arrayp);
1476 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1477 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1478 			setsval(vp, cp->nval);
1479 			ncp = cp->cnext;
1480 			x = execute(a[2]);
1481 			if (isbreak(x)) {
1482 				tempfree(vp);
1483 				return True;
1484 			}
1485 			if (isnext(x) || isexit(x) || isret(x)) {
1486 				tempfree(vp);
1487 				return(x);
1488 			}
1489 			tempfree(x);
1490 		}
1491 	}
1492 	return True;
1493 }
1494 
1495 void flush_all(void);
1496 
nawk_toXXX(const char * s,int (* fun_c)(int),wint_t (* fun_wc)(wint_t))1497 static char *nawk_toXXX(const char *s,
1498 			int (*fun_c)(int),
1499 			wint_t (*fun_wc)(wint_t))
1500 {
1501 	char *buf      = NULL;
1502 	char *pbuf     = NULL;
1503 	const char *ps = NULL;
1504 	size_t n       = 0;
1505 	mbstate_t mbs, mbs2;
1506 	wchar_t wc;
1507 	size_t sz = MB_CUR_MAX;
1508 
1509 	if (sz == 1) {
1510 		buf = tostring(s);
1511 
1512 		for (pbuf = buf; *pbuf; pbuf++)
1513 			*pbuf = fun_c((uschar)*pbuf);
1514 
1515 		return buf;
1516 	} else {
1517 		/* upper/lower character may be shorter/longer */
1518 		buf = tostringN(s, strlen(s) * sz + 1);
1519 
1520 		memset(&mbs,  0, sizeof(mbs));
1521 		memset(&mbs2, 0, sizeof(mbs2));
1522 
1523 		ps   = s;
1524 		pbuf = buf;
1525 		while (n = mbrtowc(&wc, ps, sz, &mbs),
1526 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
1527 		{
1528 			ps += n;
1529 
1530 			n = wcrtomb(pbuf, fun_wc(wc), &mbs2);
1531 			if (n == (size_t)-1)
1532 				FATAL("illegal wide character %s", s);
1533 
1534 			pbuf += n;
1535 		}
1536 
1537 		*pbuf = 0;
1538 
1539 		if (n)
1540 			FATAL("illegal byte sequence %s", s);
1541 
1542 		return buf;
1543 	}
1544 }
1545 
/* nawk_toupper: newly allocated upper-case copy of s (see nawk_toXXX) */
static char *nawk_toupper(const char *s)
{
	char *upper;

	upper = nawk_toXXX(s, toupper, towupper);
	return upper;
}
1550 
/* nawk_tolower: newly allocated lower-case copy of s (see nawk_toXXX) */
static char *nawk_tolower(const char *s)
{
	char *lower;

	lower = nawk_toXXX(s, tolower, towlower);
	return lower;
}
1555 
bltin(Node ** a,int n)1556 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
1557 {
1558 	Cell *x, *y;
1559 	Awkfloat u;
1560 	int t, sz;
1561 	unsigned int tmp;
1562 	char *buf, *fmt;
1563 	Node *nextarg;
1564 	FILE *fp;
1565 	time_t tv;
1566 	struct tm *tm;
1567 
1568 	t = ptoi(a[0]);
1569 	x = execute(a[1]);
1570 	nextarg = a[1]->nnext;
1571 	switch (t) {
1572 	case FLENGTH:
1573 		if (isarr(x))
1574 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
1575 		else
1576 			u = strlen(getsval(x));
1577 		break;
1578 	case FLOG:
1579 		u = errcheck(log(getfval(x)), "log"); break;
1580 	case FINT:
1581 		modf(getfval(x), &u); break;
1582 	case FEXP:
1583 		u = errcheck(exp(getfval(x)), "exp"); break;
1584 	case FSQRT:
1585 		u = errcheck(sqrt(getfval(x)), "sqrt"); break;
1586 	case FSIN:
1587 		u = sin(getfval(x)); break;
1588 	case FCOS:
1589 		u = cos(getfval(x)); break;
1590 	case FATAN:
1591 		if (nextarg == 0) {
1592 			WARNING("atan2 requires two arguments; returning 1.0");
1593 			u = 1.0;
1594 		} else {
1595 			y = execute(a[1]->nnext);
1596 			u = atan2(getfval(x), getfval(y));
1597 			tempfree(y);
1598 			nextarg = nextarg->nnext;
1599 		}
1600 		break;
1601 	case FSYSTEM:
1602 		fflush(stdout);		/* in case something is buffered already */
1603 		u = (Awkfloat) system(getsval(x)) / 256;   /* 256 is unix-dep */
1604 		break;
1605 	case FRAND:
1606 		/* in principle, rand() returns something in 0..RAND_MAX */
1607 		u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX;
1608 		break;
1609 	case FSRAND:
1610 		if (isrec(x))	/* no argument provided */
1611 			u = time((time_t *)0);
1612 		else
1613 			u = getfval(x);
1614 		tmp = (unsigned int) u;
1615 		srand(tmp);
1616 		u = srand_seed;
1617 		srand_seed = tmp;
1618 		break;
1619 	case FTOUPPER:
1620 	case FTOLOWER:
1621 		if (t == FTOUPPER)
1622 			buf = nawk_toupper(getsval(x));
1623 		else
1624 			buf = nawk_tolower(getsval(x));
1625 		tempfree(x);
1626 		x = gettemp();
1627 		setsval(x, buf);
1628 		free(buf);
1629 		return x;
1630 	case FFLUSH:
1631 		if (isrec(x) || strlen(getsval(x)) == 0) {
1632 			flush_all();	/* fflush() or fflush("") -> all */
1633 			u = 0;
1634 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
1635 			u = -1;
1636 		else
1637 			u = fflush(fp);
1638 		break;
1639 	case FSYSTIME:
1640 		u = time((time_t *) 0); break;
1641 	case FSTRFTIME:
1642 		/* strftime([format [,timestamp]]) */
1643 		if (nextarg) {
1644 			y = execute(nextarg), nextarg = nextarg->nnext;
1645 			tv = (time_t) getfval(y);
1646 			tempfree(y);
1647 		} else
1648 			tv = time((time_t *) 0);
1649 		tm = localtime(&tv);
1650 		if (tm == NULL)
1651 			FATAL("bad time %jd", (intmax_t)tv);
1652 
1653 		if (isrec(x)) {
1654 			/* format argument not provided, use default */
1655 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
1656 		} else
1657 			fmt = tostring(getsval(x));
1658 
1659 		sz = 32, buf = NULL;
1660 		do {
1661 			if ((buf = realloc(buf, (sz *= 2))) == NULL)
1662 				FATAL("out of memory in strftime");
1663 		} while(strftime(buf, sz, fmt, tm) == 0);
1664 
1665 		y = gettemp();
1666 		setsval(y, buf);
1667 		free(fmt);
1668 		free(buf);
1669 
1670 		return y;
1671 	default:	/* can't happen */
1672 		FATAL("illegal function type %d", t);
1673 		break;
1674 	}
1675 	tempfree(x);
1676 	x = gettemp();
1677 	setfval(x, u);
1678 	if (nextarg != 0) {
1679 		WARNING("warning: function has too many arguments");
1680 		for ( ; nextarg; nextarg = nextarg->nnext)
1681 			execute(nextarg);
1682 	}
1683 	return(x);
1684 }
1685 
printstat(Node ** a,int n)1686 Cell *printstat(Node **a, int n)	/* print a[0] */
1687 {
1688 	Node *x;
1689 	Cell *y;
1690 	FILE *fp;
1691 
1692 	if (a[1] == 0)	/* a[1] is redirection operator, a[2] is file */
1693 		fp = stdout;
1694 	else
1695 		fp = redirect(ptoi(a[1]), a[2]);
1696 	for (x = a[0]; x != NULL; x = x->nnext) {
1697 		y = execute(x);
1698 		fputs(getpssval(y), fp);
1699 		tempfree(y);
1700 		if (x->nnext == NULL)
1701 			fputs(*ORS, fp);
1702 		else
1703 			fputs(*OFS, fp);
1704 	}
1705 	if (a[1] != 0)
1706 		fflush(fp);
1707 	if (ferror(fp))
1708 		FATAL("write error on %s", filename(fp));
1709 	return(True);
1710 }
1711 
nullproc(Node ** a,int n)1712 Cell *nullproc(Node **a, int n)
1713 {
1714 	n = n;
1715 	a = a;
1716 	return 0;
1717 }
1718 
1719 
/*
 * redirect: set up all i/o redirections.
 * Executes b to get the file name and opens it through openfile() with
 * mode a.  Failure to open is fatal; otherwise the stream is returned.
 */
FILE *redirect(int a, Node *b)
{
	Cell *name_cell = execute(b);
	char *fname = getsval(name_cell);
	FILE *fp = openfile(a, fname, NULL);

	if (fp == NULL)
		FATAL("can't open file %s", fname);
	tempfree(name_cell);
	return fp;
}
1734 
/* one entry of the open-file table */
struct files {
	FILE	*fp;		/* open stream; NULL marks a free slot */
	const char	*fname;	/* name the stream was opened under */
	int	mode;	/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself, allocated by stdinit() */

size_t nfiles;		/* number of slots in files[] */
1742 
stdinit(void)1743 void stdinit(void)	/* in case stdin, etc., are not constants */
1744 {
1745 	nfiles = FOPEN_MAX;
1746 	files = calloc(nfiles, sizeof(*files));
1747 	if (files == NULL)
1748 		FATAL("can't allocate file memory for %zu files", nfiles);
1749 	files[0].fp = stdin;
1750 	files[0].fname = "/dev/stdin";
1751 	files[0].mode = LT;
1752 	files[1].fp = stdout;
1753 	files[1].fname = "/dev/stdout";
1754 	files[1].mode = GT;
1755 	files[2].fp = stderr;
1756 	files[2].fname = "/dev/stderr";
1757 	files[2].mode = GT;
1758 }
1759 
openfile(int a,const char * us,int * pnewflag)1760 FILE *openfile(int a, const char *us, int *pnewflag)
1761 {
1762 	const char *s = us;
1763 	size_t i;
1764 	int m;
1765 	FILE *fp = 0;
1766 
1767 	if (*s == '\0')
1768 		FATAL("null file name in print or getline");
1769 	for (i = 0; i < nfiles; i++)
1770 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
1771 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
1772 		     a == FFLUSH)) {
1773 			if (pnewflag)
1774 				*pnewflag = 0;
1775 			return files[i].fp;
1776 		}
1777 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
1778 		return NULL;
1779 
1780 	for (i = 0; i < nfiles; i++)
1781 		if (files[i].fp == NULL)
1782 			break;
1783 	if (i >= nfiles) {
1784 		struct files *nf;
1785 		size_t nnf = nfiles + FOPEN_MAX;
1786 		nf = realloc(files, nnf * sizeof(*nf));
1787 		if (nf == NULL)
1788 			FATAL("cannot grow files for %s and %zu files", s, nnf);
1789 		(void)memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
1790 		nfiles = nnf;
1791 		files = nf;
1792 	}
1793 	fflush(stdout);	/* force a semblance of order */
1794 	m = a;
1795 	if (a == GT) {
1796 		fp = fopen(s, "w");
1797 	} else if (a == APPEND) {
1798 		fp = fopen(s, "a");
1799 		m = GT;	/* so can mix > and >> */
1800 	} else if (a == '|') {	/* output pipe */
1801 		fp = popen(s, "w");
1802 	} else if (a == LE) {	/* input pipe */
1803 		fp = popen(s, "r");
1804 	} else if (a == LT) {	/* getline <file */
1805 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
1806 	} else	/* can't happen */
1807 		FATAL("illegal redirection %d", a);
1808 	if (fp != NULL) {
1809 		files[i].fname = tostring(s);
1810 		files[i].fp = fp;
1811 		files[i].mode = m;
1812 		if (pnewflag)
1813 			*pnewflag = 1;
1814 	}
1815 	return fp;
1816 }
1817 
filename(FILE * fp)1818 const char *filename(FILE *fp)
1819 {
1820 	size_t i;
1821 
1822 	for (i = 0; i < nfiles; i++)
1823 		if (fp == files[i].fp)
1824 			return files[i].fname;
1825 	return "???";
1826 }
1827 
closefile(Node ** a,int n)1828 Cell *closefile(Node **a, int n)
1829 {
1830 	Cell *x;
1831 	size_t i;
1832 	int stat;
1833 
1834 	n = n;
1835 	x = execute(a[0]);
1836 	getsval(x);
1837 	stat = -1;
1838 	for (i = 0; i < nfiles; i++) {
1839 		if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) {
1840 			if (ferror(files[i].fp))
1841 				WARNING( "i/o error occurred on %s", files[i].fname );
1842 			if (files[i].mode == '|' || files[i].mode == LE)
1843 				stat = pclose(files[i].fp) == -1;
1844 			else
1845 				stat = fclose(files[i].fp) == EOF;
1846 			if (stat) {
1847 				stat = -1;
1848 				WARNING( "i/o error occurred closing %s",
1849 				    files[i].fname );
1850 			}
1851 			if (i > 2)	/* don't do /dev/std... */
1852 				free(__UNCONST(files[i].fname));
1853 			files[i].fname = NULL;	/* watch out for ref thru this */
1854 			files[i].fp = NULL;
1855 		}
1856 	}
1857 	tempfree(x);
1858 	x = gettemp();
1859 	setfval(x, (Awkfloat) stat);
1860 	return(x);
1861 }
1862 
closeall(void)1863 void closeall(void)
1864 {
1865 	size_t i;
1866 	int stat;
1867 
1868 	for (i = 0; i < nfiles; i++) {
1869 		if (files[i].fp) {
1870 			if (ferror(files[i].fp))
1871 				WARNING( "i/o error occurred on %s", files[i].fname );
1872 			if (i == 0)
1873 				stat = fpurge(files[i].fp) == EOF;
1874 			else if (i <= 2)
1875 				stat = fflush(files[i].fp) == EOF;
1876 			else if (files[i].mode == '|' || files[i].mode == LE)
1877 				stat = pclose(files[i].fp) == -1;
1878 			else
1879 				stat = fclose(files[i].fp) == EOF;
1880 			if (stat)
1881 				WARNING( "i/o error occurred while closing %s", files[i].fname );
1882 		}
1883 	}
1884 }
1885 
flush_all(void)1886 void flush_all(void)
1887 {
1888 	size_t i;
1889 
1890 	for (i = 0; i < nfiles; i++)
1891 		if (files[i].fp)
1892 			fflush(files[i].fp);
1893 }
1894 
1895 void backsub(uschar **pb_ptr, const uschar **sptr_ptr);
1896 
sub(Node ** a,int nnn)1897 Cell *sub(Node **a, int nnn)	/* substitute command */
1898 {
1899 	const uschar *sptr;
1900 	uschar *q;
1901 	Cell *x, *y, *result;
1902 	uschar *t, *buf, *pb;
1903 	fa *pfa;
1904 	int bufsz = recsize;
1905 
1906 	if ((buf = malloc(bufsz)) == NULL)
1907 		FATAL("out of memory in sub");
1908 	x = execute(a[3]);	/* target string */
1909 	t = getsval(x);
1910 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
1911 		pfa = (fa *) a[1];	/* regular expression */
1912 	else {
1913 		y = execute(a[1]);
1914 		pfa = makedfa(getsval(y), 1);
1915 		tempfree(y);
1916 	}
1917 	y = execute(a[2]);	/* replacement string */
1918 	result = False;
1919 	if (pmatch(pfa, t)) {
1920 		sptr = t;
1921 		adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub");
1922 		pb = buf;
1923 		while (sptr < patbeg)
1924 			*pb++ = *sptr++;
1925 		sptr = getsval(y);
1926 		while (*sptr != 0) {
1927 			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub");
1928 			if (*sptr == '\\') {
1929 				backsub(&pb, &sptr);
1930 			} else if (*sptr == '&') {
1931 				sptr++;
1932 				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub");
1933 				for (q = patbeg; q < patbeg+patlen; )
1934 					*pb++ = *q++;
1935 			} else
1936 				*pb++ = *sptr++;
1937 		}
1938 		*pb = '\0';
1939 		if (pb > buf + bufsz)
1940 			FATAL("sub result1 %.30s too big; can't happen", buf);
1941 		sptr = patbeg + patlen;
1942 		if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) {
1943 			adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub");
1944 			while ((*pb++ = *sptr++) != 0)
1945 				;
1946 		}
1947 		if (pb > buf + bufsz)
1948 			FATAL("sub result2 %.30s too big; can't happen", buf);
1949 		setsval(x, buf);	/* BUG: should be able to avoid copy */
1950 		result = True;;
1951 	}
1952 	tempfree(x);
1953 	tempfree(y);
1954 	free(buf);
1955 	return result;
1956 }
1957 
gsub(Node ** a,int nnn)1958 Cell *gsub(Node **a, int nnn)	/* global substitute */
1959 {
1960 	Cell *x, *y;
1961 	const char *rptr;
1962 	const uschar *sptr;
1963 	uschar *t, *q;
1964 	uschar *pb, *buf;
1965 	fa *pfa;
1966 	int mflag, tempstat, num;
1967 	int bufsz = recsize;
1968 
1969 	if ((buf = malloc(bufsz)) == NULL)
1970 		FATAL("out of memory in gsub");
1971 	mflag = 0;	/* if mflag == 0, can replace empty string */
1972 	num = 0;
1973 	x = execute(a[3]);	/* target string */
1974 	t = getsval(x);
1975 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
1976 		pfa = (fa *) a[1];	/* regular expression */
1977 	else {
1978 		y = execute(a[1]);
1979 		pfa = makedfa(getsval(y), 1);
1980 		tempfree(y);
1981 	}
1982 	y = execute(a[2]);	/* replacement string */
1983 	if (pmatch(pfa, t)) {
1984 		tempstat = pfa->initstat;
1985 		pfa->initstat = 2;
1986 		pb = buf;
1987 		rptr = getsval(y);
1988 		do {
1989 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
1990 				if (mflag == 0) {	/* can replace empty */
1991 					num++;
1992 					sptr = rptr;
1993 					while (*sptr != 0) {
1994 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
1995 						if (*sptr == '\\') {
1996 							backsub(&pb, &sptr);
1997 						} else if (*sptr == '&') {
1998 							sptr++;
1999 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2000 							for (q = patbeg; q < patbeg+patlen; )
2001 								*pb++ = *q++;
2002 						} else
2003 							*pb++ = *sptr++;
2004 					}
2005 				}
2006 				if (*t == 0)	/* at end */
2007 					goto done;
2008 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub");
2009 				*pb++ = *t++;
2010 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2011 					FATAL("gsub result0 %.30s too big; can't happen", buf);
2012 				mflag = 0;
2013 			}
2014 			else {	/* matched nonempty string */
2015 				num++;
2016 				sptr = t;
2017 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub");
2018 				while (sptr < patbeg)
2019 					*pb++ = *sptr++;
2020 				sptr = rptr;
2021 				while (*sptr != 0) {
2022 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub");
2023 					if (*sptr == '\\') {
2024 						backsub(&pb, &sptr);
2025 					} else if (*sptr == '&') {
2026 						sptr++;
2027 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub");
2028 						for (q = patbeg; q < patbeg+patlen; )
2029 							*pb++ = *q++;
2030 					} else
2031 						*pb++ = *sptr++;
2032 				}
2033 				t = patbeg + patlen;
2034 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2035 					goto done;
2036 				if (pb > buf + bufsz)
2037 					FATAL("gsub result1 %.30s too big; can't happen", buf);
2038 				mflag = 1;
2039 			}
2040 		} while (pmatch(pfa,t));
2041 		sptr = t;
2042 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub");
2043 		while ((*pb++ = *sptr++) != 0)
2044 			;
2045 	done:	if (pb < buf + bufsz)
2046 			*pb = '\0';
2047 		else if (*(pb-1) != '\0')
2048 			FATAL("gsub result2 %.30s truncated; can't happen", buf);
2049 		setsval(x, buf);	/* BUG: should be able to avoid copy + free */
2050 		pfa->initstat = tempstat;
2051 	}
2052 	tempfree(x);
2053 	tempfree(y);
2054 	x = gettemp();
2055 	x->tval = NUM;
2056 	x->fval = num;
2057 	free(buf);
2058 	return(x);
2059 }
2060 
gensub(Node ** a,int nnn)2061 Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2062 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2063 {
2064 	Cell *x, *y, *res, *h;
2065 	char *rptr;
2066 	const uschar *sptr;
2067 	uschar *q, *pb, *t, *buf;
2068 	fa *pfa;
2069 	int mflag, tempstat, num, whichm;
2070 	int bufsz = recsize;
2071 
2072 	if ((buf = malloc(bufsz)) == NULL)
2073 		FATAL("out of memory in gensub");
2074 	mflag = 0;	/* if mflag == 0, can replace empty string */
2075 	num = 0;
2076 	x = execute(a[4]);	/* source string */
2077 	t = getsval(x);
2078 	res = copycell(x);	/* target string - initially copy of source */
2079 	res->csub = CTEMP;	/* result values are temporary */
2080 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2081 		pfa = (fa *) a[1];	/* regular expression */
2082 	else {
2083 		y = execute(a[1]);
2084 		pfa = makedfa(getsval(y), 1);
2085 		tempfree(y);
2086 	}
2087 	y = execute(a[2]);	/* replacement string */
2088 	h = execute(a[3]);	/* which matches should be replaced */
2089 	sptr = getsval(h);
2090 	if (sptr[0] == 'g' || sptr[0] == 'G')
2091 		whichm = -1;
2092 	else {
2093 		/*
2094 		 * The specified number is index of replacement, starting
2095 		 * from 1. GNU awk treats index lower than 0 same as
2096 		 * 1, we do same for compatibility.
2097 		 */
2098 		whichm = (int) getfval(h) - 1;
2099 		if (whichm < 0)
2100 			whichm = 0;
2101 	}
2102 	tempfree(h);
2103 
2104 	if (pmatch(pfa, t)) {
2105 		char *sl;
2106 
2107 		tempstat = pfa->initstat;
2108 		pfa->initstat = 2;
2109 		pb = buf;
2110 		rptr = getsval(y);
2111 		/*
2112 		 * XXX if there are any backreferences in subst string,
2113 		 * complain now.
2114 		 */
2115 		for(sl=rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2116 			if (strchr("0123456789", sl[1])) {
2117 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2118 			}
2119 		}
2120 
2121 		do {
2122 			if (whichm >= 0 && whichm != num) {
2123 				num++;
2124 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2125 
2126 				/* copy the part of string up to and including
2127 				 * match to output buffer */
2128 				while (t < patbeg + patlen)
2129 					*pb++ = *t++;
2130 				continue;
2131 			}
2132 
2133 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2134 				if (mflag == 0) {	/* can replace empty */
2135 					num++;
2136 					sptr = rptr;
2137 					while (*sptr != 0) {
2138 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2139 						if (*sptr == '\\') {
2140 							backsub(&pb, &sptr);
2141 						} else if (*sptr == '&') {
2142 							sptr++;
2143 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2144 							for (q = patbeg; q < patbeg+patlen; )
2145 								*pb++ = *q++;
2146 						} else
2147 							*pb++ = *sptr++;
2148 					}
2149 				}
2150 				if (*t == 0)	/* at end */
2151 					goto done;
2152 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2153 				*pb++ = *t++;
2154 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2155 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2156 				mflag = 0;
2157 			}
2158 			else {	/* matched nonempty string */
2159 				num++;
2160 				sptr = t;
2161 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2162 				while (sptr < patbeg)
2163 					*pb++ = *sptr++;
2164 				sptr = rptr;
2165 				while (*sptr != 0) {
2166 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2167 					if (*sptr == '\\') {
2168 						backsub(&pb, &sptr);
2169 					} else if (*sptr == '&') {
2170 						sptr++;
2171 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2172 						for (q = patbeg; q < patbeg+patlen; )
2173 							*pb++ = *q++;
2174 					} else
2175 						*pb++ = *sptr++;
2176 				}
2177 				t = patbeg + patlen;
2178 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2179 					goto done;
2180 				if (pb > buf + bufsz)
2181 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2182 				mflag = 1;
2183 			}
2184 		} while (pmatch(pfa,t));
2185 		sptr = t;
2186 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2187 		while ((*pb++ = *sptr++) != 0)
2188 			;
2189 	done:	if (pb > buf + bufsz)
2190 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2191 		*pb = '\0';
2192 		setsval(res, buf);
2193 		pfa->initstat = tempstat;
2194 	}
2195 	tempfree(x);
2196 	tempfree(y);
2197 	free(buf);
2198 	return(res);
2199 }
2200 
/*
 * backsub: handle \\& variations in a replacement string.
 * On entry (*sptr_ptr)[0] is a backslash.  The translated characters
 * are written through *pb_ptr and both pointers are advanced:
 *	\\\&	-> \&  (4 characters consumed)
 *	\\&	-> \   (2 consumed; the '&' is left for the caller)
 *	\\x	-> \\  (both backslashes copied)
 *	\&	-> &
 *	\x	-> \   (just the backslash is copied)
 */
void backsub(uschar **pb_ptr, const uschar **sptr_ptr)
{
	uschar *out = *pb_ptr;
	const uschar *in = *sptr_ptr;

	if (in[1] == '&') {		/* literal & */
		*out++ = in[1];
		in += 2;
	} else if (in[1] != '\\') {	/* literal \ */
		*out++ = *in++;
	} else if (in[2] == '\\' && in[3] == '&') {	/* \\\& -> \& */
		*out++ = '\\';
		*out++ = '&';
		in += 4;
	} else if (in[2] == '&') {	/* \\& -> \ + matched */
		*out++ = '\\';
		in += 2;
	} else {			/* \\x -> \\x */
		*out++ = *in++;
		*out++ = *in++;
	}

	*pb_ptr = out;
	*sptr_ptr = in;
}
2227