xref: /openbsd/usr.bin/awk/run.c (revision 510d2225)
1 /*	$OpenBSD: run.c,v 1.83 2023/11/28 20:54:38 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 #define DEBUG
27 #include <stdio.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <wctype.h>
31 #include <fcntl.h>
32 #include <setjmp.h>
33 #include <limits.h>
34 #include <math.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <time.h>
38 #include <sys/types.h>
39 #include <sys/wait.h>
40 #include "awk.h"
41 #include "awkgram.tab.h"
42 
43 
44 static void stdinit(void);
45 static void flush_all(void);
46 static char *wide_char_to_byte_str(int rune, size_t *outlen);
47 
/*
 * tempfree(x) passes a cell to tfree() when istemp(x) reports that it is a
 * temporary.  The macro variant is the one that gets compiled; the disabled
 * function variant additionally warns about an OCELL whose csub lies outside
 * the CUNK..CFREE range.
 */
#if 1
#define tempfree(x)	do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0)
#else
void tempfree(Cell *p)
{
	bool badsub = p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE);

	if (badsub) {
		WARNING("bad csub %d in Cell %d %s",
			p->csub, p->ctype, p->sval);
	}
	if (istemp(p)) {
		tfree(p);
	}
}
#endif
60 
61 /* do we really need these? */
62 /* #ifdef _NFILE */
63 /* #ifndef FOPEN_MAX */
64 /* #define FOPEN_MAX _NFILE */
65 /* #endif */
66 /* #endif */
67 /*  */
68 /* #ifndef	FOPEN_MAX */
69 /* #define	FOPEN_MAX	40 */	/* max number of open files */
70 /* #endif */
71 /*  */
72 /* #ifndef RAND_MAX */
73 /* #define RAND_MAX	32767 */	/* all that ansi guarantees */
74 /* #endif */
75 
76 jmp_buf env;
77 extern	int	pairstack[];
78 extern	Awkfloat	srand_seed;
79 
80 Node	*winner = NULL;	/* root of parse tree */
81 Cell	*tmps;		/* free temporary cells for execution */
82 
83 static Cell	truecell	={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL };
84 Cell	*True	= &truecell;
85 static Cell	falsecell	={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL };
86 Cell	*False	= &falsecell;
87 static Cell	breakcell	={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL };
88 Cell	*jbreak	= &breakcell;
89 static Cell	contcell	={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL };
90 Cell	*jcont	= &contcell;
91 static Cell	nextcell	={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL };
92 Cell	*jnext	= &nextcell;
93 static Cell	nextfilecell	={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL };
94 Cell	*jnextfile	= &nextfilecell;
95 static Cell	exitcell	={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL };
96 Cell	*jexit	= &exitcell;
97 static Cell	retcell		={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL };
98 Cell	*jret	= &retcell;
99 static Cell	tempcell	={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
100 
101 Node	*curnode = NULL;	/* the node being executed, for debugging */
102 
103 /* buffer memory management */
/*
 * adjbuf - make sure a managed heap buffer can hold at least minlen bytes,
 * growing it with realloc() when it cannot.
 *
 * pbuf:    address of pointer to buffer being managed
 * psiz:    address of buffer size variable
 * minlen:  minimum length of buffer needed
 * quantum: buffer size quantum (0 disables rounding)
 * pbptr:   address of movable pointer into buffer, or 0 if none
 * whatrtn: name of the calling routine if failure should cause fatal error
 *
 * return   0 for failure (realloc failed, or the rounded size does not
 *          fit in an int), !=0 for success
 */
int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr,
	const char *whatrtn)
{
	char *tbuf;
	int rminlen, boff;

	if (minlen <= *psiz)
		return 1;	/* already large enough, nothing to do */

	rminlen = quantum ? minlen % quantum : 0;
	/* remember where the movable pointer sits relative to the buffer */
	boff = pbptr ? *pbptr - *pbuf : 0;
	/* round up to next multiple of quantum */
	if (rminlen) {
		int pad = quantum - rminlen;

		/* signed overflow is undefined: treat it as an allocation failure */
		if (pad > 0 && minlen > INT_MAX - pad) {
			if (whatrtn)
				FATAL("out of memory in %s", whatrtn);
			return 0;
		}
		minlen += pad;
	}
	tbuf = (char *) realloc(*pbuf, minlen);
	/* whatrtn may legitimately be NULL, so guard it before handing it to %s */
	DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", NN(whatrtn), *psiz, minlen, (void*)*pbuf, (void*)tbuf);
	if (tbuf == NULL) {
		if (whatrtn)
			FATAL("out of memory in %s", whatrtn);
		return 0;
	}
	*pbuf = tbuf;
	*psiz = minlen;
	if (pbptr)
		*pbptr = tbuf + boff;	/* re-seat the movable pointer in the new block */
	return 1;
}
137 
138 void run(Node *a)	/* execution of parse tree starts here */
139 {
140 
141 	stdinit();
142 	execute(a);
143 	closeall();
144 }
145 
146 Cell *execute(Node *u)	/* execute a node of the parse tree */
147 {
148 	Cell *(*proc)(Node **, int);
149 	Cell *x;
150 	Node *a;
151 
152 	if (u == NULL)
153 		return(True);
154 	for (a = u; ; a = a->nnext) {
155 		curnode = a;
156 		if (isvalue(a)) {
157 			x = (Cell *) (a->narg[0]);
158 			if (isfld(x) && !donefld)
159 				fldbld();
160 			else if (isrec(x) && !donerec)
161 				recbld();
162 			return(x);
163 		}
164 		if (notlegal(a->nobj))	/* probably a Cell* but too risky to print */
165 			FATAL("illegal statement");
166 		proc = proctab[a->nobj-FIRSTTOKEN];
167 		x = (*proc)(a->narg, a->nobj);
168 		if (isfld(x) && !donefld)
169 			fldbld();
170 		else if (isrec(x) && !donerec)
171 			recbld();
172 		if (isexpr(a))
173 			return(x);
174 		if (isjump(x))
175 			return(x);
176 		if (a->nnext == NULL)
177 			return(x);
178 		tempfree(x);
179 	}
180 }
181 
182 
183 Cell *program(Node **a, int n)	/* execute an awk program */
184 {				/* a[0] = BEGIN, a[1] = body, a[2] = END */
185 	Cell *x;
186 
187 	if (setjmp(env) != 0)
188 		goto ex;
189 	if (a[0]) {		/* BEGIN */
190 		x = execute(a[0]);
191 		if (isexit(x))
192 			return(True);
193 		if (isjump(x))
194 			FATAL("illegal break, continue, next or nextfile from BEGIN");
195 		tempfree(x);
196 	}
197 	if (a[1] || a[2])
198 		while (getrec(&record, &recsize, true) > 0) {
199 			x = execute(a[1]);
200 			if (isexit(x))
201 				break;
202 			tempfree(x);
203 		}
204   ex:
205 	if (setjmp(env) != 0)	/* handles exit within END */
206 		goto ex1;
207 	if (a[2]) {		/* END */
208 		x = execute(a[2]);
209 		if (isbreak(x) || isnext(x) || iscont(x))
210 			FATAL("illegal break, continue, next or nextfile from END");
211 		tempfree(x);
212 	}
213   ex1:
214 	return(True);
215 }
216 
217 struct Frame {	/* stack frame for awk function calls */
218 	int nargs;	/* number of arguments in this call */
219 	Cell *fcncell;	/* pointer to Cell for function */
220 	Cell **args;	/* pointer to array of arguments after execute */
221 	Cell *retval;	/* return value */
222 };
223 
224 #define	NARGS	50	/* max args in a call */
225 
226 struct Frame *frame = NULL;	/* base of stack frames; dynamically allocated */
227 int	nframe = 0;		/* number of frames allocated */
228 struct Frame *frp = NULL;	/* frame pointer. bottom level unused */
229 
230 Cell *call(Node **a, int n)	/* function call.  very kludgy and fragile */
231 {
232 	static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL };
233 	int i, ncall, ndef;
234 	int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */
235 	Node *x;
236 	Cell *args[NARGS], *oargs[NARGS];	/* BUG: fixed size arrays */
237 	Cell *y, *z, *fcn;
238 	char *s;
239 
240 	fcn = execute(a[0]);	/* the function itself */
241 	s = fcn->nval;
242 	if (!isfcn(fcn))
243 		FATAL("calling undefined function %s", s);
244 	if (frame == NULL) {
245 		frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame));
246 		if (frame == NULL)
247 			FATAL("out of space for stack frames calling %s", s);
248 	}
249 	for (ncall = 0, x = a[1]; x != NULL; x = x->nnext)	/* args in call */
250 		ncall++;
251 	ndef = (int) fcn->fval;			/* args in defn */
252 	DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame));
253 	if (ncall > ndef)
254 		WARNING("function %s called with %d args, uses only %d",
255 			s, ncall, ndef);
256 	if (ncall + ndef > NARGS)
257 		FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS);
258 	for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) {	/* get call args */
259 		DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame));
260 		y = execute(x);
261 		oargs[i] = y;
262 		DPRINTF("args[%d]: %s %f <%s>, t=%o\n",
263 			i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval);
264 		if (isfcn(y))
265 			FATAL("can't use function %s as argument in %s", y->nval, s);
266 		if (isarr(y))
267 			args[i] = y;	/* arrays by ref */
268 		else
269 			args[i] = copycell(y);
270 		tempfree(y);
271 	}
272 	for ( ; i < ndef; i++) {	/* add null args for ones not provided */
273 		args[i] = gettemp();
274 		*args[i] = newcopycell;
275 	}
276 	frp++;	/* now ok to up frame */
277 	if (frp >= frame + nframe) {
278 		int dfp = frp - frame;	/* old index */
279 		frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame));
280 		if (frame == NULL)
281 			FATAL("out of space for stack frames in %s", s);
282 		frp = frame + dfp;
283 	}
284 	frp->fcncell = fcn;
285 	frp->args = args;
286 	frp->nargs = ndef;	/* number defined with (excess are locals) */
287 	frp->retval = gettemp();
288 
289 	DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame));
290 	y = execute((Node *)(fcn->sval));	/* execute body */
291 	DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame));
292 
293 	for (i = 0; i < ndef; i++) {
294 		Cell *t = frp->args[i];
295 		if (isarr(t)) {
296 			if (t->csub == CCOPY) {
297 				if (i >= ncall) {
298 					freesymtab(t);
299 					t->csub = CTEMP;
300 					tempfree(t);
301 				} else {
302 					oargs[i]->tval = t->tval;
303 					oargs[i]->tval &= ~(STR|NUM|DONTFREE);
304 					oargs[i]->sval = t->sval;
305 					tempfree(t);
306 				}
307 			}
308 		} else if (t != y) {	/* kludge to prevent freeing twice */
309 			t->csub = CTEMP;
310 			tempfree(t);
311 		} else if (t == y && t->csub == CCOPY) {
312 			t->csub = CTEMP;
313 			tempfree(t);
314 			freed = 1;
315 		}
316 	}
317 	tempfree(fcn);
318 	if (isexit(y) || isnext(y))
319 		return y;
320 	if (freed == 0) {
321 		tempfree(y);	/* don't free twice! */
322 	}
323 	z = frp->retval;			/* return value */
324 	DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval);
325 	frp--;
326 	return(z);
327 }
328 
329 Cell *copycell(Cell *x)	/* make a copy of a cell in a temp */
330 {
331 	Cell *y;
332 
333 	/* copy is not constant or field */
334 
335 	y = gettemp();
336 	y->tval = x->tval & ~(CON|FLD|REC);
337 	y->csub = CCOPY;	/* prevents freeing until call is over */
338 	y->nval = x->nval;	/* BUG? */
339 	if (isstr(x) /* || x->ctype == OCELL */) {
340 		y->sval = tostring(x->sval);
341 		y->tval &= ~DONTFREE;
342 	} else
343 		y->tval |= DONTFREE;
344 	y->fval = x->fval;
345 	return y;
346 }
347 
348 Cell *arg(Node **a, int n)	/* nth argument of a function */
349 {
350 
351 	n = ptoi(a[0]);	/* argument number, counting from 0 */
352 	DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs);
353 	if (n+1 > frp->nargs)
354 		FATAL("argument #%d of function %s was not supplied",
355 			n+1, frp->fcncell->nval);
356 	return frp->args[n];
357 }
358 
359 Cell *jump(Node **a, int n)	/* break, continue, next, nextfile, return */
360 {
361 	Cell *y;
362 
363 	switch (n) {
364 	case EXIT:
365 		if (a[0] != NULL) {
366 			y = execute(a[0]);
367 			errorflag = (int) getfval(y);
368 			tempfree(y);
369 		}
370 		longjmp(env, 1);
371 	case RETURN:
372 		if (a[0] != NULL) {
373 			y = execute(a[0]);
374 			if ((y->tval & (STR|NUM)) == (STR|NUM)) {
375 				setsval(frp->retval, getsval(y));
376 				frp->retval->fval = getfval(y);
377 				frp->retval->tval |= NUM;
378 			}
379 			else if (y->tval & STR)
380 				setsval(frp->retval, getsval(y));
381 			else if (y->tval & NUM)
382 				setfval(frp->retval, getfval(y));
383 			else		/* can't happen */
384 				FATAL("bad type variable %d", y->tval);
385 			tempfree(y);
386 		}
387 		return(jret);
388 	case NEXT:
389 		return(jnext);
390 	case NEXTFILE:
391 		nextfile();
392 		return(jnextfile);
393 	case BREAK:
394 		return(jbreak);
395 	case CONTINUE:
396 		return(jcont);
397 	default:	/* can't happen */
398 		FATAL("illegal jump type %d", n);
399 	}
400 	return 0;	/* not reached */
401 }
402 
403 Cell *awkgetline(Node **a, int n)	/* get next line from specific input */
404 {		/* a[0] is variable, a[1] is operator, a[2] is filename */
405 	Cell *r, *x;
406 	extern Cell **fldtab;
407 	FILE *fp;
408 	char *buf;
409 	int bufsize = recsize;
410 	int mode;
411 	bool newflag;
412 	double result;
413 
414 	if ((buf = (char *) malloc(bufsize)) == NULL)
415 		FATAL("out of memory in getline");
416 
417 	fflush(stdout);	/* in case someone is waiting for a prompt */
418 	r = gettemp();
419 	if (a[1] != NULL) {		/* getline < file */
420 		x = execute(a[2]);		/* filename */
421 		mode = ptoi(a[1]);
422 		if (mode == '|')		/* input pipe */
423 			mode = LE;	/* arbitrary flag */
424 		fp = openfile(mode, getsval(x), &newflag);
425 		tempfree(x);
426 		if (fp == NULL)
427 			n = -1;
428 		else
429 			n = readrec(&buf, &bufsize, fp, newflag);
430 		if (n <= 0) {
431 			;
432 		} else if (a[0] != NULL) {	/* getline var <file */
433 			x = execute(a[0]);
434 			setsval(x, buf);
435 			if (is_number(x->sval, & result)) {
436 				x->fval = result;
437 				x->tval |= NUM;
438 			}
439 			tempfree(x);
440 		} else {			/* getline <file */
441 			setsval(fldtab[0], buf);
442 			if (is_number(fldtab[0]->sval, & result)) {
443 				fldtab[0]->fval = result;
444 				fldtab[0]->tval |= NUM;
445 			}
446 		}
447 	} else {			/* bare getline; use current input */
448 		if (a[0] == NULL)	/* getline */
449 			n = getrec(&record, &recsize, true);
450 		else {			/* getline var */
451 			n = getrec(&buf, &bufsize, false);
452 			if (n > 0) {
453 				x = execute(a[0]);
454 				setsval(x, buf);
455 				if (is_number(x->sval, & result)) {
456 					x->fval = result;
457 					x->tval |= NUM;
458 				}
459 				tempfree(x);
460 			}
461 		}
462 	}
463 	setfval(r, (Awkfloat) n);
464 	free(buf);
465 	return r;
466 }
467 
468 Cell *getnf(Node **a, int n)	/* get NF */
469 {
470 	if (!donefld)
471 		fldbld();
472 	return (Cell *) a[0];
473 }
474 
475 static char *
476 makearraystring(Node *p, const char *func)
477 {
478 	char *buf;
479 	int bufsz = recsize;
480 	size_t blen;
481 
482 	if ((buf = (char *) malloc(bufsz)) == NULL) {
483 		FATAL("%s: out of memory", func);
484 	}
485 
486 	blen = 0;
487 	buf[blen] = '\0';
488 
489 	for (; p; p = p->nnext) {
490 		Cell *x = execute(p);	/* expr */
491 		char *s = getsval(x);
492 		size_t seplen = strlen(getsval(subseploc));
493 		size_t nsub = p->nnext ? seplen : 0;
494 		size_t slen = strlen(s);
495 		size_t tlen = blen + slen + nsub;
496 
497 		if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) {
498 			FATAL("%s: out of memory %s[%s...]",
499 			    func, x->nval, buf);
500 		}
501 		memcpy(buf + blen, s, slen);
502 		if (nsub) {
503 			memcpy(buf + blen + slen, *SUBSEP, nsub);
504 		}
505 		buf[tlen] = '\0';
506 		blen = tlen;
507 		tempfree(x);
508 	}
509 	return buf;
510 }
511 
512 Cell *array(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
513 {
514 	Cell *x, *z;
515 	char *buf;
516 
517 	x = execute(a[0]);	/* Cell* for symbol table */
518 	buf = makearraystring(a[1], __func__);
519 	if (!isarr(x)) {
520 		DPRINTF("making %s into an array\n", NN(x->nval));
521 		if (freeable(x))
522 			xfree(x->sval);
523 		x->tval &= ~(STR|NUM|DONTFREE);
524 		x->tval |= ARR;
525 		x->sval = (char *) makesymtab(NSYMTAB);
526 	}
527 	z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval);
528 	z->ctype = OCELL;
529 	z->csub = CVAR;
530 	tempfree(x);
531 	free(buf);
532 	return(z);
533 }
534 
535 Cell *awkdelete(Node **a, int n)	/* a[0] is symtab, a[1] is list of subscripts */
536 {
537 	Cell *x;
538 
539 	x = execute(a[0]);	/* Cell* for symbol table */
540 	if (x == symtabloc) {
541 		FATAL("cannot delete SYMTAB or its elements");
542 	}
543 	if (!isarr(x))
544 		return True;
545 	if (a[1] == NULL) {	/* delete the elements, not the table */
546 		freesymtab(x);
547 		x->tval &= ~STR;
548 		x->tval |= ARR;
549 		x->sval = (char *) makesymtab(NSYMTAB);
550 	} else {
551 		char *buf = makearraystring(a[1], __func__);
552 		freeelem(x, buf);
553 		free(buf);
554 	}
555 	tempfree(x);
556 	return True;
557 }
558 
559 Cell *intest(Node **a, int n)	/* a[0] is index (list), a[1] is symtab */
560 {
561 	Cell *ap, *k;
562 	char *buf;
563 
564 	ap = execute(a[1]);	/* array name */
565 	if (!isarr(ap)) {
566 		DPRINTF("making %s into an array\n", ap->nval);
567 		if (freeable(ap))
568 			xfree(ap->sval);
569 		ap->tval &= ~(STR|NUM|DONTFREE);
570 		ap->tval |= ARR;
571 		ap->sval = (char *) makesymtab(NSYMTAB);
572 	}
573 	buf = makearraystring(a[0], __func__);
574 	k = lookup(buf, (Array *) ap->sval);
575 	tempfree(ap);
576 	free(buf);
577 	if (k == NULL)
578 		return(False);
579 	else
580 		return(True);
581 }
582 
583 
584 /* ======== utf-8 code ========== */
585 
586 /*
587  * Awk strings can contain ascii, random 8-bit items (eg Latin-1),
588  * or utf-8.  u8_isutf tests whether a string starts with a valid
589  * utf-8 sequence, and returns 0 if not (e.g., high bit set).
590  * u8_nextlen returns length of next valid sequence, which is
591  * 1 for ascii, 2..4 for utf-8, or 1 for high bit non-utf.
592  * u8_strlen returns length of string in valid utf-8 sequences
593  * and/or high-bit bytes.  Conversion functions go between byte
594  * number and character number.
595  *
596  * In theory, this behaves the same as before for non-utf8 bytes.
597  *
598  * Limited checking! This is a potential security hole.
599  */
600 
601 /* is s the beginning of a valid utf-8 string? */
602 /* return length 1..4 if yes, 0 if no */
603 int u8_isutf(const char *s)
604 {
605 	int n, ret;
606 	unsigned char c;
607 
608 	c = s[0];
609 	if (c < 128 || awk_mb_cur_max == 1)
610 		return 1; /* what if it's 0? */
611 
612 	n = strlen(s);
613 	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
614 		ret = 2; /* 110xxxxx 10xxxxxx */
615 	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
616 			 && (s[2] & 0xC0) == 0x80) {
617 		ret = 3; /* 1110xxxx 10xxxxxx 10xxxxxx */
618 	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
619 			 && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
620 		ret = 4; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
621 	} else {
622 		ret = 0;
623 	}
624 	return ret;
625 }
626 
627 /* Convert (prefix of) utf8 string to utf-32 rune. */
628 /* Sets *rune to the value, returns the length. */
629 /* No error checking: watch out. */
630 int u8_rune(int *rune, const char *s)
631 {
632 	int n, ret;
633 	unsigned char c;
634 
635 	c = s[0];
636 	if (c < 128 || awk_mb_cur_max == 1) {
637 		*rune = c;
638 		return 1;
639 	}
640 
641 	n = strlen(s);
642 	if (n >= 2 && ((c>>5) & 0x7) == 0x6 && (s[1] & 0xC0) == 0x80) {
643 		*rune = ((c & 0x1F) << 6) | (s[1] & 0x3F); /* 110xxxxx 10xxxxxx */
644 		ret = 2;
645 	} else if (n >= 3 && ((c>>4) & 0xF) == 0xE && (s[1] & 0xC0) == 0x80
646 			  && (s[2] & 0xC0) == 0x80) {
647 		*rune = ((c & 0xF) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
648 			/* 1110xxxx 10xxxxxx 10xxxxxx */
649 		ret = 3;
650 	} else if (n >= 4 && ((c>>3) & 0x1F) == 0x1E && (s[1] & 0xC0) == 0x80
651 			  && (s[2] & 0xC0) == 0x80 && (s[3] & 0xC0) == 0x80) {
652 		*rune = ((c & 0x7) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
653 			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
654 		ret = 4;
655 	} else {
656 		*rune = c;
657 		ret = 1;
658 	}
659 	return ret; /* returns one byte if sequence doesn't look like utf */
660 }
661 
/*
 * return length of next sequence: 1 for ascii or random, 2..4 for valid utf8
 * (u8_isutf()'s "not utf-8" answer of 0 is mapped to a single byte)
 */
int u8_nextlen(const char *s)
{
	int len = u8_isutf(s);

	return (len != 0) ? len : 1;
}
672 
/*
 * return number of utf characters or single non-utf bytes
 *
 * Steps through s one sequence at a time using u8_nextlen(), which already
 * answers 1 for ASCII bytes and for every byte when awk_mb_cur_max is 1.
 * u8_nextlen() only reports a multi-byte length after seeing that many
 * non-NUL continuation bytes, so the walk always lands exactly on the
 * terminator; the former "i > n" overrun check sat inside a loop guarded by
 * "i < n" and could never fire.
 */
int u8_strlen(const char *s)
{
	int totlen = 0;

	while (*s != '\0') {
		s += u8_nextlen(s);
		totlen++;
	}
	return totlen;
}
694 
/*
 * convert utf-8 char number in a string to its byte offset
 *
 * Returns the byte offset at which character number charnum (counting from
 * 0) starts.  If s holds fewer than charnum characters, the walk stops at
 * the terminating NUL and the byte length of s is returned, instead of
 * stepping past the end of the string.
 */
int u8_char2byte(const char *s, int charnum)
{
	int n;
	int bytenum = 0;

	while (charnum > 0 && *s != '\0') {
		n = u8_nextlen(s);
		s += n;
		bytenum += n;
		charnum--;
	}
	return bytenum;
}
709 
/*
 * convert byte offset in s to utf-8 char number that starts there
 *
 * The result counts from 1: offset 0 gives 1.  An offset beyond the
 * terminating NUL gives -1; a negative offset gives 0.
 */
int u8_byte2char(const char *s, int bytenum)
{
	int off = 0;
	int charnum = 0; /* BUG: what origin? */
	/* should be 0 to match start==0 which means no match */
	int b = strlen(s);

	if (bytenum > b)
		return -1; /* ??? */

	/* count every sequence that starts at or before bytenum */
	while (off <= bytenum) {
		off += u8_nextlen(s+off);
		charnum++;
	}
	return charnum;
}
727 
/* runetochar() adapted from rune.c in the Plan 9 distribution */

enum
{
	/*
	 * Substituted for code points above Runemax.  It must be U+FFFD,
	 * which encodes to three bytes (EF BF BD); a value below 0x800 would
	 * be emitted by the three-byte branch as an overlong, invalid
	 * sequence.
	 */
	Runeerror = 0xFFFD,
	Runemax = 0x10FFFF,

	Bit1    = 7,
	Bitx    = 6,
	Bit2    = 5,
	Bit3    = 4,
	Bit4    = 3,
	Bit5    = 2,

	T1      = ((1<<(Bit1+1))-1) ^ 0xFF,     /* 0000 0000 */
	Tx      = ((1<<(Bitx+1))-1) ^ 0xFF,     /* 1000 0000 */
	T2      = ((1<<(Bit2+1))-1) ^ 0xFF,     /* 1100 0000 */
	T3      = ((1<<(Bit3+1))-1) ^ 0xFF,     /* 1110 0000 */
	T4      = ((1<<(Bit4+1))-1) ^ 0xFF,     /* 1111 0000 */
	T5      = ((1<<(Bit5+1))-1) ^ 0xFF,     /* 1111 1000 */

	Rune1   = (1<<(Bit1+0*Bitx))-1,	 	/* 0000 0000 0000 0000 0111 1111 */
	Rune2   = (1<<(Bit2+1*Bitx))-1,	 	/* 0000 0000 0000 0111 1111 1111 */
	Rune3   = (1<<(Bit3+2*Bitx))-1,	 	/* 0000 0000 1111 1111 1111 1111 */
	Rune4   = (1<<(Bit4+3*Bitx))-1,	 	/* 0001 1111 1111 1111 1111 1111 */

	Maskx   = (1<<Bitx)-1,		  	/* 0011 1111 */
	Testx   = Maskx ^ 0xFF,		 	/* 1100 0000 */

};

/*
 * runetochar - encode code point c as UTF-8 into str.
 *
 * str must have room for up to 4 bytes; no terminating NUL is written.
 * Code points above Runemax are replaced by Runeerror (U+FFFD).
 * Returns the number of bytes stored (1..4).
 */
int runetochar(char *str, int c)
{
	/* one character sequence 00000-0007F => 00-7F */
	if (c <= Rune1) {
		str[0] = c;
		return 1;
	}

	/* two character sequence 00080-007FF => T2 Tx */
	if (c <= Rune2) {
		str[0] = T2 | (c >> 1*Bitx);
		str[1] = Tx | (c & Maskx);
		return 2;
	}

	/*
	 * If the rune is out of range, convert it to the error rune.
	 * The test sits here because the error rune needs three bytes and
	 * an out-of-range rune could not have matched the cases above.
	 */
	if (c > Runemax)
		c = Runeerror;

	/* three character sequence 00800-0FFFF => T3 Tx Tx */
	if (c <= Rune3) {
		str[0] = T3 |  (c >> 2*Bitx);
		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
		str[2] = Tx |  (c & Maskx);
		return 3;
	}

	/* four character sequence 010000-1FFFFF => T4 Tx Tx Tx */
	str[0] = T4 |  (c >> 3*Bitx);
	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
	str[3] = Tx |  (c & Maskx);
	return 4;
}
791 
792 
793 /* ========== end of utf8 code =========== */
794 
795 
796 
797 Cell *matchop(Node **a, int n)	/* ~ and match() */
798 {
799 	Cell *x, *y;
800 	char *s, *t;
801 	int i;
802 	int cstart, cpatlen, len;
803 	fa *pfa;
804 	int (*mf)(fa *, const char *) = match, mode = 0;
805 
806 	if (n == MATCHFCN) {
807 		mf = pmatch;
808 		mode = 1;
809 	}
810 	x = execute(a[1]);	/* a[1] = target text */
811 	s = getsval(x);
812 	if (a[0] == NULL)	/* a[1] == 0: already-compiled reg expr */
813 		i = (*mf)((fa *) a[2], s);
814 	else {
815 		y = execute(a[2]);	/* a[2] = regular expr */
816 		t = getsval(y);
817 		pfa = makedfa(t, mode);
818 		i = (*mf)(pfa, s);
819 		tempfree(y);
820 	}
821 	tempfree(x);
822 	if (n == MATCHFCN) {
823 		int start = patbeg - s + 1; /* origin 1 */
824 		if (patlen < 0) {
825 			start = 0; /* not found */
826 		} else {
827 			cstart = u8_byte2char(s, start-1);
828 			cpatlen = 0;
829 			for (i = 0; i < patlen; i += len) {
830 				len = u8_nextlen(patbeg+i);
831 				cpatlen++;
832 			}
833 
834 			start = cstart;
835 			patlen = cpatlen;
836 		}
837 
838 		setfval(rstartloc, (Awkfloat) start);
839 		setfval(rlengthloc, (Awkfloat) patlen);
840 		x = gettemp();
841 		x->tval = NUM;
842 		x->fval = start;
843 		return x;
844 	} else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0))
845 		return(True);
846 	else
847 		return(False);
848 }
849 
850 
851 Cell *boolop(Node **a, int n)	/* a[0] || a[1], a[0] && a[1], !a[0] */
852 {
853 	Cell *x, *y;
854 	int i;
855 
856 	x = execute(a[0]);
857 	i = istrue(x);
858 	tempfree(x);
859 	switch (n) {
860 	case BOR:
861 		if (i) return(True);
862 		y = execute(a[1]);
863 		i = istrue(y);
864 		tempfree(y);
865 		if (i) return(True);
866 		else return(False);
867 	case AND:
868 		if ( !i ) return(False);
869 		y = execute(a[1]);
870 		i = istrue(y);
871 		tempfree(y);
872 		if (i) return(True);
873 		else return(False);
874 	case NOT:
875 		if (i) return(False);
876 		else return(True);
877 	default:	/* can't happen */
878 		FATAL("unknown boolean operator %d", n);
879 	}
880 	return 0;	/*NOTREACHED*/
881 }
882 
883 Cell *relop(Node **a, int n)	/* a[0 < a[1], etc. */
884 {
885 	int i;
886 	Cell *x, *y;
887 	Awkfloat j;
888 	bool x_is_nan, y_is_nan;
889 
890 	x = execute(a[0]);
891 	y = execute(a[1]);
892 	x_is_nan = isnan(x->fval);
893 	y_is_nan = isnan(y->fval);
894 	if (x->tval&NUM && y->tval&NUM) {
895 		if ((x_is_nan || y_is_nan) && n != NE)
896 			return(False);
897 		j = x->fval - y->fval;
898 		i = j<0? -1: (j>0? 1: 0);
899 	} else {
900 		i = strcmp(getsval(x), getsval(y));
901 	}
902 	tempfree(x);
903 	tempfree(y);
904 	switch (n) {
905 	case LT:	if (i<0) return(True);
906 			else return(False);
907 	case LE:	if (i<=0) return(True);
908 			else return(False);
909 	case NE:	if (x_is_nan && y_is_nan) return(True);
910 			else if (i!=0) return(True);
911 			else return(False);
912 	case EQ:	if (i == 0) return(True);
913 			else return(False);
914 	case GE:	if (i>=0) return(True);
915 			else return(False);
916 	case GT:	if (i>0) return(True);
917 			else return(False);
918 	default:	/* can't happen */
919 		FATAL("unknown relational operator %d", n);
920 	}
921 	return 0;	/*NOTREACHED*/
922 }
923 
924 void tfree(Cell *a)	/* free a tempcell */
925 {
926 	if (freeable(a)) {
927 		DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval);
928 		xfree(a->sval);
929 	}
930 	if (a == tmps)
931 		FATAL("tempcell list is curdled");
932 	a->cnext = tmps;
933 	tmps = a;
934 }
935 
936 Cell *gettemp(void)	/* get a tempcell */
937 {	int i;
938 	Cell *x;
939 
940 	if (!tmps) {
941 		tmps = (Cell *) calloc(100, sizeof(*tmps));
942 		if (!tmps)
943 			FATAL("out of space for temporaries");
944 		for (i = 1; i < 100; i++)
945 			tmps[i-1].cnext = &tmps[i];
946 		tmps[i-1].cnext = NULL;
947 	}
948 	x = tmps;
949 	tmps = x->cnext;
950 	*x = tempcell;
951 	return(x);
952 }
953 
954 Cell *indirect(Node **a, int n)	/* $( a[0] ) */
955 {
956 	Awkfloat val;
957 	Cell *x;
958 	int m;
959 	char *s;
960 
961 	x = execute(a[0]);
962 	val = getfval(x);	/* freebsd: defend against super large field numbers */
963 	if ((Awkfloat)INT_MAX < val)
964 		FATAL("trying to access out of range field %s", x->nval);
965 	m = (int) val;
966 	if (m == 0 && !is_number(s = getsval(x), NULL))	/* suspicion! */
967 		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
968 		/* BUG: can x->nval ever be null??? */
969 	tempfree(x);
970 	x = fieldadr(m);
971 	x->ctype = OCELL;	/* BUG?  why are these needed? */
972 	x->csub = CFLD;
973 	return(x);
974 }
975 
976 Cell *substr(Node **a, int nnn)		/* substr(a[0], a[1], a[2]) */
977 {
978 	int k, m, n;
979 	int mb, nb;
980 	char *s;
981 	int temp;
982 	Cell *x, *y, *z = NULL;
983 
984 	x = execute(a[0]);
985 	y = execute(a[1]);
986 	if (a[2] != NULL)
987 		z = execute(a[2]);
988 	s = getsval(x);
989 	k = u8_strlen(s) + 1;
990 	if (k <= 1) {
991 		tempfree(x);
992 		tempfree(y);
993 		if (a[2] != NULL) {
994 			tempfree(z);
995 		}
996 		x = gettemp();
997 		setsval(x, "");
998 		return(x);
999 	}
1000 	m = (int) getfval(y);
1001 	if (m <= 0)
1002 		m = 1;
1003 	else if (m > k)
1004 		m = k;
1005 	tempfree(y);
1006 	if (a[2] != NULL) {
1007 		n = (int) getfval(z);
1008 		tempfree(z);
1009 	} else
1010 		n = k - 1;
1011 	if (n < 0)
1012 		n = 0;
1013 	else if (n > k - m)
1014 		n = k - m;
1015 	/* m is start, n is length from there */
1016 	DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s);
1017 	y = gettemp();
1018 	mb = u8_char2byte(s, m-1); /* byte offset of start char in s */
1019 	nb = u8_char2byte(s, m-1+n);  /* byte offset of end+1 char in s */
1020 
1021 	temp = s[nb];	/* with thanks to John Linderman */
1022 	s[nb] = '\0';
1023 	setsval(y, s + mb);
1024 	s[nb] = temp;
1025 	tempfree(x);
1026 	return(y);
1027 }
1028 
1029 Cell *sindex(Node **a, int nnn)		/* index(a[0], a[1]) */
1030 {
1031 	Cell *x, *y, *z;
1032 	char *s1, *s2, *p1, *p2, *q;
1033 	Awkfloat v = 0.0;
1034 
1035 	x = execute(a[0]);
1036 	s1 = getsval(x);
1037 	y = execute(a[1]);
1038 	s2 = getsval(y);
1039 
1040 	z = gettemp();
1041 	for (p1 = s1; *p1 != '\0'; p1++) {
1042 		for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++)
1043 			continue;
1044 		if (*p2 == '\0') {
1045 			/* v = (Awkfloat) (p1 - s1 + 1);	 origin 1 */
1046 
1047 		   /* should be a function: used in match() as well */
1048 			int i, len;
1049 			v = 0;
1050 			for (i = 0; i < p1-s1+1; i += len) {
1051 				len = u8_nextlen(s1+i);
1052 				v++;
1053 			}
1054 			break;
1055 		}
1056 	}
1057 	tempfree(x);
1058 	tempfree(y);
1059 	setfval(z, v);
1060 	return(z);
1061 }
1062 
/* return 1 if s contains any utf-8 (2 bytes or more) character, 0 otherwise */
int has_utf8(char *s)
{
	while (*s != 0) {
		int n = u8_nextlen(s);

		if (n > 1)
			return 1;
		s += n;
	}
	return 0;
}
1074 
1075 #define	MAXNUMSIZE	50
1076 
1077 int format(char **pbuf, int *pbufsize, const char *s, Node *a)	/* printf-like conversions */
1078 {
1079 	char *fmt;
1080 	char *p, *t;
1081 	const char *os;
1082 	Cell *x;
1083 	int flag = 0, n;
1084 	int fmtwd; /* format width */
1085 	int fmtsz = recsize;
1086 	char *buf = *pbuf;
1087 	int bufsize = *pbufsize;
1088 #define FMTSZ(a)   (fmtsz - ((a) - fmt))
1089 #define BUFSZ(a)   (bufsize - ((a) - buf))
1090 
1091 	static bool first = true;
1092 	static bool have_a_format = false;
1093 
1094 	if (first) {
1095 		char xbuf[100];
1096 
1097 		snprintf(xbuf, sizeof(xbuf), "%a", 42.0);
1098 		have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0);
1099 		first = false;
1100 	}
1101 
1102 	os = s;
1103 	p = buf;
1104 	if ((fmt = (char *) malloc(fmtsz)) == NULL)
1105 		FATAL("out of memory in format()");
1106 	while (*s) {
1107 		adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1");
1108 		if (*s != '%') {
1109 			*p++ = *s++;
1110 			continue;
1111 		}
1112 		if (*(s+1) == '%') {
1113 			*p++ = '%';
1114 			s += 2;
1115 			continue;
1116 		}
1117 		fmtwd = atoi(s+1);
1118 		if (fmtwd < 0)
1119 			fmtwd = -fmtwd;
1120 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2");
1121 		for (t = fmt; (*t++ = *s) != '\0'; s++) {
1122 			if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3"))
1123 				FATAL("format item %.30s... ran format() out of memory", os);
1124 			/* Ignore size specifiers */
1125 			if (strchr("hjLlqtz", *s) != NULL) {	/* the ansi panoply */
1126 				t--;
1127 				continue;
1128 			}
1129 			if (isalpha((uschar)*s))
1130 				break;
1131 			if (*s == '$') {
1132 				FATAL("'$' not permitted in awk formats");
1133 			}
1134 			if (*s == '*') {
1135 				if (a == NULL) {
1136 					FATAL("not enough args in printf(%s)", os);
1137 				}
1138 				x = execute(a);
1139 				a = a->nnext;
1140 				snprintf(t - 1, FMTSZ(t - 1),
1141 				    "%d", fmtwd=(int) getfval(x));
1142 				if (fmtwd < 0)
1143 					fmtwd = -fmtwd;
1144 				adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format");
1145 				t = fmt + strlen(fmt);
1146 				tempfree(x);
1147 			}
1148 		}
1149 		*t = '\0';
1150 		if (fmtwd < 0)
1151 			fmtwd = -fmtwd;
1152 		adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4");
1153 		switch (*s) {
1154 		case 'a': case 'A':
1155 			if (have_a_format)
1156 				flag = *s;
1157 			else
1158 				flag = 'f';
1159 			break;
1160 		case 'f': case 'e': case 'g': case 'E': case 'G':
1161 			flag = 'f';
1162 			break;
1163 		case 'd': case 'i': case 'o': case 'x': case 'X': case 'u':
1164 			flag = (*s == 'd' || *s == 'i') ? 'd' : 'u';
1165 			*(t-1) = 'j';
1166 			*t = *s;
1167 			*++t = '\0';
1168 			break;
1169 		case 's':
1170 			flag = 's';
1171 			break;
1172 		case 'c':
1173 			flag = 'c';
1174 			break;
1175 		default:
1176 			WARNING("weird printf conversion %s", fmt);
1177 			flag = '?';
1178 			break;
1179 		}
1180 		if (a == NULL)
1181 			FATAL("not enough args in printf(%s)", os);
1182 		x = execute(a);
1183 		a = a->nnext;
1184 		n = MAXNUMSIZE;
1185 		if (fmtwd > n)
1186 			n = fmtwd;
1187 		adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5");
1188 		switch (flag) {
1189 		case '?':
1190 			snprintf(p, BUFSZ(p), "%s", fmt);	/* unknown, so dump it too */
1191 			t = getsval(x);
1192 			n = strlen(t);
1193 			if (fmtwd > n)
1194 				n = fmtwd;
1195 			adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6");
1196 			p += strlen(p);
1197 			snprintf(p, BUFSZ(p), "%s", t);
1198 			break;
1199 		case 'a':
1200 		case 'A':
1201 		case 'f':	snprintf(p, BUFSZ(p), fmt, getfval(x)); break;
1202 		case 'd':	snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break;
1203 		case 'u':	snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break;
1204 
1205 		case 's': {
1206 			t = getsval(x);
1207 			n = strlen(t);
1208 			/* if simple format or no utf-8 in the string, sprintf works */
1209 			if (!has_utf8(t) || strcmp(fmt,"%s") == 0) {
1210 				if (fmtwd > n)
1211 					n = fmtwd;
1212 				if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7"))
1213 					FATAL("huge string/format (%d chars) in printf %.30s..." \
1214 						" ran format() out of memory", n, t);
1215 				snprintf(p, BUFSZ(p), fmt, t);
1216 				break;
1217 			}
1218 
1219 			/* get here if string has utf-8 chars and fmt is not plain %s */
1220 			/* "%-w.ps", where -, w and .p are all optional */
1221 			/* '0' before the w is a flag character */
1222 			/* fmt points at % */
1223 			int ljust = 0, wid = 0, prec = n, pad = 0;
1224 			char *f = fmt+1;
1225 			if (f[0] == '-') {
1226 				ljust = 1;
1227 				f++;
1228 			}
1229 			// flags '0' and '+' are recognized but skipped
1230 			if (f[0] == '0') {
1231 				f++;
1232 				if (f[0] == '+')
1233 					f++;
1234 			}
1235 			if (f[0] == '+') {
1236 				f++;
1237 				if (f[0] == '0')
1238 					f++;
1239 			}
1240 			if (isdigit((uschar)f[0])) { /* there is a wid */
1241 				wid = strtol(f, &f, 10);
1242 			}
1243 			if (f[0] == '.') { /* there is a .prec */
1244 				prec = strtol(++f, &f, 10);
1245 			}
1246 			if (prec > u8_strlen(t))
1247 				prec = u8_strlen(t);
1248 			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1249 			int i, k, n;
1250 
1251 			if (ljust) { // print prec chars from t, then pad blanks
1252 				n = u8_char2byte(t, prec);
1253 				for (k = 0; k < n; k++) {
1254 					//putchar(t[k]);
1255 					*p++ = t[k];
1256 				}
1257 				for (i = 0; i < pad; i++) {
1258 					//printf(" ");
1259 					*p++ = ' ';
1260 				}
1261 			} else { // print pad blanks, then prec chars from t
1262 				for (i = 0; i < pad; i++) {
1263 					//printf(" ");
1264 					*p++ = ' ';
1265 				}
1266 				n = u8_char2byte(t, prec);
1267 				for (k = 0; k < n; k++) {
1268 					//putchar(t[k]);
1269 					*p++ = t[k];
1270 				}
1271 			}
1272 			*p = 0;
1273 			break;
1274 		}
1275 
1276                case 'c': {
1277 			/*
1278 			 * If a numeric value is given, awk should just turn
1279 			 * it into a character and print it:
1280 			 *      BEGIN { printf("%c\n", 65) }
1281 			 * prints "A".
1282 			 *
1283 			 * But what if the numeric value is > 128 and
1284 			 * represents a valid Unicode code point?!? We do
1285 			 * our best to convert it back into UTF-8. If we
1286 			 * can't, we output the encoding of the Unicode
1287 			 * "invalid character", 0xFFFD.
1288 			 */
1289 			if (isnum(x)) {
1290 				int charval = (int) getfval(x);
1291 
1292 				if (charval != 0) {
1293 					if (charval < 128 || awk_mb_cur_max == 1)
1294 						snprintf(p, BUFSZ(p), fmt, charval);
1295 					else {
1296 						// possible unicode character
1297 						size_t count;
1298 						char *bs = wide_char_to_byte_str(charval, &count);
1299 
1300 						if (bs == NULL)	{ // invalid character
1301 							// use unicode invalid character, 0xFFFD
1302 							bs = "\357\277\275";
1303 							count = 3;
1304 						}
1305 						t = bs;
1306 						n = count;
1307 						goto format_percent_c;
1308 					}
1309 				} else {
1310 					*p++ = '\0'; /* explicit null byte */
1311 					*p = '\0';   /* next output will start here */
1312 				}
1313 				break;
1314 			}
1315 			t = getsval(x);
1316 			n = u8_nextlen(t);
1317 		format_percent_c:
1318 			if (n < 2) { /* not utf8 */
1319 				snprintf(p, BUFSZ(p), fmt, getsval(x)[0]);
1320 				break;
1321 			}
1322 
1323 			// utf8 character, almost same song and dance as for %s
1324 			int ljust = 0, wid = 0, prec = n, pad = 0;
1325 			char *f = fmt+1;
1326 			if (f[0] == '-') {
1327 				ljust = 1;
1328 				f++;
1329 			}
1330 			// flags '0' and '+' are recognized but skipped
1331 			if (f[0] == '0') {
1332 				f++;
1333 				if (f[0] == '+')
1334 					f++;
1335 			}
1336 			if (f[0] == '+') {
1337 				f++;
1338 				if (f[0] == '0')
1339 					f++;
1340 			}
1341 			if (isdigit((uschar)f[0])) { /* there is a wid */
1342 				wid = strtol(f, &f, 10);
1343 			}
1344 			if (f[0] == '.') { /* there is a .prec */
1345 				prec = strtol(++f, &f, 10);
1346 			}
1347 			if (prec > 1)           // %c --> only one character
1348 				prec = 1;
1349 			pad = wid>prec ? wid - prec : 0;  // has to be >= 0
1350 			int i;
1351 
1352 			if (ljust) { // print one char from t, then pad blanks
1353 				for (i = 0; i < n; i++)
1354 					*p++ = t[i];
1355 				for (i = 0; i < pad; i++) {
1356 					//printf(" ");
1357 					*p++ = ' ';
1358 				}
1359 			} else { // print pad blanks, then prec chars from t
1360 				for (i = 0; i < pad; i++) {
1361 					//printf(" ");
1362 					*p++ = ' ';
1363 				}
1364 				for (i = 0; i < n; i++)
1365 					*p++ = t[i];
1366 			}
1367 			*p = 0;
1368 			break;
1369 		}
1370 		default:
1371 			FATAL("can't happen: bad conversion %c in format()", flag);
1372 		}
1373 
1374 		tempfree(x);
1375 		p += strlen(p);
1376 		s++;
1377 	}
1378 	*p = '\0';
1379 	free(fmt);
1380 	for ( ; a; a = a->nnext) {		/* evaluate any remaining args */
1381 		x = execute(a);
1382 		tempfree(x);
1383 	}
1384 	*pbuf = buf;
1385 	*pbufsize = bufsize;
1386 	return p - buf;
1387 }
1388 
1389 Cell *awksprintf(Node **a, int n)		/* sprintf(a[0]) */
1390 {
1391 	Cell *x;
1392 	Node *y;
1393 	char *buf;
1394 	int bufsz=3*recsize;
1395 
1396 	if ((buf = (char *) malloc(bufsz)) == NULL)
1397 		FATAL("out of memory in awksprintf");
1398 	y = a[0]->nnext;
1399 	x = execute(a[0]);
1400 	if (format(&buf, &bufsz, getsval(x), y) == -1)
1401 		FATAL("sprintf string %.30s... too long.  can't happen.", buf);
1402 	tempfree(x);
1403 	x = gettemp();
1404 	x->sval = buf;
1405 	x->tval = STR;
1406 	return(x);
1407 }
1408 
1409 Cell *awkprintf(Node **a, int n)		/* printf */
1410 {	/* a[0] is list of args, starting with format string */
1411 	/* a[1] is redirection operator, a[2] is redirection file */
1412 	FILE *fp;
1413 	Cell *x;
1414 	Node *y;
1415 	char *buf;
1416 	int len;
1417 	int bufsz=3*recsize;
1418 
1419 	if ((buf = (char *) malloc(bufsz)) == NULL)
1420 		FATAL("out of memory in awkprintf");
1421 	y = a[0]->nnext;
1422 	x = execute(a[0]);
1423 	if ((len = format(&buf, &bufsz, getsval(x), y)) == -1)
1424 		FATAL("printf string %.30s... too long.  can't happen.", buf);
1425 	tempfree(x);
1426 	if (a[1] == NULL) {
1427 		/* fputs(buf, stdout); */
1428 		fwrite(buf, len, 1, stdout);
1429 		if (ferror(stdout))
1430 			FATAL("write error on stdout");
1431 	} else {
1432 		fp = redirect(ptoi(a[1]), a[2]);
1433 		/* fputs(buf, fp); */
1434 		fwrite(buf, len, 1, fp);
1435 		fflush(fp);
1436 		if (ferror(fp))
1437 			FATAL("write error on %s", filename(fp));
1438 	}
1439 	free(buf);
1440 	return(True);
1441 }
1442 
1443 Cell *arith(Node **a, int n)	/* a[0] + a[1], etc.  also -a[0] */
1444 {
1445 	Awkfloat i, j = 0;
1446 	double v;
1447 	Cell *x, *y, *z;
1448 
1449 	x = execute(a[0]);
1450 	i = getfval(x);
1451 	tempfree(x);
1452 	if (n != UMINUS && n != UPLUS) {
1453 		y = execute(a[1]);
1454 		j = getfval(y);
1455 		tempfree(y);
1456 	}
1457 	z = gettemp();
1458 	switch (n) {
1459 	case ADD:
1460 		i += j;
1461 		break;
1462 	case MINUS:
1463 		i -= j;
1464 		break;
1465 	case MULT:
1466 		i *= j;
1467 		break;
1468 	case DIVIDE:
1469 		if (j == 0)
1470 			FATAL("division by zero");
1471 		i /= j;
1472 		break;
1473 	case MOD:
1474 		if (j == 0)
1475 			FATAL("division by zero in mod");
1476 		modf(i/j, &v);
1477 		i = i - j * v;
1478 		break;
1479 	case UMINUS:
1480 		i = -i;
1481 		break;
1482 	case UPLUS: /* handled by getfval(), above */
1483 		break;
1484 	case POWER:
1485 		if (j >= 0 && modf(j, &v) == 0.0)	/* pos integer exponent */
1486 			i = ipow(i, (int) j);
1487                else {
1488 			errno = 0;
1489 			i = errcheck(pow(i, j), "pow");
1490                }
1491 		break;
1492 	default:	/* can't happen */
1493 		FATAL("illegal arithmetic operator %d", n);
1494 	}
1495 	setfval(z, i);
1496 	return(z);
1497 }
1498 
/*
 * ipow: raise x to the integer power n by repeated squaring.
 * Any n <= 0 yields 1.
 */
double ipow(double x, int n)
{
	double half;

	if (n <= 0)
		return 1;

	half = ipow(x, n / 2);
	if (n % 2 != 0)
		return x * half * half;	/* odd exponent: one extra factor of x */
	return half * half;
}
1511 
1512 Cell *incrdecr(Node **a, int n)		/* a[0]++, etc. */
1513 {
1514 	Cell *x, *z;
1515 	int k;
1516 	Awkfloat xf;
1517 
1518 	x = execute(a[0]);
1519 	xf = getfval(x);
1520 	k = (n == PREINCR || n == POSTINCR) ? 1 : -1;
1521 	if (n == PREINCR || n == PREDECR) {
1522 		setfval(x, xf + k);
1523 		return(x);
1524 	}
1525 	z = gettemp();
1526 	setfval(z, xf);
1527 	setfval(x, xf + k);
1528 	tempfree(x);
1529 	return(z);
1530 }
1531 
1532 Cell *assign(Node **a, int n)	/* a[0] = a[1], a[0] += a[1], etc. */
1533 {		/* this is subtle; don't muck with it. */
1534 	Cell *x, *y;
1535 	Awkfloat xf, yf;
1536 	double v;
1537 
1538 	y = execute(a[1]);
1539 	x = execute(a[0]);
1540 	if (n == ASSIGN) {	/* ordinary assignment */
1541 		if (x == y && !(x->tval & (FLD|REC)) && x != nfloc)
1542 			;	/* self-assignment: leave alone unless it's a field or NF */
1543 		else if ((y->tval & (STR|NUM)) == (STR|NUM)) {
1544 			yf = getfval(y);
1545 			setsval(x, getsval(y));
1546 			x->fval = yf;
1547 			x->tval |= NUM;
1548 		}
1549 		else if (isstr(y))
1550 			setsval(x, getsval(y));
1551 		else if (isnum(y))
1552 			setfval(x, getfval(y));
1553 		else
1554 			funnyvar(y, "read value of");
1555 		tempfree(y);
1556 		return(x);
1557 	}
1558 	xf = getfval(x);
1559 	yf = getfval(y);
1560 	switch (n) {
1561 	case ADDEQ:
1562 		xf += yf;
1563 		break;
1564 	case SUBEQ:
1565 		xf -= yf;
1566 		break;
1567 	case MULTEQ:
1568 		xf *= yf;
1569 		break;
1570 	case DIVEQ:
1571 		if (yf == 0)
1572 			FATAL("division by zero in /=");
1573 		xf /= yf;
1574 		break;
1575 	case MODEQ:
1576 		if (yf == 0)
1577 			FATAL("division by zero in %%=");
1578 		modf(xf/yf, &v);
1579 		xf = xf - yf * v;
1580 		break;
1581 	case POWEQ:
1582 		if (yf >= 0 && modf(yf, &v) == 0.0)	/* pos integer exponent */
1583 			xf = ipow(xf, (int) yf);
1584                else {
1585 			errno = 0;
1586 			xf = errcheck(pow(xf, yf), "pow");
1587                }
1588 		break;
1589 	default:
1590 		FATAL("illegal assignment operator %d", n);
1591 		break;
1592 	}
1593 	tempfree(y);
1594 	setfval(x, xf);
1595 	return(x);
1596 }
1597 
1598 Cell *cat(Node **a, int q)	/* a[0] cat a[1] */
1599 {
1600 	Cell *x, *y, *z;
1601 	int n1, n2;
1602 	char *s = NULL;
1603 	int ssz = 0;
1604 
1605 	x = execute(a[0]);
1606 	n1 = strlen(getsval(x));
1607 	adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1");
1608 	memcpy(s, x->sval, n1);
1609 
1610 	tempfree(x);
1611 
1612 	y = execute(a[1]);
1613 	n2 = strlen(getsval(y));
1614 	adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2");
1615 	memcpy(s + n1, y->sval, n2);
1616 	s[n1 + n2] = '\0';
1617 
1618 	tempfree(y);
1619 
1620 	z = gettemp();
1621 	z->sval = s;
1622 	z->tval = STR;
1623 
1624 	return(z);
1625 }
1626 
1627 Cell *pastat(Node **a, int n)	/* a[0] { a[1] } */
1628 {
1629 	Cell *x;
1630 
1631 	if (a[0] == NULL)
1632 		x = execute(a[1]);
1633 	else {
1634 		x = execute(a[0]);
1635 		if (istrue(x)) {
1636 			tempfree(x);
1637 			x = execute(a[1]);
1638 		}
1639 	}
1640 	return x;
1641 }
1642 
1643 Cell *dopa2(Node **a, int n)	/* a[0], a[1] { a[2] } */
1644 {
1645 	Cell *x;
1646 	int pair;
1647 
1648 	pair = ptoi(a[3]);
1649 	if (pairstack[pair] == 0) {
1650 		x = execute(a[0]);
1651 		if (istrue(x))
1652 			pairstack[pair] = 1;
1653 		tempfree(x);
1654 	}
1655 	if (pairstack[pair] == 1) {
1656 		x = execute(a[1]);
1657 		if (istrue(x))
1658 			pairstack[pair] = 0;
1659 		tempfree(x);
1660 		x = execute(a[2]);
1661 		return(x);
1662 	}
1663 	return(False);
1664 }
1665 
1666 Cell *split(Node **a, int nnn)	/* split(a[0], a[1], a[2]); a[3] is type */
1667 {
1668 	Cell *x = NULL, *y, *ap;
1669 	const char *s, *origs, *t;
1670 	const char *fs = NULL;
1671 	char *origfs = NULL;
1672 	int sep;
1673 	char temp, num[50];
1674 	int j, n, tempstat, arg3type;
1675 	double result;
1676 
1677 	y = execute(a[0]);	/* source string */
1678 	origs = s = strdup(getsval(y));
1679 	if (s == NULL)
1680 		FATAL("out of space in split");
1681 	tempfree(y);
1682 	arg3type = ptoi(a[3]);
1683 	if (a[2] == NULL) {		/* BUG: CSV should override implicit fs but not explicit */
1684 		fs = getsval(fsloc);
1685 	} else if (arg3type == STRING) {	/* split(str,arr,"string") */
1686 		x = execute(a[2]);
1687 		fs = origfs = strdup(getsval(x));
1688 		if (fs == NULL)
1689 			FATAL("out of space in split");
1690 		tempfree(x);
1691 	} else if (arg3type == REGEXPR) {
1692 		fs = "(regexpr)";	/* split(str,arr,/regexpr/) */
1693 	} else {
1694 		FATAL("illegal type of split");
1695 	}
1696 	sep = *fs;
1697 	ap = execute(a[1]);	/* array name */
1698 	/* BUG 7/26/22: this appears not to reset array: see C1/asplit */
1699 	freesymtab(ap);
1700 	DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs);
1701 	ap->tval &= ~STR;
1702 	ap->tval |= ARR;
1703 	ap->sval = (char *) makesymtab(NSYMTAB);
1704 
1705 	n = 0;
1706         if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) {
1707 		/* split(s, a, //); have to arrange that it looks like empty sep */
1708 		arg3type = 0;
1709 		fs = "";
1710 		sep = 0;
1711 	}
1712 	if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) {	/* reg expr */
1713 		fa *pfa;
1714 		if (arg3type == REGEXPR) {	/* it's ready already */
1715 			pfa = (fa *) a[2];
1716 		} else {
1717 			pfa = makedfa(fs, 1);
1718 		}
1719 		if (nematch(pfa,s)) {
1720 			tempstat = pfa->initstat;
1721 			pfa->initstat = 2;
1722 			do {
1723 				n++;
1724 				snprintf(num, sizeof(num), "%d", n);
1725 				temp = *patbeg;
1726 				setptr(patbeg, '\0');
1727 				if (is_number(s, & result))
1728 					setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1729 				else
1730 					setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1731 				setptr(patbeg, temp);
1732 				s = patbeg + patlen;
1733 				if (*(patbeg+patlen-1) == '\0' || *s == '\0') {
1734 					n++;
1735 					snprintf(num, sizeof(num), "%d", n);
1736 					setsymtab(num, "", 0.0, STR, (Array *) ap->sval);
1737 					pfa->initstat = tempstat;
1738 					goto spdone;
1739 				}
1740 			} while (nematch(pfa,s));
1741 			pfa->initstat = tempstat; 	/* bwk: has to be here to reset */
1742 							/* cf gsub and refldbld */
1743 		}
1744 		n++;
1745 		snprintf(num, sizeof(num), "%d", n);
1746 		if (is_number(s, & result))
1747 			setsymtab(num, s, result, STR|NUM, (Array *) ap->sval);
1748 		else
1749 			setsymtab(num, s, 0.0, STR, (Array *) ap->sval);
1750   spdone:
1751 		pfa = NULL;
1752 
1753 	} else if (a[2] == NULL && CSV) {	/* CSV only if no explicit separator */
1754 		char *newt = (char *) malloc(strlen(s)); /* for building new string; reuse for each field */
1755 		for (;;) {
1756 			char *fr = newt;
1757 			n++;
1758 			if (*s == '"' ) { /* start of "..." */
1759 				for (s++ ; *s != '\0'; ) {
1760 					if (*s == '"' && s[1] != '\0' && s[1] == '"') {
1761 						s += 2; /* doubled quote */
1762 						*fr++ = '"';
1763 					} else if (*s == '"' && (s[1] == '\0' || s[1] == ',')) {
1764 						s++; /* skip over closing quote */
1765 						break;
1766 					} else {
1767 						*fr++ = *s++;
1768 					}
1769 				}
1770 				*fr++ = 0;
1771 			} else {	/* unquoted field */
1772 				while (*s != ',' && *s != '\0')
1773 					*fr++ = *s++;
1774 				*fr++ = 0;
1775 			}
1776 			snprintf(num, sizeof(num), "%d", n);
1777 			if (is_number(newt, &result))
1778 				setsymtab(num, newt, result, STR|NUM, (Array *) ap->sval);
1779 			else
1780 				setsymtab(num, newt, 0.0, STR, (Array *) ap->sval);
1781 			if (*s++ == '\0')
1782 				break;
1783 		}
1784 		free(newt);
1785 
1786 	} else if (!CSV && sep == ' ') { /* usual case: split on white space */
1787 		for (n = 0; ; ) {
1788 #define ISWS(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
1789 			while (ISWS(*s))
1790 				s++;
1791 			if (*s == '\0')
1792 				break;
1793 			n++;
1794 			t = s;
1795 			do
1796 				s++;
1797 			while (*s != '\0' && !ISWS(*s));
1798 			temp = *s;
1799 			setptr(s, '\0');
1800 			snprintf(num, sizeof(num), "%d", n);
1801 			if (is_number(t, & result))
1802 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1803 			else
1804 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1805 			setptr(s, temp);
1806 			if (*s != '\0')
1807 				s++;
1808 		}
1809 
1810 	} else if (sep == 0) {	/* new: split(s, a, "") => 1 char/elem */
1811 		for (n = 0; *s != '\0'; s += u8_nextlen(s)) {
1812 			char buf[10];
1813 			n++;
1814 			snprintf(num, sizeof(num), "%d", n);
1815 
1816 			for (j = 0; j < u8_nextlen(s); j++) {
1817 				buf[j] = s[j];
1818 			}
1819 			buf[j] = '\0';
1820 
1821 			if (isdigit((uschar)buf[0]))
1822 				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
1823 			else
1824 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
1825 		}
1826 
1827 	} else if (*s != '\0') {  /* some random single character */
1828 		for (;;) {
1829 			n++;
1830 			t = s;
1831 			while (*s != sep && *s != '\n' && *s != '\0')
1832 				s++;
1833 			temp = *s;
1834 			setptr(s, '\0');
1835 			snprintf(num, sizeof(num), "%d", n);
1836 			if (is_number(t, & result))
1837 				setsymtab(num, t, result, STR|NUM, (Array *) ap->sval);
1838 			else
1839 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
1840 			setptr(s, temp);
1841 			if (*s++ == '\0')
1842 				break;
1843 		}
1844 	}
1845 	tempfree(ap);
1846 	xfree(origs);
1847 	xfree(origfs);
1848 	x = gettemp();
1849 	x->tval = NUM;
1850 	x->fval = n;
1851 	return(x);
1852 }
1853 
1854 Cell *condexpr(Node **a, int n)	/* a[0] ? a[1] : a[2] */
1855 {
1856 	Cell *x;
1857 
1858 	x = execute(a[0]);
1859 	if (istrue(x)) {
1860 		tempfree(x);
1861 		x = execute(a[1]);
1862 	} else {
1863 		tempfree(x);
1864 		x = execute(a[2]);
1865 	}
1866 	return(x);
1867 }
1868 
1869 Cell *ifstat(Node **a, int n)	/* if (a[0]) a[1]; else a[2] */
1870 {
1871 	Cell *x;
1872 
1873 	x = execute(a[0]);
1874 	if (istrue(x)) {
1875 		tempfree(x);
1876 		x = execute(a[1]);
1877 	} else if (a[2] != NULL) {
1878 		tempfree(x);
1879 		x = execute(a[2]);
1880 	}
1881 	return(x);
1882 }
1883 
1884 Cell *whilestat(Node **a, int n)	/* while (a[0]) a[1] */
1885 {
1886 	Cell *x;
1887 
1888 	for (;;) {
1889 		x = execute(a[0]);
1890 		if (!istrue(x))
1891 			return(x);
1892 		tempfree(x);
1893 		x = execute(a[1]);
1894 		if (isbreak(x)) {
1895 			x = True;
1896 			return(x);
1897 		}
1898 		if (isnext(x) || isexit(x) || isret(x))
1899 			return(x);
1900 		tempfree(x);
1901 	}
1902 }
1903 
1904 Cell *dostat(Node **a, int n)	/* do a[0]; while(a[1]) */
1905 {
1906 	Cell *x;
1907 
1908 	for (;;) {
1909 		x = execute(a[0]);
1910 		if (isbreak(x))
1911 			return True;
1912 		if (isnext(x) || isexit(x) || isret(x))
1913 			return(x);
1914 		tempfree(x);
1915 		x = execute(a[1]);
1916 		if (!istrue(x))
1917 			return(x);
1918 		tempfree(x);
1919 	}
1920 }
1921 
1922 Cell *forstat(Node **a, int n)	/* for (a[0]; a[1]; a[2]) a[3] */
1923 {
1924 	Cell *x;
1925 
1926 	x = execute(a[0]);
1927 	tempfree(x);
1928 	for (;;) {
1929 		if (a[1]!=NULL) {
1930 			x = execute(a[1]);
1931 			if (!istrue(x)) return(x);
1932 			else tempfree(x);
1933 		}
1934 		x = execute(a[3]);
1935 		if (isbreak(x))		/* turn off break */
1936 			return True;
1937 		if (isnext(x) || isexit(x) || isret(x))
1938 			return(x);
1939 		tempfree(x);
1940 		x = execute(a[2]);
1941 		tempfree(x);
1942 	}
1943 }
1944 
1945 Cell *instat(Node **a, int n)	/* for (a[0] in a[1]) a[2] */
1946 {
1947 	Cell *x, *vp, *arrayp, *cp, *ncp;
1948 	Array *tp;
1949 	int i;
1950 
1951 	vp = execute(a[0]);
1952 	arrayp = execute(a[1]);
1953 	if (!isarr(arrayp)) {
1954 		return True;
1955 	}
1956 	tp = (Array *) arrayp->sval;
1957 	tempfree(arrayp);
1958 	for (i = 0; i < tp->size; i++) {	/* this routine knows too much */
1959 		for (cp = tp->tab[i]; cp != NULL; cp = ncp) {
1960 			setsval(vp, cp->nval);
1961 			ncp = cp->cnext;
1962 			x = execute(a[2]);
1963 			if (isbreak(x)) {
1964 				tempfree(vp);
1965 				return True;
1966 			}
1967 			if (isnext(x) || isexit(x) || isret(x)) {
1968 				tempfree(vp);
1969 				return(x);
1970 			}
1971 			tempfree(x);
1972 		}
1973 	}
1974 	return True;
1975 }
1976 
1977 static char *nawk_convert(const char *s, int (*fun_c)(int),
1978     wint_t (*fun_wc)(wint_t))
1979 {
1980 	char *buf      = NULL;
1981 	char *pbuf     = NULL;
1982 	const char *ps = NULL;
1983 	size_t n       = 0;
1984 	wchar_t wc;
1985 	const size_t sz = awk_mb_cur_max;
1986 	int unused;
1987 
1988 	if (sz == 1) {
1989 		buf = tostring(s);
1990 
1991 		for (pbuf = buf; *pbuf; pbuf++)
1992 			*pbuf = fun_c((uschar)*pbuf);
1993 
1994 		return buf;
1995 	} else {
1996 		/* upper/lower character may be shorter/longer */
1997 		buf = tostringN(s, strlen(s) * sz + 1);
1998 
1999 		(void) mbtowc(NULL, NULL, 0);	/* reset internal state */
2000 		/*
2001 		 * Reset internal state here too.
2002 		 * Assign result to avoid a compiler warning. (Casting to void
2003 		 * doesn't work.)
2004 		 * Increment said variable to avoid a different warning.
2005 		 */
2006 		unused = wctomb(NULL, L'\0');
2007 		unused++;
2008 
2009 		ps   = s;
2010 		pbuf = buf;
2011 		while (n = mbtowc(&wc, ps, sz),
2012 		       n > 0 && n != (size_t)-1 && n != (size_t)-2)
2013 		{
2014 			ps += n;
2015 
2016 			n = wctomb(pbuf, fun_wc(wc));
2017 			if (n == (size_t)-1)
2018 				FATAL("illegal wide character %s", s);
2019 
2020 			pbuf += n;
2021 		}
2022 
2023 		*pbuf = '\0';
2024 
2025 		if (n)
2026 			FATAL("illegal byte sequence %s", s);
2027 
2028 		return buf;
2029 	}
2030 }
2031 
#ifdef __DJGPP__
/*
 * Local towupper/towlower, compiled only when __DJGPP__ is defined.
 * Values in 0..255 are mapped with the byte-oriented toupper()/tolower();
 * anything else is returned unchanged.
 */
static wint_t towupper(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return toupper(wc & 0xFF);
}

static wint_t towlower(wint_t wc)
{
	if (wc < 0 || wc >= 256)
		return wc;

	return tolower(wc & 0xFF);
}
#endif
2049 
/* nawk_toupper: upper-cased copy of s, produced by nawk_convert() */
static char *nawk_toupper(const char *s)
{
	char *upper = nawk_convert(s, toupper, towupper);

	return upper;
}
2054 
/* nawk_tolower: lower-cased copy of s, produced by nawk_convert() */
static char *nawk_tolower(const char *s)
{
	char *lower = nawk_convert(s, tolower, towlower);

	return lower;
}
2059 
2060 Cell *bltin(Node **a, int n)	/* builtin functions. a[0] is type, a[1] is arg list */
2061 {
2062 	Cell *x, *y;
2063 	Awkfloat u;
2064 	int t, sz;
2065 	Awkfloat tmp;
2066 	char *buf, *fmt;
2067 	Node *nextarg;
2068 	FILE *fp;
2069 	int status = 0;
2070 	time_t tv;
2071 	struct tm *tm, tmbuf;
2072 	int estatus = 0;
2073 
2074 	t = ptoi(a[0]);
2075 	x = execute(a[1]);
2076 	nextarg = a[1]->nnext;
2077 	switch (t) {
2078 	case FLENGTH:
2079 		if (isarr(x))
2080 			u = ((Array *) x->sval)->nelem;	/* GROT.  should be function*/
2081 		else
2082 			u = u8_strlen(getsval(x));
2083 		break;
2084 	case FLOG:
2085 		errno = 0;
2086 		u = errcheck(log(getfval(x)), "log");
2087 		break;
2088 	case FINT:
2089 		modf(getfval(x), &u); break;
2090 	case FEXP:
2091 		errno = 0;
2092 		u = errcheck(exp(getfval(x)), "exp");
2093 		break;
2094 	case FSQRT:
2095 		errno = 0;
2096 		u = errcheck(sqrt(getfval(x)), "sqrt");
2097 		break;
2098 	case FSIN:
2099 		u = sin(getfval(x)); break;
2100 	case FCOS:
2101 		u = cos(getfval(x)); break;
2102 	case FATAN:
2103 		if (nextarg == NULL) {
2104 			WARNING("atan2 requires two arguments; returning 1.0");
2105 			u = 1.0;
2106 		} else {
2107 			y = execute(a[1]->nnext);
2108 			u = atan2(getfval(x), getfval(y));
2109 			tempfree(y);
2110 			nextarg = nextarg->nnext;
2111 		}
2112 		break;
2113 	case FCOMPL:
2114 		u = ~((int)getfval(x));
2115 		break;
2116 	case FAND:
2117 		if (nextarg == 0) {
2118 			WARNING("and requires two arguments; returning 0");
2119 			u = 0;
2120 			break;
2121 		}
2122 		y = execute(a[1]->nnext);
2123 		u = ((int)getfval(x)) & ((int)getfval(y));
2124 		tempfree(y);
2125 		nextarg = nextarg->nnext;
2126 		break;
2127 	case FFOR:
2128 		if (nextarg == 0) {
2129 			WARNING("or requires two arguments; returning 0");
2130 			u = 0;
2131 			break;
2132 		}
2133 		y = execute(a[1]->nnext);
2134 		u = ((int)getfval(x)) | ((int)getfval(y));
2135 		tempfree(y);
2136 		nextarg = nextarg->nnext;
2137 		break;
2138 	case FXOR:
2139 		if (nextarg == 0) {
2140 			WARNING("xor requires two arguments; returning 0");
2141 			u = 0;
2142 			break;
2143 		}
2144 		y = execute(a[1]->nnext);
2145 		u = ((int)getfval(x)) ^ ((int)getfval(y));
2146 		tempfree(y);
2147 		nextarg = nextarg->nnext;
2148 		break;
2149 	case FLSHIFT:
2150 		if (nextarg == 0) {
2151 			WARNING("lshift requires two arguments; returning 0");
2152 			u = 0;
2153 			break;
2154 		}
2155 		y = execute(a[1]->nnext);
2156 		u = ((int)getfval(x)) << ((int)getfval(y));
2157 		tempfree(y);
2158 		nextarg = nextarg->nnext;
2159 		break;
2160 	case FRSHIFT:
2161 		if (nextarg == 0) {
2162 			WARNING("rshift requires two arguments; returning 0");
2163 			u = 0;
2164 			break;
2165 		}
2166 		y = execute(a[1]->nnext);
2167 		u = ((int)getfval(x)) >> ((int)getfval(y));
2168 		tempfree(y);
2169 		nextarg = nextarg->nnext;
2170 		break;
2171 	case FSYSTEM:
2172 		fflush(stdout);		/* in case something is buffered already */
2173 		estatus = status = system(getsval(x));
2174 		if (status != -1) {
2175 			if (WIFEXITED(status)) {
2176 				estatus = WEXITSTATUS(status);
2177 			} else if (WIFSIGNALED(status)) {
2178 				estatus = WTERMSIG(status) + 256;
2179 #ifdef WCOREDUMP
2180 				if (WCOREDUMP(status))
2181 					estatus += 256;
2182 #endif
2183 			} else	/* something else?!? */
2184 				estatus = 0;
2185 		}
2186 		/* else estatus was set to -1 */
2187 		u = estatus;
2188 		break;
2189 	case FRAND:
2190 		/* random() returns numbers in [0..2^31-1]
2191 		 * in order to get a number in [0, 1), divide it by 2^31
2192 		 */
2193 		u = (Awkfloat) random() / (0x7fffffffL + 0x1UL);
2194 		break;
2195 	case FSRAND:
2196 		if (isrec(x)) {		/* no argument provided */
2197 			u = time(NULL);
2198 			tmp = u;
2199 			srandom((unsigned int) u);
2200 		} else {
2201 			u = getfval(x);
2202 			tmp = u;
2203 			srandom_deterministic((unsigned int) u);
2204 		}
2205 		u = srand_seed;
2206 		srand_seed = tmp;
2207 		break;
2208 	case FTOUPPER:
2209 	case FTOLOWER:
2210 		if (t == FTOUPPER)
2211 			buf = nawk_toupper(getsval(x));
2212 		else
2213 			buf = nawk_tolower(getsval(x));
2214 		tempfree(x);
2215 		x = gettemp();
2216 		setsval(x, buf);
2217 		free(buf);
2218 		return x;
2219 	case FFLUSH:
2220 		if (isrec(x) || strlen(getsval(x)) == 0) {
2221 			flush_all();	/* fflush() or fflush("") -> all */
2222 			u = 0;
2223 		} else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL)
2224 			u = EOF;
2225 		else
2226 			u = fflush(fp);
2227 		break;
2228 	case FMKTIME:
2229 		memset(&tmbuf, 0, sizeof(tmbuf));
2230 		tm = &tmbuf;
2231 		t = sscanf(getsval(x), "%d %d %d %d %d %d %d",
2232 		    &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour,
2233 		    &tm->tm_min, &tm->tm_sec, &tm->tm_isdst);
2234 		switch (t) {
2235 		case 6:
2236 			tm->tm_isdst = -1;	/* let mktime figure it out */
2237 			/* FALLTHROUGH */
2238 		case 7:
2239 			tm->tm_year -= 1900;
2240 			tm->tm_mon--;
2241 			u = mktime(tm);
2242 			break;
2243 		default:
2244 			u = -1;
2245 			break;
2246 		}
2247 		break;
2248 	case FSYSTIME:
2249 		u = time((time_t *) 0);
2250 		break;
2251 	case FSTRFTIME:
2252 		/* strftime([format [,timestamp]]) */
2253 		if (nextarg) {
2254 			y = execute(nextarg);
2255 			nextarg = nextarg->nnext;
2256 			tv = (time_t) getfval(y);
2257 			tempfree(y);
2258 		} else
2259 			tv = time((time_t *) 0);
2260 		tm = localtime(&tv);
2261 		if (tm == NULL)
2262 			FATAL("bad time %ld", (long)tv);
2263 
2264 		if (isrec(x)) {
2265 			/* format argument not provided, use default */
2266 			fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
2267 		} else
2268 			fmt = tostring(getsval(x));
2269 
2270 		sz = 32;
2271 		buf = NULL;
2272 		do {
2273 			if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL)
2274 				FATAL("out of memory in strftime");
2275 			sz *= 2;
2276 		} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
2277 
2278 		y = gettemp();
2279 		setsval(y, buf);
2280 		free(fmt);
2281 		free(buf);
2282 
2283 		return y;
2284 	default:	/* can't happen */
2285 		FATAL("illegal function type %d", t);
2286 		break;
2287 	}
2288 	tempfree(x);
2289 	x = gettemp();
2290 	setfval(x, u);
2291 	if (nextarg != NULL) {
2292 		WARNING("warning: function has too many arguments");
2293 		for ( ; nextarg; nextarg = nextarg->nnext) {
2294 			y = execute(nextarg);
2295 			tempfree(y);
2296 		}
2297 	}
2298 	return(x);
2299 }
2300 
2301 Cell *printstat(Node **a, int n)	/* print a[0] */
2302 {
2303 	Node *x;
2304 	Cell *y;
2305 	FILE *fp;
2306 
2307 	if (a[1] == NULL)	/* a[1] is redirection operator, a[2] is file */
2308 		fp = stdout;
2309 	else
2310 		fp = redirect(ptoi(a[1]), a[2]);
2311 	for (x = a[0]; x != NULL; x = x->nnext) {
2312 		y = execute(x);
2313 		fputs(getpssval(y), fp);
2314 		tempfree(y);
2315 		if (x->nnext == NULL)
2316 			fputs(getsval(orsloc), fp);
2317 		else
2318 			fputs(getsval(ofsloc), fp);
2319 	}
2320 	if (a[1] != NULL)
2321 		fflush(fp);
2322 	if (ferror(fp))
2323 		FATAL("write error on %s", filename(fp));
2324 	return(True);
2325 }
2326 
2327 Cell *nullproc(Node **a, int n)
2328 {
2329 	return 0;
2330 }
2331 
2332 
2333 FILE *redirect(int a, Node *b)	/* set up all i/o redirections */
2334 {
2335 	FILE *fp;
2336 	Cell *x;
2337 	char *fname;
2338 
2339 	x = execute(b);
2340 	fname = getsval(x);
2341 	fp = openfile(a, fname, NULL);
2342 	if (fp == NULL)
2343 		FATAL("can't open file %s", fname);
2344 	tempfree(x);
2345 	return fp;
2346 }
2347 
/* One entry per stream tracked in the files table. */
struct files {
	FILE	*fp;		/* the open stream; NULL marks an unused slot */
	const char	*fname;	/* name the stream was opened under */
	int	mode;		/* '|', 'a', 'w' => LE/LT, GT */
};

struct files *files;	/* the table itself, allocated by stdinit() */

size_t nfiles;		/* number of slots in files */
2355 
2356 static void stdinit(void)	/* in case stdin, etc., are not constants */
2357 {
2358 	nfiles = FOPEN_MAX;
2359 	files = (struct files *) calloc(nfiles, sizeof(*files));
2360 	if (files == NULL)
2361 		FATAL("can't allocate file memory for %zu files", nfiles);
2362         files[0].fp = stdin;
2363 	files[0].fname = tostring("/dev/stdin");
2364 	files[0].mode = LT;
2365         files[1].fp = stdout;
2366 	files[1].fname = tostring("/dev/stdout");
2367 	files[1].mode = GT;
2368         files[2].fp = stderr;
2369 	files[2].fname = tostring("/dev/stderr");
2370 	files[2].mode = GT;
2371 }
2372 
2373 FILE *openfile(int a, const char *us, bool *pnewflag)
2374 {
2375 	const char *s = us;
2376 	size_t i;
2377 	int m;
2378 	FILE *fp = NULL;
2379 
2380 	if (*s == '\0')
2381 		FATAL("null file name in print or getline");
2382 	for (i = 0; i < nfiles; i++)
2383 		if (files[i].fname && strcmp(s, files[i].fname) == 0 &&
2384 		    (a == files[i].mode || (a==APPEND && files[i].mode==GT) ||
2385 		     a == FFLUSH)) {
2386 			if (pnewflag)
2387 				*pnewflag = false;
2388 			return files[i].fp;
2389 		}
2390 	if (a == FFLUSH)	/* didn't find it, so don't create it! */
2391 		return NULL;
2392 
2393 	for (i = 0; i < nfiles; i++)
2394 		if (files[i].fp == NULL)
2395 			break;
2396 	if (i >= nfiles) {
2397 		struct files *nf;
2398 		size_t nnf = nfiles + FOPEN_MAX;
2399 		nf = (struct files *) reallocarray(files, nnf, sizeof(*nf));
2400 		if (nf == NULL)
2401 			FATAL("cannot grow files for %s and %zu files", s, nnf);
2402 		memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf));
2403 		nfiles = nnf;
2404 		files = nf;
2405 	}
2406 	fflush(stdout);	/* force a semblance of order */
2407 	m = a;
2408 	if (a == GT) {
2409 		fp = fopen(s, "w");
2410 	} else if (a == APPEND) {
2411 		fp = fopen(s, "a");
2412 		m = GT;	/* so can mix > and >> */
2413 	} else if (a == '|') {	/* output pipe */
2414 		fp = popen(s, "w");
2415 	} else if (a == LE) {	/* input pipe */
2416 		fp = popen(s, "r");
2417 	} else if (a == LT) {	/* getline <file */
2418 		fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r");	/* "-" is stdin */
2419 	} else	/* can't happen */
2420 		FATAL("illegal redirection %d", a);
2421 	if (fp != NULL) {
2422 		files[i].fname = tostring(s);
2423 		files[i].fp = fp;
2424 		files[i].mode = m;
2425 		if (pnewflag)
2426 			*pnewflag = true;
2427 		if (fp != stdin && fp != stdout && fp != stderr)
2428 			(void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC);
2429 	}
2430 	return fp;
2431 }
2432 
2433 const char *filename(FILE *fp)
2434 {
2435 	size_t i;
2436 
2437 	for (i = 0; i < nfiles; i++)
2438 		if (fp == files[i].fp)
2439 			return files[i].fname;
2440 	return "???";
2441 }
2442 
2443 Cell *closefile(Node **a, int n)
2444 {
2445  	Cell *x;
2446 	size_t i;
2447 	bool stat;
2448 
2449  	x = execute(a[0]);
2450  	getsval(x);
2451 	stat = true;
2452  	for (i = 0; i < nfiles; i++) {
2453 		if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0)
2454 			continue;
2455 		if (files[i].mode == GT || files[i].mode == '|')
2456 			fflush(files[i].fp);
2457 		if (ferror(files[i].fp)) {
2458 			if ((files[i].mode == GT && files[i].fp != stderr)
2459 			  || files[i].mode == '|')
2460 				FATAL("write error on %s", files[i].fname);
2461 			else
2462 				WARNING("i/o error occurred on %s", files[i].fname);
2463 		}
2464 		if (files[i].fp == stdin || files[i].fp == stdout ||
2465 		    files[i].fp == stderr)
2466 			stat = freopen("/dev/null", "r+", files[i].fp) == NULL;
2467 		else if (files[i].mode == '|' || files[i].mode == LE)
2468 			stat = pclose(files[i].fp) == -1;
2469 		else
2470 			stat = fclose(files[i].fp) == EOF;
2471 		if (stat)
2472 			WARNING("i/o error occurred closing %s", files[i].fname);
2473 		xfree(files[i].fname);
2474 		files[i].fname = NULL;	/* watch out for ref thru this */
2475 		files[i].fp = NULL;
2476 		break;
2477  	}
2478  	tempfree(x);
2479  	x = gettemp();
2480 	setfval(x, (Awkfloat) (stat ? -1 : 0));
2481  	return(x);
2482 }
2483 
2484 void closeall(void)
2485 {
2486 	size_t i;
2487 	bool stat = false;
2488 
2489 	for (i = 0; i < nfiles; i++) {
2490 		if (! files[i].fp)
2491 			continue;
2492 		if (files[i].mode == GT || files[i].mode == '|')
2493 			fflush(files[i].fp);
2494 		if (ferror(files[i].fp)) {
2495 			if ((files[i].mode == GT && files[i].fp != stderr)
2496 			  || files[i].mode == '|')
2497 				FATAL("write error on %s", files[i].fname);
2498 			else
2499 				WARNING("i/o error occurred on %s", files[i].fname);
2500 		}
2501 		if (files[i].fp == stdin || files[i].fp == stdout ||
2502 		    files[i].fp == stderr)
2503 			continue;
2504 		if (files[i].mode == '|' || files[i].mode == LE)
2505 			stat = pclose(files[i].fp) == -1;
2506 		else
2507 			stat = fclose(files[i].fp) == EOF;
2508 		if (stat)
2509 			WARNING("i/o error occurred while closing %s", files[i].fname);
2510 	}
2511 }
2512 
2513 static void flush_all(void)
2514 {
2515 	size_t i;
2516 
2517 	for (i = 0; i < nfiles; i++)
2518 		if (files[i].fp)
2519 			fflush(files[i].fp);
2520 }
2521 
2522 void backsub(char **pb_ptr, const char **sptr_ptr);
2523 
2524 Cell *dosub(Node **a, int subop)        /* sub and gsub */
2525 {
2526 	fa *pfa;
2527 	int tempstat;
2528 	char *repl;
2529 	Cell *x;
2530 
2531 	char *buf = NULL;
2532 	char *pb = NULL;
2533 	int bufsz = recsize;
2534 
2535 	const char *r, *s;
2536 	const char *start;
2537 	const char *noempty = NULL;      /* empty match disallowed here */
2538 	size_t m = 0;                    /* match count */
2539 	size_t whichm;                   /* which match to select, 0 = global */
2540 	int mtype;                       /* match type */
2541 
2542 	if (a[0] == NULL) {	/* 0 => a[1] is already-compiled regexpr */
2543 		pfa = (fa *) a[1];
2544 	} else {
2545 		x = execute(a[1]);
2546 		pfa = makedfa(getsval(x), 1);
2547 		tempfree(x);
2548 	}
2549 
2550 	x = execute(a[2]);	/* replacement string */
2551 	repl = tostring(getsval(x));
2552 	tempfree(x);
2553 
2554 	switch (subop) {
2555 	case SUB:
2556 		whichm = 1;
2557 		x = execute(a[3]);    /* source string */
2558 		break;
2559 	case GSUB:
2560 		whichm = 0;
2561 		x = execute(a[3]);    /* source string */
2562 		break;
2563 	default:
2564 		FATAL("dosub: unrecognized subop: %d", subop);
2565 	}
2566 
2567 	start = getsval(x);
2568 	while (pmatch(pfa, start)) {
2569 		if (buf == NULL) {
2570 			if ((pb = buf = malloc(bufsz)) == NULL)
2571 				FATAL("out of memory in dosub");
2572 			tempstat = pfa->initstat;
2573 			pfa->initstat = 2;
2574 		}
2575 
2576 		/* match types */
2577 		#define	MT_IGNORE  0  /* unselected or invalid */
2578 		#define MT_INSERT  1  /* selected, empty */
2579 		#define MT_REPLACE 2  /* selected, not empty */
2580 
2581 		/* an empty match just after replacement is invalid */
2582 
2583 		if (patbeg == noempty && patlen == 0) {
2584 			mtype = MT_IGNORE;    /* invalid, not counted */
2585 		} else if (whichm == ++m || whichm == 0) {
2586 			mtype = patlen ? MT_REPLACE : MT_INSERT;
2587 		} else {
2588 			mtype = MT_IGNORE;    /* unselected, but counted */
2589 		}
2590 
2591 		/* leading text: */
2592 		if (patbeg > start) {
2593 			adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - start),
2594 				recsize, &pb, "dosub");
2595 			s = start;
2596 			while (s < patbeg)
2597 				*pb++ = *s++;
2598 		}
2599 
2600 		if (mtype == MT_IGNORE)
2601 			goto matching_text;  /* skip replacement text */
2602 
2603 		r = repl;
2604 		while (*r != 0) {
2605 			adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "dosub");
2606 			if (*r == '\\') {
2607 				backsub(&pb, &r);
2608 			} else if (*r == '&') {
2609 				r++;
2610 				adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize,
2611 					&pb, "dosub");
2612 				for (s = patbeg; s < patbeg+patlen; )
2613 					*pb++ = *s++;
2614 			} else {
2615 				*pb++ = *r++;
2616 			}
2617 		}
2618 
2619 matching_text:
2620 		if (mtype == MT_REPLACE || *patbeg == '\0')
2621 			goto next_search;  /* skip matching text */
2622 
2623 		if (patlen == 0)
2624 			patlen = u8_nextlen(patbeg);
2625 		adjbuf(&buf, &bufsz, (pb-buf) + patlen, recsize, &pb, "dosub");
2626 		s = patbeg;
2627 		while (s < patbeg + patlen)
2628 			*pb++ = *s++;
2629 
2630 next_search:
2631 		start = patbeg + patlen;
2632 		if (m == whichm || *patbeg == '\0')
2633 			break;
2634 		if (mtype == MT_REPLACE)
2635 			noempty = start;
2636 
2637 		#undef MT_IGNORE
2638 		#undef MT_INSERT
2639 		#undef MT_REPLACE
2640 	}
2641 
2642 	xfree(repl);
2643 
2644 	if (buf != NULL) {
2645 		pfa->initstat = tempstat;
2646 
2647 		/* trailing text */
2648 		adjbuf(&buf, &bufsz, 1+strlen(start)+pb-buf, 0, &pb, "dosub");
2649 		while ((*pb++ = *start++) != '\0')
2650 			;
2651 
2652 		setsval(x, buf);
2653 		free(buf);
2654 	}
2655 
2656 	tempfree(x);
2657 	x = gettemp();
2658 	x->tval = NUM;
2659 	x->fval = m;
2660 	return x;
2661 }
2662 
2663 Cell *gensub(Node **a, int nnn)	/* global selective substitute */
2664 	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
2665 {
2666 	Cell *x, *y, *res, *h;
2667 	char *rptr;
2668 	const char *sptr;
2669 	char *buf, *pb;
2670 	const char *t, *q;
2671 	fa *pfa;
2672 	int mflag, tempstat, num, whichm;
2673 	int bufsz = recsize;
2674 
2675 	if ((buf = malloc(bufsz)) == NULL)
2676 		FATAL("out of memory in gensub");
2677 	mflag = 0;	/* if mflag == 0, can replace empty string */
2678 	num = 0;
2679 	x = execute(a[4]);	/* source string */
2680 	t = getsval(x);
2681 	res = copycell(x);	/* target string - initially copy of source */
2682 	res->csub = CTEMP;	/* result values are temporary */
2683 	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
2684 		pfa = (fa *) a[1];	/* regular expression */
2685 	else {
2686 		y = execute(a[1]);
2687 		pfa = makedfa(getsval(y), 1);
2688 		tempfree(y);
2689 	}
2690 	y = execute(a[2]);	/* replacement string */
2691 	h = execute(a[3]);	/* which matches should be replaced */
2692 	sptr = getsval(h);
2693 	if (sptr[0] == 'g' || sptr[0] == 'G')
2694 		whichm = -1;
2695 	else {
2696 		/*
2697 		 * The specified number is index of replacement, starting
2698 		 * from 1. GNU awk treats index lower than 0 same as
2699 		 * 1, we do same for compatibility.
2700 		 */
2701 		whichm = (int) getfval(h) - 1;
2702 		if (whichm < 0)
2703 			whichm = 0;
2704 	}
2705 	tempfree(h);
2706 
2707 	if (pmatch(pfa, t)) {
2708 		char *sl;
2709 
2710 		tempstat = pfa->initstat;
2711 		pfa->initstat = 2;
2712 		pb = buf;
2713 		rptr = getsval(y);
2714 		/*
2715 		 * XXX if there are any backreferences in subst string,
2716 		 * complain now.
2717 		 */
2718 		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
2719 			if (strchr("0123456789", sl[1])) {
2720 				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
2721 			}
2722 		}
2723 
2724 		do {
2725 			if (whichm >= 0 && whichm != num) {
2726 				num++;
2727 				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
2728 
2729 				/* copy the part of string up to and including
2730 				 * match to output buffer */
2731 				while (t < patbeg + patlen)
2732 					*pb++ = *t++;
2733 				continue;
2734 			}
2735 
2736 			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
2737 				if (mflag == 0) {	/* can replace empty */
2738 					num++;
2739 					sptr = rptr;
2740 					while (*sptr != 0) {
2741 						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2742 						if (*sptr == '\\') {
2743 							backsub(&pb, &sptr);
2744 						} else if (*sptr == '&') {
2745 							sptr++;
2746 							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2747 							for (q = patbeg; q < patbeg+patlen; )
2748 								*pb++ = *q++;
2749 						} else
2750 							*pb++ = *sptr++;
2751 					}
2752 				}
2753 				if (*t == 0)	/* at end */
2754 					goto done;
2755 				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
2756 				*pb++ = *t++;
2757 				if (pb > buf + bufsz)	/* BUG: not sure of this test */
2758 					FATAL("gensub result0 %.30s too big; can't happen", buf);
2759 				mflag = 0;
2760 			}
2761 			else {	/* matched nonempty string */
2762 				num++;
2763 				sptr = t;
2764 				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
2765 				while (sptr < patbeg)
2766 					*pb++ = *sptr++;
2767 				sptr = rptr;
2768 				while (*sptr != 0) {
2769 					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
2770 					if (*sptr == '\\') {
2771 						backsub(&pb, &sptr);
2772 					} else if (*sptr == '&') {
2773 						sptr++;
2774 						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
2775 						for (q = patbeg; q < patbeg+patlen; )
2776 							*pb++ = *q++;
2777 					} else
2778 						*pb++ = *sptr++;
2779 				}
2780 				t = patbeg + patlen;
2781 				if (patlen == 0 || *t == 0 || *(t-1) == 0)
2782 					goto done;
2783 				if (pb > buf + bufsz)
2784 					FATAL("gensub result1 %.30s too big; can't happen", buf);
2785 				mflag = 1;
2786 			}
2787 		} while (pmatch(pfa,t));
2788 		sptr = t;
2789 		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
2790 		while ((*pb++ = *sptr++) != 0)
2791 			;
2792 	done:	if (pb > buf + bufsz)
2793 			FATAL("gensub result2 %.30s too big; can't happen", buf);
2794 		*pb = '\0';
2795 		setsval(res, buf);
2796 		pfa->initstat = tempstat;
2797 	}
2798 	tempfree(x);
2799 	tempfree(y);
2800 	free(buf);
2801 	return(res);
2802 }
2803 
2804 void backsub(char **pb_ptr, const char **sptr_ptr)	/* handle \\& variations */
2805 {						/* sptr[0] == '\\' */
2806 	char *pb = *pb_ptr;
2807 	const char *sptr = *sptr_ptr;
2808 
2809 	if (sptr[1] == '\\') {
2810 		if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */
2811 			*pb++ = '\\';
2812 			*pb++ = '&';
2813 			sptr += 4;
2814 		} else if (sptr[2] == '&') {	/* \\& -> \ + matched */
2815 			*pb++ = '\\';
2816 			sptr += 2;
2817 		} else if (do_posix) {		/* \\x -> \x */
2818 			sptr++;
2819 			*pb++ = *sptr++;
2820 		} else {			/* \\x -> \\x */
2821 			*pb++ = *sptr++;
2822 			*pb++ = *sptr++;
2823 		}
2824 	} else if (sptr[1] == '&') {	/* literal & */
2825 		sptr++;
2826 		*pb++ = *sptr++;
2827 	} else				/* literal \ */
2828 		*pb++ = *sptr++;
2829 
2830 	*pb_ptr = pb;
2831 	*sptr_ptr = sptr;
2832 }
2833 
/*
 * wide_char_to_byte_str - encode one code point as UTF-8.
 *
 * rune    code point; values outside 0 .. 0x10FFFF are rejected
 * outlen  receives the number of bytes in the encoding (1 to 4)
 *
 * Returns a pointer to a NUL-terminated static buffer that the next call
 * overwrites, or NULL (leaving *outlen untouched) for a rejected value.
 */
static char *wide_char_to_byte_str(int rune, size_t *outlen)
{
	static char buf[5];
	int nbytes, lead, i;

	if (rune < 0 || rune > 0x10FFFF)
		return NULL;

	/* encoded length and the marker bits of the first byte */
	if (rune <= 0x0000007F) {
		nbytes = 1;
		lead = 0x00;
	} else if (rune <= 0x000007FF) {
		nbytes = 2;
		lead = 0xC0;	/* 110xxxxx */
	} else if (rune <= 0x0000FFFF) {
		nbytes = 3;
		lead = 0xE0;	/* 1110xxxx */
	} else {
		nbytes = 4;
		lead = 0xF0;	/* 11110xxx */
	}

	memset(buf, 0, sizeof(buf));

	/* continuation bytes (10xxxxxx), filled from the last one back */
	for (i = nbytes - 1; i > 0; i--) {
		buf[i] = 0x80 | (rune & 0x3F);
		rune >>= 6;
	}
	buf[0] = lead | rune;	/* the remaining high bits */

	*outlen = nbytes;
	return buf;
}
2871