1 /*
2  * builtin.c - Builtin functions and various utility procedures.
3  */
4 
5 /*
6  * Copyright (C) 1986, 1988, 1989, 1991-2021,
7  * the Free Software Foundation, Inc.
8  *
9  * This file is part of GAWK, the GNU implementation of the
10  * AWK Programming Language.
11  *
12  * GAWK is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 3 of the License, or
15  * (at your option) any later version.
16  *
17  * GAWK is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
25  */
26 
27 
28 #include "awk.h"
29 #if defined(HAVE_FCNTL_H)
30 #include <fcntl.h>
31 #endif
32 #include "random.h"
33 #include "floatmagic.h"
34 
35 #if defined(HAVE_POPEN_H)
36 #include "popen.h"
37 #endif
38 
39 #ifndef CHAR_BIT
40 # define CHAR_BIT 8
41 #endif
42 
43 /* The extra casts work around common compiler bugs.  */
44 #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
45 /* Note:  these assume that negative integers are represented internally
46    via 2's complement, which is not mandated by C.  They also ignore the
47    fact that signed integer arithmetic overflow can trigger exceptions,
48    unlike unsigned which is guaranteed not to do so. */
49 #define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
50 			      ? ~ (uintmax_t) 0 << (sizeof (t) * CHAR_BIT - 1) \
51 			      : 0))
52 #define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t)))
53 
54 #ifndef INTMAX_MIN
55 # define INTMAX_MIN TYPE_MINIMUM (intmax_t)
56 #endif
57 #ifndef UINTMAX_MAX
58 # define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t)
59 #endif
60 
61 #ifndef SIZE_MAX	/* C99 constant, can't rely on it everywhere */
62 #define SIZE_MAX ((size_t) -1)
63 #endif
64 
65 #define DEFAULT_G_PRECISION 6
66 
67 static size_t mbc_byte_count(const char *ptr, size_t numchars);
68 static size_t mbc_char_count(const char *ptr, size_t numbytes);
69 
70 /* Can declare these, since we always use the random shipped with gawk */
71 extern char *initstate(unsigned long seed, char *state, long n);
72 extern char *setstate(char *state);
73 extern long random(void);
74 extern void srandom(unsigned long seed);
75 
76 extern NODE **args_array;
77 extern int max_args;
78 extern NODE **fields_arr;
79 extern bool output_is_tty;
80 extern FILE *output_fp;
81 
82 static const char *add_thousands(const char *original, struct lconv *loc);
83 
/*
 * POP_TWO_SCALARS --- pop two operands off the evaluation stack.
 *
 * s2 receives the first value popped (via POP_SCALAR()), s1 the second
 * (via POP()).  If s1 turns out to be an array, s2 is released and a
 * fatal error is raised.
 *
 * The whole expansion is one do { ... } while (false) statement, so the
 * macro is safe as the body of an unbraced if/else or loop.
 */
#define POP_TWO_SCALARS(s1, s2) \
do { \
	(s2) = POP_SCALAR(); \
	(s1) = POP(); \
	if ((s1)->type == Node_var_array) { \
		DEREF(s2); \
		fatal(_("attempt to use array `%s' in a scalar context"), array_vname(s1)); \
	} \
} while (false)
91 
92 
93 /*
94  * Since we supply the version of random(), we know what
95  * value to use here.
96  */
97 #define GAWK_RANDOM_MAX 0x7fffffffL
98 
99 /* efwrite --- like fwrite, but with error checking */
100 
101 static void
efwrite(const void * ptr,size_t size,size_t count,FILE * fp,const char * from,struct redirect * rp,bool flush)102 efwrite(const void *ptr,
103 	size_t size,
104 	size_t count,
105 	FILE *fp,
106 	const char *from,
107 	struct redirect *rp,
108 	bool flush)
109 {
110 	errno = 0;
111 	if (rp != NULL) {
112 		if (rp->output.gawk_fwrite(ptr, size, count, fp, rp->output.opaque) != count)
113 			goto wrerror;
114 	} else if (fwrite(ptr, size, count, fp) != count)
115 		goto wrerror;
116 	if (flush
117 	  && ((fp == stdout && output_is_tty)
118 	      || (rp != NULL && (rp->flag & RED_NOBUF) != 0))) {
119 		if (rp != NULL) {
120 			rp->output.gawk_fflush(fp, rp->output.opaque);
121 			if (rp->output.gawk_ferror(fp, rp->output.opaque))
122 				goto wrerror;
123 		} else {
124 			fflush(fp);
125 			if (ferror(fp))
126 				goto wrerror;
127 		}
128 	}
129 	return;
130 
131 wrerror:
132 #ifdef __MINGW32__
133 	if (errno == 0 || errno == EINVAL)
134 		w32_maybe_set_errno();
135 #endif
136 	/* for stdout, die with a real SIGPIPE, like other awks */
137 	if (fp == stdout && errno == EPIPE)
138 		die_via_sigpipe();
139 
140 	/* otherwise die verbosely */
141 	if ((rp != NULL) ? is_non_fatal_redirect(rp->value, strlen(rp->value)) : is_non_fatal_std(fp))
142 		update_ERRNO_int(errno);
143 	else
144 		fatal(_("%s to \"%s\" failed: %s"), from,
145 			rp != NULL
146 				? rp->value
147 				: fp == stdout
148 					? _("standard output")
149 					: _("standard error"),
150 			errno ? strerror(errno) : _("reason unknown"));
151 }
152 
153 /* do_exp --- exponential function */
154 
155 NODE *
do_exp(int nargs)156 do_exp(int nargs)
157 {
158 	NODE *tmp;
159 	double d, res;
160 
161 	tmp = POP_SCALAR();
162 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
163 		lintwarn(_("%s: received non-numeric argument"), "exp");
164 	d = force_number(tmp)->numbr;
165 	DEREF(tmp);
166 	errno = 0;
167 	res = exp(d);
168 	if (errno == ERANGE)
169 		warning(_("exp: argument %g is out of range"), d);
170 	return make_number((AWKNUM) res);
171 }
172 
/* stdfile --- map "/dev/stdout" or "/dev/stderr" to its stdio stream */

/*
 * This is what lets `fflush("/dev/stdout")' work; other files are found
 * through getredirect().  /dev/stdin is deliberately not recognized,
 * since fflush is only for output.
 *
 * `name' need not be NUL-terminated; only its first `len' bytes are
 * examined.  Returns NULL when the name is neither of the two.
 */

static FILE *
stdfile(const char *name, size_t len)
{
	static const char err_name[] = "/dev/stderr";
	static const char out_name[] = "/dev/stdout";

	/* both special names have the same length */
	if (len != sizeof(err_name) - 1)
		return NULL;

	if (strncmp(name, err_name, sizeof(err_name) - 1) == 0)
		return stderr;
	if (strncmp(name, out_name, sizeof(out_name) - 1) == 0)
		return stdout;

	return NULL;
}
193 
194 /* do_fflush --- flush output, either named file or pipe or everything */
195 
196 NODE *
do_fflush(int nargs)197 do_fflush(int nargs)
198 {
199 	struct redirect *rp;
200 	NODE *tmp;
201 	FILE *fp;
202 	int status = 0;
203 	const char *file;
204 	int len;
205 
206 	/*
207 	 * November, 2012.
208 	 * It turns out that circa 2002, when BWK
209 	 * added fflush() and fflush("") to his awk, he made both of
210 	 * them flush everything.
211 	 *
212 	 * Now, with our inside agent getting ready to try to get fflush()
213 	 * standardized in POSIX, we are going to make our awk consistent
214 	 * with his.  This should not really affect anyone, as flushing
215 	 * everything also flushes stdout.
216 	 *
217 	 * So. Once upon a time:
218 	 * 	fflush()	--- flush stdout
219 	 * 	fflush("")	--- flush everything
220 	 * Now, both calls flush everything.
221 	 */
222 
223 	/* fflush() */
224 	if (nargs == 0) {
225 		status = flush_io();	// ERRNO updated
226 		return make_number((AWKNUM) status);
227 	}
228 
229 	tmp = POP_STRING();
230 	file = tmp->stptr;
231 	len = tmp->stlen;
232 
233 	/* fflush("") */
234 	if (tmp->stlen == 0) {
235 		status = flush_io();	// ERRNO updated
236 		DEREF(tmp);
237 		return make_number((AWKNUM) status);
238 	}
239 
240 	/* fflush("/some/path") */
241 	rp = getredirect(tmp->stptr, tmp->stlen);
242 	status = -1;
243 	if (rp != NULL) {
244 		if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
245 			if ((rp->flag & RED_PIPE) != 0)
246 				warning(_("fflush: cannot flush: pipe `%.*s' opened for reading, not writing"),
247 					len, file);
248 			else
249 				warning(_("fflush: cannot flush: file `%.*s' opened for reading, not writing"),
250 					len, file);
251 			DEREF(tmp);
252 			return make_number((AWKNUM) status);
253 		}
254 		fp = rp->output.fp;
255 		if (fp != NULL) {
256 			status = rp->output.gawk_fflush(fp, rp->output.opaque);
257 
258 			if (status != 0) {
259 				if (! is_non_fatal_redirect(tmp->stptr, tmp->stlen))
260 					fatal(_("fflush: cannot flush file `%.*s': %s"),
261 						len, file, strerror(errno));
262 				update_ERRNO_int(errno);
263 			}
264 		} else if ((rp->flag & RED_TWOWAY) != 0)
265 				warning(_("fflush: cannot flush: two-way pipe `%.*s' has closed write end"),
266 					len, file);
267 	} else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
268 		status = (non_fatal_flush_std_file(fp) == false);
269 	} else {
270 		status = -1;
271 		warning(_("fflush: `%.*s' is not an open file, pipe or co-process"), len, file);
272 	}
273 	DEREF(tmp);
274 	return make_number((AWKNUM) status);
275 }
276 
/* mbs_fetch_char --- decode the character at `s' for strncasecmpmbs() */

/*
 * Stores the wide character in `*wc' and returns how many bytes it took.
 * A byte that is_valid_character() accepts is taken as is.  Otherwise
 * mbrtowc() decodes at most `avail' bytes; if that fails, is incomplete,
 * or yields a NUL, the first byte is treated as a singlebyte character.
 */

static size_t
mbs_fetch_char(const unsigned char *s, size_t avail, mbstate_t *state, wchar_t *wc)
{
	size_t used;

	if (is_valid_character(*s)) {
		*wc = btowc_cache(*s);
		return 1;
	}

	used = mbrtowc(wc, (const char *) s, avail, state);
	if (used == (size_t) -1 || used == (size_t) -2 || used == 0) {
		/* We treat it as a singlebyte character. */
		*wc = btowc_cache(*s);
		return 1;
	}
	return used;
}

/* strncasecmpmbs --- like strncasecmp (multibyte string version)  */

/*
 * Walks both strings one character at a time, comparing the towlower()
 * of each pair, until either offset reaches `n' bytes.  Returns 0 when
 * no pair differed, otherwise the difference of the first differing
 * pair.
 */

int
strncasecmpmbs(const unsigned char *s1, const unsigned char *s2, size_t n)
{
	size_t off1 = 0, off2 = 0, gap;
	wchar_t ch1, ch2;
	mbstate_t st1, st2;

	memset(& st1, 0, sizeof(st1));
	memset(& st2, 0, sizeof(st2));

	while (off1 < n && off2 < n) {
		size_t step1 = mbs_fetch_char(s1 + off1, n - off1, & st1, & ch1);
		size_t step2 = mbs_fetch_char(s2 + off2, n - off2, & st2, & ch2);

		gap = towlower(ch1) - towlower(ch2);
		if (gap != 0)
			/* s1 and s2 are not equivalent. */
			return gap;

		off1 += step1;
		off2 += step2;
	}

	/* s1 and s2 are equivalent. */
	return 0;
}
321 
/*
 * index_multibyte_buffer --- for every byte of `src', record in `dest'
 * its position within the character it belongs to.  The caller must
 * allocate `dest' with room for `len' entries.
 *
 * e.g. str  = <mb1(1)>, <mb1(2)>, a, b, <mb2(1)>, <mb2(2)>, <mb2(3)>, c
 *      where mb(i) means the `i'-th byte of a multibyte character.
 *      dest =        1,        2, 1, 1,        1,        2,        3, 1
 */
static void
index_multibyte_buffer(char* src, char* dest, int len)
{
	mbstate_t committed;	/* state saved after the last complete multibyte character */
	int start = 0;		/* offset of the first byte of the current character */
	int pos;

	memset(& committed, 0, sizeof(mbstate_t));
	for (pos = 0; pos < len; pos++) {
		mbstate_t scratch = committed;
		size_t span = pos - start + 1;
		size_t got = mbrlen(src + start, span, & scratch);

		if (got == (size_t) -2) {
			/* still inside a multibyte character */
			dest[pos] = span;
			continue;
		}

		if (got == (size_t) -1 || got <= 1) {
			/* invalid sequence, NUL, or plain byte: a singlebyte character */
			dest[pos] = 1;
			start = pos + 1;
			continue;
		}

		/* the final byte of a multibyte character */
		dest[pos] = got;
		start = pos + 1;
		committed = scratch;
	}
}
356 
357 /* do_index --- find index of a string */
358 
359 NODE *
do_index(int nargs)360 do_index(int nargs)
361 {
362 	NODE *s1, *s2;
363 	const char *p1, *p2;
364 	size_t l1, l2;
365 	long ret;
366 	bool do_single_byte = false;
367 	mbstate_t mbs1, mbs2;
368 
369 	if (gawk_mb_cur_max > 1) {
370 		memset(& mbs1, 0, sizeof(mbstate_t));
371 		memset(& mbs2, 0, sizeof(mbstate_t));
372 	}
373 
374 	POP_TWO_SCALARS(s1, s2);
375 
376 	if (do_lint) {
377 		if ((fixtype(s1)->flags & STRING) == 0)
378 			lintwarn(_("%s: received non-string first argument"), "index");
379 		if ((fixtype(s2)->flags & STRING) == 0)
380 			lintwarn(_("%s: received non-string second argument"), "index");
381 	}
382 
383 	s1 = force_string(s1);
384 	s2 = force_string(s2);
385 
386 	p1 = s1->stptr;
387 	p2 = s2->stptr;
388 	l1 = s1->stlen;
389 	l2 = s2->stlen;
390 	ret = 0;
391 
392 	/*
393 	 * Icky special case, index(foo, "") should return 1,
394 	 * since both bwk awk and mawk do, and since match("foo", "")
395 	 * returns 1. This makes index("", "") work, too, fwiw.
396 	 */
397 	if (l2 == 0) {
398 		ret = 1;
399 		goto out;
400 	}
401 
402 	if (gawk_mb_cur_max > 1) {
403 		s1 = force_wstring(s1);
404 		s2 = force_wstring(s2);
405 		/*
406 		 * If we don't have valid wide character strings, use
407 		 * the real bytes.
408 		 */
409 		do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0)
410 					|| (s2->wstlen == 0 && s2->stlen > 0));
411 	}
412 
413 	/* IGNORECASE will already be false if posix */
414 	if (IGNORECASE) {
415 		while (l1 > 0) {
416 			if (l2 > l1)
417 				break;
418 			if (! do_single_byte && gawk_mb_cur_max > 1) {
419 				const wchar_t *pos;
420 
421 				pos = wcasestrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
422 				if (pos == NULL)
423 					ret = 0;
424 				else
425 					ret = pos - s1->wstptr + 1;	/* 1-based */
426 				goto out;
427 			} else {
428 				/*
429 				 * Could use tolower(*p1) == tolower(*p2) here.
430 				 * See discussion in eval.c as to why not.
431 				 */
432 				if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
433 				    && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
434 					ret = 1 + s1->stlen - l1;
435 					break;
436 				}
437 				l1--;
438 				p1++;
439 			}
440 		}
441 	} else {
442 		while (l1 > 0) {
443 			if (l2 > l1)
444 				break;
445 			if (*p1 == *p2
446 			    && (l2 == 1 || (l2 > 0 && memcmp(p1, p2, l2) == 0))) {
447 				ret = 1 + s1->stlen - l1;
448 				break;
449 			}
450 			if (! do_single_byte && gawk_mb_cur_max > 1) {
451 				const wchar_t *pos;
452 
453 				pos = wstrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
454 				if (pos == NULL)
455 					ret = 0;
456 				else
457 					ret = pos - s1->wstptr + 1;	/* 1-based */
458 				goto out;
459 			} else {
460 				l1--;
461 				p1++;
462 			}
463 		}
464 	}
465 out:
466 	DEREF(s1);
467 	DEREF(s2);
468 	return make_number((AWKNUM) ret);
469 }
470 
471 /* double_to_int --- convert double to int, used several places */
472 
473 double
double_to_int(double d)474 double_to_int(double d)
475 {
476 	if (d >= 0)
477 		d = floor(d);
478 	else
479 		d = ceil(d);
480 	return d;
481 }
482 
483 /* do_int --- convert double to int for awk */
484 
485 NODE *
do_int(int nargs)486 do_int(int nargs)
487 {
488 	NODE *tmp;
489 	double d;
490 
491 	tmp = POP_SCALAR();
492 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
493 		lintwarn(_("%s: received non-numeric argument"), "int");
494 	d = force_number(tmp)->numbr;
495 	d = double_to_int(d);
496 	DEREF(tmp);
497 	return make_number((AWKNUM) d);
498 }
499 
500 /* do_isarray --- check if argument is array */
501 
502 NODE *
do_isarray(int nargs)503 do_isarray(int nargs)
504 {
505 	NODE *tmp;
506 	int ret = 1;
507 
508 	tmp = POP();
509 	if (tmp->type != Node_var_array) {
510 		ret = 0;
511 		// could be Node_var_new
512 		if (tmp->type == Node_val)
513 			DEREF(tmp);
514 	}
515 	return make_number((AWKNUM) ret);
516 }
517 
518 /* do_length --- length of a string, array or $0 */
519 
520 NODE *
do_length(int nargs)521 do_length(int nargs)
522 {
523 	NODE *tmp;
524 	size_t len;
525 
526 	tmp = POP();
527 	if (tmp->type == Node_var_array) {
528 		static bool warned = false;
529 		unsigned long size;
530 
531 		if (do_posix)
532 			fatal(_("length: received array argument"));
533    		if (do_lint_extensions && ! warned) {
534 			warned = true;
535 			lintwarn(_("`length(array)' is a gawk extension"));
536 		}
537 
538 		/*
539 		 * Support for deferred loading of array elements requires that
540 		 * we use the array length interface even though it isn't
541 		 * necessary for the built-in array types.
542 		 *
543 		 * 1/2015: The deferred arrays are gone, but this is probably
544 		 * still a good idea.
545 		 */
546 
547 		size = assoc_length(tmp);
548 		return make_number(size);
549 	}
550 
551 	assert(tmp->type == Node_val);
552 
553 	if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
554 		lintwarn(_("%s: received non-string argument"), "length");
555 	tmp = force_string(tmp);
556 
557 	if (gawk_mb_cur_max > 1) {
558 		tmp = force_wstring(tmp);
559 		len = tmp->wstlen;
560 		/*
561 		 * If the bytes don't make a valid wide character
562 		 * string, fall back to the bytes themselves.
563 		 */
564 		 if (len == 0 && tmp->stlen > 0)
565 			 len = tmp->stlen;
566 	} else
567 		len = tmp->stlen;
568 
569 	DEREF(tmp);
570 	return make_number((AWKNUM) len);
571 }
572 
573 /* do_log --- the log function */
574 
575 NODE *
do_log(int nargs)576 do_log(int nargs)
577 {
578 	NODE *tmp;
579 	double d, arg;
580 
581 	tmp = POP_SCALAR();
582 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
583 		lintwarn(_("%s: received non-numeric argument"), "log");
584 	arg = force_number(tmp)->numbr;
585 	if (arg < 0.0)
586 		warning(_("%s: received negative argument %g"), "log", arg);
587 	d = log(arg);
588 	DEREF(tmp);
589 	return make_number((AWKNUM) d);
590 }
591 
592 
593 #ifdef HAVE_MPFR
594 
595 /*
596  * mpz2mpfr --- convert an arbitrary-precision integer to a float
597  *	without any loss of precision. The returned value is only
598  * 	good for temporary use.
599  */
600 
601 
602 static mpfr_ptr
mpz2mpfr(mpz_ptr zi)603 mpz2mpfr(mpz_ptr zi)
604 {
605 	size_t prec;
606 	static mpfr_t mpfrval;
607 	static bool inited = false;
608 	int tval;
609 
610 	/* estimate minimum precision for exact conversion */
611 	prec = mpz_sizeinbase(zi, 2);	/* most significant 1 bit position starting at 1 */
612 	prec -= (size_t) mpz_scan1(zi, 0);	/* least significant 1 bit index starting at 0 */
613 	if (prec < MPFR_PREC_MIN)
614 		prec = MPFR_PREC_MIN;
615 	else if (prec > MPFR_PREC_MAX)
616 		prec = MPFR_PREC_MAX;
617 
618 	if (! inited) {
619 		mpfr_init2(mpfrval, prec);
620 		inited = true;
621 	} else
622 		mpfr_set_prec(mpfrval, prec);
623 	tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
624 	IEEE_FMT(mpfrval, tval);
625 	return mpfrval;
626 }
627 #endif
628 
629 /*
630  * format_tree() formats arguments of sprintf,
631  * and accordingly to a fmt_string providing a format like in
632  * printf family from C library.  Returns a string node which value
633  * is a formatted string.  Called by  sprintf function.
634  *
635  * It is one of the uglier parts of gawk.  Thanks to Michal Jaegermann
636  * for taming this beast and making it compatible with ANSI C.
637  */
638 
639 NODE *
format_tree(const char * fmt_string,size_t n0,NODE ** the_args,long num_args)640 format_tree(
641 	const char *fmt_string,
642 	size_t n0,
643 	NODE **the_args,
644 	long num_args)
645 {
646 /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
647 /* difference of pointers should be of ptrdiff_t type, but let us be kind */
648 #define bchunk(s, l) if (l) { \
649 	while ((l) > ofre) { \
650 		size_t olen = obufout - obuf; \
651 		erealloc(obuf, char *, osiz * 2, "format_tree"); \
652 		ofre += osiz; \
653 		osiz *= 2; \
654 		obufout = obuf + olen; \
655 	} \
656 	memcpy(obufout, s, (size_t) (l)); \
657 	obufout += (l); \
658 	ofre -= (l); \
659 }
660 
661 /* copy one byte from 's' to 'obufout' checking for space in the process */
662 #define bchunk_one(s) { \
663 	if (ofre < 1) { \
664 		size_t olen = obufout - obuf; \
665 		erealloc(obuf, char *, osiz * 2, "format_tree"); \
666 		ofre += osiz; \
667 		osiz *= 2; \
668 		obufout = obuf + olen; \
669 	} \
670 	*obufout++ = *s; \
671 	--ofre; \
672 }
673 
674 /* Is there space for something L big in the buffer? */
675 #define chksize(l)  if ((l) >= ofre) { \
676 	size_t olen = obufout - obuf; \
677 	size_t delta = osiz+l-ofre; \
678 	erealloc(obuf, char *, osiz + delta, "format_tree"); \
679 	obufout = obuf + olen; \
680 	ofre += delta; \
681 	osiz += delta; \
682 }
683 
684 	size_t cur_arg = 0;
685 	NODE *r = NULL;
686 	int i, nc;
687 	bool toofew = false;
688 	char *obuf, *obufout;
689 	size_t osiz, ofre, olen_final;
690 	const char *chbuf;
691 	const char *s0, *s1;
692 	int cs1;
693 	NODE *arg;
694 	long fw, prec, argnum;
695 	bool used_dollar;
696 	bool lj, alt, have_prec, need_format;
697 	long *cur = NULL;
698 	uintmax_t uval;
699 	bool sgn;
700 	int base;
701 	/*
702 	 * Although this is an array, the elements serve two different
703 	 * purposes. The first element is the general buffer meant
704 	 * to hold the entire result string.  The second one is a
705 	 * temporary buffer for large floating point values. They
706 	 * could just as easily be separate variables, and the
707 	 * code might arguably be clearer.
708 	 */
709 	struct {
710 		char *buf;
711 		size_t bufsize;
712 		char stackbuf[30];
713 	} cpbufs[2];
714 #define cpbuf	cpbufs[0].buf
715 	char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
716 	char *cp;
717 	const char *fill;
718 	AWKNUM tmpval = 0.0;
719 	char signchar = '\0';
720 	size_t len;
721 	bool zero_flag = false;
722 	bool quote_flag = false;
723 	int ii, jj;
724 	char *chp;
725 	size_t copy_count, char_count;
726 	char *nan_inf_val;
727 	bool magic_posix_flag;
728 #ifdef HAVE_MPFR
729 	mpz_ptr zi;
730 	mpfr_ptr mf;
731 #endif
732 	enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT } fmt_type;
733 
734 	static const char sp[] = " ";
735 	static const char zero_string[] = "0";
736 	static const char lchbuf[] = "0123456789abcdef";
737 	static const char Uchbuf[] = "0123456789ABCDEF";
738 	static const char bad_modifiers[] = "hjlLtz";
739 	static bool warned[sizeof(bad_modifiers)-1];	// auto-init to zero
740 
741 	bool modifier_seen[sizeof(bad_modifiers)-1];
742 #define modifier_index(c)  (strchr(bad_modifiers, c) - bad_modifiers)
743 
744 #define INITIAL_OUT_SIZE	64
745 	emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
746 	obufout = obuf;
747 	osiz = INITIAL_OUT_SIZE;
748 	ofre = osiz - 1;
749 
750 	cur_arg = 1;
751 
752 	{
753 		size_t k;
754 		for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
755 			cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
756 			cpbufs[k].buf = cpbufs[k].stackbuf;
757 		}
758 	}
759 
760 	/*
761 	 * The point of this goop is to grow the buffer
762 	 * holding the converted number, so that large
763 	 * values don't overflow a fixed length buffer.
764 	 */
765 #define PREPEND(CH) do {	\
766 	if (cp == cpbufs[0].buf) {	\
767 		char *prev = cpbufs[0].buf;	\
768 		emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
769 		 	"format_tree");	\
770 		memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev,	\
771 		       cpbufs[0].bufsize);	\
772 		cpbufs[0].bufsize *= 2;	\
773 		if (prev != cpbufs[0].stackbuf)	\
774 			efree(prev);	\
775 		cend = cpbufs[0].buf+cpbufs[0].bufsize;	\
776 	}	\
777 	*--cp = (CH);	\
778 } while(0)
779 
780 	/*
781 	 * Check first for use of `count$'.
782 	 * If plain argument retrieval was used earlier, choke.
783 	 *	Otherwise, return the requested argument.
784 	 * If not `count$' now, but it was used earlier, choke.
785 	 * If this format is more than total number of args, choke.
786 	 * Otherwise, return the current argument.
787 	 */
788 #define parse_next_arg() { \
789 	if (argnum > 0) { \
790 		if (cur_arg > 1) { \
791 			msg(_("fatal: must use `count$' on all formats or none")); \
792 			goto out; \
793 		} \
794 		arg = the_args[argnum]; \
795 	} else if (used_dollar) { \
796 		msg(_("fatal: must use `count$' on all formats or none")); \
797 		arg = 0; /* shutup the compiler */ \
798 		goto out; \
799 	} else if (cur_arg >= num_args) { \
800 		arg = 0; /* shutup the compiler */ \
801 		toofew = true; \
802 		break; \
803 	} else { \
804 		arg = the_args[cur_arg]; \
805 		cur_arg++; \
806 	} \
807 }
808 
809 	need_format = false;
810 	used_dollar = false;
811 
812 	s0 = s1 = fmt_string;
813 	while (n0-- > 0) {
814 		if (*s1 != '%') {
815 			s1++;
816 			continue;
817 		}
818 		need_format = true;
819 		bchunk(s0, s1 - s0);
820 		s0 = s1;
821 		cur = &fw;
822 		fw = 0;
823 		prec = 0;
824 		base = 0;
825 		argnum = 0;
826 		base = 0;
827 		have_prec = false;
828 		signchar = '\0';
829 		zero_flag = false;
830 		quote_flag = false;
831 		nan_inf_val = NULL;
832 #ifdef HAVE_MPFR
833 		mf = NULL;
834 		zi = NULL;
835 #endif
836 		fmt_type = MP_NONE;
837 
838 		lj = alt = false;
839 		memset(modifier_seen, 0, sizeof(modifier_seen));
840 		magic_posix_flag = false;
841 		fill = sp;
842 		cp = cend;
843 		chbuf = lchbuf;
844 		s1++;
845 
846 retry:
847 		if (n0-- == 0)	/* ran out early! */
848 			break;
849 
850 		switch (cs1 = *s1++) {
851 		case (-1):	/* dummy case to allow for checking */
852 check_pos:
853 			if (cur != &fw)
854 				break;		/* reject as a valid format */
855 			goto retry;
856 		case '%':
857 			need_format = false;
858 			/*
859 			 * 29 Oct. 2002:
860 			 * The C99 standard pages 274 and 279 seem to imply that
861 			 * since there's no arg converted, the field width doesn't
862 			 * apply.  The code already was that way, but this
863 			 * comment documents it, at least in the code.
864 			 */
865 			if (do_lint) {
866 				const char *msg = NULL;
867 
868 				if (fw && ! have_prec)
869 					msg = _("field width is ignored for `%%' specifier");
870 				else if (fw == 0 && have_prec)
871 					msg = _("precision is ignored for `%%' specifier");
872 				else if (fw && have_prec)
873 					msg = _("field width and precision are ignored for `%%' specifier");
874 
875 				if (msg != NULL)
876 					lintwarn("%s", msg);
877 			}
878 			bchunk_one("%");
879 			s0 = s1;
880 			break;
881 
882 		case '0':
883 			/*
884 			 * Only turn on zero_flag if we haven't seen
885 			 * the field width or precision yet.  Otherwise,
886 			 * screws up floating point formatting.
887 			 */
888 			if (cur == & fw)
889 				zero_flag = true;
890 			if (lj)
891 				goto retry;
892 			/* FALL through */
893 		case '1':
894 		case '2':
895 		case '3':
896 		case '4':
897 		case '5':
898 		case '6':
899 		case '7':
900 		case '8':
901 		case '9':
902 			if (cur == NULL)
903 				break;
904 			if (prec >= 0)
905 				*cur = cs1 - '0';
906 			/*
907 			 * with a negative precision *cur is already set
908 			 * to -1, so it will remain negative, but we have
909 			 * to "eat" precision digits in any case
910 			 */
911 			while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
912 				--n0;
913 				*cur = *cur * 10 + *s1++ - '0';
914 			}
915 			if (prec < 0) 	/* negative precision is discarded */
916 				have_prec = false;
917 			if (cur == &prec)
918 				cur = NULL;
919 			if (n0 == 0)	/* badly formatted control string */
920 				continue;
921 			goto retry;
922 		case '$':
923 			if (do_traditional) {
924 				msg(_("fatal: `$' is not permitted in awk formats"));
925 				goto out;
926 			}
927 
928 			if (cur == &fw) {
929 				argnum = fw;
930 				fw = 0;
931 				used_dollar = true;
932 				if (argnum <= 0) {
933 					msg(_("fatal: argument index with `$' must be > 0"));
934 					goto out;
935 				}
936 				if (argnum >= num_args) {
937 					msg(_("fatal: argument index %ld greater than total number of supplied arguments"), argnum);
938 					goto out;
939 				}
940 			} else {
941 				msg(_("fatal: `$' not permitted after period in format"));
942 				goto out;
943 			}
944 
945 			goto retry;
946 		case '*':
947 			if (cur == NULL)
948 				break;
949 			if (! do_traditional && used_dollar && ! isdigit((unsigned char) *s1)) {
950 				fatal(_("fatal: must use `count$' on all formats or none"));
951 				break;	/* silence warnings */
952 			} else if (! do_traditional && isdigit((unsigned char) *s1)) {
953 				int val = 0;
954 
955 				for (; n0 > 0 && *s1 && isdigit((unsigned char) *s1); s1++, n0--) {
956 					val *= 10;
957 					val += *s1 - '0';
958 				}
959 				if (*s1 != '$') {
960 					msg(_("fatal: no `$' supplied for positional field width or precision"));
961 					goto out;
962 				} else {
963 					s1++;
964 					n0--;
965 				}
966 				if (val >= num_args) {
967 					toofew = true;
968 					break;
969 				}
970 				arg = the_args[val];
971 			} else {
972 				parse_next_arg();
973 			}
974 			(void) force_number(arg);
975 			*cur = get_number_si(arg);
976 			if (*cur < 0 && cur == &fw) {
977 				*cur = -*cur;
978 				lj = true;
979 			}
980 			if (cur == &prec) {
981 				if (*cur >= 0)
982 					have_prec = true;
983 				else
984 					have_prec = false;
985 				cur = NULL;
986 			}
987 			goto retry;
988 		case ' ':		/* print ' ' or '-' */
989 					/* 'space' flag is ignored */
990 					/* if '+' already present  */
991 			if (signchar != false)
992 				goto check_pos;
993 			/* FALL THROUGH */
994 		case '+':		/* print '+' or '-' */
995 			signchar = cs1;
996 			goto check_pos;
997 		case '-':
998 			if (prec < 0)
999 				break;
1000 			if (cur == &prec) {
1001 				prec = -1;
1002 				goto retry;
1003 			}
1004 			fill = sp;      /* if left justified then other */
1005 			lj = true;	/* filling is ignored */
1006 			goto check_pos;
1007 		case '.':
1008 			if (cur != &fw)
1009 				break;
1010 			cur = &prec;
1011 			have_prec = true;
1012 			goto retry;
1013 		case '#':
1014 			alt = true;
1015 			goto check_pos;
1016 		case '\'':
1017 #if defined(HAVE_LOCALE_H)
1018 			quote_flag = true;
1019 			goto check_pos;
1020 #else
1021 			goto retry;
1022 #endif
1023 		case 'h':
1024 		case 'j':
1025 		case 'l':
1026 		case 'L':
1027 		case 't':
1028 		case 'z':
1029 			if (modifier_seen[modifier_index(cs1)])
1030 				break;
1031 			else {
1032 				int ind = modifier_index(cs1);
1033 
1034 				if (do_lint && ! warned[ind]) {
1035 					lintwarn(_("`%c' is meaningless in awk formats; ignored"), cs1);
1036 					warned[ind] = true;
1037 				}
1038 				if (do_posix) {
1039 					msg(_("fatal: `%c' is not permitted in POSIX awk formats"), cs1);
1040 					goto out;
1041 				}
1042 			}
1043 			modifier_seen[modifier_index(cs1)] = true;
1044 			goto retry;
1045 
1046 		case 'P':
1047 			if (magic_posix_flag)
1048 				break;
1049 			magic_posix_flag = true;
1050 			goto retry;
1051 		case 'c':
1052 			need_format = false;
1053 			parse_next_arg();
1054 			/* user input that looks numeric is numeric */
1055 			fixtype(arg);
1056 			if ((arg->flags & NUMBER) != 0) {
1057 				uval = get_number_uj(arg);
1058 				if (gawk_mb_cur_max > 1) {
1059 					char buf[100];
1060 					wchar_t wc;
1061 					mbstate_t mbs;
1062 					size_t count;
1063 
1064 					memset(& mbs, 0, sizeof(mbs));
1065 
1066 					/* handle systems with too small wchar_t */
1067 					if (sizeof(wchar_t) < 4 && uval > 0xffff) {
1068 						if (do_lint)
1069 							lintwarn(
1070 						_("[s]printf: value %g is too big for %%c format"),
1071 									arg->numbr);
1072 
1073 						goto out0;
1074 					}
1075 
1076 					wc = uval;
1077 
1078 					count = wcrtomb(buf, wc, & mbs);
1079 					if (count == 0
1080 					    || count == (size_t) -1) {
1081 						if (do_lint)
1082 							lintwarn(
1083 						_("[s]printf: value %g is not a valid wide character"),
1084 									arg->numbr);
1085 
1086 						goto out0;
1087 					}
1088 
1089 					memcpy(cpbuf, buf, count);
1090 					prec = count;
1091 					cp = cpbuf;
1092 					goto pr_tail;
1093 				}
1094 out0:
1095 				;
1096 				/* else,
1097 					fall through */
1098 
1099 				cpbuf[0] = uval;
1100 				prec = 1;
1101 				cp = cpbuf;
1102 				goto pr_tail;
1103 			}
1104 			/*
1105 			 * As per POSIX, only output first character of a
1106 			 * string value.  Thus, we ignore any provided
1107 			 * precision, forcing it to 1.  (Didn't this
1108 			 * used to work? 6/2003.)
1109 			 */
1110 			cp = arg->stptr;
1111 			prec = 1;
1112 			/*
1113 			 * First character can be multiple bytes if
1114 			 * it's a multibyte character. Grr.
1115 			 */
1116 			if (gawk_mb_cur_max > 1) {
1117 				mbstate_t state;
1118 				size_t count;
1119 
1120 				memset(& state, 0, sizeof(state));
1121 				count = mbrlen(cp, arg->stlen, & state);
1122 				if (count != (size_t) -1 && count != (size_t) -2 && count > 0) {
1123 					prec = count;
1124 					/* may need to increase fw so that padding happens, see pr_tail code */
1125 					if (fw > 0)
1126 						fw += count - 1;
1127 				}
1128 			}
1129 			goto pr_tail;
1130 		case 's':
1131 			need_format = false;
1132 			parse_next_arg();
1133 			arg = force_string(arg);
1134 			if (fw == 0 && ! have_prec)
1135 				prec = arg->stlen;
1136 			else {
1137 				char_count = mbc_char_count(arg->stptr, arg->stlen);
1138 				if (! have_prec || prec > char_count)
1139 					prec = char_count;
1140 			}
1141 			cp = arg->stptr;
1142 			goto pr_tail;
1143 		case 'd':
1144 		case 'i':
1145 			need_format = false;
1146 			parse_next_arg();
1147 			(void) force_number(arg);
1148 
1149 			/*
1150 			 * Check for Nan or Inf.
1151 			 */
1152 			if (out_of_range(arg))
1153 				goto out_of_range;
1154 #ifdef HAVE_MPFR
1155 			if (is_mpg_float(arg))
1156 				goto mpf0;
1157 			else if (is_mpg_integer(arg))
1158 				goto mpz0;
1159 			else
1160 #endif
1161 			tmpval = double_to_int(arg->numbr);
1162 
1163 			/*
1164 			 * ``The result of converting a zero value with a
1165 			 * precision of zero is no characters.''
1166 			 */
1167 			if (have_prec && prec == 0 && tmpval == 0)
1168 				goto pr_tail;
1169 
1170 			if (tmpval < 0) {
1171 				tmpval = -tmpval;
1172 				sgn = true;
1173 			} else {
1174 				if (tmpval == -0.0)
1175 					/* avoid printing -0 */
1176 					tmpval = 0.0;
1177 				sgn = false;
1178 			}
1179 			/*
1180 			 * Use snprintf return value to tell if there
1181 			 * is enough room in the buffer or not.
1182 			 */
1183 			while ((i = snprintf(cpbufs[1].buf,
1184 					     cpbufs[1].bufsize, "%.0f",
1185 					     tmpval)) >=
1186 			       cpbufs[1].bufsize) {
1187 				if (cpbufs[1].buf == cpbufs[1].stackbuf)
1188 					cpbufs[1].buf = NULL;
1189 				if (i > 0) {
1190 					cpbufs[1].bufsize += ((i > cpbufs[1].bufsize) ?
1191 							      i : cpbufs[1].bufsize);
1192 				}
1193 				else
1194 					cpbufs[1].bufsize *= 2;
1195 				assert(cpbufs[1].bufsize > 0);
1196 				erealloc(cpbufs[1].buf, char *,
1197 					 cpbufs[1].bufsize, "format_tree");
1198 			}
1199 			if (i < 1)
1200 				goto out_of_range;
1201 #if defined(HAVE_LOCALE_H)
1202 			quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
1203 #endif
1204 			chp = &cpbufs[1].buf[i-1];
1205 			ii = jj = 0;
1206 			do {
1207 				PREPEND(*chp);
1208 				chp--; i--;
1209 #if defined(HAVE_LOCALE_H)
1210 				if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
1211 					if (i) {	/* only add if more digits coming */
1212 						int k;
1213 						const char *ts = loc.thousands_sep;
1214 
1215 						for (k = strlen(ts) - 1; k >= 0; k--) {
1216 							PREPEND(ts[k]);
1217 						}
1218 					}
1219 					if (loc.grouping[ii+1] == 0)
1220 						jj = 0;		/* keep using current val in loc.grouping[ii] */
1221 					else if (loc.grouping[ii+1] == CHAR_MAX)
1222 						quote_flag = false;
1223 					else {
1224 						ii++;
1225 						jj = 0;
1226 					}
1227 				}
1228 #endif
1229 			} while (i > 0);
1230 
1231 			/* add more output digits to match the precision */
1232 			if (have_prec) {
1233 				while (cend - cp < prec)
1234 					PREPEND('0');
1235 			}
1236 
1237 			if (sgn)
1238 				PREPEND('-');
1239 			else if (signchar)
1240 				PREPEND(signchar);
1241 			/*
1242 			 * When to fill with zeroes is of course not simple.
1243 			 * First: No zero fill if left-justifying.
1244 			 * Next: There seem to be two cases:
1245 			 * 	A '0' without a precision, e.g. %06d
1246 			 * 	A precision with no field width, e.g. %.10d
1247 			 * Any other case, we don't want to fill with zeroes.
1248 			 */
1249 			if (! lj
1250 			    && ((zero_flag && ! have_prec)
1251 				 || (fw == 0 && have_prec)))
1252 				fill = zero_string;
1253 			if (prec > fw)
1254 				fw = prec;
1255 			prec = cend - cp;
1256 			if (fw > prec && ! lj && fill != sp
1257 			    && (*cp == '-' || signchar)) {
1258 				bchunk_one(cp);
1259 				cp++;
1260 				prec--;
1261 				fw--;
1262 			}
1263 			goto pr_tail;
1264 		case 'X':
1265 			chbuf = Uchbuf;	/* FALL THROUGH */
1266 		case 'x':
1267 			base += 6;	/* FALL THROUGH */
1268 		case 'u':
1269 			base += 2;	/* FALL THROUGH */
1270 		case 'o':
1271 			base += 8;
1272 			need_format = false;
1273 			parse_next_arg();
1274 			(void) force_number(arg);
1275 
1276 			if (out_of_range(arg))
1277 				goto out_of_range;
1278 #ifdef HAVE_MPFR
1279 			if (is_mpg_integer(arg)) {
1280 mpz0:
1281 				zi = arg->mpg_i;
1282 
1283 				if (cs1 != 'd' && cs1 != 'i') {
1284 					if (mpz_sgn(zi) <= 0) {
1285 						/*
1286 						 * Negative value or 0 requires special handling.
1287 						 * Unlike MPFR, GMP does not allow conversion
1288 						 * to (u)intmax_t. So we first convert GMP type to
1289 						 * a MPFR type.
1290 						 */
1291 						mf = mpz2mpfr(zi);
1292 						goto mpf1;
1293 					}
1294 					signchar = '\0';	/* Don't print '+' */
1295 				}
1296 
1297 				/* See comments above about when to fill with zeros */
1298 				zero_flag = (! lj
1299 						    && ((zero_flag && ! have_prec)
1300 							 || (fw == 0 && have_prec)));
1301 
1302  				fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
1303 				goto fmt0;
1304 
1305 			} else if (is_mpg_float(arg)) {
1306 mpf0:
1307 				mf = arg->mpg_numbr;
1308 				if (! mpfr_number_p(mf)) {
1309 					/* inf or NaN */
1310 					cs1 = 'g';
1311 					fmt_type = MP_FLOAT;
1312 					goto fmt1;
1313 				}
1314 
1315 				if (cs1 != 'd' && cs1 != 'i') {
1316 mpf1:
1317 					/*
1318 					 * The output of printf("%#.0x", 0) is 0 instead of 0x, hence <= in
1319 					 * the comparison below.
1320 					 */
1321 					if (mpfr_sgn(mf) <= 0) {
1322 						if (! mpfr_fits_intmax_p(mf, ROUND_MODE)) {
1323 							/* -ve number is too large */
1324 							cs1 = 'g';
1325 							fmt_type = MP_FLOAT;
1326 							goto fmt1;
1327 						}
1328 
1329 						tmpval = uval = (uintmax_t) mpfr_get_sj(mf, ROUND_MODE);
1330 						if (! alt && have_prec && prec == 0 && tmpval == 0)
1331 							goto pr_tail;	/* printf("%.0x", 0) is no characters */
1332 						goto int0;
1333 					}
1334 					signchar = '\0';	/* Don't print '+' */
1335 				}
1336 
1337 				/* See comments above about when to fill with zeros */
1338 				zero_flag = (! lj
1339 						    && ((zero_flag && ! have_prec)
1340 							 || (fw == 0 && have_prec)));
1341 
1342 				(void) mpfr_get_z(mpzval, mf, MPFR_RNDZ);	/* convert to GMP integer */
1343  				fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
1344 				zi = mpzval;
1345 				goto fmt0;
1346 			} else
1347 #endif
1348 				tmpval = arg->numbr;
1349 
1350 			/*
1351 			 * ``The result of converting a zero value with a
1352 			 * precision of zero is no characters.''
1353 			 *
1354 			 * If I remember the ANSI C standard, though,
1355 			 * it says that for octal conversions
1356 			 * the precision is artificially increased
1357 			 * to add an extra 0 if # is supplied.
1358 			 * Indeed, in C,
1359 			 * 	printf("%#.0o\n", 0);
1360 			 * prints a single 0.
1361 			 */
1362 			if (! alt && have_prec && prec == 0 && tmpval == 0)
1363 				goto pr_tail;
1364 
1365 			if (tmpval < 0) {
1366 				uval = (uintmax_t) (intmax_t) tmpval;
1367 				if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval))
1368 					goto out_of_range;
1369 			} else {
1370 				uval = (uintmax_t) tmpval;
1371 				if ((AWKNUM)uval != double_to_int(tmpval))
1372 					goto out_of_range;
1373 			}
1374 #ifdef HAVE_MPFR
1375 	int0:
1376 #endif
1377 #if defined(HAVE_LOCALE_H)
1378 			quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
1379 #endif
1380 			/*
1381 			 * When to fill with zeroes is of course not simple.
1382 			 * First: No zero fill if left-justifying.
1383 			 * Next: There seem to be two cases:
1384 			 * 	A '0' without a precision, e.g. %06d
1385 			 * 	A precision with no field width, e.g. %.10d
1386 			 * Any other case, we don't want to fill with zeroes.
1387 			 */
1388 			if (! lj
1389 			    && ((zero_flag && ! have_prec)
1390 				 || (fw == 0 && have_prec)))
1391 				fill = zero_string;
1392 			ii = jj = 0;
1393 			do {
1394 				PREPEND(chbuf[uval % base]);
1395 				uval /= base;
1396 #if defined(HAVE_LOCALE_H)
1397 				if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
1398 					if (uval) {	/* only add if more digits coming */
1399 						int k;
1400 						const char *ts = loc.thousands_sep;
1401 
1402 						for (k = strlen(ts) - 1; k >= 0; k--) {
1403 							PREPEND(ts[k]);
1404 						}
1405 					}
1406 					if (loc.grouping[ii+1] == 0)
1407 						jj = 0;     /* keep using current val in loc.grouping[ii] */
1408 					else if (loc.grouping[ii+1] == CHAR_MAX)
1409 						quote_flag = false;
1410 					else {
1411 						ii++;
1412 						jj = 0;
1413 					}
1414 				}
1415 #endif
1416 			} while (uval > 0);
1417 
1418 			/* add more output digits to match the precision */
1419 			if (have_prec) {
1420 				while (cend - cp < prec)
1421 					PREPEND('0');
1422 			}
1423 
1424 			if (alt && tmpval != 0) {
1425 				if (base == 16) {
1426 					PREPEND(cs1);
1427 					PREPEND('0');
1428 					if (fill != sp) {
1429 						bchunk(cp, 2);
1430 						cp += 2;
1431 						fw -= 2;
1432 					}
1433 				} else if (base == 8)
1434 					PREPEND('0');
1435 			}
1436 			base = 0;
1437 			if (prec > fw)
1438 				fw = prec;
1439 			prec = cend - cp;
1440 	pr_tail:
1441 			if (! lj) {
1442 				while (fw > prec) {
1443 			    		bchunk_one(fill);
1444 					fw--;
1445 				}
1446 			}
1447 			copy_count = prec;
1448 			if (fw == 0 && ! have_prec)
1449 				;
1450 			else if (gawk_mb_cur_max > 1) {
1451 				if (cs1 == 's') {
1452 					assert(cp == arg->stptr || cp == cpbuf);
1453 					copy_count = mbc_byte_count(arg->stptr, prec);
1454 				}
1455 				/* prec was set by code for %c */
1456 				/* else
1457 					copy_count = prec; */
1458 			}
1459 			bchunk(cp, copy_count);
1460 			while (fw > prec) {
1461 				bchunk_one(fill);
1462 				fw--;
1463 			}
1464 			s0 = s1;
1465 			break;
1466 
1467      out_of_range:
1468 			/*
1469 			 * out of range - emergency use of %g format,
1470 			 * or format NaN and INF values.
1471 			 */
1472 			nan_inf_val = format_nan_inf(arg, cs1);
1473 			if (do_posix || magic_posix_flag || nan_inf_val == NULL) {
1474 				if (do_lint && ! do_posix && ! magic_posix_flag)
1475 					lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
1476 								(double) tmpval, cs1);
1477 				tmpval = arg->numbr;
1478 				if (strchr("aAeEfFgG", cs1) == NULL)
1479 					cs1 = 'g';
1480 				goto fmt1;
1481 			} else {
1482 				if (do_lint)
1483 					lintwarn(_("[s]printf: value %s is out of range for `%%%c' format"),
1484 								nan_inf_val, cs1);
1485 				bchunk(nan_inf_val, strlen(nan_inf_val));
1486 				s0 = s1;
1487 				break;
1488 			}
1489 
1490 		case 'F':
1491 #if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
1492 			cs1 = 'f';
1493 			/* FALL THROUGH */
1494 #endif
1495 		case 'g':
1496 		case 'G':
1497 		case 'e':
1498 		case 'f':
1499 		case 'E':
1500 #if defined(PRINTF_HAS_A_FORMAT) && PRINTF_HAS_A_FORMAT == 1
1501 		case 'A':
1502 		case 'a':
1503 		{
1504 			static bool warned = false;
1505 
1506 			if (do_lint && tolower(cs1) == 'a' && ! warned) {
1507 				warned = true;
1508 				lintwarn(_("%%%c format is POSIX standard but not portable to other awks"), cs1);
1509 			}
1510 		}
1511 #endif
1512 			need_format = false;
1513 			parse_next_arg();
1514 			(void) force_number(arg);
1515 
1516 			if (! is_mpg_number(arg))
1517 				tmpval = arg->numbr;
1518 #ifdef HAVE_MPFR
1519 			else if (is_mpg_float(arg)) {
1520 				mf = arg->mpg_numbr;
1521 				fmt_type = MP_FLOAT;
1522 			} else {
1523 				/* arbitrary-precision integer, convert to MPFR float */
1524 				assert(mf == NULL);
1525 				mf = mpz2mpfr(arg->mpg_i);
1526 				fmt_type = MP_FLOAT;
1527 			}
1528 #endif
1529 			if (out_of_range(arg))
1530 				goto out_of_range;
1531 
1532      fmt1:
1533 			if (! have_prec)
1534 				prec = DEFAULT_G_PRECISION;
1535 #ifdef HAVE_MPFR
1536      fmt0:
1537 #endif
1538 			chksize(fw + prec + 11);	/* 11 == slop */
1539 			cp = cpbuf;
1540 			*cp++ = '%';
1541 			if (lj)
1542 				*cp++ = '-';
1543 			if (signchar)
1544 				*cp++ = signchar;
1545 			if (alt)
1546 				*cp++ = '#';
1547 			if (zero_flag)
1548 				*cp++ = '0';
1549 			if (quote_flag)
1550 				*cp++ = '\'';
1551 
1552 #if defined(LC_NUMERIC)
1553 			if (quote_flag && ! use_lc_numeric)
1554 				setlocale(LC_NUMERIC, "");
1555 #endif
1556 
1557 			bool need_to_add_thousands = false;
1558 			switch (fmt_type) {
1559 #ifdef HAVE_MPFR
1560 			case MP_INT_WITH_PREC:
1561 				sprintf(cp, "*.*Z%c", cs1);
1562 				while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
1563 					     (int) fw, (int) prec, zi)) >= (int) ofre)
1564 					chksize(nc)
1565 				need_to_add_thousands = true;
1566 				break;
1567 			case MP_INT_WITHOUT_PREC:
1568 				sprintf(cp, "*Z%c", cs1);
1569 				while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
1570 					     (int) fw, zi)) >= (int) ofre)
1571 					chksize(nc)
1572 				need_to_add_thousands = true;
1573 				break;
1574 			case MP_FLOAT:
1575 				sprintf(cp, "*.*R*%c", cs1);
1576 				while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
1577 					     (int) fw, (int) prec, ROUND_MODE, mf)) >= (int) ofre)
1578 					chksize(nc)
1579 				break;
1580 #endif
1581 			default:
1582 				if (have_prec || tolower(cs1) != 'a') {
1583 					sprintf(cp, "*.*%c", cs1);
1584 					while ((nc = snprintf(obufout, ofre, cpbuf,
1585 						     (int) fw, (int) prec,
1586 						     (double) tmpval)) >= (int) ofre)
1587 						chksize(nc)
1588 				} else {
1589 					// For %a and %A, use the default precision if it
1590 					// wasn't supplied by the user.
1591 					sprintf(cp, "*%c", cs1);
1592 					while ((nc = snprintf(obufout, ofre, cpbuf,
1593 						     (int) fw,
1594 						     (double) tmpval)) >= (int) ofre)
1595 						chksize(nc)
1596 				}
1597 			}
1598 
1599 #if defined(LC_NUMERIC)
1600 			if (quote_flag && ! use_lc_numeric)
1601 				setlocale(LC_NUMERIC, "C");
1602 #endif
1603 			len = strlen(obufout);
1604 			if (quote_flag && need_to_add_thousands) {
1605 				const char *new_text = add_thousands(obufout, & loc);
1606 
1607 				len = strlen(new_text);
1608 				chksize(len)
1609 				strcpy(obufout, new_text);
1610 				free((void *) new_text);
1611 			}
1612 
1613 			ofre -= len;
1614 			obufout += len;
1615 			s0 = s1;
1616 			break;
1617 		default:
1618 			if (do_lint && is_alpha(cs1))
1619 				lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1);
1620 			break;
1621 		}
1622 		if (toofew) {
1623 			msg("%s\n\t`%s'\n\t%*s%s",
1624 			      _("fatal: not enough arguments to satisfy format string"),
1625 			      fmt_string, (int) (s1 - fmt_string - 1), "",
1626 			      _("^ ran out for this one"));
1627 			goto out;
1628 		}
1629 	}
1630 	if (do_lint) {
1631 		if (need_format)
1632 			lintwarn(
1633 			_("[s]printf: format specifier does not have control letter"));
1634 		if (cur_arg < num_args)
1635 			lintwarn(
1636 			_("too many arguments supplied for format string"));
1637 	}
1638 	bchunk(s0, s1 - s0);
1639 	olen_final = obufout - obuf;
1640 #define GIVE_BACK_SIZE (INITIAL_OUT_SIZE * 2)
1641 	if (ofre > GIVE_BACK_SIZE)
1642 		erealloc(obuf, char *, olen_final + 1, "format_tree");
1643 	r = make_str_node(obuf, olen_final, ALREADY_MALLOCED);
1644 	obuf = NULL;
1645 out:
1646 	{
1647 		size_t k;
1648 		size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
1649 		for (k = 0; k < count; k++) {
1650 			if (cpbufs[k].buf != cpbufs[k].stackbuf)
1651 				efree(cpbufs[k].buf);
1652 		}
1653 		if (obuf != NULL)
1654 			efree(obuf);
1655 	}
1656 
1657 	if (r == NULL)
1658 		gawk_exit(EXIT_FATAL);
1659 	return r;
1660 }
1661 
1662 
1663 /* printf_common --- common code for sprintf and printf */
1664 
1665 static NODE *
printf_common(int nargs)1666 printf_common(int nargs)
1667 {
1668 	int i;
1669 	NODE *r, *tmp;
1670 
1671 	assert(nargs > 0 && nargs <= max_args);
1672 	for (i = 1; i <= nargs; i++) {
1673 		tmp = args_array[nargs - i] = POP();
1674 		if (tmp->type == Node_var_array) {
1675 			while (--i > 0)
1676 				DEREF(args_array[nargs - i]);
1677 			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
1678 		}
1679 	}
1680 
1681 	args_array[0] = force_string(args_array[0]);
1682 	r = format_tree(args_array[0]->stptr, args_array[0]->stlen, args_array, nargs);
1683 	for (i = 0; i < nargs; i++)
1684 		DEREF(args_array[i]);
1685 	return r;
1686 }
1687 
1688 /* do_sprintf --- perform sprintf */
1689 
1690 NODE *
do_sprintf(int nargs)1691 do_sprintf(int nargs)
1692 {
1693 	NODE *r;
1694 
1695 	if (nargs == 0)
1696 		fatal(_("sprintf: no arguments"));
1697 
1698 	r = printf_common(nargs);
1699 	if (r == NULL)
1700 		gawk_exit(EXIT_FATAL);
1701 	return r;
1702 }
1703 
1704 
1705 /* do_printf --- perform printf, including redirection */
1706 
1707 void
do_printf(int nargs,int redirtype)1708 do_printf(int nargs, int redirtype)
1709 {
1710 	FILE *fp = NULL;
1711 	NODE *tmp;
1712 	struct redirect *rp = NULL;
1713 	int errflg = 0;
1714 	NODE *redir_exp = NULL;
1715 
1716 	if (nargs == 0) {
1717 		if (do_traditional) {
1718 			if (do_lint)
1719 				lintwarn(_("printf: no arguments"));
1720 			if (redirtype != 0) {
1721 				redir_exp = TOP();
1722 				if (redir_exp->type != Node_val)
1723 					fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
1724 				rp = redirect(redir_exp, redirtype, & errflg, true);
1725 				DEREF(redir_exp);
1726 				decr_sp();
1727 			}
1728 			return;	/* bwk accepts it silently */
1729 		}
1730 		fatal(_("printf: no arguments"));
1731 	}
1732 
1733 	if (redirtype != 0) {
1734 		redir_exp = PEEK(nargs);
1735 		if (redir_exp->type != Node_val)
1736 			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
1737 		rp = redirect(redir_exp, redirtype, & errflg, true);
1738 		if (rp != NULL) {
1739 			if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
1740 				if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
1741 					update_ERRNO_int(EBADF);
1742 					return;
1743 				}
1744 				(void) close_rp(rp, CLOSE_ALL);
1745 				fatal(_("printf: attempt to write to closed write end of two-way pipe"));
1746 			}
1747 			fp = rp->output.fp;
1748 		}
1749 		else if (errflg) {
1750 			update_ERRNO_int(errflg);
1751 			return;
1752 		}
1753 	} else if (do_debug)	/* only the debugger can change the default output */
1754 		fp = output_fp;
1755 	else
1756 		fp = stdout;
1757 
1758 	tmp = printf_common(nargs);
1759 	if (redir_exp != NULL) {
1760 		DEREF(redir_exp);
1761 		decr_sp();
1762 	}
1763 	if (tmp != NULL) {
1764 		if (fp == NULL) {
1765 			DEREF(tmp);
1766 			return;
1767 		}
1768 		efwrite(tmp->stptr, sizeof(char), tmp->stlen, fp, "printf", rp, true);
1769 		if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
1770 			rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
1771 		DEREF(tmp);
1772 	} else
1773 		gawk_exit(EXIT_FATAL);
1774 }
1775 
1776 /* do_sqrt --- do the sqrt function */
1777 
1778 NODE *
do_sqrt(int nargs)1779 do_sqrt(int nargs)
1780 {
1781 	NODE *tmp;
1782 	double arg;
1783 
1784 	tmp = POP_SCALAR();
1785 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
1786 		lintwarn(_("%s: received non-numeric argument"), "sqrt");
1787 	arg = (double) force_number(tmp)->numbr;
1788 	DEREF(tmp);
1789 	if (arg < 0.0)
1790 		warning(_("%s: received negative argument %g"), "sqrt", arg);
1791 	return make_number((AWKNUM) sqrt(arg));
1792 }
1793 
1794 /* do_substr --- do the substr function */
1795 
1796 NODE *
do_substr(int nargs)1797 do_substr(int nargs)
1798 {
1799 	NODE *t1;
1800 	NODE *r;
1801 	size_t indx;
1802 	size_t length = 0;
1803 	double d_index = 0, d_length = 0;
1804 	size_t src_len;
1805 
1806 	if (nargs == 3) {
1807 		t1 = POP_NUMBER();
1808 		d_length = get_number_d(t1);
1809 		DEREF(t1);
1810 	}
1811 
1812 	t1 = POP_NUMBER();
1813 	d_index = get_number_d(t1);
1814 	DEREF(t1);
1815 
1816 	t1 = POP_STRING();
1817 
1818 	if (nargs == 3) {
1819 		if (! (d_length >= 1)) {
1820 			if (do_lint == DO_LINT_ALL)
1821 				lintwarn(_("substr: length %g is not >= 1"), d_length);
1822 			else if (do_lint == DO_LINT_INVALID && ! (d_length >= 0))
1823 				lintwarn(_("substr: length %g is not >= 0"), d_length);
1824 			DEREF(t1);
1825 			/*
1826 			 * Return explicit null string instead of doing
1827 			 * dupnode(Nnull_string) so that if the result
1828 			 * is checked with the combination of length()
1829 			 * and lint, no error is reported about using
1830 			 * an uninitialized value. Same thing later, too.
1831 			 */
1832 			return make_string("", 0);
1833 		}
1834 		if (do_lint) {
1835 			if (double_to_int(d_length) != d_length)
1836 				lintwarn(
1837 			_("substr: non-integer length %g will be truncated"),
1838 					d_length);
1839 
1840 			if (d_length > SIZE_MAX)
1841 				lintwarn(
1842 			_("substr: length %g too big for string indexing, truncating to %g"),
1843 					d_length, (double) SIZE_MAX);
1844 		}
1845 		if (d_length < SIZE_MAX)
1846 			length = d_length;
1847 		else
1848 			length = SIZE_MAX;
1849 	}
1850 
1851 	/* the weird `! (foo)' tests help catch NaN values. */
1852 	if (! (d_index >= 1)) {
1853 		if (do_lint)
1854 			lintwarn(_("substr: start index %g is invalid, using 1"),
1855 				 d_index);
1856 		d_index = 1;
1857 	}
1858 	if (do_lint && double_to_int(d_index) != d_index)
1859 		lintwarn(_("substr: non-integer start index %g will be truncated"),
1860 			 d_index);
1861 
1862 	/* awk indices are from 1, C's are from 0 */
1863 	if (d_index <= SIZE_MAX)
1864 		indx = d_index - 1;
1865 	else
1866 		indx = SIZE_MAX;
1867 
1868 	if (nargs == 2) {	/* third arg. missing */
1869 		/* use remainder of string */
1870 		length = t1->stlen - indx;	/* default to bytes */
1871 		if (gawk_mb_cur_max > 1) {
1872 			t1 = force_wstring(t1);
1873 			if (t1->wstlen > 0)	/* use length of wide char string if we have one */
1874 				length = t1->wstlen - indx;
1875 		}
1876 		d_length = length;	/* set here in case used in diagnostics, below */
1877 	}
1878 
1879 	if (t1->stlen == 0) {
1880 		/* substr("", 1, 0) produces a warning only if LINT_ALL */
1881 		if (do_lint && (do_lint == DO_LINT_ALL || ((indx | length) != 0)))
1882 			lintwarn(_("substr: source string is zero length"));
1883 		DEREF(t1);
1884 		return make_string("", 0);
1885 	}
1886 
1887 	/* get total len of input string, for following checks */
1888 	if (gawk_mb_cur_max > 1) {
1889 		t1 = force_wstring(t1);
1890 		src_len = t1->wstlen;
1891 	} else
1892 		src_len = t1->stlen;
1893 
1894 	if (indx >= src_len) {
1895 		if (do_lint)
1896 			lintwarn(_("substr: start index %g is past end of string"),
1897 				d_index);
1898 		DEREF(t1);
1899 		return make_string("", 0);
1900 	}
1901 	if (length > src_len - indx) {
1902 		if (do_lint)
1903 			lintwarn(
1904 	_("substr: length %g at start index %g exceeds length of first argument (%lu)"),
1905 			d_length, d_index, (unsigned long int) src_len);
1906 		length = src_len - indx;
1907 	}
1908 
1909 	/* force_wstring() already called */
1910 	if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen)
1911 		/* single byte case */
1912 		r = make_string(t1->stptr + indx, length);
1913 	else {
1914 		/* multibyte case, more work */
1915 		size_t result;
1916 		wchar_t *wp;
1917 		mbstate_t mbs;
1918 		char *substr, *cp;
1919 
1920 		/*
1921 		 * Convert the wide chars in t1->wstptr back into m.b. chars.
1922 		 * This is pretty grotty, but it's the most straightforward
1923 		 * way to do things.
1924 		 */
1925 		memset(& mbs, 0, sizeof(mbs));
1926 		emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr");
1927 		wp = t1->wstptr + indx;
1928 		for (cp = substr; length > 0; length--) {
1929 			result = wcrtomb(cp, *wp, & mbs);
1930 			if (result == (size_t) -1)	/* what to do? break seems best */
1931 				break;
1932 			cp += result;
1933 			wp++;
1934 		}
1935 		*cp = '\0';
1936 		r = make_str_node(substr, cp - substr, ALREADY_MALLOCED);
1937 	}
1938 
1939 	DEREF(t1);
1940 	return r;
1941 }
1942 
1943 /* do_strftime --- format a time stamp */
1944 
1945 NODE *
do_strftime(int nargs)1946 do_strftime(int nargs)
1947 {
1948 	NODE *t1, *t2, *t3, *ret;
1949 	struct tm *tm;
1950 	time_t fclock;
1951 	double clock_val;
1952 	char *bufp;
1953 	size_t buflen, bufsize;
1954 	char buf[BUFSIZ];
1955 	const char *format;
1956 	int formatlen;
1957 	bool do_gmt;
1958 	NODE *val = NULL;
1959 	NODE *sub = NULL;
1960 	char save = '\0';	// initialize to avoid compiler warnings
1961 	static const time_t time_t_min = TYPE_MINIMUM(time_t);
1962 	static const time_t time_t_max = TYPE_MAXIMUM(time_t);
1963 
1964 	/* set defaults first */
1965 	format = def_strftime_format;	/* traditional date format */
1966 	formatlen = strlen(format);
1967 	(void) time(& fclock);	/* current time of day */
1968 	do_gmt = false;
1969 
1970 	if (PROCINFO_node != NULL) {
1971 		sub = make_string("strftime", 8);
1972 		val = in_array(PROCINFO_node, sub);
1973 		unref(sub);
1974 
1975 		if (val != NULL) {
1976 			if (do_lint && (fixtype(val)->flags & STRING) == 0)
1977 				lintwarn(_("strftime: format value in PROCINFO[\"strftime\"] has numeric type"));
1978 			val = force_string(val);
1979 			format = val->stptr;
1980 			formatlen = val->stlen;
1981 		}
1982 	}
1983 
1984 	t1 = t2 = t3 = NULL;
1985 	if (nargs > 0) {	/* have args */
1986 		NODE *tmp;
1987 
1988 		if (nargs == 3) {
1989 			t3 = POP_SCALAR();
1990 			do_gmt = boolval(t3);
1991 			DEREF(t3);
1992 		}
1993 
1994 		if (nargs >= 2) {
1995 			t2 = POP_SCALAR();
1996 			if (do_lint && (fixtype(t2)->flags & NUMBER) == 0)
1997 				lintwarn(_("%s: received non-numeric second argument"), "strftime");
1998 			(void) force_number(t2);
1999 			clock_val = get_number_d(t2);
2000 			fclock = (time_t) clock_val;
2001 			/*
2002 			 * Protect against negative value being assigned
2003 			 * to unsigned time_t.
2004 			 */
2005 			if (clock_val < 0 && fclock > 0) {
2006 				if (do_lint)
2007 					lintwarn(_("strftime: second argument less than 0 or too big for time_t"));
2008 				return make_string("", 0);
2009 			}
2010 
2011 			/* And check that the value is in range */
2012 			if (clock_val < time_t_min || clock_val > time_t_max) {
2013 				if (do_lint)
2014 					lintwarn(_("strftime: second argument out of range for time_t"));
2015 				return make_string("", 0);
2016 			}
2017 
2018 			DEREF(t2);
2019 		}
2020 
2021 		tmp = POP_SCALAR();
2022 		if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
2023 			lintwarn(_("%s: received non-string first argument"), "strftime");
2024 
2025 		t1 = force_string(tmp);
2026 		format = t1->stptr;
2027 		formatlen = t1->stlen;
2028 		if (formatlen == 0) {
2029 			if (do_lint)
2030 				lintwarn(_("strftime: received empty format string"));
2031 			DEREF(t1);
2032 			return make_string("", 0);
2033 		}
2034 		str_terminate(t1, save);
2035 	}
2036 
2037 	if (do_gmt)
2038 		tm = gmtime(& fclock);
2039 	else
2040 		tm = localtime(& fclock);
2041 
2042 	if (tm == NULL) {
2043 		ret = make_string("", 0);
2044 		goto done;
2045 	}
2046 
2047 	bufp = buf;
2048 	bufsize = sizeof(buf);
2049 	for (;;) {
2050 		*bufp = '\0';
2051 		buflen = strftime(bufp, bufsize, format, tm);
2052 		/*
2053 		 * buflen can be zero EITHER because there's not enough
2054 		 * room in the string, or because the control command
2055 		 * goes to the empty string. Make a reasonable guess that
2056 		 * if the buffer is 1024 times bigger than the length of the
2057 		 * format string, it's not failing for lack of room.
2058 		 * Thanks to Paul Eggert for pointing out this issue.
2059 		 */
2060 		if (buflen > 0 || bufsize >= 1024 * formatlen)
2061 			break;
2062 		bufsize *= 2;
2063 		if (bufp == buf)
2064 			emalloc(bufp, char *, bufsize, "do_strftime");
2065 		else
2066 			erealloc(bufp, char *, bufsize, "do_strftime");
2067 	}
2068 	ret = make_string(bufp, buflen);
2069 	if (bufp != buf)
2070 		efree(bufp);
2071 done:
2072 	if (t1) {
2073 		str_restore(t1, save);
2074 		DEREF(t1);
2075 	}
2076 	return ret;
2077 }
2078 
2079 /* do_systime --- get the time of day */
2080 
2081 NODE *
do_systime(int nargs ATTRIBUTE_UNUSED)2082 do_systime(int nargs ATTRIBUTE_UNUSED)
2083 {
2084 	time_t lclock;
2085 
2086 	(void) time(& lclock);
2087 	return make_number((AWKNUM) lclock);
2088 }
2089 
2090 /* do_mktime --- turn a time string into a timestamp */
2091 
2092 NODE *
do_mktime(int nargs)2093 do_mktime(int nargs)
2094 {
2095 	NODE *t1, *t2;
2096 	struct tm then;
2097 	long year;
2098 	int month, day, hour, minute, second, count;
2099 	int dst = -1; /* default is unknown */
2100 	time_t then_stamp;
2101 	char save;
2102 	bool do_gmt;
2103 
2104 	if (nargs == 2) {
2105 		t2 = POP_SCALAR();
2106 		do_gmt = boolval(t2);
2107 		DEREF(t2);
2108 	}
2109 	else
2110 		do_gmt = false;
2111 	t1 = POP_SCALAR();
2112 	if (do_lint && (fixtype(t1)->flags & STRING) == 0)
2113 		lintwarn(_("%s: received non-string argument"), "mktime");
2114 	t1 = force_string(t1);
2115 
2116 	save = t1->stptr[t1->stlen];
2117 	t1->stptr[t1->stlen] = '\0';
2118 
2119 	count = sscanf(t1->stptr, "%ld %d %d %d %d %d %d",
2120 		        & year, & month, & day,
2121 			& hour, & minute, & second,
2122 		        & dst);
2123 
2124 	// 9/2021: I've been told that according to the ISO 8601-1:2019 spec,
2125 	// hour cannot be 24. So the check for hour > 23 is valid.
2126 	if (   do_lint /* Ready? Set! Go: */
2127 	    && (   (second < 0 || second > 60)
2128 		|| (minute < 0 || minute > 59)
2129 		|| (hour < 0 || hour > 23)
2130 		|| (day < 1 || day > 31)
2131 		|| (month < 1 || month > 12) ))
2132 			lintwarn(_("mktime: at least one of the values is out of the default range"));
2133 
2134 	t1->stptr[t1->stlen] = save;
2135 	DEREF(t1);
2136 
2137 	if (count < 6
2138 	    || month == INT_MIN
2139 	    || year < INT_MIN + 1900
2140 	    || year - 1900 > INT_MAX)
2141 		return make_number((AWKNUM) -1);
2142 
2143 	memset(& then, '\0', sizeof(then));
2144 	then.tm_sec = second;
2145 	then.tm_min = minute;
2146 	then.tm_hour = hour;
2147 	then.tm_mday = day;
2148 	then.tm_mon = month - 1;
2149 	then.tm_year = year - 1900;
2150 	then.tm_isdst = dst;
2151 
2152 	then_stamp = (do_gmt ? timegm(& then) : mktime(& then));
2153 	return make_number((AWKNUM) then_stamp);
2154 }
2155 
2156 /* do_system --- run an external command */
2157 
2158 NODE *
do_system(int nargs)2159 do_system(int nargs)
2160 {
2161 	NODE *tmp;
2162 	AWKNUM ret = 0;		/* floating point on purpose, compat Unix awk */
2163 	char *cmd;
2164 	char save;
2165 	int status;
2166 
2167 	if (do_sandbox)
2168 		fatal(_("'system' function not allowed in sandbox mode"));
2169 
2170 	(void) flush_io();     /* so output is synchronous with gawk's */
2171 	tmp = POP_SCALAR();
2172 	if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
2173 		lintwarn(_("%s: received non-string argument"), "system");
2174 	cmd = force_string(tmp)->stptr;
2175 
2176 	if (cmd && *cmd) {
2177 		/* insure arg to system is zero-terminated */
2178 		save = cmd[tmp->stlen];
2179 		cmd[tmp->stlen] = '\0';
2180 
2181 		os_restore_mode(fileno(stdin));
2182 		set_sigpipe_to_default();
2183 
2184 		status = system(cmd);
2185 		/*
2186 		 * 3/2016. What to do with ret? It's never simple.
2187 		 * POSIX says to use the full return value. BWK awk
2188 		 * divides the result by 256.  That normally gives the
2189 		 * exit status but gives a weird result for death-by-signal.
2190 		 * So we compromise as follows:
2191 		 */
2192 		ret = status;
2193 		if (status != -1) {
2194 			if (do_posix)
2195 				;	/* leave it alone, full 16 bits */
2196 			else if (do_traditional)
2197 #ifdef __MINGW32__
2198 				ret = (((unsigned)status) & ~0xC0000000);
2199 #else
2200 				ret = (status / 256.0);
2201 #endif
2202 			else
2203 				ret = sanitize_exit_status(status);
2204 		}
2205 
2206 		if ((BINMODE & BINMODE_INPUT) != 0)
2207 			os_setbinmode(fileno(stdin), O_BINARY);
2208 		ignore_sigpipe();
2209 
2210 		cmd[tmp->stlen] = save;
2211 	}
2212 	DEREF(tmp);
2213 	return make_number((AWKNUM) ret);
2214 }
2215 
/*
 * do_print --- print items, separated by OFS, terminated with ORS
 *
 * nargs is the number of items to print; they are taken from the
 * evaluation stack.  When redirtype is non-zero, the redirection target
 * sits on the stack below the nargs items and is consumed here as well.
 *
 * The function returns early, after recording an error in ERRNO, when a
 * redirection could not be set up or when a two-way pipe's write end is
 * closed and the redirection is marked non-fatal.
 */

void
do_print(int nargs, int redirtype)
{
	struct redirect *rp = NULL;
	int errflg = 0;
	FILE *fp = NULL;
	int i;
	NODE *redir_exp = NULL;
	NODE *tmp = NULL;

	assert(nargs <= max_args);

	if (redirtype != 0) {
		/* look at the redirection target without popping it yet */
		redir_exp = PEEK(nargs);
		if (redir_exp->type != Node_val)
			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
		rp = redirect(redir_exp, redirtype, & errflg, true);
		if (rp != NULL) {
			/* two-way redirection whose output side has no stream any more */
			if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
				if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
					update_ERRNO_int(EBADF);
					return;
				}
				(void) close_rp(rp, CLOSE_ALL);
				fatal(_("print: attempt to write to closed write end of two-way pipe"));
			}
			fp = rp->output.fp;
		}
		else if (errflg) {
			update_ERRNO_int(errflg);
			return;
		}
	} else if (do_debug)	/* only the debugger can change the default output */
		fp = output_fp;
	else
		fp = stdout;

	/*
	 * Pop the items into args_array[1..nargs], converting each to a
	 * string.  If an array turns up, release what was collected so far
	 * before dying.
	 */
	for (i = 1; i <= nargs; i++) {
		tmp = args_array[i] = POP();
		if (tmp->type == Node_var_array) {
			while (--i > 0)
				DEREF(args_array[i]);
			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
		}
		// Let force_string_ofmt handle checking if things
		// are already valid.
		args_array[i] = force_string_ofmt(tmp);
		/* a different node came back: the popped one is no longer needed */
		if (args_array[i] != tmp)
			DEREF(tmp);
	}

	/* now the redirection target is on top of the stack; drop it */
	if (redir_exp != NULL) {
		DEREF(redir_exp);
		decr_sp();
	}

	/* no stream to write to: just release the converted items */
	if (fp == NULL) {
		for (i = nargs; i > 0; i--)
			DEREF(args_array[i]);
		return;
	}

	/*
	 * Write the items from index nargs down to 1 (the reverse of the
	 * order they were popped in), with OFS between consecutive items.
	 */
	for (i = nargs; i > 0; i--) {
		efwrite(args_array[i]->stptr, sizeof(char), args_array[i]->stlen, fp, "print", rp, false);
		DEREF(args_array[i]);
		if (i != 1 && OFSlen > 0)
			efwrite(OFS, sizeof(char), (size_t) OFSlen,
				fp, "print", rp, false);

	}
	/* NOTE(review): the final `true' presumably asks efwrite to flush -- confirm */
	if (ORSlen > 0)
		efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true);

	/* two-way redirections are flushed through their own flush hook */
	if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
		rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
}
2294 
2295 /* do_print_rec --- special case printing of $0, for speed */
2296 
2297 void
do_print_rec(int nargs,int redirtype)2298 do_print_rec(int nargs, int redirtype)
2299 {
2300 	FILE *fp = NULL;
2301 	NODE *f0;
2302 	struct redirect *rp = NULL;
2303 	int errflg = 0;
2304 	NODE *redir_exp = NULL;
2305 
2306 	assert(nargs == 0);
2307 	if (redirtype != 0) {
2308 		redir_exp = TOP();
2309 		rp = redirect(redir_exp, redirtype, & errflg, true);
2310 		if (rp != NULL) {
2311 			if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
2312 				if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
2313 					update_ERRNO_int(EBADF);
2314 					return;
2315 				}
2316 				(void) close_rp(rp, CLOSE_ALL);
2317 				fatal(_("print: attempt to write to closed write end of two-way pipe"));
2318 			}
2319 			fp = rp->output.fp;
2320 		}
2321 		DEREF(redir_exp);
2322 		decr_sp();
2323 	} else
2324 		fp = output_fp;
2325 
2326 	if (errflg) {
2327 		update_ERRNO_int(errflg);
2328 		return;
2329 	}
2330 
2331 	if (fp == NULL)
2332 		return;
2333 
2334 	if (! field0_valid || do_lint)	// lint check for field access in END
2335 		(void) get_field(0L, NULL);
2336 
2337 	f0 = fields_arr[0];
2338 
2339 	if (do_lint && (f0->flags & NULL_FIELD) != 0)
2340 		lintwarn(_("reference to uninitialized field `$%d'"), 0);
2341 
2342 	efwrite(f0->stptr, sizeof(char), f0->stlen, fp, "print", rp, false);
2343 
2344 	if (ORSlen > 0)
2345 		efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true);
2346 
2347 	if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
2348 		rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
2349 }
2350 
2351 
/* is_wupper --- real function wrapping iswupper, so its address can be passed around */

static int
is_wupper(wchar_t c)
{
	const wint_t wc = c;

	return iswupper(wc);
}
2359 
/* is_wlower --- real function wrapping iswlower, so its address can be passed around */

static int
is_wlower(wchar_t c)
{
	const wint_t wc = c;

	return iswlower(wc);
}
2367 
/* to_wlower --- real function wrapping towlower, so its address can be passed around */

static int
to_wlower(wchar_t c)
{
	const wint_t lowered = towlower(c);

	return lowered;
}
2375 
/* to_wupper --- real function wrapping towupper, so its address can be passed around */

static int
to_wupper(wchar_t c)
{
	const wint_t raised = towupper(c);

	return raised;
}
2383 
/*
 * wide_change_case --- generic case converter for wide characters
 *
 * Walks the first wlen elements of wstr in place.  Every element for
 * which is_x() returns non-zero is overwritten with to_y() of itself;
 * all other elements are left alone.
 */

static void
wide_change_case(wchar_t *wstr,
			size_t wlen,
			int (*is_x)(wchar_t c),
			int (*to_y)(wchar_t c))
{
	wchar_t *cur = wstr;
	size_t remaining = wlen;

	while (remaining-- > 0) {
		if (is_x(*cur))
			*cur = to_y(*cur);
		cur++;
	}
}
2399 
/*
 * wide_toupper --- map a wide string to upper case
 *
 * Converts the first wlen elements of wstr in place: each element that
 * is_wlower() accepts is replaced by to_wupper() of it.
 */

static void
wide_toupper(wchar_t *wstr, size_t wlen)
{
	wide_change_case(wstr, wlen, is_wlower, to_wupper);
}
2407 
/*
 * wide_tolower --- map a wide string to lower case
 *
 * Converts the first wlen elements of wstr in place: each element that
 * is_wupper() accepts is replaced by to_wlower() of it.
 */

static void
wide_tolower(wchar_t *wstr, size_t wlen)
{
	wide_change_case(wstr, wlen, is_wupper, to_wlower);
}
2415 
2416 /* do_tolower --- lower case a string */
2417 
2418 NODE *
do_tolower(int nargs)2419 do_tolower(int nargs)
2420 {
2421 	NODE *t1, *t2;
2422 
2423 	t1 = POP_SCALAR();
2424 	if (do_lint && (fixtype(t1)->flags & STRING) == 0)
2425 		lintwarn(_("%s: received non-string argument"), "tolower");
2426 	t1 = force_string(t1);
2427 	t2 = make_string(t1->stptr, t1->stlen);
2428 
2429 	if (gawk_mb_cur_max == 1) {
2430 		unsigned char *cp, *cp2;
2431 
2432 		for (cp = (unsigned char *)t2->stptr,
2433 		     cp2 = (unsigned char *)(t2->stptr + t2->stlen);
2434 			cp < cp2; cp++)
2435 			if (isupper(*cp))
2436 				*cp = tolower(*cp);
2437 	} else {
2438 		force_wstring(t2);
2439 		wide_tolower(t2->wstptr, t2->wstlen);
2440 		wstr2str(t2);
2441 	}
2442 
2443 	DEREF(t1);
2444 	return t2;
2445 }
2446 
2447 /* do_toupper --- upper case a string */
2448 
2449 NODE *
do_toupper(int nargs)2450 do_toupper(int nargs)
2451 {
2452 	NODE *t1, *t2;
2453 
2454 	t1 = POP_SCALAR();
2455 	if (do_lint && (fixtype(t1)->flags & STRING) == 0)
2456 		lintwarn(_("%s: received non-string argument"), "toupper");
2457 	t1 = force_string(t1);
2458 	t2 = make_string(t1->stptr, t1->stlen);
2459 
2460 	if (gawk_mb_cur_max == 1) {
2461 		unsigned char *cp, *cp2;
2462 
2463 		for (cp = (unsigned char *)t2->stptr,
2464 		     cp2 = (unsigned char *)(t2->stptr + t2->stlen);
2465 			cp < cp2; cp++)
2466 			if (islower(*cp))
2467 				*cp = toupper(*cp);
2468 	} else {
2469 		force_wstring(t2);
2470 		wide_toupper(t2->wstptr, t2->wstlen);
2471 		wstr2str(t2);
2472 	}
2473 
2474 	DEREF(t1);
2475 	return t2;
2476 }
2477 
2478 /* do_atan2 --- do the atan2 function */
2479 
2480 NODE *
do_atan2(int nargs)2481 do_atan2(int nargs)
2482 {
2483 	NODE *t1, *t2;
2484 	double d1, d2;
2485 
2486 	POP_TWO_SCALARS(t1, t2);
2487 	if (do_lint) {
2488 		if ((fixtype(t1)->flags & NUMBER) == 0)
2489 			lintwarn(_("%s: received non-numeric first argument"), "atan2");
2490 		if ((fixtype(t2)->flags & NUMBER) == 0)
2491 			lintwarn(_("%s: received non-numeric second argument"), "atan2");
2492 	}
2493 	d1 = force_number(t1)->numbr;
2494 	d2 = force_number(t2)->numbr;
2495 	DEREF(t1);
2496 	DEREF(t2);
2497 	return make_number((AWKNUM) atan2(d1, d2));
2498 }
2499 
2500 /* do_sin --- do the sin function */
2501 
2502 NODE *
do_sin(int nargs)2503 do_sin(int nargs)
2504 {
2505 	NODE *tmp;
2506 	double d;
2507 
2508 	tmp = POP_SCALAR();
2509 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
2510 		lintwarn(_("%s: received non-numeric argument"), "sin");
2511 	d = sin((double) force_number(tmp)->numbr);
2512 	DEREF(tmp);
2513 	return make_number((AWKNUM) d);
2514 }
2515 
2516 /* do_cos --- do the cos function */
2517 
2518 NODE *
do_cos(int nargs)2519 do_cos(int nargs)
2520 {
2521 	NODE *tmp;
2522 	double d;
2523 
2524 	tmp = POP_SCALAR();
2525 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
2526 		lintwarn(_("%s: received non-numeric argument"), "cos");
2527 	d = cos((double) force_number(tmp)->numbr);
2528 	DEREF(tmp);
2529 	return make_number((AWKNUM) d);
2530 }
2531 
/*
 * do_rand --- do the rand function
 *
 * Returns a number n with 0 <= n < 1, built from two successive calls
 * to random().  The first call (from here or from do_srand, whichever
 * comes first) sets up the generator state with seed 1.
 */

/* true until the generator state has been initialized */
static bool firstrand = true;
/* Some systems require this array to be integer aligned. Sigh. */
#define SIZEOF_STATE 256
/* backing storage for the generator state, SIZEOF_STATE bytes in total */
static uint32_t istate[SIZEOF_STATE/sizeof(uint32_t)];
/* the same storage viewed as char *, the form initstate()/setstate() are given */
static char *const state = (char *const) istate;

/* ARGSUSED */
NODE *
do_rand(int nargs ATTRIBUTE_UNUSED)
{
	double tmprand;
/* one more than the largest value expected from random(), as a double */
#define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0)
	if (firstrand) {
		(void) initstate((unsigned) 1, state, SIZEOF_STATE);
		/* don't need to srandom(1), initstate() does it for us. */
		firstrand = false;
		setstate(state);
	}
	/*
	 * Per historical practice and POSIX, return value N is
	 *
	 * 	0 <= n < 1
	 */
	/*
	 * Date: Wed, 28 Aug 2013 17:52:46 -0700
	 * From: Bob Jewett <jewett@bill.scs.agilent.com>
	 *
	 * Call random() twice to fill in more bits in the value
	 * of the double.  Also, there is a bug in random() such
	 * that when the values of successive values are combined
	 * like (rand1*rand2)^2, (rand3*rand4)^2,  ...  the
	 * resulting time series is not white noise.  The
	 * following also seems to fix that bug.
	 *
	 * The add/subtract 0.5 keeps small bits from filling
	 * below 2^-53 in the double, not that anyone should be
	 * looking down there.
	 *
	 * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT)
	 * From: "Nelson H. F. Beebe" <beebe@math.utah.edu>
	 * (4) The code is typical of many published fragments for converting
	 *     from integer to floating-point, and I discuss the serious pitfalls
	 *     in my book, because it leads to platform-dependent behavior at the
	 *     end points of the interval [0,1]
	 *
	 * (5) the documentation in the gawk info node says
	 *
	 *     `rand()'
	 * 	 Return a random number.  The values of `rand()' are uniformly
	 * 	 distributed between zero and one.  The value could be zero but is
	 * 	 never one.(1)
	 *
	 *     The division by RAND_DIVISOR may not guarantee that 1.0 is never
	 *     returned: the programmer forgot the platform-dependent issue of
	 *     rounding.
	 *
	 * For points 4 and 5, the safe way is a loop:
	 *
	 *         double
	 * 	   rand(void)		// return value in [0.0, 1.0)
	 *         {
	 * 	    value = internal_rand();
	 *
	 * 	    while (value == 1.0)
	 *                 value = internal_rand();
	 *
	 * 	    return (value);
	 *         }
	 */

	/* draw again in the (rounding-induced) case that the result is exactly 1.0 */
	do {
		long d1, d2;
		/*
		 * Do the calls in predictable order to avoid
		 * compiler differences in order of evaluation.
		 */
		d1 = random();
		d2 = random();
		/* d2 supplies the high-order part, d1 the low-order part */
	 	tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR );
		tmprand -= 0.5;
	} while (tmprand == 1.0);

 	return make_number((AWKNUM) tmprand);
}
2618 
2619 /* do_srand --- seed the random number generator */
2620 
2621 NODE *
do_srand(int nargs)2622 do_srand(int nargs)
2623 {
2624 	NODE *tmp;
2625 	static long save_seed = 1;
2626 	long ret = save_seed;	/* SVR4 awk srand returns previous seed */
2627 
2628 	if (firstrand) {
2629 		(void) initstate((unsigned) 1, state, SIZEOF_STATE);
2630 		/* don't need to srandom(1), we're changing the seed below */
2631 		firstrand = false;
2632 		(void) setstate(state);
2633 	}
2634 
2635 	if (nargs == 0)
2636 		srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
2637 	else {
2638 		tmp = POP_SCALAR();
2639 		if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
2640 			lintwarn(_("%s: received non-numeric argument"), "srand");
2641 		srandom((unsigned int) (save_seed = (long) force_number(tmp)->numbr));
2642 		DEREF(tmp);
2643 	}
2644 	return make_number((AWKNUM) ret);
2645 }
2646 
/* do_match --- match a regexp, set RSTART and RLENGTH,
 * 	optional third arg is array filled with text of
 * 	subpatterns enclosed in parens and start and len info.
 *
 * Stack on entry, top first: [array, if nargs == 3], regexp, string.
 *
 * Returns the 1-based start of the match, or 0 when there is none.
 * RSTART is set to the same value; RLENGTH is set to the match length,
 * or -1 when there is no match.  When more than one byte per character
 * is possible, positions and lengths are translated from byte offsets
 * through the index table that str2wstr() fills in.
 *
 * With the array argument, for each subpattern ii that took part in the
 * match the array receives:
 *	arr[ii]                   text of the subpattern
 *	arr[ii SUBSEP "start"]    its 1-based start
 *	arr[ii SUBSEP "length"]   its length
 */

NODE *
do_match(int nargs)
{
	NODE *tre, *t1, *dest, *it;
	int rstart, len, ii;
	int rlength;
	Regexp *rp;
	regoff_t s;
	char *start;
	char *buf = NULL;	/* scratch space for building the compound subscripts */
	char buff[100];		/* decimal text of ii */
	size_t amt, oldamt = 0, ilen, slen;
	char *subsepstr;
	size_t subseplen;

	dest = NULL;
	if (nargs == 3) {	/* 3rd optional arg for the subpatterns */
		dest = POP_PARAM();
		if (dest->type != Node_var_array)
			fatal(_("match: third argument is not an array"));
		check_symtab_functab(dest, "match",
				_("%s: cannot use %s as third argument"));
		/* start from an empty array */
		assoc_clear(dest);
	}
	tre = POP();
	rp = re_update(tre);
	t1 = POP_STRING();

	rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START);
	if (rstart >= 0) {	/* match succeeded */
		size_t *wc_indices = NULL;

		rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr);	/* byte length */
		if (rlength > 0 && gawk_mb_cur_max > 1) {
			/* translate byte offset/length via the table from str2wstr() */
			t1 = str2wstr(t1, & wc_indices);
			rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
			rstart = wc_indices[rstart];
		}

		rstart++;	/* now it's 1-based indexing */

		/* Build the array only if the caller wants the optional subpatterns */
		if (dest != NULL) {
			subsepstr = SUBSEP_node->var_value->stptr;
			subseplen = SUBSEP_node->var_value->stlen;

			for (ii = 0; ii < NUMSUBPATS(rp, t1->stptr); ii++) {
				/*
				 * Loop over all the subpats; some of them may have
				 * matched even if all of them did not.
				 */
				if ((s = SUBPATSTART(rp, t1->stptr, ii)) != -1) {
					size_t subpat_start;
					size_t subpat_len;

					start = t1->stptr + s;
					subpat_start = s;
					subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
					if (len > 0 && gawk_mb_cur_max > 1) {
						/* same byte-to-index translation as for the whole match */
						subpat_start = wc_indices[s];
						subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
					}

					/* arr[ii] = text of the subpattern, flagged as user input */
					it = make_string(start, len);
					it->flags |= USER_INPUT;
					assoc_set(dest, make_number((AWKNUM) (ii)), it);;

					sprintf(buff, "%d", ii);
					ilen = strlen(buff);
					/* room for the longer suffix ("length") plus terminator */
					amt = ilen + subseplen + strlen("length") + 1;

					/* allocate on first use, grow when a larger size is needed */
					if (oldamt == 0) {
						emalloc(buf, char *, amt, "do_match");
					} else if (amt > oldamt) {
						erealloc(buf, char *, amt, "do_match");
					}
					oldamt = amt;
					/* arr[ii SUBSEP "start"]; the 6 bytes include the '\0' */
					memcpy(buf, buff, ilen);
					memcpy(buf + ilen, subsepstr, subseplen);
					memcpy(buf + ilen + subseplen, "start", 6);

					slen = ilen + subseplen + 5;

					assoc_set(dest, make_string(buf, slen), make_number((AWKNUM) subpat_start + 1));

					/* arr[ii SUBSEP "length"]; the 7 bytes include the '\0' */
					memcpy(buf, buff, ilen);
					memcpy(buf + ilen, subsepstr, subseplen);
					memcpy(buf + ilen + subseplen, "length", 7);

					slen = ilen + subseplen + 6;

					assoc_set(dest, make_string(buf, slen), make_number((AWKNUM) subpat_len));
				}
			}

			efree(buf);
		}
		if (wc_indices != NULL)
			efree(wc_indices);
	} else {		/* match failed */
		rstart = 0;
		rlength = -1;
	}

	DEREF(t1);
	/* replace the values of RSTART and RLENGTH */
	unref(RSTART_node->var_value);
	RSTART_node->var_value = make_number((AWKNUM) rstart);
	unref(RLENGTH_node->var_value);
	RLENGTH_node->var_value = make_number((AWKNUM) rlength);
	return make_number((AWKNUM) rstart);
}
2763 
2764 /* do_sub --- do the work for sub, gsub, and gensub */
2765 
2766 /*
2767  * Gsub can be tricksy; particularly when handling the case of null strings.
2768  * The following awk code was useful in debugging problems.  It is too bad
2769  * that it does not readily translate directly into the C code, below.
2770  *
2771  * #! /usr/local/bin/mawk -f
2772  *
2773  * BEGIN {
2774  * 	true = 1; false = 0
2775  * 	print "--->", mygsub("abc", "b+", "FOO")
2776  * 	print "--->", mygsub("abc", "x*", "X")
2777  * 	print "--->", mygsub("abc", "b*", "X")
2778  * 	print "--->", mygsub("abc", "c", "X")
2779  * 	print "--->", mygsub("abc", "c+", "X")
2780  * 	print "--->", mygsub("abc", "x*$", "X")
2781  * }
2782  *
2783  * function mygsub(str, regex, replace,	origstr, newstr, eosflag, nonzeroflag)
2784  * {
2785  * 	origstr = str;
2786  * 	eosflag = nonzeroflag = false
2787  * 	while (match(str, regex)) {
2788  * 		if (RLENGTH > 0) {	# easy case
2789  * 			nonzeroflag = true
2790  * 			if (RSTART == 1) {	# match at front of string
2791  * 				newstr = newstr replace
2792  * 			} else {
2793  * 				newstr = newstr substr(str, 1, RSTART-1) replace
2794  * 			}
2795  * 			str = substr(str, RSTART+RLENGTH)
2796  * 		} else if (nonzeroflag) {
2797  * 			# last match was non-zero in length, and at the
2798  * 			# current character, we get a zero length match,
2799  * 			# which we don't really want, so skip over it
2800  * 			newstr = newstr substr(str, 1, 1)
2801  * 			str = substr(str, 2)
2802  * 			nonzeroflag = false
2803  * 		} else {
2804  * 			# 0-length match
2805  * 			if (RSTART == 1) {
2806  * 				newstr = newstr replace substr(str, 1, 1)
2807  * 				str = substr(str, 2)
2808  * 			} else {
2809  * 				return newstr str replace
2810  * 			}
2811  * 		}
2812  * 		if (length(str) == 0)
2813  * 			if (eosflag)
2814  * 				break
2815  * 			else
2816  * 				eosflag = true
2817  * 	}
2818  * 	if (length(str) > 0)
2819  * 		newstr = newstr str	# rest of string
2820  *
2821  * 	return newstr
2822  * }
2823  */
2824 
2825 /*
2826  * 1/2004:  The gawk sub/gsub behavior dates from 1996, when we proposed it
2827  * for POSIX.  The proposal fell through the cracks, and the 2001 POSIX
2828  * standard chose a more simple behavior.
2829  *
2830  * The relevant text is to be found on lines 6394-6407 (pages 166, 167) of the
2831  * 2001 standard:
2832  *
2833  * sub(ere, repl[, in ])
2834  *  Substitute the string repl in place of the first instance of the
2835  *  extended regular expression ERE in string in and return the number of
2836  *  substitutions. An ampersand ('&') appearing in the string repl shall
2837  *  be replaced by the string from in that matches the ERE. An ampersand
2838  *  preceded with a backslash ('\') shall be interpreted as the literal
2839  *  ampersand character. An occurrence of two consecutive backslashes shall
2840  *  be interpreted as just a single literal backslash character. Any other
2841  *  occurrence of a backslash (for example, preceding any other character)
2842  *  shall be treated as a literal backslash character. Note that if repl is a
2843  *  string literal (the lexical token STRING; see Grammar (on page 170)), the
2844  *  handling of the ampersand character occurs after any lexical processing,
2845  *  including any lexical backslash escape sequence processing. If in is
2846  *  specified and it is not an lvalue (see Expressions in awk (on page 156)),
2847  *  the behavior is undefined. If in is omitted, awk shall use the current
2848  *  record ($0) in its place.
2849  *
2850  * 11/2010: The text in the 2008 standard is the same as just quoted.
2851  * However, POSIX behavior is now the default.  This can change the behavior
2852  * of awk programs.  The old behavior is not available.
2853  *
2854  * 7/2011: Reverted backslash handling to what it used to be. It was in
2855  * gawk for too long. Should have known better.
2856  */
2857 
2858 /*
2859  * NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
2860  */
2861 
2862 NODE *
do_sub(int nargs,unsigned int flags)2863 do_sub(int nargs, unsigned int flags)
2864 {
2865 	char *scan;
2866 	char *bp, *cp;
2867 	char *buf = NULL;
2868 	size_t buflen;
2869 	char *matchend;
2870 	size_t len;
2871 	char *matchstart;
2872 	char *text;
2873 	size_t textlen = 0;
2874 	char *repl;
2875 	char *replend;
2876 	size_t repllen;
2877 	int sofar;
2878 	int ampersands;
2879 	int matches = 0;
2880 	Regexp *rp;
2881 	NODE *rep_node;		/* replacement text */
2882 	NODE *target;		/* string to make sub. in; $0 if none given */
2883 	NODE *tmp;
2884 	NODE **lhs = NULL;
2885 	long how_many = 1;	/* one substitution for sub, also gensub default */
2886 	bool global;
2887 	long current;
2888 	bool lastmatchnonzero;
2889 	char *mb_indices = NULL;
2890 
2891 	if ((flags & GENSUB) != 0) {
2892 		double d;
2893 		NODE *glob_flag;
2894 
2895 		tmp = PEEK(3);
2896 		rp = re_update(tmp);
2897 
2898 		target = POP_STRING();	/* original string */
2899 
2900 		glob_flag = POP_SCALAR();	/* value of global flag */
2901 		if (   (glob_flag->flags & STRING) != 0
2902 		    && glob_flag->stlen > 0
2903 		    && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G'))
2904 			how_many = -1;
2905 		else {
2906 			(void) force_number(glob_flag);
2907 			d = get_number_d(glob_flag);
2908 			if (d < 1)
2909 				how_many = 1;
2910 			else if (d < LONG_MAX)
2911 				how_many = d;
2912 			else
2913 				how_many = LONG_MAX;
2914 			if (d <= 0) {
2915 				(void) force_string(glob_flag);
2916 				warning(_("gensub: third argument `%.*s' treated as 1"),
2917 						(int) glob_flag->stlen,
2918 						glob_flag->stptr);
2919 			}
2920 		}
2921 		DEREF(glob_flag);
2922 	} else {
2923 		/* take care of regexp early, in case re_update is fatal */
2924 
2925 		tmp = PEEK(2);
2926 		rp = re_update(tmp);
2927 
2928 		if ((flags & GSUB) != 0)
2929 			how_many = -1;
2930 
2931 		/* original string */
2932 
2933 		if ((flags & LITERAL) != 0)
2934 			target = POP_STRING();
2935 		else {
2936 			lhs = POP_ADDRESS();
2937 			target = force_string(*lhs);
2938 		}
2939 	}
2940 
2941 	global = (how_many == -1);
2942 
2943 	rep_node = POP_STRING();	/* replacement text */
2944 	decr_sp();		/* regexp, already updated above */
2945 
2946 	/* do the search early to avoid work on non-match */
2947 	if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 ||
2948 			RESTART(rp, target->stptr) > target->stlen)
2949 		goto done;
2950 
2951 	text = target->stptr;
2952 	textlen = target->stlen;
2953 
2954 	repl = rep_node->stptr;
2955 	replend = repl + rep_node->stlen;
2956 	repllen = replend - repl;
2957 
2958 	ampersands = 0;
2959 
2960 	/*
2961 	 * Some systems' malloc() can't handle being called with an
2962 	 * argument of zero.  Thus we have to have some special case
2963 	 * code to check for `repllen == 0'.  This can occur for
2964 	 * something like:
2965 	 * 	sub(/foo/, "", mystring)
2966 	 * for example.
2967 	 */
2968 	if (gawk_mb_cur_max > 1 && repllen > 0) {
2969 		emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub");
2970 		index_multibyte_buffer(repl, mb_indices, repllen);
2971 	}
2972 
2973 	/* compute length of replacement string, number of ampersands */
2974 	for (scan = repl; scan < replend; scan++) {
2975 		if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1))
2976 		    && (*scan == '&')) {
2977 			repllen--;
2978 			ampersands++;
2979 		} else if (*scan == '\\') {
2980 			if ((flags & GENSUB) != 0) {	/* gensub, behave sanely */
2981 				if (isdigit((unsigned char) scan[1])) {
2982 					ampersands++;
2983 					scan++;
2984 				} else {	/* \q for any q --> q */
2985 					repllen--;
2986 					scan++;
2987 				}
2988 			} else if (do_posix) {
2989 				/* \& --> &, \\ --> \ */
2990 				if (scan[1] == '&' || scan[1] == '\\') {
2991 					repllen--;
2992 					scan++;
2993 				} /* else
2994 					leave alone, it goes into the output */
2995 			} else {
2996 				/* gawk default behavior since 1996 */
2997 				if (strncmp(scan, "\\\\\\&", 4) == 0
2998 				    || strncmp(scan, "\\\\\\\\", 4) == 0) {	/* 2016: fixed */
2999 					/* \\\& --> \& */
3000 					/* \\\\ --> \\ */
3001 					repllen -= 2;
3002 					scan += 3;
3003 				} else if (strncmp(scan, "\\\\&", 3) == 0) {
3004 					/* \\& --> \<string> */
3005 					ampersands++;
3006 					repllen--;
3007 					scan += 2;
3008 				} else if (scan[1] == '&') {
3009 					/* \& --> & */
3010 					repllen--;
3011 					scan++;
3012 				} /* else
3013 					leave alone, it goes into the output */
3014 			}
3015 		}
3016 	}
3017 
3018 	lastmatchnonzero = false;
3019 
3020 	/* guesstimate how much room to allocate; +1 forces > 0 */
3021 	buflen = textlen + (ampersands + 1) * repllen + 1;
3022 	emalloc(buf, char *, buflen + 1, "do_sub");
3023 	buf[buflen] = '\0';
3024 
3025 	bp = buf;
3026 	for (current = 1;; current++) {
3027 		matches++;
3028 		matchstart = target->stptr + RESTART(rp, target->stptr);
3029 		matchend = target->stptr + REEND(rp, target->stptr);
3030 
3031 		/*
3032 		 * create the result, copying in parts of the original
3033 		 * string. note that length of replacement string can
3034 		 * vary since ampersand is actual text of regexp match.
3035 		 */
3036 
3037 		/*
3038 		 * add 1 to len to handle "empty" case where
3039 		 * matchend == matchstart and we force a match on a single
3040 		 * char.  Use 'matchend - text' instead of 'matchstart - text'
3041 		 * because we may not actually make any substitution depending
3042 		 * on the 'global' and 'how_many' values.
3043 		 */
3044 		len = matchend - text + repllen
3045 		      + ampersands * (matchend - matchstart) + 1;
3046 		sofar = bp - buf;
3047 		while (buflen < (sofar + len + 1)) {
3048 			buflen *= 2;
3049 			erealloc(buf, char *, buflen, "sub_common");
3050 			bp = buf + sofar;
3051 		}
3052 		for (scan = text; scan < matchstart; scan++)
3053 			*bp++ = *scan;
3054 		if (global || current == how_many) {
3055 			/*
3056 			 * If the current match matched the null string,
3057 			 * and the last match didn't and did a replacement,
3058 			 * and the match of the null string is at the front of
3059 			 * the text (meaning right after end of the previous
3060 			 * replacement), then skip this one.
3061 			 */
3062 			if (matchstart == matchend
3063 			    && lastmatchnonzero
3064 			    && matchstart == text) {
3065 				lastmatchnonzero = false;
3066 				matches--;
3067 				goto empty;
3068 			}
3069 			/*
3070 			 * If replacing all occurrences, or this is the
3071 			 * match we want, copy in the replacement text,
3072 			 * making substitutions as we go.
3073 			 */
3074 			for (scan = repl; scan < replend; scan++)
3075 				if (*scan == '&'
3076 					/*
3077 					 * Don't test repllen here. A simple "&" could
3078 					 * end up with repllen == 0.
3079 					 */
3080 					&& (gawk_mb_cur_max == 1
3081 						|| mb_indices[scan - repl] == 1)
3082 				) {
3083 						for (cp = matchstart; cp < matchend; cp++)
3084 								*bp++ = *cp;
3085 				} else if (*scan == '\\'
3086 					&& (gawk_mb_cur_max == 1
3087 						|| (repllen > 0 && mb_indices[scan - repl] == 1))
3088 				) {
3089 					if (flags & GENSUB) {	/* gensub, behave sanely */
3090 						if (isdigit((unsigned char) scan[1])) {
3091 							int dig = scan[1] - '0';
3092 							if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
3093 								char *start, *end;
3094 
3095 								start = target->stptr
3096 								      + SUBPATSTART(rp, target->stptr, dig);
3097 								end = target->stptr
3098 								      + SUBPATEND(rp, target->stptr, dig);
3099 
3100 								for (cp = start; cp < end; cp++)
3101 									*bp++ = *cp;
3102 							}
3103 							scan++;
3104 						} else	/* \q for any q --> q */
3105 							*bp++ = *++scan;
3106 					} else if (do_posix) {
3107 						/* \& --> &, \\ --> \ */
3108 						if (scan[1] == '&' || scan[1] == '\\')
3109 							scan++;
3110 						*bp++ = *scan;
3111 					} else {
3112 						/* gawk default behavior since 1996 */
3113 						if (strncmp(scan, "\\\\\\&", 4) == 0
3114 						    || strncmp(scan, "\\\\\\\\", 4) == 0) {	/* 2016: fixed */
3115 							/* \\\& --> \& */
3116 							/* \\\\ --> \\ */
3117 							*bp++ = '\\';
3118 							*bp++ = scan[3];
3119 							scan += 3;
3120 						} else if (strncmp(scan, "\\\\&", 3) == 0) {
3121 							/* \\& --> \<string> */
3122 							*bp++ = '\\';
3123 							for (cp = matchstart; cp < matchend; cp++)
3124 								*bp++ = *cp;
3125 							scan += 2;
3126 						} else if (scan[1] == '&') {
3127 							/* \& --> & */
3128 							*bp++ = '&';
3129 							scan++;
3130 						} else
3131 							*bp++ = *scan;
3132 					}
3133 				} else
3134 					*bp++ = *scan;
3135 			if (matchstart != matchend)
3136 				lastmatchnonzero = true;
3137 		} else {
3138 			/*
3139 			 * don't want this match, skip over it by copying
3140 			 * in current text.
3141 			 */
3142 			for (cp = matchstart; cp < matchend; cp++)
3143 				*bp++ = *cp;
3144 		}
3145 	empty:
3146 		/* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
3147 		if (matchstart == matchend && matchend < text + textlen) {
3148 			*bp++ = *matchend;
3149 			matchend++;
3150 		}
3151 		textlen = text + textlen - matchend;
3152 		text = matchend;
3153 
3154 #if 0
3155 		if (bp - buf > sofar + len)
3156 			fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar));
3157 #endif
3158 
3159 		if ((current >= how_many && ! global)
3160 		    || ((long) textlen <= 0 && matchstart == matchend)
3161 		    || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1)
3162 			break;
3163 
3164 	}
3165 	sofar = bp - buf;
3166 	if (buflen < (sofar + textlen + 1)) {
3167 		buflen = sofar + textlen + 1;
3168 		erealloc(buf, char *, buflen, "do_sub");
3169 		bp = buf + sofar;
3170 	}
3171 	/*
3172 	 * Note that text == matchend, since that assignment is made before
3173 	 * exiting the 'for' loop above. Thus we copy in the rest of the
3174 	 * original string.
3175 	 */
3176 	for (scan = text; scan < text + textlen; scan++)
3177 		*bp++ = *scan;
3178 	*bp = '\0';
3179 	textlen = bp - buf;
3180 
3181 	if (mb_indices != NULL)
3182 		efree(mb_indices);
3183 
3184 done:
3185 	DEREF(rep_node);
3186 
3187 	if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) {
3188 		efree(buf);
3189 		buf = NULL;
3190 	}
3191 
3192 	if (flags & GENSUB) {
3193 		if (matches > 0) {
3194 			/* return the result string */
3195 			DEREF(target);
3196 			assert(buf != NULL);
3197 			return make_str_node(buf, textlen, ALREADY_MALLOCED);
3198 		} else if ((target->flags & STRING) == 0) {
3199 			/* return a copy of original string */
3200 			DEREF(target);
3201 			return make_str_node(target->stptr, target->stlen, 0);
3202 		}
3203 
3204 		/* return the original string */
3205 		return target;
3206 	}
3207 
3208 	/* For a string literal, must not change the original string. */
3209 	if ((flags & LITERAL) != 0)
3210 		DEREF(target);
3211 	else if (matches > 0) {
3212 		/*
3213 		 * 8/2021: There's a bit of a song and dance here.  If someone does
3214 		 *
3215 		 * 	x = @/abc/
3216 		 * 	sub(/b/, "x", x)
3217 		 *
3218 		 * What should the type of x be after the call? Does it get converted
3219 		 * to string? Or does it remain a regexp?  We've decided to let it
3220 		 * remain a regexp. In that case, we have to update the compiled
3221 		 * regular expression that it holds.
3222 		 */
3223 		bool is_regex = false;
3224 		NODE *target = *lhs;
3225 
3226 		if ((target->flags & REGEX) != 0) {
3227 			is_regex = true;
3228 
3229 			// free old regex registers
3230 			refree(target->typed_re->re_reg[0]);
3231 			if (target->typed_re->re_reg[1] != NULL)
3232 				refree(target->typed_re->re_reg[1]);
3233 			freenode(target->typed_re);
3234 		}
3235 		unref(*lhs);		// nuke original value
3236 		if (is_regex)
3237 			*lhs = make_typed_regex(buf, textlen);
3238 		else
3239 			*lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
3240 	}
3241 
3242 	return make_number((AWKNUM) matches);
3243 }
3244 
3245 /* call_sub --- call do_sub indirectly */
3246 
3247 NODE *
call_sub(const char * name,int nargs)3248 call_sub(const char *name, int nargs)
3249 {
3250 	unsigned int flags = 0;
3251 	NODE *regex, *replace, *glob_flag;
3252 	NODE **lhs, *rhs;
3253 	NODE *zero = make_number(0.0);
3254 	NODE *result;
3255 
3256 	if (name[0] == 'g') {
3257 		if (name[1] == 'e')
3258 			flags = GENSUB;
3259 		else
3260 			flags = GSUB;
3261 	}
3262 
3263 	bool need_free = false;
3264 	if (flags == 0 || flags == GSUB) {
3265 		/* sub or gsub */
3266 		if (nargs != 2)
3267 			fatal(_("%s: can be called indirectly only with two arguments"), name);
3268 
3269 		replace = POP_STRING();
3270 		regex = POP();	/* the regex */
3271 		/*
3272 		 * push regex
3273 		 * push replace
3274 		 * push $0
3275 		 */
3276 		if ((regex->flags & REGEX) != 0)
3277 			regex = regex->typed_re;
3278 		else {
3279 			regex = make_regnode(Node_regex, regex);
3280 			need_free = true;
3281 		}
3282 		PUSH(regex);
3283 		PUSH(replace);
3284 		lhs = r_get_field(zero, (Func_ptr *) 0, true);
3285 		nargs++;
3286 		PUSH_ADDRESS(lhs);
3287 	} else {
3288 		/* gensub */
3289 		if (nargs == 4)
3290 			rhs = POP();
3291 		else
3292 			rhs = NULL;
3293 		glob_flag = POP_STRING();
3294 		replace = POP_STRING();
3295 		regex = POP();	/* the regex */
3296 		/*
3297 		 * push regex
3298 		 * push replace
3299 		 * push glob_flag
3300 		 * if (nargs = 3) {
3301 		 *	 push $0
3302 		 *	 nargs++
3303 		 * }
3304 		 */
3305 		if ((regex->flags & REGEX) != 0)
3306 			regex = regex->typed_re;
3307 		else {
3308 			regex = make_regnode(Node_regex, regex);
3309 			need_free = true;
3310 		}
3311 		PUSH(regex);
3312 		PUSH(replace);
3313 		PUSH(glob_flag);
3314 		if (rhs == NULL) {
3315 			lhs = r_get_field(zero, (Func_ptr *) 0, true);
3316 			rhs = *lhs;
3317 			UPREF(rhs);
3318 			PUSH(rhs);
3319 			nargs++;
3320 		}
3321 		else
3322 			PUSH(rhs);
3323 	}
3324 
3325 	unref(zero);
3326 	result = do_sub(nargs, flags);
3327 
3328 	if (need_free) {
3329 		refree(regex->re_reg[0]);
3330 		if (regex->re_reg[1] != NULL)
3331 			refree(regex->re_reg[1]);
3332 		freenode(regex);
3333 	}
3334 
3335 	if (flags != GENSUB)
3336 		reset_record();
3337 	return result;
3338 }
3339 
3340 /* call_match --- call do_match indirectly */
3341 
3342 NODE *
call_match(int nargs)3343 call_match(int nargs)
3344 {
3345 	NODE *regex, *text, *array;
3346 	NODE *result;
3347 
3348 	regex = text = array = NULL;
3349 	if (nargs == 3)
3350 		array = POP();
3351 	regex = POP();
3352 
3353 	/* Don't need to pop the string just to push it back ... */
3354 
3355 	bool need_free = false;
3356 	if ((regex->flags & REGEX) != 0)
3357 		regex = regex->typed_re;
3358 	else {
3359 		regex = make_regnode(Node_regex, regex);
3360 		need_free = true;
3361 	}
3362 
3363 	PUSH(regex);
3364 
3365 	if (array)
3366 		PUSH(array);
3367 
3368 	result = do_match(nargs);
3369 
3370 	if (need_free) {
3371 		refree(regex->re_reg[0]);
3372 		if (regex->re_reg[1] != NULL)
3373 			refree(regex->re_reg[1]);
3374 		freenode(regex);
3375 	}
3376 
3377 	return result;
3378 }
3379 
3380 /* call_split_func --- call do_split or do_pat_split indirectly */
3381 
3382 NODE *
call_split_func(const char * name,int nargs)3383 call_split_func(const char *name, int nargs)
3384 {
3385 	NODE *regex, *seps;
3386 	NODE *result;
3387 
3388 	regex = seps = NULL;
3389 	if (nargs < 2)
3390 		fatal(_("indirect call to %s requires at least two arguments"),
3391 				name);
3392 
3393 	if (nargs == 4)
3394 		seps = POP();
3395 
3396 	bool need_free = false;
3397 	if (nargs >= 3) {
3398 		regex = POP_STRING();
3399 		if ((regex->flags & REGEX) != 0)
3400 			regex = regex->typed_re;
3401 		else {
3402 			regex = make_regnode(Node_regex, regex);
3403 			need_free = true;
3404 		}
3405 	} else {
3406 		if (name[0] == 's') {
3407 			regex = make_regnode(Node_regex, FS_node->var_value);
3408 			regex->re_flags |= FS_DFLT;
3409 		} else
3410 			regex = make_regnode(Node_regex, FPAT_node->var_value);
3411 
3412 		need_free = true;
3413 		nargs++;
3414 	}
3415 
3416 	/* Don't need to pop the string or the data array */
3417 
3418 	PUSH(regex);
3419 
3420 	if (seps)
3421 		PUSH(seps);
3422 
3423 	result = (name[0] == 's') ? do_split(nargs) : do_patsplit(nargs);
3424 
3425 	if (need_free) {
3426 		refree(regex->re_reg[0]);
3427 		if (regex->re_reg[1] != NULL)
3428 			refree(regex->re_reg[1]);
3429 		freenode(regex);
3430 	}
3431 
3432 	return result;
3433 }
3434 
3435 /* make_integer - Convert an integer to a number node.  */
3436 
3437 static NODE *
make_integer(uintmax_t n)3438 make_integer(uintmax_t n)
3439 {
3440 	n = adjust_uint(n);
3441 
3442 	return make_number((AWKNUM) n);
3443 }
3444 
3445 /* do_lshift --- perform a << operation */
3446 
3447 NODE *
do_lshift(int nargs)3448 do_lshift(int nargs)
3449 {
3450 	NODE *s1, *s2;
3451 	uintmax_t uval, ushift, res;
3452 	AWKNUM val, shift;
3453 
3454 	POP_TWO_SCALARS(s1, s2);
3455 	if (do_lint) {
3456 		if ((fixtype(s1)->flags & NUMBER) == 0)
3457 			lintwarn(_("%s: received non-numeric first argument"), "lshift");
3458 		if ((fixtype(s2)->flags & NUMBER) == 0)
3459 			lintwarn(_("%s: received non-numeric second argument"), "lshift");
3460 	}
3461 
3462 	val = force_number(s1)->numbr;
3463 	shift = force_number(s2)->numbr;
3464 	if (val < 0 || shift < 0)
3465 		fatal(_("lshift(%f, %f): negative values are not allowed"), val, shift);
3466 
3467 	if (do_lint) {
3468 		if (double_to_int(val) != val || double_to_int(shift) != shift)
3469 			lintwarn(_("lshift(%f, %f): fractional values will be truncated"), val, shift);
3470 		if (shift >= sizeof(uintmax_t) * CHAR_BIT)
3471 			lintwarn(_("lshift(%f, %f): too large shift value will give strange results"), val, shift);
3472 	}
3473 
3474 	DEREF(s1);
3475 	DEREF(s2);
3476 
3477 	uval = (uintmax_t) val;
3478 	ushift = (uintmax_t) shift;
3479 
3480 	res = uval << ushift;
3481 	return make_integer(res);
3482 }
3483 
3484 /* do_rshift --- perform a >> operation */
3485 
3486 NODE *
do_rshift(int nargs)3487 do_rshift(int nargs)
3488 {
3489 	NODE *s1, *s2;
3490 	uintmax_t uval, ushift, res;
3491 	AWKNUM val, shift;
3492 
3493 	POP_TWO_SCALARS(s1, s2);
3494 	if (do_lint) {
3495 		if ((fixtype(s1)->flags & NUMBER) == 0)
3496 			lintwarn(_("%s: received non-numeric first argument"), "rshift");
3497 		if ((fixtype(s2)->flags & NUMBER) == 0)
3498 			lintwarn(_("%s: received non-numeric second argument"), "rshift");
3499 	}
3500 
3501 	val = force_number(s1)->numbr;
3502 	shift = force_number(s2)->numbr;
3503 	if (val < 0 || shift < 0)
3504 		fatal(_("rshift(%f, %f): negative values are not allowed"), val, shift);
3505 
3506 	if (do_lint) {
3507 		if (double_to_int(val) != val || double_to_int(shift) != shift)
3508 			lintwarn(_("rshift(%f, %f): fractional values will be truncated"), val, shift);
3509 		if (shift >= sizeof(uintmax_t) * CHAR_BIT)
3510 			lintwarn(_("rshift(%f, %f): too large shift value will give strange results"), val, shift);
3511 	}
3512 
3513 	DEREF(s1);
3514 	DEREF(s2);
3515 
3516 	uval = (uintmax_t) val;
3517 	ushift = (uintmax_t) shift;
3518 
3519 	res = uval >> ushift;
3520 	return make_integer(res);
3521 }
3522 
3523 /* do_and --- perform an & operation */
3524 
3525 NODE *
do_and(int nargs)3526 do_and(int nargs)
3527 {
3528 	NODE *s1;
3529 	uintmax_t res, uval;
3530 	AWKNUM val;
3531 
3532 	res = ~(uintmax_t) 0;	/* start off with all ones */
3533 	if (nargs < 2)
3534 		fatal(_("%s: called with less than two arguments"), "and");
3535 
3536 	for (; nargs > 0; nargs--) {
3537 		s1 = POP_SCALAR();
3538 		if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
3539 			lintwarn(_("%s: argument %d is non-numeric"), "and", nargs);
3540 
3541 		val = force_number(s1)->numbr;
3542 		if (val < 0)
3543 			fatal(_("%s: argument %d negative value %g is not allowed"), "and", nargs, val);
3544 
3545 		uval = (uintmax_t) val;
3546 		res &= uval;
3547 
3548 		DEREF(s1);
3549 	}
3550 
3551 	return make_integer(res);
3552 }
3553 
3554 /* do_or --- perform an | operation */
3555 
3556 NODE *
do_or(int nargs)3557 do_or(int nargs)
3558 {
3559 	NODE *s1;
3560 	uintmax_t res, uval;
3561 	AWKNUM val;
3562 
3563 	res = 0;
3564 	if (nargs < 2)
3565 		fatal(_("%s: called with less than two arguments"), "or");
3566 
3567 	for (; nargs > 0; nargs--) {
3568 		s1 = POP_SCALAR();
3569 		if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
3570 			lintwarn(_("%s: argument %d is non-numeric"), "or", nargs);
3571 
3572 		val = force_number(s1)->numbr;
3573 		if (val < 0)
3574 			fatal(_("%s: argument %d negative value %g is not allowed"), "or", nargs, val);
3575 
3576 		uval = (uintmax_t) val;
3577 		res |= uval;
3578 
3579 		DEREF(s1);
3580 	}
3581 
3582 	return make_integer(res);
3583 }
3584 
3585 /* do_xor --- perform an ^ operation */
3586 
3587 NODE *
do_xor(int nargs)3588 do_xor(int nargs)
3589 {
3590 	NODE *s1;
3591 	uintmax_t res, uval;
3592 	AWKNUM val;
3593 
3594 	if (nargs < 2)
3595 		fatal(_("%s: called with less than two arguments"), "xor");
3596 
3597 	res = 0;	/* start with all zeroes */
3598 	for (; nargs > 0; nargs--) {
3599 		s1 = POP_SCALAR();
3600 		if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
3601 			lintwarn(_("%s: argument %d is non-numeric"), "xor", nargs);
3602 
3603 		val = force_number(s1)->numbr;
3604 		if (val < 0)
3605 			fatal(_("%s: argument %d negative value %g is not allowed"), "xor", nargs, val);
3606 
3607 		uval = (uintmax_t) val;
3608 		res ^= uval;
3609 
3610 		DEREF(s1);
3611 	}
3612 
3613 	return make_integer(res);
3614 }
3615 
3616 /* do_compl --- perform a ~ operation */
3617 
3618 NODE *
do_compl(int nargs)3619 do_compl(int nargs)
3620 {
3621 	NODE *tmp;
3622 	double d;
3623 	uintmax_t uval;
3624 
3625 	tmp = POP_SCALAR();
3626 	if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
3627 		lintwarn(_("%s: received non-numeric argument"), "compl");
3628 	d = force_number(tmp)->numbr;
3629 	DEREF(tmp);
3630 
3631 	if (d < 0)
3632 		fatal(_("compl(%f): negative value is not allowed"), d);
3633 
3634 	if (do_lint && double_to_int(d) != d)
3635 		lintwarn(_("compl(%f): fractional value will be truncated"), d);
3636 
3637 	uval = (uintmax_t) d;
3638 	uval = ~ uval;
3639 	return make_integer(uval);
3640 }
3641 
3642 /* do_strtonum --- the strtonum function */
3643 
3644 NODE *
do_strtonum(int nargs)3645 do_strtonum(int nargs)
3646 {
3647 	NODE *tmp;
3648 	AWKNUM d;
3649 
3650 	tmp = fixtype(POP_SCALAR());
3651 	if ((tmp->flags & NUMBER) != 0)
3652 		d = (AWKNUM) tmp->numbr;
3653 	else if (get_numbase(tmp->stptr, tmp->stlen, use_lc_numeric) != 10)
3654 		d = nondec2awknum(tmp->stptr, tmp->stlen, NULL);
3655 	else
3656 		d = (AWKNUM) force_number(tmp)->numbr;
3657 
3658 	DEREF(tmp);
3659 	return make_number((AWKNUM) d);
3660 }
3661 
3662 /* nondec2awknum --- convert octal or hex value to double */
3663 
3664 /*
3665  * Because of awk's concatenation rules and the way awk.y:yylex()
3666  * collects a number, this routine has to be willing to stop on the
3667  * first invalid character.
3668  */
3669 
3670 AWKNUM
nondec2awknum(char * str,size_t len,char ** endptr)3671 nondec2awknum(char *str, size_t len, char **endptr)
3672 {
3673 	AWKNUM retval = 0.0;
3674 	char save;
3675 	short val;
3676 	char *start = str;
3677 
3678 	if (len >= 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
3679 		/*
3680 		 * User called strtonum("0x") or some such,
3681 		 * so just quit early.
3682 		 */
3683 		if (len <= 2) {
3684 			if (endptr)
3685 				*endptr = start;
3686 			return (AWKNUM) 0.0;
3687 		}
3688 
3689 		for (str += 2, len -= 2; len > 0; len--, str++) {
3690 			switch (*str) {
3691 			case '0':
3692 			case '1':
3693 			case '2':
3694 			case '3':
3695 			case '4':
3696 			case '5':
3697 			case '6':
3698 			case '7':
3699 			case '8':
3700 			case '9':
3701 				val = *str - '0';
3702 				break;
3703 			case 'a':
3704 			case 'b':
3705 			case 'c':
3706 			case 'd':
3707 			case 'e':
3708 			case 'f':
3709 				val = *str - 'a' + 10;
3710 				break;
3711 			case 'A':
3712 			case 'B':
3713 			case 'C':
3714 			case 'D':
3715 			case 'E':
3716 			case 'F':
3717 				val = *str - 'A' + 10;
3718 				break;
3719 			default:
3720 				if (endptr)
3721 					*endptr = str;
3722 				goto done;
3723 			}
3724 			retval = (retval * 16) + val;
3725 		}
3726 		if (endptr)
3727 			*endptr = str;
3728 	} else if (len >= 1 && *str == '0') {
3729 		int l;
3730 		// preserve len in case we go to decimal
3731 		for (l = len; l > 0; l--) {
3732 			if (! isdigit((unsigned char) *str)) {
3733 				if (endptr)
3734 					*endptr = str;
3735 				goto done;
3736 			}
3737 			else if (*str == '8' || *str == '9') {
3738 				str = start;
3739 				goto decimal;
3740 			}
3741 			retval = (retval * 8) + (*str - '0');
3742 			str++;
3743 		}
3744 		if (endptr)
3745 			*endptr = str;
3746 	} else {
3747 decimal:
3748 		save = str[len];
3749 		str[len] = '\0';
3750 		retval = strtod(str, endptr);
3751 		str[len] = save;
3752 	}
3753 done:
3754 	return retval;
3755 }
3756 
/* do_dcgettext, do_dcngettext --- handle i18n translations */

#if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT

/*
 * localecategory_from_argument --- map a category name to its LC_xxx value
 *
 * t is a string node holding a name such as "LC_TIME"; a NULL t selects
 * LC_MESSAGES.  An unknown name is a fatal error.  The node's text is
 * NUL-terminated in place for the duration of the lookup.
 */

static int
localecategory_from_argument(NODE *t)
{
	/* must stay sorted by name: it is searched with a binary search */
	static const struct category_table {
		int val;
		const char *name;
	} cat_tab[] = {
#ifdef LC_ALL
		{ LC_ALL,	"LC_ALL" },
#endif /* LC_ALL */
#ifdef LC_COLLATE
		{ LC_COLLATE,	"LC_COLLATE" },
#endif /* LC_COLLATE */
#ifdef LC_CTYPE
		{ LC_CTYPE,	"LC_CTYPE" },
#endif /* LC_CTYPE */
#ifdef LC_MESSAGES
		{ LC_MESSAGES,	"LC_MESSAGES" },
#endif /* LC_MESSAGES */
#ifdef LC_MONETARY
		{ LC_MONETARY,	"LC_MONETARY" },
#endif /* LC_MONETARY */
#ifdef LC_NUMERIC
		{ LC_NUMERIC,	"LC_NUMERIC" },
#endif /* LC_NUMERIC */
#ifdef LC_RESPONSE
		{ LC_RESPONSE,	"LC_RESPONSE" },
#endif /* LC_RESPONSE */
#ifdef LC_TIME
		{ LC_TIME,	"LC_TIME" },
#endif /* LC_TIME */
	};
	int low, high, mid, cmp;
	int lc_cat = -1;
	char *category;
	char save;

	if (t == NULL)
		return LC_MESSAGES;

	save = t->stptr[t->stlen];
	t->stptr[t->stlen] = '\0';
	category = t->stptr;

	/* binary search the table */
	low = 0;
	high = (sizeof(cat_tab) / sizeof(cat_tab[0])) - 1;
	while (low <= high) {
		mid = (low + high) / 2;
		cmp = strcmp(category, cat_tab[mid].name);

		if (cmp < 0)		/* category < mid */
			high = mid - 1;
		else if (cmp > 0)	/* category > mid */
			low = mid + 1;
		else {
			lc_cat = cat_tab[mid].val;
			break;
		}
	}

	/*
	 * Complain while the text is still NUL-terminated; once the saved
	 * byte is put back, category may no longer end where the name does.
	 */
	if (lc_cat == -1)	/* not there */
		fatal(_("dcgettext: `%s' is not a valid locale category"), category);

	t->stptr[t->stlen] = save;
	return lc_cat;
}

#endif
3829 
3830 /*
3831  * awk usage is
3832  *
3833  * 	str = dcgettext(string [, domain [, category]])
3834  * 	str = dcngettext(string1, string2, number [, domain [, category]])
3835  *
3836  * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
3837  */
3838 
3839 NODE *
do_dcgettext(int nargs)3840 do_dcgettext(int nargs)
3841 {
3842 	NODE *tmp, *t1, *t2 = NULL;
3843 	char *string;
3844 	char *the_result;
3845 	size_t reslen;
3846 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3847 	int lc_cat;
3848 	char *domain;
3849 	char save1 = '\0', save2 = '\0';
3850 
3851 	if (nargs == 3) {	/* third argument */
3852 		tmp = POP_STRING();
3853 		lc_cat = localecategory_from_argument(tmp);
3854 		DEREF(tmp);
3855 	} else
3856 		lc_cat = LC_MESSAGES;
3857 
3858 	if (nargs >= 2) {  /* second argument */
3859 		t2 = POP_STRING();
3860 		domain = t2->stptr;
3861 		str_terminate(t2, save2);
3862 	} else
3863 		domain = TEXTDOMAIN;
3864 #else
3865 	if (nargs == 3) {
3866 		tmp = POP_STRING();
3867 		DEREF(tmp);
3868 	}
3869 	if (nargs >= 2) {
3870 		t2 = POP_STRING();
3871 		DEREF(t2);
3872 	}
3873 #endif
3874 
3875 	t1 = POP_STRING();	/* first argument */
3876 	string = t1->stptr;
3877 
3878 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3879 	str_terminate(t1, save1);
3880 	the_result = dcgettext(domain, string, lc_cat);
3881 	str_restore(t1, save1);
3882 	if (t2 != NULL) {
3883 		str_restore(t2, save2);
3884 		DEREF(t2);
3885 	}
3886 	reslen = strlen(the_result);
3887 #else
3888 	the_result = string;
3889 	reslen = t1->stlen;
3890 #endif
3891 	DEREF(t1);
3892 	return make_string(the_result, reslen);
3893 }
3894 
3895 
3896 NODE *
do_dcngettext(int nargs)3897 do_dcngettext(int nargs)
3898 {
3899 	NODE *tmp, *t1, *t2, *t3;
3900 	char *string1, *string2;
3901 	unsigned long number;
3902 	AWKNUM d;
3903 	char *the_result;
3904 	size_t reslen;
3905 
3906 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3907 	int lc_cat;
3908 	char *domain;
3909 	char save = '\0', save1 = '\0', save2 = '\0';
3910 	bool saved_end = false;
3911 
3912 	if (nargs == 5) {	/* fifth argument */
3913 		tmp = POP_STRING();
3914 		lc_cat = localecategory_from_argument(tmp);
3915 		DEREF(tmp);
3916 	} else
3917 		lc_cat = LC_MESSAGES;
3918 
3919 	t3 = NULL;
3920 	if (nargs >= 4) {	/* fourth argument */
3921 		t3 = POP_STRING();
3922 		domain = t3->stptr;
3923 		save = domain[t3->stlen];
3924 		domain[t3->stlen] = '\0';
3925 		saved_end = true;
3926 	} else
3927 		domain = TEXTDOMAIN;
3928 #else
3929 	if (nargs == 5) {
3930 		tmp = POP_STRING();
3931 		DEREF(tmp);
3932 	}
3933 	if (nargs >= 4) {
3934 		t3 = POP_STRING();
3935 		DEREF(t3);
3936 	}
3937 #endif
3938 
3939 	t2 = POP_NUMBER();	/* third argument */
3940 	d = get_number_d(t2);
3941 	DEREF(t2);
3942 
3943 	number = (unsigned long) double_to_int(d);
3944 	t2 = POP_STRING();	/* second argument */
3945 	string2 = t2->stptr;
3946 	t1 = POP_STRING();	/* first argument */
3947 	string1 = t1->stptr;
3948 
3949 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3950 
3951 	str_terminate(t1, save1);
3952 	str_terminate(t2, save2);
3953 	the_result = dcngettext(domain, string1, string2, number, lc_cat);
3954 	reslen = strlen(the_result);
3955 	str_restore(t1, save1);
3956 	str_restore(t2, save2);
3957 	if (saved_end)
3958 		domain[t3->stlen] = save;
3959 	if (t3 != NULL)
3960 		DEREF(t3);
3961 #else
3962 	if (number == 1) {
3963 		the_result = string1;
3964 		reslen = t1->stlen;
3965 	} else {
3966 		the_result = string2;
3967 		reslen = t2->stlen;
3968 	}
3969 #endif
3970 	DEREF(t1);
3971 	DEREF(t2);
3972 	return make_string(the_result, reslen);
3973 }
3974 
3975 /* do_bindtextdomain --- set the directory for a text domain */
3976 
3977 /*
3978  * awk usage is
3979  *
3980  * 	binding = bindtextdomain(dir [, domain])
3981  *
3982  * If dir is "", pass NULL to C version.
3983  * Default domain is TEXTDOMAIN.
3984  */
3985 
3986 NODE *
do_bindtextdomain(int nargs)3987 do_bindtextdomain(int nargs)
3988 {
3989 	NODE *t1, *t2;
3990 	const char *directory, *domain;
3991 	const char *the_result;
3992 
3993 	t1 = t2 = NULL;
3994 	/* set defaults */
3995 	directory = NULL;
3996 	domain = TEXTDOMAIN;
3997 	char save = '\0', save1 = '\0';
3998 
3999 	if (nargs == 2) {	/* second argument */
4000 		t2 = POP_STRING();
4001 		domain = (const char *) t2->stptr;
4002 		save = t2->stptr[t2->stlen];
4003 		t2->stptr[t2->stlen] = '\0';
4004 	}
4005 
4006 	/* first argument */
4007 	t1 = POP_STRING();
4008 	if (t1->stlen > 0) {
4009 		directory = (const char *) t1->stptr;
4010 		str_terminate(t1, save1);
4011 	}
4012 
4013 	the_result = bindtextdomain(domain, directory);
4014 	if (directory)
4015 		str_restore(t1, save1);
4016 
4017 	DEREF(t1);
4018 	if (t2 != NULL) {
4019 		t2->stptr[t2->stlen] = save;
4020 		DEREF(t2);
4021 	}
4022 
4023 	if (the_result == NULL)
4024 		the_result = "";
4025 
4026 	return make_string(the_result, strlen(the_result));
4027 }
4028 
#ifdef SUPPLY_INTDIV
/* do_intdiv --- do integer division, return quotient and remainder in dest array */

/*
 * We define the semantics as:
 * 	numerator = int(numerator)
 *	denominator = int(denominator)
 *	quotient = int(numerator / denominator)
 *	remainder = int(numerator % denominator)
 *
 * The third argument must be an array; it is cleared and receives the
 * elements "quotient" and "remainder".  The function itself returns 0.
 */

NODE *
do_intdiv(int nargs)
{
	NODE *dest, *den_node, *num_node;
	double n, d, quot, rem;

	/* arguments come off the stack last to first */
	dest = POP_PARAM();
	if (dest->type != Node_var_array)
		fatal(_("intdiv: third argument is not an array"));
	assoc_clear(dest);

	den_node = POP_SCALAR();
	num_node = POP_SCALAR();

	if (do_lint) {
		if ((fixtype(num_node)->flags & NUMBER) == 0)
			lintwarn(_("%s: received non-numeric first argument"), "intdiv");
		if ((fixtype(den_node)->flags & NUMBER) == 0)
			lintwarn(_("%s: received non-numeric second argument"), "intdiv");
	}

	(void) force_number(num_node);
	(void) force_number(den_node);
	n = double_to_int(get_number_d(num_node));
	d = double_to_int(get_number_d(den_node));

	if (d == 0.0)
		fatal(_("intdiv: division by zero attempted"));

	quot = double_to_int(n / d);
	/*
	 * FIXME: This code is duplicated, factor it out to a
	 * separate function.
	 */
#ifdef HAVE_FMOD
	rem = fmod(n, d);
#else	/* ! HAVE_FMOD */
	(void) modf(n / d, & rem);
	rem = n - rem * d;
#endif	/* ! HAVE_FMOD */
	rem = double_to_int(rem);

	assoc_set(dest, make_string("quotient", 8), make_number((AWKNUM) quot));

	assoc_set(dest, make_string("remainder", 9), make_number((AWKNUM) rem));

	DEREF(den_node);
	DEREF(num_node);

	return make_number((AWKNUM) 0.0);
}
#endif /* SUPPLY_INTDIV */
4092 
4093 /* do_typeof --- return a string with the type of the arg */
4094 
4095 NODE *
do_typeof(int nargs)4096 do_typeof(int nargs)
4097 {
4098 	NODE *arg;
4099 	const char *res = NULL;
4100 	bool deref = true;
4101 	NODE *dbg;
4102 
4103 	if (nargs == 2) {	/* 2nd optional arg for debugging */
4104 		dbg = POP_PARAM();
4105 		if (dbg->type != Node_var_array)
4106 			fatal(_("typeof: second argument is not an array"));
4107 		assoc_clear(dbg);
4108 	}
4109 	else
4110 		dbg = NULL;
4111 	arg = POP();
4112 	switch (arg->type) {
4113 	case Node_var_array:
4114 		/* Node_var_array is never UPREF'ed */
4115 		res = "array";
4116 		deref = false;
4117 		if (dbg) {
4118 			assoc_set(dbg, make_string("array_type", 10), make_string(arg->array_funcs->name, strlen(arg->array_funcs->name)));
4119 			if (arg == PROCINFO_node) {
4120 				int i;
4121 				for (i = 0; i < BLOCK_MAX; i++) {
4122 					char *p;
4123 					size_t nl = strlen(nextfree[i].name);
4124 					/*
4125 					 * save values before we create new
4126 					 * array elements so that we have a
4127 					 * snapshot at a consistent moment in
4128 					 * time
4129 					 */
4130 					long hw = nextfree[i].highwater;
4131 					long active;
4132 #ifdef MEMDEBUG
4133 					active = nextfree[i].active;
4134 #else
4135 					active = hw;
4136 					{
4137 						struct block_item *ip;
4138 						for (ip = nextfree[i].freep; ip; ip = ip->freep)
4139 							active--;
4140 					}
4141 #endif
4142 
4143 #define SETVAL(X, V) {	\
4144 	size_t l = nl + sizeof(#X);	\
4145 	emalloc(p, char *, l+1, "do_typeof");	\
4146 	sprintf(p, "%s_" #X, nextfree[i].name);	\
4147 	assoc_set(dbg, make_str_node(p, l, ALREADY_MALLOCED), make_number((AWKNUM) (V)));	\
4148 }
4149 					SETVAL(highwater, hw)
4150 					SETVAL(active, active)
4151 #undef SETVAL
4152 				}
4153 			}
4154 		}
4155 		break;
4156 	case Node_val:
4157 		switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
4158 		case NUMBER:
4159 			res = "number";
4160 			break;
4161 		case NUMBER|USER_INPUT:
4162 			res = "strnum";
4163 			break;
4164 		case REGEX:
4165 			res = "regexp";
4166 			break;
4167 		case STRING:
4168 			res = "string";
4169 			// fall through
4170 		case NUMBER|STRING:
4171 			if (arg == Nnull_string || (arg->flags & NULL_FIELD) != 0) {
4172 				res = "unassigned";
4173 				break;
4174 			}
4175 			/* fall through */
4176 		default:
4177 			if (res == NULL) {
4178 				warning(_("typeof detected invalid flags combination `%s'; please file a bug report"), flags2str(arg->flags));
4179 				res = "unknown";
4180 			}
4181 			break;
4182 		}
4183 		if (dbg) {
4184 			const char *s = flags2str(arg->flags);
4185 			assoc_set(dbg, make_string("flags", 5), make_string(s, strlen(s)));
4186 		}
4187 		break;
4188 	case Node_var_new:
4189 	case Node_array_ref:
4190 		res = "untyped";
4191 		deref = false;
4192 		break;
4193 	case Node_var:
4194 		/*
4195 		 * Note: this doesn't happen because the function calling code
4196 		 * in interpret.h pushes Node_var->var_value.
4197 		 */
4198 		fatal(_("typeof: invalid argument type `%s'"),
4199 				nodetype2str(arg->type));
4200 		break;
4201 	default:
4202 		fatal(_("typeof: unknown argument type `%s'"),
4203 				nodetype2str(arg->type));
4204 		break;
4205 	}
4206 
4207 	if (deref)
4208 		DEREF(arg);
4209 	return make_string(res, strlen(res));
4210 }
4211 
4212 /* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
4213 
4214 static size_t
mbc_byte_count(const char * ptr,size_t numchars)4215 mbc_byte_count(const char *ptr, size_t numchars)
4216 {
4217 	mbstate_t cur_state;
4218 	size_t sum = 0;
4219 	int mb_len;
4220 
4221 	memset(& cur_state, 0, sizeof(cur_state));
4222 
4223 	assert(gawk_mb_cur_max > 1);
4224 	mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
4225 	if (mb_len <= 0)
4226 		return numchars;	/* no valid m.b. char */
4227 
4228 	for (; numchars > 0; numchars--) {
4229 		mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
4230 		if (mb_len <= 0)
4231 			break;
4232 		sum += mb_len;
4233 		ptr += mb_len;
4234 	}
4235 
4236 	return sum;
4237 }
4238 
4239 /* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
4240 
4241 static size_t
mbc_char_count(const char * ptr,size_t numbytes)4242 mbc_char_count(const char *ptr, size_t numbytes)
4243 {
4244 	mbstate_t cur_state;
4245 	size_t sum = 0;
4246 	int mb_len;
4247 
4248 	if (gawk_mb_cur_max == 1)
4249 		return numbytes;
4250 
4251 	memset(& cur_state, 0, sizeof(cur_state));
4252 
4253 	mb_len = mbrlen(ptr, numbytes, &cur_state);
4254 	if (mb_len <= 0)
4255 		return numbytes;	/* no valid m.b. char */
4256 
4257 	while (numbytes > 0) {
4258 		mb_len = mbrlen(ptr, numbytes, &cur_state);
4259 		if (mb_len <= 0)
4260 			break;
4261 		sum++;
4262 		ptr += mb_len;
4263 		numbytes -= mb_len;
4264 	}
4265 
4266 	return sum;
4267 }
4268 
/*
 * sanitize_exit_status --- convert a 16 bit Unix exit status into something reasonable
 *
 * A normal exit yields the exit value.  Death by signal yields the signal
 * number plus 256, or plus 512 when a core dump is reported.  Anything
 * else yields 0.
 */

int sanitize_exit_status(int status)
{
	if (WIFEXITED(status))
		return WEXITSTATUS(status);	/* normal exit */

	if (WIFSIGNALED(status)) {
		/* use 256 since exit values are 8 bits */
		int offset = 256;
#ifdef WCOREDUMP
		if (WCOREDUMP(status))
			offset = 512;
#endif
		return WTERMSIG(status) + offset;
	}

	return 0;	/* shouldn't get here */
}
4289 
4290 /* out_of_range --- return true if a value is out of range */
4291 
4292 bool
out_of_range(NODE * n)4293 out_of_range(NODE *n)
4294 {
4295 #ifdef HAVE_MPFR
4296 	if (is_mpg_integer(n))
4297 		return false;
4298 	else if (is_mpg_float(n))
4299 		return (! mpfr_number_p(n->mpg_numbr));
4300 	else
4301 #endif
4302 		return (isnan(n->numbr) || isinf(n->numbr));
4303 }
4304 
4305 /* format_nan_inf --- format NaN and INF values */
4306 
4307 char *
format_nan_inf(NODE * n,char format)4308 format_nan_inf(NODE *n, char format)
4309 {
4310 	static char buf[100];
4311 	double val = n->numbr;
4312 
4313 #ifdef HAVE_MPFR
4314 	if (is_mpg_integer(n))
4315 		return NULL;
4316 	else if (is_mpg_float(n)) {
4317 		if (mpfr_nan_p(n->mpg_numbr)) {
4318 			strcpy(buf, mpfr_signbit(n->mpg_numbr) != 0 ? "-nan" : "+nan");
4319 
4320 			goto fmt;
4321 		} else if (mpfr_inf_p(n->mpg_numbr)) {
4322 			strcpy(buf, mpfr_sgn(n->mpg_numbr) < 0 ? "-inf" : "+inf");
4323 
4324 			goto fmt;
4325 		} else
4326 			return NULL;
4327 	}
4328 	/* else
4329 		fallthrough */
4330 #endif
4331 
4332 	if (isnan(val)) {
4333 		strcpy(buf, signbit(val) != 0 ? "-nan" : "+nan");
4334 
4335 		// fall through to end
4336 	} else if (isinf(val)) {
4337 		strcpy(buf, val < 0 ? "-inf" : "+inf");
4338 
4339 		// fall through to end
4340 	} else
4341 		return NULL;
4342 
4343 #ifdef HAVE_MPFR
4344 fmt:
4345 #endif
4346 	if (isupper(format)) {
4347 		int i;
4348 
4349 		for (i = 0; buf[i] != '\0'; i++)
4350 			buf[i] = toupper(buf[i]);
4351 	}
4352 	return buf;
4353 }
4354 
4355 
4356 /* check_symtab_functab --- check if dest is SYMTAB or FUNCTAB, fatal if so */
4357 
4358 void
check_symtab_functab(NODE * dest,const char * fname,const char * msg)4359 check_symtab_functab(NODE *dest, const char *fname, const char *msg)
4360 {
4361 	if (dest == symbol_table)
4362 		fatal(msg, fname, "SYMTAB");
4363 	else if (dest == func_table)
4364 		fatal(msg, fname, "FUNCTAB");
4365 }
4366 
/*
 * reverse --- reverse the contents of a string in place
 *
 * str must be a writable NUL-terminated string.  Strings of length 0 or 1
 * are left untouched.  Positions are tracked with pointers derived from
 * the size_t length, so the length is never narrowed to int.
 */

static void
reverse(char *str)
{
	size_t len = strlen(str);
	char *front, *back;

	if (len < 2)
		return;		/* nothing to swap */

	front = str;
	back = str + len - 1;
	while (front < back) {
		char tmp = *front;

		*front++ = *back;
		*back-- = tmp;
	}
}
4381 
4382 /* add_thousands --- add the thousands separator. Needed for MPFR %d format */
4383 
4384 /*
4385  * Copy the source string into the destination string, backwards,
4386  * adding the thousands separator at the right points. Then reverse
4387  * the string when done. This gives us much cleaner code than trying
4388  * to work through the string backwards. (We tried it, it was yucky.)
4389  */
4390 
4391 static const char *
add_thousands(const char * original,struct lconv * loc)4392 add_thousands(const char *original, struct lconv *loc)
4393 {
4394 	size_t orig_len = strlen(original);
4395 	size_t new_len = orig_len + (orig_len * strlen(loc->thousands_sep)) + 1; 	// worst case
4396 	char *newbuf;
4397 	char decimal_point = '\0';
4398 	const char *dec = NULL;
4399 	const char *src;
4400 	char *dest;
4401 
4402 	emalloc(newbuf, char *, new_len, "add_thousands");
4403 	memset(newbuf, '\0', new_len);
4404 
4405 	src = original + strlen(original) - 1;
4406 	dest = newbuf;
4407 
4408 	if (loc->decimal_point[0] != '\0') {
4409 		decimal_point = loc->decimal_point[0];
4410 		if ((dec = strchr(original, decimal_point)) != NULL) {
4411 			while (src >= dec)
4412 				*dest++ = *src--;
4413 		}
4414 	}
4415 
4416 
4417 	int ii = 0;
4418 	int jj = 0;
4419 	do {
4420 		*dest++ = *src--;
4421 		if (loc->grouping[ii] && ++jj == loc->grouping[ii]) {
4422 			if (src >= original) {	/* only add if more digits coming */
4423 				const char *ts = loc->thousands_sep;
4424 				int k;
4425 
4426 				for (k = strlen(ts) - 1; k >= 0; k--)
4427 					*dest++ = ts[k];
4428 			}
4429 			if (loc->grouping[ii+1] == 0)
4430 				jj = 0;		/* keep using current val in loc.grouping[ii] */
4431 			else if (loc->grouping[ii+1] == CHAR_MAX) {
4432 				// copy in the rest and be done
4433 				while (src >= original)
4434 					*dest++ = *src--;
4435 				break;
4436 			} else {
4437 				ii++;
4438 				jj = 0;
4439 			}
4440 		}
4441 	} while (src >= original);
4442 
4443 	*dest++ = '\0';
4444 	reverse(newbuf);
4445 
4446 	return newbuf;
4447 }
4448 
#if 0
// test program (disabled): prints two sample numbers with grouping applied

int main(int argc, char **argv)
{
	static const char *const samples[] = {
		"12345678901234567890.54321",
		"12345678901234567890",
	};
	struct lconv *loc;
	size_t i;

	setlocale(LC_ALL, "");
	loc = localeconv();

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		const char *grouped = add_thousands(samples[i], loc);

		printf("%s\n", grouped);
		free((void*) grouped);
	}

	return 0;
}
#endif
4470