1 /*
2 * builtin.c - Builtin functions and various utility procedures.
3 */
4
5 /*
6 * Copyright (C) 1986, 1988, 1989, 1991-2021,
7 * the Free Software Foundation, Inc.
8 *
9 * This file is part of GAWK, the GNU implementation of the
10 * AWK Programming Language.
11 *
12 * GAWK is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 3 of the License, or
15 * (at your option) any later version.
16 *
17 * GAWK is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 */
26
27
28 #include "awk.h"
29 #if defined(HAVE_FCNTL_H)
30 #include <fcntl.h>
31 #endif
32 #include "random.h"
33 #include "floatmagic.h"
34
35 #if defined(HAVE_POPEN_H)
36 #include "popen.h"
37 #endif
38
/* Fall back to 8-bit bytes when <limits.h> did not supply CHAR_BIT. */
#if ! defined(CHAR_BIT)
# define CHAR_BIT 8
#endif

/*
 * TYPE_SIGNED(t) --- nonzero when the arithmetic type `t' is signed.
 * The extra casts work around common compiler bugs.
 */
#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))

/*
 * TYPE_MINIMUM(t), TYPE_MAXIMUM(t) --- smallest and largest value
 * representable in the integer type `t'.
 *
 * Note: these assume that negative integers are represented internally
 * via 2's complement, which is not mandated by C.  They also ignore the
 * fact that signed integer arithmetic overflow can trigger exceptions,
 * unlike unsigned which is guaranteed not to do so.
 */
#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \
				? ~ (uintmax_t) 0 << (sizeof (t) * CHAR_BIT - 1) \
				: 0))
#define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t)))

/* Supply the <stdint.h> limits on systems that lack them. */
#if ! defined(INTMAX_MIN)
# define INTMAX_MIN TYPE_MINIMUM (intmax_t)
#endif
#if ! defined(UINTMAX_MAX)
# define UINTMAX_MAX TYPE_MAXIMUM (uintmax_t)
#endif

#if ! defined(SIZE_MAX)	/* C99 constant, can't rely on it everywhere */
#define SIZE_MAX ((size_t) -1)
#endif
64
65 #define DEFAULT_G_PRECISION 6
66
67 static size_t mbc_byte_count(const char *ptr, size_t numchars);
68 static size_t mbc_char_count(const char *ptr, size_t numbytes);
69
70 /* Can declare these, since we always use the random shipped with gawk */
71 extern char *initstate(unsigned long seed, char *state, long n);
72 extern char *setstate(char *state);
73 extern long random(void);
74 extern void srandom(unsigned long seed);
75
76 extern NODE **args_array;
77 extern int max_args;
78 extern NODE **fields_arr;
79 extern bool output_is_tty;
80 extern FILE *output_fp;
81
82 static const char *add_thousands(const char *original, struct lconv *loc);
83
/*
 * POP_TWO_SCALARS --- pop the top two stack entries into `s1' and `s2'.
 *
 * `s2' is popped first with POP_SCALAR(), then `s1' with POP().  If `s1'
 * turns out to be an array, `s2' is released and a fatal error is raised.
 *
 * The whole expansion lives inside one do { ... } while (false) so that
 * the macro behaves as a single statement, e.g. as the body of an
 * unbraced `if'.  Both arguments must be assignable NODE pointers and
 * are evaluated more than once.
 */
#define POP_TWO_SCALARS(s1, s2) \
	do { \
		(s2) = POP_SCALAR(); \
		(s1) = POP(); \
		if ((s1)->type == Node_var_array) { \
			DEREF(s2); \
			fatal(_("attempt to use array `%s' in a scalar context"), array_vname(s1)); \
		} \
	} while (false)
91
92
93 /*
94 * Since we supply the version of random(), we know what
95 * value to use here.
96 */
97 #define GAWK_RANDOM_MAX 0x7fffffffL
98
99 /* efwrite --- like fwrite, but with error checking */
100
101 static void
efwrite(const void * ptr,size_t size,size_t count,FILE * fp,const char * from,struct redirect * rp,bool flush)102 efwrite(const void *ptr,
103 size_t size,
104 size_t count,
105 FILE *fp,
106 const char *from,
107 struct redirect *rp,
108 bool flush)
109 {
110 errno = 0;
111 if (rp != NULL) {
112 if (rp->output.gawk_fwrite(ptr, size, count, fp, rp->output.opaque) != count)
113 goto wrerror;
114 } else if (fwrite(ptr, size, count, fp) != count)
115 goto wrerror;
116 if (flush
117 && ((fp == stdout && output_is_tty)
118 || (rp != NULL && (rp->flag & RED_NOBUF) != 0))) {
119 if (rp != NULL) {
120 rp->output.gawk_fflush(fp, rp->output.opaque);
121 if (rp->output.gawk_ferror(fp, rp->output.opaque))
122 goto wrerror;
123 } else {
124 fflush(fp);
125 if (ferror(fp))
126 goto wrerror;
127 }
128 }
129 return;
130
131 wrerror:
132 #ifdef __MINGW32__
133 if (errno == 0 || errno == EINVAL)
134 w32_maybe_set_errno();
135 #endif
136 /* for stdout, die with a real SIGPIPE, like other awks */
137 if (fp == stdout && errno == EPIPE)
138 die_via_sigpipe();
139
140 /* otherwise die verbosely */
141 if ((rp != NULL) ? is_non_fatal_redirect(rp->value, strlen(rp->value)) : is_non_fatal_std(fp))
142 update_ERRNO_int(errno);
143 else
144 fatal(_("%s to \"%s\" failed: %s"), from,
145 rp != NULL
146 ? rp->value
147 : fp == stdout
148 ? _("standard output")
149 : _("standard error"),
150 errno ? strerror(errno) : _("reason unknown"));
151 }
152
153 /* do_exp --- exponential function */
154
155 NODE *
do_exp(int nargs)156 do_exp(int nargs)
157 {
158 NODE *tmp;
159 double d, res;
160
161 tmp = POP_SCALAR();
162 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
163 lintwarn(_("%s: received non-numeric argument"), "exp");
164 d = force_number(tmp)->numbr;
165 DEREF(tmp);
166 errno = 0;
167 res = exp(d);
168 if (errno == ERANGE)
169 warning(_("exp: argument %g is out of range"), d);
170 return make_number((AWKNUM) res);
171 }
172
/* stdfile --- return fp for a standard file */

/*
 * This function allows `fflush("/dev/stdout")' to work.
 * The other files will be available via getredirect().
 * /dev/stdin is not included, since fflush is only for output.
 *
 * `name' need not be NUL-terminated; `len' is its length in bytes.
 * Returns stderr or stdout on an exact match, NULL otherwise.
 */

static FILE *
stdfile(const char *name, size_t len)
{
	static const char err_name[] = "/dev/stderr";
	static const char out_name[] = "/dev/stdout";

	/* both names have the same length, so one test rules out the rest */
	if (len != sizeof(err_name) - 1)
		return NULL;

	if (strncmp(name, err_name, sizeof(err_name) - 1) == 0)
		return stderr;
	if (strncmp(name, out_name, sizeof(out_name) - 1) == 0)
		return stdout;

	return NULL;
}
193
194 /* do_fflush --- flush output, either named file or pipe or everything */
195
196 NODE *
do_fflush(int nargs)197 do_fflush(int nargs)
198 {
199 struct redirect *rp;
200 NODE *tmp;
201 FILE *fp;
202 int status = 0;
203 const char *file;
204 int len;
205
206 /*
207 * November, 2012.
208 * It turns out that circa 2002, when BWK
209 * added fflush() and fflush("") to his awk, he made both of
210 * them flush everything.
211 *
212 * Now, with our inside agent getting ready to try to get fflush()
213 * standardized in POSIX, we are going to make our awk consistent
214 * with his. This should not really affect anyone, as flushing
215 * everything also flushes stdout.
216 *
217 * So. Once upon a time:
218 * fflush() --- flush stdout
219 * fflush("") --- flush everything
220 * Now, both calls flush everything.
221 */
222
223 /* fflush() */
224 if (nargs == 0) {
225 status = flush_io(); // ERRNO updated
226 return make_number((AWKNUM) status);
227 }
228
229 tmp = POP_STRING();
230 file = tmp->stptr;
231 len = tmp->stlen;
232
233 /* fflush("") */
234 if (tmp->stlen == 0) {
235 status = flush_io(); // ERRNO updated
236 DEREF(tmp);
237 return make_number((AWKNUM) status);
238 }
239
240 /* fflush("/some/path") */
241 rp = getredirect(tmp->stptr, tmp->stlen);
242 status = -1;
243 if (rp != NULL) {
244 if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) {
245 if ((rp->flag & RED_PIPE) != 0)
246 warning(_("fflush: cannot flush: pipe `%.*s' opened for reading, not writing"),
247 len, file);
248 else
249 warning(_("fflush: cannot flush: file `%.*s' opened for reading, not writing"),
250 len, file);
251 DEREF(tmp);
252 return make_number((AWKNUM) status);
253 }
254 fp = rp->output.fp;
255 if (fp != NULL) {
256 status = rp->output.gawk_fflush(fp, rp->output.opaque);
257
258 if (status != 0) {
259 if (! is_non_fatal_redirect(tmp->stptr, tmp->stlen))
260 fatal(_("fflush: cannot flush file `%.*s': %s"),
261 len, file, strerror(errno));
262 update_ERRNO_int(errno);
263 }
264 } else if ((rp->flag & RED_TWOWAY) != 0)
265 warning(_("fflush: cannot flush: two-way pipe `%.*s' has closed write end"),
266 len, file);
267 } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) {
268 status = (non_fatal_flush_std_file(fp) == false);
269 } else {
270 status = -1;
271 warning(_("fflush: `%.*s' is not an open file, pipe or co-process"), len, file);
272 }
273 DEREF(tmp);
274 return make_number((AWKNUM) status);
275 }
276
/* mbs_next_wc --- decode the character starting at s[pos] for strncasecmpmbs */

/*
 * Stores the decoded character in `*wc' and returns how many bytes it
 * occupies.  A byte that is_valid_character() accepts is taken as is;
 * otherwise mbrtowc() is tried on the remaining `n - pos' bytes, and an
 * invalid, incomplete or NUL result is treated as a singlebyte character.
 */

static size_t
mbs_next_wc(const unsigned char *s, size_t pos, size_t n,
		mbstate_t *state, wchar_t *wc)
{
	size_t used;

	if (! is_valid_character(s[pos])) {
		used = mbrtowc(wc, (const char *) s + pos, n - pos, state);
		if (used != (size_t) -1 && used != (size_t) -2 && used != 0)
			return used;
		/* We treat it as a singlebyte character. */
	}

	*wc = btowc_cache(s[pos]);
	return 1;
}

/* strncasecmpmbs --- like strncasecmp (multibyte string version) */

/*
 * Compares at most `n' bytes of `s1' and `s2' character by character,
 * ignoring case via towlower().  Returns zero when no difference is
 * found, otherwise the difference of the first pair of lowercased
 * characters that differ.
 */

int
strncasecmpmbs(const unsigned char *s1, const unsigned char *s2, size_t n)
{
	size_t pos1 = 0, pos2 = 0;
	size_t gap;
	wchar_t wc1, wc2;
	mbstate_t state1, state2;

	memset(& state1, 0, sizeof(state1));
	memset(& state2, 0, sizeof(state2));

	while (pos1 < n && pos2 < n) {
		size_t step1 = mbs_next_wc(s1, pos1, n, & state1, & wc1);
		size_t step2 = mbs_next_wc(s2, pos2, n, & state2, & wc2);

		gap = towlower(wc1) - towlower(wc2);
		if (gap != 0)
			/* s1 and s2 are not equivalent. */
			return gap;

		pos1 += step1;
		pos2 += step2;
	}

	/* s1 and s2 are equivalent. */
	return 0;
}
321
/* Inspect the buffer `src' and write the index of each byte to `dest'.
   Caller must allocate `dest' (at least `len' bytes).
   e.g. str = <mb1(1)>, <mb1(2)>, a, b, <mb2(1)>, <mb2(2)>, <mb2(3)>, c
        where mb(i) means the `i'-th byte of a multibyte character.
        dest = 1, 2, 1, 1, 1, 2, 3, 1
   Bytes that mbrlen() rejects, and NUL bytes, count as singlebyte
   characters.
*/
static void
index_multibyte_buffer(char* src, char* dest, int len)
{
	mbstate_t committed;	/* shift state at the start of the current character */
	int start = 0;		/* offset at which the current character begins */
	int pos;

	memset(& committed, 0, sizeof(mbstate_t));

	for (pos = 0; pos < len; pos++) {
		mbstate_t scratch = committed;
		size_t span = pos - start + 1;	/* bytes of this character seen so far */
		size_t got = mbrlen(src + start, span, & scratch);

		if (got == (size_t) -2) {
			/* a part of a multibyte character. */
			dest[pos] = span;
			continue;
		}

		if (got == (size_t) -1 || got <= 1) {
			/* singlebyte character. */
			got = 1;
		} else {
			/* the end of a multibyte character: keep the new state. */
			committed = scratch;
		}
		start = pos + 1;
		dest[pos] = got;
	}
}
356
357 /* do_index --- find index of a string */
358
359 NODE *
do_index(int nargs)360 do_index(int nargs)
361 {
362 NODE *s1, *s2;
363 const char *p1, *p2;
364 size_t l1, l2;
365 long ret;
366 bool do_single_byte = false;
367 mbstate_t mbs1, mbs2;
368
369 if (gawk_mb_cur_max > 1) {
370 memset(& mbs1, 0, sizeof(mbstate_t));
371 memset(& mbs2, 0, sizeof(mbstate_t));
372 }
373
374 POP_TWO_SCALARS(s1, s2);
375
376 if (do_lint) {
377 if ((fixtype(s1)->flags & STRING) == 0)
378 lintwarn(_("%s: received non-string first argument"), "index");
379 if ((fixtype(s2)->flags & STRING) == 0)
380 lintwarn(_("%s: received non-string second argument"), "index");
381 }
382
383 s1 = force_string(s1);
384 s2 = force_string(s2);
385
386 p1 = s1->stptr;
387 p2 = s2->stptr;
388 l1 = s1->stlen;
389 l2 = s2->stlen;
390 ret = 0;
391
392 /*
393 * Icky special case, index(foo, "") should return 1,
394 * since both bwk awk and mawk do, and since match("foo", "")
395 * returns 1. This makes index("", "") work, too, fwiw.
396 */
397 if (l2 == 0) {
398 ret = 1;
399 goto out;
400 }
401
402 if (gawk_mb_cur_max > 1) {
403 s1 = force_wstring(s1);
404 s2 = force_wstring(s2);
405 /*
406 * If we don't have valid wide character strings, use
407 * the real bytes.
408 */
409 do_single_byte = ((s1->wstlen == 0 && s1->stlen > 0)
410 || (s2->wstlen == 0 && s2->stlen > 0));
411 }
412
413 /* IGNORECASE will already be false if posix */
414 if (IGNORECASE) {
415 while (l1 > 0) {
416 if (l2 > l1)
417 break;
418 if (! do_single_byte && gawk_mb_cur_max > 1) {
419 const wchar_t *pos;
420
421 pos = wcasestrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
422 if (pos == NULL)
423 ret = 0;
424 else
425 ret = pos - s1->wstptr + 1; /* 1-based */
426 goto out;
427 } else {
428 /*
429 * Could use tolower(*p1) == tolower(*p2) here.
430 * See discussion in eval.c as to why not.
431 */
432 if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2]
433 && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) {
434 ret = 1 + s1->stlen - l1;
435 break;
436 }
437 l1--;
438 p1++;
439 }
440 }
441 } else {
442 while (l1 > 0) {
443 if (l2 > l1)
444 break;
445 if (*p1 == *p2
446 && (l2 == 1 || (l2 > 0 && memcmp(p1, p2, l2) == 0))) {
447 ret = 1 + s1->stlen - l1;
448 break;
449 }
450 if (! do_single_byte && gawk_mb_cur_max > 1) {
451 const wchar_t *pos;
452
453 pos = wstrstr(s1->wstptr, s1->wstlen, s2->wstptr, s2->wstlen);
454 if (pos == NULL)
455 ret = 0;
456 else
457 ret = pos - s1->wstptr + 1; /* 1-based */
458 goto out;
459 } else {
460 l1--;
461 p1++;
462 }
463 }
464 }
465 out:
466 DEREF(s1);
467 DEREF(s2);
468 return make_number((AWKNUM) ret);
469 }
470
471 /* double_to_int --- convert double to int, used several places */
472
473 double
double_to_int(double d)474 double_to_int(double d)
475 {
476 if (d >= 0)
477 d = floor(d);
478 else
479 d = ceil(d);
480 return d;
481 }
482
483 /* do_int --- convert double to int for awk */
484
485 NODE *
do_int(int nargs)486 do_int(int nargs)
487 {
488 NODE *tmp;
489 double d;
490
491 tmp = POP_SCALAR();
492 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
493 lintwarn(_("%s: received non-numeric argument"), "int");
494 d = force_number(tmp)->numbr;
495 d = double_to_int(d);
496 DEREF(tmp);
497 return make_number((AWKNUM) d);
498 }
499
500 /* do_isarray --- check if argument is array */
501
502 NODE *
do_isarray(int nargs)503 do_isarray(int nargs)
504 {
505 NODE *tmp;
506 int ret = 1;
507
508 tmp = POP();
509 if (tmp->type != Node_var_array) {
510 ret = 0;
511 // could be Node_var_new
512 if (tmp->type == Node_val)
513 DEREF(tmp);
514 }
515 return make_number((AWKNUM) ret);
516 }
517
518 /* do_length --- length of a string, array or $0 */
519
520 NODE *
do_length(int nargs)521 do_length(int nargs)
522 {
523 NODE *tmp;
524 size_t len;
525
526 tmp = POP();
527 if (tmp->type == Node_var_array) {
528 static bool warned = false;
529 unsigned long size;
530
531 if (do_posix)
532 fatal(_("length: received array argument"));
533 if (do_lint_extensions && ! warned) {
534 warned = true;
535 lintwarn(_("`length(array)' is a gawk extension"));
536 }
537
538 /*
539 * Support for deferred loading of array elements requires that
540 * we use the array length interface even though it isn't
541 * necessary for the built-in array types.
542 *
543 * 1/2015: The deferred arrays are gone, but this is probably
544 * still a good idea.
545 */
546
547 size = assoc_length(tmp);
548 return make_number(size);
549 }
550
551 assert(tmp->type == Node_val);
552
553 if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
554 lintwarn(_("%s: received non-string argument"), "length");
555 tmp = force_string(tmp);
556
557 if (gawk_mb_cur_max > 1) {
558 tmp = force_wstring(tmp);
559 len = tmp->wstlen;
560 /*
561 * If the bytes don't make a valid wide character
562 * string, fall back to the bytes themselves.
563 */
564 if (len == 0 && tmp->stlen > 0)
565 len = tmp->stlen;
566 } else
567 len = tmp->stlen;
568
569 DEREF(tmp);
570 return make_number((AWKNUM) len);
571 }
572
573 /* do_log --- the log function */
574
575 NODE *
do_log(int nargs)576 do_log(int nargs)
577 {
578 NODE *tmp;
579 double d, arg;
580
581 tmp = POP_SCALAR();
582 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
583 lintwarn(_("%s: received non-numeric argument"), "log");
584 arg = force_number(tmp)->numbr;
585 if (arg < 0.0)
586 warning(_("%s: received negative argument %g"), "log", arg);
587 d = log(arg);
588 DEREF(tmp);
589 return make_number((AWKNUM) d);
590 }
591
592
593 #ifdef HAVE_MPFR
594
595 /*
596 * mpz2mpfr --- convert an arbitrary-precision integer to a float
597 * without any loss of precision. The returned value is only
598 * good for temporary use.
599 */
600
601
602 static mpfr_ptr
mpz2mpfr(mpz_ptr zi)603 mpz2mpfr(mpz_ptr zi)
604 {
605 size_t prec;
606 static mpfr_t mpfrval;
607 static bool inited = false;
608 int tval;
609
610 /* estimate minimum precision for exact conversion */
611 prec = mpz_sizeinbase(zi, 2); /* most significant 1 bit position starting at 1 */
612 prec -= (size_t) mpz_scan1(zi, 0); /* least significant 1 bit index starting at 0 */
613 if (prec < MPFR_PREC_MIN)
614 prec = MPFR_PREC_MIN;
615 else if (prec > MPFR_PREC_MAX)
616 prec = MPFR_PREC_MAX;
617
618 if (! inited) {
619 mpfr_init2(mpfrval, prec);
620 inited = true;
621 } else
622 mpfr_set_prec(mpfrval, prec);
623 tval = mpfr_set_z(mpfrval, zi, ROUND_MODE);
624 IEEE_FMT(mpfrval, tval);
625 return mpfrval;
626 }
627 #endif
628
/*
 * format_tree() formats the arguments of sprintf according to
 * fmt_string, which provides a format like those of the printf
 * family in the C library. It returns a string node whose value
 * is the formatted string. Called by the sprintf function.
 *
 * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann
 * for taming this beast and making it compatible with ANSI C.
 */
638
639 NODE *
format_tree(const char * fmt_string,size_t n0,NODE ** the_args,long num_args)640 format_tree(
641 const char *fmt_string,
642 size_t n0,
643 NODE **the_args,
644 long num_args)
645 {
646 /* copy 'l' bytes from 's' to 'obufout' checking for space in the process */
647 /* difference of pointers should be of ptrdiff_t type, but let us be kind */
648 #define bchunk(s, l) if (l) { \
649 while ((l) > ofre) { \
650 size_t olen = obufout - obuf; \
651 erealloc(obuf, char *, osiz * 2, "format_tree"); \
652 ofre += osiz; \
653 osiz *= 2; \
654 obufout = obuf + olen; \
655 } \
656 memcpy(obufout, s, (size_t) (l)); \
657 obufout += (l); \
658 ofre -= (l); \
659 }
660
661 /* copy one byte from 's' to 'obufout' checking for space in the process */
662 #define bchunk_one(s) { \
663 if (ofre < 1) { \
664 size_t olen = obufout - obuf; \
665 erealloc(obuf, char *, osiz * 2, "format_tree"); \
666 ofre += osiz; \
667 osiz *= 2; \
668 obufout = obuf + olen; \
669 } \
670 *obufout++ = *s; \
671 --ofre; \
672 }
673
674 /* Is there space for something L big in the buffer? */
675 #define chksize(l) if ((l) >= ofre) { \
676 size_t olen = obufout - obuf; \
677 size_t delta = osiz+l-ofre; \
678 erealloc(obuf, char *, osiz + delta, "format_tree"); \
679 obufout = obuf + olen; \
680 ofre += delta; \
681 osiz += delta; \
682 }
683
684 size_t cur_arg = 0;
685 NODE *r = NULL;
686 int i, nc;
687 bool toofew = false;
688 char *obuf, *obufout;
689 size_t osiz, ofre, olen_final;
690 const char *chbuf;
691 const char *s0, *s1;
692 int cs1;
693 NODE *arg;
694 long fw, prec, argnum;
695 bool used_dollar;
696 bool lj, alt, have_prec, need_format;
697 long *cur = NULL;
698 uintmax_t uval;
699 bool sgn;
700 int base;
701 /*
702 * Although this is an array, the elements serve two different
703 * purposes. The first element is the general buffer meant
704 * to hold the entire result string. The second one is a
705 * temporary buffer for large floating point values. They
706 * could just as easily be separate variables, and the
707 * code might arguably be clearer.
708 */
709 struct {
710 char *buf;
711 size_t bufsize;
712 char stackbuf[30];
713 } cpbufs[2];
714 #define cpbuf cpbufs[0].buf
715 char *cend = &cpbufs[0].stackbuf[sizeof(cpbufs[0].stackbuf)];
716 char *cp;
717 const char *fill;
718 AWKNUM tmpval = 0.0;
719 char signchar = '\0';
720 size_t len;
721 bool zero_flag = false;
722 bool quote_flag = false;
723 int ii, jj;
724 char *chp;
725 size_t copy_count, char_count;
726 char *nan_inf_val;
727 bool magic_posix_flag;
728 #ifdef HAVE_MPFR
729 mpz_ptr zi;
730 mpfr_ptr mf;
731 #endif
732 enum { MP_NONE = 0, MP_INT_WITH_PREC = 1, MP_INT_WITHOUT_PREC, MP_FLOAT } fmt_type;
733
734 static const char sp[] = " ";
735 static const char zero_string[] = "0";
736 static const char lchbuf[] = "0123456789abcdef";
737 static const char Uchbuf[] = "0123456789ABCDEF";
738 static const char bad_modifiers[] = "hjlLtz";
739 static bool warned[sizeof(bad_modifiers)-1]; // auto-init to zero
740
741 bool modifier_seen[sizeof(bad_modifiers)-1];
742 #define modifier_index(c) (strchr(bad_modifiers, c) - bad_modifiers)
743
744 #define INITIAL_OUT_SIZE 64
745 emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree");
746 obufout = obuf;
747 osiz = INITIAL_OUT_SIZE;
748 ofre = osiz - 1;
749
750 cur_arg = 1;
751
752 {
753 size_t k;
754 for (k = 0; k < sizeof(cpbufs)/sizeof(cpbufs[0]); k++) {
755 cpbufs[k].bufsize = sizeof(cpbufs[k].stackbuf);
756 cpbufs[k].buf = cpbufs[k].stackbuf;
757 }
758 }
759
760 /*
761 * The point of this goop is to grow the buffer
762 * holding the converted number, so that large
763 * values don't overflow a fixed length buffer.
764 */
765 #define PREPEND(CH) do { \
766 if (cp == cpbufs[0].buf) { \
767 char *prev = cpbufs[0].buf; \
768 emalloc(cpbufs[0].buf, char *, 2*cpbufs[0].bufsize, \
769 "format_tree"); \
770 memcpy((cp = cpbufs[0].buf+cpbufs[0].bufsize), prev, \
771 cpbufs[0].bufsize); \
772 cpbufs[0].bufsize *= 2; \
773 if (prev != cpbufs[0].stackbuf) \
774 efree(prev); \
775 cend = cpbufs[0].buf+cpbufs[0].bufsize; \
776 } \
777 *--cp = (CH); \
778 } while(0)
779
780 /*
781 * Check first for use of `count$'.
782 * If plain argument retrieval was used earlier, choke.
783 * Otherwise, return the requested argument.
784 * If not `count$' now, but it was used earlier, choke.
785 * If this format is more than total number of args, choke.
786 * Otherwise, return the current argument.
787 */
788 #define parse_next_arg() { \
789 if (argnum > 0) { \
790 if (cur_arg > 1) { \
791 msg(_("fatal: must use `count$' on all formats or none")); \
792 goto out; \
793 } \
794 arg = the_args[argnum]; \
795 } else if (used_dollar) { \
796 msg(_("fatal: must use `count$' on all formats or none")); \
797 arg = 0; /* shutup the compiler */ \
798 goto out; \
799 } else if (cur_arg >= num_args) { \
800 arg = 0; /* shutup the compiler */ \
801 toofew = true; \
802 break; \
803 } else { \
804 arg = the_args[cur_arg]; \
805 cur_arg++; \
806 } \
807 }
808
809 need_format = false;
810 used_dollar = false;
811
812 s0 = s1 = fmt_string;
813 while (n0-- > 0) {
814 if (*s1 != '%') {
815 s1++;
816 continue;
817 }
818 need_format = true;
819 bchunk(s0, s1 - s0);
820 s0 = s1;
821 cur = &fw;
822 fw = 0;
823 prec = 0;
824 base = 0;
825 argnum = 0;
826 base = 0;
827 have_prec = false;
828 signchar = '\0';
829 zero_flag = false;
830 quote_flag = false;
831 nan_inf_val = NULL;
832 #ifdef HAVE_MPFR
833 mf = NULL;
834 zi = NULL;
835 #endif
836 fmt_type = MP_NONE;
837
838 lj = alt = false;
839 memset(modifier_seen, 0, sizeof(modifier_seen));
840 magic_posix_flag = false;
841 fill = sp;
842 cp = cend;
843 chbuf = lchbuf;
844 s1++;
845
846 retry:
847 if (n0-- == 0) /* ran out early! */
848 break;
849
850 switch (cs1 = *s1++) {
851 case (-1): /* dummy case to allow for checking */
852 check_pos:
853 if (cur != &fw)
854 break; /* reject as a valid format */
855 goto retry;
856 case '%':
857 need_format = false;
858 /*
859 * 29 Oct. 2002:
860 * The C99 standard pages 274 and 279 seem to imply that
861 * since there's no arg converted, the field width doesn't
862 * apply. The code already was that way, but this
863 * comment documents it, at least in the code.
864 */
865 if (do_lint) {
866 const char *msg = NULL;
867
868 if (fw && ! have_prec)
869 msg = _("field width is ignored for `%%' specifier");
870 else if (fw == 0 && have_prec)
871 msg = _("precision is ignored for `%%' specifier");
872 else if (fw && have_prec)
873 msg = _("field width and precision are ignored for `%%' specifier");
874
875 if (msg != NULL)
876 lintwarn("%s", msg);
877 }
878 bchunk_one("%");
879 s0 = s1;
880 break;
881
882 case '0':
883 /*
884 * Only turn on zero_flag if we haven't seen
885 * the field width or precision yet. Otherwise,
886 * screws up floating point formatting.
887 */
888 if (cur == & fw)
889 zero_flag = true;
890 if (lj)
891 goto retry;
892 /* FALL through */
893 case '1':
894 case '2':
895 case '3':
896 case '4':
897 case '5':
898 case '6':
899 case '7':
900 case '8':
901 case '9':
902 if (cur == NULL)
903 break;
904 if (prec >= 0)
905 *cur = cs1 - '0';
906 /*
907 * with a negative precision *cur is already set
908 * to -1, so it will remain negative, but we have
909 * to "eat" precision digits in any case
910 */
911 while (n0 > 0 && *s1 >= '0' && *s1 <= '9') {
912 --n0;
913 *cur = *cur * 10 + *s1++ - '0';
914 }
915 if (prec < 0) /* negative precision is discarded */
916 have_prec = false;
917 if (cur == &prec)
918 cur = NULL;
919 if (n0 == 0) /* badly formatted control string */
920 continue;
921 goto retry;
922 case '$':
923 if (do_traditional) {
924 msg(_("fatal: `$' is not permitted in awk formats"));
925 goto out;
926 }
927
928 if (cur == &fw) {
929 argnum = fw;
930 fw = 0;
931 used_dollar = true;
932 if (argnum <= 0) {
933 msg(_("fatal: argument index with `$' must be > 0"));
934 goto out;
935 }
936 if (argnum >= num_args) {
937 msg(_("fatal: argument index %ld greater than total number of supplied arguments"), argnum);
938 goto out;
939 }
940 } else {
941 msg(_("fatal: `$' not permitted after period in format"));
942 goto out;
943 }
944
945 goto retry;
946 case '*':
947 if (cur == NULL)
948 break;
949 if (! do_traditional && used_dollar && ! isdigit((unsigned char) *s1)) {
950 fatal(_("fatal: must use `count$' on all formats or none"));
951 break; /* silence warnings */
952 } else if (! do_traditional && isdigit((unsigned char) *s1)) {
953 int val = 0;
954
955 for (; n0 > 0 && *s1 && isdigit((unsigned char) *s1); s1++, n0--) {
956 val *= 10;
957 val += *s1 - '0';
958 }
959 if (*s1 != '$') {
960 msg(_("fatal: no `$' supplied for positional field width or precision"));
961 goto out;
962 } else {
963 s1++;
964 n0--;
965 }
966 if (val >= num_args) {
967 toofew = true;
968 break;
969 }
970 arg = the_args[val];
971 } else {
972 parse_next_arg();
973 }
974 (void) force_number(arg);
975 *cur = get_number_si(arg);
976 if (*cur < 0 && cur == &fw) {
977 *cur = -*cur;
978 lj = true;
979 }
980 if (cur == &prec) {
981 if (*cur >= 0)
982 have_prec = true;
983 else
984 have_prec = false;
985 cur = NULL;
986 }
987 goto retry;
988 case ' ': /* print ' ' or '-' */
989 /* 'space' flag is ignored */
990 /* if '+' already present */
991 if (signchar != false)
992 goto check_pos;
993 /* FALL THROUGH */
994 case '+': /* print '+' or '-' */
995 signchar = cs1;
996 goto check_pos;
997 case '-':
998 if (prec < 0)
999 break;
1000 if (cur == &prec) {
1001 prec = -1;
1002 goto retry;
1003 }
1004 fill = sp; /* if left justified then other */
1005 lj = true; /* filling is ignored */
1006 goto check_pos;
1007 case '.':
1008 if (cur != &fw)
1009 break;
1010 cur = ≺
1011 have_prec = true;
1012 goto retry;
1013 case '#':
1014 alt = true;
1015 goto check_pos;
1016 case '\'':
1017 #if defined(HAVE_LOCALE_H)
1018 quote_flag = true;
1019 goto check_pos;
1020 #else
1021 goto retry;
1022 #endif
1023 case 'h':
1024 case 'j':
1025 case 'l':
1026 case 'L':
1027 case 't':
1028 case 'z':
1029 if (modifier_seen[modifier_index(cs1)])
1030 break;
1031 else {
1032 int ind = modifier_index(cs1);
1033
1034 if (do_lint && ! warned[ind]) {
1035 lintwarn(_("`%c' is meaningless in awk formats; ignored"), cs1);
1036 warned[ind] = true;
1037 }
1038 if (do_posix) {
1039 msg(_("fatal: `%c' is not permitted in POSIX awk formats"), cs1);
1040 goto out;
1041 }
1042 }
1043 modifier_seen[modifier_index(cs1)] = true;
1044 goto retry;
1045
1046 case 'P':
1047 if (magic_posix_flag)
1048 break;
1049 magic_posix_flag = true;
1050 goto retry;
1051 case 'c':
1052 need_format = false;
1053 parse_next_arg();
1054 /* user input that looks numeric is numeric */
1055 fixtype(arg);
1056 if ((arg->flags & NUMBER) != 0) {
1057 uval = get_number_uj(arg);
1058 if (gawk_mb_cur_max > 1) {
1059 char buf[100];
1060 wchar_t wc;
1061 mbstate_t mbs;
1062 size_t count;
1063
1064 memset(& mbs, 0, sizeof(mbs));
1065
1066 /* handle systems with too small wchar_t */
1067 if (sizeof(wchar_t) < 4 && uval > 0xffff) {
1068 if (do_lint)
1069 lintwarn(
1070 _("[s]printf: value %g is too big for %%c format"),
1071 arg->numbr);
1072
1073 goto out0;
1074 }
1075
1076 wc = uval;
1077
1078 count = wcrtomb(buf, wc, & mbs);
1079 if (count == 0
1080 || count == (size_t) -1) {
1081 if (do_lint)
1082 lintwarn(
1083 _("[s]printf: value %g is not a valid wide character"),
1084 arg->numbr);
1085
1086 goto out0;
1087 }
1088
1089 memcpy(cpbuf, buf, count);
1090 prec = count;
1091 cp = cpbuf;
1092 goto pr_tail;
1093 }
1094 out0:
1095 ;
1096 /* else,
1097 fall through */
1098
1099 cpbuf[0] = uval;
1100 prec = 1;
1101 cp = cpbuf;
1102 goto pr_tail;
1103 }
1104 /*
1105 * As per POSIX, only output first character of a
1106 * string value. Thus, we ignore any provided
1107 * precision, forcing it to 1. (Didn't this
1108 * used to work? 6/2003.)
1109 */
1110 cp = arg->stptr;
1111 prec = 1;
1112 /*
1113 * First character can be multiple bytes if
1114 * it's a multibyte character. Grr.
1115 */
1116 if (gawk_mb_cur_max > 1) {
1117 mbstate_t state;
1118 size_t count;
1119
1120 memset(& state, 0, sizeof(state));
1121 count = mbrlen(cp, arg->stlen, & state);
1122 if (count != (size_t) -1 && count != (size_t) -2 && count > 0) {
1123 prec = count;
1124 /* may need to increase fw so that padding happens, see pr_tail code */
1125 if (fw > 0)
1126 fw += count - 1;
1127 }
1128 }
1129 goto pr_tail;
1130 case 's':
1131 need_format = false;
1132 parse_next_arg();
1133 arg = force_string(arg);
1134 if (fw == 0 && ! have_prec)
1135 prec = arg->stlen;
1136 else {
1137 char_count = mbc_char_count(arg->stptr, arg->stlen);
1138 if (! have_prec || prec > char_count)
1139 prec = char_count;
1140 }
1141 cp = arg->stptr;
1142 goto pr_tail;
1143 case 'd':
1144 case 'i':
1145 need_format = false;
1146 parse_next_arg();
1147 (void) force_number(arg);
1148
1149 /*
1150 * Check for Nan or Inf.
1151 */
1152 if (out_of_range(arg))
1153 goto out_of_range;
1154 #ifdef HAVE_MPFR
1155 if (is_mpg_float(arg))
1156 goto mpf0;
1157 else if (is_mpg_integer(arg))
1158 goto mpz0;
1159 else
1160 #endif
1161 tmpval = double_to_int(arg->numbr);
1162
1163 /*
1164 * ``The result of converting a zero value with a
1165 * precision of zero is no characters.''
1166 */
1167 if (have_prec && prec == 0 && tmpval == 0)
1168 goto pr_tail;
1169
1170 if (tmpval < 0) {
1171 tmpval = -tmpval;
1172 sgn = true;
1173 } else {
1174 if (tmpval == -0.0)
1175 /* avoid printing -0 */
1176 tmpval = 0.0;
1177 sgn = false;
1178 }
1179 /*
1180 * Use snprintf return value to tell if there
1181 * is enough room in the buffer or not.
1182 */
1183 while ((i = snprintf(cpbufs[1].buf,
1184 cpbufs[1].bufsize, "%.0f",
1185 tmpval)) >=
1186 cpbufs[1].bufsize) {
1187 if (cpbufs[1].buf == cpbufs[1].stackbuf)
1188 cpbufs[1].buf = NULL;
1189 if (i > 0) {
1190 cpbufs[1].bufsize += ((i > cpbufs[1].bufsize) ?
1191 i : cpbufs[1].bufsize);
1192 }
1193 else
1194 cpbufs[1].bufsize *= 2;
1195 assert(cpbufs[1].bufsize > 0);
1196 erealloc(cpbufs[1].buf, char *,
1197 cpbufs[1].bufsize, "format_tree");
1198 }
1199 if (i < 1)
1200 goto out_of_range;
1201 #if defined(HAVE_LOCALE_H)
1202 quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
1203 #endif
1204 chp = &cpbufs[1].buf[i-1];
1205 ii = jj = 0;
1206 do {
1207 PREPEND(*chp);
1208 chp--; i--;
1209 #if defined(HAVE_LOCALE_H)
1210 if (quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
1211 if (i) { /* only add if more digits coming */
1212 int k;
1213 const char *ts = loc.thousands_sep;
1214
1215 for (k = strlen(ts) - 1; k >= 0; k--) {
1216 PREPEND(ts[k]);
1217 }
1218 }
1219 if (loc.grouping[ii+1] == 0)
1220 jj = 0; /* keep using current val in loc.grouping[ii] */
1221 else if (loc.grouping[ii+1] == CHAR_MAX)
1222 quote_flag = false;
1223 else {
1224 ii++;
1225 jj = 0;
1226 }
1227 }
1228 #endif
1229 } while (i > 0);
1230
1231 /* add more output digits to match the precision */
1232 if (have_prec) {
1233 while (cend - cp < prec)
1234 PREPEND('0');
1235 }
1236
1237 if (sgn)
1238 PREPEND('-');
1239 else if (signchar)
1240 PREPEND(signchar);
1241 /*
1242 * When to fill with zeroes is of course not simple.
1243 * First: No zero fill if left-justifying.
1244 * Next: There seem to be two cases:
1245 * A '0' without a precision, e.g. %06d
1246 * A precision with no field width, e.g. %.10d
1247 * Any other case, we don't want to fill with zeroes.
1248 */
1249 if (! lj
1250 && ((zero_flag && ! have_prec)
1251 || (fw == 0 && have_prec)))
1252 fill = zero_string;
1253 if (prec > fw)
1254 fw = prec;
1255 prec = cend - cp;
1256 if (fw > prec && ! lj && fill != sp
1257 && (*cp == '-' || signchar)) {
1258 bchunk_one(cp);
1259 cp++;
1260 prec--;
1261 fw--;
1262 }
1263 goto pr_tail;
1264 case 'X':
1265 chbuf = Uchbuf; /* FALL THROUGH */
1266 case 'x':
1267 base += 6; /* FALL THROUGH */
1268 case 'u':
1269 base += 2; /* FALL THROUGH */
1270 case 'o':
1271 base += 8;
1272 need_format = false;
1273 parse_next_arg();
1274 (void) force_number(arg);
1275
1276 if (out_of_range(arg))
1277 goto out_of_range;
1278 #ifdef HAVE_MPFR
1279 if (is_mpg_integer(arg)) {
1280 mpz0:
1281 zi = arg->mpg_i;
1282
1283 if (cs1 != 'd' && cs1 != 'i') {
1284 if (mpz_sgn(zi) <= 0) {
1285 /*
1286 * Negative value or 0 requires special handling.
1287 * Unlike MPFR, GMP does not allow conversion
1288 * to (u)intmax_t. So we first convert GMP type to
1289 * a MPFR type.
1290 */
1291 mf = mpz2mpfr(zi);
1292 goto mpf1;
1293 }
1294 signchar = '\0'; /* Don't print '+' */
1295 }
1296
1297 /* See comments above about when to fill with zeros */
1298 zero_flag = (! lj
1299 && ((zero_flag && ! have_prec)
1300 || (fw == 0 && have_prec)));
1301
1302 fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
1303 goto fmt0;
1304
1305 } else if (is_mpg_float(arg)) {
1306 mpf0:
1307 mf = arg->mpg_numbr;
1308 if (! mpfr_number_p(mf)) {
1309 /* inf or NaN */
1310 cs1 = 'g';
1311 fmt_type = MP_FLOAT;
1312 goto fmt1;
1313 }
1314
1315 if (cs1 != 'd' && cs1 != 'i') {
1316 mpf1:
1317 /*
1318 * The output of printf("%#.0x", 0) is 0 instead of 0x, hence <= in
1319 * the comparison below.
1320 */
1321 if (mpfr_sgn(mf) <= 0) {
1322 if (! mpfr_fits_intmax_p(mf, ROUND_MODE)) {
1323 /* -ve number is too large */
1324 cs1 = 'g';
1325 fmt_type = MP_FLOAT;
1326 goto fmt1;
1327 }
1328
1329 tmpval = uval = (uintmax_t) mpfr_get_sj(mf, ROUND_MODE);
1330 if (! alt && have_prec && prec == 0 && tmpval == 0)
1331 goto pr_tail; /* printf("%.0x", 0) is no characters */
1332 goto int0;
1333 }
1334 signchar = '\0'; /* Don't print '+' */
1335 }
1336
1337 /* See comments above about when to fill with zeros */
1338 zero_flag = (! lj
1339 && ((zero_flag && ! have_prec)
1340 || (fw == 0 && have_prec)));
1341
1342 (void) mpfr_get_z(mpzval, mf, MPFR_RNDZ); /* convert to GMP integer */
1343 fmt_type = have_prec ? MP_INT_WITH_PREC : MP_INT_WITHOUT_PREC;
1344 zi = mpzval;
1345 goto fmt0;
1346 } else
1347 #endif
1348 tmpval = arg->numbr;
1349
1350 /*
1351 * ``The result of converting a zero value with a
1352 * precision of zero is no characters.''
1353 *
1354 * If I remember the ANSI C standard, though,
1355 * it says that for octal conversions
1356 * the precision is artificially increased
1357 * to add an extra 0 if # is supplied.
1358 * Indeed, in C,
1359 * printf("%#.0o\n", 0);
1360 * prints a single 0.
1361 */
1362 if (! alt && have_prec && prec == 0 && tmpval == 0)
1363 goto pr_tail;
1364
1365 if (tmpval < 0) {
1366 uval = (uintmax_t) (intmax_t) tmpval;
1367 if ((AWKNUM)(intmax_t)uval != double_to_int(tmpval))
1368 goto out_of_range;
1369 } else {
1370 uval = (uintmax_t) tmpval;
1371 if ((AWKNUM)uval != double_to_int(tmpval))
1372 goto out_of_range;
1373 }
1374 #ifdef HAVE_MPFR
1375 int0:
1376 #endif
1377 #if defined(HAVE_LOCALE_H)
1378 quote_flag = (quote_flag && loc.thousands_sep[0] != 0);
1379 #endif
1380 /*
1381 * When to fill with zeroes is of course not simple.
1382 * First: No zero fill if left-justifying.
1383 * Next: There seem to be two cases:
1384 * A '0' without a precision, e.g. %06d
1385 * A precision with no field width, e.g. %.10d
1386 * Any other case, we don't want to fill with zeroes.
1387 */
1388 if (! lj
1389 && ((zero_flag && ! have_prec)
1390 || (fw == 0 && have_prec)))
1391 fill = zero_string;
1392 ii = jj = 0;
1393 do {
1394 PREPEND(chbuf[uval % base]);
1395 uval /= base;
1396 #if defined(HAVE_LOCALE_H)
1397 if (base == 10 && quote_flag && loc.grouping[ii] && ++jj == loc.grouping[ii]) {
1398 if (uval) { /* only add if more digits coming */
1399 int k;
1400 const char *ts = loc.thousands_sep;
1401
1402 for (k = strlen(ts) - 1; k >= 0; k--) {
1403 PREPEND(ts[k]);
1404 }
1405 }
1406 if (loc.grouping[ii+1] == 0)
1407 jj = 0; /* keep using current val in loc.grouping[ii] */
1408 else if (loc.grouping[ii+1] == CHAR_MAX)
1409 quote_flag = false;
1410 else {
1411 ii++;
1412 jj = 0;
1413 }
1414 }
1415 #endif
1416 } while (uval > 0);
1417
1418 /* add more output digits to match the precision */
1419 if (have_prec) {
1420 while (cend - cp < prec)
1421 PREPEND('0');
1422 }
1423
1424 if (alt && tmpval != 0) {
1425 if (base == 16) {
1426 PREPEND(cs1);
1427 PREPEND('0');
1428 if (fill != sp) {
1429 bchunk(cp, 2);
1430 cp += 2;
1431 fw -= 2;
1432 }
1433 } else if (base == 8)
1434 PREPEND('0');
1435 }
1436 base = 0;
1437 if (prec > fw)
1438 fw = prec;
1439 prec = cend - cp;
1440 pr_tail:
1441 if (! lj) {
1442 while (fw > prec) {
1443 bchunk_one(fill);
1444 fw--;
1445 }
1446 }
1447 copy_count = prec;
1448 if (fw == 0 && ! have_prec)
1449 ;
1450 else if (gawk_mb_cur_max > 1) {
1451 if (cs1 == 's') {
1452 assert(cp == arg->stptr || cp == cpbuf);
1453 copy_count = mbc_byte_count(arg->stptr, prec);
1454 }
1455 /* prec was set by code for %c */
1456 /* else
1457 copy_count = prec; */
1458 }
1459 bchunk(cp, copy_count);
1460 while (fw > prec) {
1461 bchunk_one(fill);
1462 fw--;
1463 }
1464 s0 = s1;
1465 break;
1466
1467 out_of_range:
1468 /*
1469 * out of range - emergency use of %g format,
1470 * or format NaN and INF values.
1471 */
1472 nan_inf_val = format_nan_inf(arg, cs1);
1473 if (do_posix || magic_posix_flag || nan_inf_val == NULL) {
1474 if (do_lint && ! do_posix && ! magic_posix_flag)
1475 lintwarn(_("[s]printf: value %g is out of range for `%%%c' format"),
1476 (double) tmpval, cs1);
1477 tmpval = arg->numbr;
1478 if (strchr("aAeEfFgG", cs1) == NULL)
1479 cs1 = 'g';
1480 goto fmt1;
1481 } else {
1482 if (do_lint)
1483 lintwarn(_("[s]printf: value %s is out of range for `%%%c' format"),
1484 nan_inf_val, cs1);
1485 bchunk(nan_inf_val, strlen(nan_inf_val));
1486 s0 = s1;
1487 break;
1488 }
1489
1490 case 'F':
1491 #if ! defined(PRINTF_HAS_F_FORMAT) || PRINTF_HAS_F_FORMAT != 1
1492 cs1 = 'f';
1493 /* FALL THROUGH */
1494 #endif
1495 case 'g':
1496 case 'G':
1497 case 'e':
1498 case 'f':
1499 case 'E':
1500 #if defined(PRINTF_HAS_A_FORMAT) && PRINTF_HAS_A_FORMAT == 1
1501 case 'A':
1502 case 'a':
1503 {
1504 static bool warned = false;
1505
1506 if (do_lint && tolower(cs1) == 'a' && ! warned) {
1507 warned = true;
1508 lintwarn(_("%%%c format is POSIX standard but not portable to other awks"), cs1);
1509 }
1510 }
1511 #endif
1512 need_format = false;
1513 parse_next_arg();
1514 (void) force_number(arg);
1515
1516 if (! is_mpg_number(arg))
1517 tmpval = arg->numbr;
1518 #ifdef HAVE_MPFR
1519 else if (is_mpg_float(arg)) {
1520 mf = arg->mpg_numbr;
1521 fmt_type = MP_FLOAT;
1522 } else {
1523 /* arbitrary-precision integer, convert to MPFR float */
1524 assert(mf == NULL);
1525 mf = mpz2mpfr(arg->mpg_i);
1526 fmt_type = MP_FLOAT;
1527 }
1528 #endif
1529 if (out_of_range(arg))
1530 goto out_of_range;
1531
1532 fmt1:
1533 if (! have_prec)
1534 prec = DEFAULT_G_PRECISION;
1535 #ifdef HAVE_MPFR
1536 fmt0:
1537 #endif
1538 chksize(fw + prec + 11); /* 11 == slop */
1539 cp = cpbuf;
1540 *cp++ = '%';
1541 if (lj)
1542 *cp++ = '-';
1543 if (signchar)
1544 *cp++ = signchar;
1545 if (alt)
1546 *cp++ = '#';
1547 if (zero_flag)
1548 *cp++ = '0';
1549 if (quote_flag)
1550 *cp++ = '\'';
1551
1552 #if defined(LC_NUMERIC)
1553 if (quote_flag && ! use_lc_numeric)
1554 setlocale(LC_NUMERIC, "");
1555 #endif
1556
1557 bool need_to_add_thousands = false;
1558 switch (fmt_type) {
1559 #ifdef HAVE_MPFR
1560 case MP_INT_WITH_PREC:
1561 sprintf(cp, "*.*Z%c", cs1);
1562 while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
1563 (int) fw, (int) prec, zi)) >= (int) ofre)
1564 chksize(nc)
1565 need_to_add_thousands = true;
1566 break;
1567 case MP_INT_WITHOUT_PREC:
1568 sprintf(cp, "*Z%c", cs1);
1569 while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
1570 (int) fw, zi)) >= (int) ofre)
1571 chksize(nc)
1572 need_to_add_thousands = true;
1573 break;
1574 case MP_FLOAT:
1575 sprintf(cp, "*.*R*%c", cs1);
1576 while ((nc = mpfr_snprintf(obufout, ofre, cpbuf,
1577 (int) fw, (int) prec, ROUND_MODE, mf)) >= (int) ofre)
1578 chksize(nc)
1579 break;
1580 #endif
1581 default:
1582 if (have_prec || tolower(cs1) != 'a') {
1583 sprintf(cp, "*.*%c", cs1);
1584 while ((nc = snprintf(obufout, ofre, cpbuf,
1585 (int) fw, (int) prec,
1586 (double) tmpval)) >= (int) ofre)
1587 chksize(nc)
1588 } else {
1589 // For %a and %A, use the default precision if it
1590 // wasn't supplied by the user.
1591 sprintf(cp, "*%c", cs1);
1592 while ((nc = snprintf(obufout, ofre, cpbuf,
1593 (int) fw,
1594 (double) tmpval)) >= (int) ofre)
1595 chksize(nc)
1596 }
1597 }
1598
1599 #if defined(LC_NUMERIC)
1600 if (quote_flag && ! use_lc_numeric)
1601 setlocale(LC_NUMERIC, "C");
1602 #endif
1603 len = strlen(obufout);
1604 if (quote_flag && need_to_add_thousands) {
1605 const char *new_text = add_thousands(obufout, & loc);
1606
1607 len = strlen(new_text);
1608 chksize(len)
1609 strcpy(obufout, new_text);
1610 free((void *) new_text);
1611 }
1612
1613 ofre -= len;
1614 obufout += len;
1615 s0 = s1;
1616 break;
1617 default:
1618 if (do_lint && is_alpha(cs1))
1619 lintwarn(_("ignoring unknown format specifier character `%c': no argument converted"), cs1);
1620 break;
1621 }
1622 if (toofew) {
1623 msg("%s\n\t`%s'\n\t%*s%s",
1624 _("fatal: not enough arguments to satisfy format string"),
1625 fmt_string, (int) (s1 - fmt_string - 1), "",
1626 _("^ ran out for this one"));
1627 goto out;
1628 }
1629 }
1630 if (do_lint) {
1631 if (need_format)
1632 lintwarn(
1633 _("[s]printf: format specifier does not have control letter"));
1634 if (cur_arg < num_args)
1635 lintwarn(
1636 _("too many arguments supplied for format string"));
1637 }
1638 bchunk(s0, s1 - s0);
1639 olen_final = obufout - obuf;
1640 #define GIVE_BACK_SIZE (INITIAL_OUT_SIZE * 2)
1641 if (ofre > GIVE_BACK_SIZE)
1642 erealloc(obuf, char *, olen_final + 1, "format_tree");
1643 r = make_str_node(obuf, olen_final, ALREADY_MALLOCED);
1644 obuf = NULL;
1645 out:
1646 {
1647 size_t k;
1648 size_t count = sizeof(cpbufs)/sizeof(cpbufs[0]);
1649 for (k = 0; k < count; k++) {
1650 if (cpbufs[k].buf != cpbufs[k].stackbuf)
1651 efree(cpbufs[k].buf);
1652 }
1653 if (obuf != NULL)
1654 efree(obuf);
1655 }
1656
1657 if (r == NULL)
1658 gawk_exit(EXIT_FATAL);
1659 return r;
1660 }
1661
1662
1663 /* printf_common --- common code for sprintf and printf */
1664
1665 static NODE *
printf_common(int nargs)1666 printf_common(int nargs)
1667 {
1668 int i;
1669 NODE *r, *tmp;
1670
1671 assert(nargs > 0 && nargs <= max_args);
1672 for (i = 1; i <= nargs; i++) {
1673 tmp = args_array[nargs - i] = POP();
1674 if (tmp->type == Node_var_array) {
1675 while (--i > 0)
1676 DEREF(args_array[nargs - i]);
1677 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
1678 }
1679 }
1680
1681 args_array[0] = force_string(args_array[0]);
1682 r = format_tree(args_array[0]->stptr, args_array[0]->stlen, args_array, nargs);
1683 for (i = 0; i < nargs; i++)
1684 DEREF(args_array[i]);
1685 return r;
1686 }
1687
1688 /* do_sprintf --- perform sprintf */
1689
1690 NODE *
do_sprintf(int nargs)1691 do_sprintf(int nargs)
1692 {
1693 NODE *r;
1694
1695 if (nargs == 0)
1696 fatal(_("sprintf: no arguments"));
1697
1698 r = printf_common(nargs);
1699 if (r == NULL)
1700 gawk_exit(EXIT_FATAL);
1701 return r;
1702 }
1703
1704
1705 /* do_printf --- perform printf, including redirection */
1706
1707 void
do_printf(int nargs,int redirtype)1708 do_printf(int nargs, int redirtype)
1709 {
1710 FILE *fp = NULL;
1711 NODE *tmp;
1712 struct redirect *rp = NULL;
1713 int errflg = 0;
1714 NODE *redir_exp = NULL;
1715
1716 if (nargs == 0) {
1717 if (do_traditional) {
1718 if (do_lint)
1719 lintwarn(_("printf: no arguments"));
1720 if (redirtype != 0) {
1721 redir_exp = TOP();
1722 if (redir_exp->type != Node_val)
1723 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
1724 rp = redirect(redir_exp, redirtype, & errflg, true);
1725 DEREF(redir_exp);
1726 decr_sp();
1727 }
1728 return; /* bwk accepts it silently */
1729 }
1730 fatal(_("printf: no arguments"));
1731 }
1732
1733 if (redirtype != 0) {
1734 redir_exp = PEEK(nargs);
1735 if (redir_exp->type != Node_val)
1736 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
1737 rp = redirect(redir_exp, redirtype, & errflg, true);
1738 if (rp != NULL) {
1739 if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
1740 if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
1741 update_ERRNO_int(EBADF);
1742 return;
1743 }
1744 (void) close_rp(rp, CLOSE_ALL);
1745 fatal(_("printf: attempt to write to closed write end of two-way pipe"));
1746 }
1747 fp = rp->output.fp;
1748 }
1749 else if (errflg) {
1750 update_ERRNO_int(errflg);
1751 return;
1752 }
1753 } else if (do_debug) /* only the debugger can change the default output */
1754 fp = output_fp;
1755 else
1756 fp = stdout;
1757
1758 tmp = printf_common(nargs);
1759 if (redir_exp != NULL) {
1760 DEREF(redir_exp);
1761 decr_sp();
1762 }
1763 if (tmp != NULL) {
1764 if (fp == NULL) {
1765 DEREF(tmp);
1766 return;
1767 }
1768 efwrite(tmp->stptr, sizeof(char), tmp->stlen, fp, "printf", rp, true);
1769 if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
1770 rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
1771 DEREF(tmp);
1772 } else
1773 gawk_exit(EXIT_FATAL);
1774 }
1775
1776 /* do_sqrt --- do the sqrt function */
1777
1778 NODE *
do_sqrt(int nargs)1779 do_sqrt(int nargs)
1780 {
1781 NODE *tmp;
1782 double arg;
1783
1784 tmp = POP_SCALAR();
1785 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
1786 lintwarn(_("%s: received non-numeric argument"), "sqrt");
1787 arg = (double) force_number(tmp)->numbr;
1788 DEREF(tmp);
1789 if (arg < 0.0)
1790 warning(_("%s: received negative argument %g"), "sqrt", arg);
1791 return make_number((AWKNUM) sqrt(arg));
1792 }
1793
1794 /* do_substr --- do the substr function */
1795
1796 NODE *
do_substr(int nargs)1797 do_substr(int nargs)
1798 {
1799 NODE *t1;
1800 NODE *r;
1801 size_t indx;
1802 size_t length = 0;
1803 double d_index = 0, d_length = 0;
1804 size_t src_len;
1805
1806 if (nargs == 3) {
1807 t1 = POP_NUMBER();
1808 d_length = get_number_d(t1);
1809 DEREF(t1);
1810 }
1811
1812 t1 = POP_NUMBER();
1813 d_index = get_number_d(t1);
1814 DEREF(t1);
1815
1816 t1 = POP_STRING();
1817
1818 if (nargs == 3) {
1819 if (! (d_length >= 1)) {
1820 if (do_lint == DO_LINT_ALL)
1821 lintwarn(_("substr: length %g is not >= 1"), d_length);
1822 else if (do_lint == DO_LINT_INVALID && ! (d_length >= 0))
1823 lintwarn(_("substr: length %g is not >= 0"), d_length);
1824 DEREF(t1);
1825 /*
1826 * Return explicit null string instead of doing
1827 * dupnode(Nnull_string) so that if the result
1828 * is checked with the combination of length()
1829 * and lint, no error is reported about using
1830 * an uninitialized value. Same thing later, too.
1831 */
1832 return make_string("", 0);
1833 }
1834 if (do_lint) {
1835 if (double_to_int(d_length) != d_length)
1836 lintwarn(
1837 _("substr: non-integer length %g will be truncated"),
1838 d_length);
1839
1840 if (d_length > SIZE_MAX)
1841 lintwarn(
1842 _("substr: length %g too big for string indexing, truncating to %g"),
1843 d_length, (double) SIZE_MAX);
1844 }
1845 if (d_length < SIZE_MAX)
1846 length = d_length;
1847 else
1848 length = SIZE_MAX;
1849 }
1850
1851 /* the weird `! (foo)' tests help catch NaN values. */
1852 if (! (d_index >= 1)) {
1853 if (do_lint)
1854 lintwarn(_("substr: start index %g is invalid, using 1"),
1855 d_index);
1856 d_index = 1;
1857 }
1858 if (do_lint && double_to_int(d_index) != d_index)
1859 lintwarn(_("substr: non-integer start index %g will be truncated"),
1860 d_index);
1861
1862 /* awk indices are from 1, C's are from 0 */
1863 if (d_index <= SIZE_MAX)
1864 indx = d_index - 1;
1865 else
1866 indx = SIZE_MAX;
1867
1868 if (nargs == 2) { /* third arg. missing */
1869 /* use remainder of string */
1870 length = t1->stlen - indx; /* default to bytes */
1871 if (gawk_mb_cur_max > 1) {
1872 t1 = force_wstring(t1);
1873 if (t1->wstlen > 0) /* use length of wide char string if we have one */
1874 length = t1->wstlen - indx;
1875 }
1876 d_length = length; /* set here in case used in diagnostics, below */
1877 }
1878
1879 if (t1->stlen == 0) {
1880 /* substr("", 1, 0) produces a warning only if LINT_ALL */
1881 if (do_lint && (do_lint == DO_LINT_ALL || ((indx | length) != 0)))
1882 lintwarn(_("substr: source string is zero length"));
1883 DEREF(t1);
1884 return make_string("", 0);
1885 }
1886
1887 /* get total len of input string, for following checks */
1888 if (gawk_mb_cur_max > 1) {
1889 t1 = force_wstring(t1);
1890 src_len = t1->wstlen;
1891 } else
1892 src_len = t1->stlen;
1893
1894 if (indx >= src_len) {
1895 if (do_lint)
1896 lintwarn(_("substr: start index %g is past end of string"),
1897 d_index);
1898 DEREF(t1);
1899 return make_string("", 0);
1900 }
1901 if (length > src_len - indx) {
1902 if (do_lint)
1903 lintwarn(
1904 _("substr: length %g at start index %g exceeds length of first argument (%lu)"),
1905 d_length, d_index, (unsigned long int) src_len);
1906 length = src_len - indx;
1907 }
1908
1909 /* force_wstring() already called */
1910 if (gawk_mb_cur_max == 1 || t1->wstlen == t1->stlen)
1911 /* single byte case */
1912 r = make_string(t1->stptr + indx, length);
1913 else {
1914 /* multibyte case, more work */
1915 size_t result;
1916 wchar_t *wp;
1917 mbstate_t mbs;
1918 char *substr, *cp;
1919
1920 /*
1921 * Convert the wide chars in t1->wstptr back into m.b. chars.
1922 * This is pretty grotty, but it's the most straightforward
1923 * way to do things.
1924 */
1925 memset(& mbs, 0, sizeof(mbs));
1926 emalloc(substr, char *, (length * gawk_mb_cur_max) + 1, "do_substr");
1927 wp = t1->wstptr + indx;
1928 for (cp = substr; length > 0; length--) {
1929 result = wcrtomb(cp, *wp, & mbs);
1930 if (result == (size_t) -1) /* what to do? break seems best */
1931 break;
1932 cp += result;
1933 wp++;
1934 }
1935 *cp = '\0';
1936 r = make_str_node(substr, cp - substr, ALREADY_MALLOCED);
1937 }
1938
1939 DEREF(t1);
1940 return r;
1941 }
1942
1943 /* do_strftime --- format a time stamp */
1944
1945 NODE *
do_strftime(int nargs)1946 do_strftime(int nargs)
1947 {
1948 NODE *t1, *t2, *t3, *ret;
1949 struct tm *tm;
1950 time_t fclock;
1951 double clock_val;
1952 char *bufp;
1953 size_t buflen, bufsize;
1954 char buf[BUFSIZ];
1955 const char *format;
1956 int formatlen;
1957 bool do_gmt;
1958 NODE *val = NULL;
1959 NODE *sub = NULL;
1960 char save = '\0'; // initialize to avoid compiler warnings
1961 static const time_t time_t_min = TYPE_MINIMUM(time_t);
1962 static const time_t time_t_max = TYPE_MAXIMUM(time_t);
1963
1964 /* set defaults first */
1965 format = def_strftime_format; /* traditional date format */
1966 formatlen = strlen(format);
1967 (void) time(& fclock); /* current time of day */
1968 do_gmt = false;
1969
1970 if (PROCINFO_node != NULL) {
1971 sub = make_string("strftime", 8);
1972 val = in_array(PROCINFO_node, sub);
1973 unref(sub);
1974
1975 if (val != NULL) {
1976 if (do_lint && (fixtype(val)->flags & STRING) == 0)
1977 lintwarn(_("strftime: format value in PROCINFO[\"strftime\"] has numeric type"));
1978 val = force_string(val);
1979 format = val->stptr;
1980 formatlen = val->stlen;
1981 }
1982 }
1983
1984 t1 = t2 = t3 = NULL;
1985 if (nargs > 0) { /* have args */
1986 NODE *tmp;
1987
1988 if (nargs == 3) {
1989 t3 = POP_SCALAR();
1990 do_gmt = boolval(t3);
1991 DEREF(t3);
1992 }
1993
1994 if (nargs >= 2) {
1995 t2 = POP_SCALAR();
1996 if (do_lint && (fixtype(t2)->flags & NUMBER) == 0)
1997 lintwarn(_("%s: received non-numeric second argument"), "strftime");
1998 (void) force_number(t2);
1999 clock_val = get_number_d(t2);
2000 fclock = (time_t) clock_val;
2001 /*
2002 * Protect against negative value being assigned
2003 * to unsigned time_t.
2004 */
2005 if (clock_val < 0 && fclock > 0) {
2006 if (do_lint)
2007 lintwarn(_("strftime: second argument less than 0 or too big for time_t"));
2008 return make_string("", 0);
2009 }
2010
2011 /* And check that the value is in range */
2012 if (clock_val < time_t_min || clock_val > time_t_max) {
2013 if (do_lint)
2014 lintwarn(_("strftime: second argument out of range for time_t"));
2015 return make_string("", 0);
2016 }
2017
2018 DEREF(t2);
2019 }
2020
2021 tmp = POP_SCALAR();
2022 if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
2023 lintwarn(_("%s: received non-string first argument"), "strftime");
2024
2025 t1 = force_string(tmp);
2026 format = t1->stptr;
2027 formatlen = t1->stlen;
2028 if (formatlen == 0) {
2029 if (do_lint)
2030 lintwarn(_("strftime: received empty format string"));
2031 DEREF(t1);
2032 return make_string("", 0);
2033 }
2034 str_terminate(t1, save);
2035 }
2036
2037 if (do_gmt)
2038 tm = gmtime(& fclock);
2039 else
2040 tm = localtime(& fclock);
2041
2042 if (tm == NULL) {
2043 ret = make_string("", 0);
2044 goto done;
2045 }
2046
2047 bufp = buf;
2048 bufsize = sizeof(buf);
2049 for (;;) {
2050 *bufp = '\0';
2051 buflen = strftime(bufp, bufsize, format, tm);
2052 /*
2053 * buflen can be zero EITHER because there's not enough
2054 * room in the string, or because the control command
2055 * goes to the empty string. Make a reasonable guess that
2056 * if the buffer is 1024 times bigger than the length of the
2057 * format string, it's not failing for lack of room.
2058 * Thanks to Paul Eggert for pointing out this issue.
2059 */
2060 if (buflen > 0 || bufsize >= 1024 * formatlen)
2061 break;
2062 bufsize *= 2;
2063 if (bufp == buf)
2064 emalloc(bufp, char *, bufsize, "do_strftime");
2065 else
2066 erealloc(bufp, char *, bufsize, "do_strftime");
2067 }
2068 ret = make_string(bufp, buflen);
2069 if (bufp != buf)
2070 efree(bufp);
2071 done:
2072 if (t1) {
2073 str_restore(t1, save);
2074 DEREF(t1);
2075 }
2076 return ret;
2077 }
2078
2079 /* do_systime --- get the time of day */
2080
2081 NODE *
do_systime(int nargs ATTRIBUTE_UNUSED)2082 do_systime(int nargs ATTRIBUTE_UNUSED)
2083 {
2084 time_t lclock;
2085
2086 (void) time(& lclock);
2087 return make_number((AWKNUM) lclock);
2088 }
2089
2090 /* do_mktime --- turn a time string into a timestamp */
2091
2092 NODE *
do_mktime(int nargs)2093 do_mktime(int nargs)
2094 {
2095 NODE *t1, *t2;
2096 struct tm then;
2097 long year;
2098 int month, day, hour, minute, second, count;
2099 int dst = -1; /* default is unknown */
2100 time_t then_stamp;
2101 char save;
2102 bool do_gmt;
2103
2104 if (nargs == 2) {
2105 t2 = POP_SCALAR();
2106 do_gmt = boolval(t2);
2107 DEREF(t2);
2108 }
2109 else
2110 do_gmt = false;
2111 t1 = POP_SCALAR();
2112 if (do_lint && (fixtype(t1)->flags & STRING) == 0)
2113 lintwarn(_("%s: received non-string argument"), "mktime");
2114 t1 = force_string(t1);
2115
2116 save = t1->stptr[t1->stlen];
2117 t1->stptr[t1->stlen] = '\0';
2118
2119 count = sscanf(t1->stptr, "%ld %d %d %d %d %d %d",
2120 & year, & month, & day,
2121 & hour, & minute, & second,
2122 & dst);
2123
2124 // 9/2021: I've been told that according to the ISO 8601-1:2019 spec,
2125 // hour cannot be 24. So the check for hour > 23 is valid.
2126 if ( do_lint /* Ready? Set! Go: */
2127 && ( (second < 0 || second > 60)
2128 || (minute < 0 || minute > 59)
2129 || (hour < 0 || hour > 23)
2130 || (day < 1 || day > 31)
2131 || (month < 1 || month > 12) ))
2132 lintwarn(_("mktime: at least one of the values is out of the default range"));
2133
2134 t1->stptr[t1->stlen] = save;
2135 DEREF(t1);
2136
2137 if (count < 6
2138 || month == INT_MIN
2139 || year < INT_MIN + 1900
2140 || year - 1900 > INT_MAX)
2141 return make_number((AWKNUM) -1);
2142
2143 memset(& then, '\0', sizeof(then));
2144 then.tm_sec = second;
2145 then.tm_min = minute;
2146 then.tm_hour = hour;
2147 then.tm_mday = day;
2148 then.tm_mon = month - 1;
2149 then.tm_year = year - 1900;
2150 then.tm_isdst = dst;
2151
2152 then_stamp = (do_gmt ? timegm(& then) : mktime(& then));
2153 return make_number((AWKNUM) then_stamp);
2154 }
2155
2156 /* do_system --- run an external command */
2157
2158 NODE *
do_system(int nargs)2159 do_system(int nargs)
2160 {
2161 NODE *tmp;
2162 AWKNUM ret = 0; /* floating point on purpose, compat Unix awk */
2163 char *cmd;
2164 char save;
2165 int status;
2166
2167 if (do_sandbox)
2168 fatal(_("'system' function not allowed in sandbox mode"));
2169
2170 (void) flush_io(); /* so output is synchronous with gawk's */
2171 tmp = POP_SCALAR();
2172 if (do_lint && (fixtype(tmp)->flags & STRING) == 0)
2173 lintwarn(_("%s: received non-string argument"), "system");
2174 cmd = force_string(tmp)->stptr;
2175
2176 if (cmd && *cmd) {
2177 /* insure arg to system is zero-terminated */
2178 save = cmd[tmp->stlen];
2179 cmd[tmp->stlen] = '\0';
2180
2181 os_restore_mode(fileno(stdin));
2182 set_sigpipe_to_default();
2183
2184 status = system(cmd);
2185 /*
2186 * 3/2016. What to do with ret? It's never simple.
2187 * POSIX says to use the full return value. BWK awk
2188 * divides the result by 256. That normally gives the
2189 * exit status but gives a weird result for death-by-signal.
2190 * So we compromise as follows:
2191 */
2192 ret = status;
2193 if (status != -1) {
2194 if (do_posix)
2195 ; /* leave it alone, full 16 bits */
2196 else if (do_traditional)
2197 #ifdef __MINGW32__
2198 ret = (((unsigned)status) & ~0xC0000000);
2199 #else
2200 ret = (status / 256.0);
2201 #endif
2202 else
2203 ret = sanitize_exit_status(status);
2204 }
2205
2206 if ((BINMODE & BINMODE_INPUT) != 0)
2207 os_setbinmode(fileno(stdin), O_BINARY);
2208 ignore_sigpipe();
2209
2210 cmd[tmp->stlen] = save;
2211 }
2212 DEREF(tmp);
2213 return make_number((AWKNUM) ret);
2214 }
2215
2216 /* do_print --- print items, separated by OFS, terminated with ORS */
2217
2218 void
do_print(int nargs,int redirtype)2219 do_print(int nargs, int redirtype)
2220 {
2221 struct redirect *rp = NULL;
2222 int errflg = 0;
2223 FILE *fp = NULL;
2224 int i;
2225 NODE *redir_exp = NULL;
2226 NODE *tmp = NULL;
2227
2228 assert(nargs <= max_args);
2229
2230 if (redirtype != 0) {
2231 redir_exp = PEEK(nargs);
2232 if (redir_exp->type != Node_val)
2233 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(redir_exp));
2234 rp = redirect(redir_exp, redirtype, & errflg, true);
2235 if (rp != NULL) {
2236 if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
2237 if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
2238 update_ERRNO_int(EBADF);
2239 return;
2240 }
2241 (void) close_rp(rp, CLOSE_ALL);
2242 fatal(_("print: attempt to write to closed write end of two-way pipe"));
2243 }
2244 fp = rp->output.fp;
2245 }
2246 else if (errflg) {
2247 update_ERRNO_int(errflg);
2248 return;
2249 }
2250 } else if (do_debug) /* only the debugger can change the default output */
2251 fp = output_fp;
2252 else
2253 fp = stdout;
2254
2255 for (i = 1; i <= nargs; i++) {
2256 tmp = args_array[i] = POP();
2257 if (tmp->type == Node_var_array) {
2258 while (--i > 0)
2259 DEREF(args_array[i]);
2260 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(tmp));
2261 }
2262 // Let force_string_ofmt handle checking if things
2263 // are already valid.
2264 args_array[i] = force_string_ofmt(tmp);
2265 if (args_array[i] != tmp)
2266 DEREF(tmp);
2267 }
2268
2269 if (redir_exp != NULL) {
2270 DEREF(redir_exp);
2271 decr_sp();
2272 }
2273
2274 if (fp == NULL) {
2275 for (i = nargs; i > 0; i--)
2276 DEREF(args_array[i]);
2277 return;
2278 }
2279
2280 for (i = nargs; i > 0; i--) {
2281 efwrite(args_array[i]->stptr, sizeof(char), args_array[i]->stlen, fp, "print", rp, false);
2282 DEREF(args_array[i]);
2283 if (i != 1 && OFSlen > 0)
2284 efwrite(OFS, sizeof(char), (size_t) OFSlen,
2285 fp, "print", rp, false);
2286
2287 }
2288 if (ORSlen > 0)
2289 efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true);
2290
2291 if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
2292 rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
2293 }
2294
2295 /* do_print_rec --- special case printing of $0, for speed */
2296
2297 void
do_print_rec(int nargs,int redirtype)2298 do_print_rec(int nargs, int redirtype)
2299 {
2300 FILE *fp = NULL;
2301 NODE *f0;
2302 struct redirect *rp = NULL;
2303 int errflg = 0;
2304 NODE *redir_exp = NULL;
2305
2306 assert(nargs == 0);
2307 if (redirtype != 0) {
2308 redir_exp = TOP();
2309 rp = redirect(redir_exp, redirtype, & errflg, true);
2310 if (rp != NULL) {
2311 if ((rp->flag & RED_TWOWAY) != 0 && rp->output.fp == NULL) {
2312 if (is_non_fatal_redirect(redir_exp->stptr, redir_exp->stlen)) {
2313 update_ERRNO_int(EBADF);
2314 return;
2315 }
2316 (void) close_rp(rp, CLOSE_ALL);
2317 fatal(_("print: attempt to write to closed write end of two-way pipe"));
2318 }
2319 fp = rp->output.fp;
2320 }
2321 DEREF(redir_exp);
2322 decr_sp();
2323 } else
2324 fp = output_fp;
2325
2326 if (errflg) {
2327 update_ERRNO_int(errflg);
2328 return;
2329 }
2330
2331 if (fp == NULL)
2332 return;
2333
2334 if (! field0_valid || do_lint) // lint check for field access in END
2335 (void) get_field(0L, NULL);
2336
2337 f0 = fields_arr[0];
2338
2339 if (do_lint && (f0->flags & NULL_FIELD) != 0)
2340 lintwarn(_("reference to uninitialized field `$%d'"), 0);
2341
2342 efwrite(f0->stptr, sizeof(char), f0->stlen, fp, "print", rp, false);
2343
2344 if (ORSlen > 0)
2345 efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, true);
2346
2347 if (rp != NULL && (rp->flag & RED_TWOWAY) != 0)
2348 rp->output.gawk_fflush(rp->output.fp, rp->output.opaque);
2349 }
2350
2351
/* is_wupper --- iswupper as a real function, so its address can be passed around */

static int
is_wupper(wchar_t c)
{
	const int upper = iswupper((wint_t) c);

	return upper;
}
2359
/* is_wlower --- iswlower as a real function, so its address can be passed around */

static int
is_wlower(wchar_t c)
{
	const int lower = iswlower((wint_t) c);

	return lower;
}
2367
/* to_wlower --- function version of towlower for passing function pointers */

static int
to_wlower(wchar_t c)
{
	/* characters without a lower-case form come back unchanged */
	return towlower((wint_t) c);
}
2375
/* to_wupper --- function version of towupper for passing function pointers */

static int
to_wupper(wchar_t c)
{
	/* characters without an upper-case form come back unchanged */
	return towupper((wint_t) c);
}
2383
/*
 * wide_change_case --- generic case converter for wide characters
 *
 * Every one of the wlen characters in wstr for which is_x() is true
 * is replaced, in place, by the result of to_y().
 */

static void
wide_change_case(wchar_t *wstr,
		size_t wlen,
		int (*is_x)(wchar_t c),
		int (*to_y)(wchar_t c))
{
	while (wlen-- > 0) {
		if (is_x(*wstr))
			*wstr = to_y(*wstr);
		wstr++;
	}
}
2399
2400 /* wide_toupper --- map a wide string to upper case */
2401
2402 static void
wide_toupper(wchar_t * wstr,size_t wlen)2403 wide_toupper(wchar_t *wstr, size_t wlen)
2404 {
2405 wide_change_case(wstr, wlen, is_wlower, to_wupper);
2406 }
2407
2408 /* wide_tolower --- map a wide string to lower case */
2409
2410 static void
wide_tolower(wchar_t * wstr,size_t wlen)2411 wide_tolower(wchar_t *wstr, size_t wlen)
2412 {
2413 wide_change_case(wstr, wlen, is_wupper, to_wlower);
2414 }
2415
2416 /* do_tolower --- lower case a string */
2417
2418 NODE *
do_tolower(int nargs)2419 do_tolower(int nargs)
2420 {
2421 NODE *t1, *t2;
2422
2423 t1 = POP_SCALAR();
2424 if (do_lint && (fixtype(t1)->flags & STRING) == 0)
2425 lintwarn(_("%s: received non-string argument"), "tolower");
2426 t1 = force_string(t1);
2427 t2 = make_string(t1->stptr, t1->stlen);
2428
2429 if (gawk_mb_cur_max == 1) {
2430 unsigned char *cp, *cp2;
2431
2432 for (cp = (unsigned char *)t2->stptr,
2433 cp2 = (unsigned char *)(t2->stptr + t2->stlen);
2434 cp < cp2; cp++)
2435 if (isupper(*cp))
2436 *cp = tolower(*cp);
2437 } else {
2438 force_wstring(t2);
2439 wide_tolower(t2->wstptr, t2->wstlen);
2440 wstr2str(t2);
2441 }
2442
2443 DEREF(t1);
2444 return t2;
2445 }
2446
2447 /* do_toupper --- upper case a string */
2448
2449 NODE *
do_toupper(int nargs)2450 do_toupper(int nargs)
2451 {
2452 NODE *t1, *t2;
2453
2454 t1 = POP_SCALAR();
2455 if (do_lint && (fixtype(t1)->flags & STRING) == 0)
2456 lintwarn(_("%s: received non-string argument"), "toupper");
2457 t1 = force_string(t1);
2458 t2 = make_string(t1->stptr, t1->stlen);
2459
2460 if (gawk_mb_cur_max == 1) {
2461 unsigned char *cp, *cp2;
2462
2463 for (cp = (unsigned char *)t2->stptr,
2464 cp2 = (unsigned char *)(t2->stptr + t2->stlen);
2465 cp < cp2; cp++)
2466 if (islower(*cp))
2467 *cp = toupper(*cp);
2468 } else {
2469 force_wstring(t2);
2470 wide_toupper(t2->wstptr, t2->wstlen);
2471 wstr2str(t2);
2472 }
2473
2474 DEREF(t1);
2475 return t2;
2476 }
2477
2478 /* do_atan2 --- do the atan2 function */
2479
2480 NODE *
do_atan2(int nargs)2481 do_atan2(int nargs)
2482 {
2483 NODE *t1, *t2;
2484 double d1, d2;
2485
2486 POP_TWO_SCALARS(t1, t2);
2487 if (do_lint) {
2488 if ((fixtype(t1)->flags & NUMBER) == 0)
2489 lintwarn(_("%s: received non-numeric first argument"), "atan2");
2490 if ((fixtype(t2)->flags & NUMBER) == 0)
2491 lintwarn(_("%s: received non-numeric second argument"), "atan2");
2492 }
2493 d1 = force_number(t1)->numbr;
2494 d2 = force_number(t2)->numbr;
2495 DEREF(t1);
2496 DEREF(t2);
2497 return make_number((AWKNUM) atan2(d1, d2));
2498 }
2499
2500 /* do_sin --- do the sin function */
2501
2502 NODE *
do_sin(int nargs)2503 do_sin(int nargs)
2504 {
2505 NODE *tmp;
2506 double d;
2507
2508 tmp = POP_SCALAR();
2509 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
2510 lintwarn(_("%s: received non-numeric argument"), "sin");
2511 d = sin((double) force_number(tmp)->numbr);
2512 DEREF(tmp);
2513 return make_number((AWKNUM) d);
2514 }
2515
2516 /* do_cos --- do the cos function */
2517
2518 NODE *
do_cos(int nargs)2519 do_cos(int nargs)
2520 {
2521 NODE *tmp;
2522 double d;
2523
2524 tmp = POP_SCALAR();
2525 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
2526 lintwarn(_("%s: received non-numeric argument"), "cos");
2527 d = cos((double) force_number(tmp)->numbr);
2528 DEREF(tmp);
2529 return make_number((AWKNUM) d);
2530 }
2531
2532 /* do_rand --- do the rand function */
2533
2534 static bool firstrand = true;
2535 /* Some systems require this array to be integer aligned. Sigh. */
2536 #define SIZEOF_STATE 256
2537 static uint32_t istate[SIZEOF_STATE/sizeof(uint32_t)];
2538 static char *const state = (char *const) istate;
2539
2540 /* ARGSUSED */
2541 NODE *
do_rand(int nargs ATTRIBUTE_UNUSED)2542 do_rand(int nargs ATTRIBUTE_UNUSED)
2543 {
2544 double tmprand;
2545 #define RAND_DIVISOR ((double)GAWK_RANDOM_MAX+1.0)
2546 if (firstrand) {
2547 (void) initstate((unsigned) 1, state, SIZEOF_STATE);
2548 /* don't need to srandom(1), initstate() does it for us. */
2549 firstrand = false;
2550 setstate(state);
2551 }
2552 /*
2553 * Per historical practice and POSIX, return value N is
2554 *
2555 * 0 <= n < 1
2556 */
2557 /*
2558 * Date: Wed, 28 Aug 2013 17:52:46 -0700
2559 * From: Bob Jewett <jewett@bill.scs.agilent.com>
2560 *
2561 * Call random() twice to fill in more bits in the value
2562 * of the double. Also, there is a bug in random() such
2563 * that when the values of successive values are combined
2564 * like (rand1*rand2)^2, (rand3*rand4)^2, ... the
2565 * resulting time series is not white noise. The
2566 * following also seems to fix that bug.
2567 *
2568 * The add/subtract 0.5 keeps small bits from filling
2569 * below 2^-53 in the double, not that anyone should be
2570 * looking down there.
2571 *
2572 * Date: Wed, 25 Sep 2013 10:45:38 -0600 (MDT)
2573 * From: "Nelson H. F. Beebe" <beebe@math.utah.edu>
2574 * (4) The code is typical of many published fragments for converting
2575 * from integer to floating-point, and I discuss the serious pitfalls
2576 * in my book, because it leads to platform-dependent behavior at the
2577 * end points of the interval [0,1]
2578 *
2579 * (5) the documentation in the gawk info node says
2580 *
2581 * `rand()'
2582 * Return a random number. The values of `rand()' are uniformly
2583 * distributed between zero and one. The value could be zero but is
2584 * never one.(1)
2585 *
2586 * The division by RAND_DIVISOR may not guarantee that 1.0 is never
2587 * returned: the programmer forgot the platform-dependent issue of
2588 * rounding.
2589 *
2590 * For points 4 and 5, the safe way is a loop:
2591 *
2592 * double
2593 * rand(void) // return value in [0.0, 1.0)
2594 * {
2595 * value = internal_rand();
2596 *
2597 * while (value == 1.0)
2598 * value = internal_rand();
2599 *
2600 * return (value);
2601 * }
2602 */
2603
2604 do {
2605 long d1, d2;
2606 /*
2607 * Do the calls in predictable order to avoid
2608 * compiler differences in order of evaluation.
2609 */
2610 d1 = random();
2611 d2 = random();
2612 tmprand = 0.5 + ( (d1/RAND_DIVISOR + d2) / RAND_DIVISOR );
2613 tmprand -= 0.5;
2614 } while (tmprand == 1.0);
2615
2616 return make_number((AWKNUM) tmprand);
2617 }
2618
2619 /* do_srand --- seed the random number generator */
2620
2621 NODE *
do_srand(int nargs)2622 do_srand(int nargs)
2623 {
2624 NODE *tmp;
2625 static long save_seed = 1;
2626 long ret = save_seed; /* SVR4 awk srand returns previous seed */
2627
2628 if (firstrand) {
2629 (void) initstate((unsigned) 1, state, SIZEOF_STATE);
2630 /* don't need to srandom(1), we're changing the seed below */
2631 firstrand = false;
2632 (void) setstate(state);
2633 }
2634
2635 if (nargs == 0)
2636 srandom((unsigned int) (save_seed = (long) time((time_t *) 0)));
2637 else {
2638 tmp = POP_SCALAR();
2639 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
2640 lintwarn(_("%s: received non-numeric argument"), "srand");
2641 srandom((unsigned int) (save_seed = (long) force_number(tmp)->numbr));
2642 DEREF(tmp);
2643 }
2644 return make_number((AWKNUM) ret);
2645 }
2646
2647 /* do_match --- match a regexp, set RSTART and RLENGTH,
2648 * optional third arg is array filled with text of
2649 * subpatterns enclosed in parens and start and len info.
2650 */
2651
2652 NODE *
do_match(int nargs)2653 do_match(int nargs)
2654 {
2655 NODE *tre, *t1, *dest, *it;
2656 int rstart, len, ii;
2657 int rlength;
2658 Regexp *rp;
2659 regoff_t s;
2660 char *start;
2661 char *buf = NULL;
2662 char buff[100];
2663 size_t amt, oldamt = 0, ilen, slen;
2664 char *subsepstr;
2665 size_t subseplen;
2666
2667 dest = NULL;
2668 if (nargs == 3) { /* 3rd optional arg for the subpatterns */
2669 dest = POP_PARAM();
2670 if (dest->type != Node_var_array)
2671 fatal(_("match: third argument is not an array"));
2672 check_symtab_functab(dest, "match",
2673 _("%s: cannot use %s as third argument"));
2674 assoc_clear(dest);
2675 }
2676 tre = POP();
2677 rp = re_update(tre);
2678 t1 = POP_STRING();
2679
2680 rstart = research(rp, t1->stptr, 0, t1->stlen, RE_NEED_START);
2681 if (rstart >= 0) { /* match succeded */
2682 size_t *wc_indices = NULL;
2683
2684 rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); /* byte length */
2685 if (rlength > 0 && gawk_mb_cur_max > 1) {
2686 t1 = str2wstr(t1, & wc_indices);
2687 rlength = wc_indices[rstart + rlength - 1] - wc_indices[rstart] + 1;
2688 rstart = wc_indices[rstart];
2689 }
2690
2691 rstart++; /* now it's 1-based indexing */
2692
2693 /* Build the array only if the caller wants the optional subpatterns */
2694 if (dest != NULL) {
2695 subsepstr = SUBSEP_node->var_value->stptr;
2696 subseplen = SUBSEP_node->var_value->stlen;
2697
2698 for (ii = 0; ii < NUMSUBPATS(rp, t1->stptr); ii++) {
2699 /*
2700 * Loop over all the subpats; some of them may have
2701 * matched even if all of them did not.
2702 */
2703 if ((s = SUBPATSTART(rp, t1->stptr, ii)) != -1) {
2704 size_t subpat_start;
2705 size_t subpat_len;
2706
2707 start = t1->stptr + s;
2708 subpat_start = s;
2709 subpat_len = len = SUBPATEND(rp, t1->stptr, ii) - s;
2710 if (len > 0 && gawk_mb_cur_max > 1) {
2711 subpat_start = wc_indices[s];
2712 subpat_len = wc_indices[s + len - 1] - subpat_start + 1;
2713 }
2714
2715 it = make_string(start, len);
2716 it->flags |= USER_INPUT;
2717 assoc_set(dest, make_number((AWKNUM) (ii)), it);;
2718
2719 sprintf(buff, "%d", ii);
2720 ilen = strlen(buff);
2721 amt = ilen + subseplen + strlen("length") + 1;
2722
2723 if (oldamt == 0) {
2724 emalloc(buf, char *, amt, "do_match");
2725 } else if (amt > oldamt) {
2726 erealloc(buf, char *, amt, "do_match");
2727 }
2728 oldamt = amt;
2729 memcpy(buf, buff, ilen);
2730 memcpy(buf + ilen, subsepstr, subseplen);
2731 memcpy(buf + ilen + subseplen, "start", 6);
2732
2733 slen = ilen + subseplen + 5;
2734
2735 assoc_set(dest, make_string(buf, slen), make_number((AWKNUM) subpat_start + 1));
2736
2737 memcpy(buf, buff, ilen);
2738 memcpy(buf + ilen, subsepstr, subseplen);
2739 memcpy(buf + ilen + subseplen, "length", 7);
2740
2741 slen = ilen + subseplen + 6;
2742
2743 assoc_set(dest, make_string(buf, slen), make_number((AWKNUM) subpat_len));
2744 }
2745 }
2746
2747 efree(buf);
2748 }
2749 if (wc_indices != NULL)
2750 efree(wc_indices);
2751 } else { /* match failed */
2752 rstart = 0;
2753 rlength = -1;
2754 }
2755
2756 DEREF(t1);
2757 unref(RSTART_node->var_value);
2758 RSTART_node->var_value = make_number((AWKNUM) rstart);
2759 unref(RLENGTH_node->var_value);
2760 RLENGTH_node->var_value = make_number((AWKNUM) rlength);
2761 return make_number((AWKNUM) rstart);
2762 }
2763
2764 /* do_sub --- do the work for sub, gsub, and gensub */
2765
2766 /*
2767 * Gsub can be tricksy; particularly when handling the case of null strings.
2768 * The following awk code was useful in debugging problems. It is too bad
2769 * that it does not readily translate directly into the C code, below.
2770 *
2771 * #! /usr/local/bin/mawk -f
2772 *
2773 * BEGIN {
2774 * true = 1; false = 0
2775 * print "--->", mygsub("abc", "b+", "FOO")
2776 * print "--->", mygsub("abc", "x*", "X")
2777 * print "--->", mygsub("abc", "b*", "X")
2778 * print "--->", mygsub("abc", "c", "X")
2779 * print "--->", mygsub("abc", "c+", "X")
2780 * print "--->", mygsub("abc", "x*$", "X")
2781 * }
2782 *
2783 * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag)
2784 * {
2785 * origstr = str;
2786 * eosflag = nonzeroflag = false
2787 * while (match(str, regex)) {
2788 * if (RLENGTH > 0) { # easy case
2789 * nonzeroflag = true
2790 * if (RSTART == 1) { # match at front of string
2791 * newstr = newstr replace
2792 * } else {
2793 * newstr = newstr substr(str, 1, RSTART-1) replace
2794 * }
2795 * str = substr(str, RSTART+RLENGTH)
2796 * } else if (nonzeroflag) {
2797 * # last match was non-zero in length, and at the
2798 * # current character, we get a zero length match,
2799 * # which we don't really want, so skip over it
2800 * newstr = newstr substr(str, 1, 1)
2801 * str = substr(str, 2)
2802 * nonzeroflag = false
2803 * } else {
2804 * # 0-length match
2805 * if (RSTART == 1) {
2806 * newstr = newstr replace substr(str, 1, 1)
2807 * str = substr(str, 2)
2808 * } else {
2809 * return newstr str replace
2810 * }
2811 * }
2812 * if (length(str) == 0)
2813 * if (eosflag)
2814 * break
2815 * else
2816 * eosflag = true
2817 * }
2818 * if (length(str) > 0)
2819 * newstr = newstr str # rest of string
2820 *
2821 * return newstr
2822 * }
2823 */
2824
2825 /*
2826 * 1/2004: The gawk sub/gsub behavior dates from 1996, when we proposed it
2827 * for POSIX. The proposal fell through the cracks, and the 2001 POSIX
2828 * standard chose a more simple behavior.
2829 *
2830 * The relevant text is to be found on lines 6394-6407 (pages 166, 167) of the
2831 * 2001 standard:
2832 *
2833 * sub(ere, repl[, in ])
2834 * Substitute the string repl in place of the first instance of the
2835 * extended regular expression ERE in string in and return the number of
2836 * substitutions. An ampersand ('&') appearing in the string repl shall
2837 * be replaced by the string from in that matches the ERE. An ampersand
2838 * preceded with a backslash ('\') shall be interpreted as the literal
2839 * ampersand character. An occurrence of two consecutive backslashes shall
2840 * be interpreted as just a single literal backslash character. Any other
2841 * occurrence of a backslash (for example, preceding any other character)
2842 * shall be treated as a literal backslash character. Note that if repl is a
2843 * string literal (the lexical token STRING; see Grammar (on page 170)), the
2844 * handling of the ampersand character occurs after any lexical processing,
2845 * including any lexical backslash escape sequence processing. If in is
2846 * specified and it is not an lvalue (see Expressions in awk (on page 156)),
2847 * the behavior is undefined. If in is omitted, awk shall use the current
2848 * record ($0) in its place.
2849 *
2850 * 11/2010: The text in the 2008 standard is the same as just quoted.
2851 * However, POSIX behavior is now the default. This can change the behavior
2852 * of awk programs. The old behavior is not available.
2853 *
2854 * 7/2011: Reverted backslash handling to what it used to be. It was in
2855 * gawk for too long. Should have known better.
2856 */
2857
2858 /*
2859 * NB: `howmany' conflicts with a SunOS 4.x macro in <sys/param.h>.
2860 */
2861
2862 NODE *
do_sub(int nargs,unsigned int flags)2863 do_sub(int nargs, unsigned int flags)
2864 {
2865 char *scan;
2866 char *bp, *cp;
2867 char *buf = NULL;
2868 size_t buflen;
2869 char *matchend;
2870 size_t len;
2871 char *matchstart;
2872 char *text;
2873 size_t textlen = 0;
2874 char *repl;
2875 char *replend;
2876 size_t repllen;
2877 int sofar;
2878 int ampersands;
2879 int matches = 0;
2880 Regexp *rp;
2881 NODE *rep_node; /* replacement text */
2882 NODE *target; /* string to make sub. in; $0 if none given */
2883 NODE *tmp;
2884 NODE **lhs = NULL;
2885 long how_many = 1; /* one substitution for sub, also gensub default */
2886 bool global;
2887 long current;
2888 bool lastmatchnonzero;
2889 char *mb_indices = NULL;
2890
2891 if ((flags & GENSUB) != 0) {
2892 double d;
2893 NODE *glob_flag;
2894
2895 tmp = PEEK(3);
2896 rp = re_update(tmp);
2897
2898 target = POP_STRING(); /* original string */
2899
2900 glob_flag = POP_SCALAR(); /* value of global flag */
2901 if ( (glob_flag->flags & STRING) != 0
2902 && glob_flag->stlen > 0
2903 && (glob_flag->stptr[0] == 'g' || glob_flag->stptr[0] == 'G'))
2904 how_many = -1;
2905 else {
2906 (void) force_number(glob_flag);
2907 d = get_number_d(glob_flag);
2908 if (d < 1)
2909 how_many = 1;
2910 else if (d < LONG_MAX)
2911 how_many = d;
2912 else
2913 how_many = LONG_MAX;
2914 if (d <= 0) {
2915 (void) force_string(glob_flag);
2916 warning(_("gensub: third argument `%.*s' treated as 1"),
2917 (int) glob_flag->stlen,
2918 glob_flag->stptr);
2919 }
2920 }
2921 DEREF(glob_flag);
2922 } else {
2923 /* take care of regexp early, in case re_update is fatal */
2924
2925 tmp = PEEK(2);
2926 rp = re_update(tmp);
2927
2928 if ((flags & GSUB) != 0)
2929 how_many = -1;
2930
2931 /* original string */
2932
2933 if ((flags & LITERAL) != 0)
2934 target = POP_STRING();
2935 else {
2936 lhs = POP_ADDRESS();
2937 target = force_string(*lhs);
2938 }
2939 }
2940
2941 global = (how_many == -1);
2942
2943 rep_node = POP_STRING(); /* replacement text */
2944 decr_sp(); /* regexp, already updated above */
2945
2946 /* do the search early to avoid work on non-match */
2947 if (research(rp, target->stptr, 0, target->stlen, RE_NEED_START) == -1 ||
2948 RESTART(rp, target->stptr) > target->stlen)
2949 goto done;
2950
2951 text = target->stptr;
2952 textlen = target->stlen;
2953
2954 repl = rep_node->stptr;
2955 replend = repl + rep_node->stlen;
2956 repllen = replend - repl;
2957
2958 ampersands = 0;
2959
2960 /*
2961 * Some systems' malloc() can't handle being called with an
2962 * argument of zero. Thus we have to have some special case
2963 * code to check for `repllen == 0'. This can occur for
2964 * something like:
2965 * sub(/foo/, "", mystring)
2966 * for example.
2967 */
2968 if (gawk_mb_cur_max > 1 && repllen > 0) {
2969 emalloc(mb_indices, char *, repllen * sizeof(char), "do_sub");
2970 index_multibyte_buffer(repl, mb_indices, repllen);
2971 }
2972
2973 /* compute length of replacement string, number of ampersands */
2974 for (scan = repl; scan < replend; scan++) {
2975 if ((gawk_mb_cur_max == 1 || (repllen > 0 && mb_indices[scan - repl] == 1))
2976 && (*scan == '&')) {
2977 repllen--;
2978 ampersands++;
2979 } else if (*scan == '\\') {
2980 if ((flags & GENSUB) != 0) { /* gensub, behave sanely */
2981 if (isdigit((unsigned char) scan[1])) {
2982 ampersands++;
2983 scan++;
2984 } else { /* \q for any q --> q */
2985 repllen--;
2986 scan++;
2987 }
2988 } else if (do_posix) {
2989 /* \& --> &, \\ --> \ */
2990 if (scan[1] == '&' || scan[1] == '\\') {
2991 repllen--;
2992 scan++;
2993 } /* else
2994 leave alone, it goes into the output */
2995 } else {
2996 /* gawk default behavior since 1996 */
2997 if (strncmp(scan, "\\\\\\&", 4) == 0
2998 || strncmp(scan, "\\\\\\\\", 4) == 0) { /* 2016: fixed */
2999 /* \\\& --> \& */
3000 /* \\\\ --> \\ */
3001 repllen -= 2;
3002 scan += 3;
3003 } else if (strncmp(scan, "\\\\&", 3) == 0) {
3004 /* \\& --> \<string> */
3005 ampersands++;
3006 repllen--;
3007 scan += 2;
3008 } else if (scan[1] == '&') {
3009 /* \& --> & */
3010 repllen--;
3011 scan++;
3012 } /* else
3013 leave alone, it goes into the output */
3014 }
3015 }
3016 }
3017
3018 lastmatchnonzero = false;
3019
3020 /* guesstimate how much room to allocate; +1 forces > 0 */
3021 buflen = textlen + (ampersands + 1) * repllen + 1;
3022 emalloc(buf, char *, buflen + 1, "do_sub");
3023 buf[buflen] = '\0';
3024
3025 bp = buf;
3026 for (current = 1;; current++) {
3027 matches++;
3028 matchstart = target->stptr + RESTART(rp, target->stptr);
3029 matchend = target->stptr + REEND(rp, target->stptr);
3030
3031 /*
3032 * create the result, copying in parts of the original
3033 * string. note that length of replacement string can
3034 * vary since ampersand is actual text of regexp match.
3035 */
3036
3037 /*
3038 * add 1 to len to handle "empty" case where
3039 * matchend == matchstart and we force a match on a single
3040 * char. Use 'matchend - text' instead of 'matchstart - text'
3041 * because we may not actually make any substitution depending
3042 * on the 'global' and 'how_many' values.
3043 */
3044 len = matchend - text + repllen
3045 + ampersands * (matchend - matchstart) + 1;
3046 sofar = bp - buf;
3047 while (buflen < (sofar + len + 1)) {
3048 buflen *= 2;
3049 erealloc(buf, char *, buflen, "sub_common");
3050 bp = buf + sofar;
3051 }
3052 for (scan = text; scan < matchstart; scan++)
3053 *bp++ = *scan;
3054 if (global || current == how_many) {
3055 /*
3056 * If the current match matched the null string,
3057 * and the last match didn't and did a replacement,
3058 * and the match of the null string is at the front of
3059 * the text (meaning right after end of the previous
3060 * replacement), then skip this one.
3061 */
3062 if (matchstart == matchend
3063 && lastmatchnonzero
3064 && matchstart == text) {
3065 lastmatchnonzero = false;
3066 matches--;
3067 goto empty;
3068 }
3069 /*
3070 * If replacing all occurrences, or this is the
3071 * match we want, copy in the replacement text,
3072 * making substitutions as we go.
3073 */
3074 for (scan = repl; scan < replend; scan++)
3075 if (*scan == '&'
3076 /*
3077 * Don't test repllen here. A simple "&" could
3078 * end up with repllen == 0.
3079 */
3080 && (gawk_mb_cur_max == 1
3081 || mb_indices[scan - repl] == 1)
3082 ) {
3083 for (cp = matchstart; cp < matchend; cp++)
3084 *bp++ = *cp;
3085 } else if (*scan == '\\'
3086 && (gawk_mb_cur_max == 1
3087 || (repllen > 0 && mb_indices[scan - repl] == 1))
3088 ) {
3089 if (flags & GENSUB) { /* gensub, behave sanely */
3090 if (isdigit((unsigned char) scan[1])) {
3091 int dig = scan[1] - '0';
3092 if (dig < NUMSUBPATS(rp, target->stptr) && SUBPATSTART(rp, tp->stptr, dig) != -1) {
3093 char *start, *end;
3094
3095 start = target->stptr
3096 + SUBPATSTART(rp, target->stptr, dig);
3097 end = target->stptr
3098 + SUBPATEND(rp, target->stptr, dig);
3099
3100 for (cp = start; cp < end; cp++)
3101 *bp++ = *cp;
3102 }
3103 scan++;
3104 } else /* \q for any q --> q */
3105 *bp++ = *++scan;
3106 } else if (do_posix) {
3107 /* \& --> &, \\ --> \ */
3108 if (scan[1] == '&' || scan[1] == '\\')
3109 scan++;
3110 *bp++ = *scan;
3111 } else {
3112 /* gawk default behavior since 1996 */
3113 if (strncmp(scan, "\\\\\\&", 4) == 0
3114 || strncmp(scan, "\\\\\\\\", 4) == 0) { /* 2016: fixed */
3115 /* \\\& --> \& */
3116 /* \\\\ --> \\ */
3117 *bp++ = '\\';
3118 *bp++ = scan[3];
3119 scan += 3;
3120 } else if (strncmp(scan, "\\\\&", 3) == 0) {
3121 /* \\& --> \<string> */
3122 *bp++ = '\\';
3123 for (cp = matchstart; cp < matchend; cp++)
3124 *bp++ = *cp;
3125 scan += 2;
3126 } else if (scan[1] == '&') {
3127 /* \& --> & */
3128 *bp++ = '&';
3129 scan++;
3130 } else
3131 *bp++ = *scan;
3132 }
3133 } else
3134 *bp++ = *scan;
3135 if (matchstart != matchend)
3136 lastmatchnonzero = true;
3137 } else {
3138 /*
3139 * don't want this match, skip over it by copying
3140 * in current text.
3141 */
3142 for (cp = matchstart; cp < matchend; cp++)
3143 *bp++ = *cp;
3144 }
3145 empty:
3146 /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */
3147 if (matchstart == matchend && matchend < text + textlen) {
3148 *bp++ = *matchend;
3149 matchend++;
3150 }
3151 textlen = text + textlen - matchend;
3152 text = matchend;
3153
3154 #if 0
3155 if (bp - buf > sofar + len)
3156 fprintf(stderr, "debug: len = %zu, but used %ld\n", len, (long)((bp - buf) - (long)sofar));
3157 #endif
3158
3159 if ((current >= how_many && ! global)
3160 || ((long) textlen <= 0 && matchstart == matchend)
3161 || research(rp, target->stptr, text - target->stptr, textlen, RE_NEED_START) == -1)
3162 break;
3163
3164 }
3165 sofar = bp - buf;
3166 if (buflen < (sofar + textlen + 1)) {
3167 buflen = sofar + textlen + 1;
3168 erealloc(buf, char *, buflen, "do_sub");
3169 bp = buf + sofar;
3170 }
3171 /*
3172 * Note that text == matchend, since that assignment is made before
3173 * exiting the 'for' loop above. Thus we copy in the rest of the
3174 * original string.
3175 */
3176 for (scan = text; scan < text + textlen; scan++)
3177 *bp++ = *scan;
3178 *bp = '\0';
3179 textlen = bp - buf;
3180
3181 if (mb_indices != NULL)
3182 efree(mb_indices);
3183
3184 done:
3185 DEREF(rep_node);
3186
3187 if ((matches == 0 || (flags & LITERAL) != 0) && buf != NULL) {
3188 efree(buf);
3189 buf = NULL;
3190 }
3191
3192 if (flags & GENSUB) {
3193 if (matches > 0) {
3194 /* return the result string */
3195 DEREF(target);
3196 assert(buf != NULL);
3197 return make_str_node(buf, textlen, ALREADY_MALLOCED);
3198 } else if ((target->flags & STRING) == 0) {
3199 /* return a copy of original string */
3200 DEREF(target);
3201 return make_str_node(target->stptr, target->stlen, 0);
3202 }
3203
3204 /* return the original string */
3205 return target;
3206 }
3207
3208 /* For a string literal, must not change the original string. */
3209 if ((flags & LITERAL) != 0)
3210 DEREF(target);
3211 else if (matches > 0) {
3212 /*
3213 * 8/2021: There's a bit of a song and dance here. If someone does
3214 *
3215 * x = @/abc/
3216 * sub(/b/, "x", x)
3217 *
3218 * What should the type of x be after the call? Does it get converted
3219 * to string? Or does it remain a regexp? We've decided to let it
3220 * remain a regexp. In that case, we have to update the compiled
3221 * regular expression that it holds.
3222 */
3223 bool is_regex = false;
3224 NODE *target = *lhs;
3225
3226 if ((target->flags & REGEX) != 0) {
3227 is_regex = true;
3228
3229 // free old regex registers
3230 refree(target->typed_re->re_reg[0]);
3231 if (target->typed_re->re_reg[1] != NULL)
3232 refree(target->typed_re->re_reg[1]);
3233 freenode(target->typed_re);
3234 }
3235 unref(*lhs); // nuke original value
3236 if (is_regex)
3237 *lhs = make_typed_regex(buf, textlen);
3238 else
3239 *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
3240 }
3241
3242 return make_number((AWKNUM) matches);
3243 }
3244
3245 /* call_sub --- call do_sub indirectly */
3246
3247 NODE *
call_sub(const char * name,int nargs)3248 call_sub(const char *name, int nargs)
3249 {
3250 unsigned int flags = 0;
3251 NODE *regex, *replace, *glob_flag;
3252 NODE **lhs, *rhs;
3253 NODE *zero = make_number(0.0);
3254 NODE *result;
3255
3256 if (name[0] == 'g') {
3257 if (name[1] == 'e')
3258 flags = GENSUB;
3259 else
3260 flags = GSUB;
3261 }
3262
3263 bool need_free = false;
3264 if (flags == 0 || flags == GSUB) {
3265 /* sub or gsub */
3266 if (nargs != 2)
3267 fatal(_("%s: can be called indirectly only with two arguments"), name);
3268
3269 replace = POP_STRING();
3270 regex = POP(); /* the regex */
3271 /*
3272 * push regex
3273 * push replace
3274 * push $0
3275 */
3276 if ((regex->flags & REGEX) != 0)
3277 regex = regex->typed_re;
3278 else {
3279 regex = make_regnode(Node_regex, regex);
3280 need_free = true;
3281 }
3282 PUSH(regex);
3283 PUSH(replace);
3284 lhs = r_get_field(zero, (Func_ptr *) 0, true);
3285 nargs++;
3286 PUSH_ADDRESS(lhs);
3287 } else {
3288 /* gensub */
3289 if (nargs == 4)
3290 rhs = POP();
3291 else
3292 rhs = NULL;
3293 glob_flag = POP_STRING();
3294 replace = POP_STRING();
3295 regex = POP(); /* the regex */
3296 /*
3297 * push regex
3298 * push replace
3299 * push glob_flag
3300 * if (nargs = 3) {
3301 * push $0
3302 * nargs++
3303 * }
3304 */
3305 if ((regex->flags & REGEX) != 0)
3306 regex = regex->typed_re;
3307 else {
3308 regex = make_regnode(Node_regex, regex);
3309 need_free = true;
3310 }
3311 PUSH(regex);
3312 PUSH(replace);
3313 PUSH(glob_flag);
3314 if (rhs == NULL) {
3315 lhs = r_get_field(zero, (Func_ptr *) 0, true);
3316 rhs = *lhs;
3317 UPREF(rhs);
3318 PUSH(rhs);
3319 nargs++;
3320 }
3321 else
3322 PUSH(rhs);
3323 }
3324
3325 unref(zero);
3326 result = do_sub(nargs, flags);
3327
3328 if (need_free) {
3329 refree(regex->re_reg[0]);
3330 if (regex->re_reg[1] != NULL)
3331 refree(regex->re_reg[1]);
3332 freenode(regex);
3333 }
3334
3335 if (flags != GENSUB)
3336 reset_record();
3337 return result;
3338 }
3339
3340 /* call_match --- call do_match indirectly */
3341
3342 NODE *
call_match(int nargs)3343 call_match(int nargs)
3344 {
3345 NODE *regex, *text, *array;
3346 NODE *result;
3347
3348 regex = text = array = NULL;
3349 if (nargs == 3)
3350 array = POP();
3351 regex = POP();
3352
3353 /* Don't need to pop the string just to push it back ... */
3354
3355 bool need_free = false;
3356 if ((regex->flags & REGEX) != 0)
3357 regex = regex->typed_re;
3358 else {
3359 regex = make_regnode(Node_regex, regex);
3360 need_free = true;
3361 }
3362
3363 PUSH(regex);
3364
3365 if (array)
3366 PUSH(array);
3367
3368 result = do_match(nargs);
3369
3370 if (need_free) {
3371 refree(regex->re_reg[0]);
3372 if (regex->re_reg[1] != NULL)
3373 refree(regex->re_reg[1]);
3374 freenode(regex);
3375 }
3376
3377 return result;
3378 }
3379
3380 /* call_split_func --- call do_split or do_pat_split indirectly */
3381
3382 NODE *
call_split_func(const char * name,int nargs)3383 call_split_func(const char *name, int nargs)
3384 {
3385 NODE *regex, *seps;
3386 NODE *result;
3387
3388 regex = seps = NULL;
3389 if (nargs < 2)
3390 fatal(_("indirect call to %s requires at least two arguments"),
3391 name);
3392
3393 if (nargs == 4)
3394 seps = POP();
3395
3396 bool need_free = false;
3397 if (nargs >= 3) {
3398 regex = POP_STRING();
3399 if ((regex->flags & REGEX) != 0)
3400 regex = regex->typed_re;
3401 else {
3402 regex = make_regnode(Node_regex, regex);
3403 need_free = true;
3404 }
3405 } else {
3406 if (name[0] == 's') {
3407 regex = make_regnode(Node_regex, FS_node->var_value);
3408 regex->re_flags |= FS_DFLT;
3409 } else
3410 regex = make_regnode(Node_regex, FPAT_node->var_value);
3411
3412 need_free = true;
3413 nargs++;
3414 }
3415
3416 /* Don't need to pop the string or the data array */
3417
3418 PUSH(regex);
3419
3420 if (seps)
3421 PUSH(seps);
3422
3423 result = (name[0] == 's') ? do_split(nargs) : do_patsplit(nargs);
3424
3425 if (need_free) {
3426 refree(regex->re_reg[0]);
3427 if (regex->re_reg[1] != NULL)
3428 refree(regex->re_reg[1]);
3429 freenode(regex);
3430 }
3431
3432 return result;
3433 }
3434
3435 /* make_integer - Convert an integer to a number node. */
3436
3437 static NODE *
make_integer(uintmax_t n)3438 make_integer(uintmax_t n)
3439 {
3440 n = adjust_uint(n);
3441
3442 return make_number((AWKNUM) n);
3443 }
3444
3445 /* do_lshift --- perform a << operation */
3446
3447 NODE *
do_lshift(int nargs)3448 do_lshift(int nargs)
3449 {
3450 NODE *s1, *s2;
3451 uintmax_t uval, ushift, res;
3452 AWKNUM val, shift;
3453
3454 POP_TWO_SCALARS(s1, s2);
3455 if (do_lint) {
3456 if ((fixtype(s1)->flags & NUMBER) == 0)
3457 lintwarn(_("%s: received non-numeric first argument"), "lshift");
3458 if ((fixtype(s2)->flags & NUMBER) == 0)
3459 lintwarn(_("%s: received non-numeric second argument"), "lshift");
3460 }
3461
3462 val = force_number(s1)->numbr;
3463 shift = force_number(s2)->numbr;
3464 if (val < 0 || shift < 0)
3465 fatal(_("lshift(%f, %f): negative values are not allowed"), val, shift);
3466
3467 if (do_lint) {
3468 if (double_to_int(val) != val || double_to_int(shift) != shift)
3469 lintwarn(_("lshift(%f, %f): fractional values will be truncated"), val, shift);
3470 if (shift >= sizeof(uintmax_t) * CHAR_BIT)
3471 lintwarn(_("lshift(%f, %f): too large shift value will give strange results"), val, shift);
3472 }
3473
3474 DEREF(s1);
3475 DEREF(s2);
3476
3477 uval = (uintmax_t) val;
3478 ushift = (uintmax_t) shift;
3479
3480 res = uval << ushift;
3481 return make_integer(res);
3482 }
3483
3484 /* do_rshift --- perform a >> operation */
3485
3486 NODE *
do_rshift(int nargs)3487 do_rshift(int nargs)
3488 {
3489 NODE *s1, *s2;
3490 uintmax_t uval, ushift, res;
3491 AWKNUM val, shift;
3492
3493 POP_TWO_SCALARS(s1, s2);
3494 if (do_lint) {
3495 if ((fixtype(s1)->flags & NUMBER) == 0)
3496 lintwarn(_("%s: received non-numeric first argument"), "rshift");
3497 if ((fixtype(s2)->flags & NUMBER) == 0)
3498 lintwarn(_("%s: received non-numeric second argument"), "rshift");
3499 }
3500
3501 val = force_number(s1)->numbr;
3502 shift = force_number(s2)->numbr;
3503 if (val < 0 || shift < 0)
3504 fatal(_("rshift(%f, %f): negative values are not allowed"), val, shift);
3505
3506 if (do_lint) {
3507 if (double_to_int(val) != val || double_to_int(shift) != shift)
3508 lintwarn(_("rshift(%f, %f): fractional values will be truncated"), val, shift);
3509 if (shift >= sizeof(uintmax_t) * CHAR_BIT)
3510 lintwarn(_("rshift(%f, %f): too large shift value will give strange results"), val, shift);
3511 }
3512
3513 DEREF(s1);
3514 DEREF(s2);
3515
3516 uval = (uintmax_t) val;
3517 ushift = (uintmax_t) shift;
3518
3519 res = uval >> ushift;
3520 return make_integer(res);
3521 }
3522
3523 /* do_and --- perform an & operation */
3524
3525 NODE *
do_and(int nargs)3526 do_and(int nargs)
3527 {
3528 NODE *s1;
3529 uintmax_t res, uval;
3530 AWKNUM val;
3531
3532 res = ~(uintmax_t) 0; /* start off with all ones */
3533 if (nargs < 2)
3534 fatal(_("%s: called with less than two arguments"), "and");
3535
3536 for (; nargs > 0; nargs--) {
3537 s1 = POP_SCALAR();
3538 if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
3539 lintwarn(_("%s: argument %d is non-numeric"), "and", nargs);
3540
3541 val = force_number(s1)->numbr;
3542 if (val < 0)
3543 fatal(_("%s: argument %d negative value %g is not allowed"), "and", nargs, val);
3544
3545 uval = (uintmax_t) val;
3546 res &= uval;
3547
3548 DEREF(s1);
3549 }
3550
3551 return make_integer(res);
3552 }
3553
3554 /* do_or --- perform an | operation */
3555
3556 NODE *
do_or(int nargs)3557 do_or(int nargs)
3558 {
3559 NODE *s1;
3560 uintmax_t res, uval;
3561 AWKNUM val;
3562
3563 res = 0;
3564 if (nargs < 2)
3565 fatal(_("%s: called with less than two arguments"), "or");
3566
3567 for (; nargs > 0; nargs--) {
3568 s1 = POP_SCALAR();
3569 if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
3570 lintwarn(_("%s: argument %d is non-numeric"), "or", nargs);
3571
3572 val = force_number(s1)->numbr;
3573 if (val < 0)
3574 fatal(_("%s: argument %d negative value %g is not allowed"), "or", nargs, val);
3575
3576 uval = (uintmax_t) val;
3577 res |= uval;
3578
3579 DEREF(s1);
3580 }
3581
3582 return make_integer(res);
3583 }
3584
3585 /* do_xor --- perform an ^ operation */
3586
3587 NODE *
do_xor(int nargs)3588 do_xor(int nargs)
3589 {
3590 NODE *s1;
3591 uintmax_t res, uval;
3592 AWKNUM val;
3593
3594 if (nargs < 2)
3595 fatal(_("%s: called with less than two arguments"), "xor");
3596
3597 res = 0; /* start with all zeroes */
3598 for (; nargs > 0; nargs--) {
3599 s1 = POP_SCALAR();
3600 if (do_lint && (fixtype(s1)->flags & NUMBER) == 0)
3601 lintwarn(_("%s: argument %d is non-numeric"), "xor", nargs);
3602
3603 val = force_number(s1)->numbr;
3604 if (val < 0)
3605 fatal(_("%s: argument %d negative value %g is not allowed"), "xor", nargs, val);
3606
3607 uval = (uintmax_t) val;
3608 res ^= uval;
3609
3610 DEREF(s1);
3611 }
3612
3613 return make_integer(res);
3614 }
3615
3616 /* do_compl --- perform a ~ operation */
3617
3618 NODE *
do_compl(int nargs)3619 do_compl(int nargs)
3620 {
3621 NODE *tmp;
3622 double d;
3623 uintmax_t uval;
3624
3625 tmp = POP_SCALAR();
3626 if (do_lint && (fixtype(tmp)->flags & NUMBER) == 0)
3627 lintwarn(_("%s: received non-numeric argument"), "compl");
3628 d = force_number(tmp)->numbr;
3629 DEREF(tmp);
3630
3631 if (d < 0)
3632 fatal(_("compl(%f): negative value is not allowed"), d);
3633
3634 if (do_lint && double_to_int(d) != d)
3635 lintwarn(_("compl(%f): fractional value will be truncated"), d);
3636
3637 uval = (uintmax_t) d;
3638 uval = ~ uval;
3639 return make_integer(uval);
3640 }
3641
3642 /* do_strtonum --- the strtonum function */
3643
3644 NODE *
do_strtonum(int nargs)3645 do_strtonum(int nargs)
3646 {
3647 NODE *tmp;
3648 AWKNUM d;
3649
3650 tmp = fixtype(POP_SCALAR());
3651 if ((tmp->flags & NUMBER) != 0)
3652 d = (AWKNUM) tmp->numbr;
3653 else if (get_numbase(tmp->stptr, tmp->stlen, use_lc_numeric) != 10)
3654 d = nondec2awknum(tmp->stptr, tmp->stlen, NULL);
3655 else
3656 d = (AWKNUM) force_number(tmp)->numbr;
3657
3658 DEREF(tmp);
3659 return make_number((AWKNUM) d);
3660 }
3661
3662 /* nondec2awknum --- convert octal or hex value to double */
3663
3664 /*
3665 * Because of awk's concatenation rules and the way awk.y:yylex()
3666 * collects a number, this routine has to be willing to stop on the
3667 * first invalid character.
3668 */
3669
3670 AWKNUM
nondec2awknum(char * str,size_t len,char ** endptr)3671 nondec2awknum(char *str, size_t len, char **endptr)
3672 {
3673 AWKNUM retval = 0.0;
3674 char save;
3675 short val;
3676 char *start = str;
3677
3678 if (len >= 2 && *str == '0' && (str[1] == 'x' || str[1] == 'X')) {
3679 /*
3680 * User called strtonum("0x") or some such,
3681 * so just quit early.
3682 */
3683 if (len <= 2) {
3684 if (endptr)
3685 *endptr = start;
3686 return (AWKNUM) 0.0;
3687 }
3688
3689 for (str += 2, len -= 2; len > 0; len--, str++) {
3690 switch (*str) {
3691 case '0':
3692 case '1':
3693 case '2':
3694 case '3':
3695 case '4':
3696 case '5':
3697 case '6':
3698 case '7':
3699 case '8':
3700 case '9':
3701 val = *str - '0';
3702 break;
3703 case 'a':
3704 case 'b':
3705 case 'c':
3706 case 'd':
3707 case 'e':
3708 case 'f':
3709 val = *str - 'a' + 10;
3710 break;
3711 case 'A':
3712 case 'B':
3713 case 'C':
3714 case 'D':
3715 case 'E':
3716 case 'F':
3717 val = *str - 'A' + 10;
3718 break;
3719 default:
3720 if (endptr)
3721 *endptr = str;
3722 goto done;
3723 }
3724 retval = (retval * 16) + val;
3725 }
3726 if (endptr)
3727 *endptr = str;
3728 } else if (len >= 1 && *str == '0') {
3729 int l;
3730 // preserve len in case we go to decimal
3731 for (l = len; l > 0; l--) {
3732 if (! isdigit((unsigned char) *str)) {
3733 if (endptr)
3734 *endptr = str;
3735 goto done;
3736 }
3737 else if (*str == '8' || *str == '9') {
3738 str = start;
3739 goto decimal;
3740 }
3741 retval = (retval * 8) + (*str - '0');
3742 str++;
3743 }
3744 if (endptr)
3745 *endptr = str;
3746 } else {
3747 decimal:
3748 save = str[len];
3749 str[len] = '\0';
3750 retval = strtod(str, endptr);
3751 str[len] = save;
3752 }
3753 done:
3754 return retval;
3755 }
3756
3757 /* do_dcgettext, do_dcngettext --- handle i18n translations */
3758
3759 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3760
3761 static int
localecategory_from_argument(NODE * t)3762 localecategory_from_argument(NODE *t)
3763 {
3764 static const struct category_table {
3765 int val;
3766 const char *name;
3767 } cat_tab[] = {
3768 #ifdef LC_ALL
3769 { LC_ALL, "LC_ALL" },
3770 #endif /* LC_ALL */
3771 #ifdef LC_COLLATE
3772 { LC_COLLATE, "LC_COLLATE" },
3773 #endif /* LC_COLLATE */
3774 #ifdef LC_CTYPE
3775 { LC_CTYPE, "LC_CTYPE" },
3776 #endif /* LC_CTYPE */
3777 #ifdef LC_MESSAGES
3778 { LC_MESSAGES, "LC_MESSAGES" },
3779 #endif /* LC_MESSAGES */
3780 #ifdef LC_MONETARY
3781 { LC_MONETARY, "LC_MONETARY" },
3782 #endif /* LC_MONETARY */
3783 #ifdef LC_NUMERIC
3784 { LC_NUMERIC, "LC_NUMERIC" },
3785 #endif /* LC_NUMERIC */
3786 #ifdef LC_RESPONSE
3787 { LC_RESPONSE, "LC_RESPONSE" },
3788 #endif /* LC_RESPONSE */
3789 #ifdef LC_TIME
3790 { LC_TIME, "LC_TIME" },
3791 #endif /* LC_TIME */
3792 };
3793
3794 if (t != NULL) {
3795 int low, high, i, mid;
3796 char *category;
3797 int lc_cat = -1;
3798
3799 char save = t->stptr[t->stlen];
3800 t->stptr[t->stlen] = '\0';
3801 category = t->stptr;
3802
3803 /* binary search the table */
3804 low = 0;
3805 high = (sizeof(cat_tab) / sizeof(cat_tab[0])) - 1;
3806 while (low <= high) {
3807 mid = (low + high) / 2;
3808 i = strcmp(category, cat_tab[mid].name);
3809
3810 if (i < 0) /* category < mid */
3811 high = mid - 1;
3812 else if (i > 0) /* category > mid */
3813 low = mid + 1;
3814 else {
3815 lc_cat = cat_tab[mid].val;
3816 break;
3817 }
3818 }
3819 t->stptr[t->stlen] = save;
3820 if (lc_cat == -1) /* not there */
3821 fatal(_("dcgettext: `%s' is not a valid locale category"), category);
3822
3823 return lc_cat;
3824 } else
3825 return LC_MESSAGES;
3826 }
3827
3828 #endif
3829
3830 /*
3831 * awk usage is
3832 *
3833 * str = dcgettext(string [, domain [, category]])
3834 * str = dcngettext(string1, string2, number [, domain [, category]])
3835 *
3836 * Default domain is TEXTDOMAIN, default category is LC_MESSAGES.
3837 */
3838
3839 NODE *
do_dcgettext(int nargs)3840 do_dcgettext(int nargs)
3841 {
3842 NODE *tmp, *t1, *t2 = NULL;
3843 char *string;
3844 char *the_result;
3845 size_t reslen;
3846 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3847 int lc_cat;
3848 char *domain;
3849 char save1 = '\0', save2 = '\0';
3850
3851 if (nargs == 3) { /* third argument */
3852 tmp = POP_STRING();
3853 lc_cat = localecategory_from_argument(tmp);
3854 DEREF(tmp);
3855 } else
3856 lc_cat = LC_MESSAGES;
3857
3858 if (nargs >= 2) { /* second argument */
3859 t2 = POP_STRING();
3860 domain = t2->stptr;
3861 str_terminate(t2, save2);
3862 } else
3863 domain = TEXTDOMAIN;
3864 #else
3865 if (nargs == 3) {
3866 tmp = POP_STRING();
3867 DEREF(tmp);
3868 }
3869 if (nargs >= 2) {
3870 t2 = POP_STRING();
3871 DEREF(t2);
3872 }
3873 #endif
3874
3875 t1 = POP_STRING(); /* first argument */
3876 string = t1->stptr;
3877
3878 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3879 str_terminate(t1, save1);
3880 the_result = dcgettext(domain, string, lc_cat);
3881 str_restore(t1, save1);
3882 if (t2 != NULL) {
3883 str_restore(t2, save2);
3884 DEREF(t2);
3885 }
3886 reslen = strlen(the_result);
3887 #else
3888 the_result = string;
3889 reslen = t1->stlen;
3890 #endif
3891 DEREF(t1);
3892 return make_string(the_result, reslen);
3893 }
3894
3895
3896 NODE *
do_dcngettext(int nargs)3897 do_dcngettext(int nargs)
3898 {
3899 NODE *tmp, *t1, *t2, *t3;
3900 char *string1, *string2;
3901 unsigned long number;
3902 AWKNUM d;
3903 char *the_result;
3904 size_t reslen;
3905
3906 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3907 int lc_cat;
3908 char *domain;
3909 char save = '\0', save1 = '\0', save2 = '\0';
3910 bool saved_end = false;
3911
3912 if (nargs == 5) { /* fifth argument */
3913 tmp = POP_STRING();
3914 lc_cat = localecategory_from_argument(tmp);
3915 DEREF(tmp);
3916 } else
3917 lc_cat = LC_MESSAGES;
3918
3919 t3 = NULL;
3920 if (nargs >= 4) { /* fourth argument */
3921 t3 = POP_STRING();
3922 domain = t3->stptr;
3923 save = domain[t3->stlen];
3924 domain[t3->stlen] = '\0';
3925 saved_end = true;
3926 } else
3927 domain = TEXTDOMAIN;
3928 #else
3929 if (nargs == 5) {
3930 tmp = POP_STRING();
3931 DEREF(tmp);
3932 }
3933 if (nargs >= 4) {
3934 t3 = POP_STRING();
3935 DEREF(t3);
3936 }
3937 #endif
3938
3939 t2 = POP_NUMBER(); /* third argument */
3940 d = get_number_d(t2);
3941 DEREF(t2);
3942
3943 number = (unsigned long) double_to_int(d);
3944 t2 = POP_STRING(); /* second argument */
3945 string2 = t2->stptr;
3946 t1 = POP_STRING(); /* first argument */
3947 string1 = t1->stptr;
3948
3949 #if ENABLE_NLS && defined(LC_MESSAGES) && HAVE_DCGETTEXT
3950
3951 str_terminate(t1, save1);
3952 str_terminate(t2, save2);
3953 the_result = dcngettext(domain, string1, string2, number, lc_cat);
3954 reslen = strlen(the_result);
3955 str_restore(t1, save1);
3956 str_restore(t2, save2);
3957 if (saved_end)
3958 domain[t3->stlen] = save;
3959 if (t3 != NULL)
3960 DEREF(t3);
3961 #else
3962 if (number == 1) {
3963 the_result = string1;
3964 reslen = t1->stlen;
3965 } else {
3966 the_result = string2;
3967 reslen = t2->stlen;
3968 }
3969 #endif
3970 DEREF(t1);
3971 DEREF(t2);
3972 return make_string(the_result, reslen);
3973 }
3974
3975 /* do_bindtextdomain --- set the directory for a text domain */
3976
3977 /*
3978 * awk usage is
3979 *
3980 * binding = bindtextdomain(dir [, domain])
3981 *
3982 * If dir is "", pass NULL to C version.
3983 * Default domain is TEXTDOMAIN.
3984 */
3985
3986 NODE *
do_bindtextdomain(int nargs)3987 do_bindtextdomain(int nargs)
3988 {
3989 NODE *t1, *t2;
3990 const char *directory, *domain;
3991 const char *the_result;
3992
3993 t1 = t2 = NULL;
3994 /* set defaults */
3995 directory = NULL;
3996 domain = TEXTDOMAIN;
3997 char save = '\0', save1 = '\0';
3998
3999 if (nargs == 2) { /* second argument */
4000 t2 = POP_STRING();
4001 domain = (const char *) t2->stptr;
4002 save = t2->stptr[t2->stlen];
4003 t2->stptr[t2->stlen] = '\0';
4004 }
4005
4006 /* first argument */
4007 t1 = POP_STRING();
4008 if (t1->stlen > 0) {
4009 directory = (const char *) t1->stptr;
4010 str_terminate(t1, save1);
4011 }
4012
4013 the_result = bindtextdomain(domain, directory);
4014 if (directory)
4015 str_restore(t1, save1);
4016
4017 DEREF(t1);
4018 if (t2 != NULL) {
4019 t2->stptr[t2->stlen] = save;
4020 DEREF(t2);
4021 }
4022
4023 if (the_result == NULL)
4024 the_result = "";
4025
4026 return make_string(the_result, strlen(the_result));
4027 }
4028
4029 #ifdef SUPPLY_INTDIV
4030 /* do_intdiv --- do integer division, return quotient and remainder in dest array */
4031
4032 /*
4033 * We define the semantics as:
4034 * numerator = int(numerator)
4035 * denominator = int(denonmator)
4036 * quotient = int(numerator / denomator)
4037 * remainder = int(numerator % denomator)
4038 */
4039
4040 NODE *
do_intdiv(int nargs)4041 do_intdiv(int nargs)
4042 {
4043 NODE *numerator, *denominator, *result;
4044 double num, denom, quotient, remainder;
4045
4046 result = POP_PARAM();
4047 if (result->type != Node_var_array)
4048 fatal(_("intdiv: third argument is not an array"));
4049 assoc_clear(result);
4050
4051 denominator = POP_SCALAR();
4052 numerator = POP_SCALAR();
4053
4054 if (do_lint) {
4055 if ((fixtype(numerator)->flags & NUMBER) == 0)
4056 lintwarn(_("%s: received non-numeric first argument"), "intdiv");
4057 if ((fixtype(denominator)->flags & NUMBER) == 0)
4058 lintwarn(_("%s: received non-numeric second argument"), "intdiv");
4059 }
4060
4061 (void) force_number(numerator);
4062 (void) force_number(denominator);
4063 num = double_to_int(get_number_d(numerator));
4064 denom = double_to_int(get_number_d(denominator));
4065
4066 if (denom == 0.0)
4067 fatal(_("intdiv: division by zero attempted"));
4068
4069 quotient = double_to_int(num / denom);
4070 /*
4071 * FIXME: This code is duplicated, factor it out to a
4072 * separate function.
4073 */
4074 #ifdef HAVE_FMOD
4075 remainder = fmod(num, denom);
4076 #else /* ! HAVE_FMOD */
4077 (void) modf(num / denom, & remainder);
4078 remainder = num - remainder * denom;
4079 #endif /* ! HAVE_FMOD */
4080 remainder = double_to_int(remainder);
4081
4082 assoc_set(result, make_string("quotient", 8), make_number((AWKNUM) quotient));
4083
4084 assoc_set(result, make_string("remainder", 9), make_number((AWKNUM) remainder));
4085
4086 DEREF(denominator);
4087 DEREF(numerator);
4088
4089 return make_number((AWKNUM) 0.0);
4090 }
4091 #endif /* SUPPLY_INTDIV */
4092
/* do_typeof --- return a string with the type of the arg */

/*
 * Returns a new string node naming the type of the first argument:
 * "array", "number", "strnum", "regexp", "string", "unassigned",
 * "untyped", or "unknown" for an unexpected combination of flags.
 *
 * If a second argument is given it must be an array; it is cleared and
 * filled with debugging details: "array_type" for an array argument,
 * "flags" for a scalar value, and, when the argument is PROCINFO, a
 * "<name>_highwater" and "<name>_active" element for every entry of
 * nextfree[].
 */

NODE *
do_typeof(int nargs)
{
	NODE *arg;
	const char *res = NULL;
	bool deref = true;	/* whether arg gets DEREF'ed before returning */
	NODE *dbg;

	if (nargs == 2) {	/* 2nd optional arg for debugging */
		dbg = POP_PARAM();
		if (dbg->type != Node_var_array)
			fatal(_("typeof: second argument is not an array"));
		assoc_clear(dbg);
	}
	else
		dbg = NULL;
	arg = POP();
	switch (arg->type) {
	case Node_var_array:
		/* Node_var_array is never UPREF'ed */
		res = "array";
		deref = false;
		if (dbg) {
			assoc_set(dbg, make_string("array_type", 10), make_string(arg->array_funcs->name, strlen(arg->array_funcs->name)));
			if (arg == PROCINFO_node) {
				int i;
				for (i = 0; i < BLOCK_MAX; i++) {
					char *p;
					size_t nl = strlen(nextfree[i].name);
					/*
					 * save values before we create new
					 * array elements so that we have a
					 * snapshot at a consistent moment in
					 * time
					 */
					long hw = nextfree[i].highwater;
					long active;
#ifdef MEMDEBUG
					active = nextfree[i].active;
#else
					/* highwater minus the length of the free list */
					active = hw;
					{
						struct block_item *ip;
						for (ip = nextfree[i].freep; ip; ip = ip->freep)
							active--;
					}
#endif

/*
 * SETVAL(X, V) stores V in dbg under the key "<name>_X".
 * sizeof(#X) counts the terminating NUL of the stringized X, which
 * here accounts for the '_' separator, so l is the key's length.
 */
#define SETVAL(X, V) {	\
	size_t l = nl + sizeof(#X);	\
	emalloc(p, char *, l+1, "do_typeof");	\
	sprintf(p, "%s_" #X, nextfree[i].name);	\
	assoc_set(dbg, make_str_node(p, l, ALREADY_MALLOCED), make_number((AWKNUM) (V)));	\
}
					SETVAL(highwater, hw)
					SETVAL(active, active)
#undef SETVAL
				}
			}
		}
		break;
	case Node_val:
		/* classify a scalar by its type-related flags only */
		switch (fixtype(arg)->flags & (STRING|NUMBER|USER_INPUT|REGEX)) {
		case NUMBER:
			res = "number";
			break;
		case NUMBER|USER_INPUT:
			res = "strnum";
			break;
		case REGEX:
			res = "regexp";
			break;
		case STRING:
			res = "string";
			// fall through
		case NUMBER|STRING:
			/* the null string and null fields report as unassigned */
			if (arg == Nnull_string || (arg->flags & NULL_FIELD) != 0) {
				res = "unassigned";
				break;
			}
			/* fall through */
		default:
			/*
			 * res is still NULL unless we arrived here from the
			 * STRING case, which keeps its "string" answer.
			 */
			if (res == NULL) {
				warning(_("typeof detected invalid flags combination `%s'; please file a bug report"), flags2str(arg->flags));
				res = "unknown";
			}
			break;
		}
		if (dbg) {
			const char *s = flags2str(arg->flags);
			assoc_set(dbg, make_string("flags", 5), make_string(s, strlen(s)));
		}
		break;
	case Node_var_new:
	case Node_array_ref:
		res = "untyped";
		deref = false;
		break;
	case Node_var:
		/*
		 * Note: this doesn't happen because the function calling code
		 * in interpret.h pushes Node_var->var_value.
		 */
		fatal(_("typeof: invalid argument type `%s'"),
				nodetype2str(arg->type));
		break;
	default:
		fatal(_("typeof: unknown argument type `%s'"),
				nodetype2str(arg->type));
		break;
	}

	if (deref)
		DEREF(arg);
	return make_string(res, strlen(res));
}
4211
4212 /* mbc_byte_count --- return number of bytes for corresponding numchars multibyte characters */
4213
4214 static size_t
mbc_byte_count(const char * ptr,size_t numchars)4215 mbc_byte_count(const char *ptr, size_t numchars)
4216 {
4217 mbstate_t cur_state;
4218 size_t sum = 0;
4219 int mb_len;
4220
4221 memset(& cur_state, 0, sizeof(cur_state));
4222
4223 assert(gawk_mb_cur_max > 1);
4224 mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
4225 if (mb_len <= 0)
4226 return numchars; /* no valid m.b. char */
4227
4228 for (; numchars > 0; numchars--) {
4229 mb_len = mbrlen(ptr, numchars * gawk_mb_cur_max, &cur_state);
4230 if (mb_len <= 0)
4231 break;
4232 sum += mb_len;
4233 ptr += mb_len;
4234 }
4235
4236 return sum;
4237 }
4238
4239 /* mbc_char_count --- return number of m.b. chars in string, up to numbytes bytes */
4240
4241 static size_t
mbc_char_count(const char * ptr,size_t numbytes)4242 mbc_char_count(const char *ptr, size_t numbytes)
4243 {
4244 mbstate_t cur_state;
4245 size_t sum = 0;
4246 int mb_len;
4247
4248 if (gawk_mb_cur_max == 1)
4249 return numbytes;
4250
4251 memset(& cur_state, 0, sizeof(cur_state));
4252
4253 mb_len = mbrlen(ptr, numbytes, &cur_state);
4254 if (mb_len <= 0)
4255 return numbytes; /* no valid m.b. char */
4256
4257 while (numbytes > 0) {
4258 mb_len = mbrlen(ptr, numbytes, &cur_state);
4259 if (mb_len <= 0)
4260 break;
4261 sum++;
4262 ptr += mb_len;
4263 numbytes -= mb_len;
4264 }
4265
4266 return sum;
4267 }
4268
/* sanitize_exit_status --- convert a 16 bit Unix exit status into something reasonable */

/*
 * status is a wait-style status word.  The result is:
 *   - the exit code, for a normal exit;
 *   - the signal number plus 256 for death by signal, or plus 512
 *     when WCOREDUMP is available and reports a core dump;
 *   - 0 for anything else.
 */

int
sanitize_exit_status(int status)
{
	if (WIFEXITED(status))
		return WEXITSTATUS(status);	/* normal exit */

	if (WIFSIGNALED(status)) {
		/* use 256 since exit values are 8 bits */
		int offset = 256;

#ifdef WCOREDUMP
		if (WCOREDUMP(status))
			offset = 512;
#endif
		return WTERMSIG(status) + offset;
	}

	return 0;	/* shouldn't get here */
}
4289
4290 /* out_of_range --- return true if a value is out of range */
4291
4292 bool
out_of_range(NODE * n)4293 out_of_range(NODE *n)
4294 {
4295 #ifdef HAVE_MPFR
4296 if (is_mpg_integer(n))
4297 return false;
4298 else if (is_mpg_float(n))
4299 return (! mpfr_number_p(n->mpg_numbr));
4300 else
4301 #endif
4302 return (isnan(n->numbr) || isinf(n->numbr));
4303 }
4304
4305 /* format_nan_inf --- format NaN and INF values */
4306
4307 char *
format_nan_inf(NODE * n,char format)4308 format_nan_inf(NODE *n, char format)
4309 {
4310 static char buf[100];
4311 double val = n->numbr;
4312
4313 #ifdef HAVE_MPFR
4314 if (is_mpg_integer(n))
4315 return NULL;
4316 else if (is_mpg_float(n)) {
4317 if (mpfr_nan_p(n->mpg_numbr)) {
4318 strcpy(buf, mpfr_signbit(n->mpg_numbr) != 0 ? "-nan" : "+nan");
4319
4320 goto fmt;
4321 } else if (mpfr_inf_p(n->mpg_numbr)) {
4322 strcpy(buf, mpfr_sgn(n->mpg_numbr) < 0 ? "-inf" : "+inf");
4323
4324 goto fmt;
4325 } else
4326 return NULL;
4327 }
4328 /* else
4329 fallthrough */
4330 #endif
4331
4332 if (isnan(val)) {
4333 strcpy(buf, signbit(val) != 0 ? "-nan" : "+nan");
4334
4335 // fall through to end
4336 } else if (isinf(val)) {
4337 strcpy(buf, val < 0 ? "-inf" : "+inf");
4338
4339 // fall through to end
4340 } else
4341 return NULL;
4342
4343 #ifdef HAVE_MPFR
4344 fmt:
4345 #endif
4346 if (isupper(format)) {
4347 int i;
4348
4349 for (i = 0; buf[i] != '\0'; i++)
4350 buf[i] = toupper(buf[i]);
4351 }
4352 return buf;
4353 }
4354
4355
4356 /* check_symtab_functab --- check if dest is SYMTAB or FUNCTAB, fatal if so */
4357
4358 void
check_symtab_functab(NODE * dest,const char * fname,const char * msg)4359 check_symtab_functab(NODE *dest, const char *fname, const char *msg)
4360 {
4361 if (dest == symbol_table)
4362 fatal(msg, fname, "SYMTAB");
4363 else if (dest == func_table)
4364 fatal(msg, fname, "FUNCTAB");
4365 }
4366
/* reverse --- reverse the contents of a string in place */

/*
 * str must be a writable, NUL-terminated string.  Strings of length
 * 0 or 1 are left untouched.  The length is kept in a size_t so that
 * it is never truncated and the empty string needs no wraparound.
 */

static void
reverse(char *str)
{
	size_t len = strlen(str);
	char *lo, *hi;

	if (len < 2)
		return;

	/* swap from both ends until the two pointers meet */
	for (lo = str, hi = str + len - 1; lo < hi; lo++, hi--) {
		char tmp = *lo;

		*lo = *hi;
		*hi = tmp;
	}
}
4381
4382 /* add_thousands --- add the thousands separator. Needed for MPFR %d format */
4383
4384 /*
4385 * Copy the source string into the destination string, backwards,
4386 * adding the thousands separator at the right points. Then reverse
4387 * the string when done. This gives us much cleaner code than trying
4388 * to work through the string backwards. (We tried it, it was yucky.)
4389 */
4390
4391 static const char *
add_thousands(const char * original,struct lconv * loc)4392 add_thousands(const char *original, struct lconv *loc)
4393 {
4394 size_t orig_len = strlen(original);
4395 size_t new_len = orig_len + (orig_len * strlen(loc->thousands_sep)) + 1; // worst case
4396 char *newbuf;
4397 char decimal_point = '\0';
4398 const char *dec = NULL;
4399 const char *src;
4400 char *dest;
4401
4402 emalloc(newbuf, char *, new_len, "add_thousands");
4403 memset(newbuf, '\0', new_len);
4404
4405 src = original + strlen(original) - 1;
4406 dest = newbuf;
4407
4408 if (loc->decimal_point[0] != '\0') {
4409 decimal_point = loc->decimal_point[0];
4410 if ((dec = strchr(original, decimal_point)) != NULL) {
4411 while (src >= dec)
4412 *dest++ = *src--;
4413 }
4414 }
4415
4416
4417 int ii = 0;
4418 int jj = 0;
4419 do {
4420 *dest++ = *src--;
4421 if (loc->grouping[ii] && ++jj == loc->grouping[ii]) {
4422 if (src >= original) { /* only add if more digits coming */
4423 const char *ts = loc->thousands_sep;
4424 int k;
4425
4426 for (k = strlen(ts) - 1; k >= 0; k--)
4427 *dest++ = ts[k];
4428 }
4429 if (loc->grouping[ii+1] == 0)
4430 jj = 0; /* keep using current val in loc.grouping[ii] */
4431 else if (loc->grouping[ii+1] == CHAR_MAX) {
4432 // copy in the rest and be done
4433 while (src >= original)
4434 *dest++ = *src--;
4435 break;
4436 } else {
4437 ii++;
4438 jj = 0;
4439 }
4440 }
4441 } while (src >= original);
4442
4443 *dest++ = '\0';
4444 reverse(newbuf);
4445
4446 return newbuf;
4447 }
4448
#if 0
// test program
// Disabled by the surrounding #if 0.  Formats one number with a
// fractional part and one without, using the grouping rules of the
// locale selected by the environment, and prints each result.

int main(int argc, char **argv)
{
	struct lconv *l;

	setlocale(LC_ALL, "");
	l = localeconv();

	// with a fractional part
	const char *new = add_thousands("12345678901234567890.54321", l);
	printf("%s\n", new);
	free((void*) new);

	// integer only
	new = add_thousands("12345678901234567890", l);
	printf("%s\n", new);
	free((void*) new);

	return 0;
}
#endif
4470