1 /*
2 * array.c - routines for awk arrays.
3 */
4
5 /*
6 * Copyright (C) 1986, 1988, 1989, 1991-2014, 2016, 2018-2021,
7 * the Free Software Foundation, Inc.
8 *
9 * This file is part of GAWK, the GNU implementation of the
10 * AWK Programming Language.
11 *
12 * GAWK is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 3 of the License, or
15 * (at your option) any later version.
16 *
17 * GAWK is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 */
26
27 #include "awk.h"
28
29 extern FILE *output_fp;
30 extern NODE **fmt_list; /* declared in eval.c */
31
32 NODE *success_node;
33
34 static size_t SUBSEPlen;
35 static char *SUBSEP;
36 static char indent_char[] = " ";
37
38 static int sort_up_value_type(const void *p1, const void *p2);
39 static NODE **null_lookup(NODE *symbol, NODE *subs);
40 static NODE **null_dump(NODE *symbol, NODE *subs);
41 static const array_funcs_t null_array_func = {
42 "null",
43 (afunc_t) 0,
44 (afunc_t) 0,
45 null_lookup,
46 null_afunc,
47 null_afunc,
48 null_afunc,
49 null_afunc,
50 null_afunc,
51 null_dump,
52 (afunc_t) 0,
53 };
54
55 #define MAX_ATYPE 10
56
57 static const array_funcs_t *array_types[MAX_ATYPE];
58 static int num_array_types = 0;
59
60 /* register_array_func --- add routines to handle arrays */
61
62 static int
register_array_func(const array_funcs_t * afunc)63 register_array_func(const array_funcs_t *afunc)
64 {
65 if (afunc && num_array_types < MAX_ATYPE) {
66 if (afunc != & str_array_func && afunc->type_of == NULL)
67 return false;
68 array_types[num_array_types++] = afunc;
69 if (afunc->init) /* execute init routine if any */
70 (void) (*afunc->init)(NULL, NULL);
71 return true;
72 }
73 return false;
74 }
75
76
77 /* array_init --- register all builtin array types */
78
79 void
array_init()80 array_init()
81 {
82 (void) register_array_func(& str_array_func); /* the default */
83 if (! do_mpfr) {
84 (void) register_array_func(& int_array_func);
85 (void) register_array_func(& cint_array_func);
86 }
87 }
88
89
90 /* make_array --- create an array node */
91
92 NODE *
make_array()93 make_array()
94 {
95 NODE *array;
96 getnode(array);
97 memset(array, '\0', sizeof(NODE));
98 array->type = Node_var_array;
99 array->array_funcs = & null_array_func;
100 /* vname, flags, and parent_array not set here */
101
102 return array;
103 }
104
105
106 /* null_array --- force symbol to be an empty typeless array */
107
108 void
null_array(NODE * symbol)109 null_array(NODE *symbol)
110 {
111 symbol->type = Node_var_array;
112 symbol->array_funcs = & null_array_func;
113 symbol->buckets = NULL;
114 symbol->table_size = 0;
115 symbol->array_size = 0;
116 symbol->array_capacity = 0;
117 symbol->flags = 0;
118
119 assert(symbol->xarray == NULL);
120
121 /* vname, parent_array not (re)initialized */
122 }
123
124
125 /* null_lookup --- assign type to an empty array. */
126
127 static NODE **
null_lookup(NODE * symbol,NODE * subs)128 null_lookup(NODE *symbol, NODE *subs)
129 {
130 int i;
131 const array_funcs_t *afunc = NULL;
132
133 assert(symbol->table_size == 0);
134
135 /*
136 * Check which array type wants to accept this sub; traverse
137 * array type list in reverse order.
138 */
139 for (i = num_array_types - 1; i >= 1; i--) {
140 afunc = array_types[i];
141 if (afunc->type_of(symbol, subs) != NULL)
142 break;
143 }
144 if (i == 0 || afunc == NULL)
145 afunc = array_types[0]; /* default is str_array_func */
146 symbol->array_funcs = afunc;
147
148 /* We have the right type of array; install the subscript */
149 return symbol->alookup(symbol, subs);
150 }
151
152
153 /* null_afunc --- default function for array interface */
154
155 NODE **
null_afunc(NODE * symbol ATTRIBUTE_UNUSED,NODE * subs ATTRIBUTE_UNUSED)156 null_afunc(NODE *symbol ATTRIBUTE_UNUSED, NODE *subs ATTRIBUTE_UNUSED)
157 {
158 return NULL;
159 }
160
161 /* null_dump --- dump function for an empty array */
162
163 static NODE **
null_dump(NODE * symbol,NODE * subs ATTRIBUTE_UNUSED)164 null_dump(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
165 {
166 fprintf(output_fp, "array `%s' is empty\n", array_vname(symbol));
167 return NULL;
168 }
169
170
171 /* assoc_copy --- duplicate input array "symbol" */
172
173 NODE *
assoc_copy(NODE * symbol,NODE * newsymb)174 assoc_copy(NODE *symbol, NODE *newsymb)
175 {
176 assert(newsymb->vname != NULL);
177
178 assoc_clear(newsymb);
179 (void) symbol->acopy(symbol, newsymb);
180 newsymb->array_funcs = symbol->array_funcs;
181 newsymb->flags = symbol->flags;
182 return newsymb;
183 }
184
185
186 /* assoc_dump --- dump array */
187
188 void
assoc_dump(NODE * symbol,NODE * ndump)189 assoc_dump(NODE *symbol, NODE *ndump)
190 {
191 if (symbol->adump)
192 (void) symbol->adump(symbol, ndump);
193 }
194
195
196 /* make_aname --- construct a 'vname' for a (sub)array */
197
198 const char *
make_aname(const NODE * symbol)199 make_aname(const NODE *symbol)
200 {
201 static char *aname = NULL;
202 static size_t alen;
203 static size_t max_alen;
204 #define SLEN 256
205
206 if (symbol->parent_array != NULL) {
207 size_t slen;
208
209 (void) make_aname(symbol->parent_array);
210 slen = strlen(symbol->vname); /* subscript in parent array */
211 if (alen + slen + 4 > max_alen) { /* sizeof("[\"\"]") = 4 */
212 max_alen = alen + slen + 4 + SLEN;
213 erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
214 }
215 alen += sprintf(aname + alen, "[\"%s\"]", symbol->vname);
216 } else {
217 alen = strlen(symbol->vname);
218 if (aname == NULL) {
219 max_alen = alen + SLEN;
220 emalloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
221 } else if (alen > max_alen) {
222 max_alen = alen + SLEN;
223 erealloc(aname, char *, (max_alen + 1) * sizeof(char *), "make_aname");
224 }
225 memcpy(aname, symbol->vname, alen + 1);
226 }
227 return aname;
228 }
229 #undef SLEN
230
231
232 /*
233 * array_vname --- print the name of the array
234 *
235 * Returns a pointer to a statically maintained dynamically allocated string.
236 * It's appropriate for printing the name once; if the caller wants
237 * to save it, they have to make a copy.
238 */
239
240 const char *
array_vname(const NODE * symbol)241 array_vname(const NODE *symbol)
242 {
243 static char *message = NULL;
244 static size_t msglen = 0;
245 char *s;
246 size_t len;
247 int n;
248 const NODE *save_symbol = symbol;
249 const char *from = _("from %s");
250 const char *aname;
251
252 if (symbol->type != Node_array_ref
253 || symbol->orig_array->type != Node_var_array
254 ) {
255 if (symbol->type != Node_var_array || symbol->parent_array == NULL)
256 return symbol->vname;
257 return make_aname(symbol);
258 }
259
260 /* First, we have to compute the length of the string: */
261
262 len = 2; /* " (" */
263 n = 0;
264 while (symbol->type == Node_array_ref) {
265 len += strlen(symbol->vname);
266 n++;
267 symbol = symbol->prev_array;
268 }
269
270 /* Get the (sub)array name */
271 if (symbol->parent_array == NULL)
272 aname = symbol->vname;
273 else
274 aname = make_aname(symbol);
275 len += strlen(aname);
276 /*
277 * Each node contributes by strlen(from) minus the length
278 * of "%s" in the translation (which is at least 2)
279 * plus 2 for ", " or ")\0"; this adds up to strlen(from).
280 */
281 len += n * strlen(from);
282
283 /* (Re)allocate memory: */
284 if (message == NULL) {
285 emalloc(message, char *, len, "array_vname");
286 msglen = len;
287 } else if (len > msglen) {
288 erealloc(message, char *, len, "array_vname");
289 msglen = len;
290 } /* else
291 current buffer can hold new name */
292
293 /* We're ready to print: */
294 symbol = save_symbol;
295 s = message;
296 /*
297 * Ancient systems have sprintf() returning char *, not int.
298 * If you have one of those, use sprintf(..); s += strlen(s) instead.
299 */
300
301 s += sprintf(s, "%s (", symbol->vname);
302 for (;;) {
303 symbol = symbol->prev_array;
304 if (symbol->type != Node_array_ref)
305 break;
306 s += sprintf(s, from, symbol->vname);
307 s += sprintf(s, ", ");
308 }
309 s += sprintf(s, from, aname);
310 strcpy(s, ")");
311
312 return message;
313 }
314
315
316 /*
317 * force_array --- proceed to the actual Node_var_array,
318 * change Node_var_new to an array.
319 * If canfatal and type isn't good, die fatally,
320 * otherwise return the final actual value.
321 */
322
323 NODE *
force_array(NODE * symbol,bool canfatal)324 force_array(NODE *symbol, bool canfatal)
325 {
326 NODE *save_symbol = symbol;
327 bool isparam = false;
328
329 if (symbol->type == Node_param_list) {
330 save_symbol = symbol = GET_PARAM(symbol->param_cnt);
331 isparam = true;
332 if (symbol->type == Node_array_ref)
333 symbol = symbol->orig_array;
334 }
335
336 switch (symbol->type) {
337 case Node_var_new:
338 symbol->xarray = NULL; /* make sure union is as it should be */
339 null_array(symbol);
340 symbol->parent_array = NULL; /* main array has no parent */
341 /* fall through */
342 case Node_var_array:
343 break;
344
345 case Node_array_ref:
346 default:
347 /* notably Node_var but catches also e.g. a[1] = "x"; a[1][1] = "y" */
348 if (canfatal) {
349 if (symbol->type == Node_val)
350 fatal(_("attempt to use a scalar value as array"));
351 if (isparam)
352 fatal(_("attempt to use scalar parameter `%s' as an array"),
353 save_symbol->vname);
354 else
355 fatal(_("attempt to use scalar `%s' as an array"),
356 save_symbol->vname);
357 } else
358 break;
359 }
360
361 return symbol;
362 }
363
364
365 /* set_SUBSEP --- update SUBSEP related variables when SUBSEP assigned to */
366
367 void
set_SUBSEP()368 set_SUBSEP()
369 {
370 SUBSEP_node->var_value = force_string(SUBSEP_node->var_value);
371 SUBSEP = SUBSEP_node->var_value->stptr;
372 SUBSEPlen = SUBSEP_node->var_value->stlen;
373 }
374
375
376 /* concat_exp --- concatenate expression list into a single string */
377
378 NODE *
concat_exp(int nargs,bool do_subsep)379 concat_exp(int nargs, bool do_subsep)
380 {
381 /* do_subsep is false for Op_concat */
382 NODE *r;
383 char *str;
384 char *s;
385 size_t len;
386 size_t subseplen = 0;
387 int i;
388 extern NODE **args_array;
389
390 if (nargs == 1)
391 return POP_STRING();
392
393 if (do_subsep)
394 subseplen = SUBSEPlen;
395
396 len = 0;
397 for (i = 1; i <= nargs; i++) {
398 r = TOP();
399 if (r->type == Node_var_array) {
400 while (--i > 0)
401 DEREF(args_array[i]); /* avoid memory leak */
402 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(r));
403 }
404 r = POP_STRING();
405 args_array[i] = r;
406 len += r->stlen;
407 }
408 len += (nargs - 1) * subseplen;
409
410 emalloc(str, char *, len + 1, "concat_exp");
411
412 r = args_array[nargs];
413 memcpy(str, r->stptr, r->stlen);
414 s = str + r->stlen;
415 DEREF(r);
416 for (i = nargs - 1; i > 0; i--) {
417 if (subseplen == 1)
418 *s++ = *SUBSEP;
419 else if (subseplen > 0) {
420 memcpy(s, SUBSEP, subseplen);
421 s += subseplen;
422 }
423 r = args_array[i];
424 memcpy(s, r->stptr, r->stlen);
425 s += r->stlen;
426 DEREF(r);
427 }
428
429 return make_str_node(str, len, ALREADY_MALLOCED);
430 }
431
432
433 /*
434 * adjust_fcall_stack: remove subarray(s) of symbol[] from
435 * function call stack.
436 */
437
438 static void
adjust_fcall_stack(NODE * symbol,int nsubs)439 adjust_fcall_stack(NODE *symbol, int nsubs)
440 {
441 NODE *func, *r, *n;
442 NODE **sp;
443 int pcount;
444
445 /*
446 * Solve the nasty problem of disappearing subarray arguments:
447 *
448 * function f(c, d) { delete c; .. use non-existent array d .. }
449 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
450 *
451 * The fix is to convert 'd' to a local empty array; This has
452 * to be done before clearing the parent array to avoid referring to
453 * already free-ed memory.
454 *
455 * Similar situations exist for builtins accepting more than
456 * one array argument: split, patsplit, asort and asorti. For example:
457 *
458 * BEGIN { a[0][0] = 1; split("abc", a, "", a[0]) }
459 *
460 * These cases do not involve the function call stack, and are
461 * handled individually in their respective routines.
462 */
463
464 func = frame_ptr->func_node;
465 if (func == NULL) /* in main */
466 return;
467 pcount = func->param_cnt;
468 sp = frame_ptr->stack;
469
470 for (; pcount > 0; pcount--) {
471 r = *sp++;
472 if (r->type != Node_array_ref
473 || r->orig_array->type != Node_var_array)
474 continue;
475 n = r->orig_array;
476
477 /* Case 1 */
478 if (n == symbol
479 && symbol->parent_array != NULL
480 && nsubs > 0
481 ) {
482 /*
483 * 'symbol' is a subarray, and 'r' is the same subarray:
484 *
485 * function f(c, d) { delete c[0]; .. }
486 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
487 *
488 * But excludes cases like (nsubs = 0):
489 *
490 * function f(c, d) { delete c; ..}
491 * BEGIN { a[0][0] = 1; f(a[0], a[0]); ...}
492 */
493
494 null_array(r);
495 r->parent_array = NULL;
496 continue;
497 }
498
499 /* Case 2 */
500 for (n = n->parent_array; n != NULL; n = n->parent_array) {
501 assert(n->type == Node_var_array);
502 if (n == symbol) {
503 /*
504 * 'r' is a subarray of 'symbol':
505 *
506 * function f(c, d) { delete c; .. use d as array .. }
507 * BEGIN { a[0][0] = 1; f(a, a[0]); .. }
508 * OR
509 * BEGIN { a[0][0][0][0] = 1; f(a[0], a[0][0][0]); .. }
510 *
511 */
512 null_array(r);
513 r->parent_array = NULL;
514 break;
515 }
516 }
517 }
518 }
519
520
521 /* do_delete --- perform `delete array[s]' */
522
523 /*
524 * `symbol' is array
525 * `nsubs' is no of subscripts
526 */
527
528 void
do_delete(NODE * symbol,int nsubs)529 do_delete(NODE *symbol, int nsubs)
530 {
531 NODE *val, *subs;
532 int i;
533
534 assert(symbol->type == Node_var_array);
535 subs = val = NULL; /* silence the compiler */
536
537 /*
538 * The force_string() call is needed to make sure that
539 * the string subscript is reasonable. For example, with it:
540 *
541 * $ ./gawk --posix 'BEGIN { CONVFMT="%ld"; delete a[1.233]}'
542 * gawk: cmd. line:1: fatal: `%l' is not permitted in POSIX awk formats
543 *
544 * Without it, the code does not fail.
545 */
546
547 #define free_subs(n) do { \
548 NODE *s = PEEK(n - 1); \
549 if (s->type == Node_val) { \
550 (void) force_string(s); /* may have side effects. */ \
551 DEREF(s); \
552 } \
553 } while (--n > 0)
554
555 if (nsubs == 0) {
556 /* delete array */
557
558 adjust_fcall_stack(symbol, 0); /* fix function call stack; See above. */
559 assoc_clear(symbol);
560 return;
561 }
562
563 /* NB: subscripts are in reverse order on stack */
564
565 for (i = nsubs; i > 0; i--) {
566 subs = PEEK(i - 1);
567 if (subs->type != Node_val) {
568 free_subs(i);
569 fatal(_("attempt to use array `%s' in a scalar context"), array_vname(subs));
570 }
571
572 val = in_array(symbol, subs);
573 if (val == NULL) {
574 if (do_lint) {
575 subs = force_string(subs);
576 lintwarn(_("delete: index `%.*s' not in array `%s'"),
577 (int) subs->stlen, subs->stptr, array_vname(symbol));
578 }
579 /* avoid memory leak, free all subs */
580 free_subs(i);
581 return;
582 }
583
584 if (i > 1) {
585 if (val->type != Node_var_array) {
586 /* e.g.: a[1] = 1; delete a[1][1] */
587
588 free_subs(i);
589 subs = force_string(subs);
590 fatal(_("attempt to use scalar `%s[\"%.*s\"]' as an array"),
591 array_vname(symbol),
592 (int) subs->stlen,
593 subs->stptr);
594 }
595 symbol = val;
596 DEREF(subs);
597 }
598 }
599
600 if (val->type == Node_var_array) {
601 adjust_fcall_stack(val, nsubs); /* fix function call stack; See above. */
602 assoc_clear(val);
603 /* cleared a sub-array, free Node_var_array */
604 efree(val->vname);
605 freenode(val);
606 } else
607 unref(val);
608
609 (void) assoc_remove(symbol, subs);
610 DEREF(subs);
611 if (assoc_empty(symbol))
612 /* last element was removed, so reset array type to null */
613 null_array(symbol);
614
615 #undef free_subs
616 }
617
618
619 /* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */
620
621 /*
622 * The primary hassle here is that `iggy' needs to have some arbitrary
623 * array index put in it before we can clear the array, we can't
624 * just replace the loop with `delete foo'.
625 */
626
627 void
do_delete_loop(NODE * symbol,NODE ** lhs)628 do_delete_loop(NODE *symbol, NODE **lhs)
629 {
630 NODE **list;
631 NODE akind;
632
633 akind.flags = AINDEX|ADELETE; /* need a single index */
634 list = symbol->alist(symbol, & akind);
635
636 if (assoc_empty(symbol))
637 return;
638
639 unref(*lhs);
640 *lhs = list[0];
641 efree(list);
642
643 /* blast the array in one shot */
644 adjust_fcall_stack(symbol, 0);
645 assoc_clear(symbol);
646 }
647
648
649 /* value_info --- print scalar node info */
650
651 static void
value_info(NODE * n)652 value_info(NODE *n)
653 {
654
655 #define PREC_NUM -1
656
657 if (n == Nnull_string || n == Null_field) {
658 fprintf(output_fp, "<(null)>");
659 return;
660 }
661
662 if ((n->flags & (STRING|STRCUR)) != 0) {
663 fprintf(output_fp, "<");
664 fprintf(output_fp, "\"%.*s\"", (int) n->stlen, n->stptr);
665 if ((n->flags & (NUMBER|NUMCUR)) != 0) {
666 #ifdef HAVE_MPFR
667 if (is_mpg_float(n))
668 fprintf(output_fp, ":%s",
669 mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
670 else if (is_mpg_integer(n))
671 fprintf(output_fp, ":%s", mpg_fmt("%Zd", n->mpg_i));
672 else
673 #endif
674 fprintf(output_fp, ":%.*g", PREC_NUM, n->numbr);
675 }
676 fprintf(output_fp, ">");
677 } else {
678 #ifdef HAVE_MPFR
679 if (is_mpg_float(n))
680 fprintf(output_fp, "<%s>",
681 mpg_fmt("%.*R*g", PREC_NUM, ROUND_MODE, n->mpg_numbr));
682 else if (is_mpg_integer(n))
683 fprintf(output_fp, "<%s>", mpg_fmt("%Zd", n->mpg_i));
684 else
685 #endif
686 fprintf(output_fp, "<%.*g>", PREC_NUM, n->numbr);
687 }
688
689 fprintf(output_fp, ":%s", flags2str(n->flags));
690
691 if ((n->flags & MALLOC) != 0)
692 fprintf(output_fp, ":%ld", n->valref);
693 else
694 fprintf(output_fp, ":");
695
696 if ((n->flags & (STRING|STRCUR)) == STRCUR) {
697 size_t len;
698
699 fprintf(output_fp, "][");
700 fprintf(output_fp, "stfmt=%d, ", n->stfmt);
701 /*
702 * If not STFMT_UNUSED, could be CONVFMT or OFMT if last
703 * used in a print statement. If immutable, could be that it
704 * was originally set as a string, or it's a number that has
705 * an integer value.
706 */
707 len = fmt_list[n->stfmt]->stlen;
708 fmt_list[n->stfmt]->stptr[len] = '\0';
709 fprintf(output_fp, "FMT=\"%s\"",
710 n->stfmt == STFMT_UNUSED ? "<unused>"
711 : fmt_list[n->stfmt]->stptr);
712 #ifdef HAVE_MPFR
713 fprintf(output_fp, ", ROUNDMODE=\"%c\"", n->strndmode);
714 #endif
715 }
716
717 #undef PREC_NUM
718 }
719
720
721 void
indent(int indent_level)722 indent(int indent_level)
723 {
724 int i;
725 for (i = 0; i < indent_level; i++)
726 fprintf(output_fp, "%s", indent_char);
727 }
728
729 /* assoc_info --- print index, value info */
730
731 void
assoc_info(NODE * subs,NODE * val,NODE * ndump,const char * aname)732 assoc_info(NODE *subs, NODE *val, NODE *ndump, const char *aname)
733 {
734 int indent_level = ndump->alevel;
735
736 indent_level++;
737 indent(indent_level);
738 fprintf(output_fp, "I: [%s:", aname);
739 if ((subs->flags & (MPFN|MPZN|INTIND)) == INTIND)
740 fprintf(output_fp, "<%ld>", (long) subs->numbr);
741 else
742 value_info(subs);
743 fprintf(output_fp, "]\n");
744
745 indent(indent_level);
746 if (val->type == Node_val) {
747 fprintf(output_fp, "V: [scalar: ");
748 value_info(val);
749 } else {
750 fprintf(output_fp, "V: [");
751 ndump->alevel++;
752 ndump->adepth--;
753 assoc_dump(val, ndump);
754 ndump->adepth++;
755 ndump->alevel--;
756 indent(indent_level);
757 }
758 fprintf(output_fp, "]\n");
759 }
760
761
762 /* do_adump --- dump an array: interface to assoc_dump */
763
764 NODE *
do_adump(int nargs)765 do_adump(int nargs)
766 {
767 NODE *symbol, *tmp;
768 static NODE ndump;
769 long depth = 0;
770
771 /*
772 * depth < 0, no index and value info.
773 * = 0, main array index and value info; does not descend into sub-arrays.
774 * > 0, descends into 'depth' sub-arrays, and prints index and value info.
775 */
776
777 if (nargs == 2) {
778 tmp = POP_NUMBER();
779 depth = get_number_si(tmp);
780 DEREF(tmp);
781 }
782 symbol = POP_PARAM();
783 if (symbol->type != Node_var_array)
784 fatal(_("%s: first argument is not an array"), "adump");
785
786 ndump.type = Node_dump_array;
787 ndump.adepth = depth;
788 ndump.alevel = 0;
789 assoc_dump(symbol, & ndump);
790 return make_number((AWKNUM) 0);
791 }
792
793
794 /* asort_actual --- do the actual work to sort the input array */
795
796 static NODE *
asort_actual(int nargs,sort_context_t ctxt)797 asort_actual(int nargs, sort_context_t ctxt)
798 {
799 NODE *array, *dest = NULL, *result;
800 NODE *r, *subs, *s;
801 NODE **list = NULL, **ptr;
802 unsigned long num_elems, i;
803 const char *sort_str;
804 char save;
805 const char *name = (ctxt == ASORT ? "asort" : "asorti"); // D.R.Y.
806
807 if (nargs == 3) /* 3rd optional arg */
808 s = POP_STRING();
809 else
810 s = dupnode(Nnull_string); /* "" => default sorting */
811
812 s = force_string(s);
813 sort_str = s->stptr;
814 save = s->stptr[s->stlen];
815 s->stptr[s->stlen] = '\0';
816 if (s->stlen == 0) { /* default sorting */
817 if (ctxt == ASORT)
818 sort_str = "@val_type_asc";
819 else
820 sort_str = "@ind_str_asc";
821 }
822
823 if (nargs >= 2) { /* 2nd optional arg */
824 dest = POP_PARAM();
825 if (dest->type != Node_var_array) {
826 fatal(_("%s: second argument is not an array"), name);
827 }
828 check_symtab_functab(dest, name,
829 _("%s: cannot use %s as second argument"));
830 }
831
832 array = POP_PARAM();
833 if (array->type != Node_var_array) {
834 fatal(_("%s: first argument is not an array"), name);
835 }
836 else if (array == symbol_table && dest == NULL)
837 fatal(_("%s: first argument cannot be SYMTAB without a second argument"), name);
838 else if (array == func_table && dest == NULL)
839 fatal(_("%s: first argument cannot be FUNCTAB without a second argument"), name);
840
841 if (dest != NULL) {
842 static bool warned = false;
843
844 if (nargs == 2 && array == dest && ! warned) {
845 warned = true;
846 lintwarn(_("asort/asorti: using the same array as source and destination without "
847 "a third argument is silly."));
848 }
849 for (r = dest->parent_array; r != NULL; r = r->parent_array) {
850 if (r == array)
851 fatal(_("%s: cannot use a subarray of first argument for second argument"),
852 name);
853 }
854 for (r = array->parent_array; r != NULL; r = r->parent_array) {
855 if (r == dest)
856 fatal(_("%s: cannot use a subarray of second argument for first argument"),
857 name);
858 }
859 }
860
861 /* sorting happens inside assoc_list */
862 list = assoc_list(array, sort_str, ctxt);
863 s->stptr[s->stlen] = save;
864 DEREF(s);
865
866 num_elems = assoc_length(array);
867 if (num_elems == 0 || list == NULL) {
868 /* source array is empty */
869 if (dest != NULL && dest != array)
870 assoc_clear(dest);
871 if (list != NULL)
872 efree(list);
873 return make_number((AWKNUM) 0);
874 }
875
876 /*
877 * Must not assoc_clear() the source array before constructing
878 * the output array. assoc_list() does not duplicate array values
879 * which are needed for asort().
880 */
881
882 if (dest != NULL && dest != array) {
883 assoc_clear(dest);
884 result = dest;
885 } else {
886 /* use 'result' as a temporary destination array */
887 result = make_array();
888 result->vname = array->vname;
889 result->parent_array = array->parent_array;
890 }
891
892 if (ctxt == ASORTI) {
893 /* We want the indices of the source array. */
894
895 for (i = 1, ptr = list; i <= num_elems; i++, ptr += 2) {
896 subs = make_number(i);
897 assoc_set(result, subs, *ptr);
898 }
899 } else {
900 /* We want the values of the source array. */
901
902 for (i = 1, ptr = list; i <= num_elems; i++) {
903 subs = make_number(i);
904
905 /* free index node */
906 r = *ptr++;
907 unref(r);
908
909 /* value node */
910 r = *ptr++;
911
912 NODE *value;
913
914 if (r->type == Node_val)
915 value = dupnode(r);
916 else if (r->type == Node_var)
917 /* SYMTAB ... */
918 value = dupnode(r->var_value);
919 else if (r->type == Node_builtin_func
920 || r->type == Node_func
921 || r->type == Node_ext_func) {
922 /* FUNCTAB ... */
923 value = make_string(r->vname, strlen(r->vname));
924 } else {
925 NODE *arr;
926 arr = make_array();
927 subs = force_string(subs);
928 arr->vname = subs->stptr;
929 arr->vname[subs->stlen] = '\0';
930 subs->stptr = NULL;
931 subs->flags &= ~STRCUR;
932 arr->parent_array = array; /* actual parent, not the temporary one. */
933
934 value = assoc_copy(r, arr);
935 }
936 assoc_set(result, subs, value);
937 }
938 }
939
940 efree(list);
941
942 if (result != dest) {
943 /* dest == NULL or dest == array */
944 assoc_clear(array);
945 *array = *result; /* copy result into array */
946 freenode(result);
947 } /* else
948 result == dest
949 dest != NULL and dest != array */
950
951 return make_number((AWKNUM) num_elems);
952 }
953
954 /* do_asort --- sort array by value */
955
956 NODE *
do_asort(int nargs)957 do_asort(int nargs)
958 {
959 return asort_actual(nargs, ASORT);
960 }
961
962 /* do_asorti --- sort array by index */
963
964 NODE *
do_asorti(int nargs)965 do_asorti(int nargs)
966 {
967 return asort_actual(nargs, ASORTI);
968 }
969
970
971 /*
972 * cmp_strings --- compare two strings; logic similar to cmp_nodes() in eval.c
973 * except the extra case-sensitive comparison when the case-insensitive
974 * result is a match.
975 */
976
977 static int
cmp_strings(const NODE * n1,const NODE * n2)978 cmp_strings(const NODE *n1, const NODE *n2)
979 {
980 char *s1, *s2;
981 size_t len1, len2;
982 int ret;
983
984 s1 = n1->stptr;
985 len1 = n1->stlen;
986 s2 = n2->stptr;
987 len2 = n2->stlen;
988
989 if (len1 == 0)
990 return len2 == 0 ? 0 : -1;
991 if (len2 == 0)
992 return 1;
993
994 /* len1 > 0 && len2 > 0 */
995 // make const to ensure it doesn't change if we
996 // need to call memcmp(), below
997 const size_t lmin = len1 < len2 ? len1 : len2;
998
999 if (IGNORECASE) {
1000 const unsigned char *cp1 = (const unsigned char *) s1;
1001 const unsigned char *cp2 = (const unsigned char *) s2;
1002
1003 if (gawk_mb_cur_max > 1) {
1004 ret = strncasecmpmbs((const unsigned char *) cp1,
1005 (const unsigned char *) cp2, lmin);
1006 } else {
1007 size_t count = lmin;
1008
1009 for (ret = 0; count-- > 0 && ret == 0; cp1++, cp2++)
1010 ret = casetable[*cp1] - casetable[*cp2];
1011 }
1012 if (ret != 0)
1013 return ret;
1014 /*
1015 * If case insensitive result is "they're the same",
1016 * use case sensitive comparison to force distinct order.
1017 */
1018 }
1019
1020 ret = memcmp(s1, s2, lmin);
1021 if (ret != 0 || len1 == len2)
1022 return ret;
1023 return (len1 < len2) ? -1 : 1;
1024 }
1025
1026 /* sort_up_index_string --- qsort comparison function; ascending index strings. */
1027
1028 static int
sort_up_index_string(const void * p1,const void * p2)1029 sort_up_index_string(const void *p1, const void *p2)
1030 {
1031 const NODE *t1, *t2;
1032
1033 /* Array indices are strings */
1034 t1 = *((const NODE *const *) p1);
1035 t2 = *((const NODE *const *) p2);
1036 return cmp_strings(t1, t2);
1037 }
1038
1039
1040 /* sort_down_index_str --- qsort comparison function; descending index strings. */
1041
1042 static int
sort_down_index_string(const void * p1,const void * p2)1043 sort_down_index_string(const void *p1, const void *p2)
1044 {
1045 /*
1046 * Negation versus transposed arguments: when all keys are
1047 * distinct, as with array indices here, either method will
1048 * transform an ascending sort into a descending one. But if
1049 * there are equal keys--such as when IGNORECASE is honored--
1050 * that get disambiguated into a determisitc order, negation
1051 * will reverse those but transposed arguments would retain
1052 * their relative order within the rest of the reversed sort.
1053 */
1054 return -sort_up_index_string(p1, p2);
1055 }
1056
1057
1058 /* sort_up_index_number --- qsort comparison function; ascending index numbers. */
1059
1060 static int
sort_up_index_number(const void * p1,const void * p2)1061 sort_up_index_number(const void *p1, const void *p2)
1062 {
1063 const NODE *t1, *t2;
1064 int ret;
1065
1066 t1 = *((const NODE *const *) p1);
1067 t2 = *((const NODE *const *) p2);
1068
1069 ret = cmp_numbers(t1, t2);
1070 if (ret != 0)
1071 return ret;
1072
1073 /* break a tie with the index string itself */
1074 t1 = force_string((NODE *) t1);
1075 t2 = force_string((NODE *) t2);
1076 return cmp_strings(t1, t2);
1077 }
1078
1079 /* sort_down_index_number --- qsort comparison function; descending index numbers */
1080
1081 static int
sort_down_index_number(const void * p1,const void * p2)1082 sort_down_index_number(const void *p1, const void *p2)
1083 {
1084 return -sort_up_index_number(p1, p2);
1085 }
1086
1087
1088 /* sort_up_value_string --- qsort comparison function; ascending value string */
1089
1090 static int
sort_up_value_string(const void * p1,const void * p2)1091 sort_up_value_string(const void *p1, const void *p2)
1092 {
1093 const NODE *t1, *t2;
1094 int ret;
1095
1096 t1 = *((const NODE *const *) p1 + 1);
1097 t2 = *((const NODE *const *) p2 + 1);
1098
1099 if (t1->type != Node_val || t2->type != Node_val)
1100 return sort_up_value_type(p1, p2);
1101
1102 /* t1 and t2 both have string values */
1103 ret = cmp_strings(t1, t2);
1104 if (ret != 0)
1105 return ret;
1106 return sort_up_index_string(p1, p2);
1107 }
1108
1109
1110 /* sort_down_value_string --- qsort comparison function; descending value string */
1111
1112 static int
sort_down_value_string(const void * p1,const void * p2)1113 sort_down_value_string(const void *p1, const void *p2)
1114 {
1115 return -sort_up_value_string(p1, p2);
1116 }
1117
1118
1119 /* sort_up_value_number --- qsort comparison function; ascending value number */
1120
1121 static int
sort_up_value_number(const void * p1,const void * p2)1122 sort_up_value_number(const void *p1, const void *p2)
1123 {
1124 NODE *t1, *t2;
1125 int ret;
1126
1127 t1 = *((NODE *const *) p1 + 1);
1128 t2 = *((NODE *const *) p2 + 1);
1129
1130 if (t1->type != Node_val || t2->type != Node_val)
1131 return sort_up_value_type(p1, p2);
1132
1133 ret = cmp_numbers(t1, t2);
1134 if (ret != 0)
1135 return ret;
1136
1137 /*
1138 * Use string value to guarantee same sort order on all
1139 * versions of qsort().
1140 */
1141 ret = cmp_strings(force_string(t1), force_string(t2));
1142 if (ret != 0)
1143 return ret;
1144 return sort_up_index_string(p1, p2);
1145 }
1146
1147
1148 /* sort_down_value_number --- qsort comparison function; descending value number */
1149
1150 static int
sort_down_value_number(const void * p1,const void * p2)1151 sort_down_value_number(const void *p1, const void *p2)
1152 {
1153 return -sort_up_value_number(p1, p2);
1154 }
1155
1156
1157 /* do_sort_up_value_type --- backend comparison on ascending value type */
1158
1159 static int
do_sort_up_value_type(const void * p1,const void * p2)1160 do_sort_up_value_type(const void *p1, const void *p2)
1161 {
1162 NODE *n1, *n2;
1163
1164 static const NODETYPE element_types[] = {
1165 Node_builtin_func,
1166 Node_func,
1167 Node_ext_func,
1168 Node_var_new,
1169 Node_var,
1170 Node_var_array,
1171 Node_val,
1172 Node_illegal
1173 };
1174
1175 /* we want to compare the element values */
1176 n1 = *((NODE *const *) p1 + 1);
1177 n2 = *((NODE *const *) p2 + 1);
1178
1179 if (n1->type == Node_var && n2->type == Node_var) {
1180 /* compare the values of the variables */
1181 n1 = n1->var_value;
1182 n2 = n2->var_value;
1183 }
1184
1185 /* 1. Arrays vs. everything else, everything else is less than array */
1186 if (n1->type == Node_var_array) {
1187 /* return 0 if n2 is a sub-array too, else return 1 */
1188 return (n2->type != Node_var_array);
1189 }
1190 if (n2->type == Node_var_array) {
1191 return -1; /* n1 (non-array) < n2 (sub-array) */
1192 }
1193
1194 /* 2. Non scalars */
1195 if (n1->type != Node_val || n2->type != Node_val) {
1196 int n1_pos, n2_pos, i;
1197
1198 n1_pos = n2_pos = -1;
1199 for (i = 0; element_types[i] != Node_illegal; i++) {
1200 if (n1->type == element_types[i])
1201 n1_pos = i;
1202
1203 if (n2->type == element_types[i])
1204 n2_pos = i;
1205 }
1206
1207 assert(n1_pos != -1 && n2_pos != -1);
1208 return (n1_pos - n2_pos);
1209 }
1210
1211 /* two scalars */
1212 (void) fixtype(n1);
1213 (void) fixtype(n2);
1214
1215 if ((n1->flags & NUMBER) != 0 && (n2->flags & NUMBER) != 0) {
1216 return cmp_numbers(n1, n2);
1217 }
1218
1219 /* 3. All numbers are less than all strings. This is aribitrary. */
1220 if ((n1->flags & NUMBER) != 0 && (n2->flags & STRING) != 0) {
1221 return -1;
1222 } else if ((n1->flags & STRING) != 0 && (n2->flags & NUMBER) != 0) {
1223 return 1;
1224 }
1225
1226 /* 4. Two strings */
1227 return cmp_strings(n1, n2);
1228 }
1229
1230 /* sort_up_value_type --- qsort comparison function; ascending value type */
1231
1232 static int
sort_up_value_type(const void * p1,const void * p2)1233 sort_up_value_type(const void *p1, const void *p2)
1234 {
1235 int rc = do_sort_up_value_type(p1, p2);
1236
1237 /* use a tie-breaker if do_sort_up_value_type has no opinion */
1238 return rc ? rc : sort_up_index_string(p1, p2);
1239 }
1240
1241 /* sort_down_value_type --- qsort comparison function; descending value type */
1242
1243 static int
sort_down_value_type(const void * p1,const void * p2)1244 sort_down_value_type(const void *p1, const void *p2)
1245 {
1246 return -sort_up_value_type(p1, p2);
1247 }
1248
1249 /* sort_user_func --- user defined qsort comparison function */
1250
1251 static int
sort_user_func(const void * p1,const void * p2)1252 sort_user_func(const void *p1, const void *p2)
1253 {
1254 NODE *idx1, *idx2, *val1, *val2, *r;
1255 int ret;
1256 INSTRUCTION *code;
1257
1258 idx1 = *((NODE *const *) p1);
1259 idx2 = *((NODE *const *) p2);
1260 val1 = *((NODE *const *) p1 + 1);
1261 val2 = *((NODE *const *) p2 + 1);
1262
1263 code = TOP()->code_ptr; /* comparison function call instructions */
1264
1265 /* setup 4 arguments to comp_func() */
1266 UPREF(idx1);
1267 PUSH(idx1);
1268 if (val1->type == Node_val)
1269 UPREF(val1);
1270 PUSH(val1);
1271
1272 UPREF(idx2);
1273 PUSH(idx2);
1274 if (val2->type == Node_val)
1275 UPREF(val2);
1276 PUSH(val2);
1277
1278 /* execute the comparison function */
1279 (void) (*interpret)(code);
1280
1281 /* return value of the comparison function */
1282 r = POP_NUMBER();
1283 #ifdef HAVE_MPFR
1284 /*
1285 * mpfr_sgn(mpz_sgn): Returns a positive value if op > 0,
1286 * zero if op = 0, and a negative value if op < 0.
1287 */
1288 if (is_mpg_float(r))
1289 ret = mpfr_sgn(r->mpg_numbr);
1290 else if (is_mpg_integer(r))
1291 ret = mpz_sgn(r->mpg_i);
1292 else
1293 #endif
1294 ret = (r->numbr < 0.0) ? -1 : (r->numbr > 0.0);
1295 DEREF(r);
1296 return ret;
1297 }
1298
1299
1300 /* assoc_list -- construct, and optionally sort, a list of array elements */
1301
1302 NODE **
assoc_list(NODE * symbol,const char * sort_str,sort_context_t sort_ctxt)1303 assoc_list(NODE *symbol, const char *sort_str, sort_context_t sort_ctxt)
1304 {
1305 typedef int (*qsort_compfunc)(const void *, const void *);
1306
1307 static const struct qsort_funcs {
1308 const char *name;
1309 qsort_compfunc comp_func;
1310 assoc_kind_t kind;
1311 } sort_funcs[] = {
1312 { "@ind_str_asc", sort_up_index_string, AINDEX|AISTR|AASC },
1313 { "@ind_num_asc", sort_up_index_number, AINDEX|AINUM|AASC },
1314 { "@val_str_asc", sort_up_value_string, AVALUE|AVSTR|AASC },
1315 { "@val_num_asc", sort_up_value_number, AVALUE|AVNUM|AASC },
1316 { "@ind_str_desc", sort_down_index_string, AINDEX|AISTR|ADESC },
1317 { "@ind_num_desc", sort_down_index_number, AINDEX|AINUM|ADESC },
1318 { "@val_str_desc", sort_down_value_string, AVALUE|AVSTR|ADESC },
1319 { "@val_num_desc", sort_down_value_number, AVALUE|AVNUM|ADESC },
1320 { "@val_type_asc", sort_up_value_type, AVALUE|AASC },
1321 { "@val_type_desc", sort_down_value_type, AVALUE|ADESC },
1322 { "@unsorted", 0, AINDEX },
1323 };
1324
1325 /*
1326 * N.B.: AASC and ADESC are hints to the specific array types.
1327 * See cint_list() in cint_array.c.
1328 */
1329
1330 NODE **list;
1331 NODE akind;
1332 unsigned long num_elems, j;
1333 int elem_size, qi;
1334 qsort_compfunc cmp_func = 0;
1335 INSTRUCTION *code = NULL;
1336 extern int currule;
1337 int save_rule = 0;
1338 assoc_kind_t assoc_kind = ANONE;
1339
1340 elem_size = 1;
1341
1342 for (qi = 0, j = sizeof(sort_funcs)/sizeof(sort_funcs[0]); qi < j; qi++) {
1343 if (strcmp(sort_funcs[qi].name, sort_str) == 0)
1344 break;
1345 }
1346
1347 if (qi < j) {
1348 cmp_func = sort_funcs[qi].comp_func;
1349 assoc_kind = sort_funcs[qi].kind;
1350
1351 if (symbol->array_funcs != & cint_array_func)
1352 assoc_kind &= ~(AASC|ADESC);
1353
1354 if (sort_ctxt != SORTED_IN || (assoc_kind & AVALUE) != 0) {
1355 /* need index and value pair in the list */
1356
1357 assoc_kind |= (AINDEX|AVALUE);
1358 elem_size = 2;
1359 }
1360
1361 } else { /* unrecognized */
1362 NODE *f;
1363 const char *sp;
1364
1365 for (sp = sort_str; *sp != '\0' && ! isspace((unsigned char) *sp); sp++)
1366 continue;
1367
1368 /* empty string or string with space(s) not valid as function name */
1369 if (sp == sort_str || *sp != '\0')
1370 fatal(_("`%s' is invalid as a function name"), sort_str);
1371
1372 f = lookup(sort_str);
1373 if (f == NULL || f->type != Node_func)
1374 fatal(_("sort comparison function `%s' is not defined"), sort_str);
1375
1376 cmp_func = sort_user_func;
1377
1378 /* need index and value pair in the list */
1379 assoc_kind |= (AVALUE|AINDEX);
1380 elem_size = 2;
1381
1382 /* make function call instructions */
1383 code = bcalloc(Op_func_call, 2, 0);
1384 code->func_body = f;
1385 code->func_name = NULL; /* not needed, func_body already assigned */
1386 (code + 1)->expr_count = 4; /* function takes 4 arguments */
1387 code->nexti = bcalloc(Op_stop, 1, 0);
1388
1389 /*
1390 * make non-redirected getline, exit, `next' and `nextfile' fatal in
1391 * callback function by setting currule in interpret()
1392 * to undefined (0).
1393 */
1394
1395 save_rule = currule; /* save current rule */
1396 currule = 0;
1397
1398 PUSH_CODE(code);
1399 }
1400
1401 akind.flags = (unsigned int) assoc_kind; /* kludge */
1402 list = symbol->alist(symbol, & akind);
1403 assoc_kind = (assoc_kind_t) akind.flags; /* symbol->alist can modify it */
1404
1405 /* check for empty list or unsorted, or list already sorted */
1406 if (list != NULL && cmp_func != NULL && (assoc_kind & (AASC|ADESC)) == 0) {
1407 num_elems = assoc_length(symbol);
1408
1409 qsort(list, num_elems, elem_size * sizeof(NODE *), cmp_func); /* shazzam! */
1410
1411 if (sort_ctxt == SORTED_IN && (assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE)) {
1412 /* relocate all index nodes to the first half of the list. */
1413 for (j = 1; j < num_elems; j++)
1414 list[j] = list[2 * j];
1415
1416 /* give back extra memory */
1417
1418 erealloc(list, NODE **, num_elems * sizeof(NODE *), "assoc_list");
1419 }
1420 }
1421
1422 if (cmp_func == sort_user_func) {
1423 code = POP_CODE();
1424 currule = save_rule; /* restore current rule */
1425 bcfree(code->nexti); /* Op_stop */
1426 bcfree(code); /* Op_func_call */
1427 }
1428
1429 return list;
1430 }
1431