1 /*
2 * int_array.c - routines for arrays of integer indices.
3 */
4
5 /*
6 * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019, 2020,
7 * the Free Software Foundation, Inc.
8 *
9 * This file is part of GAWK, the GNU implementation of the
10 * AWK Programming Language.
11 *
12 * GAWK is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 3 of the License, or
15 * (at your option) any later version.
16 *
17 * GAWK is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 */
26
27 #include "awk.h"
28
29 extern FILE *output_fp;
30 extern void indent(int indent_level);
31 extern NODE **is_integer(NODE *symbol, NODE *subs);
32
33 static size_t INT_CHAIN_MAX = 2;
34
35 static NODE **int_array_init(NODE *symbol, NODE *subs);
36 static NODE **int_lookup(NODE *symbol, NODE *subs);
37 static NODE **int_exists(NODE *symbol, NODE *subs);
38 static NODE **int_clear(NODE *symbol, NODE *subs);
39 static NODE **int_remove(NODE *symbol, NODE *subs);
40 static NODE **int_list(NODE *symbol, NODE *t);
41 static NODE **int_copy(NODE *symbol, NODE *newsymb);
42 static NODE **int_dump(NODE *symbol, NODE *ndump);
43
44 static uint32_t int_hash(uint32_t k, uint32_t hsize);
45 static inline NODE **int_find(NODE *symbol, long k, uint32_t hash1);
46 static NODE **int_insert(NODE *symbol, long k, uint32_t hash1);
47 static void grow_int_table(NODE *symbol);
48
49 const array_funcs_t int_array_func = {
50 "int",
51 int_array_init,
52 is_integer,
53 int_lookup,
54 int_exists,
55 int_clear,
56 int_remove,
57 int_list,
58 int_copy,
59 int_dump,
60 (afunc_t) 0,
61 };
62
63
64 /* int_array_init --- array initialization routine */
65
66 static NODE **
int_array_init(NODE * symbol,NODE * subs ATTRIBUTE_UNUSED)67 int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
68 {
69 if (symbol == NULL) { /* first time */
70 long newval;
71
72 /* check relevant environment variables */
73 if ((newval = getenv_long("INT_CHAIN_MAX")) > 0)
74 INT_CHAIN_MAX = newval;
75 } else
76 null_array(symbol);
77
78 return & success_node;
79 }
80
81 /*
82 * standard_integer_string -- check whether the string matches what
83 * sprintf("%ld", <value>) would produce. This is accomplished by accepting
84 * only strings that look like /^0$/ or /^-?[1-9][0-9]*$/. This should be
85 * faster than comparing vs. the results of actually calling sprintf.
86 */
87
88 static bool
standard_integer_string(const char * s,size_t len)89 standard_integer_string(const char *s, size_t len)
90 {
91 const char *end;
92
93 if (len == 0)
94 return false;
95 if (*s == '0' && len == 1)
96 return true;
97 end = s + len;
98 /* ignore leading minus sign */
99 if (*s == '-' && ++s == end)
100 return false;
101 /* check first char is [1-9] */
102 if (*s < '1' || *s > '9')
103 return false;
104 while (++s < end) {
105 if (*s < '0' || *s > '9')
106 return false;
107 }
108 return true;
109 }
110
111 /* is_integer --- check if subscript is an integer */
112
113 NODE **
is_integer(NODE * symbol,NODE * subs)114 is_integer(NODE *symbol, NODE *subs)
115 {
116 #ifndef CHECK_INTEGER_USING_FORCE_NUMBER
117 long l;
118 #endif
119 AWKNUM d;
120
121 if ((subs->flags & NUMINT) != 0)
122 /* quick exit */
123 return & success_node;
124
125 if (subs == Nnull_string || do_mpfr)
126 return NULL;
127
128 #ifdef CHECK_INTEGER_USING_FORCE_NUMBER
129 /*
130 * This approach is much simpler, because we remove all of the strtol
131 * logic below. But this may be slower in some usage cases.
132 */
133 if ((subs->flags & NUMCUR) == 0) {
134 str2number(subs);
135
136 /* check again in case force_number set NUMINT */
137 if ((subs->flags & NUMINT) != 0)
138 return & success_node;
139 }
140 #else /* CHECK_INTEGER_USING_FORCE_NUMBER */
141 if ((subs->flags & NUMCUR) != 0) {
142 #endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
143 d = subs->numbr;
144 if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
145 /*
146 * The numeric value is an integer, but we must
147 * protect against strings that cannot be generated
148 * from sprintf("%ld", <subscript>). This can happen
149 * with strnum or string values. We could skip this
150 * check for pure NUMBER values, but unfortunately the
151 * code does not currently distinguish between NUMBER
152 * and strnum values.
153 */
154 if ( (subs->flags & STRCUR) == 0
155 || standard_integer_string(subs->stptr, subs->stlen)) {
156 subs->flags |= NUMINT;
157 return & success_node;
158 }
159 }
160 return NULL;
161 #ifndef CHECK_INTEGER_USING_FORCE_NUMBER
162 }
163
164 /* a[3]=1; print "3" in a -- true
165 * a[3]=1; print "+3" in a -- false
166 * a[3]=1; print "03" in a -- false
167 * a[-3]=1; print "-3" in a -- true
168 */
169
170 /* must be a STRING */
171 char *cp = subs->stptr, *cpend, *ptr;
172 char save;
173 size_t len = subs->stlen;
174
175 if (len == 0 || (! isdigit((unsigned char) *cp) && *cp != '-'))
176 return NULL;
177
178 if (len > 1 &&
179 ((*cp == '0') /* "00", "011" .. */
180 || (*cp == '-' && *(cp + 1) == '0') /* "-0", "-011" .. */
181 )
182 )
183 return NULL;
184 if (len == 1 && *cp != '-') { /* single digit */
185 subs->numbr = (long) (*cp - '0');
186 if ((subs->flags & USER_INPUT) != 0) {
187 /* leave USER_INPUT set */
188 subs->flags &= ~STRING;
189 subs->flags |= NUMBER;
190 }
191 subs->flags |= (NUMCUR|NUMINT);
192 return & success_node;
193 }
194
195 cpend = cp + len;
196 save = *cpend;
197 *cpend = '\0';
198
199 errno = 0;
200 l = strtol(cp, & ptr, 10);
201 *cpend = save;
202 if (errno != 0 || ptr != cpend)
203 return NULL;
204
205 subs->numbr = l;
206 if ((subs->flags & USER_INPUT) != 0) {
207 /* leave USER_INPUT set */
208 subs->flags &= ~STRING;
209 subs->flags |= NUMBER;
210 }
211 subs->flags |= NUMCUR;
212 if (l <= INT32_MAX && l >= INT32_MIN) {
213 subs->flags |= NUMINT;
214 return & success_node;
215 }
216
217 return NULL;
218 #endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
219 }
220
221
222 /*
223 * int_lookup --- Find SYMBOL[SUBS] in the assoc array. Install it with value ""
224 * if it isn't there. Returns a pointer ala get_lhs to where its value is stored.
225 */
226
227 static NODE **
int_lookup(NODE * symbol,NODE * subs)228 int_lookup(NODE *symbol, NODE *subs)
229 {
230 uint32_t hash1;
231 long k;
232 unsigned long size;
233 NODE **lhs;
234 NODE *xn;
235
236 /*
237 * N.B: symbol->table_size is the total # of non-integers (symbol->xarray)
238 * and integer elements. Also, symbol->xarray must have at least one
239 * item in it, and cannot exist if there are no integer elements.
240 * In that case, symbol->xarray is promoted to 'symbol' (See int_remove).
241 */
242
243
244 if (! is_integer(symbol, subs)) {
245 xn = symbol->xarray;
246 if (xn == NULL) {
247 xn = symbol->xarray = make_array();
248 xn->vname = symbol->vname; /* shallow copy */
249 xn->flags |= XARRAY;
250 } else if ((lhs = xn->aexists(xn, subs)) != NULL)
251 return lhs;
252 symbol->table_size++;
253 return assoc_lookup(xn, subs);
254 }
255
256 k = subs->numbr;
257 if (symbol->buckets == NULL)
258 grow_int_table(symbol);
259
260 hash1 = int_hash(k, symbol->array_size);
261 if ((lhs = int_find(symbol, k, hash1)) != NULL)
262 return lhs;
263
264 /* It's not there, install it */
265
266 symbol->table_size++;
267
268 /* first see if we would need to grow the array, before installing */
269 size = symbol->table_size;
270 if ((xn = symbol->xarray) != NULL)
271 size -= xn->table_size;
272
273 if ((symbol->flags & ARRAYMAXED) == 0
274 && (size / symbol->array_size) > INT_CHAIN_MAX) {
275 grow_int_table(symbol);
276 /* have to recompute hash value for new size */
277 hash1 = int_hash(k, symbol->array_size);
278 }
279
280 return int_insert(symbol, k, hash1);
281 }
282
283
284 /*
285 * int_exists --- test whether the array element symbol[subs] exists or not,
286 * return pointer to value if it does.
287 */
288
289 static NODE **
int_exists(NODE * symbol,NODE * subs)290 int_exists(NODE *symbol, NODE *subs)
291 {
292 long k;
293 uint32_t hash1;
294
295 if (! is_integer(symbol, subs)) {
296 NODE *xn = symbol->xarray;
297 if (xn == NULL)
298 return NULL;
299 return xn->aexists(xn, subs);
300 }
301 if (symbol->buckets == NULL)
302 return NULL;
303
304 k = subs->numbr;
305 hash1 = int_hash(k, symbol->array_size);
306 return int_find(symbol, k, hash1);
307 }
308
309 /* int_clear --- flush all the values in symbol[] */
310
311 static NODE **
int_clear(NODE * symbol,NODE * subs ATTRIBUTE_UNUSED)312 int_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
313 {
314 unsigned long i;
315 int j;
316 BUCKET *b, *next;
317 NODE *r;
318
319 if (symbol->xarray != NULL) {
320 NODE *xn = symbol->xarray;
321 assoc_clear(xn);
322 freenode(xn);
323 symbol->xarray = NULL;
324 }
325
326 for (i = 0; i < symbol->array_size; i++) {
327 for (b = symbol->buckets[i]; b != NULL; b = next) {
328 next = b->ainext;
329 for (j = 0; j < b->aicount; j++) {
330 r = b->aivalue[j];
331 if (r->type == Node_var_array) {
332 assoc_clear(r); /* recursively clear all sub-arrays */
333 efree(r->vname);
334 freenode(r);
335 } else
336 unref(r);
337 }
338 freebucket(b);
339 }
340 symbol->buckets[i] = NULL;
341 }
342 if (symbol->buckets != NULL)
343 efree(symbol->buckets);
344 symbol->ainit(symbol, NULL); /* re-initialize symbol */
345 return NULL;
346 }
347
348
349 /* int_remove --- If SUBS is already in the table, remove it. */
350
351 static NODE **
int_remove(NODE * symbol,NODE * subs)352 int_remove(NODE *symbol, NODE *subs)
353 {
354 uint32_t hash1;
355 BUCKET *b, *prev = NULL;
356 long k;
357 int i;
358 NODE *xn = symbol->xarray;
359
360 if (symbol->table_size == 0 || symbol->buckets == NULL)
361 return NULL;
362
363 if (! is_integer(symbol, subs)) {
364 if (xn == NULL || xn->aremove(xn, subs) == NULL)
365 return NULL;
366 if (xn->table_size == 0) {
367 freenode(xn);
368 symbol->xarray = NULL;
369 }
370 symbol->table_size--;
371 assert(symbol->table_size > 0);
372 return & success_node;
373 }
374
375 k = subs->numbr;
376 hash1 = int_hash(k, symbol->array_size);
377
378 for (b = symbol->buckets[hash1]; b != NULL; prev = b, b = b->ainext) {
379 for (i = 0; i < b->aicount; i++) {
380 if (k != b->ainum[i])
381 continue;
382
383 /* item found */
384 if (i == 0 && b->aicount == 2) {
385 /* removing the 1st item; move 2nd item from position 1 to 0 */
386
387 b->ainum[0] = b->ainum[1];
388 b->aivalue[0] = b->aivalue[1];
389 } /* else
390 removing the only item or the 2nd item */
391
392 goto removed;
393 }
394 }
395
396 if (b == NULL) /* item not in array */
397 return NULL;
398
399 removed:
400 b->aicount--;
401
402 if (b->aicount == 0) {
403 /* detach bucket */
404 if (prev != NULL)
405 prev->ainext = b->ainext;
406 else
407 symbol->buckets[hash1] = b->ainext;
408
409 /* delete bucket */
410 freebucket(b);
411 } else if (b != symbol->buckets[hash1]) {
412 BUCKET *head = symbol->buckets[hash1];
413
414 assert(b->aicount == 1);
415 /* move the last element from head to bucket to make it full. */
416 i = --head->aicount; /* head has one less element */
417 b->ainum[1] = head->ainum[i];
418 b->aivalue[1] = head->aivalue[i];
419 b->aicount++; /* bucket has one more element */
420 if (i == 0) {
421 /* head is now empty; delete head */
422 symbol->buckets[hash1] = head->ainext;
423 freebucket(head);
424 }
425 } /* else
426 do nothing */
427
428 symbol->table_size--;
429 if (xn == NULL && symbol->table_size == 0) {
430 efree(symbol->buckets);
431 symbol->ainit(symbol, NULL); /* re-initialize array 'symbol' */
432 } else if (xn != NULL && symbol->table_size == xn->table_size) {
433 /* promote xn (str_array) to symbol */
434 xn->flags &= ~XARRAY;
435 xn->parent_array = symbol->parent_array;
436 efree(symbol->buckets);
437 *symbol = *xn;
438 freenode(xn);
439 }
440
441 return & success_node; /* return success */
442 }
443
444
445 /* int_copy --- duplicate input array "symbol" */
446
447 static NODE **
int_copy(NODE * symbol,NODE * newsymb)448 int_copy(NODE *symbol, NODE *newsymb)
449 {
450 BUCKET **old, **new, **pnew;
451 BUCKET *chain, *newchain;
452 int j;
453 unsigned long i, cursize;
454
455 assert(symbol->buckets != NULL);
456
457 /* find the current hash size */
458 cursize = symbol->array_size;
459
460 /* allocate new table */
461 ezalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "int_copy");
462
463 old = symbol->buckets;
464
465 for (i = 0; i < cursize; i++) {
466 for (chain = old[i], pnew = & new[i]; chain != NULL;
467 chain = chain->ainext
468 ) {
469 getbucket(newchain);
470 newchain->aicount = chain->aicount;
471 newchain->ainext = NULL;
472 for (j = 0; j < chain->aicount; j++) {
473 NODE *oldval;
474
475 /*
476 * copy the corresponding key and
477 * value from the original input list
478 */
479 newchain->ainum[j] = chain->ainum[j];
480
481 oldval = chain->aivalue[j];
482 if (oldval->type == Node_val)
483 newchain->aivalue[j] = dupnode(oldval);
484 else {
485 NODE *r;
486 r = make_array();
487 r->vname = estrdup(oldval->vname, strlen(oldval->vname));
488 r->parent_array = newsymb;
489 newchain->aivalue[j] = assoc_copy(oldval, r);
490 }
491 }
492
493 *pnew = newchain;
494 newchain->ainext = NULL;
495 pnew = & newchain->ainext;
496 }
497 }
498
499 if (symbol->xarray != NULL) {
500 NODE *xn, *n;
501 xn = symbol->xarray;
502 n = make_array();
503 n->vname = newsymb->vname; /* shallow copy */
504 (void) xn->acopy(xn, n);
505 newsymb->xarray = n;
506 } else
507 newsymb->xarray = NULL;
508
509 newsymb->table_size = symbol->table_size;
510 newsymb->buckets = new;
511 newsymb->array_size = cursize;
512 newsymb->flags = symbol->flags;
513
514 return NULL;
515 }
516
517
518 /* int_list --- return a list of array items */
519
520 static NODE**
int_list(NODE * symbol,NODE * t)521 int_list(NODE *symbol, NODE *t)
522 {
523 NODE **list = NULL;
524 unsigned long num_elems, list_size, i, k = 0;
525 BUCKET *b;
526 NODE *r, *subs, *xn;
527 int j, elem_size = 1;
528 long num;
529 static char buf[100];
530 assoc_kind_t assoc_kind;
531
532 if (symbol->table_size == 0)
533 return NULL;
534
535 assoc_kind = (assoc_kind_t) t->flags;
536 num_elems = symbol->table_size;
537 if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
538 num_elems = 1;
539
540 if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
541 elem_size = 2;
542 list_size = elem_size * num_elems;
543
544 if (symbol->xarray != NULL) {
545 xn = symbol->xarray;
546 list = xn->alist(xn, t);
547 assert(list != NULL);
548 if (num_elems == 1 || num_elems == xn->table_size)
549 return list;
550 erealloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
551 k = elem_size * xn->table_size;
552 } else
553 emalloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
554
555 /* populate it */
556
557 for (i = 0; i < symbol->array_size; i++) {
558 for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
559 for (j = 0; j < b->aicount; j++) {
560 /* index */
561 num = b->ainum[j];
562 if ((assoc_kind & AISTR) != 0) {
563 sprintf(buf, "%ld", num);
564 subs = make_string(buf, strlen(buf));
565 subs->numbr = num;
566 subs->flags |= (NUMCUR|NUMINT);
567 } else {
568 subs = make_number((AWKNUM) num);
569 subs->flags |= (INTIND|NUMINT);
570 }
571 list[k++] = subs;
572
573 /* value */
574 if ((assoc_kind & AVALUE) != 0) {
575 r = b->aivalue[j];
576 if (r->type == Node_val) {
577 if ((assoc_kind & AVNUM) != 0)
578 (void) force_number(r);
579 else if ((assoc_kind & AVSTR) != 0)
580 r = force_string(r);
581 }
582 list[k++] = r;
583 }
584
585 if (k >= list_size)
586 return list;
587 }
588 }
589 }
590 return list;
591 }
592
593
594 /* int_kilobytes --- calculate memory consumption of the assoc array */
595
596 AWKNUM
int_kilobytes(NODE * symbol)597 int_kilobytes(NODE *symbol)
598 {
599 unsigned long i, bucket_cnt = 0;
600 BUCKET *b;
601 AWKNUM kb;
602 extern AWKNUM str_kilobytes(NODE *symbol);
603
604 for (i = 0; i < symbol->array_size; i++) {
605 for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
606 bucket_cnt++;
607 }
608 kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
609 ((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
610
611 if (symbol->xarray != NULL)
612 kb += str_kilobytes(symbol->xarray);
613
614 return kb;
615 }
616
617
618 /* int_dump --- dump array info */
619
620 static NODE **
int_dump(NODE * symbol,NODE * ndump)621 int_dump(NODE *symbol, NODE *ndump)
622 {
623 #define HCNT 31
624
625 int indent_level;
626 BUCKET *b;
627 NODE *xn = NULL;
628 unsigned long str_size = 0, int_size = 0;
629 unsigned long i;
630 size_t j, bucket_cnt;
631 static size_t hash_dist[HCNT + 1];
632
633 indent_level = ndump->alevel;
634
635 if (symbol->xarray != NULL) {
636 xn = symbol->xarray;
637 str_size = xn->table_size;
638 }
639 int_size = symbol->table_size - str_size;
640
641 if ((symbol->flags & XARRAY) == 0)
642 fprintf(output_fp, "%s `%s'\n",
643 (symbol->parent_array == NULL) ? "array" : "sub-array",
644 array_vname(symbol));
645
646 indent_level++;
647 indent(indent_level);
648 fprintf(output_fp, "array_func: int_array_func\n");
649 if (symbol->flags != 0) {
650 indent(indent_level);
651 fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
652 }
653 indent(indent_level);
654 fprintf(output_fp, "INT_CHAIN_MAX: %lu\n", (unsigned long) INT_CHAIN_MAX);
655 indent(indent_level);
656 fprintf(output_fp, "array_size: %lu (int)\n", (unsigned long) symbol->array_size);
657 indent(indent_level);
658 fprintf(output_fp, "table_size: %lu (total), %lu (int), %lu (str)\n",
659 (unsigned long) symbol->table_size, int_size, str_size);
660 indent(indent_level);
661 fprintf(output_fp, "Avg # of items per chain (int): %.2g\n",
662 ((AWKNUM) int_size) / symbol->array_size);
663
664 indent(indent_level);
665 fprintf(output_fp, "memory: %.2g kB (total)\n", int_kilobytes(symbol));
666
667 /* hash value distribution */
668
669 memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
670 for (i = 0; i < symbol->array_size; i++) {
671 bucket_cnt = 0;
672 for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
673 bucket_cnt += b->aicount;
674 if (bucket_cnt >= HCNT)
675 bucket_cnt = HCNT;
676 hash_dist[bucket_cnt]++;
677 }
678
679 indent(indent_level);
680 fprintf(output_fp, "Hash distribution:\n");
681 indent_level++;
682 for (j = 0; j <= HCNT; j++) {
683 if (hash_dist[j] > 0) {
684 indent(indent_level);
685 if (j == HCNT)
686 fprintf(output_fp, "[>=%lu]:%lu\n",
687 (unsigned long) HCNT, (unsigned long) hash_dist[j]);
688 else
689 fprintf(output_fp, "[%lu]:%lu\n",
690 (unsigned long) j, (unsigned long) hash_dist[j]);
691 }
692 }
693 indent_level--;
694
695 /* dump elements */
696
697 if (ndump->adepth >= 0) {
698 NODE *subs;
699 const char *aname;
700
701 fprintf(output_fp, "\n");
702
703 aname = make_aname(symbol);
704 subs = make_number((AWKNUM) 0);
705 subs->flags |= (INTIND|NUMINT);
706
707 for (i = 0; i < symbol->array_size; i++) {
708 for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
709 for (j = 0; j < b->aicount; j++) {
710 subs->numbr = b->ainum[j];
711 assoc_info(subs, b->aivalue[j], ndump, aname);
712 }
713 }
714 }
715 unref(subs);
716 }
717
718 if (xn != NULL) {
719 fprintf(output_fp, "\n");
720 xn->adump(xn, ndump);
721 }
722
723 return NULL;
724
725 #undef HCNT
726 }
727
728
729 /* int_hash --- calculate the hash function of the integer subs */
730
731 static uint32_t
int_hash(uint32_t k,uint32_t hsize)732 int_hash(uint32_t k, uint32_t hsize)
733 {
734
735 /*
736 * Code snippet copied from:
737 * Hash functions (http://www.azillionmonkeys.com/qed/hash.html).
738 * Copyright 2004-2008 by Paul Hsieh. Licenced under LGPL 2.1.
739 */
740
741 /* This is the final mixing function used by Paul Hsieh in SuperFastHash. */
742
743 k ^= k << 3;
744 k += k >> 5;
745 k ^= k << 4;
746 k += k >> 17;
747 k ^= k << 25;
748 k += k >> 6;
749
750 if (k >= hsize)
751 k %= hsize;
752 return k;
753 }
754
755 /* int_find --- locate symbol[subs] */
756
757 static inline NODE **
int_find(NODE * symbol,long k,uint32_t hash1)758 int_find(NODE *symbol, long k, uint32_t hash1)
759 {
760 BUCKET *b;
761 int i;
762
763 assert(symbol->buckets != NULL);
764 for (b = symbol->buckets[hash1]; b != NULL; b = b->ainext) {
765 for (i = 0; i < b->aicount; i++) {
766 if (b->ainum[i] == k)
767 return (b->aivalue + i);
768 }
769 }
770 return NULL;
771 }
772
773
774 /* int_insert --- install subs in the assoc array */
775
776 static NODE **
int_insert(NODE * symbol,long k,uint32_t hash1)777 int_insert(NODE *symbol, long k, uint32_t hash1)
778 {
779 BUCKET *b;
780 int i;
781
782 b = symbol->buckets[hash1];
783
784 /* Only the first bucket in the chain can be partially full, but is never empty. */
785
786 if (b == NULL || (i = b->aicount) == 2) {
787 getbucket(b);
788 b->aicount = 0;
789 b->ainext = symbol->buckets[hash1];
790 symbol->buckets[hash1] = b;
791 i = 0;
792 }
793
794 b->ainum[i] = k;
795 b->aivalue[i] = dupnode(Nnull_string);
796 b->aicount++;
797 return & b->aivalue[i];
798 }
799
800
801 /* grow_int_table --- grow the hash table */
802
803 static void
grow_int_table(NODE * symbol)804 grow_int_table(NODE *symbol)
805 {
806 BUCKET **old, **new;
807 BUCKET *chain, *next;
808 int i, j;
809 unsigned long oldsize, newsize, k;
810
811 /*
812 * This is an array of primes. We grow the table by an order of
813 * magnitude each time (not just doubling) so that growing is a
814 * rare operation. We expect, on average, that it won't happen
815 * more than twice. The final size is also chosen to be small
816 * enough so that MS-DOG mallocs can handle it. When things are
817 * very large (> 8K), we just double more or less, instead of
818 * just jumping from 8K to 64K.
819 */
820
821 static const unsigned long sizes[] = {
822 13, 127, 1021, 8191, 16381, 32749, 65497,
823 131101, 262147, 524309, 1048583, 2097169,
824 4194319, 8388617, 16777259, 33554467,
825 67108879, 134217757, 268435459, 536870923,
826 1073741827
827 };
828
829 /* find next biggest hash size */
830 newsize = oldsize = symbol->array_size;
831
832 for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
833 if (oldsize < sizes[i]) {
834 newsize = sizes[i];
835 break;
836 }
837 }
838 if (newsize == oldsize) { /* table already at max (!) */
839 symbol->flags |= ARRAYMAXED;
840 return;
841 }
842
843 /* allocate new table */
844 ezalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_int_table");
845
846 old = symbol->buckets;
847 symbol->buckets = new;
848 symbol->array_size = newsize;
849
850 /* brand new hash table */
851 if (old == NULL)
852 return; /* DO NOT initialize symbol->table_size */
853
854 /* old hash table there, move stuff to new, free old */
855 /* note that symbol->table_size does not change if an old array. */
856
857 for (k = 0; k < oldsize; k++) {
858 long num;
859 for (chain = old[k]; chain != NULL; chain = next) {
860 for (i = 0; i < chain->aicount; i++) {
861 num = chain->ainum[i];
862 *int_insert(symbol, num, int_hash(num, newsize)) = chain->aivalue[i];
863 }
864 next = chain->ainext;
865 freebucket(chain);
866 }
867 }
868 efree(old);
869 }
870