1 /*
2  * int_array.c - routines for arrays of integer indices.
3  */
4 
5 /*
6  * Copyright (C) 1986, 1988, 1989, 1991-2013, 2016, 2017, 2019, 2020,
7  * the Free Software Foundation, Inc.
8  *
9  * This file is part of GAWK, the GNU implementation of the
10  * AWK Programming Language.
11  *
12  * GAWK is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 3 of the License, or
15  * (at your option) any later version.
16  *
17  * GAWK is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
25  */
26 
27 #include "awk.h"
28 
29 extern FILE *output_fp;
30 extern void indent(int indent_level);
31 extern NODE **is_integer(NODE *symbol, NODE *subs);
32 
33 static size_t INT_CHAIN_MAX = 2;
34 
35 static NODE **int_array_init(NODE *symbol, NODE *subs);
36 static NODE **int_lookup(NODE *symbol, NODE *subs);
37 static NODE **int_exists(NODE *symbol, NODE *subs);
38 static NODE **int_clear(NODE *symbol, NODE *subs);
39 static NODE **int_remove(NODE *symbol, NODE *subs);
40 static NODE **int_list(NODE *symbol, NODE *t);
41 static NODE **int_copy(NODE *symbol, NODE *newsymb);
42 static NODE **int_dump(NODE *symbol, NODE *ndump);
43 
44 static uint32_t int_hash(uint32_t k, uint32_t hsize);
45 static inline NODE **int_find(NODE *symbol, long k, uint32_t hash1);
46 static NODE **int_insert(NODE *symbol, long k, uint32_t hash1);
47 static void grow_int_table(NODE *symbol);
48 
49 const array_funcs_t int_array_func = {
50 	"int",
51 	int_array_init,
52 	is_integer,
53 	int_lookup,
54 	int_exists,
55 	int_clear,
56 	int_remove,
57 	int_list,
58 	int_copy,
59 	int_dump,
60 	(afunc_t) 0,
61 };
62 
63 
64 /* int_array_init --- array initialization routine */
65 
66 static NODE **
int_array_init(NODE * symbol,NODE * subs ATTRIBUTE_UNUSED)67 int_array_init(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
68 {
69 	if (symbol == NULL) {	/* first time */
70 		long newval;
71 
72 		/* check relevant environment variables */
73 		if ((newval = getenv_long("INT_CHAIN_MAX")) > 0)
74 			INT_CHAIN_MAX = newval;
75 	} else
76 		null_array(symbol);
77 
78 	return & success_node;
79 }
80 
81 /*
82  * standard_integer_string -- check whether the string matches what
83  * sprintf("%ld", <value>) would produce. This is accomplished by accepting
84  * only strings that look like /^0$/ or /^-?[1-9][0-9]*$/. This should be
85  * faster than comparing vs. the results of actually calling sprintf.
86  */
87 
88 static bool
standard_integer_string(const char * s,size_t len)89 standard_integer_string(const char *s, size_t len)
90 {
91 	const char *end;
92 
93 	if (len == 0)
94 		return false;
95 	if (*s == '0' && len == 1)
96 		return true;
97 	end = s + len;
98 	/* ignore leading minus sign */
99 	if (*s == '-' && ++s == end)
100 		return false;
101 	/* check first char is [1-9] */
102 	if (*s < '1' || *s > '9')
103 		return false;
104 	while (++s < end) {
105 		if (*s < '0' || *s > '9')
106 			return false;
107 	}
108 	return true;
109 }
110 
111 /* is_integer --- check if subscript is an integer */
112 
113 NODE **
is_integer(NODE * symbol,NODE * subs)114 is_integer(NODE *symbol, NODE *subs)
115 {
116 #ifndef CHECK_INTEGER_USING_FORCE_NUMBER
117 	long l;
118 #endif
119 	AWKNUM d;
120 
121 	if ((subs->flags & NUMINT) != 0)
122 		/* quick exit */
123 		return & success_node;
124 
125 	if (subs == Nnull_string || do_mpfr)
126 		return NULL;
127 
128 #ifdef CHECK_INTEGER_USING_FORCE_NUMBER
129 	/*
130 	 * This approach is much simpler, because we remove all of the strtol
131 	 * logic below. But this may be slower in some usage cases.
132 	 */
133 	if ((subs->flags & NUMCUR) == 0) {
134 		str2number(subs);
135 
136 		/* check again in case force_number set NUMINT */
137 		if ((subs->flags & NUMINT) != 0)
138 			return & success_node;
139 	}
140 #else /* CHECK_INTEGER_USING_FORCE_NUMBER */
141 	if ((subs->flags & NUMCUR) != 0) {
142 #endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
143 		d = subs->numbr;
144 		if (d <= INT32_MAX && d >= INT32_MIN && d == (int32_t) d) {
145 			/*
146 			 * The numeric value is an integer, but we must
147 			 * protect against strings that cannot be generated
148 			 * from sprintf("%ld", <subscript>). This can happen
149 			 * with strnum or string values. We could skip this
150 			 * check for pure NUMBER values, but unfortunately the
151 			 * code does not currently distinguish between NUMBER
152 			 * and strnum values.
153 			 */
154 			if (   (subs->flags & STRCUR) == 0
155 			    || standard_integer_string(subs->stptr, subs->stlen)) {
156 				subs->flags |= NUMINT;
157 				return & success_node;
158 			}
159 		}
160 		return NULL;
161 #ifndef CHECK_INTEGER_USING_FORCE_NUMBER
162 	}
163 
164 	/* a[3]=1; print "3" in a    -- true
165 	 * a[3]=1; print "+3" in a   -- false
166 	 * a[3]=1; print "03" in a   -- false
167 	 * a[-3]=1; print "-3" in a  -- true
168 	 */
169 
170 	/* must be a STRING */
171 	char *cp = subs->stptr, *cpend, *ptr;
172 	char save;
173 	size_t len = subs->stlen;
174 
175 	if (len == 0 || (! isdigit((unsigned char) *cp) && *cp != '-'))
176 		return NULL;
177 
178 	if (len > 1 &&
179 		((*cp == '0')		/* "00", "011" .. */
180 			|| (*cp == '-' && *(cp + 1) == '0')	/* "-0", "-011" .. */
181 		)
182 	)
183 		return NULL;
184 	if (len == 1 && *cp != '-') {	/* single digit */
185 		subs->numbr = (long) (*cp - '0');
186 		if ((subs->flags & USER_INPUT) != 0) {
187 			/* leave USER_INPUT set */
188 			subs->flags &= ~STRING;
189 			subs->flags |= NUMBER;
190 		}
191 		subs->flags |= (NUMCUR|NUMINT);
192 		return & success_node;
193 	}
194 
195 	cpend = cp + len;
196 	save = *cpend;
197 	*cpend = '\0';
198 
199 	errno = 0;
200 	l = strtol(cp, & ptr, 10);
201 	*cpend = save;
202 	if (errno != 0 || ptr != cpend)
203 		return NULL;
204 
205 	subs->numbr = l;
206 	if ((subs->flags & USER_INPUT) != 0) {
207 		/* leave USER_INPUT set */
208 		subs->flags &= ~STRING;
209 		subs->flags |= NUMBER;
210 	}
211 	subs->flags |= NUMCUR;
212 	if (l <= INT32_MAX && l >= INT32_MIN) {
213 		subs->flags |= NUMINT;
214 		return & success_node;
215 	}
216 
217 	return NULL;
218 #endif /* CHECK_INTEGER_USING_FORCE_NUMBER */
219 }
220 
221 
222 /*
223  * int_lookup --- Find SYMBOL[SUBS] in the assoc array.  Install it with value ""
224  * if it isn't there. Returns a pointer ala get_lhs to where its value is stored.
225  */
226 
227 static NODE **
int_lookup(NODE * symbol,NODE * subs)228 int_lookup(NODE *symbol, NODE *subs)
229 {
230 	uint32_t hash1;
231 	long k;
232 	unsigned long size;
233 	NODE **lhs;
234 	NODE *xn;
235 
236 	/*
237 	 * N.B: symbol->table_size is the total # of non-integers (symbol->xarray)
238 	 *	and integer elements. Also, symbol->xarray must have at least one
239 	 *	item in it, and cannot exist if there are no integer elements.
240 	 * 	In that case, symbol->xarray is promoted to 'symbol' (See int_remove).
241 	 */
242 
243 
244 	if (! is_integer(symbol, subs)) {
245 		xn = symbol->xarray;
246 		if (xn == NULL) {
247 			xn = symbol->xarray = make_array();
248 			xn->vname = symbol->vname;	/* shallow copy */
249 			xn->flags |= XARRAY;
250 		} else if ((lhs = xn->aexists(xn, subs)) != NULL)
251 			return lhs;
252 		symbol->table_size++;
253 		return assoc_lookup(xn, subs);
254 	}
255 
256 	k = subs->numbr;
257 	if (symbol->buckets == NULL)
258 		grow_int_table(symbol);
259 
260  	hash1 = int_hash(k, symbol->array_size);
261 	if ((lhs = int_find(symbol, k, hash1)) != NULL)
262 		return lhs;
263 
264 	/* It's not there, install it */
265 
266 	symbol->table_size++;
267 
268 	/* first see if we would need to grow the array, before installing */
269 	size = symbol->table_size;
270 	if ((xn = symbol->xarray) != NULL)
271 		size -= xn->table_size;
272 
273 	if ((symbol->flags & ARRAYMAXED) == 0
274 		    && (size / symbol->array_size) > INT_CHAIN_MAX) {
275 		grow_int_table(symbol);
276 		/* have to recompute hash value for new size */
277 		hash1 = int_hash(k, symbol->array_size);
278 	}
279 
280 	return int_insert(symbol, k, hash1);
281 }
282 
283 
284 /*
285  * int_exists --- test whether the array element symbol[subs] exists or not,
286  *	return pointer to value if it does.
287  */
288 
289 static NODE **
int_exists(NODE * symbol,NODE * subs)290 int_exists(NODE *symbol, NODE *subs)
291 {
292 	long k;
293 	uint32_t hash1;
294 
295 	if (! is_integer(symbol, subs)) {
296 		NODE *xn = symbol->xarray;
297 		if (xn == NULL)
298 			return NULL;
299 		return xn->aexists(xn, subs);
300 	}
301 	if (symbol->buckets == NULL)
302 		return NULL;
303 
304 	k = subs->numbr;
305 	hash1 = int_hash(k, symbol->array_size);
306 	return int_find(symbol, k, hash1);
307 }
308 
309 /* int_clear --- flush all the values in symbol[] */
310 
311 static NODE **
int_clear(NODE * symbol,NODE * subs ATTRIBUTE_UNUSED)312 int_clear(NODE *symbol, NODE *subs ATTRIBUTE_UNUSED)
313 {
314 	unsigned long i;
315 	int j;
316 	BUCKET *b, *next;
317 	NODE *r;
318 
319 	if (symbol->xarray != NULL) {
320 		NODE *xn = symbol->xarray;
321 		assoc_clear(xn);
322 		freenode(xn);
323 		symbol->xarray = NULL;
324 	}
325 
326 	for (i = 0; i < symbol->array_size; i++) {
327 		for (b = symbol->buckets[i]; b != NULL;	b = next) {
328 			next = b->ainext;
329 			for (j = 0; j < b->aicount; j++) {
330 				r = b->aivalue[j];
331 				if (r->type == Node_var_array) {
332 					assoc_clear(r);	/* recursively clear all sub-arrays */
333 					efree(r->vname);
334 					freenode(r);
335 				} else
336 					unref(r);
337 			}
338 			freebucket(b);
339 		}
340 		symbol->buckets[i] = NULL;
341 	}
342 	if (symbol->buckets != NULL)
343 		efree(symbol->buckets);
344 	symbol->ainit(symbol, NULL);	/* re-initialize symbol */
345 	return NULL;
346 }
347 
348 
349 /* int_remove --- If SUBS is already in the table, remove it. */
350 
351 static NODE **
int_remove(NODE * symbol,NODE * subs)352 int_remove(NODE *symbol, NODE *subs)
353 {
354 	uint32_t hash1;
355 	BUCKET *b, *prev = NULL;
356 	long k;
357 	int i;
358 	NODE *xn = symbol->xarray;
359 
360 	if (symbol->table_size == 0 || symbol->buckets == NULL)
361 		return NULL;
362 
363 	if (! is_integer(symbol, subs)) {
364 		if (xn == NULL || xn->aremove(xn, subs) == NULL)
365 			return NULL;
366 		if (xn->table_size == 0) {
367 			freenode(xn);
368 			symbol->xarray = NULL;
369 		}
370 		symbol->table_size--;
371 		assert(symbol->table_size > 0);
372 		return & success_node;
373 	}
374 
375 	k = subs->numbr;
376 	hash1 = int_hash(k, symbol->array_size);
377 
378 	for (b = symbol->buckets[hash1]; b != NULL; prev = b, b = b->ainext) {
379 		for (i = 0; i < b->aicount; i++) {
380 			if (k != b->ainum[i])
381 				continue;
382 
383 			/* item found */
384 			if (i == 0 && b->aicount == 2) {
385 				/* removing the 1st item; move 2nd item from position 1 to 0 */
386 
387 				b->ainum[0] = b->ainum[1];
388 				b->aivalue[0] = b->aivalue[1];
389 			} /* else
390 				removing the only item or the 2nd item */
391 
392 			goto removed;
393 		}
394 	}
395 
396 	if (b == NULL)	/* item not in array */
397 		return NULL;
398 
399 removed:
400 	b->aicount--;
401 
402 	if (b->aicount == 0) {
403 		/* detach bucket */
404 		if (prev != NULL)
405 			prev->ainext = b->ainext;
406 		else
407 			symbol->buckets[hash1] = b->ainext;
408 
409 		/* delete bucket */
410 		freebucket(b);
411 	} else if (b != symbol->buckets[hash1]) {
412 		BUCKET *head = symbol->buckets[hash1];
413 
414 		assert(b->aicount == 1);
415 		/* move the last element from head to bucket to make it full. */
416 		i = --head->aicount;	/* head has one less element */
417 		b->ainum[1] = head->ainum[i];
418 		b->aivalue[1] = head->aivalue[i];
419 		b->aicount++;	/* bucket has one more element */
420 		if (i == 0) {
421 			/* head is now empty; delete head */
422 			symbol->buckets[hash1] = head->ainext;
423 			freebucket(head);
424 		}
425 	} /* else
426 		do nothing */
427 
428 	symbol->table_size--;
429 	if (xn == NULL && symbol->table_size == 0) {
430 		efree(symbol->buckets);
431 		symbol->ainit(symbol, NULL);	/* re-initialize array 'symbol' */
432 	} else if (xn != NULL && symbol->table_size == xn->table_size) {
433 		/* promote xn (str_array) to symbol */
434 		xn->flags &= ~XARRAY;
435 		xn->parent_array = symbol->parent_array;
436 		efree(symbol->buckets);
437 		*symbol = *xn;
438 		freenode(xn);
439 	}
440 
441 	return & success_node;	/* return success */
442 }
443 
444 
445 /* int_copy --- duplicate input array "symbol" */
446 
447 static NODE **
int_copy(NODE * symbol,NODE * newsymb)448 int_copy(NODE *symbol, NODE *newsymb)
449 {
450 	BUCKET **old, **new, **pnew;
451 	BUCKET *chain, *newchain;
452 	int j;
453 	unsigned long i, cursize;
454 
455 	assert(symbol->buckets != NULL);
456 
457 	/* find the current hash size */
458 	cursize = symbol->array_size;
459 
460 	/* allocate new table */
461 	ezalloc(new, BUCKET **, cursize * sizeof(BUCKET *), "int_copy");
462 
463 	old = symbol->buckets;
464 
465 	for (i = 0; i < cursize; i++) {
466 		for (chain = old[i], pnew = & new[i]; chain != NULL;
467 				chain = chain->ainext
468 		) {
469 			getbucket(newchain);
470 			newchain->aicount = chain->aicount;
471 			newchain->ainext = NULL;
472 			for (j = 0; j < chain->aicount; j++) {
473 				NODE *oldval;
474 
475 				/*
476 				 * copy the corresponding key and
477 				 * value from the original input list
478 				 */
479 				newchain->ainum[j] = chain->ainum[j];
480 
481 				oldval = chain->aivalue[j];
482 				if (oldval->type == Node_val)
483 					newchain->aivalue[j] = dupnode(oldval);
484 				else {
485 					NODE *r;
486 					r = make_array();
487 					r->vname = estrdup(oldval->vname, strlen(oldval->vname));
488 					r->parent_array = newsymb;
489 					newchain->aivalue[j] = assoc_copy(oldval, r);
490 				}
491 			}
492 
493 			*pnew = newchain;
494 			newchain->ainext = NULL;
495 			pnew = & newchain->ainext;
496 		}
497 	}
498 
499 	if (symbol->xarray != NULL) {
500 		NODE *xn, *n;
501 		xn = symbol->xarray;
502 		n = make_array();
503 		n->vname = newsymb->vname;	/* shallow copy */
504 		(void) xn->acopy(xn, n);
505 		newsymb->xarray = n;
506 	} else
507 		newsymb->xarray = NULL;
508 
509 	newsymb->table_size = symbol->table_size;
510 	newsymb->buckets = new;
511 	newsymb->array_size = cursize;
512 	newsymb->flags = symbol->flags;
513 
514 	return NULL;
515 }
516 
517 
518 /* int_list --- return a list of array items */
519 
520 static NODE**
int_list(NODE * symbol,NODE * t)521 int_list(NODE *symbol, NODE *t)
522 {
523 	NODE **list = NULL;
524 	unsigned long num_elems, list_size, i, k = 0;
525 	BUCKET *b;
526 	NODE *r, *subs, *xn;
527 	int j, elem_size = 1;
528 	long num;
529 	static char buf[100];
530 	assoc_kind_t assoc_kind;
531 
532 	if (symbol->table_size == 0)
533 		return NULL;
534 
535 	assoc_kind = (assoc_kind_t) t->flags;
536 	num_elems = symbol->table_size;
537 	if ((assoc_kind & (AINDEX|AVALUE|ADELETE)) == (AINDEX|ADELETE))
538 		num_elems = 1;
539 
540 	if ((assoc_kind & (AINDEX|AVALUE)) == (AINDEX|AVALUE))
541 		elem_size = 2;
542 	list_size = elem_size * num_elems;
543 
544 	if (symbol->xarray != NULL) {
545 		xn = symbol->xarray;
546 		list = xn->alist(xn, t);
547 		assert(list != NULL);
548 		if (num_elems == 1 || num_elems == xn->table_size)
549 			return list;
550 		erealloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
551 		k = elem_size * xn->table_size;
552 	} else
553 		emalloc(list, NODE **, list_size * sizeof(NODE *), "int_list");
554 
555 	/* populate it */
556 
557 	for (i = 0; i < symbol->array_size; i++) {
558 		for (b = symbol->buckets[i]; b != NULL;	b = b->ainext) {
559 			for (j = 0; j < b->aicount; j++) {
560 				/* index */
561 				num = b->ainum[j];
562 				if ((assoc_kind & AISTR) != 0) {
563 					sprintf(buf, "%ld", num);
564 					subs = make_string(buf, strlen(buf));
565 					subs->numbr = num;
566 					subs->flags |= (NUMCUR|NUMINT);
567 				} else {
568 					subs = make_number((AWKNUM) num);
569 					subs->flags |= (INTIND|NUMINT);
570 				}
571 				list[k++] = subs;
572 
573 				/* value */
574 				if ((assoc_kind & AVALUE) != 0) {
575 					r = b->aivalue[j];
576 					if (r->type == Node_val) {
577 						if ((assoc_kind & AVNUM) != 0)
578 							(void) force_number(r);
579 						else if ((assoc_kind & AVSTR) != 0)
580 							r = force_string(r);
581 					}
582 					list[k++] = r;
583 				}
584 
585 				if (k >= list_size)
586 					return list;
587 			}
588 		}
589 	}
590 	return list;
591 }
592 
593 
594 /* int_kilobytes --- calculate memory consumption of the assoc array */
595 
596 AWKNUM
int_kilobytes(NODE * symbol)597 int_kilobytes(NODE *symbol)
598 {
599 	unsigned long i, bucket_cnt = 0;
600 	BUCKET *b;
601 	AWKNUM kb;
602 	extern AWKNUM str_kilobytes(NODE *symbol);
603 
604 	for (i = 0; i < symbol->array_size; i++) {
605 		for (b = symbol->buckets[i]; b != NULL; b = b->ainext)
606 			bucket_cnt++;
607 	}
608 	kb = (((AWKNUM) bucket_cnt) * sizeof (BUCKET) +
609 			((AWKNUM) symbol->array_size) * sizeof (BUCKET *)) / 1024.0;
610 
611 	if (symbol->xarray != NULL)
612 		kb += str_kilobytes(symbol->xarray);
613 
614 	return kb;
615 }
616 
617 
618 /* int_dump --- dump array info */
619 
620 static NODE **
int_dump(NODE * symbol,NODE * ndump)621 int_dump(NODE *symbol, NODE *ndump)
622 {
623 #define HCNT	31
624 
625 	int indent_level;
626 	BUCKET *b;
627 	NODE *xn = NULL;
628 	unsigned long str_size = 0, int_size = 0;
629 	unsigned long i;
630 	size_t j, bucket_cnt;
631 	static size_t hash_dist[HCNT + 1];
632 
633 	indent_level = ndump->alevel;
634 
635 	if (symbol->xarray != NULL) {
636 		xn = symbol->xarray;
637 		str_size = xn->table_size;
638 	}
639 	int_size = symbol->table_size - str_size;
640 
641 	if ((symbol->flags & XARRAY) == 0)
642 		fprintf(output_fp, "%s `%s'\n",
643 				(symbol->parent_array == NULL) ? "array" : "sub-array",
644 				array_vname(symbol));
645 
646 	indent_level++;
647 	indent(indent_level);
648 	fprintf(output_fp, "array_func: int_array_func\n");
649 	if (symbol->flags != 0) {
650 		indent(indent_level);
651 		fprintf(output_fp, "flags: %s\n", flags2str(symbol->flags));
652 	}
653 	indent(indent_level);
654 	fprintf(output_fp, "INT_CHAIN_MAX: %lu\n", (unsigned long) INT_CHAIN_MAX);
655 	indent(indent_level);
656 	fprintf(output_fp, "array_size: %lu (int)\n", (unsigned long) symbol->array_size);
657 	indent(indent_level);
658 	fprintf(output_fp, "table_size: %lu (total), %lu (int), %lu (str)\n",
659 			(unsigned long) symbol->table_size, int_size, str_size);
660 	indent(indent_level);
661 	fprintf(output_fp, "Avg # of items per chain (int): %.2g\n",
662 			((AWKNUM) int_size) / symbol->array_size);
663 
664 	indent(indent_level);
665 	fprintf(output_fp, "memory: %.2g kB (total)\n", int_kilobytes(symbol));
666 
667 	/* hash value distribution */
668 
669 	memset(hash_dist, '\0', (HCNT + 1) * sizeof(size_t));
670 	for (i = 0; i < symbol->array_size; i++) {
671 		bucket_cnt = 0;
672 		for (b = symbol->buckets[i]; b != NULL;	b = b->ainext)
673 			bucket_cnt += b->aicount;
674 		if (bucket_cnt >= HCNT)
675 			bucket_cnt = HCNT;
676 		hash_dist[bucket_cnt]++;
677 	}
678 
679 	indent(indent_level);
680 	fprintf(output_fp, "Hash distribution:\n");
681 	indent_level++;
682 	for (j = 0; j <= HCNT; j++) {
683 		if (hash_dist[j] > 0) {
684 			indent(indent_level);
685 			if (j == HCNT)
686 				fprintf(output_fp, "[>=%lu]:%lu\n",
687 					(unsigned long) HCNT, (unsigned long) hash_dist[j]);
688 			else
689 				fprintf(output_fp, "[%lu]:%lu\n",
690 					(unsigned long) j, (unsigned long) hash_dist[j]);
691 		}
692 	}
693 	indent_level--;
694 
695 	/* dump elements */
696 
697 	if (ndump->adepth >= 0) {
698 		NODE *subs;
699 		const char *aname;
700 
701 		fprintf(output_fp, "\n");
702 
703 		aname = make_aname(symbol);
704 		subs = make_number((AWKNUM) 0);
705 		subs->flags |= (INTIND|NUMINT);
706 
707 		for (i = 0; i < symbol->array_size; i++) {
708 			for (b = symbol->buckets[i]; b != NULL; b = b->ainext) {
709 				for (j = 0; j < b->aicount; j++) {
710 					subs->numbr = b->ainum[j];
711 					assoc_info(subs, b->aivalue[j], ndump, aname);
712 				}
713 			}
714 		}
715 		unref(subs);
716 	}
717 
718 	if (xn != NULL)	{
719 		fprintf(output_fp, "\n");
720 		xn->adump(xn, ndump);
721 	}
722 
723 	return NULL;
724 
725 #undef HCNT
726 }
727 
728 
729 /* int_hash --- calculate the hash function of the integer subs */
730 
731 static uint32_t
int_hash(uint32_t k,uint32_t hsize)732 int_hash(uint32_t k, uint32_t hsize)
733 {
734 
735 /*
736  * Code snippet copied from:
737  *	Hash functions (http://www.azillionmonkeys.com/qed/hash.html).
738  *	Copyright 2004-2008 by Paul Hsieh. Licenced under LGPL 2.1.
739  */
740 
741 	/* This is the final mixing function used by Paul Hsieh in SuperFastHash. */
742 
743 	k ^= k << 3;
744 	k += k >> 5;
745 	k ^= k << 4;
746 	k += k >> 17;
747 	k ^= k << 25;
748 	k += k >> 6;
749 
750 	if (k >= hsize)
751 		k %= hsize;
752 	return k;
753 }
754 
755 /* int_find --- locate symbol[subs] */
756 
757 static inline NODE **
int_find(NODE * symbol,long k,uint32_t hash1)758 int_find(NODE *symbol, long k, uint32_t hash1)
759 {
760 	BUCKET *b;
761 	int i;
762 
763 	assert(symbol->buckets != NULL);
764 	for (b = symbol->buckets[hash1]; b != NULL; b = b->ainext) {
765 		for (i = 0; i < b->aicount; i++) {
766 			if (b->ainum[i] == k)
767 				return (b->aivalue + i);
768 		}
769 	}
770 	return NULL;
771 }
772 
773 
774 /* int_insert --- install subs in the assoc array */
775 
776 static NODE **
int_insert(NODE * symbol,long k,uint32_t hash1)777 int_insert(NODE *symbol, long k, uint32_t hash1)
778 {
779 	BUCKET *b;
780 	int i;
781 
782 	b = symbol->buckets[hash1];
783 
784 	/* Only the first bucket in the chain can be partially full, but is never empty. */
785 
786 	if (b == NULL || (i = b->aicount) == 2) {
787 		getbucket(b);
788 		b->aicount = 0;
789 		b->ainext = symbol->buckets[hash1];
790 		symbol->buckets[hash1] = b;
791 		i = 0;
792 	}
793 
794 	b->ainum[i] = k;
795 	b->aivalue[i] = dupnode(Nnull_string);
796 	b->aicount++;
797 	return & b->aivalue[i];
798 }
799 
800 
801 /* grow_int_table --- grow the hash table */
802 
803 static void
grow_int_table(NODE * symbol)804 grow_int_table(NODE *symbol)
805 {
806 	BUCKET **old, **new;
807 	BUCKET *chain, *next;
808 	int i, j;
809 	unsigned long oldsize, newsize, k;
810 
811 	/*
812 	 * This is an array of primes. We grow the table by an order of
813 	 * magnitude each time (not just doubling) so that growing is a
814 	 * rare operation. We expect, on average, that it won't happen
815 	 * more than twice.  The final size is also chosen to be small
816 	 * enough so that MS-DOG mallocs can handle it. When things are
817 	 * very large (> 8K), we just double more or less, instead of
818 	 * just jumping from 8K to 64K.
819 	 */
820 
821 	static const unsigned long sizes[] = {
822 		13, 127, 1021, 8191, 16381, 32749, 65497,
823 		131101, 262147, 524309, 1048583, 2097169,
824 		4194319, 8388617, 16777259, 33554467,
825 		67108879, 134217757, 268435459, 536870923,
826 		1073741827
827 	};
828 
829 	/* find next biggest hash size */
830 	newsize = oldsize = symbol->array_size;
831 
832 	for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) {
833 		if (oldsize < sizes[i]) {
834 			newsize = sizes[i];
835 			break;
836 		}
837 	}
838 	if (newsize == oldsize) {	/* table already at max (!) */
839 		symbol->flags |= ARRAYMAXED;
840 		return;
841 	}
842 
843 	/* allocate new table */
844 	ezalloc(new, BUCKET **, newsize * sizeof(BUCKET *), "grow_int_table");
845 
846 	old = symbol->buckets;
847 	symbol->buckets = new;
848 	symbol->array_size = newsize;
849 
850 	/* brand new hash table */
851 	if (old == NULL)
852 		return;		/* DO NOT initialize symbol->table_size */
853 
854 	/* old hash table there, move stuff to new, free old */
855 	/* note that symbol->table_size does not change if an old array. */
856 
857 	for (k = 0; k < oldsize; k++) {
858 		long num;
859 		for (chain = old[k]; chain != NULL; chain = next) {
860 			for (i = 0; i < chain->aicount; i++) {
861 				num = chain->ainum[i];
862 				*int_insert(symbol, num, int_hash(num, newsize)) = chain->aivalue[i];
863 			}
864 			next = chain->ainext;
865 			freebucket(chain);
866 		}
867 	}
868 	efree(old);
869 }
870