xref: /freebsd/contrib/lua/src/ltable.c (revision 8e3e3a7a)
1 /*
2 ** $Id: ltable.c,v 2.118 2016/11/07 12:38:35 roberto Exp $
3 ** Lua tables (hash)
4 ** See Copyright Notice in lua.h
5 */
6 
7 #define ltable_c
8 #define LUA_CORE
9 
10 #include "lprefix.h"
11 
12 
13 /*
14 ** Implementation of tables (aka arrays, objects, or hash tables).
15 ** Tables keep its elements in two parts: an array part and a hash part.
16 ** Non-negative integer keys are all candidates to be kept in the array
17 ** part. The actual size of the array is the largest 'n' such that
18 ** more than half the slots between 1 and n are in use.
19 ** Hash uses a mix of chained scatter table with Brent's variation.
20 ** A main invariant of these tables is that, if an element is not
21 ** in its main position (i.e. the 'original' position that its hash gives
22 ** to it), then the colliding element is in its own main position.
23 ** Hence even when the load factor reaches 100%, performance remains good.
24 */
25 
26 #include <math.h>
27 #include <limits.h>
28 
29 #include "lua.h"
30 
31 #include "ldebug.h"
32 #include "ldo.h"
33 #include "lgc.h"
34 #include "lmem.h"
35 #include "lobject.h"
36 #include "lstate.h"
37 #include "lstring.h"
38 #include "ltable.h"
39 #include "lvm.h"
40 
41 
42 /*
43 ** Maximum size of array part (MAXASIZE) is 2^MAXABITS. MAXABITS is
44 ** the largest integer such that MAXASIZE fits in an unsigned int.
45 */
46 #define MAXABITS	cast_int(sizeof(int) * CHAR_BIT - 1)
47 #define MAXASIZE	(1u << MAXABITS)
48 
49 /*
50 ** Maximum size of hash part is 2^MAXHBITS. MAXHBITS is the largest
51 ** integer such that 2^MAXHBITS fits in a signed int. (Note that the
52 ** maximum number of elements in a table, 2^MAXABITS + 2^MAXHBITS, still
53 ** fits comfortably in an unsigned int.)
54 */
55 #define MAXHBITS	(MAXABITS - 1)
56 
57 
58 #define hashpow2(t,n)		(gnode(t, lmod((n), sizenode(t))))
59 
60 #define hashstr(t,str)		hashpow2(t, (str)->hash)
61 #define hashboolean(t,p)	hashpow2(t, p)
62 #define hashint(t,i)		hashpow2(t, i)
63 
64 
65 /*
66 ** for some types, it is better to avoid modulus by power of 2, as
67 ** they tend to have many 2 factors.
68 */
69 #define hashmod(t,n)	(gnode(t, ((n) % ((sizenode(t)-1)|1))))
70 
71 
72 #define hashpointer(t,p)	hashmod(t, point2uint(p))
73 
74 
75 #define dummynode		(&dummynode_)
76 
77 static const Node dummynode_ = {
78   {NILCONSTANT},  /* value */
79   {{NILCONSTANT, 0}}  /* key */
80 };
81 
82 
83 /*
84 ** Hash for floating-point numbers.
85 ** The main computation should be just
86 **     n = frexp(n, &i); return (n * INT_MAX) + i
87 ** but there are some numerical subtleties.
88 ** In a two-complement representation, INT_MAX does not has an exact
89 ** representation as a float, but INT_MIN does; because the absolute
90 ** value of 'frexp' is smaller than 1 (unless 'n' is inf/NaN), the
91 ** absolute value of the product 'frexp * -INT_MIN' is smaller or equal
92 ** to INT_MAX. Next, the use of 'unsigned int' avoids overflows when
93 ** adding 'i'; the use of '~u' (instead of '-u') avoids problems with
94 ** INT_MIN.
95 */
96 #if !defined(l_hashfloat)
97 static int l_hashfloat (lua_Number n) {
98   int i;
99   lua_Integer ni;
100   n = l_mathop(frexp)(n, &i) * -cast_num(INT_MIN);
101   if (!lua_numbertointeger(n, &ni)) {  /* is 'n' inf/-inf/NaN? */
102     lua_assert(luai_numisnan(n) || l_mathop(fabs)(n) == cast_num(HUGE_VAL));
103     return 0;
104   }
105   else {  /* normal case */
106     unsigned int u = cast(unsigned int, i) + cast(unsigned int, ni);
107     return cast_int(u <= cast(unsigned int, INT_MAX) ? u : ~u);
108   }
109 }
110 #endif
111 
112 
113 /*
114 ** returns the 'main' position of an element in a table (that is, the index
115 ** of its hash value)
116 */
117 static Node *mainposition (const Table *t, const TValue *key) {
118   switch (ttype(key)) {
119     case LUA_TNUMINT:
120       return hashint(t, ivalue(key));
121     case LUA_TNUMFLT:
122       return hashmod(t, l_hashfloat(fltvalue(key)));
123     case LUA_TSHRSTR:
124       return hashstr(t, tsvalue(key));
125     case LUA_TLNGSTR:
126       return hashpow2(t, luaS_hashlongstr(tsvalue(key)));
127     case LUA_TBOOLEAN:
128       return hashboolean(t, bvalue(key));
129     case LUA_TLIGHTUSERDATA:
130       return hashpointer(t, pvalue(key));
131     case LUA_TLCF:
132       return hashpointer(t, fvalue(key));
133     default:
134       lua_assert(!ttisdeadkey(key));
135       return hashpointer(t, gcvalue(key));
136   }
137 }
138 
139 
140 /*
141 ** returns the index for 'key' if 'key' is an appropriate key to live in
142 ** the array part of the table, 0 otherwise.
143 */
144 static unsigned int arrayindex (const TValue *key) {
145   if (ttisinteger(key)) {
146     lua_Integer k = ivalue(key);
147     if (0 < k && (lua_Unsigned)k <= MAXASIZE)
148       return cast(unsigned int, k);  /* 'key' is an appropriate array index */
149   }
150   return 0;  /* 'key' did not match some condition */
151 }
152 
153 
154 /*
155 ** returns the index of a 'key' for table traversals. First goes all
156 ** elements in the array part, then elements in the hash part. The
157 ** beginning of a traversal is signaled by 0.
158 */
159 static unsigned int findindex (lua_State *L, Table *t, StkId key) {
160   unsigned int i;
161   if (ttisnil(key)) return 0;  /* first iteration */
162   i = arrayindex(key);
163   if (i != 0 && i <= t->sizearray)  /* is 'key' inside array part? */
164     return i;  /* yes; that's the index */
165   else {
166     int nx;
167     Node *n = mainposition(t, key);
168     for (;;) {  /* check whether 'key' is somewhere in the chain */
169       /* key may be dead already, but it is ok to use it in 'next' */
170       if (luaV_rawequalobj(gkey(n), key) ||
171             (ttisdeadkey(gkey(n)) && iscollectable(key) &&
172              deadvalue(gkey(n)) == gcvalue(key))) {
173         i = cast_int(n - gnode(t, 0));  /* key index in hash table */
174         /* hash elements are numbered after array ones */
175         return (i + 1) + t->sizearray;
176       }
177       nx = gnext(n);
178       if (nx == 0)
179         luaG_runerror(L, "invalid key to 'next'");  /* key not found */
180       else n += nx;
181     }
182   }
183 }
184 
185 
186 int luaH_next (lua_State *L, Table *t, StkId key) {
187   unsigned int i = findindex(L, t, key);  /* find original element */
188   for (; i < t->sizearray; i++) {  /* try first array part */
189     if (!ttisnil(&t->array[i])) {  /* a non-nil value? */
190       setivalue(key, i + 1);
191       setobj2s(L, key+1, &t->array[i]);
192       return 1;
193     }
194   }
195   for (i -= t->sizearray; cast_int(i) < sizenode(t); i++) {  /* hash part */
196     if (!ttisnil(gval(gnode(t, i)))) {  /* a non-nil value? */
197       setobj2s(L, key, gkey(gnode(t, i)));
198       setobj2s(L, key+1, gval(gnode(t, i)));
199       return 1;
200     }
201   }
202   return 0;  /* no more elements */
203 }
204 
205 
206 /*
207 ** {=============================================================
208 ** Rehash
209 ** ==============================================================
210 */
211 
212 /*
213 ** Compute the optimal size for the array part of table 't'. 'nums' is a
214 ** "count array" where 'nums[i]' is the number of integers in the table
215 ** between 2^(i - 1) + 1 and 2^i. 'pna' enters with the total number of
216 ** integer keys in the table and leaves with the number of keys that
217 ** will go to the array part; return the optimal size.
218 */
219 static unsigned int computesizes (unsigned int nums[], unsigned int *pna) {
220   int i;
221   unsigned int twotoi;  /* 2^i (candidate for optimal size) */
222   unsigned int a = 0;  /* number of elements smaller than 2^i */
223   unsigned int na = 0;  /* number of elements to go to array part */
224   unsigned int optimal = 0;  /* optimal size for array part */
225   /* loop while keys can fill more than half of total size */
226   for (i = 0, twotoi = 1; *pna > twotoi / 2; i++, twotoi *= 2) {
227     if (nums[i] > 0) {
228       a += nums[i];
229       if (a > twotoi/2) {  /* more than half elements present? */
230         optimal = twotoi;  /* optimal size (till now) */
231         na = a;  /* all elements up to 'optimal' will go to array part */
232       }
233     }
234   }
235   lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
236   *pna = na;
237   return optimal;
238 }
239 
240 
241 static int countint (const TValue *key, unsigned int *nums) {
242   unsigned int k = arrayindex(key);
243   if (k != 0) {  /* is 'key' an appropriate array index? */
244     nums[luaO_ceillog2(k)]++;  /* count as such */
245     return 1;
246   }
247   else
248     return 0;
249 }
250 
251 
252 /*
253 ** Count keys in array part of table 't': Fill 'nums[i]' with
254 ** number of keys that will go into corresponding slice and return
255 ** total number of non-nil keys.
256 */
257 static unsigned int numusearray (const Table *t, unsigned int *nums) {
258   int lg;
259   unsigned int ttlg;  /* 2^lg */
260   unsigned int ause = 0;  /* summation of 'nums' */
261   unsigned int i = 1;  /* count to traverse all array keys */
262   /* traverse each slice */
263   for (lg = 0, ttlg = 1; lg <= MAXABITS; lg++, ttlg *= 2) {
264     unsigned int lc = 0;  /* counter */
265     unsigned int lim = ttlg;
266     if (lim > t->sizearray) {
267       lim = t->sizearray;  /* adjust upper limit */
268       if (i > lim)
269         break;  /* no more elements to count */
270     }
271     /* count elements in range (2^(lg - 1), 2^lg] */
272     for (; i <= lim; i++) {
273       if (!ttisnil(&t->array[i-1]))
274         lc++;
275     }
276     nums[lg] += lc;
277     ause += lc;
278   }
279   return ause;
280 }
281 
282 
283 static int numusehash (const Table *t, unsigned int *nums, unsigned int *pna) {
284   int totaluse = 0;  /* total number of elements */
285   int ause = 0;  /* elements added to 'nums' (can go to array part) */
286   int i = sizenode(t);
287   while (i--) {
288     Node *n = &t->node[i];
289     if (!ttisnil(gval(n))) {
290       ause += countint(gkey(n), nums);
291       totaluse++;
292     }
293   }
294   *pna += ause;
295   return totaluse;
296 }
297 
298 
299 static void setarrayvector (lua_State *L, Table *t, unsigned int size) {
300   unsigned int i;
301   luaM_reallocvector(L, t->array, t->sizearray, size, TValue);
302   for (i=t->sizearray; i<size; i++)
303      setnilvalue(&t->array[i]);
304   t->sizearray = size;
305 }
306 
307 
308 static void setnodevector (lua_State *L, Table *t, unsigned int size) {
309   if (size == 0) {  /* no elements to hash part? */
310     t->node = cast(Node *, dummynode);  /* use common 'dummynode' */
311     t->lsizenode = 0;
312     t->lastfree = NULL;  /* signal that it is using dummy node */
313   }
314   else {
315     int i;
316     int lsize = luaO_ceillog2(size);
317     if (lsize > MAXHBITS)
318       luaG_runerror(L, "table overflow");
319     size = twoto(lsize);
320     t->node = luaM_newvector(L, size, Node);
321     for (i = 0; i < (int)size; i++) {
322       Node *n = gnode(t, i);
323       gnext(n) = 0;
324       setnilvalue(wgkey(n));
325       setnilvalue(gval(n));
326     }
327     t->lsizenode = cast_byte(lsize);
328     t->lastfree = gnode(t, size);  /* all positions are free */
329   }
330 }
331 
332 
333 void luaH_resize (lua_State *L, Table *t, unsigned int nasize,
334                                           unsigned int nhsize) {
335   unsigned int i;
336   int j;
337   unsigned int oldasize = t->sizearray;
338   int oldhsize = allocsizenode(t);
339   Node *nold = t->node;  /* save old hash ... */
340   if (nasize > oldasize)  /* array part must grow? */
341     setarrayvector(L, t, nasize);
342   /* create new hash part with appropriate size */
343   setnodevector(L, t, nhsize);
344   if (nasize < oldasize) {  /* array part must shrink? */
345     t->sizearray = nasize;
346     /* re-insert elements from vanishing slice */
347     for (i=nasize; i<oldasize; i++) {
348       if (!ttisnil(&t->array[i]))
349         luaH_setint(L, t, i + 1, &t->array[i]);
350     }
351     /* shrink array */
352     luaM_reallocvector(L, t->array, oldasize, nasize, TValue);
353   }
354   /* re-insert elements from hash part */
355   for (j = oldhsize - 1; j >= 0; j--) {
356     Node *old = nold + j;
357     if (!ttisnil(gval(old))) {
358       /* doesn't need barrier/invalidate cache, as entry was
359          already present in the table */
360       setobjt2t(L, luaH_set(L, t, gkey(old)), gval(old));
361     }
362   }
363   if (oldhsize > 0)  /* not the dummy node? */
364     luaM_freearray(L, nold, cast(size_t, oldhsize)); /* free old hash */
365 }
366 
367 
368 void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) {
369   int nsize = allocsizenode(t);
370   luaH_resize(L, t, nasize, nsize);
371 }
372 
373 /*
374 ** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
375 */
376 static void rehash (lua_State *L, Table *t, const TValue *ek) {
377   unsigned int asize;  /* optimal size for array part */
378   unsigned int na;  /* number of keys in the array part */
379   unsigned int nums[MAXABITS + 1];
380   int i;
381   int totaluse;
382   for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
383   na = numusearray(t, nums);  /* count keys in array part */
384   totaluse = na;  /* all those keys are integer keys */
385   totaluse += numusehash(t, nums, &na);  /* count keys in hash part */
386   /* count extra key */
387   na += countint(ek, nums);
388   totaluse++;
389   /* compute new size for array part */
390   asize = computesizes(nums, &na);
391   /* resize the table to new computed sizes */
392   luaH_resize(L, t, asize, totaluse - na);
393 }
394 
395 
396 
397 /*
398 ** }=============================================================
399 */
400 
401 
402 Table *luaH_new (lua_State *L) {
403   GCObject *o = luaC_newobj(L, LUA_TTABLE, sizeof(Table));
404   Table *t = gco2t(o);
405   t->metatable = NULL;
406   t->flags = cast_byte(~0);
407   t->array = NULL;
408   t->sizearray = 0;
409   setnodevector(L, t, 0);
410   return t;
411 }
412 
413 
414 void luaH_free (lua_State *L, Table *t) {
415   if (!isdummy(t))
416     luaM_freearray(L, t->node, cast(size_t, sizenode(t)));
417   luaM_freearray(L, t->array, t->sizearray);
418   luaM_free(L, t);
419 }
420 
421 
422 static Node *getfreepos (Table *t) {
423   if (!isdummy(t)) {
424     while (t->lastfree > t->node) {
425       t->lastfree--;
426       if (ttisnil(gkey(t->lastfree)))
427         return t->lastfree;
428     }
429   }
430   return NULL;  /* could not find a free place */
431 }
432 
433 
434 
435 /*
436 ** inserts a new key into a hash table; first, check whether key's main
437 ** position is free. If not, check whether colliding node is in its main
438 ** position or not: if it is not, move colliding node to an empty place and
439 ** put new key in its main position; otherwise (colliding node is in its main
440 ** position), new key goes to an empty position.
441 */
442 TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
443   Node *mp;
444   TValue aux;
445   if (ttisnil(key)) luaG_runerror(L, "table index is nil");
446   else if (ttisfloat(key)) {
447     lua_Integer k;
448     if (luaV_tointeger(key, &k, 0)) {  /* does index fit in an integer? */
449       setivalue(&aux, k);
450       key = &aux;  /* insert it as an integer */
451     }
452     else if (luai_numisnan(fltvalue(key)))
453       luaG_runerror(L, "table index is NaN");
454   }
455   mp = mainposition(t, key);
456   if (!ttisnil(gval(mp)) || isdummy(t)) {  /* main position is taken? */
457     Node *othern;
458     Node *f = getfreepos(t);  /* get a free place */
459     if (f == NULL) {  /* cannot find a free place? */
460       rehash(L, t, key);  /* grow table */
461       /* whatever called 'newkey' takes care of TM cache */
462       return luaH_set(L, t, key);  /* insert key into grown table */
463     }
464     lua_assert(!isdummy(t));
465     othern = mainposition(t, gkey(mp));
466     if (othern != mp) {  /* is colliding node out of its main position? */
467       /* yes; move colliding node into free position */
468       while (othern + gnext(othern) != mp)  /* find previous */
469         othern += gnext(othern);
470       gnext(othern) = cast_int(f - othern);  /* rechain to point to 'f' */
471       *f = *mp;  /* copy colliding node into free pos. (mp->next also goes) */
472       if (gnext(mp) != 0) {
473         gnext(f) += cast_int(mp - f);  /* correct 'next' */
474         gnext(mp) = 0;  /* now 'mp' is free */
475       }
476       setnilvalue(gval(mp));
477     }
478     else {  /* colliding node is in its own main position */
479       /* new node will go into free position */
480       if (gnext(mp) != 0)
481         gnext(f) = cast_int((mp + gnext(mp)) - f);  /* chain new position */
482       else lua_assert(gnext(f) == 0);
483       gnext(mp) = cast_int(f - mp);
484       mp = f;
485     }
486   }
487   setnodekey(L, &mp->i_key, key);
488   luaC_barrierback(L, t, key);
489   lua_assert(ttisnil(gval(mp)));
490   return gval(mp);
491 }
492 
493 
494 /*
495 ** search function for integers
496 */
497 const TValue *luaH_getint (Table *t, lua_Integer key) {
498   /* (1 <= key && key <= t->sizearray) */
499   if (l_castS2U(key) - 1 < t->sizearray)
500     return &t->array[key - 1];
501   else {
502     Node *n = hashint(t, key);
503     for (;;) {  /* check whether 'key' is somewhere in the chain */
504       if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
505         return gval(n);  /* that's it */
506       else {
507         int nx = gnext(n);
508         if (nx == 0) break;
509         n += nx;
510       }
511     }
512     return luaO_nilobject;
513   }
514 }
515 
516 
517 /*
518 ** search function for short strings
519 */
520 const TValue *luaH_getshortstr (Table *t, TString *key) {
521   Node *n = hashstr(t, key);
522   lua_assert(key->tt == LUA_TSHRSTR);
523   for (;;) {  /* check whether 'key' is somewhere in the chain */
524     const TValue *k = gkey(n);
525     if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
526       return gval(n);  /* that's it */
527     else {
528       int nx = gnext(n);
529       if (nx == 0)
530         return luaO_nilobject;  /* not found */
531       n += nx;
532     }
533   }
534 }
535 
536 
537 /*
538 ** "Generic" get version. (Not that generic: not valid for integers,
539 ** which may be in array part, nor for floats with integral values.)
540 */
541 static const TValue *getgeneric (Table *t, const TValue *key) {
542   Node *n = mainposition(t, key);
543   for (;;) {  /* check whether 'key' is somewhere in the chain */
544     if (luaV_rawequalobj(gkey(n), key))
545       return gval(n);  /* that's it */
546     else {
547       int nx = gnext(n);
548       if (nx == 0)
549         return luaO_nilobject;  /* not found */
550       n += nx;
551     }
552   }
553 }
554 
555 
556 const TValue *luaH_getstr (Table *t, TString *key) {
557   if (key->tt == LUA_TSHRSTR)
558     return luaH_getshortstr(t, key);
559   else {  /* for long strings, use generic case */
560     TValue ko;
561     setsvalue(cast(lua_State *, NULL), &ko, key);
562     return getgeneric(t, &ko);
563   }
564 }
565 
566 
567 /*
568 ** main search function
569 */
570 const TValue *luaH_get (Table *t, const TValue *key) {
571   switch (ttype(key)) {
572     case LUA_TSHRSTR: return luaH_getshortstr(t, tsvalue(key));
573     case LUA_TNUMINT: return luaH_getint(t, ivalue(key));
574     case LUA_TNIL: return luaO_nilobject;
575     case LUA_TNUMFLT: {
576       lua_Integer k;
577       if (luaV_tointeger(key, &k, 0)) /* index is int? */
578         return luaH_getint(t, k);  /* use specialized version */
579       /* else... */
580     }  /* FALLTHROUGH */
581     default:
582       return getgeneric(t, key);
583   }
584 }
585 
586 
587 /*
588 ** beware: when using this function you probably need to check a GC
589 ** barrier and invalidate the TM cache.
590 */
591 TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
592   const TValue *p = luaH_get(t, key);
593   if (p != luaO_nilobject)
594     return cast(TValue *, p);
595   else return luaH_newkey(L, t, key);
596 }
597 
598 
599 void luaH_setint (lua_State *L, Table *t, lua_Integer key, TValue *value) {
600   const TValue *p = luaH_getint(t, key);
601   TValue *cell;
602   if (p != luaO_nilobject)
603     cell = cast(TValue *, p);
604   else {
605     TValue k;
606     setivalue(&k, key);
607     cell = luaH_newkey(L, t, &k);
608   }
609   setobj2t(L, cell, value);
610 }
611 
612 
613 static int unbound_search (Table *t, unsigned int j) {
614   unsigned int i = j;  /* i is zero or a present index */
615   j++;
616   /* find 'i' and 'j' such that i is present and j is not */
617   while (!ttisnil(luaH_getint(t, j))) {
618     i = j;
619     if (j > cast(unsigned int, MAX_INT)/2) {  /* overflow? */
620       /* table was built with bad purposes: resort to linear search */
621       i = 1;
622       while (!ttisnil(luaH_getint(t, i))) i++;
623       return i - 1;
624     }
625     j *= 2;
626   }
627   /* now do a binary search between them */
628   while (j - i > 1) {
629     unsigned int m = (i+j)/2;
630     if (ttisnil(luaH_getint(t, m))) j = m;
631     else i = m;
632   }
633   return i;
634 }
635 
636 
637 /*
638 ** Try to find a boundary in table 't'. A 'boundary' is an integer index
639 ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
640 */
641 int luaH_getn (Table *t) {
642   unsigned int j = t->sizearray;
643   if (j > 0 && ttisnil(&t->array[j - 1])) {
644     /* there is a boundary in the array part: (binary) search for it */
645     unsigned int i = 0;
646     while (j - i > 1) {
647       unsigned int m = (i+j)/2;
648       if (ttisnil(&t->array[m - 1])) j = m;
649       else i = m;
650     }
651     return i;
652   }
653   /* else must find a boundary in hash part */
654   else if (isdummy(t))  /* hash part is empty? */
655     return j;  /* that is easy... */
656   else return unbound_search(t, j);
657 }
658 
659 
660 
661 #if defined(LUA_DEBUG)
662 
663 Node *luaH_mainposition (const Table *t, const TValue *key) {
664   return mainposition(t, key);
665 }
666 
667 int luaH_isdummy (const Table *t) { return isdummy(t); }
668 
669 #endif
670