1 /*
2 ** $Id: ltable.c,v 2.100 2015/01/05 13:52:37 roberto Exp $
3 ** Lua tables (hash)
4 ** See Copyright Notice in lua.h
5 */
6 
7 #define ltable_c
8 #define LUA_CORE
9 
10 #include "lprefix.h"
11 
12 
13 /*
14 ** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep their elements in two parts: an array part and a hash part.
** Positive integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that
** more than half the slots between 1 and n are in use.
19 ** Hash uses a mix of chained scatter table with Brent's variation.
20 ** A main invariant of these tables is that, if an element is not
21 ** in its main position (i.e. the 'original' position that its hash gives
22 ** to it), then the colliding element is in its own main position.
23 ** Hence even when the load factor reaches 100%, performance remains good.
24 */
25 
26 #include <float.h>
27 #include <math.h>
28 #include <string.h>
29 #include <limits.h>
30 
31 #include "lua.h"
32 
33 #include "ldebug.h"
34 #include "ldo.h"
35 #include "lgc.h"
36 #include "lmem.h"
37 #include "lobject.h"
38 #include "lstate.h"
39 #include "lstring.h"
40 #include "ltable.h"
41 #include "lvm.h"
42 
43 
/*
** Maximum size of array part (MAXASIZE) is 2^MAXABITS. MAXABITS is
** the largest integer such that MAXASIZE fits in an unsigned int
** (that is, the number of bits in an 'int' minus one).
*/
#define MAXABITS	cast_int(sizeof(int) * CHAR_BIT - 1)
#define MAXASIZE	(1u << MAXABITS)

/*
** Maximum size of hash part is 2^MAXHBITS. MAXHBITS is the largest
** integer such that 2^MAXHBITS fits in a signed int. (Note that the
** maximum number of elements in a table, 2^MAXABITS + 2^MAXHBITS, still
** fits comfortably in an unsigned int.)
*/
#define MAXHBITS	(MAXABITS - 1)
58 
59 
/*
** Selects a node of 't' by reducing 'n' with 'lmod' over the size of
** the node vector.
*/
#define hashpow2(t,n)		(gnode(t, lmod((n), sizenode(t))))

/* per-type wrappers around 'hashpow2' */
#define hashstr(t,str)		hashpow2(t, (str)->hash)
#define hashboolean(t,p)	hashpow2(t, p)
#define hashint(t,i)		hashpow2(t, i)


/*
** for some types, it is better to avoid modulus by power of 2, as
** they tend to have many 2 factors. (The divisor used here,
** '(sizenode(t)-1)|1', is always odd.)
*/
#define hashmod(t,n)	(gnode(t, ((n) % ((sizenode(t)-1)|1))))


/* pointers are converted with 'point2int' and hashed with 'hashmod' */
#define hashpointer(t,p)	hashmod(t, point2int(p))


/*
** Shared, read-only node that 'setnodevector' installs as the hash
** part of every table whose hash part has size zero.
*/
#define dummynode		(&dummynode_)

/* tells whether 'n' is that shared node */
#define isdummy(n)		((n) == dummynode)

static const Node dummynode_ = {
  {NILCONSTANT},  /* value */
  {{NILCONSTANT, 0}}  /* key */
};
85 
86 
87 /*
88 ** Checks whether a float has a value representable as a lua_Integer
89 ** (and does the conversion if so)
90 */
numisinteger(lua_Number x,lua_Integer * p)91 static int numisinteger (lua_Number x, lua_Integer *p) {
92   if ((x) == l_floor(x))  /* integral value? */
93     return lua_numbertointeger(x, p);  /* try as an integer */
94   else return 0;
95 }
96 
97 
98 /*
99 ** hash for floating-point numbers
100 */
hashfloat(const Table * t,lua_Number n)101 static Node *hashfloat (const Table *t, lua_Number n) {
102   int i;
103   n = l_mathop(frexp)(n, &i) * cast_num(INT_MAX - DBL_MAX_EXP);
104   i += cast_int(n);
105   if (i < 0) {
106     if (cast(unsigned int, i) == 0u - i)  /* use unsigned to avoid overflows */
107       i = 0;  /* handle INT_MIN */
108     i = -i;  /* must be a positive value */
109   }
110   return hashmod(t, i);
111 }
112 
113 
114 
115 /*
116 ** returns the 'main' position of an element in a table (that is, the index
117 ** of its hash value)
118 */
mainposition(const Table * t,const TValue * key)119 static Node *mainposition (const Table *t, const TValue *key) {
120   switch (ttype(key)) {
121     case LUA_TNUMINT:
122       return hashint(t, ivalue(key));
123     case LUA_TNUMFLT:
124       return hashfloat(t, fltvalue(key));
125     case LUA_TSHRSTR:
126       return hashstr(t, tsvalue(key));
127     case LUA_TLNGSTR: {
128       TString *s = tsvalue(key);
129       if (s->extra == 0) {  /* no hash? */
130         s->hash = luaS_hash(getstr(s), s->len, s->hash);
131         s->extra = 1;  /* now it has its hash */
132       }
133       return hashstr(t, tsvalue(key));
134     }
135     case LUA_TBOOLEAN:
136       return hashboolean(t, bvalue(key));
137     case LUA_TLIGHTUSERDATA:
138       return hashpointer(t, pvalue(key));
139     case LUA_TLCF:
140       return hashpointer(t, fvalue(key));
141     default:
142       return hashpointer(t, gcvalue(key));
143   }
144 }
145 
146 
147 /*
148 ** returns the index for 'key' if 'key' is an appropriate key to live in
149 ** the array part of the table, 0 otherwise.
150 */
arrayindex(const TValue * key)151 static unsigned int arrayindex (const TValue *key) {
152   if (ttisinteger(key)) {
153     lua_Integer k = ivalue(key);
154     if (0 < k && (lua_Unsigned)k <= MAXASIZE)
155       return cast(unsigned int, k);  /* 'key' is an appropriate array index */
156   }
157   return 0;  /* 'key' did not match some condition */
158 }
159 
160 
161 /*
162 ** returns the index of a 'key' for table traversals. First goes all
163 ** elements in the array part, then elements in the hash part. The
164 ** beginning of a traversal is signaled by 0.
165 */
findindex(lua_State * L,Table * t,StkId key)166 static unsigned int findindex (lua_State *L, Table *t, StkId key) {
167   unsigned int i;
168   if (ttisnil(key)) return 0;  /* first iteration */
169   i = arrayindex(key);
170   if (i != 0 && i <= t->sizearray)  /* is 'key' inside array part? */
171     return i;  /* yes; that's the index */
172   else {
173     int nx;
174     Node *n = mainposition(t, key);
175     for (;;) {  /* check whether 'key' is somewhere in the chain */
176       /* key may be dead already, but it is ok to use it in 'next' */
177       if (luaV_rawequalobj(gkey(n), key) ||
178             (ttisdeadkey(gkey(n)) && iscollectable(key) &&
179              deadvalue(gkey(n)) == gcvalue(key))) {
180         i = cast_int(n - gnode(t, 0));  /* key index in hash table */
181         /* hash elements are numbered after array ones */
182         return (i + 1) + t->sizearray;
183       }
184       nx = gnext(n);
185       if (nx == 0)
186         luaG_runerror(L, "invalid key to 'next'");  /* key not found */
187       else n += nx;
188     }
189   }
190 }
191 
192 
/*
** Traversal step: given the current key in stack slot 'key' (nil to
** start), finds the next entry with a non-nil value. On success, stores
** its key in 'key' and its value in 'key+1' and returns 1; returns 0
** when there are no more entries.
*/
int luaH_next (lua_State *L, Table *t, StkId key) {
  unsigned int idx = findindex(L, t, key);  /* where the traversal is */
  /* first, the rest of the array part */
  while (idx < t->sizearray) {
    TValue *slot = &t->array[idx];
    if (!ttisnil(slot)) {
      setivalue(key, idx + 1);  /* array keys are 1-based */
      setobj2s(L, key+1, slot);
      return 1;
    }
    idx++;
  }
  /* then, the hash part ('idx' becomes relative to the node vector) */
  for (idx -= t->sizearray; cast_int(idx) < sizenode(t); idx++) {
    Node *n = gnode(t, idx);
    if (!ttisnil(gval(n))) {
      setobj2s(L, key, gkey(n));
      setobj2s(L, key+1, gval(n));
      return 1;
    }
  }
  return 0;  /* traversal finished */
}
211 
212 
213 /*
214 ** {=============================================================
215 ** Rehash
216 ** ==============================================================
217 */
218 
/*
** Computes the best size for the array part. 'nums' is a "count array":
** 'nums[i]' is the number of integer keys in the range
** (2^(i - 1), 2^i]. On entry, '*narray' is the total number of integer
** keys that are candidates for the array part. On exit, '*narray' is
** the chosen size (the largest power of two such that more than half of
** its slots would be in use, or 0), and the function returns how many
** keys will go to the array part.
*/
static unsigned int computesizes (unsigned int nums[], unsigned int *narray) {
  unsigned int total = *narray;  /* number of candidate keys */
  unsigned int seen = 0;  /* candidates not larger than 2^lg */
  unsigned int inarray = 0;  /* keys that will go to the array part */
  unsigned int best = 0;  /* best size found so far */
  unsigned int pow2 = 1;  /* 2^lg */
  int lg = 0;
  while (pow2/2 < total) {
    if (nums[lg] > 0) {
      seen += nums[lg];
      if (seen > pow2/2) {  /* more than half of the slots in use? */
        best = pow2;
        inarray = seen;  /* every key up to 'best' goes to the array */
      }
    }
    if (seen == total)
      break;  /* all candidates already counted */
    lg++;
    pow2 *= 2;
  }
  *narray = best;
  lua_assert(*narray/2 <= inarray && inarray <= *narray);
  return inarray;
}
245 
246 
/*
** If 'key' is a candidate array index, counts it in the proper slice of
** 'nums' (the slice given by the ceiling of its log2) and returns 1;
** otherwise returns 0 and leaves 'nums' untouched.
*/
static int countint (const TValue *key, unsigned int *nums) {
  unsigned int k = arrayindex(key);
  if (k == 0)
    return 0;  /* not an appropriate array index */
  nums[luaO_ceillog2(k)]++;
  return 1;
}
256 
257 
numusearray(const Table * t,unsigned int * nums)258 static unsigned int numusearray (const Table *t, unsigned int *nums) {
259   int lg;
260   unsigned int ttlg;  /* 2^lg */
261   unsigned int ause = 0;  /* summation of 'nums' */
262   unsigned int i = 1;  /* count to traverse all array keys */
263   /* traverse each slice */
264   for (lg = 0, ttlg = 1; lg <= MAXABITS; lg++, ttlg *= 2) {
265     unsigned int lc = 0;  /* counter */
266     unsigned int lim = ttlg;
267     if (lim > t->sizearray) {
268       lim = t->sizearray;  /* adjust upper limit */
269       if (i > lim)
270         break;  /* no more elements to count */
271     }
272     /* count elements in range (2^(lg - 1), 2^lg] */
273     for (; i <= lim; i++) {
274       if (!ttisnil(&t->array[i-1]))
275         lc++;
276     }
277     nums[lg] += lc;
278     ause += lc;
279   }
280   return ause;
281 }
282 
283 
numusehash(const Table * t,unsigned int * nums,unsigned int * pnasize)284 static int numusehash (const Table *t, unsigned int *nums,
285                        unsigned int *pnasize) {
286   int totaluse = 0;  /* total number of elements */
287   int ause = 0;  /* elements added to 'nums' (can go to array part) */
288   int i = sizenode(t);
289   while (i--) {
290     Node *n = &t->node[i];
291     if (!ttisnil(gval(n))) {
292       ause += countint(gkey(n), nums);
293       totaluse++;
294     }
295   }
296   *pnasize += ause;
297   return totaluse;
298 }
299 
300 
/*
** Reallocates the array part of 't' to 'size' slots, fills the slots
** from the old size up to 'size' with nil, and records the new size.
*/
static void setarrayvector (lua_State *L, Table *t, unsigned int size) {
  unsigned int slot;
  luaM_reallocvector(L, t->array, t->sizearray, size, TValue);
  /* 't->sizearray' still holds the old size here */
  slot = t->sizearray;
  while (slot < size) {
    setnilvalue(&t->array[slot]);
    slot++;
  }
  t->sizearray = size;
}
308 
309 
/*
** Installs a fresh hash part in 't'. A 'size' of 0 selects the shared
** 'dummynode'; any other size is rounded up to a power of two and a new
** node vector, with all nodes empty, is allocated. Raises a "table
** overflow" error when the size needs more than MAXHBITS bits. The old
** node vector is not freed here.
*/
static void setnodevector (lua_State *L, Table *t, unsigned int size) {
  int lsize = 0;  /* log2 of the size of the hash part */
  if (size == 0)  /* no elements to hash part? */
    t->node = cast(Node *, dummynode);  /* use common 'dummynode' */
  else {
    Node *n;
    Node *limit;
    lsize = luaO_ceillog2(size);
    if (lsize > MAXHBITS)
      luaG_runerror(L, "table overflow");
    size = twoto(lsize);  /* actual size is a power of two */
    t->node = luaM_newvector(L, size, Node);
    limit = t->node + size;
    for (n = t->node; n < limit; n++) {  /* clear all nodes */
      gnext(n) = 0;
      setnilvalue(wgkey(n));
      setnilvalue(gval(n));
    }
  }
  t->lsizenode = cast_byte(lsize);
  /* 'lastfree' starts just past the last node: all positions are free */
  t->lastfree = gnode(t, size);
}
333 
334 
/*
** Resizes table 't' so that its array part has 'nasize' slots and its
** hash part is rebuilt for 'nhsize' entries (see 'setnodevector' for
** how that size is rounded). All entries with non-nil values are kept:
** those in a vanishing slice of the array and those in the old hash
** part are re-inserted into the resized table.
*/
void luaH_resize (lua_State *L, Table *t, unsigned int nasize,
                                          unsigned int nhsize) {
  unsigned int i;
  int j;
  unsigned int oldasize = t->sizearray;
  int oldhsize = t->lsizenode;  /* log2 of the old hash size */
  Node *nold = t->node;  /* save old hash ... */
  if (nasize > oldasize)  /* array part must grow? */
    setarrayvector(L, t, nasize);
  /* create new hash part with appropriate size */
  setnodevector(L, t, nhsize);
  if (nasize < oldasize) {  /* array part must shrink? */
    /* set the new size first, so that the re-inserted keys (which are
       all larger than 'nasize') are not looked up in the array part */
    t->sizearray = nasize;
    /* re-insert elements from vanishing slice */
    for (i=nasize; i<oldasize; i++) {
      if (!ttisnil(&t->array[i]))
        luaH_setint(L, t, i + 1, &t->array[i]);
    }
    /* shrink array */
    luaM_reallocvector(L, t->array, oldasize, nasize, TValue);
  }
  /* re-insert elements from hash part */
  /* (the old nodes are visited from the last one down to the first) */
  for (j = twoto(oldhsize) - 1; j >= 0; j--) {
    Node *old = nold + j;
    if (!ttisnil(gval(old))) {
      /* doesn't need barrier/invalidate cache, as entry was
         already present in the table */
      setobjt2t(L, luaH_set(L, t, gkey(old)), gval(old));
    }
  }
  /* the shared dummy node is static and must never be freed */
  if (!isdummy(nold))
    luaM_freearray(L, nold, cast(size_t, twoto(oldhsize))); /* free old array */
}
368 
369 
/*
** Resizes only the array part of 't' to 'nasize' slots; the hash part
** is rebuilt with its current size (0 when it is the dummy node).
*/
void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) {
  int hsize = 0;  /* current size of the hash part */
  if (!isdummy(t->node))
    hsize = sizenode(t);
  luaH_resize(L, t, nasize, hsize);
}
374 
375 /*
376 ** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
377 */
rehash(lua_State * L,Table * t,const TValue * ek)378 static void rehash (lua_State *L, Table *t, const TValue *ek) {
379   unsigned int nasize, na;
380   unsigned int nums[MAXABITS + 1];
381   int i;
382   int totaluse;
383   for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
384   nasize = numusearray(t, nums);  /* count keys in array part */
385   totaluse = nasize;  /* all those keys are integer keys */
386   totaluse += numusehash(t, nums, &nasize);  /* count keys in hash part */
387   /* count extra key */
388   nasize += countint(ek, nums);
389   totaluse++;
390   /* compute new size for array part */
391   na = computesizes(nums, &nasize);
392   /* resize the table to new computed sizes */
393   luaH_resize(L, t, nasize, totaluse - na);
394 }
395 
396 
397 
398 /*
399 ** }=============================================================
400 */
401 
402 
luaH_new(lua_State * L)403 Table *luaH_new (lua_State *L) {
404   GCObject *o = luaC_newobj(L, LUA_TTABLE, sizeof(Table));
405   Table *t = gco2t(o);
406   t->metatable = NULL;
407   t->flags = cast_byte(~0);
408   t->array = NULL;
409   t->sizearray = 0;
410   setnodevector(L, t, 0);
411   return t;
412 }
413 
414 
/*
** Releases all memory used by table 't': its node vector (unless it is
** the shared dummy node), its array part, and the table itself.
*/
void luaH_free (lua_State *L, Table *t) {
  Node *hashpart = t->node;
  if (!isdummy(hashpart)) {  /* does the table own its hash part? */
    luaM_freearray(L, hashpart, cast(size_t, sizenode(t)));
  }
  luaM_freearray(L, t->array, t->sizearray);
  luaM_free(L, t);
}
421 
422 
getfreepos(Table * t)423 static Node *getfreepos (Table *t) {
424   while (t->lastfree > t->node) {
425     t->lastfree--;
426     if (ttisnil(gkey(t->lastfree)))
427       return t->lastfree;
428   }
429   return NULL;  /* could not find a free place */
430 }
431 
432 
433 
/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
**
** Raises an error if 'key' is nil or NaN. A float key with an integral
** value that fits in a lua_Integer is inserted as an integer key.
** Returns a pointer to the value slot of the key; the caller is
** expected to store the value there. If there is no free node, the
** table is rehashed (grown) and the key is inserted through 'luaH_set'.
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
  Node *mp;
  TValue aux;  /* holds the integer version of a float key */
  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Number n = fltvalue(key);
    lua_Integer k;
    if (luai_numisnan(n))
      luaG_runerror(L, "table index is NaN");
    if (numisinteger(n, &k)) {  /* index is int? */
      setivalue(&aux, k);
      key = &aux;  /* insert it as an integer */
    }
  }
  mp = mainposition(t, key);
  /* the shared dummy node must never be written, so it counts as taken
     (for it, 'getfreepos' fails and the table is rehashed) */
  if (!ttisnil(gval(mp)) || isdummy(mp)) {  /* main position is taken? */
    Node *othern;
    Node *f = getfreepos(t);  /* get a free place */
    if (f == NULL) {  /* cannot find a free place? */
      rehash(L, t, key);  /* grow table */
      /* whatever called 'newkey' takes care of TM cache and GC barrier */
      return luaH_set(L, t, key);  /* insert key into grown table */
    }
    lua_assert(!isdummy(f));
    othern = mainposition(t, gkey(mp));
    if (othern != mp) {  /* is colliding node out of its main position? */
      /* yes; move colliding node into free position */
      /* ('next' fields are offsets relative to the node itself) */
      while (othern + gnext(othern) != mp)  /* find previous */
        othern += gnext(othern);
      gnext(othern) = cast_int(f - othern);  /* rechain to point to 'f' */
      *f = *mp;  /* copy colliding node into free pos. (mp->next also goes) */
      if (gnext(mp) != 0) {
        gnext(f) += cast_int(mp - f);  /* correct 'next' */
        gnext(mp) = 0;  /* now 'mp' is free */
      }
      setnilvalue(gval(mp));
    }
    else {  /* colliding node is in its own main position */
      /* new node will go into free position */
      /* (it is linked into the chain right after 'mp') */
      if (gnext(mp) != 0)
        gnext(f) = cast_int((mp + gnext(mp)) - f);  /* chain new position */
      else lua_assert(gnext(f) == 0);
      gnext(mp) = cast_int(f - mp);
      mp = f;
    }
  }
  /* at this point 'mp' is the node that will hold the new key */
  setnodekey(L, &mp->i_key, key);
  luaC_barrierback(L, t, key);
  lua_assert(ttisnil(gval(mp)));
  return gval(mp);
}
492 
493 
494 /*
495 ** search function for integers
496 */
luaH_getint(Table * t,lua_Integer key)497 const TValue *luaH_getint (Table *t, lua_Integer key) {
498   /* (1 <= key && key <= t->sizearray) */
499   if (l_castS2U(key - 1) < t->sizearray)
500     return &t->array[key - 1];
501   else {
502     Node *n = hashint(t, key);
503     for (;;) {  /* check whether 'key' is somewhere in the chain */
504       if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
505         return gval(n);  /* that's it */
506       else {
507         int nx = gnext(n);
508         if (nx == 0) break;
509         n += nx;
510       }
511     };
512     return luaO_nilobject;
513   }
514 }
515 
516 
517 /*
518 ** search function for short strings
519 */
luaH_getstr(Table * t,TString * key)520 const TValue *luaH_getstr (Table *t, TString *key) {
521   Node *n = hashstr(t, key);
522   lua_assert(key->tt == LUA_TSHRSTR);
523   for (;;) {  /* check whether 'key' is somewhere in the chain */
524     const TValue *k = gkey(n);
525     if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
526       return gval(n);  /* that's it */
527     else {
528       int nx = gnext(n);
529       if (nx == 0) break;
530       n += nx;
531     }
532   };
533   return luaO_nilobject;
534 }
535 
536 
537 /*
538 ** main search function
539 */
luaH_get(Table * t,const TValue * key)540 const TValue *luaH_get (Table *t, const TValue *key) {
541   switch (ttype(key)) {
542     case LUA_TSHRSTR: return luaH_getstr(t, tsvalue(key));
543     case LUA_TNUMINT: return luaH_getint(t, ivalue(key));
544     case LUA_TNIL: return luaO_nilobject;
545     case LUA_TNUMFLT: {
546       lua_Integer k;
547       if (numisinteger(fltvalue(key), &k)) /* index is int? */
548         return luaH_getint(t, k);  /* use specialized version */
549       /* else go through */
550     }
551     default: {
552       Node *n = mainposition(t, key);
553       for (;;) {  /* check whether 'key' is somewhere in the chain */
554         if (luaV_rawequalobj(gkey(n), key))
555           return gval(n);  /* that's it */
556         else {
557           int nx = gnext(n);
558           if (nx == 0) break;
559           n += nx;
560         }
561       };
562       return luaO_nilobject;
563     }
564   }
565 }
566 
567 
568 /*
569 ** beware: when using this function you probably need to check a GC
570 ** barrier and invalidate the TM cache.
571 */
luaH_set(lua_State * L,Table * t,const TValue * key)572 TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
573   const TValue *p = luaH_get(t, key);
574   if (p != luaO_nilobject)
575     return cast(TValue *, p);
576   else return luaH_newkey(L, t, key);
577 }
578 
579 
/*
** Stores 'value' in 't' under the integer 'key', creating the key
** (through 'luaH_newkey') if it is not present yet.
*/
void luaH_setint (lua_State *L, Table *t, lua_Integer key, TValue *value) {
  const TValue *found = luaH_getint(t, key);
  TValue *cell;
  if (found == luaO_nilobject) {  /* key not present? */
    TValue k;
    setivalue(&k, key);
    cell = luaH_newkey(L, t, &k);
  }
  else
    cell = cast(TValue *, found);
  setobj2t(L, cell, value);
}
592 
593 
/*
** Searches for a boundary in the hash part of 't', starting from 'j',
** which must be zero or an index known to be present. The search first
** doubles an upper candidate until it finds an absent index and then
** does a binary search between the last present index and that one.
** If the candidate grows past MAX_INT/2, it falls back to a linear
** search from index 1.
*/
static int unbound_search (Table *t, unsigned int j) {
  unsigned int lo = j;  /* zero or a present index */
  unsigned int hi = j + 1;  /* candidate for an absent index */
  /* find 'lo' and 'hi' such that 'lo' is present and 'hi' is not */
  while (!ttisnil(luaH_getint(t, hi))) {
    lo = hi;
    if (hi > cast(unsigned int, MAX_INT)/2) {  /* overflow? */
      /* table was built with bad purposes: resort to linear search */
      for (lo = 1; !ttisnil(luaH_getint(t, lo)); lo++)
        ;  /* empty body */
      return lo - 1;
    }
    hi *= 2;
  }
  /* now do a binary search between them */
  while (hi - lo > 1) {
    unsigned int mid = (lo + hi)/2;
    if (ttisnil(luaH_getint(t, mid)))
      hi = mid;
    else
      lo = mid;
  }
  return lo;
}
616 
617 
618 /*
619 ** Try to find a boundary in table 't'. A 'boundary' is an integer index
620 ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
621 */
luaH_getn(Table * t)622 int luaH_getn (Table *t) {
623   unsigned int j = t->sizearray;
624   if (j > 0 && ttisnil(&t->array[j - 1])) {
625     /* there is a boundary in the array part: (binary) search for it */
626     unsigned int i = 0;
627     while (j - i > 1) {
628       unsigned int m = (i+j)/2;
629       if (ttisnil(&t->array[m - 1])) j = m;
630       else i = m;
631     }
632     return i;
633   }
634   /* else must find a boundary in hash part */
635   else if (isdummy(t->node))  /* hash part is empty? */
636     return j;  /* that is easy... */
637   else return unbound_search(t, j);
638 }
639 
640 
641 
#if defined(LUA_DEBUG)

/*
** Debug-only entry point that exposes the static 'mainposition'.
*/
Node *luaH_mainposition (const Table *t, const TValue *key) {
  Node *mp = mainposition(t, key);
  return mp;
}

/*
** Debug-only entry point that tells whether 'n' is the shared dummy
** node.
*/
int luaH_isdummy (Node *n) {
  return isdummy(n);
}

#endif
651