1 /*
2 ** $Id: ltable.c,v 2.100 2015/01/05 13:52:37 roberto Exp $
3 ** Lua tables (hash)
4 ** See Copyright Notice in lua.h
5 */
6
7 #define ltable_c
8 #define LUA_CORE
9
10 #include "lprefix.h"
11
12
13 /*
14 ** Implementation of tables (aka arrays, objects, or hash tables).
** Tables keep their elements in two parts: an array part and a hash part.
** Positive integer keys are all candidates to be kept in the array
** part. The actual size of the array is the largest 'n' such that more
** than half the slots between 1 and n are in use.
19 ** Hash uses a mix of chained scatter table with Brent's variation.
20 ** A main invariant of these tables is that, if an element is not
21 ** in its main position (i.e. the 'original' position that its hash gives
22 ** to it), then the colliding element is in its own main position.
23 ** Hence even when the load factor reaches 100%, performance remains good.
24 */
25
26 #include <float.h>
27 #include <math.h>
28 #include <string.h>
29 #include <limits.h>
30
31 #include "lua.h"
32
33 #include "ldebug.h"
34 #include "ldo.h"
35 #include "lgc.h"
36 #include "lmem.h"
37 #include "lobject.h"
38 #include "lstate.h"
39 #include "lstring.h"
40 #include "ltable.h"
41 #include "lvm.h"
42
43
/*
** Maximum size of array part (MAXASIZE) is 2^MAXABITS. MAXABITS is
** the largest integer such that MAXASIZE fits in an unsigned int.
** (It is one less than the number of bits in an 'int', so the shift
** in MAXASIZE stays below the width of the unsigned operand '1u'.)
*/
#define MAXABITS cast_int(sizeof(int) * CHAR_BIT - 1)
#define MAXASIZE (1u << MAXABITS)

/*
** Maximum size of hash part is 2^MAXHBITS. MAXHBITS is the largest
** integer such that 2^MAXHBITS fits in a signed int. (Note that the
** maximum number of elements in a table, 2^MAXABITS + 2^MAXHBITS, still
** fits comfortably in an unsigned int.)
** 'setnodevector' raises "table overflow" when a requested hash size
** needs more than MAXHBITS bits.
*/
#define MAXHBITS (MAXABITS - 1)
58
59
/*
** Main position for hash value 'n' in table 't': 'n' reduced (with
** 'lmod') by the number of nodes in the hash part of 't'.
*/
#define hashpow2(t,n) (gnode(t, lmod((n), sizenode(t))))

/* strings use their 'hash' field; booleans and integers their own value */
#define hashstr(t,str) hashpow2(t, (str)->hash)
#define hashboolean(t,p) hashpow2(t, p)
#define hashint(t,i) hashpow2(t, i)


/*
** for some types, it is better to avoid modulus by power of 2, as
** they tend to have many 2 factors.
** (The divisor '(sizenode(t)-1)|1' is always odd, and therefore never
** zero. 'n' must not be negative, otherwise the remainder would be a
** negative node index.)
*/
#define hashmod(t,n) (gnode(t, ((n) % ((sizenode(t)-1)|1))))


/* pointers are first converted to an integer with 'point2int' */
#define hashpointer(t,p) hashmod(t, point2int(p))
75
76
/*
** Shared, read-only node used as the hash part of every table whose
** hash part has no elements (see 'setnodevector' with size 0). Such
** tables are recognized with 'isdummy' and their node vector must never
** be freed (see 'luaH_free' and 'luaH_resize').
*/
#define dummynode (&dummynode_)

#define isdummy(n) ((n) == dummynode)

static const Node dummynode_ = {
  {NILCONSTANT}, /* value */
  {{NILCONSTANT, 0}} /* key (the trailing 0 is presumably the 'next' link) */
};
85
86
87 /*
88 ** Checks whether a float has a value representable as a lua_Integer
89 ** (and does the conversion if so)
90 */
numisinteger(lua_Number x,lua_Integer * p)91 static int numisinteger (lua_Number x, lua_Integer *p) {
92 if ((x) == l_floor(x)) /* integral value? */
93 return lua_numbertointeger(x, p); /* try as an integer */
94 else return 0;
95 }
96
97
98 /*
99 ** hash for floating-point numbers
100 */
hashfloat(const Table * t,lua_Number n)101 static Node *hashfloat (const Table *t, lua_Number n) {
102 int i;
103 n = l_mathop(frexp)(n, &i) * cast_num(INT_MAX - DBL_MAX_EXP);
104 i += cast_int(n);
105 if (i < 0) {
106 if (cast(unsigned int, i) == 0u - i) /* use unsigned to avoid overflows */
107 i = 0; /* handle INT_MIN */
108 i = -i; /* must be a positive value */
109 }
110 return hashmod(t, i);
111 }
112
113
114
115 /*
116 ** returns the 'main' position of an element in a table (that is, the index
117 ** of its hash value)
118 */
mainposition(const Table * t,const TValue * key)119 static Node *mainposition (const Table *t, const TValue *key) {
120 switch (ttype(key)) {
121 case LUA_TNUMINT:
122 return hashint(t, ivalue(key));
123 case LUA_TNUMFLT:
124 return hashfloat(t, fltvalue(key));
125 case LUA_TSHRSTR:
126 return hashstr(t, tsvalue(key));
127 case LUA_TLNGSTR: {
128 TString *s = tsvalue(key);
129 if (s->extra == 0) { /* no hash? */
130 s->hash = luaS_hash(getstr(s), s->len, s->hash);
131 s->extra = 1; /* now it has its hash */
132 }
133 return hashstr(t, tsvalue(key));
134 }
135 case LUA_TBOOLEAN:
136 return hashboolean(t, bvalue(key));
137 case LUA_TLIGHTUSERDATA:
138 return hashpointer(t, pvalue(key));
139 case LUA_TLCF:
140 return hashpointer(t, fvalue(key));
141 default:
142 return hashpointer(t, gcvalue(key));
143 }
144 }
145
146
147 /*
148 ** returns the index for 'key' if 'key' is an appropriate key to live in
149 ** the array part of the table, 0 otherwise.
150 */
arrayindex(const TValue * key)151 static unsigned int arrayindex (const TValue *key) {
152 if (ttisinteger(key)) {
153 lua_Integer k = ivalue(key);
154 if (0 < k && (lua_Unsigned)k <= MAXASIZE)
155 return cast(unsigned int, k); /* 'key' is an appropriate array index */
156 }
157 return 0; /* 'key' did not match some condition */
158 }
159
160
161 /*
162 ** returns the index of a 'key' for table traversals. First goes all
163 ** elements in the array part, then elements in the hash part. The
164 ** beginning of a traversal is signaled by 0.
165 */
findindex(lua_State * L,Table * t,StkId key)166 static unsigned int findindex (lua_State *L, Table *t, StkId key) {
167 unsigned int i;
168 if (ttisnil(key)) return 0; /* first iteration */
169 i = arrayindex(key);
170 if (i != 0 && i <= t->sizearray) /* is 'key' inside array part? */
171 return i; /* yes; that's the index */
172 else {
173 int nx;
174 Node *n = mainposition(t, key);
175 for (;;) { /* check whether 'key' is somewhere in the chain */
176 /* key may be dead already, but it is ok to use it in 'next' */
177 if (luaV_rawequalobj(gkey(n), key) ||
178 (ttisdeadkey(gkey(n)) && iscollectable(key) &&
179 deadvalue(gkey(n)) == gcvalue(key))) {
180 i = cast_int(n - gnode(t, 0)); /* key index in hash table */
181 /* hash elements are numbered after array ones */
182 return (i + 1) + t->sizearray;
183 }
184 nx = gnext(n);
185 if (nx == 0)
186 luaG_runerror(L, "invalid key to 'next'"); /* key not found */
187 else n += nx;
188 }
189 }
190 }
191
192
/*
** One step of a table traversal. 'key' points to the stack slot with
** the previous key (nil to start the traversal). If there is a next
** non-nil entry, its key is stored in 'key', its value in 'key+1', and
** the function returns 1; otherwise it returns 0. Errors from
** 'findindex' (invalid key) propagate.
*/
int luaH_next (lua_State *L, Table *t, StkId key) {
  unsigned int pos = findindex(L, t, key);  /* position after previous key */
  while (pos < t->sizearray) {  /* first, the array part */
    TValue *slot = &t->array[pos];
    if (!ttisnil(slot)) {
      setivalue(key, pos + 1);  /* array slot 'pos' holds key 'pos + 1' */
      setobj2s(L, key+1, slot);
      return 1;
    }
    pos++;
  }
  pos -= t->sizearray;  /* from now on, 'pos' indexes the hash part */
  while (cast_int(pos) < sizenode(t)) {
    Node *nd = gnode(t, pos);
    if (!ttisnil(gval(nd))) {
      setobj2s(L, key, gkey(nd));
      setobj2s(L, key+1, gval(nd));
      return 1;
    }
    pos++;
  }
  return 0;  /* no more elements */
}
211
212
213 /*
214 ** {=============================================================
215 ** Rehash
216 ** ==============================================================
217 */
218
/*
** Chooses the size of the array part. 'nums[i]' is the number of
** integer keys between 2^(i - 1) + 1 and 2^i, and '*narray' is the total
** number of such keys. The chosen size is the largest power of two 'n'
** such that more than n/2 of the keys 1..n are present. It is stored in
** '*narray', and the function returns how many keys will go to the
** array part.
*/
static unsigned int computesizes (unsigned int nums[], unsigned int *narray) {
  const unsigned int total = *narray;  /* number of candidate keys */
  unsigned int seen = 0;  /* candidate keys not larger than 'pow2' */
  unsigned int kept = 0;  /* keys that go to the array part */
  unsigned int best = 0;  /* best size found so far */
  unsigned int pow2 = 1;  /* 2^lg */
  int lg = 0;
  /* a size 'pow2' is only viable if more than pow2/2 keys exist at all */
  while (pow2/2 < total) {
    unsigned int cnt = nums[lg];
    if (cnt > 0) {
      seen += cnt;
      if (seen > pow2/2) {  /* more than half of the slots in use? */
        best = pow2;
        kept = seen;  /* every key up to 'best' goes to the array part */
      }
    }
    if (seen == total)
      break;  /* all keys already counted */
    lg++;
    pow2 *= 2;
  }
  *narray = best;
  lua_assert(*narray/2 <= kept && kept <= *narray);
  return kept;
}
245
246
/*
** If 'key' is a candidate for the array part (see 'arrayindex'), counts
** it in the slice of 'nums' given by the ceiling of its base-2 logarithm
** and returns 1; otherwise returns 0 and leaves 'nums' unchanged.
*/
static int countint (const TValue *key, unsigned int *nums) {
  unsigned int idx = arrayindex(key);
  if (idx == 0)
    return 0;  /* not an appropriate array index */
  nums[luaO_ceillog2(idx)]++;  /* count as such */
  return 1;
}
256
257
numusearray(const Table * t,unsigned int * nums)258 static unsigned int numusearray (const Table *t, unsigned int *nums) {
259 int lg;
260 unsigned int ttlg; /* 2^lg */
261 unsigned int ause = 0; /* summation of 'nums' */
262 unsigned int i = 1; /* count to traverse all array keys */
263 /* traverse each slice */
264 for (lg = 0, ttlg = 1; lg <= MAXABITS; lg++, ttlg *= 2) {
265 unsigned int lc = 0; /* counter */
266 unsigned int lim = ttlg;
267 if (lim > t->sizearray) {
268 lim = t->sizearray; /* adjust upper limit */
269 if (i > lim)
270 break; /* no more elements to count */
271 }
272 /* count elements in range (2^(lg - 1), 2^lg] */
273 for (; i <= lim; i++) {
274 if (!ttisnil(&t->array[i-1]))
275 lc++;
276 }
277 nums[lg] += lc;
278 ause += lc;
279 }
280 return ause;
281 }
282
283
numusehash(const Table * t,unsigned int * nums,unsigned int * pnasize)284 static int numusehash (const Table *t, unsigned int *nums,
285 unsigned int *pnasize) {
286 int totaluse = 0; /* total number of elements */
287 int ause = 0; /* elements added to 'nums' (can go to array part) */
288 int i = sizenode(t);
289 while (i--) {
290 Node *n = &t->node[i];
291 if (!ttisnil(gval(n))) {
292 ause += countint(gkey(n), nums);
293 totaluse++;
294 }
295 }
296 *pnasize += ause;
297 return totaluse;
298 }
299
300
/*
** Reallocates the array part of 't' to 'size' slots, fills the slots
** beyond the old size with nil, and records the new size.
*/
static void setarrayvector (lua_State *L, Table *t, unsigned int size) {
  unsigned int oldsize = t->sizearray;
  unsigned int k;
  luaM_reallocvector(L, t->array, oldsize, size, TValue);
  for (k = oldsize; k < size; k++)  /* empty when the array does not grow */
    setnilvalue(&t->array[k]);
  t->sizearray = size;
}
308
309
/*
** Installs a fresh hash part in 't'. With 'size' zero, the table uses
** the shared 'dummynode'. Otherwise 'size' is rounded to 2^lsize, where
** 'lsize' is the result of 'luaO_ceillog2', and a new vector of empty
** nodes is allocated. Raises "table overflow" if more than MAXHBITS
** bits would be needed. The previous node vector is neither freed nor
** looked at; that is up to the caller.
*/
static void setnodevector (lua_State *L, Table *t, unsigned int size) {
  int lsize = 0;
  if (size == 0)  /* no elements to hash part? */
    t->node = cast(Node *, dummynode);  /* use common 'dummynode' */
  else {
    Node *nd;
    Node *limit;
    lsize = luaO_ceillog2(size);
    if (lsize > MAXHBITS)
      luaG_runerror(L, "table overflow");
    size = twoto(lsize);
    t->node = luaM_newvector(L, size, Node);
    limit = t->node + size;
    for (nd = t->node; nd < limit; nd++) {  /* clear all nodes */
      gnext(nd) = 0;
      setnilvalue(wgkey(nd));
      setnilvalue(gval(nd));
    }
  }
  t->lsizenode = cast_byte(lsize);
  t->lastfree = gnode(t, size);  /* all positions are free */
}
333
334
/*
** Resizes table 't': its array part gets 'nasize' slots and its hash
** part is rebuilt by 'setnodevector' from 'nhsize'. All entries with
** non-nil values are kept. The order of the steps matters:
** - the array grows before the new hash part is created;
** - the new hash part must exist before the array shrinks, because the
**   entries of the vanishing slice are re-inserted through 'luaH_setint'
**   (with 'sizearray' already reduced, they land in the hash part);
** - the old node vector is only freed after all its entries have been
**   re-inserted, and never when it is the shared 'dummynode'.
*/
void luaH_resize (lua_State *L, Table *t, unsigned int nasize,
                  unsigned int nhsize) {
  unsigned int i;
  int j;
  unsigned int oldasize = t->sizearray;
  int oldhsize = t->lsizenode;  /* log2 of the old hash size */
  Node *nold = t->node; /* save old hash ... */
  if (nasize > oldasize) /* array part must grow? */
    setarrayvector(L, t, nasize);
  /* create new hash part with appropriate size */
  setnodevector(L, t, nhsize);
  if (nasize < oldasize) { /* array part must shrink? */
    t->sizearray = nasize;
    /* re-insert elements from vanishing slice */
    for (i=nasize; i<oldasize; i++) {
      if (!ttisnil(&t->array[i]))
        luaH_setint(L, t, i + 1, &t->array[i]);
    }
    /* shrink array */
    luaM_reallocvector(L, t->array, oldasize, nasize, TValue);
  }
  /* re-insert elements from hash part */
  for (j = twoto(oldhsize) - 1; j >= 0; j--) {
    Node *old = nold + j;
    if (!ttisnil(gval(old))) {
      /* doesn't need barrier/invalidate cache, as entry was
         already present in the table */
      setobjt2t(L, luaH_set(L, t, gkey(old)), gval(old));
    }
  }
  if (!isdummy(nold))
    luaM_freearray(L, nold, cast(size_t, twoto(oldhsize))); /* free old array */
}
368
369
/*
** Resizes only the array part of 't' to 'nasize' slots; the hash part
** is rebuilt with its current size (0 if the table uses 'dummynode').
*/
void luaH_resizearray (lua_State *L, Table *t, unsigned int nasize) {
  int nsize;
  if (isdummy(t->node))
    nsize = 0;  /* empty hash part */
  else
    nsize = sizenode(t);
  luaH_resize(L, t, nasize, nsize);
}
374
375 /*
376 ** nums[i] = number of keys 'k' where 2^(i - 1) < k <= 2^i
377 */
rehash(lua_State * L,Table * t,const TValue * ek)378 static void rehash (lua_State *L, Table *t, const TValue *ek) {
379 unsigned int nasize, na;
380 unsigned int nums[MAXABITS + 1];
381 int i;
382 int totaluse;
383 for (i = 0; i <= MAXABITS; i++) nums[i] = 0; /* reset counts */
384 nasize = numusearray(t, nums); /* count keys in array part */
385 totaluse = nasize; /* all those keys are integer keys */
386 totaluse += numusehash(t, nums, &nasize); /* count keys in hash part */
387 /* count extra key */
388 nasize += countint(ek, nums);
389 totaluse++;
390 /* compute new size for array part */
391 na = computesizes(nums, &nasize);
392 /* resize the table to new computed sizes */
393 luaH_resize(L, t, nasize, totaluse - na);
394 }
395
396
397
398 /*
399 ** }=============================================================
400 */
401
402
luaH_new(lua_State * L)403 Table *luaH_new (lua_State *L) {
404 GCObject *o = luaC_newobj(L, LUA_TTABLE, sizeof(Table));
405 Table *t = gco2t(o);
406 t->metatable = NULL;
407 t->flags = cast_byte(~0);
408 t->array = NULL;
409 t->sizearray = 0;
410 setnodevector(L, t, 0);
411 return t;
412 }
413
414
/*
** Releases all memory used by table 't': its node vector (unless it is
** the shared 'dummynode', which is not owned by the table), its array
** part, and the table structure itself.
*/
void luaH_free (lua_State *L, Table *t) {
  Node *nodes = t->node;
  if (!isdummy(nodes))
    luaM_freearray(L, nodes, cast(size_t, sizenode(t)));
  luaM_freearray(L, t->array, t->sizearray);
  luaM_free(L, t);
}
421
422
getfreepos(Table * t)423 static Node *getfreepos (Table *t) {
424 while (t->lastfree > t->node) {
425 t->lastfree--;
426 if (ttisnil(gkey(t->lastfree)))
427 return t->lastfree;
428 }
429 return NULL; /* could not find a free place */
430 }
431
432
433
/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
**
** Returns a pointer to the (nil) value slot of the new key, to be filled
** by the caller. Raises an error if 'key' is nil or NaN. A float key
** with an integral value that fits an integer is inserted as an integer.
** When there is no free node, the table is rehashed and the insertion
** is redone through 'luaH_set'.
** Links between nodes ('gnext') are offsets relative to the node that
** holds them, so they must be adjusted whenever a node is copied.
*/
TValue *luaH_newkey (lua_State *L, Table *t, const TValue *key) {
  Node *mp;
  TValue aux;  /* holds the integer version of a float key */
  if (ttisnil(key)) luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {
    lua_Number n = fltvalue(key);
    lua_Integer k;
    if (luai_numisnan(n))
      luaG_runerror(L, "table index is NaN");
    if (numisinteger(n, &k)) { /* index is int? */
      setivalue(&aux, k);
      key = &aux; /* insert it as an integer */
    }
  }
  mp = mainposition(t, key);
  /* the dummy node can never store a key, so it counts as taken */
  if (!ttisnil(gval(mp)) || isdummy(mp)) { /* main position is taken? */
    Node *othern;
    Node *f = getfreepos(t); /* get a free place */
    if (f == NULL) { /* cannot find a free place? */
      rehash(L, t, key); /* grow table */
      /* whatever called 'newkey' takes care of TM cache and GC barrier */
      return luaH_set(L, t, key); /* insert key into grown table */
    }
    lua_assert(!isdummy(f));
    othern = mainposition(t, gkey(mp));  /* main position of colliding key */
    if (othern != mp) { /* is colliding node out of its main position? */
      /* yes; move colliding node into free position */
      while (othern + gnext(othern) != mp) /* find previous */
        othern += gnext(othern);
      gnext(othern) = cast_int(f - othern); /* rechain to point to 'f' */
      *f = *mp; /* copy colliding node into free pos. (mp->next also goes) */
      if (gnext(mp) != 0) {
        /* the copied offset was relative to 'mp'; make it relative to 'f' */
        gnext(f) += cast_int(mp - f); /* correct 'next' */
        gnext(mp) = 0; /* now 'mp' is free */
      }
      setnilvalue(gval(mp));
    }
    else { /* colliding node is in its own main position */
      /* new node will go into free position */
      /* 'f' is inserted in the chain right after 'mp' */
      if (gnext(mp) != 0)
        gnext(f) = cast_int((mp + gnext(mp)) - f); /* chain new position */
      else lua_assert(gnext(f) == 0);
      gnext(mp) = cast_int(f - mp);
      mp = f;
    }
  }
  setnodekey(L, &mp->i_key, key);
  luaC_barrierback(L, t, key);
  lua_assert(ttisnil(gval(mp)));
  return gval(mp);
}
492
493
494 /*
495 ** search function for integers
496 */
luaH_getint(Table * t,lua_Integer key)497 const TValue *luaH_getint (Table *t, lua_Integer key) {
498 /* (1 <= key && key <= t->sizearray) */
499 if (l_castS2U(key - 1) < t->sizearray)
500 return &t->array[key - 1];
501 else {
502 Node *n = hashint(t, key);
503 for (;;) { /* check whether 'key' is somewhere in the chain */
504 if (ttisinteger(gkey(n)) && ivalue(gkey(n)) == key)
505 return gval(n); /* that's it */
506 else {
507 int nx = gnext(n);
508 if (nx == 0) break;
509 n += nx;
510 }
511 };
512 return luaO_nilobject;
513 }
514 }
515
516
517 /*
518 ** search function for short strings
519 */
luaH_getstr(Table * t,TString * key)520 const TValue *luaH_getstr (Table *t, TString *key) {
521 Node *n = hashstr(t, key);
522 lua_assert(key->tt == LUA_TSHRSTR);
523 for (;;) { /* check whether 'key' is somewhere in the chain */
524 const TValue *k = gkey(n);
525 if (ttisshrstring(k) && eqshrstr(tsvalue(k), key))
526 return gval(n); /* that's it */
527 else {
528 int nx = gnext(n);
529 if (nx == 0) break;
530 n += nx;
531 }
532 };
533 return luaO_nilobject;
534 }
535
536
537 /*
538 ** main search function
539 */
luaH_get(Table * t,const TValue * key)540 const TValue *luaH_get (Table *t, const TValue *key) {
541 switch (ttype(key)) {
542 case LUA_TSHRSTR: return luaH_getstr(t, tsvalue(key));
543 case LUA_TNUMINT: return luaH_getint(t, ivalue(key));
544 case LUA_TNIL: return luaO_nilobject;
545 case LUA_TNUMFLT: {
546 lua_Integer k;
547 if (numisinteger(fltvalue(key), &k)) /* index is int? */
548 return luaH_getint(t, k); /* use specialized version */
549 /* else go through */
550 }
551 default: {
552 Node *n = mainposition(t, key);
553 for (;;) { /* check whether 'key' is somewhere in the chain */
554 if (luaV_rawequalobj(gkey(n), key))
555 return gval(n); /* that's it */
556 else {
557 int nx = gnext(n);
558 if (nx == 0) break;
559 n += nx;
560 }
561 };
562 return luaO_nilobject;
563 }
564 }
565 }
566
567
568 /*
569 ** beware: when using this function you probably need to check a GC
570 ** barrier and invalidate the TM cache.
571 */
luaH_set(lua_State * L,Table * t,const TValue * key)572 TValue *luaH_set (lua_State *L, Table *t, const TValue *key) {
573 const TValue *p = luaH_get(t, key);
574 if (p != luaO_nilobject)
575 return cast(TValue *, p);
576 else return luaH_newkey(L, t, key);
577 }
578
579
/*
** Stores 'value' under the integer key 'key' in table 't', creating the
** key (through 'luaH_newkey') if it is not present yet.
*/
void luaH_setint (lua_State *L, Table *t, lua_Integer key, TValue *value) {
  const TValue *found = luaH_getint(t, key);
  TValue *cell = cast(TValue *, found);
  if (found == luaO_nilobject) {  /* key is not present? */
    TValue k;
    setivalue(&k, key);
    cell = luaH_newkey(L, t, &k);
  }
  setobj2t(L, cell, value);
}
592
593
/*
** Searches for a boundary beyond index 'j', where 'j' is zero or an
** index known to be present. The upper limit starts at j + 1 and is
** doubled until an absent index is found; a binary search between the
** last present index and that absent one gives the result. If the
** upper limit grows beyond MAX_INT/2, the function falls back to a
** linear search starting at index 1.
*/
static int unbound_search (Table *t, unsigned int j) {
  unsigned int lo = j;  /* zero or a present index */
  unsigned int hi = j + 1;  /* candidate for an absent index */
  /* find 'lo' and 'hi' such that 'lo' is present and 'hi' is not */
  while (!ttisnil(luaH_getint(t, hi))) {
    lo = hi;
    if (hi > cast(unsigned int, MAX_INT)/2) {  /* doubling would overflow? */
      /* table was built with bad purposes: resort to linear search */
      unsigned int k = 1;
      while (!ttisnil(luaH_getint(t, k)))
        k++;
      return k - 1;
    }
    hi *= 2;
  }
  /* now do a binary search between them */
  while (hi - lo > 1) {
    unsigned int mid = (lo + hi)/2;
    if (ttisnil(luaH_getint(t, mid)))
      hi = mid;
    else
      lo = mid;
  }
  return lo;
}
616
617
618 /*
619 ** Try to find a boundary in table 't'. A 'boundary' is an integer index
620 ** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
621 */
luaH_getn(Table * t)622 int luaH_getn (Table *t) {
623 unsigned int j = t->sizearray;
624 if (j > 0 && ttisnil(&t->array[j - 1])) {
625 /* there is a boundary in the array part: (binary) search for it */
626 unsigned int i = 0;
627 while (j - i > 1) {
628 unsigned int m = (i+j)/2;
629 if (ttisnil(&t->array[m - 1])) j = m;
630 else i = m;
631 }
632 return i;
633 }
634 /* else must find a boundary in hash part */
635 else if (isdummy(t->node)) /* hash part is empty? */
636 return j; /* that is easy... */
637 else return unbound_search(t, j);
638 }
639
640
641
642 #if defined(LUA_DEBUG)
643
luaH_mainposition(const Table * t,const TValue * key)644 Node *luaH_mainposition (const Table *t, const TValue *key) {
645 return mainposition(t, key);
646 }
647
/* tells whether 'n' is the shared dummy node (only with LUA_DEBUG) */
int luaH_isdummy (Node *n) {
  return isdummy(n);
}
649
650 #endif
651