1 /*
2  * This file is part of the MicroPython project, http://micropython.org/
3  *
4  * The MIT License (MIT)
5  *
6  * Copyright (c) 2013, 2014 Damien P. George
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24  * THE SOFTWARE.
25  */
26 
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <assert.h>
31 
32 #include "py/mpconfig.h"
33 #include "py/misc.h"
34 #include "py/runtime.h"
35 
36 #if MICROPY_DEBUG_VERBOSE // print debugging info
37 #define DEBUG_PRINT (1)
38 #else // don't print debugging info
39 #define DEBUG_PRINT (0)
40 #define DEBUG_printf(...) (void)0
41 #endif
42 
43 // This table of sizes is used to control the growth of hash tables.
44 // The first set of sizes are chosen so the allocation fits exactly in a
45 // 4-word GC block, and it's not so important for these small values to be
46 // prime.  The latter sizes are prime and increase at an increasing rate.
// Entries are stored as uint16_t, so every value here must stay below 65536.
// The table must remain sorted in ascending order: the lookup helper below
// returns the first entry that is >= the requested size.
STATIC const uint16_t hash_allocation_sizes[] = {
    0, 2, 4, 6, 8, 10, 12, // +2
    17, 23, 29, 37, 47, 59, 73, // *1.25
    97, 127, 167, 223, 293, 389, 521, 691, 919, 1223, 1627, 2161, // *1.33
    3229, 4831, 7243, 10861, 16273, 24407, 36607, 54907, // *1.5
};
53 
get_hash_alloc_greater_or_equal_to(size_t x)54 STATIC size_t get_hash_alloc_greater_or_equal_to(size_t x) {
55     for (size_t i = 0; i < MP_ARRAY_SIZE(hash_allocation_sizes); i++) {
56         if (hash_allocation_sizes[i] >= x) {
57             return hash_allocation_sizes[i];
58         }
59     }
60     // ran out of primes in the table!
61     // return something sensible, at least make it odd
62     return (x + x / 2) | 1;
63 }
64 
65 /******************************************************************************/
66 /* map                                                                        */
67 
// Initialise map as an empty, unordered, non-fixed hash table with room for
// n slots.  When n is 0 no table is allocated and map->table is NULL;
// otherwise a zero-filled table of n elements is allocated.
void mp_map_init(mp_map_t *map, size_t n) {
    map->alloc = n;
    map->table = (n == 0) ? NULL : m_new0(mp_map_elem_t, n);

    // A new map holds no entries, so trivially all of its keys are qstrs.
    map->used = 0;
    map->all_keys_are_qstrs = 1;
    map->is_fixed = 0;
    map->is_ordered = 0;
}
81 
// Initialise map to wrap an existing table of n entries supplied by the
// caller.  The map is marked fixed and ordered, with all n slots in use, so
// it is searched linearly and may only be used for plain lookups.
void mp_map_init_fixed_table(mp_map_t *map, size_t n, const mp_obj_t *table) {
    // The caller's array is reinterpreted as map elements; the const
    // qualifier is dropped here because map->table is a non-const pointer.
    map->table = (mp_map_elem_t *)table;
    map->used = n;
    map->alloc = n;

    map->is_ordered = 1;
    map->is_fixed = 1;
    map->all_keys_are_qstrs = 1;
}
90 
// Not the same as mp_map_clear(): the semantics are different.
// Release the storage held by map.  The table is freed only if the map is
// not fixed (a fixed map uses a table supplied by the caller).  The flag
// bits (all_keys_are_qstrs, is_fixed, is_ordered) are left untouched.
void mp_map_deinit(mp_map_t *map) {
    if (!map->is_fixed) {
        m_del(mp_map_elem_t, map->table, map->alloc);
    }
    map->used = map->alloc = 0;
    // Do not keep a pointer to the table that was just released: with
    // alloc == 0 a later add on this map goes through mp_map_rehash(), which
    // would otherwise pass the stale pointer to m_del() a second time.
    map->table = NULL;
}
98 
// Remove every entry from map and return it to the unallocated state, ready
// to be filled again as a non-fixed map.  The is_ordered flag is not changed.
void mp_map_clear(mp_map_t *map) {
    // A fixed map uses a table supplied by the caller (see
    // mp_map_init_fixed_table), so that table is not freed here.
    if (map->is_fixed == 0) {
        m_del(mp_map_elem_t, map->table, map->alloc);
    }

    map->table = NULL;
    map->used = 0;
    map->alloc = 0;
    map->is_fixed = 0;
    map->all_keys_are_qstrs = 1;
}
109 
mp_map_rehash(mp_map_t * map)110 STATIC void mp_map_rehash(mp_map_t *map) {
111     size_t old_alloc = map->alloc;
112     size_t new_alloc = get_hash_alloc_greater_or_equal_to(map->alloc + 1);
113     DEBUG_printf("mp_map_rehash(%p): " UINT_FMT " -> " UINT_FMT "\n", map, old_alloc, new_alloc);
114     mp_map_elem_t *old_table = map->table;
115     mp_map_elem_t *new_table = m_new0(mp_map_elem_t, new_alloc);
116     // If we reach this point, table resizing succeeded, now we can edit the old map.
117     map->alloc = new_alloc;
118     map->used = 0;
119     map->all_keys_are_qstrs = 1;
120     map->table = new_table;
121     for (size_t i = 0; i < old_alloc; i++) {
122         if (old_table[i].key != MP_OBJ_NULL && old_table[i].key != MP_OBJ_SENTINEL) {
123             mp_map_lookup(map, old_table[i].key, MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = old_table[i].value;
124         }
125     }
126     m_del(mp_map_elem_t, old_table, old_alloc);
127 }
128 
129 // MP_MAP_LOOKUP behaviour:
130 //  - returns NULL if not found, else the slot it was found in with key,value non-null
131 // MP_MAP_LOOKUP_ADD_IF_NOT_FOUND behaviour:
132 //  - returns slot, with key non-null and value=MP_OBJ_NULL if it was added
133 // MP_MAP_LOOKUP_REMOVE_IF_FOUND behaviour:
//  - returns NULL if not found, else the slot it was found in with key null and value non-null
mp_map_lookup(mp_map_t * map,mp_obj_t index,mp_map_lookup_kind_t lookup_kind)135 mp_map_elem_t *mp_map_lookup(mp_map_t *map, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
136     // If the map is a fixed array then we must only be called for a lookup
137     assert(!map->is_fixed || lookup_kind == MP_MAP_LOOKUP);
138 
139     // Work out if we can compare just pointers
140     bool compare_only_ptrs = map->all_keys_are_qstrs;
141     if (compare_only_ptrs) {
142         if (mp_obj_is_qstr(index)) {
143             // Index is a qstr, so can just do ptr comparison.
144         } else if (mp_obj_is_type(index, &mp_type_str)) {
145             // Index is a non-interned string.
146             // We can either intern the string, or force a full equality comparison.
147             // We chose the latter, since interning costs time and potentially RAM,
148             // and it won't necessarily benefit subsequent calls because these calls
149             // most likely won't pass the newly-interned string.
150             compare_only_ptrs = false;
151         } else if (lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
152             // If we are not adding, then we can return straight away a failed
153             // lookup because we know that the index will never be found.
154             return NULL;
155         }
156     }
157 
158     // if the map is an ordered array then we must do a brute force linear search
159     if (map->is_ordered) {
160         for (mp_map_elem_t *elem = &map->table[0], *top = &map->table[map->used]; elem < top; elem++) {
161             if (elem->key == index || (!compare_only_ptrs && mp_obj_equal(elem->key, index))) {
162                 #if MICROPY_PY_COLLECTIONS_ORDEREDDICT
163                 if (MP_UNLIKELY(lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND)) {
164                     // remove the found element by moving the rest of the array down
165                     mp_obj_t value = elem->value;
166                     --map->used;
167                     memmove(elem, elem + 1, (top - elem - 1) * sizeof(*elem));
168                     // put the found element after the end so the caller can access it if needed
169                     // note: caller must NULL the value so the GC can clean up (e.g. see dict_get_helper).
170                     elem = &map->table[map->used];
171                     elem->key = MP_OBJ_NULL;
172                     elem->value = value;
173                 }
174                 #endif
175                 return elem;
176             }
177         }
178         #if MICROPY_PY_COLLECTIONS_ORDEREDDICT
179         if (MP_LIKELY(lookup_kind != MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) {
180             return NULL;
181         }
182         if (map->used == map->alloc) {
183             // TODO: Alloc policy
184             map->alloc += 4;
185             map->table = m_renew(mp_map_elem_t, map->table, map->used, map->alloc);
186             mp_seq_clear(map->table, map->used, map->alloc, sizeof(*map->table));
187         }
188         mp_map_elem_t *elem = map->table + map->used++;
189         elem->key = index;
190         if (!mp_obj_is_qstr(index)) {
191             map->all_keys_are_qstrs = 0;
192         }
193         return elem;
194         #else
195         return NULL;
196         #endif
197     }
198 
199     // map is a hash table (not an ordered array), so do a hash lookup
200 
201     if (map->alloc == 0) {
202         if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
203             mp_map_rehash(map);
204         } else {
205             return NULL;
206         }
207     }
208 
209     // get hash of index, with fast path for common case of qstr
210     mp_uint_t hash;
211     if (mp_obj_is_qstr(index)) {
212         hash = qstr_hash(MP_OBJ_QSTR_VALUE(index));
213     } else {
214         hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
215     }
216 
217     size_t pos = hash % map->alloc;
218     size_t start_pos = pos;
219     mp_map_elem_t *avail_slot = NULL;
220     for (;;) {
221         mp_map_elem_t *slot = &map->table[pos];
222         if (slot->key == MP_OBJ_NULL) {
223             // found NULL slot, so index is not in table
224             if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
225                 map->used += 1;
226                 if (avail_slot == NULL) {
227                     avail_slot = slot;
228                 }
229                 avail_slot->key = index;
230                 avail_slot->value = MP_OBJ_NULL;
231                 if (!mp_obj_is_qstr(index)) {
232                     map->all_keys_are_qstrs = 0;
233                 }
234                 return avail_slot;
235             } else {
236                 return NULL;
237             }
238         } else if (slot->key == MP_OBJ_SENTINEL) {
239             // found deleted slot, remember for later
240             if (avail_slot == NULL) {
241                 avail_slot = slot;
242             }
243         } else if (slot->key == index || (!compare_only_ptrs && mp_obj_equal(slot->key, index))) {
244             // found index
245             // Note: CPython does not replace the index; try x={True:'true'};x[1]='one';x
246             if (lookup_kind == MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
247                 // delete element in this slot
248                 map->used--;
249                 if (map->table[(pos + 1) % map->alloc].key == MP_OBJ_NULL) {
250                     // optimisation if next slot is empty
251                     slot->key = MP_OBJ_NULL;
252                 } else {
253                     slot->key = MP_OBJ_SENTINEL;
254                 }
255                 // keep slot->value so that caller can access it if needed
256             }
257             return slot;
258         }
259 
260         // not yet found, keep searching in this table
261         pos = (pos + 1) % map->alloc;
262 
263         if (pos == start_pos) {
264             // search got back to starting position, so index is not in table
265             if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
266                 if (avail_slot != NULL) {
267                     // there was an available slot, so use that
268                     map->used++;
269                     avail_slot->key = index;
270                     avail_slot->value = MP_OBJ_NULL;
271                     if (!mp_obj_is_qstr(index)) {
272                         map->all_keys_are_qstrs = 0;
273                     }
274                     return avail_slot;
275                 } else {
276                     // not enough room in table, rehash it
277                     mp_map_rehash(map);
278                     // restart the search for the new element
279                     start_pos = pos = hash % map->alloc;
280                 }
281             } else {
282                 return NULL;
283             }
284         }
285     }
286 }
287 
288 /******************************************************************************/
289 /* set                                                                        */
290 
291 #if MICROPY_PY_BUILTINS_SET
292 
// Initialise set as an empty set whose table has n zero-filled slots.
void mp_set_init(mp_set_t *set, size_t n) {
    set->used = 0;
    set->alloc = n;
    set->table = m_new0(mp_obj_t, n);
}
298 
mp_set_rehash(mp_set_t * set)299 STATIC void mp_set_rehash(mp_set_t *set) {
300     size_t old_alloc = set->alloc;
301     mp_obj_t *old_table = set->table;
302     set->alloc = get_hash_alloc_greater_or_equal_to(set->alloc + 1);
303     set->used = 0;
304     set->table = m_new0(mp_obj_t, set->alloc);
305     for (size_t i = 0; i < old_alloc; i++) {
306         if (old_table[i] != MP_OBJ_NULL && old_table[i] != MP_OBJ_SENTINEL) {
307             mp_set_lookup(set, old_table[i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
308         }
309     }
310     m_del(mp_obj_t, old_table, old_alloc);
311 }
312 
// Look up index in set, optionally adding and/or removing it depending on the
// bits set in lookup_kind (the ADD_IF_NOT_FOUND and REMOVE_IF_FOUND bits are
// tested independently, so the combined kind
// MP_MAP_LOOKUP_ADD_IF_NOT_FOUND_OR_REMOVE_IF_FOUND works too).
// Returns the element stored in the set if it was found (after removing it
// when requested), index itself if it was added, and MP_OBJ_NULL otherwise.
mp_obj_t mp_set_lookup(mp_set_t *set, mp_obj_t index, mp_map_lookup_kind_t lookup_kind) {
    if (set->alloc == 0) {
        // No table yet: nothing can be found, and adding needs a table first.
        if (!(lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) {
            return MP_OBJ_NULL;
        }
        mp_set_rehash(set);
    }

    mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
    size_t first = hash % set->alloc;
    size_t cur = first;
    // first deleted slot seen while probing; reused if index has to be added
    mp_obj_t *reusable = NULL;

    for (;;) {
        mp_obj_t elem = set->table[cur];

        if (elem == MP_OBJ_NULL) {
            // An empty slot ends the probe sequence: index is not in the table.
            if (!(lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) {
                return MP_OBJ_NULL;
            }
            if (reusable == NULL) {
                reusable = &set->table[cur];
            }
            set->used++;
            *reusable = index;
            return index;
        }

        if (elem == MP_OBJ_SENTINEL) {
            // Deleted slot: remember the first one, then keep probing.
            if (reusable == NULL) {
                reusable = &set->table[cur];
            }
        } else if (mp_obj_equal(elem, index)) {
            // Found the element.
            if (lookup_kind & MP_MAP_LOOKUP_REMOVE_IF_FOUND) {
                set->used--;
                size_t following = (cur + 1) % set->alloc;
                if (set->table[following] == MP_OBJ_NULL) {
                    // The next slot is empty, so this one can simply be emptied.
                    set->table[cur] = MP_OBJ_NULL;
                } else {
                    // Otherwise leave a deleted marker in its place.
                    set->table[cur] = MP_OBJ_SENTINEL;
                }
            }
            return elem;
        }

        // Not found yet: advance to the next slot, wrapping around at the end.
        cur = (cur + 1) % set->alloc;
        if (cur != first) {
            continue;
        }

        // The probe came back to where it started, so index is not in the table.
        if (!(lookup_kind & MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)) {
            return MP_OBJ_NULL;
        }
        if (reusable != NULL) {
            // A deleted slot was seen on the way round, so store index there.
            set->used++;
            *reusable = index;
            return index;
        }
        // The table is full: grow it and restart the search in the new table.
        mp_set_rehash(set);
        first = cur = hash % set->alloc;
    }
}
385 
mp_set_remove_first(mp_set_t * set)386 mp_obj_t mp_set_remove_first(mp_set_t *set) {
387     for (size_t pos = 0; pos < set->alloc; pos++) {
388         if (mp_set_slot_is_filled(set, pos)) {
389             mp_obj_t elem = set->table[pos];
390             // delete element
391             set->used--;
392             if (set->table[(pos + 1) % set->alloc] == MP_OBJ_NULL) {
393                 // optimisation if next slot is empty
394                 set->table[pos] = MP_OBJ_NULL;
395             } else {
396                 set->table[pos] = MP_OBJ_SENTINEL;
397             }
398             return elem;
399         }
400     }
401     return MP_OBJ_NULL;
402 }
403 
// Free the table of set and return the set to the empty, unallocated state.
void mp_set_clear(mp_set_t *set) {
    m_del(mp_obj_t, set->table, set->alloc);
    set->table = NULL;
    set->used = 0;
    set->alloc = 0;
}
410 
411 #endif // MICROPY_PY_BUILTINS_SET
412 
413 #if defined(DEBUG_PRINT) && DEBUG_PRINT
// Debug helper: print one line per allocated slot of map (key, then the raw
// value pointer), followed by a "---" terminator line.
void mp_map_dump(mp_map_t *map) {
    for (size_t i = 0; i < map->alloc; i++) {
        mp_obj_t key = map->table[i].key;
        if (key == MP_OBJ_NULL) {
            DEBUG_printf("(nil)");
        } else if (key == MP_OBJ_SENTINEL) {
            // MP_OBJ_SENTINEL only marks a deleted hash slot; it is not a
            // real object and so must not be handed to mp_obj_print().
            DEBUG_printf("(deleted)");
        } else {
            mp_obj_print(key, PRINT_REPR);
        }
        DEBUG_printf(": %p\n", map->table[i].value);
    }
    DEBUG_printf("---\n");
}
425 #endif
426