1 /*****************************************************************************/
2 /*  LibreDWG - free implementation of the DWG file format                    */
3 /*                                                                           */
4 /*  Copyright (C) 2018-2019 Free Software Foundation, Inc.                   */
5 /*                                                                           */
6 /*  This library is free software, licensed under the terms of the GNU       */
7 /*  General Public License as published by the Free Software Foundation,     */
8 /*  either version 3 of the License, or (at your option) any later version.  */
9 /*  You should have received a copy of the GNU General Public License        */
10 /*  along with this program.  If not, see <http://www.gnu.org/licenses/>.    */
11 /*****************************************************************************/
12 
13 /*
14  * hash.c: int hashmap for the object_ref map.
15  *         uses linear probing for best cache usage.
16  *         values are inlined into the array. The 0 key is disallowed.
17  * written by Reini Urban
18  */
19 
20 #include "hash.h"
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include "logging.h"
25 
26 dwg_inthash *
hash_new(uint32_t size)27 hash_new (uint32_t size)
28 {
29   dwg_inthash *hash = (dwg_inthash *)malloc (sizeof (dwg_inthash));
30   uint32_t cap;
31   if (!hash)
32     return NULL;
33   // multiply with load factor,
34   // and round size to next power of 2 (fast) or prime (secure),
35   if (size < 15)
36     size = 15;
37   cap = (uint32_t) (size * 100.0 / HASH_LOAD);
38   // this is slow, but only done once. clz would be much faster
39   while (size <= cap)
40     size <<= 1U;
41   hash->array = (struct _hashbucket *)calloc (size, sizeof (struct _hashbucket)); // key+value pairs
42   hash->elems = 0;
43   hash->size = size;
44   return hash;
45 }
46 
47 // if exceeds load factor
48 static inline int
hash_need_resize(dwg_inthash * hash)49 hash_need_resize (dwg_inthash *hash)
50 {
51   return (uint32_t) (hash->elems * 100.0 / HASH_LOAD) > hash->size;
52 }
53 
54 static void
hash_resize(dwg_inthash * hash)55 hash_resize (dwg_inthash *hash)
56 {
57   dwg_inthash oldhash = *hash;
58   uint32_t size = hash->size * 2;
59   uint32_t i;
60 
61   // allocate key+value pairs afresh
62   hash->array = (struct _hashbucket *)calloc (size, sizeof (struct _hashbucket));
63   if (!hash->array)
64     {
65       *hash = oldhash;
66       return;
67     }
68   hash->elems = 0;
69   hash->size = size;
70   memset (hash->array, 0, size * sizeof (struct _hashbucket));
71   // spread out the old elements in double space, less collisions
72   for (i = 0; i < oldhash.size; i++)
73     {
74       if (oldhash.array[i].key)
75         hash_set (hash, oldhash.array[i].key, oldhash.array[i].value);
76     }
77   free (oldhash.array);
78   return;
79 }
80 
81 // found this gem by Thomas Mueller at stackoverflow. triviality threshold.
82 // it's like a normal murmur or jenkins finalizer,
83 // just statistically tested to be optimal.
84 // Note that this is entirely "insecure", the inverse func is trivial.
85 // We don't care as we deal with DWG and had linear search before.
86 static inline uint32_t
hash_func(uint32_t key)87 hash_func (uint32_t key)
88 {
89   key = ((key >> 16) ^ key) * 0x45d9f3b;
90   key = ((key >> 16) ^ key) * 0x45d9f3b;
91   key = (key >> 16) ^ key;
92   return key;
93 }
94 
95 // 0 is disallowed as key, even if there's no deletion.
96 uint32_t
hash_get(dwg_inthash * hash,uint32_t key)97 hash_get (dwg_inthash *hash, uint32_t key)
98 {
99   uint32_t i = hash_func (key) % hash->size;
100   uint32_t j = i;
101   while (hash->array[i].key && hash->array[i].key != key)
102     {
103       //HANDLER (OUTPUT, "get collision at %d\n", i);
104       i++; // linear probing with wrap around
105       if (i == hash->size)
106         i = 0;
107       if (i == j) // not found
108         return HASH_NOT_FOUND;
109     }
110   if (hash->array[i].key)
111     return hash->array[i].value;
112   else
113     return HASH_NOT_FOUND;
114 }
115 
116 // search or insert. key 0 is forbidden.
117 void
hash_set(dwg_inthash * hash,uint32_t key,uint32_t value)118 hash_set (dwg_inthash *hash, uint32_t key, uint32_t value)
119 {
120   uint32_t i = hash_func (key) % hash->size;
121   uint32_t j = i;
122   if (key == 0)
123     {
124       HANDLER (OUTPUT, "forbidden 0 key\n");
125       return;
126     }
127   // empty slot
128   if (!hash->array[i].key)
129     {
130       hash->array[i].key = key;
131       hash->array[i].value = value;
132       hash->elems++;
133       return;
134     }
135   while (hash->array[i].key)
136     {
137       if (hash->array[i].key == key)
138         { // found
139           hash->array[i].value = value;
140           return;
141         }
142       // HANDLER (OUTPUT, "set collision at %d\n", i);
143       i++; // linear probing with wrap around
144       if (i == hash->size)
145         i = 0;
146       if (i == j) // not found
147         {
148           // HANDLER (OUTPUT, "set not found at %d\n", i);
149           // if does not exist, add at i+1
150           if (hash_need_resize (hash))
151             {
152               // HANDLER (OUTPUT, "resize at %d\n", hash->size);
153               hash_resize (hash);
154               return hash_set (hash, key, value);
155             }
156           while (hash->array[i].key) // find next empty slot
157             {
158               // up to here we have no coverage!
159               // HANDLER (OUTPUT, "set 2nd collision at %d\n", i);
160               i++; // again linear probing with wrap around
161               if (i == hash->size)
162                 i = 0;
163               if (i == j) // not found
164                 {
165                   // HANDLER (OUTPUT, "not found resize at %d\n", hash->size);
166                   hash_resize (hash); // guarantees new empty slots
167                   hash_set (hash, key, value);
168                   return;
169                 }
170               else
171                 { // insert at empty slot
172                   hash->array[i].key = key;
173                   hash->array[i].value = value;
174                   hash->elems++;
175                   return;
176                 }
177             }
178         }
179     }
180   // empty slot
181   hash->array[i].key = key;
182   hash->array[i].value = value;
183   hash->elems++;
184   return;
185 }
186 
187 void
hash_free(dwg_inthash * hash)188 hash_free (dwg_inthash *hash)
189 {
190   free (hash->array);
191   hash->array = NULL;
192   hash->size = 0;
193   hash->elems = 0;
194   free (hash);
195 }
196