1 /*****************************************************************************/
2 /* LibreDWG - free implementation of the DWG file format */
3 /* */
4 /* Copyright (C) 2018-2019 Free Software Foundation, Inc. */
5 /* */
6 /* This library is free software, licensed under the terms of the GNU */
7 /* General Public License as published by the Free Software Foundation, */
8 /* either version 3 of the License, or (at your option) any later version. */
9 /* You should have received a copy of the GNU General Public License */
10 /* along with this program. If not, see <http://www.gnu.org/licenses/>. */
11 /*****************************************************************************/
12
13 /*
14 * hash.c: int hashmap for the object_ref map.
15 * uses linear probing for best cache usage.
16 * values are inlined into the array. The 0 key is disallowed.
17 * written by Reini Urban
18 */
19
20 #include "hash.h"
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include "logging.h"
25
26 dwg_inthash *
hash_new(uint32_t size)27 hash_new (uint32_t size)
28 {
29 dwg_inthash *hash = (dwg_inthash *)malloc (sizeof (dwg_inthash));
30 uint32_t cap;
31 if (!hash)
32 return NULL;
33 // multiply with load factor,
34 // and round size to next power of 2 (fast) or prime (secure),
35 if (size < 15)
36 size = 15;
37 cap = (uint32_t) (size * 100.0 / HASH_LOAD);
38 // this is slow, but only done once. clz would be much faster
39 while (size <= cap)
40 size <<= 1U;
41 hash->array = (struct _hashbucket *)calloc (size, sizeof (struct _hashbucket)); // key+value pairs
42 hash->elems = 0;
43 hash->size = size;
44 return hash;
45 }
46
47 // if exceeds load factor
48 static inline int
hash_need_resize(dwg_inthash * hash)49 hash_need_resize (dwg_inthash *hash)
50 {
51 return (uint32_t) (hash->elems * 100.0 / HASH_LOAD) > hash->size;
52 }
53
54 static void
hash_resize(dwg_inthash * hash)55 hash_resize (dwg_inthash *hash)
56 {
57 dwg_inthash oldhash = *hash;
58 uint32_t size = hash->size * 2;
59 uint32_t i;
60
61 // allocate key+value pairs afresh
62 hash->array = (struct _hashbucket *)calloc (size, sizeof (struct _hashbucket));
63 if (!hash->array)
64 {
65 *hash = oldhash;
66 return;
67 }
68 hash->elems = 0;
69 hash->size = size;
70 memset (hash->array, 0, size * sizeof (struct _hashbucket));
71 // spread out the old elements in double space, less collisions
72 for (i = 0; i < oldhash.size; i++)
73 {
74 if (oldhash.array[i].key)
75 hash_set (hash, oldhash.array[i].key, oldhash.array[i].value);
76 }
77 free (oldhash.array);
78 return;
79 }
80
81 // found this gem by Thomas Mueller at stackoverflow. triviality threshold.
82 // it's like a normal murmur or jenkins finalizer,
83 // just statistically tested to be optimal.
84 // Note that this is entirely "insecure", the inverse func is trivial.
85 // We don't care as we deal with DWG and had linear search before.
86 static inline uint32_t
hash_func(uint32_t key)87 hash_func (uint32_t key)
88 {
89 key = ((key >> 16) ^ key) * 0x45d9f3b;
90 key = ((key >> 16) ^ key) * 0x45d9f3b;
91 key = (key >> 16) ^ key;
92 return key;
93 }
94
95 // 0 is disallowed as key, even if there's no deletion.
96 uint32_t
hash_get(dwg_inthash * hash,uint32_t key)97 hash_get (dwg_inthash *hash, uint32_t key)
98 {
99 uint32_t i = hash_func (key) % hash->size;
100 uint32_t j = i;
101 while (hash->array[i].key && hash->array[i].key != key)
102 {
103 //HANDLER (OUTPUT, "get collision at %d\n", i);
104 i++; // linear probing with wrap around
105 if (i == hash->size)
106 i = 0;
107 if (i == j) // not found
108 return HASH_NOT_FOUND;
109 }
110 if (hash->array[i].key)
111 return hash->array[i].value;
112 else
113 return HASH_NOT_FOUND;
114 }
115
116 // search or insert. key 0 is forbidden.
117 void
hash_set(dwg_inthash * hash,uint32_t key,uint32_t value)118 hash_set (dwg_inthash *hash, uint32_t key, uint32_t value)
119 {
120 uint32_t i = hash_func (key) % hash->size;
121 uint32_t j = i;
122 if (key == 0)
123 {
124 HANDLER (OUTPUT, "forbidden 0 key\n");
125 return;
126 }
127 // empty slot
128 if (!hash->array[i].key)
129 {
130 hash->array[i].key = key;
131 hash->array[i].value = value;
132 hash->elems++;
133 return;
134 }
135 while (hash->array[i].key)
136 {
137 if (hash->array[i].key == key)
138 { // found
139 hash->array[i].value = value;
140 return;
141 }
142 // HANDLER (OUTPUT, "set collision at %d\n", i);
143 i++; // linear probing with wrap around
144 if (i == hash->size)
145 i = 0;
146 if (i == j) // not found
147 {
148 // HANDLER (OUTPUT, "set not found at %d\n", i);
149 // if does not exist, add at i+1
150 if (hash_need_resize (hash))
151 {
152 // HANDLER (OUTPUT, "resize at %d\n", hash->size);
153 hash_resize (hash);
154 return hash_set (hash, key, value);
155 }
156 while (hash->array[i].key) // find next empty slot
157 {
158 // up to here we have no coverage!
159 // HANDLER (OUTPUT, "set 2nd collision at %d\n", i);
160 i++; // again linear probing with wrap around
161 if (i == hash->size)
162 i = 0;
163 if (i == j) // not found
164 {
165 // HANDLER (OUTPUT, "not found resize at %d\n", hash->size);
166 hash_resize (hash); // guarantees new empty slots
167 hash_set (hash, key, value);
168 return;
169 }
170 else
171 { // insert at empty slot
172 hash->array[i].key = key;
173 hash->array[i].value = value;
174 hash->elems++;
175 return;
176 }
177 }
178 }
179 }
180 // empty slot
181 hash->array[i].key = key;
182 hash->array[i].value = value;
183 hash->elems++;
184 return;
185 }
186
187 void
hash_free(dwg_inthash * hash)188 hash_free (dwg_inthash *hash)
189 {
190 free (hash->array);
191 hash->array = NULL;
192 hash->size = 0;
193 hash->elems = 0;
194 free (hash);
195 }
196