1 /*
2 * pkghash.c
3 *
4 * Copyright (c) 2011-2018 Pacman Development Team <pacman-dev@archlinux.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20 #include <errno.h>
21
22 #include "pkghash.h"
23 #include "util.h"
24
25 /* List of primes for possible sizes of hash tables.
26 *
27 * The maximum table size is the last prime under 1,000,000. That is
28 * more than an order of magnitude greater than the number of packages
29 * in any Linux distribution, and well under UINT_MAX.
30 */
31 static const unsigned int prime_list[] =
32 {
33 11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u,
34 53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u,
35 109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u,
36 199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u,
37 383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u,
38 761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u,
39 1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u,
40 2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u,
41 5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u,
42 9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u,
43 16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u,
44 28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u,
45 49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u,
46 85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u,
47 147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u,
48 256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u,
49 444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u,
50 771049u, 834181u, 902483u, 976369u
51 };
52
53 /* How far forward do we look when linear probing for a spot? */
54 static const unsigned int stride = 1;
55 /* What is the maximum load percentage of our hash table? */
56 static const double max_hash_load = 0.68;
57 /* Initial load percentage given a certain size */
58 static const double initial_hash_load = 0.58;
59
60 /* Allocate a hash table with space for at least "size" elements */
_alpm_pkghash_create(unsigned int size)61 alpm_pkghash_t *_alpm_pkghash_create(unsigned int size)
62 {
63 alpm_pkghash_t *hash = NULL;
64 unsigned int i, loopsize;
65
66 CALLOC(hash, 1, sizeof(alpm_pkghash_t), return NULL);
67 size = size / initial_hash_load + 1;
68
69 loopsize = ARRAYSIZE(prime_list);
70 for(i = 0; i < loopsize; i++) {
71 if(prime_list[i] > size) {
72 hash->buckets = prime_list[i];
73 hash->limit = hash->buckets * max_hash_load;
74 break;
75 }
76 }
77
78 if(hash->buckets < size) {
79 errno = ERANGE;
80 free(hash);
81 return NULL;
82 }
83
84 CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t *), \
85 free(hash); return NULL);
86
87 return hash;
88 }
89
get_hash_position(unsigned long name_hash,alpm_pkghash_t * hash)90 static unsigned int get_hash_position(unsigned long name_hash,
91 alpm_pkghash_t *hash)
92 {
93 unsigned int position;
94
95 position = name_hash % hash->buckets;
96
97 /* collision resolution using open addressing with linear probing */
98 while(hash->hash_table[position] != NULL) {
99 position += stride;
100 while(position >= hash->buckets) {
101 position -= hash->buckets;
102 }
103 }
104
105 return position;
106 }
107
108 /* Expand the hash table size to the next increment and rebin the entries */
rehash(alpm_pkghash_t * oldhash)109 static alpm_pkghash_t *rehash(alpm_pkghash_t *oldhash)
110 {
111 alpm_pkghash_t *newhash;
112 unsigned int newsize, i;
113
114 /* Hash tables will need resized in two cases:
115 * - adding packages to the local database
116 * - poor estimation of the number of packages in sync database
117 *
118 * For small hash tables sizes (<500) the increase in size is by a
119 * minimum of a factor of 2 for optimal rehash efficiency. For
120 * larger database sizes, this increase is reduced to avoid excess
121 * memory allocation as both scenarios requiring a rehash should not
122 * require a table size increase that large. */
123 if(oldhash->buckets < 500) {
124 newsize = oldhash->buckets * 2;
125 } else if(oldhash->buckets < 2000) {
126 newsize = oldhash->buckets * 3 / 2;
127 } else if(oldhash->buckets < 5000) {
128 newsize = oldhash->buckets * 4 / 3;
129 } else {
130 newsize = oldhash->buckets + 1;
131 }
132
133 newhash = _alpm_pkghash_create(newsize);
134 if(newhash == NULL) {
135 return NULL;
136 }
137
138 newhash->list = oldhash->list;
139 oldhash->list = NULL;
140
141 for(i = 0; i < oldhash->buckets; i++) {
142 if(oldhash->hash_table[i] != NULL) {
143 alpm_pkg_t *package = oldhash->hash_table[i]->data;
144 unsigned int position = get_hash_position(package->name_hash, newhash);
145
146 newhash->hash_table[position] = oldhash->hash_table[i];
147 oldhash->hash_table[i] = NULL;
148 }
149 }
150
151 newhash->entries = oldhash->entries;
152
153 _alpm_pkghash_free(oldhash);
154
155 return newhash;
156 }
157
pkghash_add_pkg(alpm_pkghash_t ** hashref,alpm_pkg_t * pkg,int sorted)158 static alpm_pkghash_t *pkghash_add_pkg(alpm_pkghash_t **hashref, alpm_pkg_t *pkg,
159 int sorted)
160 {
161 alpm_list_t *ptr;
162 unsigned int position;
163 alpm_pkghash_t *hash;
164
165 if(pkg == NULL || hashref == NULL || *hashref == NULL) {
166 return NULL;
167 }
168 hash = *hashref;
169
170 if(hash->entries >= hash->limit) {
171 if((hash = rehash(hash)) == NULL) {
172 /* resizing failed and there are no more open buckets */
173 return NULL;
174 }
175 *hashref = hash;
176 }
177
178 position = get_hash_position(pkg->name_hash, hash);
179
180 MALLOC(ptr, sizeof(alpm_list_t), return NULL);
181
182 ptr->data = pkg;
183 ptr->prev = ptr;
184 ptr->next = NULL;
185
186 hash->hash_table[position] = ptr;
187 if(!sorted) {
188 hash->list = alpm_list_join(hash->list, ptr);
189 } else {
190 hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp);
191 }
192
193 hash->entries += 1;
194 return hash;
195 }
196
_alpm_pkghash_add(alpm_pkghash_t ** hash,alpm_pkg_t * pkg)197 alpm_pkghash_t *_alpm_pkghash_add(alpm_pkghash_t **hash, alpm_pkg_t *pkg)
198 {
199 return pkghash_add_pkg(hash, pkg, 0);
200 }
201
_alpm_pkghash_add_sorted(alpm_pkghash_t ** hash,alpm_pkg_t * pkg)202 alpm_pkghash_t *_alpm_pkghash_add_sorted(alpm_pkghash_t **hash, alpm_pkg_t *pkg)
203 {
204 return pkghash_add_pkg(hash, pkg, 1);
205 }
206
move_one_entry(alpm_pkghash_t * hash,unsigned int start,unsigned int end)207 static unsigned int move_one_entry(alpm_pkghash_t *hash,
208 unsigned int start, unsigned int end)
209 {
210 /* Iterate backwards from 'end' to 'start', seeing if any of the items
211 * would hash to 'start'. If we find one, we move it there and break. If
212 * we get all the way back to position and find none that hash to it, we
213 * also end iteration. Iterating backwards helps prevent needless shuffles;
214 * we will never need to move more than one item per function call. The
215 * return value is our current iteration location; if this is equal to
216 * 'start' we can stop this madness. */
217 while(end != start) {
218 alpm_list_t *i = hash->hash_table[end];
219 alpm_pkg_t *info = i->data;
220 unsigned int new_position = get_hash_position(info->name_hash, hash);
221
222 if(new_position == start) {
223 hash->hash_table[start] = i;
224 hash->hash_table[end] = NULL;
225 break;
226 }
227
228 /* the odd math ensures we are always positive, e.g.
229 * e.g. (0 - 1) % 47 == -1
230 * e.g. (47 + 0 - 1) % 47 == 46 */
231 end = (hash->buckets + end - stride) % hash->buckets;
232 }
233 return end;
234 }
235
236 /**
237 * @brief Remove a package from a pkghash.
238 *
239 * @param hash the hash to remove the package from
240 * @param pkg the package we are removing
241 * @param data output parameter containing the removed item
242 *
243 * @return the resultant hash
244 */
_alpm_pkghash_remove(alpm_pkghash_t * hash,alpm_pkg_t * pkg,alpm_pkg_t ** data)245 alpm_pkghash_t *_alpm_pkghash_remove(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
246 alpm_pkg_t **data)
247 {
248 alpm_list_t *i;
249 unsigned int position;
250
251 if(data) {
252 *data = NULL;
253 }
254
255 if(pkg == NULL || hash == NULL) {
256 return hash;
257 }
258
259 position = pkg->name_hash % hash->buckets;
260 while((i = hash->hash_table[position]) != NULL) {
261 alpm_pkg_t *info = i->data;
262
263 if(info->name_hash == pkg->name_hash &&
264 strcmp(info->name, pkg->name) == 0) {
265 unsigned int stop, prev;
266
267 /* remove from list and hash */
268 hash->list = alpm_list_remove_item(hash->list, i);
269 if(data) {
270 *data = info;
271 }
272 hash->hash_table[position] = NULL;
273 free(i);
274 hash->entries -= 1;
275
276 /* Potentially move entries following removed entry to keep open
277 * addressing collision resolution working. We start by finding the
278 * next null bucket to know how far we have to look. */
279 stop = position + stride;
280 while(stop >= hash->buckets) {
281 stop -= hash->buckets;
282 }
283 while(hash->hash_table[stop] != NULL && stop != position) {
284 stop += stride;
285 while(stop >= hash->buckets) {
286 stop -= hash->buckets;
287 }
288 }
289 stop = (hash->buckets + stop - stride) % hash->buckets;
290
291 /* We now search backwards from stop to position. If we find an
292 * item that now hashes to position, we will move it, and then try
293 * to plug the new hole we just opened up, until we finally don't
294 * move anything. */
295 while((prev = move_one_entry(hash, position, stop)) != position) {
296 position = prev;
297 }
298
299 return hash;
300 }
301
302 position += stride;
303 while(position >= hash->buckets) {
304 position -= hash->buckets;
305 }
306 }
307
308 return hash;
309 }
310
_alpm_pkghash_free(alpm_pkghash_t * hash)311 void _alpm_pkghash_free(alpm_pkghash_t *hash)
312 {
313 if(hash != NULL) {
314 unsigned int i;
315 for(i = 0; i < hash->buckets; i++) {
316 free(hash->hash_table[i]);
317 }
318 free(hash->hash_table);
319 }
320 free(hash);
321 }
322
_alpm_pkghash_find(alpm_pkghash_t * hash,const char * name)323 alpm_pkg_t *_alpm_pkghash_find(alpm_pkghash_t *hash, const char *name)
324 {
325 alpm_list_t *lp;
326 unsigned long name_hash;
327 unsigned int position;
328
329 if(name == NULL || hash == NULL) {
330 return NULL;
331 }
332
333 name_hash = _alpm_hash_sdbm(name);
334
335 position = name_hash % hash->buckets;
336
337 while((lp = hash->hash_table[position]) != NULL) {
338 alpm_pkg_t *info = lp->data;
339
340 if(info->name_hash == name_hash && strcmp(info->name, name) == 0) {
341 return info;
342 }
343
344 position += stride;
345 while(position >= hash->buckets) {
346 position -= hash->buckets;
347 }
348 }
349
350 return NULL;
351 }
352