1 /*
2  *  pkghash.c
3  *
4  *  Copyright (c) 2011-2018 Pacman Development Team <pacman-dev@archlinux.org>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include <errno.h>
21 
22 #include "pkghash.h"
23 #include "util.h"
24 
25 /* List of primes for possible sizes of hash tables.
26  *
27  * The maximum table size is the last prime under 1,000,000.  That is
28  * more than an order of magnitude greater than the number of packages
29  * in any Linux distribution, and well under UINT_MAX.
30  */
31 static const unsigned int prime_list[] =
32 {
33 	11u, 13u, 17u, 19u, 23u, 29u, 31u, 37u, 41u, 43u, 47u,
34 	53u, 59u, 61u, 67u, 71u, 73u, 79u, 83u, 89u, 97u, 103u,
35 	109u, 113u, 127u, 137u, 139u, 149u, 157u, 167u, 179u, 193u,
36 	199u, 211u, 227u, 241u, 257u, 277u, 293u, 313u, 337u, 359u,
37 	383u, 409u, 439u, 467u, 503u, 541u, 577u, 619u, 661u, 709u,
38 	761u, 823u, 887u, 953u, 1031u, 1109u, 1193u, 1289u, 1381u,
39 	1493u, 1613u, 1741u, 1879u, 2029u, 2179u, 2357u, 2549u,
40 	2753u, 2971u, 3209u, 3469u, 3739u, 4027u, 4349u, 4703u,
41 	5087u, 5503u, 5953u, 6427u, 6949u, 7517u, 8123u, 8783u,
42 	9497u, 10273u, 11113u, 12011u, 12983u, 14033u, 15173u,
43 	16411u, 17749u, 19183u, 20753u, 22447u, 24281u, 26267u,
44 	28411u, 30727u, 33223u, 35933u, 38873u, 42043u, 45481u,
45 	49201u, 53201u, 57557u, 62233u, 67307u, 72817u, 78779u,
46 	85229u, 92203u, 99733u, 107897u, 116731u, 126271u, 136607u,
47 	147793u, 159871u, 172933u, 187091u, 202409u, 218971u, 236897u,
48 	256279u, 277261u, 299951u, 324503u, 351061u, 379787u, 410857u,
49 	444487u, 480881u, 520241u, 562841u, 608903u, 658753u, 712697u,
50 	771049u, 834181u, 902483u, 976369u
51 };
52 
53 /* How far forward do we look when linear probing for a spot? */
54 static const unsigned int stride = 1;
55 /* What is the maximum load percentage of our hash table? */
56 static const double max_hash_load = 0.68;
57 /* Initial load percentage given a certain size */
58 static const double initial_hash_load = 0.58;
59 
60 /* Allocate a hash table with space for at least "size" elements */
_alpm_pkghash_create(unsigned int size)61 alpm_pkghash_t *_alpm_pkghash_create(unsigned int size)
62 {
63 	alpm_pkghash_t *hash = NULL;
64 	unsigned int i, loopsize;
65 
66 	CALLOC(hash, 1, sizeof(alpm_pkghash_t), return NULL);
67 	size = size / initial_hash_load + 1;
68 
69 	loopsize = ARRAYSIZE(prime_list);
70 	for(i = 0; i < loopsize; i++) {
71 		if(prime_list[i] > size) {
72 			hash->buckets = prime_list[i];
73 			hash->limit = hash->buckets * max_hash_load;
74 			break;
75 		}
76 	}
77 
78 	if(hash->buckets < size) {
79 		errno = ERANGE;
80 		free(hash);
81 		return NULL;
82 	}
83 
84 	CALLOC(hash->hash_table, hash->buckets, sizeof(alpm_list_t *), \
85 				free(hash); return NULL);
86 
87 	return hash;
88 }
89 
get_hash_position(unsigned long name_hash,alpm_pkghash_t * hash)90 static unsigned int get_hash_position(unsigned long name_hash,
91 		alpm_pkghash_t *hash)
92 {
93 	unsigned int position;
94 
95 	position = name_hash % hash->buckets;
96 
97 	/* collision resolution using open addressing with linear probing */
98 	while(hash->hash_table[position] != NULL) {
99 		position += stride;
100 		while(position >= hash->buckets) {
101 			position -= hash->buckets;
102 		}
103 	}
104 
105 	return position;
106 }
107 
108 /* Expand the hash table size to the next increment and rebin the entries */
rehash(alpm_pkghash_t * oldhash)109 static alpm_pkghash_t *rehash(alpm_pkghash_t *oldhash)
110 {
111 	alpm_pkghash_t *newhash;
112 	unsigned int newsize, i;
113 
114 	/* Hash tables will need resized in two cases:
115 	 *  - adding packages to the local database
116 	 *  - poor estimation of the number of packages in sync database
117 	 *
118 	 * For small hash tables sizes (<500) the increase in size is by a
119 	 * minimum of a factor of 2 for optimal rehash efficiency.  For
120 	 * larger database sizes, this increase is reduced to avoid excess
121 	 * memory allocation as both scenarios requiring a rehash should not
122 	 * require a table size increase that large. */
123 	if(oldhash->buckets < 500) {
124 		newsize = oldhash->buckets * 2;
125 	} else if(oldhash->buckets < 2000) {
126 		newsize = oldhash->buckets * 3 / 2;
127 	} else if(oldhash->buckets < 5000) {
128 		newsize = oldhash->buckets * 4 / 3;
129 	} else {
130 		newsize = oldhash->buckets + 1;
131 	}
132 
133 	newhash = _alpm_pkghash_create(newsize);
134 	if(newhash == NULL) {
135 		return NULL;
136 	}
137 
138 	newhash->list = oldhash->list;
139 	oldhash->list = NULL;
140 
141 	for(i = 0; i < oldhash->buckets; i++) {
142 		if(oldhash->hash_table[i] != NULL) {
143 			alpm_pkg_t *package = oldhash->hash_table[i]->data;
144 			unsigned int position = get_hash_position(package->name_hash, newhash);
145 
146 			newhash->hash_table[position] = oldhash->hash_table[i];
147 			oldhash->hash_table[i] = NULL;
148 		}
149 	}
150 
151 	newhash->entries = oldhash->entries;
152 
153 	_alpm_pkghash_free(oldhash);
154 
155 	return newhash;
156 }
157 
pkghash_add_pkg(alpm_pkghash_t ** hashref,alpm_pkg_t * pkg,int sorted)158 static alpm_pkghash_t *pkghash_add_pkg(alpm_pkghash_t **hashref, alpm_pkg_t *pkg,
159 		int sorted)
160 {
161 	alpm_list_t *ptr;
162 	unsigned int position;
163 	alpm_pkghash_t *hash;
164 
165 	if(pkg == NULL || hashref == NULL || *hashref == NULL) {
166 		return NULL;
167 	}
168 	hash = *hashref;
169 
170 	if(hash->entries >= hash->limit) {
171 		if((hash = rehash(hash)) == NULL) {
172 			/* resizing failed and there are no more open buckets */
173 			return NULL;
174 		}
175 		*hashref = hash;
176 	}
177 
178 	position = get_hash_position(pkg->name_hash, hash);
179 
180 	MALLOC(ptr, sizeof(alpm_list_t), return NULL);
181 
182 	ptr->data = pkg;
183 	ptr->prev = ptr;
184 	ptr->next = NULL;
185 
186 	hash->hash_table[position] = ptr;
187 	if(!sorted) {
188 		hash->list = alpm_list_join(hash->list, ptr);
189 	} else {
190 		hash->list = alpm_list_mmerge(hash->list, ptr, _alpm_pkg_cmp);
191 	}
192 
193 	hash->entries += 1;
194 	return hash;
195 }
196 
_alpm_pkghash_add(alpm_pkghash_t ** hash,alpm_pkg_t * pkg)197 alpm_pkghash_t *_alpm_pkghash_add(alpm_pkghash_t **hash, alpm_pkg_t *pkg)
198 {
199 	return pkghash_add_pkg(hash, pkg, 0);
200 }
201 
_alpm_pkghash_add_sorted(alpm_pkghash_t ** hash,alpm_pkg_t * pkg)202 alpm_pkghash_t *_alpm_pkghash_add_sorted(alpm_pkghash_t **hash, alpm_pkg_t *pkg)
203 {
204 	return pkghash_add_pkg(hash, pkg, 1);
205 }
206 
move_one_entry(alpm_pkghash_t * hash,unsigned int start,unsigned int end)207 static unsigned int move_one_entry(alpm_pkghash_t *hash,
208 		unsigned int start, unsigned int end)
209 {
210 	/* Iterate backwards from 'end' to 'start', seeing if any of the items
211 	 * would hash to 'start'. If we find one, we move it there and break.  If
212 	 * we get all the way back to position and find none that hash to it, we
213 	 * also end iteration. Iterating backwards helps prevent needless shuffles;
214 	 * we will never need to move more than one item per function call.  The
215 	 * return value is our current iteration location; if this is equal to
216 	 * 'start' we can stop this madness. */
217 	while(end != start) {
218 		alpm_list_t *i = hash->hash_table[end];
219 		alpm_pkg_t *info = i->data;
220 		unsigned int new_position = get_hash_position(info->name_hash, hash);
221 
222 		if(new_position == start) {
223 			hash->hash_table[start] = i;
224 			hash->hash_table[end] = NULL;
225 			break;
226 		}
227 
228 		/* the odd math ensures we are always positive, e.g.
229 		 * e.g. (0 - 1) % 47      == -1
230 		 * e.g. (47 + 0 - 1) % 47 == 46 */
231 		end = (hash->buckets + end - stride) % hash->buckets;
232 	}
233 	return end;
234 }
235 
236 /**
237  * @brief Remove a package from a pkghash.
238  *
239  * @param hash     the hash to remove the package from
240  * @param pkg      the package we are removing
241  * @param data     output parameter containing the removed item
242  *
243  * @return the resultant hash
244  */
_alpm_pkghash_remove(alpm_pkghash_t * hash,alpm_pkg_t * pkg,alpm_pkg_t ** data)245 alpm_pkghash_t *_alpm_pkghash_remove(alpm_pkghash_t *hash, alpm_pkg_t *pkg,
246 		alpm_pkg_t **data)
247 {
248 	alpm_list_t *i;
249 	unsigned int position;
250 
251 	if(data) {
252 		*data = NULL;
253 	}
254 
255 	if(pkg == NULL || hash == NULL) {
256 		return hash;
257 	}
258 
259 	position = pkg->name_hash % hash->buckets;
260 	while((i = hash->hash_table[position]) != NULL) {
261 		alpm_pkg_t *info = i->data;
262 
263 		if(info->name_hash == pkg->name_hash &&
264 					strcmp(info->name, pkg->name) == 0) {
265 			unsigned int stop, prev;
266 
267 			/* remove from list and hash */
268 			hash->list = alpm_list_remove_item(hash->list, i);
269 			if(data) {
270 				*data = info;
271 			}
272 			hash->hash_table[position] = NULL;
273 			free(i);
274 			hash->entries -= 1;
275 
276 			/* Potentially move entries following removed entry to keep open
277 			 * addressing collision resolution working. We start by finding the
278 			 * next null bucket to know how far we have to look. */
279 			stop = position + stride;
280 			while(stop >= hash->buckets) {
281 				stop -= hash->buckets;
282 			}
283 			while(hash->hash_table[stop] != NULL && stop != position) {
284 				stop += stride;
285 				while(stop >= hash->buckets) {
286 					stop -= hash->buckets;
287 				}
288 			}
289 			stop = (hash->buckets + stop - stride) % hash->buckets;
290 
291 			/* We now search backwards from stop to position. If we find an
292 			 * item that now hashes to position, we will move it, and then try
293 			 * to plug the new hole we just opened up, until we finally don't
294 			 * move anything. */
295 			while((prev = move_one_entry(hash, position, stop)) != position) {
296 				position = prev;
297 			}
298 
299 			return hash;
300 		}
301 
302 		position += stride;
303 		while(position >= hash->buckets) {
304 			position -= hash->buckets;
305 		}
306 	}
307 
308 	return hash;
309 }
310 
_alpm_pkghash_free(alpm_pkghash_t * hash)311 void _alpm_pkghash_free(alpm_pkghash_t *hash)
312 {
313 	if(hash != NULL) {
314 		unsigned int i;
315 		for(i = 0; i < hash->buckets; i++) {
316 			free(hash->hash_table[i]);
317 		}
318 		free(hash->hash_table);
319 	}
320 	free(hash);
321 }
322 
_alpm_pkghash_find(alpm_pkghash_t * hash,const char * name)323 alpm_pkg_t *_alpm_pkghash_find(alpm_pkghash_t *hash, const char *name)
324 {
325 	alpm_list_t *lp;
326 	unsigned long name_hash;
327 	unsigned int position;
328 
329 	if(name == NULL || hash == NULL) {
330 		return NULL;
331 	}
332 
333 	name_hash = _alpm_hash_sdbm(name);
334 
335 	position = name_hash % hash->buckets;
336 
337 	while((lp = hash->hash_table[position]) != NULL) {
338 		alpm_pkg_t *info = lp->data;
339 
340 		if(info->name_hash == name_hash && strcmp(info->name, name) == 0) {
341 			return info;
342 		}
343 
344 		position += stride;
345 		while(position >= hash->buckets) {
346 			position -= hash->buckets;
347 		}
348 	}
349 
350 	return NULL;
351 }
352