1 /*
2 BAREOS® - Backup Archiving REcovery Open Sourced
3
4 Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
5 Copyright (C) 2014-2014 Bareos GmbH & Co. KG
6
7 This program is Free Software; you can redistribute it and/or
8 modify it under the terms of version three of the GNU Affero General Public
9 License as published by the Free Software Foundation and included
10 in the file LICENSE.
11
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.
21 */
22 /*
23 * BAREOS hash table routines
24 *
25 * htable is a hash table of items (pointers). This code is
26 * adapted and enhanced from code I wrote in 1982 for a
27 * relocatable linker. At that time, the hash table size
28 * was fixed and a primary number, which essentially provides
29 * the randomness. In this program, the hash table can grow when
30 * it gets too full, so the table size here is a binary number. The
31 * hashing is provided using an idea from Tcl where the initial
32 * hash code is "randomized" using a simple calculation from
33 * a random number generator that multiplies by a big number
34 * (I multiply by a prime number, while Tcl did not)
35 * then shifts the result down and does the binary division
36 * by masking. Increasing the size of the hash table is simple.
37 * Just create a new larger table, walk the old table and
38 * re-hash insert each entry into the new table.
39 *
40 * Kern Sibbald, July MMIII
41 */
42
43 #include "include/hostconfig.h"
44
45 #ifdef HAVE_HPUX_OS
46 #pragma pack(4)
47 #endif
48
49 #include "include/bareos.h"
50
51 #define B_PAGE_SIZE 4096
52 #define MIN_PAGES 32
53 #define MAX_PAGES 2400
54 #define MIN_BUF_SIZE (MIN_PAGES * B_PAGE_SIZE) /* 128 Kb */
55 #define MAX_BUF_SIZE (MAX_PAGES * B_PAGE_SIZE) /* approx 10MB */
56
57 static const int debuglevel = 500;
58
59 /*
60 * htable (Hash Table) class.
61 */
62
63 /*
64 * This subroutine gets a big buffer.
65 */
MallocBigBuf(int size)66 void htable::MallocBigBuf(int size)
67 {
68 struct h_mem *hmem;
69
70 hmem = (struct h_mem *)malloc(size);
71 total_size += size;
72 blocks++;
73 hmem->next = mem_block;
74 mem_block = hmem;
75 hmem->mem = mem_block->first;
76 hmem->rem = (char *)hmem + size - hmem->mem;
77 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
78 }
79
80 /*
81 * This routine frees the whole tree.
82 */
HashBigFree()83 void htable::HashBigFree()
84 {
85 struct h_mem *hmem, *rel;
86
87 for (hmem=mem_block; hmem; ) {
88 rel = hmem;
89 hmem = hmem->next;
90 Dmsg1(100, "free malloc buf=%p\n", rel);
91 free(rel);
92 }
93 }
94
95 /*
96 * Normal hash malloc routine that gets a "small" buffer from the big buffer
97 */
hash_malloc(int size)98 char *htable::hash_malloc(int size)
99 {
100 int mb_size;
101 char *buf;
102 int asize = BALIGN(size);
103
104 if (mem_block->rem < asize) {
105 if (total_size >= (extend_length / 2)) {
106 mb_size = extend_length;
107 } else {
108 mb_size = extend_length / 2;
109 }
110 MallocBigBuf(mb_size);
111 Dmsg1(100, "Created new big buffer of %ld bytes\n", mb_size);
112 }
113 mem_block->rem -= asize;
114 buf = mem_block->mem;
115 mem_block->mem += asize;
116 return buf;
117 }
118
119 /*
120 * Create hash of key, stored in hash then
121 * create and return the pseudo random bucket index
122 */
HashIndex(char * key)123 void htable::HashIndex(char *key)
124 {
125 hash = 0;
126 for (char *p = key; *p; p++) {
127 hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
128 }
129
130 /*
131 * Multiply by large prime number, take top bits, mask for remainder.
132 */
133 index = ((hash * 1103515249) >> rshift) & mask;
134 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
135 }
136
HashIndex(uint32_t key)137 void htable::HashIndex(uint32_t key)
138 {
139 hash = key;
140
141 /*
142 * Multiply by large prime number, take top bits, mask for remainder.
143 */
144 index = ((hash * 1103515249) >> rshift) & mask;
145 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
146 }
147
HashIndex(uint64_t key)148 void htable::HashIndex(uint64_t key)
149 {
150 hash = key;
151
152 /*
153 * Multiply by large prime number, take top bits, mask for remainder.
154 */
155 index = ((hash * 1103515249) >> rshift) & mask;
156 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
157 }
158
HashIndex(uint8_t * key,uint32_t keylen)159 void htable::HashIndex(uint8_t *key, uint32_t keylen)
160 {
161 hash = 0;
162 for (uint8_t *p = key; keylen--; p++) {
163 hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
164 }
165
166 /*
167 * Multiply by large prime number, take top bits, mask for remainder.
168 */
169 index = ((hash * 1103515249) >> rshift) & mask;
170 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
171 }
172
173 /*
174 * tsize is the estimated number of entries in the hash table
175 */
htable(void * item,void * link,int tsize,int nr_pages,int nr_entries)176 htable::htable(void *item, void *link, int tsize, int nr_pages, int nr_entries)
177 {
178 init(item, link, tsize, nr_pages, nr_entries);
179 }
180
init(void * item,void * link,int tsize,int nr_pages,int nr_entries)181 void htable::init(void *item, void *link, int tsize, int nr_pages, int nr_entries)
182 {
183 int pwr;
184 int pagesize;
185 int buffer_size;
186
187 memset(this, 0, sizeof(htable));
188 if (tsize < 31) {
189 tsize = 31;
190 }
191 tsize >>= 2;
192 for (pwr = 0; tsize; pwr++) {
193 tsize >>= 1;
194 }
195 loffset = (char *)link - (char *)item;
196 mask = ~((~0) << pwr); /* 3 bits => table size = 8 */
197 rshift = 30 - pwr; /* Start using bits 28, 29, 30 */
198 buckets = 1 << pwr; /* Hash table size -- power of two */
199 max_items = buckets * nr_entries; /* Allow average nr_entries entries per chain */
200 table = (hlink **)malloc(buckets * sizeof(hlink *));
201 memset(table, 0, buckets * sizeof(hlink *));
202
203 #ifdef HAVE_GETPAGESIZE
204 pagesize = getpagesize();
205 #else
206 pagesize = B_PAGE_SIZE;
207 #endif
208 if (nr_pages == 0) {
209 buffer_size = MAX_BUF_SIZE;
210 } else {
211 buffer_size = pagesize * nr_pages;
212 if (buffer_size > MAX_BUF_SIZE) {
213 buffer_size = MAX_BUF_SIZE;
214 } else if (buffer_size < MIN_BUF_SIZE) {
215 buffer_size = MIN_BUF_SIZE;
216 }
217 }
218 MallocBigBuf(buffer_size);
219 extend_length = buffer_size;
220 Dmsg1(100, "Allocated big buffer of %ld bytes\n", buffer_size);
221 }
222
size()223 uint32_t htable::size()
224 {
225 return num_items;
226 }
227
228 /*
229 * Take each hash link and walk down the chain of items
230 * that hash there counting them (i.e. the hits),
231 * then report that number.
232 * Obiously, the more hits in a chain, the more time
233 * it takes to reference them. Empty chains are not so
234 * hot either -- as it means unused or wasted space.
235 */
236 #define MAX_COUNT 20
stats()237 void htable::stats()
238 {
239 int hits[MAX_COUNT];
240 int max = 0;
241 int i, j;
242 hlink *p;
243 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
244 printf("Hits/bucket: buckets\n");
245 for (i = 0; i < MAX_COUNT; i++) {
246 hits[i] = 0;
247 }
248 for (i = 0; i < (int)buckets; i++) {
249 p = table[i];
250 j = 0;
251 while (p) {
252 p = (hlink *)(p->next);
253 j++;
254 }
255 if (j > max) {
256 max = j;
257 }
258 if (j < MAX_COUNT) {
259 hits[j]++;
260 }
261 }
262 for (i = 0; i < MAX_COUNT; i++) {
263 printf("%2d: %d\n",i, hits[i]);
264 }
265 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
266 printf("max hits in a bucket = %d\n", max);
267 printf("total bytes malloced = %lld\n", (long long int)total_size);
268 printf("total blocks malloced = %d\n", blocks);
269 }
270
grow_table()271 void htable::grow_table()
272 {
273 htable *big;
274 hlink *cur;
275 void *next_item;
276
277 Dmsg1(100, "Grow called old size = %d\n", buckets);
278
279 /*
280 * Setup a bigger table.
281 */
282 big = (htable *)malloc(sizeof(htable));
283 big->hash = hash;
284 big->total_size = total_size;
285 big->extend_length = extend_length;
286 big->index = index;
287 big->blocks = blocks;
288 big->mem_block = mem_block;
289 big->loffset = loffset;
290 big->mask = mask<<1 | 1;
291 big->rshift = rshift - 1;
292 big->num_items = 0;
293 big->buckets = buckets * 2;
294 big->max_items = big->buckets * 4;
295
296 /*
297 * Create a bigger hash table.
298 */
299 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
300 memset(big->table, 0, big->buckets * sizeof(hlink *));
301 big->walkptr = NULL;
302 big->walk_index = 0;
303
304 /*
305 * Insert all the items in the new hash table
306 */
307 Dmsg1(100, "Before copy num_items=%d\n", num_items);
308
309 /*
310 * We walk through the old smaller tree getting items,
311 * but since we are overwriting the colision links, we must
312 * explicitly save the item->next pointer and walk each
313 * colision chain ourselves. We do use next() for getting
314 * to the next bucket.
315 */
316 for (void *item=first(); item; ) {
317 cur = (hlink *)((char *)item + loffset);
318 next_item = cur->next; /* Save link overwritten by insert */
319 switch (cur->key_type) {
320 case KEY_TYPE_CHAR:
321 Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
322 big->insert(cur->key.char_key, item);
323 break;
324 case KEY_TYPE_UINT32:
325 Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
326 big->insert(cur->key.uint32_key, item);
327 break;
328 case KEY_TYPE_UINT64:
329 Dmsg1(100, "Grow insert: %lld\n", cur->key.uint64_key);
330 big->insert(cur->key.uint64_key, item);
331 break;
332 case KEY_TYPE_BINARY:
333 big->insert(cur->key.binary_key, cur->key_len, item);
334 break;
335 }
336 if (next_item) {
337 item = (void *)((char *)next_item - loffset);
338 } else {
339 walkptr = NULL;
340 item = next();
341 }
342 }
343
344 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
345 if (num_items != big->num_items) {
346 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
347 }
348
349 free(table);
350 memcpy(this, big, sizeof(htable)); /* Move everything across */
351 free(big);
352
353 Dmsg0(100, "Exit grow.\n");
354 }
355
insert(char * key,void * item)356 bool htable::insert(char *key, void *item)
357 {
358 hlink *hp;
359
360 if (lookup(key)) {
361 return false; /* Already exists */
362 }
363
364 ASSERT(index < buckets);
365 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
366 hp = (hlink *)(((char *)item) + loffset);
367
368 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
369
370 hp->next = table[index];
371 hp->hash = hash;
372 hp->key_type = KEY_TYPE_CHAR;
373 hp->key.char_key = key;
374 hp->key_len = 0;
375 table[index] = hp;
376
377 Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n",
378 hp->next, hp->hash, hp->key.char_key);
379
380 if (++num_items >= max_items) {
381 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
382 grow_table();
383 }
384
385 Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
386
387 return true;
388 }
389
insert(uint32_t key,void * item)390 bool htable::insert(uint32_t key, void *item)
391 {
392 hlink *hp;
393
394 if (lookup(key)) {
395 return false; /* Already exists */
396 }
397
398 ASSERT(index < buckets);
399 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
400 hp = (hlink *)(((char *)item) + loffset);
401
402 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
403
404 hp->next = table[index];
405 hp->hash = hash;
406 hp->key_type = KEY_TYPE_UINT32;
407 hp->key.uint32_key = key;
408 hp->key_len = 0;
409 table[index] = hp;
410
411 Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n", hp->next, hp->hash, hp->key.uint32_key);
412
413 if (++num_items >= max_items) {
414 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
415 grow_table();
416 }
417
418 Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%ld\n", index, num_items, key);
419
420 return true;
421 }
422
insert(uint64_t key,void * item)423 bool htable::insert(uint64_t key, void *item)
424 {
425 hlink *hp;
426
427 if (lookup(key)) {
428 return false; /* Already exists */
429 }
430
431 ASSERT(index < buckets);
432 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
433 hp = (hlink *)(((char *)item) + loffset);
434
435 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
436
437 hp->next = table[index];
438 hp->hash = hash;
439 hp->key_type = KEY_TYPE_UINT64;
440 hp->key.uint64_key = key;
441 hp->key_len = 0;
442 table[index] = hp;
443
444 Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%lld\n", hp->next, hp->hash, hp->key.uint64_key);
445
446 if (++num_items >= max_items) {
447 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
448 grow_table();
449 }
450
451 Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%lld\n", index, num_items, key);
452
453 return true;
454 }
455
insert(uint8_t * key,uint32_t key_len,void * item)456 bool htable::insert(uint8_t *key, uint32_t key_len, void *item)
457 {
458 hlink *hp;
459
460 if (lookup(key, key_len)) {
461 return false; /* Already exists */
462 }
463
464 ASSERT(index < buckets);
465 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
466 hp = (hlink *)(((char *)item) + loffset);
467
468 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
469
470 hp->next = table[index];
471 hp->hash = hash;
472 hp->key_type = KEY_TYPE_BINARY;
473 hp->key.binary_key = key;
474 hp->key_len = key_len;
475 table[index] = hp;
476
477 Dmsg2(debuglevel, "Insert hp->next=%p hp->hash=0x%llx\n", hp->next, hp->hash);
478
479 if (++num_items >= max_items) {
480 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
481 grow_table();
482 }
483
484 Dmsg2(debuglevel, "Leave insert index=%d num_items=%d\n", index, num_items);
485
486 return true;
487 }
488
lookup(char * key)489 void *htable::lookup(char *key)
490 {
491 HashIndex(key);
492 for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
493 ASSERT(hp->key_type == KEY_TYPE_CHAR);
494 if (hash == hp->hash && bstrcmp(key, hp->key.char_key)) {
495 Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
496 return ((char *)hp) - loffset;
497 }
498 }
499
500 return NULL;
501 }
502
lookup(uint32_t key)503 void *htable::lookup(uint32_t key)
504 {
505 HashIndex(key);
506 for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
507 ASSERT(hp->key_type == KEY_TYPE_UINT32);
508 if (hash == hp->hash && key == hp->key.uint32_key) {
509 Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
510 return ((char *)hp) - loffset;
511 }
512 }
513
514 return NULL;
515 }
516
lookup(uint64_t key)517 void *htable::lookup(uint64_t key)
518 {
519 HashIndex(key);
520 for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
521 ASSERT(hp->key_type == KEY_TYPE_UINT64);
522 if (hash == hp->hash && key == hp->key.uint64_key) {
523 Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
524 return ((char *)hp) - loffset;
525 }
526 }
527
528 return NULL;
529 }
530
lookup(uint8_t * key,uint32_t key_len)531 void *htable::lookup(uint8_t *key, uint32_t key_len)
532 {
533 HashIndex(key, key_len);
534 for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
535 ASSERT(hp->key_type == KEY_TYPE_BINARY);
536 if (hash == hp->hash && memcmp(key, hp->key.binary_key, hp->key_len) == 0) {
537 Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
538 return ((char *)hp) - loffset;
539 }
540 }
541
542 return NULL;
543 }
544
next()545 void *htable::next()
546 {
547 Dmsg1(debuglevel, "Enter next: walkptr=%p\n", walkptr);
548 if (walkptr) {
549 walkptr = (hlink *)(walkptr->next);
550 }
551
552 while (!walkptr && walk_index < buckets) {
553 walkptr = table[walk_index++];
554 if (walkptr) {
555 Dmsg3(debuglevel, "new walkptr=%p next=%p inx=%d\n", walkptr,
556 walkptr->next, walk_index-1);
557 }
558 }
559
560 if (walkptr) {
561 Dmsg2(debuglevel, "next: rtn %p walk_index=%d\n",
562 ((char *)walkptr) - loffset, walk_index);
563 return ((char *)walkptr) - loffset;
564 }
565 Dmsg0(debuglevel, "next: return NULL\n");
566
567 return NULL;
568 }
569
first()570 void *htable::first()
571 {
572 Dmsg0(debuglevel, "Enter first\n");
573 walkptr = table[0]; /* get first bucket */
574 walk_index = 1; /* Point to next index */
575
576 while (!walkptr && walk_index < buckets) {
577 walkptr = table[walk_index++]; /* go to next bucket */
578 if (walkptr) {
579 Dmsg3(debuglevel, "first new walkptr=%p next=%p inx=%d\n", walkptr,
580 walkptr->next, walk_index-1);
581 }
582 }
583
584 if (walkptr) {
585 Dmsg1(debuglevel, "Leave first walkptr=%p\n", walkptr);
586 return ((char *)walkptr) - loffset;
587 }
588 Dmsg0(debuglevel, "Leave first walkptr=NULL\n");
589
590 return NULL;
591 }
592
593 /* Destroy the table and its contents */
destroy()594 void htable::destroy()
595 {
596 HashBigFree();
597
598 free(table);
599 table = NULL;
600 GarbageCollectMemory();
601 Dmsg0(100, "Done destroy.\n");
602 }
603