1 /*
2 BAREOS® - Backup Archiving REcovery Open Sourced
3
4 Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
5 Copyright (C) 2014-2019 Bareos GmbH & Co. KG
6
7 This program is Free Software; you can redistribute it and/or
8 modify it under the terms of version three of the GNU Affero General Public
9 License as published by the Free Software Foundation and included
10 in the file LICENSE.
11
12 This program is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Affero General Public License for more details.
16
17 You should have received a copy of the GNU Affero General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.
21 */
22 /*
23 * BAREOS hash table routines
24 *
25 * htable is a hash table of items (pointers). This code is
26 * adapted and enhanced from code I wrote in 1982 for a
27 * relocatable linker. At that time, the hash table size
28 * was fixed and a primary number, which essentially provides
29 * the randomness. In this program, the hash table can grow when
30 * it gets too full, so the table size here is a binary number. The
31 * hashing is provided using an idea from Tcl where the initial
32 * hash code is "randomized" using a simple calculation from
33 * a random number generator that multiplies by a big number
34 * (I multiply by a prime number, while Tcl did not)
35 * then shifts the result down and does the binary division
36 * by masking. Increasing the size of the hash table is simple.
37 * Just create a new larger table, walk the old table and
38 * re-hash insert each entry into the new table.
39 *
40 * Kern Sibbald, July MMIII
41 */
42
43 #include <cinttypes>
44 #include "include/hostconfig.h"
45
46 #ifdef HAVE_HPUX_OS
47 #pragma pack(4)
48 #endif
49
50 #include "include/bareos.h"
51 #include "lib/htable.h"
52
53 #define B_PAGE_SIZE 4096
54 #define MIN_PAGES 32
55 #define MAX_PAGES 2400
56 #define MIN_BUF_SIZE (MIN_PAGES * B_PAGE_SIZE) /* 128 Kb */
57 #define MAX_BUF_SIZE (MAX_PAGES * B_PAGE_SIZE) /* approx 10MB */
58
59 static const int debuglevel = 500;
60
61 /*
62 * htable (Hash Table) class.
63 */
64
65 /*
66 * This subroutine gets a big buffer.
67 */
MallocBigBuf(int size)68 void htable::MallocBigBuf(int size)
69 {
70 struct h_mem* hmem;
71
72 hmem = (struct h_mem*)malloc(size);
73 total_size += size;
74 blocks++;
75 hmem->next = mem_block;
76 mem_block = hmem;
77 hmem->mem = mem_block->first;
78 hmem->rem = (char*)hmem + size - hmem->mem;
79 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
80 }
81
82 /*
83 * This routine frees the whole tree.
84 */
HashBigFree()85 void htable::HashBigFree()
86 {
87 struct h_mem *hmem, *rel;
88
89 for (hmem = mem_block; hmem;) {
90 rel = hmem;
91 hmem = hmem->next;
92 Dmsg1(100, "free malloc buf=%p\n", rel);
93 free(rel);
94 }
95 }
96
97 /*
98 * Normal hash malloc routine that gets a "small" buffer from the big buffer
99 */
hash_malloc(int size)100 char* htable::hash_malloc(int size)
101 {
102 int mb_size;
103 char* buf;
104 int asize = BALIGN(size);
105
106 if (mem_block->rem < asize) {
107 if (total_size >= (extend_length / 2)) {
108 mb_size = extend_length;
109 } else {
110 mb_size = extend_length / 2;
111 }
112 MallocBigBuf(mb_size);
113 Dmsg1(100, "Created new big buffer of %ld bytes\n", mb_size);
114 }
115 mem_block->rem -= asize;
116 buf = mem_block->mem;
117 mem_block->mem += asize;
118 return buf;
119 }
120
121 /*
122 * Create hash of key, stored in hash then
123 * create and return the pseudo random bucket index
124 */
HashIndex(char * key)125 void htable::HashIndex(char* key)
126 {
127 hash = 0;
128 for (char* p = key; *p; p++) {
129 hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
130 }
131
132 /*
133 * Multiply by large prime number, take top bits, mask for remainder.
134 */
135 index = ((hash * 1103515249) >> rshift) & mask;
136 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
137 }
138
HashIndex(uint32_t key)139 void htable::HashIndex(uint32_t key)
140 {
141 hash = key;
142
143 /*
144 * Multiply by large prime number, take top bits, mask for remainder.
145 */
146 index = ((hash * 1103515249) >> rshift) & mask;
147 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
148 }
149
HashIndex(uint64_t key)150 void htable::HashIndex(uint64_t key)
151 {
152 hash = key;
153
154 /*
155 * Multiply by large prime number, take top bits, mask for remainder.
156 */
157 index = ((hash * 1103515249) >> rshift) & mask;
158 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
159 }
160
HashIndex(uint8_t * key,uint32_t keylen)161 void htable::HashIndex(uint8_t* key, uint32_t keylen)
162 {
163 hash = 0;
164 for (uint8_t* p = key; keylen--; p++) {
165 hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
166 }
167
168 /*
169 * Multiply by large prime number, take top bits, mask for remainder.
170 */
171 index = ((hash * 1103515249) >> rshift) & mask;
172 Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
173 }
174
175 /*
176 * tsize is the estimated number of entries in the hash table
177 */
htable(void * item,void * link,int tsize,int nr_pages,int nr_entries)178 htable::htable(void* item, void* link, int tsize, int nr_pages, int nr_entries)
179 {
180 init(item, link, tsize, nr_pages, nr_entries);
181 }
182
init(void * item,void * link,int tsize,int nr_pages,int nr_entries)183 void htable::init(void* item,
184 void* link,
185 int tsize,
186 int nr_pages,
187 int nr_entries)
188 {
189 int pwr;
190 int pagesize;
191 int buffer_size;
192
193 memset(this, 0, sizeof(htable));
194 if (tsize < 31) { tsize = 31; }
195 tsize >>= 2;
196 for (pwr = 0; tsize; pwr++) { tsize >>= 1; }
197 loffset = (char*)link - (char*)item;
198 mask = ~((~0) << pwr); /* 3 bits => table size = 8 */
199 rshift = 30 - pwr; /* Start using bits 28, 29, 30 */
200 buckets = 1 << pwr; /* Hash table size -- power of two */
201 max_items =
202 buckets * nr_entries; /* Allow average nr_entries entries per chain */
203 table = (hlink**)malloc(buckets * sizeof(hlink*));
204 memset(table, 0, buckets * sizeof(hlink*));
205
206 #ifdef HAVE_GETPAGESIZE
207 pagesize = getpagesize();
208 #else
209 pagesize = B_PAGE_SIZE;
210 #endif
211 if (nr_pages == 0) {
212 buffer_size = MAX_BUF_SIZE;
213 } else {
214 buffer_size = pagesize * nr_pages;
215 if (buffer_size > MAX_BUF_SIZE) {
216 buffer_size = MAX_BUF_SIZE;
217 } else if (buffer_size < MIN_BUF_SIZE) {
218 buffer_size = MIN_BUF_SIZE;
219 }
220 }
221 MallocBigBuf(buffer_size);
222 extend_length = buffer_size;
223 Dmsg1(100, "Allocated big buffer of %ld bytes\n", buffer_size);
224 }
225
size()226 uint32_t htable::size() { return num_items; }
227
228 /*
229 * Take each hash link and walk down the chain of items
230 * that hash there counting them (i.e. the hits),
231 * then report that number.
232 * Obiously, the more hits in a chain, the more time
233 * it takes to reference them. Empty chains are not so
234 * hot either -- as it means unused or wasted space.
235 */
236 #define MAX_COUNT 20
stats()237 void htable::stats()
238 {
239 int hits[MAX_COUNT];
240 int max = 0;
241 int i, j;
242 hlink* p;
243 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
244 printf("Hits/bucket: buckets\n");
245 for (i = 0; i < MAX_COUNT; i++) { hits[i] = 0; }
246 for (i = 0; i < (int)buckets; i++) {
247 p = table[i];
248 j = 0;
249 while (p) {
250 p = (hlink*)(p->next);
251 j++;
252 }
253 if (j > max) { max = j; }
254 if (j < MAX_COUNT) { hits[j]++; }
255 }
256 for (i = 0; i < MAX_COUNT; i++) { printf("%2d: %d\n", i, hits[i]); }
257 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items,
258 max_items);
259 printf("max hits in a bucket = %d\n", max);
260 printf("total bytes malloced = %" PRIu64 "\n", total_size);
261 printf("total blocks malloced = %d\n", blocks);
262 }
263
grow_table()264 void htable::grow_table()
265 {
266 htable* big;
267 hlink* cur;
268 void* next_item;
269
270 Dmsg1(100, "Grow called old size = %d\n", buckets);
271
272 /*
273 * Setup a bigger table.
274 */
275 big = (htable*)malloc(sizeof(htable));
276 big->hash = hash;
277 big->total_size = total_size;
278 big->extend_length = extend_length;
279 big->index = index;
280 big->blocks = blocks;
281 big->mem_block = mem_block;
282 big->loffset = loffset;
283 big->mask = mask << 1 | 1;
284 big->rshift = rshift - 1;
285 big->num_items = 0;
286 big->buckets = buckets * 2;
287 big->max_items = big->buckets * 4;
288
289 /*
290 * Create a bigger hash table.
291 */
292 big->table = (hlink**)malloc(big->buckets * sizeof(hlink*));
293 memset(big->table, 0, big->buckets * sizeof(hlink*));
294 big->walkptr = NULL;
295 big->walk_index = 0;
296
297 /*
298 * Insert all the items in the new hash table
299 */
300 Dmsg1(100, "Before copy num_items=%d\n", num_items);
301
302 /*
303 * We walk through the old smaller tree getting items,
304 * but since we are overwriting the colision links, we must
305 * explicitly save the item->next pointer and walk each
306 * colision chain ourselves. We do use next() for getting
307 * to the next bucket.
308 */
309 for (void* item = first(); item;) {
310 cur = (hlink*)((char*)item + loffset);
311 next_item = cur->next; /* Save link overwritten by insert */
312 switch (cur->key_type) {
313 case KEY_TYPE_CHAR:
314 Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
315 big->insert(cur->key.char_key, item);
316 break;
317 case KEY_TYPE_UINT32:
318 Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
319 big->insert(cur->key.uint32_key, item);
320 break;
321 case KEY_TYPE_UINT64:
322 Dmsg1(100, "Grow insert: %lld\n", cur->key.uint64_key);
323 big->insert(cur->key.uint64_key, item);
324 break;
325 case KEY_TYPE_BINARY:
326 big->insert(cur->key.binary_key, cur->key_len, item);
327 break;
328 }
329 if (next_item) {
330 item = (void*)((char*)next_item - loffset);
331 } else {
332 walkptr = NULL;
333 item = next();
334 }
335 }
336
337 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
338 if (num_items != big->num_items) {
339 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
340 }
341
342 free(table);
343 memcpy(this, big, sizeof(htable)); /* Move everything across */
344 free(big);
345
346 Dmsg0(100, "Exit grow.\n");
347 }
348
insert(char * key,void * item)349 bool htable::insert(char* key, void* item)
350 {
351 hlink* hp;
352
353 if (lookup(key)) { return false; /* Already exists */ }
354
355 ASSERT(index < buckets);
356 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
357 hp = (hlink*)(((char*)item) + loffset);
358
359 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
360 item, loffset);
361
362 hp->next = table[index];
363 hp->hash = hash;
364 hp->key_type = KEY_TYPE_CHAR;
365 hp->key.char_key = key;
366 hp->key_len = 0;
367 table[index] = hp;
368
369 Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n", hp->next,
370 hp->hash, hp->key.char_key);
371
372 if (++num_items >= max_items) {
373 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
374 grow_table();
375 }
376
377 Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%s\n", index,
378 num_items, key);
379
380 return true;
381 }
382
insert(uint32_t key,void * item)383 bool htable::insert(uint32_t key, void* item)
384 {
385 hlink* hp;
386
387 if (lookup(key)) { return false; /* Already exists */ }
388
389 ASSERT(index < buckets);
390 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
391 hp = (hlink*)(((char*)item) + loffset);
392
393 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
394 item, loffset);
395
396 hp->next = table[index];
397 hp->hash = hash;
398 hp->key_type = KEY_TYPE_UINT32;
399 hp->key.uint32_key = key;
400 hp->key_len = 0;
401 table[index] = hp;
402
403 Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n",
404 hp->next, hp->hash, hp->key.uint32_key);
405
406 if (++num_items >= max_items) {
407 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
408 grow_table();
409 }
410
411 Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%ld\n", index,
412 num_items, key);
413
414 return true;
415 }
416
insert(uint64_t key,void * item)417 bool htable::insert(uint64_t key, void* item)
418 {
419 hlink* hp;
420
421 if (lookup(key)) { return false; /* Already exists */ }
422
423 ASSERT(index < buckets);
424 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
425 hp = (hlink*)(((char*)item) + loffset);
426
427 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
428 item, loffset);
429
430 hp->next = table[index];
431 hp->hash = hash;
432 hp->key_type = KEY_TYPE_UINT64;
433 hp->key.uint64_key = key;
434 hp->key_len = 0;
435 table[index] = hp;
436
437 Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%lld\n",
438 hp->next, hp->hash, hp->key.uint64_key);
439
440 if (++num_items >= max_items) {
441 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
442 grow_table();
443 }
444
445 Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%lld\n", index,
446 num_items, key);
447
448 return true;
449 }
450
insert(uint8_t * key,uint32_t key_len,void * item)451 bool htable::insert(uint8_t* key, uint32_t key_len, void* item)
452 {
453 hlink* hp;
454
455 if (lookup(key, key_len)) { return false; /* Already exists */ }
456
457 ASSERT(index < buckets);
458 Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
459 hp = (hlink*)(((char*)item) + loffset);
460
461 Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
462 item, loffset);
463
464 hp->next = table[index];
465 hp->hash = hash;
466 hp->key_type = KEY_TYPE_BINARY;
467 hp->key.binary_key = key;
468 hp->key_len = key_len;
469 table[index] = hp;
470
471 Dmsg2(debuglevel, "Insert hp->next=%p hp->hash=0x%llx\n", hp->next, hp->hash);
472
473 if (++num_items >= max_items) {
474 Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
475 grow_table();
476 }
477
478 Dmsg2(debuglevel, "Leave insert index=%d num_items=%d\n", index, num_items);
479
480 return true;
481 }
482
lookup(char * key)483 void* htable::lookup(char* key)
484 {
485 HashIndex(key);
486 for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
487 ASSERT(hp->key_type == KEY_TYPE_CHAR);
488 if (hash == hp->hash && bstrcmp(key, hp->key.char_key)) {
489 Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
490 return ((char*)hp) - loffset;
491 }
492 }
493
494 return NULL;
495 }
496
lookup(uint32_t key)497 void* htable::lookup(uint32_t key)
498 {
499 HashIndex(key);
500 for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
501 ASSERT(hp->key_type == KEY_TYPE_UINT32);
502 if (hash == hp->hash && key == hp->key.uint32_key) {
503 Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
504 return ((char*)hp) - loffset;
505 }
506 }
507
508 return NULL;
509 }
510
lookup(uint64_t key)511 void* htable::lookup(uint64_t key)
512 {
513 HashIndex(key);
514 for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
515 ASSERT(hp->key_type == KEY_TYPE_UINT64);
516 if (hash == hp->hash && key == hp->key.uint64_key) {
517 Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
518 return ((char*)hp) - loffset;
519 }
520 }
521
522 return NULL;
523 }
524
lookup(uint8_t * key,uint32_t key_len)525 void* htable::lookup(uint8_t* key, uint32_t key_len)
526 {
527 HashIndex(key, key_len);
528 for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
529 ASSERT(hp->key_type == KEY_TYPE_BINARY);
530 if (hash == hp->hash && memcmp(key, hp->key.binary_key, hp->key_len) == 0) {
531 Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
532 return ((char*)hp) - loffset;
533 }
534 }
535
536 return NULL;
537 }
538
next()539 void* htable::next()
540 {
541 Dmsg1(debuglevel, "Enter next: walkptr=%p\n", walkptr);
542 if (walkptr) { walkptr = (hlink*)(walkptr->next); }
543
544 while (!walkptr && walk_index < buckets) {
545 walkptr = table[walk_index++];
546 if (walkptr) {
547 Dmsg3(debuglevel, "new walkptr=%p next=%p inx=%d\n", walkptr,
548 walkptr->next, walk_index - 1);
549 }
550 }
551
552 if (walkptr) {
553 Dmsg2(debuglevel, "next: rtn %p walk_index=%d\n",
554 ((char*)walkptr) - loffset, walk_index);
555 return ((char*)walkptr) - loffset;
556 }
557 Dmsg0(debuglevel, "next: return NULL\n");
558
559 return NULL;
560 }
561
first()562 void* htable::first()
563 {
564 Dmsg0(debuglevel, "Enter first\n");
565 walkptr = table[0]; /* get first bucket */
566 walk_index = 1; /* Point to next index */
567
568 while (!walkptr && walk_index < buckets) {
569 walkptr = table[walk_index++]; /* go to next bucket */
570 if (walkptr) {
571 Dmsg3(debuglevel, "first new walkptr=%p next=%p inx=%d\n", walkptr,
572 walkptr->next, walk_index - 1);
573 }
574 }
575
576 if (walkptr) {
577 Dmsg1(debuglevel, "Leave first walkptr=%p\n", walkptr);
578 return ((char*)walkptr) - loffset;
579 }
580 Dmsg0(debuglevel, "Leave first walkptr=NULL\n");
581
582 return NULL;
583 }
584
585 /* Destroy the table and its contents */
destroy()586 void htable::destroy()
587 {
588 HashBigFree();
589
590 free(table);
591 table = NULL;
592 GarbageCollectMemory();
593 Dmsg0(100, "Done destroy.\n");
594 }
595