1 /*
2    BAREOS® - Backup Archiving REcovery Open Sourced
3 
4    Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
5    Copyright (C) 2014-2019 Bareos GmbH & Co. KG
6 
7    This program is Free Software; you can redistribute it and/or
8    modify it under the terms of version three of the GNU Affero General Public
9    License as published by the Free Software Foundation and included
10    in the file LICENSE.
11 
12    This program is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15    Affero General Public License for more details.
16 
17    You should have received a copy of the GNU Affero General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.
21 */
22 /*
23  * BAREOS hash table routines
24  *
25  * htable is a hash table of items (pointers). This code is
26  * adapted and enhanced from code I wrote in 1982 for a
27  * relocatable linker.  At that time, the hash table size
28  * was fixed and a primary number, which essentially provides
29  * the randomness. In this program, the hash table can grow when
30  * it gets too full, so the table size here is a binary number. The
31  * hashing is provided using an idea from Tcl where the initial
32  * hash code is "randomized" using a simple calculation from
33  * a random number generator that multiplies by a big number
34  * (I multiply by a prime number, while Tcl did not)
35  * then shifts the result down and does the binary division
36  * by masking.  Increasing the size of the hash table is simple.
37  * Just create a new larger table, walk the old table and
38  * re-hash insert each entry into the new table.
39  *
40  * Kern Sibbald, July MMIII
41  */
42 
43 #include <cinttypes>
44 #include "include/hostconfig.h"
45 
46 #ifdef HAVE_HPUX_OS
47 #pragma pack(4)
48 #endif
49 
50 #include "include/bareos.h"
51 #include "lib/htable.h"
52 
53 #define B_PAGE_SIZE 4096
54 #define MIN_PAGES 32
55 #define MAX_PAGES 2400
56 #define MIN_BUF_SIZE (MIN_PAGES * B_PAGE_SIZE) /* 128 Kb */
57 #define MAX_BUF_SIZE (MAX_PAGES * B_PAGE_SIZE) /* approx 10MB */
58 
59 static const int debuglevel = 500;
60 
61 /*
62  * htable (Hash Table) class.
63  */
64 
65 /*
66  * This subroutine gets a big buffer.
67  */
MallocBigBuf(int size)68 void htable::MallocBigBuf(int size)
69 {
70   struct h_mem* hmem;
71 
72   hmem = (struct h_mem*)malloc(size);
73   total_size += size;
74   blocks++;
75   hmem->next = mem_block;
76   mem_block = hmem;
77   hmem->mem = mem_block->first;
78   hmem->rem = (char*)hmem + size - hmem->mem;
79   Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
80 }
81 
82 /*
83  * This routine frees the whole tree.
84  */
HashBigFree()85 void htable::HashBigFree()
86 {
87   struct h_mem *hmem, *rel;
88 
89   for (hmem = mem_block; hmem;) {
90     rel = hmem;
91     hmem = hmem->next;
92     Dmsg1(100, "free malloc buf=%p\n", rel);
93     free(rel);
94   }
95 }
96 
97 /*
98  * Normal hash malloc routine that gets a "small" buffer from the big buffer
99  */
hash_malloc(int size)100 char* htable::hash_malloc(int size)
101 {
102   int mb_size;
103   char* buf;
104   int asize = BALIGN(size);
105 
106   if (mem_block->rem < asize) {
107     if (total_size >= (extend_length / 2)) {
108       mb_size = extend_length;
109     } else {
110       mb_size = extend_length / 2;
111     }
112     MallocBigBuf(mb_size);
113     Dmsg1(100, "Created new big buffer of %ld bytes\n", mb_size);
114   }
115   mem_block->rem -= asize;
116   buf = mem_block->mem;
117   mem_block->mem += asize;
118   return buf;
119 }
120 
121 /*
122  * Create hash of key, stored in hash then
123  * create and return the pseudo random bucket index
124  */
HashIndex(char * key)125 void htable::HashIndex(char* key)
126 {
127   hash = 0;
128   for (char* p = key; *p; p++) {
129     hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
130   }
131 
132   /*
133    * Multiply by large prime number, take top bits, mask for remainder.
134    */
135   index = ((hash * 1103515249) >> rshift) & mask;
136   Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
137 }
138 
HashIndex(uint32_t key)139 void htable::HashIndex(uint32_t key)
140 {
141   hash = key;
142 
143   /*
144    * Multiply by large prime number, take top bits, mask for remainder.
145    */
146   index = ((hash * 1103515249) >> rshift) & mask;
147   Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
148 }
149 
HashIndex(uint64_t key)150 void htable::HashIndex(uint64_t key)
151 {
152   hash = key;
153 
154   /*
155    * Multiply by large prime number, take top bits, mask for remainder.
156    */
157   index = ((hash * 1103515249) >> rshift) & mask;
158   Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
159 }
160 
HashIndex(uint8_t * key,uint32_t keylen)161 void htable::HashIndex(uint8_t* key, uint32_t keylen)
162 {
163   hash = 0;
164   for (uint8_t* p = key; keylen--; p++) {
165     hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
166   }
167 
168   /*
169    * Multiply by large prime number, take top bits, mask for remainder.
170    */
171   index = ((hash * 1103515249) >> rshift) & mask;
172   Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
173 }
174 
175 /*
176  * tsize is the estimated number of entries in the hash table
177  */
htable(void * item,void * link,int tsize,int nr_pages,int nr_entries)178 htable::htable(void* item, void* link, int tsize, int nr_pages, int nr_entries)
179 {
180   init(item, link, tsize, nr_pages, nr_entries);
181 }
182 
init(void * item,void * link,int tsize,int nr_pages,int nr_entries)183 void htable::init(void* item,
184                   void* link,
185                   int tsize,
186                   int nr_pages,
187                   int nr_entries)
188 {
189   int pwr;
190   int pagesize;
191   int buffer_size;
192 
193   memset(this, 0, sizeof(htable));
194   if (tsize < 31) { tsize = 31; }
195   tsize >>= 2;
196   for (pwr = 0; tsize; pwr++) { tsize >>= 1; }
197   loffset = (char*)link - (char*)item;
198   mask = ~((~0) << pwr); /* 3 bits => table size = 8 */
199   rshift = 30 - pwr;     /* Start using bits 28, 29, 30 */
200   buckets = 1 << pwr;    /* Hash table size -- power of two */
201   max_items =
202       buckets * nr_entries; /* Allow average nr_entries entries per chain */
203   table = (hlink**)malloc(buckets * sizeof(hlink*));
204   memset(table, 0, buckets * sizeof(hlink*));
205 
206 #ifdef HAVE_GETPAGESIZE
207   pagesize = getpagesize();
208 #else
209   pagesize = B_PAGE_SIZE;
210 #endif
211   if (nr_pages == 0) {
212     buffer_size = MAX_BUF_SIZE;
213   } else {
214     buffer_size = pagesize * nr_pages;
215     if (buffer_size > MAX_BUF_SIZE) {
216       buffer_size = MAX_BUF_SIZE;
217     } else if (buffer_size < MIN_BUF_SIZE) {
218       buffer_size = MIN_BUF_SIZE;
219     }
220   }
221   MallocBigBuf(buffer_size);
222   extend_length = buffer_size;
223   Dmsg1(100, "Allocated big buffer of %ld bytes\n", buffer_size);
224 }
225 
size()226 uint32_t htable::size() { return num_items; }
227 
228 /*
229  * Take each hash link and walk down the chain of items
230  *  that hash there counting them (i.e. the hits),
231  *  then report that number.
232  * Obiously, the more hits in a chain, the more time
233  *  it takes to reference them. Empty chains are not so
234  *  hot either -- as it means unused or wasted space.
235  */
236 #define MAX_COUNT 20
stats()237 void htable::stats()
238 {
239   int hits[MAX_COUNT];
240   int max = 0;
241   int i, j;
242   hlink* p;
243   printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
244   printf("Hits/bucket: buckets\n");
245   for (i = 0; i < MAX_COUNT; i++) { hits[i] = 0; }
246   for (i = 0; i < (int)buckets; i++) {
247     p = table[i];
248     j = 0;
249     while (p) {
250       p = (hlink*)(p->next);
251       j++;
252     }
253     if (j > max) { max = j; }
254     if (j < MAX_COUNT) { hits[j]++; }
255   }
256   for (i = 0; i < MAX_COUNT; i++) { printf("%2d:           %d\n", i, hits[i]); }
257   printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items,
258          max_items);
259   printf("max hits in a bucket = %d\n", max);
260   printf("total bytes malloced = %" PRIu64 "\n", total_size);
261   printf("total blocks malloced = %d\n", blocks);
262 }
263 
grow_table()264 void htable::grow_table()
265 {
266   htable* big;
267   hlink* cur;
268   void* next_item;
269 
270   Dmsg1(100, "Grow called old size = %d\n", buckets);
271 
272   /*
273    * Setup a bigger table.
274    */
275   big = (htable*)malloc(sizeof(htable));
276   big->hash = hash;
277   big->total_size = total_size;
278   big->extend_length = extend_length;
279   big->index = index;
280   big->blocks = blocks;
281   big->mem_block = mem_block;
282   big->loffset = loffset;
283   big->mask = mask << 1 | 1;
284   big->rshift = rshift - 1;
285   big->num_items = 0;
286   big->buckets = buckets * 2;
287   big->max_items = big->buckets * 4;
288 
289   /*
290    * Create a bigger hash table.
291    */
292   big->table = (hlink**)malloc(big->buckets * sizeof(hlink*));
293   memset(big->table, 0, big->buckets * sizeof(hlink*));
294   big->walkptr = NULL;
295   big->walk_index = 0;
296 
297   /*
298    * Insert all the items in the new hash table
299    */
300   Dmsg1(100, "Before copy num_items=%d\n", num_items);
301 
302   /*
303    * We walk through the old smaller tree getting items,
304    * but since we are overwriting the colision links, we must
305    * explicitly save the item->next pointer and walk each
306    * colision chain ourselves. We do use next() for getting
307    * to the next bucket.
308    */
309   for (void* item = first(); item;) {
310     cur = (hlink*)((char*)item + loffset);
311     next_item = cur->next; /* Save link overwritten by insert */
312     switch (cur->key_type) {
313       case KEY_TYPE_CHAR:
314         Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
315         big->insert(cur->key.char_key, item);
316         break;
317       case KEY_TYPE_UINT32:
318         Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
319         big->insert(cur->key.uint32_key, item);
320         break;
321       case KEY_TYPE_UINT64:
322         Dmsg1(100, "Grow insert: %lld\n", cur->key.uint64_key);
323         big->insert(cur->key.uint64_key, item);
324         break;
325       case KEY_TYPE_BINARY:
326         big->insert(cur->key.binary_key, cur->key_len, item);
327         break;
328     }
329     if (next_item) {
330       item = (void*)((char*)next_item - loffset);
331     } else {
332       walkptr = NULL;
333       item = next();
334     }
335   }
336 
337   Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
338   if (num_items != big->num_items) {
339     Dmsg0(000, "****** Big problems num_items mismatch ******\n");
340   }
341 
342   free(table);
343   memcpy(this, big, sizeof(htable)); /* Move everything across */
344   free(big);
345 
346   Dmsg0(100, "Exit grow.\n");
347 }
348 
insert(char * key,void * item)349 bool htable::insert(char* key, void* item)
350 {
351   hlink* hp;
352 
353   if (lookup(key)) { return false; /* Already exists */ }
354 
355   ASSERT(index < buckets);
356   Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
357   hp = (hlink*)(((char*)item) + loffset);
358 
359   Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
360         item, loffset);
361 
362   hp->next = table[index];
363   hp->hash = hash;
364   hp->key_type = KEY_TYPE_CHAR;
365   hp->key.char_key = key;
366   hp->key_len = 0;
367   table[index] = hp;
368 
369   Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n", hp->next,
370         hp->hash, hp->key.char_key);
371 
372   if (++num_items >= max_items) {
373     Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
374     grow_table();
375   }
376 
377   Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%s\n", index,
378         num_items, key);
379 
380   return true;
381 }
382 
insert(uint32_t key,void * item)383 bool htable::insert(uint32_t key, void* item)
384 {
385   hlink* hp;
386 
387   if (lookup(key)) { return false; /* Already exists */ }
388 
389   ASSERT(index < buckets);
390   Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
391   hp = (hlink*)(((char*)item) + loffset);
392 
393   Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
394         item, loffset);
395 
396   hp->next = table[index];
397   hp->hash = hash;
398   hp->key_type = KEY_TYPE_UINT32;
399   hp->key.uint32_key = key;
400   hp->key_len = 0;
401   table[index] = hp;
402 
403   Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n",
404         hp->next, hp->hash, hp->key.uint32_key);
405 
406   if (++num_items >= max_items) {
407     Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
408     grow_table();
409   }
410 
411   Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%ld\n", index,
412         num_items, key);
413 
414   return true;
415 }
416 
insert(uint64_t key,void * item)417 bool htable::insert(uint64_t key, void* item)
418 {
419   hlink* hp;
420 
421   if (lookup(key)) { return false; /* Already exists */ }
422 
423   ASSERT(index < buckets);
424   Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
425   hp = (hlink*)(((char*)item) + loffset);
426 
427   Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
428         item, loffset);
429 
430   hp->next = table[index];
431   hp->hash = hash;
432   hp->key_type = KEY_TYPE_UINT64;
433   hp->key.uint64_key = key;
434   hp->key_len = 0;
435   table[index] = hp;
436 
437   Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%lld\n",
438         hp->next, hp->hash, hp->key.uint64_key);
439 
440   if (++num_items >= max_items) {
441     Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
442     grow_table();
443   }
444 
445   Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%lld\n", index,
446         num_items, key);
447 
448   return true;
449 }
450 
insert(uint8_t * key,uint32_t key_len,void * item)451 bool htable::insert(uint8_t* key, uint32_t key_len, void* item)
452 {
453   hlink* hp;
454 
455   if (lookup(key, key_len)) { return false; /* Already exists */ }
456 
457   ASSERT(index < buckets);
458   Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
459   hp = (hlink*)(((char*)item) + loffset);
460 
461   Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
462         item, loffset);
463 
464   hp->next = table[index];
465   hp->hash = hash;
466   hp->key_type = KEY_TYPE_BINARY;
467   hp->key.binary_key = key;
468   hp->key_len = key_len;
469   table[index] = hp;
470 
471   Dmsg2(debuglevel, "Insert hp->next=%p hp->hash=0x%llx\n", hp->next, hp->hash);
472 
473   if (++num_items >= max_items) {
474     Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
475     grow_table();
476   }
477 
478   Dmsg2(debuglevel, "Leave insert index=%d num_items=%d\n", index, num_items);
479 
480   return true;
481 }
482 
lookup(char * key)483 void* htable::lookup(char* key)
484 {
485   HashIndex(key);
486   for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
487     ASSERT(hp->key_type == KEY_TYPE_CHAR);
488     if (hash == hp->hash && bstrcmp(key, hp->key.char_key)) {
489       Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
490       return ((char*)hp) - loffset;
491     }
492   }
493 
494   return NULL;
495 }
496 
lookup(uint32_t key)497 void* htable::lookup(uint32_t key)
498 {
499   HashIndex(key);
500   for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
501     ASSERT(hp->key_type == KEY_TYPE_UINT32);
502     if (hash == hp->hash && key == hp->key.uint32_key) {
503       Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
504       return ((char*)hp) - loffset;
505     }
506   }
507 
508   return NULL;
509 }
510 
lookup(uint64_t key)511 void* htable::lookup(uint64_t key)
512 {
513   HashIndex(key);
514   for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
515     ASSERT(hp->key_type == KEY_TYPE_UINT64);
516     if (hash == hp->hash && key == hp->key.uint64_key) {
517       Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
518       return ((char*)hp) - loffset;
519     }
520   }
521 
522   return NULL;
523 }
524 
lookup(uint8_t * key,uint32_t key_len)525 void* htable::lookup(uint8_t* key, uint32_t key_len)
526 {
527   HashIndex(key, key_len);
528   for (hlink* hp = table[index]; hp; hp = (hlink*)hp->next) {
529     ASSERT(hp->key_type == KEY_TYPE_BINARY);
530     if (hash == hp->hash && memcmp(key, hp->key.binary_key, hp->key_len) == 0) {
531       Dmsg1(debuglevel, "lookup return %p\n", ((char*)hp) - loffset);
532       return ((char*)hp) - loffset;
533     }
534   }
535 
536   return NULL;
537 }
538 
next()539 void* htable::next()
540 {
541   Dmsg1(debuglevel, "Enter next: walkptr=%p\n", walkptr);
542   if (walkptr) { walkptr = (hlink*)(walkptr->next); }
543 
544   while (!walkptr && walk_index < buckets) {
545     walkptr = table[walk_index++];
546     if (walkptr) {
547       Dmsg3(debuglevel, "new walkptr=%p next=%p inx=%d\n", walkptr,
548             walkptr->next, walk_index - 1);
549     }
550   }
551 
552   if (walkptr) {
553     Dmsg2(debuglevel, "next: rtn %p walk_index=%d\n",
554           ((char*)walkptr) - loffset, walk_index);
555     return ((char*)walkptr) - loffset;
556   }
557   Dmsg0(debuglevel, "next: return NULL\n");
558 
559   return NULL;
560 }
561 
first()562 void* htable::first()
563 {
564   Dmsg0(debuglevel, "Enter first\n");
565   walkptr = table[0]; /* get first bucket */
566   walk_index = 1;     /* Point to next index */
567 
568   while (!walkptr && walk_index < buckets) {
569     walkptr = table[walk_index++]; /* go to next bucket */
570     if (walkptr) {
571       Dmsg3(debuglevel, "first new walkptr=%p next=%p inx=%d\n", walkptr,
572             walkptr->next, walk_index - 1);
573     }
574   }
575 
576   if (walkptr) {
577     Dmsg1(debuglevel, "Leave first walkptr=%p\n", walkptr);
578     return ((char*)walkptr) - loffset;
579   }
580   Dmsg0(debuglevel, "Leave first walkptr=NULL\n");
581 
582   return NULL;
583 }
584 
585 /* Destroy the table and its contents */
destroy()586 void htable::destroy()
587 {
588   HashBigFree();
589 
590   free(table);
591   table = NULL;
592   GarbageCollectMemory();
593   Dmsg0(100, "Done destroy.\n");
594 }
595