1 /*
2    BAREOS® - Backup Archiving REcovery Open Sourced
3 
4    Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
5    Copyright (C) 2014-2014 Bareos GmbH & Co. KG
6 
7    This program is Free Software; you can redistribute it and/or
8    modify it under the terms of version three of the GNU Affero General Public
9    License as published by the Free Software Foundation and included
10    in the file LICENSE.
11 
12    This program is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15    Affero General Public License for more details.
16 
17    You should have received a copy of the GNU Affero General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.
21 */
22 /*
23  * BAREOS hash table routines
24  *
25  * htable is a hash table of items (pointers). This code is
26  * adapted and enhanced from code I wrote in 1982 for a
27  * relocatable linker.  At that time, the hash table size
28  * was fixed and a primary number, which essentially provides
29  * the randomness. In this program, the hash table can grow when
30  * it gets too full, so the table size here is a binary number. The
31  * hashing is provided using an idea from Tcl where the initial
32  * hash code is "randomized" using a simple calculation from
33  * a random number generator that multiplies by a big number
34  * (I multiply by a prime number, while Tcl did not)
35  * then shifts the result down and does the binary division
36  * by masking.  Increasing the size of the hash table is simple.
37  * Just create a new larger table, walk the old table and
38  * re-hash insert each entry into the new table.
39  *
40  * Kern Sibbald, July MMIII
41  */
42 
43 #include "include/hostconfig.h"
44 
45 #ifdef HAVE_HPUX_OS
46 #pragma pack(4)
47 #endif
48 
49 #include "include/bareos.h"
50 
51 #define B_PAGE_SIZE 4096
52 #define MIN_PAGES 32
53 #define MAX_PAGES 2400
54 #define MIN_BUF_SIZE (MIN_PAGES * B_PAGE_SIZE) /* 128 Kb */
55 #define MAX_BUF_SIZE (MAX_PAGES * B_PAGE_SIZE) /* approx 10MB */
56 
57 static const int debuglevel = 500;
58 
59 /*
60  * htable (Hash Table) class.
61  */
62 
63 /*
64  * This subroutine gets a big buffer.
65  */
MallocBigBuf(int size)66 void htable::MallocBigBuf(int size)
67 {
68    struct h_mem *hmem;
69 
70    hmem = (struct h_mem *)malloc(size);
71    total_size += size;
72    blocks++;
73    hmem->next = mem_block;
74    mem_block = hmem;
75    hmem->mem = mem_block->first;
76    hmem->rem = (char *)hmem + size - hmem->mem;
77    Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
78 }
79 
80 /*
81  * This routine frees the whole tree.
82  */
HashBigFree()83 void htable::HashBigFree()
84 {
85    struct h_mem *hmem, *rel;
86 
87    for (hmem=mem_block; hmem; ) {
88       rel = hmem;
89       hmem = hmem->next;
90       Dmsg1(100, "free malloc buf=%p\n", rel);
91       free(rel);
92    }
93 }
94 
95 /*
96  * Normal hash malloc routine that gets a "small" buffer from the big buffer
97  */
hash_malloc(int size)98 char *htable::hash_malloc(int size)
99 {
100    int mb_size;
101    char *buf;
102    int asize = BALIGN(size);
103 
104    if (mem_block->rem < asize) {
105       if (total_size >= (extend_length / 2)) {
106          mb_size = extend_length;
107       } else {
108          mb_size = extend_length / 2;
109       }
110       MallocBigBuf(mb_size);
111       Dmsg1(100, "Created new big buffer of %ld bytes\n", mb_size);
112    }
113    mem_block->rem -= asize;
114    buf = mem_block->mem;
115    mem_block->mem += asize;
116    return buf;
117 }
118 
119 /*
120  * Create hash of key, stored in hash then
121  * create and return the pseudo random bucket index
122  */
HashIndex(char * key)123 void htable::HashIndex(char *key)
124 {
125    hash = 0;
126    for (char *p = key; *p; p++) {
127       hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
128    }
129 
130    /*
131     * Multiply by large prime number, take top bits, mask for remainder.
132     */
133    index = ((hash * 1103515249) >> rshift) & mask;
134    Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
135 }
136 
HashIndex(uint32_t key)137 void htable::HashIndex(uint32_t key)
138 {
139    hash = key;
140 
141    /*
142     * Multiply by large prime number, take top bits, mask for remainder.
143     */
144    index = ((hash * 1103515249) >> rshift) & mask;
145    Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
146 }
147 
HashIndex(uint64_t key)148 void htable::HashIndex(uint64_t key)
149 {
150    hash = key;
151 
152    /*
153     * Multiply by large prime number, take top bits, mask for remainder.
154     */
155    index = ((hash * 1103515249) >> rshift) & mask;
156    Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
157 }
158 
HashIndex(uint8_t * key,uint32_t keylen)159 void htable::HashIndex(uint8_t *key, uint32_t keylen)
160 {
161    hash = 0;
162    for (uint8_t *p = key; keylen--; p++) {
163       hash += ((hash << 5) | (hash >> (sizeof(hash) * 8 - 5))) + (uint32_t)*p;
164    }
165 
166    /*
167     * Multiply by large prime number, take top bits, mask for remainder.
168     */
169    index = ((hash * 1103515249) >> rshift) & mask;
170    Dmsg2(debuglevel, "Leave HashIndex hash=0x%llx index=%d\n", hash, index);
171 }
172 
173 /*
174  * tsize is the estimated number of entries in the hash table
175  */
htable(void * item,void * link,int tsize,int nr_pages,int nr_entries)176 htable::htable(void *item, void *link, int tsize, int nr_pages, int nr_entries)
177 {
178    init(item, link, tsize, nr_pages, nr_entries);
179 }
180 
init(void * item,void * link,int tsize,int nr_pages,int nr_entries)181 void htable::init(void *item, void *link, int tsize, int nr_pages, int nr_entries)
182 {
183    int pwr;
184    int pagesize;
185    int buffer_size;
186 
187    memset(this, 0, sizeof(htable));
188    if (tsize < 31) {
189       tsize = 31;
190    }
191    tsize >>= 2;
192    for (pwr = 0; tsize; pwr++) {
193       tsize >>= 1;
194    }
195    loffset = (char *)link - (char *)item;
196    mask = ~((~0) << pwr);             /* 3 bits => table size = 8 */
197    rshift = 30 - pwr;                 /* Start using bits 28, 29, 30 */
198    buckets = 1 << pwr;                /* Hash table size -- power of two */
199    max_items = buckets * nr_entries;  /* Allow average nr_entries entries per chain */
200    table = (hlink **)malloc(buckets * sizeof(hlink *));
201    memset(table, 0, buckets * sizeof(hlink *));
202 
203 #ifdef HAVE_GETPAGESIZE
204    pagesize = getpagesize();
205 #else
206    pagesize = B_PAGE_SIZE;
207 #endif
208    if (nr_pages == 0) {
209       buffer_size = MAX_BUF_SIZE;
210    } else {
211       buffer_size = pagesize * nr_pages;
212       if (buffer_size > MAX_BUF_SIZE) {
213          buffer_size = MAX_BUF_SIZE;
214       } else if (buffer_size < MIN_BUF_SIZE) {
215          buffer_size = MIN_BUF_SIZE;
216       }
217    }
218    MallocBigBuf(buffer_size);
219    extend_length = buffer_size;
220    Dmsg1(100, "Allocated big buffer of %ld bytes\n", buffer_size);
221 }
222 
size()223 uint32_t htable::size()
224 {
225    return num_items;
226 }
227 
228 /*
229  * Take each hash link and walk down the chain of items
230  *  that hash there counting them (i.e. the hits),
231  *  then report that number.
232  * Obiously, the more hits in a chain, the more time
233  *  it takes to reference them. Empty chains are not so
234  *  hot either -- as it means unused or wasted space.
235  */
236 #define MAX_COUNT 20
stats()237 void htable::stats()
238 {
239    int hits[MAX_COUNT];
240    int max = 0;
241    int i, j;
242    hlink *p;
243    printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
244    printf("Hits/bucket: buckets\n");
245    for (i = 0; i < MAX_COUNT; i++) {
246       hits[i] = 0;
247    }
248    for (i = 0; i < (int)buckets; i++) {
249       p = table[i];
250       j = 0;
251       while (p) {
252          p = (hlink *)(p->next);
253          j++;
254       }
255       if (j > max) {
256          max = j;
257       }
258       if (j < MAX_COUNT) {
259          hits[j]++;
260       }
261    }
262    for (i = 0; i < MAX_COUNT; i++) {
263       printf("%2d:           %d\n",i, hits[i]);
264    }
265    printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
266    printf("max hits in a bucket = %d\n", max);
267    printf("total bytes malloced = %lld\n", (long long int)total_size);
268    printf("total blocks malloced = %d\n", blocks);
269 }
270 
grow_table()271 void htable::grow_table()
272 {
273    htable *big;
274    hlink *cur;
275    void *next_item;
276 
277    Dmsg1(100, "Grow called old size = %d\n", buckets);
278 
279    /*
280     * Setup a bigger table.
281     */
282    big = (htable *)malloc(sizeof(htable));
283    big->hash = hash;
284    big->total_size = total_size;
285    big->extend_length = extend_length;
286    big->index = index;
287    big->blocks = blocks;
288    big->mem_block = mem_block;
289    big->loffset = loffset;
290    big->mask = mask<<1 | 1;
291    big->rshift = rshift - 1;
292    big->num_items = 0;
293    big->buckets = buckets * 2;
294    big->max_items = big->buckets * 4;
295 
296    /*
297     * Create a bigger hash table.
298     */
299    big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
300    memset(big->table, 0, big->buckets * sizeof(hlink *));
301    big->walkptr = NULL;
302    big->walk_index = 0;
303 
304    /*
305     * Insert all the items in the new hash table
306     */
307    Dmsg1(100, "Before copy num_items=%d\n", num_items);
308 
309    /*
310     * We walk through the old smaller tree getting items,
311     * but since we are overwriting the colision links, we must
312     * explicitly save the item->next pointer and walk each
313     * colision chain ourselves. We do use next() for getting
314     * to the next bucket.
315     */
316    for (void *item=first(); item; ) {
317       cur = (hlink *)((char *)item + loffset);
318       next_item = cur->next; /* Save link overwritten by insert */
319       switch (cur->key_type) {
320       case KEY_TYPE_CHAR:
321          Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
322          big->insert(cur->key.char_key, item);
323          break;
324       case KEY_TYPE_UINT32:
325          Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
326          big->insert(cur->key.uint32_key, item);
327          break;
328       case KEY_TYPE_UINT64:
329          Dmsg1(100, "Grow insert: %lld\n", cur->key.uint64_key);
330          big->insert(cur->key.uint64_key, item);
331          break;
332       case KEY_TYPE_BINARY:
333          big->insert(cur->key.binary_key, cur->key_len, item);
334          break;
335       }
336       if (next_item) {
337          item = (void *)((char *)next_item - loffset);
338       } else {
339          walkptr = NULL;
340          item = next();
341       }
342    }
343 
344    Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
345    if (num_items != big->num_items) {
346       Dmsg0(000, "****** Big problems num_items mismatch ******\n");
347    }
348 
349    free(table);
350    memcpy(this, big, sizeof(htable)); /* Move everything across */
351    free(big);
352 
353    Dmsg0(100, "Exit grow.\n");
354 }
355 
insert(char * key,void * item)356 bool htable::insert(char *key, void *item)
357 {
358    hlink *hp;
359 
360    if (lookup(key)) {
361       return false;                   /* Already exists */
362    }
363 
364    ASSERT(index < buckets);
365    Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
366    hp = (hlink *)(((char *)item) + loffset);
367 
368    Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
369 
370    hp->next = table[index];
371    hp->hash = hash;
372    hp->key_type = KEY_TYPE_CHAR;
373    hp->key.char_key = key;
374    hp->key_len = 0;
375    table[index] = hp;
376 
377    Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n",
378          hp->next, hp->hash, hp->key.char_key);
379 
380    if (++num_items >= max_items) {
381       Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
382       grow_table();
383    }
384 
385    Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
386 
387    return true;
388 }
389 
insert(uint32_t key,void * item)390 bool htable::insert(uint32_t key, void *item)
391 {
392    hlink *hp;
393 
394    if (lookup(key)) {
395       return false;                   /* Already exists */
396    }
397 
398    ASSERT(index < buckets);
399    Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
400    hp = (hlink *)(((char *)item) + loffset);
401 
402    Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
403 
404    hp->next = table[index];
405    hp->hash = hash;
406    hp->key_type = KEY_TYPE_UINT32;
407    hp->key.uint32_key = key;
408    hp->key_len = 0;
409    table[index] = hp;
410 
411    Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n", hp->next, hp->hash, hp->key.uint32_key);
412 
413    if (++num_items >= max_items) {
414       Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
415       grow_table();
416    }
417 
418    Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%ld\n", index, num_items, key);
419 
420    return true;
421 }
422 
insert(uint64_t key,void * item)423 bool htable::insert(uint64_t key, void *item)
424 {
425    hlink *hp;
426 
427    if (lookup(key)) {
428       return false;                   /* Already exists */
429    }
430 
431    ASSERT(index < buckets);
432    Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
433    hp = (hlink *)(((char *)item) + loffset);
434 
435    Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
436 
437    hp->next = table[index];
438    hp->hash = hash;
439    hp->key_type = KEY_TYPE_UINT64;
440    hp->key.uint64_key = key;
441    hp->key_len = 0;
442    table[index] = hp;
443 
444    Dmsg3(debuglevel, "Insert hp->next=%p hp->hash=0x%llx hp->key=%lld\n", hp->next, hp->hash, hp->key.uint64_key);
445 
446    if (++num_items >= max_items) {
447       Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
448       grow_table();
449    }
450 
451    Dmsg3(debuglevel, "Leave insert index=%d num_items=%d key=%lld\n", index, num_items, key);
452 
453    return true;
454 }
455 
insert(uint8_t * key,uint32_t key_len,void * item)456 bool htable::insert(uint8_t *key, uint32_t key_len, void *item)
457 {
458    hlink *hp;
459 
460    if (lookup(key, key_len)) {
461       return false;                   /* Already exists */
462    }
463 
464    ASSERT(index < buckets);
465    Dmsg2(debuglevel, "Insert: hash=%p index=%d\n", hash, index);
466    hp = (hlink *)(((char *)item) + loffset);
467 
468    Dmsg4(debuglevel, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index, item, loffset);
469 
470    hp->next = table[index];
471    hp->hash = hash;
472    hp->key_type = KEY_TYPE_BINARY;
473    hp->key.binary_key = key;
474    hp->key_len = key_len;
475    table[index] = hp;
476 
477    Dmsg2(debuglevel, "Insert hp->next=%p hp->hash=0x%llx\n", hp->next, hp->hash);
478 
479    if (++num_items >= max_items) {
480       Dmsg2(debuglevel, "num_items=%d max_items=%d\n", num_items, max_items);
481       grow_table();
482    }
483 
484    Dmsg2(debuglevel, "Leave insert index=%d num_items=%d\n", index, num_items);
485 
486    return true;
487 }
488 
lookup(char * key)489 void *htable::lookup(char *key)
490 {
491    HashIndex(key);
492    for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
493       ASSERT(hp->key_type == KEY_TYPE_CHAR);
494       if (hash == hp->hash && bstrcmp(key, hp->key.char_key)) {
495          Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
496          return ((char *)hp) - loffset;
497       }
498    }
499 
500    return NULL;
501 }
502 
lookup(uint32_t key)503 void *htable::lookup(uint32_t key)
504 {
505    HashIndex(key);
506    for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
507       ASSERT(hp->key_type == KEY_TYPE_UINT32);
508       if (hash == hp->hash && key == hp->key.uint32_key) {
509          Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
510          return ((char *)hp) - loffset;
511       }
512    }
513 
514    return NULL;
515 }
516 
lookup(uint64_t key)517 void *htable::lookup(uint64_t key)
518 {
519    HashIndex(key);
520    for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
521       ASSERT(hp->key_type == KEY_TYPE_UINT64);
522       if (hash == hp->hash && key == hp->key.uint64_key) {
523          Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
524          return ((char *)hp) - loffset;
525       }
526    }
527 
528    return NULL;
529 }
530 
lookup(uint8_t * key,uint32_t key_len)531 void *htable::lookup(uint8_t *key, uint32_t key_len)
532 {
533    HashIndex(key, key_len);
534    for (hlink *hp = table[index]; hp; hp = (hlink *)hp->next) {
535       ASSERT(hp->key_type == KEY_TYPE_BINARY);
536       if (hash == hp->hash && memcmp(key, hp->key.binary_key, hp->key_len) == 0) {
537          Dmsg1(debuglevel, "lookup return %p\n", ((char *)hp) - loffset);
538          return ((char *)hp) - loffset;
539       }
540    }
541 
542    return NULL;
543 }
544 
next()545 void *htable::next()
546 {
547    Dmsg1(debuglevel, "Enter next: walkptr=%p\n", walkptr);
548    if (walkptr) {
549       walkptr = (hlink *)(walkptr->next);
550    }
551 
552    while (!walkptr && walk_index < buckets) {
553       walkptr = table[walk_index++];
554       if (walkptr) {
555          Dmsg3(debuglevel, "new walkptr=%p next=%p inx=%d\n", walkptr,
556             walkptr->next, walk_index-1);
557       }
558    }
559 
560    if (walkptr) {
561       Dmsg2(debuglevel, "next: rtn %p walk_index=%d\n",
562          ((char *)walkptr) - loffset, walk_index);
563       return ((char *)walkptr) - loffset;
564    }
565    Dmsg0(debuglevel, "next: return NULL\n");
566 
567    return NULL;
568 }
569 
first()570 void *htable::first()
571 {
572    Dmsg0(debuglevel, "Enter first\n");
573    walkptr = table[0];                /* get first bucket */
574    walk_index = 1;                    /* Point to next index */
575 
576    while (!walkptr && walk_index < buckets) {
577       walkptr = table[walk_index++];  /* go to next bucket */
578       if (walkptr) {
579          Dmsg3(debuglevel, "first new walkptr=%p next=%p inx=%d\n", walkptr,
580             walkptr->next, walk_index-1);
581       }
582    }
583 
584    if (walkptr) {
585       Dmsg1(debuglevel, "Leave first walkptr=%p\n", walkptr);
586       return ((char *)walkptr) - loffset;
587    }
588    Dmsg0(debuglevel, "Leave first walkptr=NULL\n");
589 
590    return NULL;
591 }
592 
593 /* Destroy the table and its contents */
destroy()594 void htable::destroy()
595 {
596    HashBigFree();
597 
598    free(table);
599    table = NULL;
600    GarbageCollectMemory();
601    Dmsg0(100, "Done destroy.\n");
602 }
603