1 #include "cmph.h"
2 #include "cmph_structs.h"
3 #include "chm.h"
4 #include "bmz.h"
5 #include "bmz8.h"
6 #include "brz.h"
7 #include "fch.h"
8 #include "bdz.h"
9 #include "bdz_ph.h"
10 #include "chd_ph.h"
11 #include "chd.h"
12
13 #include <stdlib.h>
14 #include <assert.h>
15 #include <string.h>
16 // #define DEBUG
17 #include "debug.h"
18
/* Printable algorithm names. Other code in this file indexes the table with
 * an algorithm id (cmph_names[mph->algo]); the list ends with a NULL sentinel. */
const char *cmph_names[] = {
	"bmz",
	"bmz8",
	"chm",
	"brz",
	"fch",
	"bdz",
	"bdz_ph",
	"chd_ph",
	"chd",
	NULL
};
20
21 typedef struct
22 {
23 void *vector;
24 cmph_uint32 position; // access position when data is a vector
25 } cmph_vector_t;
26
27
28
29 /**
30 * Support a vector of struct as the source of keys.
31 *
32 * E.g. The keys could be the fieldB's in a vector of struct rec where
33 * struct rec is defined as:
34 * struct rec {
35 * fieldA;
36 * fieldB;
37 * fieldC;
38 * }
39 */
40 typedef struct
41 {
42 void *vector; /* Pointer to the vector of struct */
43 cmph_uint32 position; /* current position */
44 cmph_uint32 struct_size; /* The size of the struct */
45 cmph_uint32 key_offset; /* The byte offset of the key in the struct */
46 cmph_uint32 key_len; /* The length of the key */
47 } cmph_struct_vector_t;
48
49
50 static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys);
51 static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source);
52
53 static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys);
54 static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source);
55
/**
 * \brief Read the next newline-terminated key from a text stream.
 *
 * The line is read in BUFSIZ-sized chunks and accumulated in a heap buffer,
 * so lines of any length are supported. The trailing newline is replaced by
 * a NUL terminator and is not counted in the key length.
 *
 * \param data   the FILE * to read from
 * \param key    receives a malloc'ed buffer holding the key (release it with
 *               free()); set to NULL on failure
 * \param keylen receives the key length in bytes; set to 0 on failure
 * \return the key length, or -1 on read error, allocation failure or end of
 *         input. A final line that is not newline-terminated yields -1 as
 *         well, which matches how count_nlfile_keys() counts keys.
 */
static int key_nlfile_read(void *data, char **key, cmph_uint32 *keylen)
{
	FILE *fd = (FILE *)data;
	char buf[BUFSIZ];

	*key = NULL;
	*keylen = 0;
	for (;;)
	{
		size_t chunk_len;
		char *grown;

		if (fgets(buf, BUFSIZ, fd) == NULL || feof(fd)) goto fail;

		chunk_len = strlen(buf);
		/* A chunk that starts with an embedded NUL byte has length 0;
		 * indexing buf[chunk_len - 1] would read out of bounds. */
		if (chunk_len == 0) continue;

		/* Grow through a temporary so the old buffer is not lost when
		 * realloc() fails. */
		grown = (char *)realloc(*key, (size_t)(*keylen) + chunk_len + 1);
		if (grown == NULL) goto fail;
		memcpy(grown + *keylen, buf, chunk_len);
		*key = grown;
		*keylen += (cmph_uint32)chunk_len;

		if (buf[chunk_len - 1] == '\n') break;
	}

	/* The loop only exits through the break above, so the last stored byte
	 * is the newline: overwrite it with the terminator. */
	(*key)[(*keylen) - 1] = 0;
	--(*keylen);
	return (int)(*keylen);

fail:
	/* Do not hand a partially built key back to the caller. */
	free(*key);
	*key = NULL;
	*keylen = 0;
	return -1;
}
80
/**
 * \brief Read the next key from a vector of length-prefixed byte strings.
 *
 * Each vector entry points to a record made of a cmph_uint32 length (copied
 * with memcpy, so in the host's native representation) immediately followed
 * by that many key bytes.
 *
 * \param data   the cmph_vector_t iteration state
 * \param key    receives a malloc'ed copy of the key bytes (not
 *               NUL-terminated); set to NULL on allocation failure
 * \param keylen receives the key length in bytes; set to 0 on failure
 * \return the key length, or -1 if the copy could not be allocated (the
 *         read position is not advanced in that case)
 */
static int key_byte_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
	cmph_uint8 **keys_vd = (cmph_uint8 **)cmph_vector->vector;
	const cmph_uint8 *record = keys_vd[cmph_vector->position];
	size_t size;

	memcpy(keylen, record, sizeof(*keylen));
	size = *keylen;

	/* Request at least one byte so a NULL result always means failure,
	 * even for zero-length keys. */
	*key = (char *)malloc(size > 0 ? size : 1);
	if (*key == NULL)
	{
		*keylen = 0;
		return -1;
	}
	memcpy(*key, record + sizeof(*keylen), size);
	cmph_vector->position = cmph_vector->position + 1;
	return (int)(*keylen);
}
94
/**
 * \brief Read the next fixed-length key out of a vector of structs.
 *
 * The key of element i starts at byte offset i * struct_size + key_offset
 * from the vector base and is key_len bytes long.
 *
 * \param data   the cmph_struct_vector_t iteration state
 * \param key    receives a malloc'ed copy of the key bytes (not
 *               NUL-terminated); set to NULL on allocation failure
 * \param keylen receives the key length in bytes; set to 0 on failure
 * \return the key length, or -1 if the copy could not be allocated (the
 *         read position is not advanced in that case)
 */
static int key_struct_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
	const char *keys_vd = (const char *)cmph_struct_vector->vector;
	size_t size = cmph_struct_vector->key_len;
	/* Compute the offset in size_t: the 32-bit product
	 * position * struct_size can wrap around for large vectors. */
	size_t offset = (size_t)cmph_struct_vector->position * cmph_struct_vector->struct_size
	              + cmph_struct_vector->key_offset;

	/* Request at least one byte so a NULL result always means failure. */
	*key = (char *)malloc(size > 0 ? size : 1);
	if (*key == NULL)
	{
		*keylen = 0;
		return -1;
	}
	*keylen = cmph_struct_vector->key_len;
	memcpy(*key, keys_vd + offset, size);
	cmph_struct_vector->position = cmph_struct_vector->position + 1;
	return (int)(*keylen);
}
107
/**
 * \brief Read the next key from a vector of NUL-terminated strings.
 *
 * \param data   the cmph_vector_t iteration state
 * \param key    receives a malloc'ed, NUL-terminated copy of the string;
 *               set to NULL on allocation failure
 * \param keylen receives the string length (terminator excluded); set to 0
 *               on failure
 * \return the key length, or -1 if the copy could not be allocated (the
 *         read position is not advanced in that case)
 */
static int key_vector_read(void *data, char **key, cmph_uint32 *keylen)
{
	cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
	char **keys_vd = (char **)cmph_vector->vector;
	const char *src = keys_vd[cmph_vector->position];
	size_t size = strlen(src);

	*key = (char *)malloc(size + 1);
	if (*key == NULL)
	{
		*keylen = 0;
		return -1;
	}
	/* The length is already known, so copy string and terminator in one
	 * pass instead of rescanning with strcpy(). */
	memcpy(*key, src, size + 1);
	*keylen = (cmph_uint32)size;
	cmph_vector->position = cmph_vector->position + 1;
	return (int)(*keylen);
}
121
122
/**
 * \brief Release a key buffer produced by key_nlfile_read().
 * \param data   unused
 * \param key    buffer to release with free()
 * \param keylen unused
 */
static void key_nlfile_dispose(void *data, char *key, cmph_uint32 keylen)
{
	(void)data;
	(void)keylen;
	free(key);
}
127
/**
 * \brief Release a key buffer produced by one of the vector read callbacks.
 * \param data   unused
 * \param key    buffer to release with free()
 * \param keylen unused
 */
static void key_vector_dispose(void *data, char *key, cmph_uint32 keylen)
{
	(void)data;
	(void)keylen;
	free(key);
}
132
/**
 * \brief Restart a file-backed key source from the beginning of the stream.
 * \param data the FILE * holding the keys
 */
static void key_nlfile_rewind(void *data)
{
	rewind((FILE *)data);
}
138
key_struct_vector_rewind(void * data)139 static void key_struct_vector_rewind(void *data)
140 {
141 cmph_struct_vector_t *cmph_struct_vector = (cmph_struct_vector_t *)data;
142 cmph_struct_vector->position = 0;
143 }
144
key_vector_rewind(void * data)145 static void key_vector_rewind(void *data)
146 {
147 cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
148 cmph_vector->position = 0;
149 }
150
count_nlfile_keys(FILE * fd)151 static cmph_uint32 count_nlfile_keys(FILE *fd)
152 {
153 cmph_uint32 count = 0;
154 register char * ptr;
155 rewind(fd);
156 while(1)
157 {
158 char buf[BUFSIZ];
159 ptr = fgets(buf, BUFSIZ, fd);
160 if (feof(fd)) break;
161 if (ferror(fd) || ptr == NULL) {
162 perror("Error reading input file");
163 return 0;
164 }
165 if (buf[strlen(buf) - 1] != '\n') continue;
166 ++count;
167 }
168 rewind(fd);
169 return count;
170 }
171
cmph_io_nlfile_adapter(FILE * keys_fd)172 cmph_io_adapter_t *cmph_io_nlfile_adapter(FILE * keys_fd)
173 {
174 cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
175 assert(key_source);
176 key_source->data = (void *)keys_fd;
177 key_source->nkeys = count_nlfile_keys(keys_fd);
178 key_source->read = key_nlfile_read;
179 key_source->dispose = key_nlfile_dispose;
180 key_source->rewind = key_nlfile_rewind;
181 return key_source;
182 }
183
/**
 * \brief Free an adapter created by cmph_io_nlfile_adapter().
 *
 * Only the adapter itself is freed; the stream is not closed here.
 *
 * \param key_source adapter to free
 */
void cmph_io_nlfile_adapter_destroy(cmph_io_adapter_t * key_source)
{
	free(key_source);
}
188
cmph_io_nlnkfile_adapter(FILE * keys_fd,cmph_uint32 nkeys)189 cmph_io_adapter_t *cmph_io_nlnkfile_adapter(FILE * keys_fd, cmph_uint32 nkeys)
190 {
191 cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
192 assert(key_source);
193 key_source->data = (void *)keys_fd;
194 key_source->nkeys = nkeys;
195 key_source->read = key_nlfile_read;
196 key_source->dispose = key_nlfile_dispose;
197 key_source->rewind = key_nlfile_rewind;
198 return key_source;
199 }
200
/**
 * \brief Free an adapter created by cmph_io_nlnkfile_adapter().
 *
 * Only the adapter itself is freed; the stream is not closed here.
 *
 * \param key_source adapter to free
 */
void cmph_io_nlnkfile_adapter_destroy(cmph_io_adapter_t * key_source)
{
	free(key_source);
}
205
206
cmph_io_struct_vector_new(void * vector,cmph_uint32 struct_size,cmph_uint32 key_offset,cmph_uint32 key_len,cmph_uint32 nkeys)207 static cmph_io_adapter_t *cmph_io_struct_vector_new(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
208 {
209 cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
210 cmph_struct_vector_t * cmph_struct_vector = (cmph_struct_vector_t *)malloc(sizeof(cmph_struct_vector_t));
211 assert(key_source);
212 assert(cmph_struct_vector);
213 cmph_struct_vector->vector = vector;
214 cmph_struct_vector->position = 0;
215 cmph_struct_vector->struct_size = struct_size;
216 cmph_struct_vector->key_offset = key_offset;
217 cmph_struct_vector->key_len = key_len;
218 key_source->data = (void *)cmph_struct_vector;
219 key_source->nkeys = nkeys;
220 return key_source;
221 }
222
/**
 * \brief Free a struct-vector adapter and its iteration state.
 *
 * The vector of structs itself is not freed here; only the reference to it
 * is cleared before the state is released.
 *
 * \param key_source adapter created by cmph_io_struct_vector_new()
 */
static void cmph_io_struct_vector_destroy(cmph_io_adapter_t * key_source)
{
	cmph_struct_vector_t *state = (cmph_struct_vector_t *)key_source->data;

	state->vector = NULL;
	free(state);
	free(key_source);
}
230
cmph_io_vector_new(void * vector,cmph_uint32 nkeys)231 static cmph_io_adapter_t *cmph_io_vector_new(void * vector, cmph_uint32 nkeys)
232 {
233 cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
234 cmph_vector_t * cmph_vector = (cmph_vector_t *)malloc(sizeof(cmph_vector_t));
235 assert(key_source);
236 assert(cmph_vector);
237 cmph_vector->vector = vector;
238 cmph_vector->position = 0;
239 key_source->data = (void *)cmph_vector;
240 key_source->nkeys = nkeys;
241 return key_source;
242 }
243
/**
 * \brief Free a vector adapter and its iteration state.
 *
 * The key vector itself is not freed here; only the reference to it is
 * cleared before the state is released.
 *
 * \param key_source adapter created by cmph_io_vector_new()
 */
static void cmph_io_vector_destroy(cmph_io_adapter_t * key_source)
{
	cmph_vector_t *state = (cmph_vector_t *)key_source->data;

	state->vector = NULL;
	free(state);
	free(key_source);
}
251
cmph_io_byte_vector_adapter(cmph_uint8 ** vector,cmph_uint32 nkeys)252 cmph_io_adapter_t *cmph_io_byte_vector_adapter(cmph_uint8 ** vector, cmph_uint32 nkeys)
253 {
254 cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
255 key_source->read = key_byte_vector_read;
256 key_source->dispose = key_vector_dispose;
257 key_source->rewind = key_vector_rewind;
258 return key_source;
259 }
/**
 * \brief Free an adapter created by cmph_io_byte_vector_adapter().
 * \param key_source adapter to free
 */
void cmph_io_byte_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_vector_destroy(key_source);
}
264
cmph_io_struct_vector_adapter(void * vector,cmph_uint32 struct_size,cmph_uint32 key_offset,cmph_uint32 key_len,cmph_uint32 nkeys)265 cmph_io_adapter_t *cmph_io_struct_vector_adapter(void * vector, cmph_uint32 struct_size, cmph_uint32 key_offset, cmph_uint32 key_len, cmph_uint32 nkeys)
266 {
267 cmph_io_adapter_t * key_source = cmph_io_struct_vector_new(vector, struct_size, key_offset, key_len, nkeys);
268 key_source->read = key_struct_vector_read;
269 key_source->dispose = key_vector_dispose;
270 key_source->rewind = key_struct_vector_rewind;
271 return key_source;
272 }
273
/**
 * \brief Free an adapter created by cmph_io_struct_vector_adapter().
 * \param key_source adapter to free
 */
void cmph_io_struct_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_struct_vector_destroy(key_source);
}
278
cmph_io_vector_adapter(char ** vector,cmph_uint32 nkeys)279 cmph_io_adapter_t *cmph_io_vector_adapter(char ** vector, cmph_uint32 nkeys)
280 {
281 cmph_io_adapter_t * key_source = cmph_io_vector_new(vector, nkeys);
282 key_source->read = key_vector_read;
283 key_source->dispose = key_vector_dispose;
284 key_source->rewind = key_vector_rewind;
285 return key_source;
286 }
287
/**
 * \brief Free an adapter created by cmph_io_vector_adapter().
 * \param key_source adapter to free
 */
void cmph_io_vector_adapter_destroy(cmph_io_adapter_t * key_source)
{
	cmph_io_vector_destroy(key_source);
}
292
cmph_config_new(cmph_io_adapter_t * key_source)293 cmph_config_t *cmph_config_new(cmph_io_adapter_t *key_source)
294 {
295 cmph_config_t *mph = NULL;
296 mph = __config_new(key_source);
297 assert(mph);
298 mph->algo = CMPH_CHM; // default value
299 mph->data = chm_config_new();
300 return mph;
301 }
302
/**
 * \brief Switch a configuration to another algorithm.
 *
 * When the algorithm actually changes, the algorithm-specific config of the
 * current algorithm is destroyed and a fresh one is created for the new
 * algorithm. Selecting the algorithm already in use changes nothing.
 *
 * \param mph  configuration to update
 * \param algo algorithm to select
 */
void cmph_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo)
{
	if (algo == mph->algo) return;

	/* Tear down the settings that belong to the current algorithm. */
	switch (mph->algo)
	{
		case CMPH_CHM:    chm_config_destroy(mph);    break;
		case CMPH_BMZ:    bmz_config_destroy(mph);    break;
		case CMPH_BMZ8:   bmz8_config_destroy(mph);   break;
		case CMPH_BRZ:    brz_config_destroy(mph);    break;
		case CMPH_FCH:    fch_config_destroy(mph);    break;
		case CMPH_BDZ:    bdz_config_destroy(mph);    break;
		case CMPH_BDZ_PH: bdz_ph_config_destroy(mph); break;
		case CMPH_CHD_PH: chd_ph_config_destroy(mph); break;
		case CMPH_CHD:    chd_config_destroy(mph);    break;
		default:
			assert(0);
	}

	/* Build the settings for the requested algorithm. mph->algo still
	 * holds the previous algorithm while these constructors run. */
	switch (algo)
	{
		case CMPH_CHM:    mph->data = chm_config_new();    break;
		case CMPH_BMZ:    mph->data = bmz_config_new();    break;
		case CMPH_BMZ8:   mph->data = bmz8_config_new();   break;
		case CMPH_BRZ:    mph->data = brz_config_new();    break;
		case CMPH_FCH:    mph->data = fch_config_new();    break;
		case CMPH_BDZ:    mph->data = bdz_config_new();    break;
		case CMPH_BDZ_PH: mph->data = bdz_ph_config_new(); break;
		case CMPH_CHD_PH: mph->data = chd_ph_config_new(); break;
		case CMPH_CHD:    mph->data = chd_config_new(mph); break;
		default:
			assert(0);
	}

	mph->algo = algo;
}
374
/**
 * \brief Set the temporary directory; only acted on when the algorithm is CMPH_BRZ.
 * \param mph     configuration to update
 * \param tmp_dir directory passed on to brz_config_set_tmp_dir()
 */
void cmph_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir)
{
	if (mph->algo != CMPH_BRZ) return;

	brz_config_set_tmp_dir(mph, tmp_dir);
}
382
383
/**
 * \brief Set the mphf stream; only acted on when the algorithm is CMPH_BRZ.
 * \param mph     configuration to update
 * \param mphf_fd stream passed on to brz_config_set_mphf_fd()
 */
void cmph_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd)
{
	if (mph->algo != CMPH_BRZ) return;

	brz_config_set_mphf_fd(mph, mphf_fd);
}
391
/**
 * \brief Forward the parameter b to the selected algorithm.
 *
 * Only CMPH_BRZ, CMPH_BDZ, CMPH_CHD_PH and CMPH_CHD receive it; for every
 * other algorithm the call does nothing.
 *
 * \param mph configuration to update
 * \param b   value passed on to the algorithm-specific setter
 */
void cmph_config_set_b(cmph_config_t *mph, cmph_uint32 b)
{
	switch (mph->algo)
	{
		case CMPH_BRZ:    brz_config_set_b(mph, b);    break;
		case CMPH_BDZ:    bdz_config_set_b(mph, b);    break;
		case CMPH_CHD_PH: chd_ph_config_set_b(mph, b); break;
		case CMPH_CHD:    chd_config_set_b(mph, b);    break;
		default:
			break;
	}
}
411
/**
 * \brief Forward keys_per_bin to the selected algorithm.
 *
 * Only CMPH_CHD_PH and CMPH_CHD receive it; for every other algorithm the
 * call does nothing.
 *
 * \param mph          configuration to update
 * \param keys_per_bin value passed on to the algorithm-specific setter
 */
void cmph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin)
{
	switch (mph->algo)
	{
		case CMPH_CHD_PH: chd_ph_config_set_keys_per_bin(mph, keys_per_bin); break;
		case CMPH_CHD:    chd_config_set_keys_per_bin(mph, keys_per_bin);    break;
		default:
			break;
	}
}
423
/**
 * \brief Set the memory availability; only acted on when the algorithm is CMPH_BRZ.
 * \param mph                 configuration to update
 * \param memory_availability value passed on to brz_config_set_memory_availability()
 */
void cmph_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability)
{
	if (mph->algo != CMPH_BRZ) return;

	brz_config_set_memory_availability(mph, memory_availability);
}
431
/**
 * \brief Destroy a configuration.
 *
 * The algorithm-specific destructor runs first, then __config_destroy()
 * finishes with the generic part. Passing NULL is a no-op.
 *
 * \param mph configuration to destroy, or NULL
 */
void cmph_config_destroy(cmph_config_t *mph)
{
	if (!mph) return;

	DEBUGP("Destroying mph with algo %s\n", cmph_names[mph->algo]);
	switch (mph->algo)
	{
		case CMPH_CHM:    chm_config_destroy(mph);    break;
		case CMPH_BMZ:    bmz_config_destroy(mph);    break;
		case CMPH_BMZ8:   bmz8_config_destroy(mph);   break;
		case CMPH_BRZ:    brz_config_destroy(mph);    break;
		case CMPH_FCH:    fch_config_destroy(mph);    break;
		case CMPH_BDZ:    bdz_config_destroy(mph);    break;
		case CMPH_BDZ_PH: bdz_ph_config_destroy(mph); break;
		case CMPH_CHD_PH: chd_ph_config_destroy(mph); break;
		case CMPH_CHD:    chd_config_destroy(mph);    break;
		default:
			assert(0);
	}
	__config_destroy(mph);
}
472
/**
 * \brief Store the verbosity level in the configuration.
 * \param mph       configuration to update
 * \param verbosity value written to mph->verbosity
 */
void cmph_config_set_verbosity(cmph_config_t *mph, cmph_uint32 verbosity)
{
	mph->verbosity = verbosity;
}
477
/**
 * \brief Forward the hash function list to the selected algorithm.
 *
 * An algorithm value that is not handled here is silently ignored.
 *
 * \param mph       configuration to update
 * \param hashfuncs list passed on to the algorithm-specific setter
 */
void cmph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
{
	switch (mph->algo)
	{
		case CMPH_CHM:    chm_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BMZ:    bmz_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BMZ8:   bmz8_config_set_hashfuncs(mph, hashfuncs);   break;
		case CMPH_BRZ:    brz_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_FCH:    fch_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BDZ:    bdz_config_set_hashfuncs(mph, hashfuncs);    break;
		case CMPH_BDZ_PH: bdz_ph_config_set_hashfuncs(mph, hashfuncs); break;
		case CMPH_CHD_PH: chd_ph_config_set_hashfuncs(mph, hashfuncs); break;
		case CMPH_CHD:    chd_config_set_hashfuncs(mph, hashfuncs);    break;
		default:
			break;
	}
}
/**
 * \brief Store the constant c in the configuration.
 *
 * cmph_new() later hands this value to the algorithm constructor.
 *
 * \param mph configuration to update
 * \param c   value written to mph->c
 */
void cmph_config_set_graphsize(cmph_config_t *mph, double c)
{
	mph->c = c;
}
519
cmph_new(cmph_config_t * mph)520 cmph_t *cmph_new(cmph_config_t *mph)
521 {
522 cmph_t *mphf = NULL;
523 double c = mph->c;
524
525 DEBUGP("Creating mph with algorithm %s\n", cmph_names[mph->algo]);
526 switch (mph->algo)
527 {
528 case CMPH_CHM:
529 DEBUGP("Creating chm hash\n");
530 mphf = chm_new(mph, c);
531 break;
532 case CMPH_BMZ: /* included -- Fabiano */
533 DEBUGP("Creating bmz hash\n");
534 mphf = bmz_new(mph, c);
535 break;
536 case CMPH_BMZ8: /* included -- Fabiano */
537 DEBUGP("Creating bmz8 hash\n");
538 mphf = bmz8_new(mph, c);
539 break;
540 case CMPH_BRZ: /* included -- Fabiano */
541 DEBUGP("Creating brz hash\n");
542 if (c >= 2.0) brz_config_set_algo(mph, CMPH_FCH);
543 else brz_config_set_algo(mph, CMPH_BMZ8);
544 mphf = brz_new(mph, c);
545 break;
546 case CMPH_FCH: /* included -- Fabiano */
547 DEBUGP("Creating fch hash\n");
548 mphf = fch_new(mph, c);
549 break;
550 case CMPH_BDZ: /* included -- Fabiano */
551 DEBUGP("Creating bdz hash\n");
552 mphf = bdz_new(mph, c);
553 break;
554 case CMPH_BDZ_PH: /* included -- Fabiano */
555 DEBUGP("Creating bdz_ph hash\n");
556 mphf = bdz_ph_new(mph, c);
557 break;
558 case CMPH_CHD_PH: /* included -- Fabiano */
559 DEBUGP("Creating chd_ph hash\n");
560 mphf = chd_ph_new(mph, c);
561 break;
562 case CMPH_CHD: /* included -- Fabiano */
563 DEBUGP("Creating chd hash\n");
564 mphf = chd_new(mph, c);
565 break;
566 default:
567 assert(0);
568 }
569 return mphf;
570 }
571
/**
 * \brief Write a hash function to a stream using its algorithm's dump routine.
 * \param mphf hash function to write
 * \param f    destination stream
 * \return the value returned by the algorithm-specific dump routine; 0 when
 *         the algorithm value is not recognised and assertions are disabled
 */
int cmph_dump(cmph_t *mphf, FILE *f)
{
	switch (mphf->algo)
	{
		case CMPH_CHM:    return chm_dump(mphf, f);
		case CMPH_BMZ:    return bmz_dump(mphf, f);
		case CMPH_BMZ8:   return bmz8_dump(mphf, f);
		case CMPH_BRZ:    return brz_dump(mphf, f);
		case CMPH_FCH:    return fch_dump(mphf, f);
		case CMPH_BDZ:    return bdz_dump(mphf, f);
		case CMPH_BDZ_PH: return bdz_ph_dump(mphf, f);
		case CMPH_CHD_PH: return chd_ph_dump(mphf, f);
		case CMPH_CHD:    return chd_dump(mphf, f);
		default:
			break;
	}
	/* Reached only for an unrecognised algorithm value. */
	assert(0);
	return 0;
}
cmph_load(FILE * f)600 cmph_t *cmph_load(FILE *f)
601 {
602 cmph_t *mphf = NULL;
603 DEBUGP("Loading mphf generic parts\n");
604 mphf = __cmph_load(f);
605 if (mphf == NULL) return NULL;
606 DEBUGP("Loading mphf algorithm dependent parts\n");
607
608 switch (mphf->algo)
609 {
610 case CMPH_CHM:
611 chm_load(f, mphf);
612 break;
613 case CMPH_BMZ: /* included -- Fabiano */
614 DEBUGP("Loading bmz algorithm dependent parts\n");
615 bmz_load(f, mphf);
616 break;
617 case CMPH_BMZ8: /* included -- Fabiano */
618 DEBUGP("Loading bmz8 algorithm dependent parts\n");
619 bmz8_load(f, mphf);
620 break;
621 case CMPH_BRZ: /* included -- Fabiano */
622 DEBUGP("Loading brz algorithm dependent parts\n");
623 brz_load(f, mphf);
624 break;
625 case CMPH_FCH: /* included -- Fabiano */
626 DEBUGP("Loading fch algorithm dependent parts\n");
627 fch_load(f, mphf);
628 break;
629 case CMPH_BDZ: /* included -- Fabiano */
630 DEBUGP("Loading bdz algorithm dependent parts\n");
631 bdz_load(f, mphf);
632 break;
633 case CMPH_BDZ_PH: /* included -- Fabiano */
634 DEBUGP("Loading bdz_ph algorithm dependent parts\n");
635 bdz_ph_load(f, mphf);
636 break;
637 case CMPH_CHD_PH: /* included -- Fabiano */
638 DEBUGP("Loading chd_ph algorithm dependent parts\n");
639 chd_ph_load(f, mphf);
640 break;
641 case CMPH_CHD: /* included -- Fabiano */
642 DEBUGP("Loading chd algorithm dependent parts\n");
643 chd_load(f, mphf);
644 break;
645 default:
646 assert(0);
647 }
648 DEBUGP("Loaded mphf\n");
649 return mphf;
650 }
651
652
/**
 * \brief Look up a key using the search routine of the function's algorithm.
 * \param mphf   hash function to query
 * \param key    key to be hashed
 * \param keylen key length in bytes
 * \return the value returned by the algorithm-specific search routine; 0
 *         when the algorithm value is not recognised and assertions are
 *         disabled
 */
cmph_uint32 cmph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
{
	DEBUGP("mphf algorithm: %u \n", mphf->algo);
	switch (mphf->algo)
	{
		case CMPH_CHM:
			return chm_search(mphf, key, keylen);
		case CMPH_BMZ:
			DEBUGP("bmz algorithm search\n");
			return bmz_search(mphf, key, keylen);
		case CMPH_BMZ8:
			DEBUGP("bmz8 algorithm search\n");
			return bmz8_search(mphf, key, keylen);
		case CMPH_BRZ:
			DEBUGP("brz algorithm search\n");
			return brz_search(mphf, key, keylen);
		case CMPH_FCH:
			DEBUGP("fch algorithm search\n");
			return fch_search(mphf, key, keylen);
		case CMPH_BDZ:
			DEBUGP("bdz algorithm search\n");
			return bdz_search(mphf, key, keylen);
		case CMPH_BDZ_PH:
			DEBUGP("bdz_ph algorithm search\n");
			return bdz_ph_search(mphf, key, keylen);
		case CMPH_CHD_PH:
			DEBUGP("chd_ph algorithm search\n");
			return chd_ph_search(mphf, key, keylen);
		case CMPH_CHD:
			DEBUGP("chd algorithm search\n");
			return chd_search(mphf, key, keylen);
		default:
			break;
	}
	/* Reached only for an unrecognised algorithm value. */
	assert(0);
	return 0;
}
690
cmph_size(cmph_t * mphf)691 cmph_uint32 cmph_size(cmph_t *mphf)
692 {
693 return mphf->size;
694 }
695
/**
 * \brief Destroy a hash function using the destructor of its algorithm.
 *
 * Passing NULL is a no-op, in line with free() and cmph_config_destroy().
 * This lets cleanup paths call it unconditionally, for example after
 * cmph_load() returned NULL.
 *
 * \param mphf hash function to destroy, or NULL
 */
void cmph_destroy(cmph_t *mphf)
{
	if (mphf == NULL) return;

	switch (mphf->algo)
	{
		case CMPH_CHM:    chm_destroy(mphf);    break;
		case CMPH_BMZ:    bmz_destroy(mphf);    break;
		case CMPH_BMZ8:   bmz8_destroy(mphf);   break;
		case CMPH_BRZ:    brz_destroy(mphf);    break;
		case CMPH_FCH:    fch_destroy(mphf);    break;
		case CMPH_BDZ:    bdz_destroy(mphf);    break;
		case CMPH_BDZ_PH: bdz_ph_destroy(mphf); break;
		case CMPH_CHD_PH: chd_ph_destroy(mphf); break;
		case CMPH_CHD:    chd_destroy(mphf);    break;
		default:
			/* Unrecognised algorithm value. */
			assert(0);
	}
}
733
734
735 /** \fn void cmph_pack(cmph_t *mphf, void *packed_mphf);
736 * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
737 * \param mphf pointer to the resulting mphf
738 * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
739 */
cmph_pack(cmph_t * mphf,void * packed_mphf)740 void cmph_pack(cmph_t *mphf, void *packed_mphf)
741 {
742 // packing algorithm type to be used in cmph.c
743 cmph_uint32 * ptr = (cmph_uint32 *) packed_mphf;
744 *ptr++ = mphf->algo;
745 DEBUGP("mphf->algo = %u\n", mphf->algo);
746 switch(mphf->algo)
747 {
748 case CMPH_CHM:
749 chm_pack(mphf, ptr);
750 break;
751 case CMPH_BMZ: /* included -- Fabiano */
752 bmz_pack(mphf, ptr);
753 break;
754 case CMPH_BMZ8: /* included -- Fabiano */
755 bmz8_pack(mphf, ptr);
756 break;
757 case CMPH_BRZ: /* included -- Fabiano */
758 brz_pack(mphf, ptr);
759 break;
760 case CMPH_FCH: /* included -- Fabiano */
761 fch_pack(mphf, ptr);
762 break;
763 case CMPH_BDZ: /* included -- Fabiano */
764 bdz_pack(mphf, ptr);
765 break;
766 case CMPH_BDZ_PH: /* included -- Fabiano */
767 bdz_ph_pack(mphf, ptr);
768 break;
769 case CMPH_CHD_PH: /* included -- Fabiano */
770 chd_ph_pack(mphf, ptr);
771 break;
772 case CMPH_CHD: /* included -- Fabiano */
773 chd_pack(mphf, ptr);
774 break;
775 default:
776 assert(0);
777 }
778 return;
779 }
780
781 /** \fn cmph_uint32 cmph_packed_size(cmph_t *mphf);
782 * \brief Return the amount of space needed to pack mphf.
783 * \param mphf pointer to a mphf
784 * \return the size of the packed function or zero for failures
785 */
cmph_packed_size(cmph_t * mphf)786 cmph_uint32 cmph_packed_size(cmph_t *mphf)
787 {
788 switch(mphf->algo)
789 {
790 case CMPH_CHM:
791 return chm_packed_size(mphf);
792 case CMPH_BMZ: /* included -- Fabiano */
793 return bmz_packed_size(mphf);
794 case CMPH_BMZ8: /* included -- Fabiano */
795 return bmz8_packed_size(mphf);
796 case CMPH_BRZ: /* included -- Fabiano */
797 return brz_packed_size(mphf);
798 case CMPH_FCH: /* included -- Fabiano */
799 return fch_packed_size(mphf);
800 case CMPH_BDZ: /* included -- Fabiano */
801 return bdz_packed_size(mphf);
802 case CMPH_BDZ_PH: /* included -- Fabiano */
803 return bdz_ph_packed_size(mphf);
804 case CMPH_CHD_PH: /* included -- Fabiano */
805 return chd_ph_packed_size(mphf);
806 case CMPH_CHD: /* included -- Fabiano */
807 return chd_packed_size(mphf);
808 default:
809 assert(0);
810 }
811 return 0; // FAILURE
812 }
813
814 /** cmph_uint32 cmph_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
815 * \brief Use the packed mphf to do a search.
816 * \param packed_mphf pointer to the packed mphf
817 * \param key key to be hashed
818 * \param keylen key legth in bytes
819 * \return The mphf value
820 */
cmph_search_packed(void * packed_mphf,const char * key,cmph_uint32 keylen)821 cmph_uint32 cmph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
822 {
823 cmph_uint32 *ptr = (cmph_uint32 *)packed_mphf;
824 // fprintf(stderr, "algo:%u\n", *ptr);
825 switch(*ptr)
826 {
827 case CMPH_CHM:
828 return chm_search_packed(++ptr, key, keylen);
829 case CMPH_BMZ: /* included -- Fabiano */
830 return bmz_search_packed(++ptr, key, keylen);
831 case CMPH_BMZ8: /* included -- Fabiano */
832 return bmz8_search_packed(++ptr, key, keylen);
833 case CMPH_BRZ: /* included -- Fabiano */
834 return brz_search_packed(++ptr, key, keylen);
835 case CMPH_FCH: /* included -- Fabiano */
836 return fch_search_packed(++ptr, key, keylen);
837 case CMPH_BDZ: /* included -- Fabiano */
838 return bdz_search_packed(++ptr, key, keylen);
839 case CMPH_BDZ_PH: /* included -- Fabiano */
840 return bdz_ph_search_packed(++ptr, key, keylen);
841 case CMPH_CHD_PH: /* included -- Fabiano */
842 return chd_ph_search_packed(++ptr, key, keylen);
843 case CMPH_CHD: /* included -- Fabiano */
844 return chd_search_packed(++ptr, key, keylen);
845 default:
846 assert(0);
847 }
848 return 0; // FAILURE
849 }
850