1 #include "bdz.h"
2 #include "cmph_structs.h"
3 #include "bdz_structs.h"
4 #include "hash.h"
5 #include "bitbool.h"
6 
7 #include <math.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <assert.h>
11 #include <string.h>
12 // #define DEBUG
13 #include "debug.h"
14 #define UNASSIGNED 3U
15 #define NULL_EDGE 0xffffffff
16 
17 //cmph_uint32 ngrafos = 0;
18 //cmph_uint32 ngrafos_aciclicos = 0;
19 // table used for looking up the number of assigned vertices  a 8-bit integer
20 const cmph_uint8 bdz_lookup_table[] =
21 {
22 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
23 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
24 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
25 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
26 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
27 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
28 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
29 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
30 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
31 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
32 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 3, 3, 3, 2,
33 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
34 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
35 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
36 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 2, 2, 1,
37 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 0
38 };
39 
40 typedef struct
41 {
42 	cmph_uint32 vertices[3];
43 	cmph_uint32 next_edges[3];
44 }bdz_edge_t;
45 
46 typedef cmph_uint32 * bdz_queue_t;
47 
bdz_alloc_queue(bdz_queue_t * queuep,cmph_uint32 nedges)48 static void bdz_alloc_queue(bdz_queue_t * queuep, cmph_uint32 nedges)
49 {
50 	(*queuep)=(cmph_uint32 *)malloc(nedges*sizeof(cmph_uint32));
51 };
bdz_free_queue(bdz_queue_t * queue)52 static void bdz_free_queue(bdz_queue_t * queue)
53 {
54 	free(*queue);
55 };
56 
57 typedef struct
58 {
59 	cmph_uint32 nedges;
60 	bdz_edge_t * edges;
61 	cmph_uint32 * first_edge;
62 	cmph_uint8 * vert_degree;
63 }bdz_graph3_t;
64 
65 
bdz_alloc_graph3(bdz_graph3_t * graph3,cmph_uint32 nedges,cmph_uint32 nvertices)66 static void bdz_alloc_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
67 {
68 	graph3->edges=(bdz_edge_t *)malloc(nedges*sizeof(bdz_edge_t));
69 	graph3->first_edge=(cmph_uint32 *)malloc(nvertices*sizeof(cmph_uint32));
70 	graph3->vert_degree=(cmph_uint8 *)malloc((size_t)nvertices);
71 };
bdz_init_graph3(bdz_graph3_t * graph3,cmph_uint32 nedges,cmph_uint32 nvertices)72 static void bdz_init_graph3(bdz_graph3_t * graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
73 {
74 	memset(graph3->first_edge,0xff,nvertices*sizeof(cmph_uint32));
75 	memset(graph3->vert_degree,0,(size_t)nvertices);
76 	graph3->nedges=0;
77 };
bdz_free_graph3(bdz_graph3_t * graph3)78 static void bdz_free_graph3(bdz_graph3_t *graph3)
79 {
80 	free(graph3->edges);
81 	free(graph3->first_edge);
82 	free(graph3->vert_degree);
83 };
84 
bdz_partial_free_graph3(bdz_graph3_t * graph3)85 static void bdz_partial_free_graph3(bdz_graph3_t *graph3)
86 {
87 	free(graph3->first_edge);
88 	free(graph3->vert_degree);
89 	graph3->first_edge = NULL;
90 	graph3->vert_degree = NULL;
91 };
92 
bdz_add_edge(bdz_graph3_t * graph3,cmph_uint32 v0,cmph_uint32 v1,cmph_uint32 v2)93 static void bdz_add_edge(bdz_graph3_t * graph3, cmph_uint32 v0, cmph_uint32 v1, cmph_uint32 v2)
94 {
95 	graph3->edges[graph3->nedges].vertices[0]=v0;
96 	graph3->edges[graph3->nedges].vertices[1]=v1;
97 	graph3->edges[graph3->nedges].vertices[2]=v2;
98 	graph3->edges[graph3->nedges].next_edges[0]=graph3->first_edge[v0];
99 	graph3->edges[graph3->nedges].next_edges[1]=graph3->first_edge[v1];
100 	graph3->edges[graph3->nedges].next_edges[2]=graph3->first_edge[v2];
101 	graph3->first_edge[v0]=graph3->first_edge[v1]=graph3->first_edge[v2]=graph3->nedges;
102 	graph3->vert_degree[v0]++;
103 	graph3->vert_degree[v1]++;
104 	graph3->vert_degree[v2]++;
105 	graph3->nedges++;
106 };
107 
bdz_dump_graph(bdz_graph3_t * graph3,cmph_uint32 nedges,cmph_uint32 nvertices)108 static void bdz_dump_graph(bdz_graph3_t* graph3, cmph_uint32 nedges, cmph_uint32 nvertices)
109 {
110 	cmph_uint32 i;
111 	for(i=0;i<nedges;i++){
112 		printf("\nedge %d %d %d %d ",i,graph3->edges[i].vertices[0],
113 			graph3->edges[i].vertices[1],graph3->edges[i].vertices[2]);
114 		printf(" nexts %d %d %d",graph3->edges[i].next_edges[0],
115 				graph3->edges[i].next_edges[1],graph3->edges[i].next_edges[2]);
116 	};
117 
118         #ifdef DEBUG
119 	for(i=0;i<nvertices;i++){
120 		printf("\nfirst for vertice %d %d ",i,graph3->first_edge[i]);
121 
122 	};
123         #endif
124 };
125 
bdz_remove_edge(bdz_graph3_t * graph3,cmph_uint32 curr_edge)126 static void bdz_remove_edge(bdz_graph3_t * graph3, cmph_uint32 curr_edge)
127 {
128 	cmph_uint32 i,j=0,vert,edge1,edge2;
129 	for(i=0;i<3;i++){
130 		vert=graph3->edges[curr_edge].vertices[i];
131 		edge1=graph3->first_edge[vert];
132 		edge2=NULL_EDGE;
133 		while(edge1!=curr_edge&&edge1!=NULL_EDGE){
134 			edge2=edge1;
135 			if(graph3->edges[edge1].vertices[0]==vert){
136 				j=0;
137 			} else if(graph3->edges[edge1].vertices[1]==vert){
138 				j=1;
139 			} else
140 				j=2;
141 			edge1=graph3->edges[edge1].next_edges[j];
142 		};
143 		if(edge1==NULL_EDGE){
144 			printf("\nerror remove edge %d dump graph",curr_edge);
145 			bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
146 			exit(-1);
147 		};
148 
149 		if(edge2!=NULL_EDGE){
150 			graph3->edges[edge2].next_edges[j] =
151 				graph3->edges[edge1].next_edges[i];
152 		} else
153 			graph3->first_edge[vert]=
154 				graph3->edges[edge1].next_edges[i];
155 		graph3->vert_degree[vert]--;
156 	};
157 
158 };
159 
bdz_generate_queue(cmph_uint32 nedges,cmph_uint32 nvertices,bdz_queue_t queue,bdz_graph3_t * graph3)160 static int bdz_generate_queue(cmph_uint32 nedges, cmph_uint32 nvertices, bdz_queue_t queue, bdz_graph3_t* graph3)
161 {
162 	cmph_uint32 i,v0,v1,v2;
163 	cmph_uint32 queue_head=0,queue_tail=0;
164 	cmph_uint32 curr_edge;
165 	cmph_uint32 tmp_edge;
166 	cmph_uint8 * marked_edge = (cmph_uint8 *)malloc((size_t)(nedges >> 3) + 1);
167 	memset(marked_edge, 0, (size_t)(nedges >> 3) + 1);
168 
169 	for(i=0;i<nedges;i++){
170 		v0=graph3->edges[i].vertices[0];
171 		v1=graph3->edges[i].vertices[1];
172 		v2=graph3->edges[i].vertices[2];
173 		if(graph3->vert_degree[v0]==1 ||
174 				graph3->vert_degree[v1]==1 ||
175 				graph3->vert_degree[v2]==1){
176 			if(!GETBIT(marked_edge,i)) {
177 				queue[queue_head++]=i;
178 				SETBIT(marked_edge,i);
179 			}
180 		};
181 	};
182         DEBUGP("Queue head %d Queue tail %d\n", queue_head, queue_tail);
183         #ifdef DEBUG
184 	bdz_dump_graph(graph3,graph3->nedges,graph3->nedges+graph3->nedges/4);
185         #endif
186 	while(queue_tail!=queue_head){
187 		curr_edge=queue[queue_tail++];
188 		bdz_remove_edge(graph3,curr_edge);
189 		DEBUGP("Removing edge %d\n", curr_edge);
190 		v0=graph3->edges[curr_edge].vertices[0];
191 		v1=graph3->edges[curr_edge].vertices[1];
192 		v2=graph3->edges[curr_edge].vertices[2];
193 		if(graph3->vert_degree[v0]==1 ) {
194 			tmp_edge=graph3->first_edge[v0];
195 			if(!GETBIT(marked_edge,tmp_edge)) {
196 				queue[queue_head++]=tmp_edge;
197 				SETBIT(marked_edge,tmp_edge);
198 			};
199 
200 		};
201 		if(graph3->vert_degree[v1]==1) {
202 			tmp_edge=graph3->first_edge[v1];
203 			if(!GETBIT(marked_edge,tmp_edge)){
204 				queue[queue_head++]=tmp_edge;
205 				SETBIT(marked_edge,tmp_edge);
206 			};
207 
208 		};
209 		if(graph3->vert_degree[v2]==1){
210 			tmp_edge=graph3->first_edge[v2];
211 			if(!GETBIT(marked_edge,tmp_edge)){
212 				queue[queue_head++]=tmp_edge;
213 				SETBIT(marked_edge,tmp_edge);
214 			};
215 		};
216 	};
217 	free(marked_edge);
218 	return (int)(queue_head-nedges);/* returns 0 if successful otherwies return negative number*/
219 };
220 
221 static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue);
222 static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue);
223 static void ranking(bdz_config_data_t *bdz);
224 static cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex);
225 
bdz_config_new(void)226 bdz_config_data_t *bdz_config_new(void)
227 {
228 	bdz_config_data_t *bdz;
229 	bdz = (bdz_config_data_t *)malloc(sizeof(bdz_config_data_t));
230         if (!bdz) return NULL;
231 	memset(bdz, 0, sizeof(bdz_config_data_t));
232 	bdz->hashfunc = CMPH_HASH_JENKINS;
233 	bdz->g = NULL;
234 	bdz->hl = NULL;
235 	bdz->k = 0; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
236 	bdz->b = 7; // number of bits of k
237 	bdz->ranktablesize = 0; //number of entries in ranktable, $n/k +1$
238 	bdz->ranktable = NULL; // rank table
239 	return bdz;
240 }
241 
bdz_config_destroy(cmph_config_t * mph)242 void bdz_config_destroy(cmph_config_t *mph)
243 {
244 	bdz_config_data_t *data = (bdz_config_data_t *)mph->data;
245 	DEBUGP("Destroying algorithm dependent data\n");
246 	free(data);
247 }
248 
bdz_config_set_b(cmph_config_t * mph,cmph_uint32 b)249 void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b)
250 {
251 	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
252 	if (b <= 2 || b > 10) b = 7; // validating restrictions over parameter b.
253 	bdz->b = (cmph_uint8)b;
254 	DEBUGP("b: %u\n", b);
255 
256 }
257 
bdz_config_set_hashfuncs(cmph_config_t * mph,CMPH_HASH * hashfuncs)258 void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs)
259 {
260 	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
261 	CMPH_HASH *hashptr = hashfuncs;
262 	cmph_uint32 i = 0;
263 	while(*hashptr != CMPH_HASH_COUNT)
264 	{
265 		if (i >= 1) break; //bdz only uses one linear hash function
266 		bdz->hashfunc = *hashptr;
267 		++i, ++hashptr;
268 	}
269 }
270 
bdz_new(cmph_config_t * mph,double c)271 cmph_t *bdz_new(cmph_config_t *mph, double c)
272 {
273 	cmph_t *mphf = NULL;
274 	bdz_data_t *bdzf = NULL;
275 	cmph_uint32 iterations;
276 	bdz_queue_t edges;
277 	bdz_graph3_t graph3;
278 	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
279 	#ifdef CMPH_TIMING
280 	double construction_time_begin = 0.0;
281 	double construction_time = 0.0;
282 	ELAPSED_TIME_IN_SECONDS(&construction_time_begin);
283 	#endif
284 
285 
286 	if (c == 0) c = 1.23; // validating restrictions over parameter c.
287 	DEBUGP("c: %f\n", c);
288 	bdz->m = mph->key_source->nkeys;
289 	bdz->r = (cmph_uint32)ceil((c * mph->key_source->nkeys)/3);
290 	if ((bdz->r % 2) == 0) bdz->r+=1;
291 	bdz->n = 3*bdz->r;
292 
293 	bdz->k = (1U << bdz->b);
294 	DEBUGP("b: %u -- k: %u\n", bdz->b, bdz->k);
295 
296 	bdz->ranktablesize = (cmph_uint32)ceil(bdz->n/(double)bdz->k);
297 	DEBUGP("ranktablesize: %u\n", bdz->ranktablesize);
298 
299 
300 	bdz_alloc_graph3(&graph3, bdz->m, bdz->n);
301 	bdz_alloc_queue(&edges,bdz->m);
302 	DEBUGP("Created hypergraph\n");
303 
304 	DEBUGP("m (edges): %u n (vertices): %u  r: %u c: %f \n", bdz->m, bdz->n, bdz->r, c);
305 
306 	// Mapping step
307 	iterations = 1000;
308 	if (mph->verbosity)
309 	{
310 		fprintf(stderr, "Entering mapping step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
311 	}
312 	while(1)
313 	{
314 		int ok;
315 		DEBUGP("linear hash function \n");
316 		bdz->hl = hash_state_new(bdz->hashfunc, 15);
317 
318 		ok = bdz_mapping(mph, &graph3, edges);
319                 //ok = 0;
320 		if (!ok)
321 		{
322 			--iterations;
323 			hash_state_destroy(bdz->hl);
324 			bdz->hl = NULL;
325 			DEBUGP("%u iterations remaining\n", iterations);
326 			if (mph->verbosity)
327 			{
328 				fprintf(stderr, "acyclic graph creation failure - %u iterations remaining\n", iterations);
329 			}
330 			if (iterations == 0) break;
331 		}
332 		else break;
333 	}
334 
335 	if (iterations == 0)
336 	{
337 		bdz_free_queue(&edges);
338 		bdz_free_graph3(&graph3);
339 		return NULL;
340 	}
341 	bdz_partial_free_graph3(&graph3);
342 	// Assigning step
343 	if (mph->verbosity)
344 	{
345 		fprintf(stderr, "Entering assigning step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
346 	}
347 	assigning(bdz, &graph3, edges);
348 
349 	bdz_free_queue(&edges);
350 	bdz_free_graph3(&graph3);
351 	if (mph->verbosity)
352 	{
353 		fprintf(stderr, "Entering ranking step for mph creation of %u keys with graph sized %u\n", bdz->m, bdz->n);
354 	}
355 	ranking(bdz);
356 	#ifdef CMPH_TIMING
357 	ELAPSED_TIME_IN_SECONDS(&construction_time);
358 	#endif
359 	mphf = (cmph_t *)malloc(sizeof(cmph_t));
360 	mphf->algo = mph->algo;
361 	bdzf = (bdz_data_t *)malloc(sizeof(bdz_data_t));
362 	bdzf->g = bdz->g;
363 	bdz->g = NULL; //transfer memory ownership
364 	bdzf->hl = bdz->hl;
365 	bdz->hl = NULL; //transfer memory ownership
366 	bdzf->ranktable = bdz->ranktable;
367 	bdz->ranktable = NULL; //transfer memory ownership
368 	bdzf->ranktablesize = bdz->ranktablesize;
369 	bdzf->k = bdz->k;
370 	bdzf->b = bdz->b;
371 	bdzf->n = bdz->n;
372 	bdzf->m = bdz->m;
373 	bdzf->r = bdz->r;
374 	mphf->data = bdzf;
375 	mphf->size = bdz->m;
376 
377 	DEBUGP("Successfully generated minimal perfect hash\n");
378 	if (mph->verbosity)
379 	{
380 		fprintf(stderr, "Successfully generated minimal perfect hash function\n");
381 	}
382 
383 
384 	#ifdef CMPH_TIMING
385 	register cmph_uint32 space_usage = bdz_packed_size(mphf)*8;
386 	register cmph_uint32 keys_per_bucket = 1;
387 	construction_time = construction_time - construction_time_begin;
388 	fprintf(stdout, "%u\t%.2f\t%u\t%.4f\t%.4f\n", bdz->m, bdz->m/(double)bdz->n, keys_per_bucket, construction_time, space_usage/(double)bdz->m);
389 	#endif
390 
391 	return mphf;
392 }
393 
394 
bdz_mapping(cmph_config_t * mph,bdz_graph3_t * graph3,bdz_queue_t queue)395 static int bdz_mapping(cmph_config_t *mph, bdz_graph3_t* graph3, bdz_queue_t queue)
396 {
397 	cmph_uint32 e;
398 	int cycles = 0;
399 	cmph_uint32 hl[3];
400 	bdz_config_data_t *bdz = (bdz_config_data_t *)mph->data;
401 	bdz_init_graph3(graph3, bdz->m, bdz->n);
402 	mph->key_source->rewind(mph->key_source->data);
403 	for (e = 0; e < mph->key_source->nkeys; ++e)
404 	{
405 		cmph_uint32 h0, h1, h2;
406 		cmph_uint32 keylen;
407 		char *key = NULL;
408 		mph->key_source->read(mph->key_source->data, &key, &keylen);
409 		hash_vector(bdz->hl, key, keylen,hl);
410 		h0 = hl[0] % bdz->r;
411 		h1 = hl[1] % bdz->r + bdz->r;
412 		h2 = hl[2] % bdz->r + (bdz->r << 1);
413                 DEBUGP("Key: %.*s (%u %u %u)\n", keylen, key, h0, h1, h2);
414 		mph->key_source->dispose(mph->key_source->data, key, keylen);
415 		bdz_add_edge(graph3,h0,h1,h2);
416 	}
417 	cycles = bdz_generate_queue(bdz->m, bdz->n, queue, graph3);
418 	return (cycles == 0);
419 }
420 
assigning(bdz_config_data_t * bdz,bdz_graph3_t * graph3,bdz_queue_t queue)421 static void assigning(bdz_config_data_t *bdz, bdz_graph3_t* graph3, bdz_queue_t queue)
422 {
423 	cmph_uint32 i;
424 	cmph_uint32 nedges=graph3->nedges;
425 	cmph_uint32 curr_edge;
426 	cmph_uint32 v0,v1,v2;
427 	cmph_uint8 * marked_vertices = (cmph_uint8 *)malloc((size_t)(bdz->n >> 3) + 1);
428         cmph_uint32 sizeg = (cmph_uint32)ceil(bdz->n/4.0);
429 	bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
430 	memset(marked_vertices, 0, (size_t)(bdz->n >> 3) + 1);
431 	memset(bdz->g, 0xff, (size_t)(sizeg));
432 
433 	for(i=nedges-1;i+1>=1;i--){
434 		curr_edge=queue[i];
435 		v0=graph3->edges[curr_edge].vertices[0];
436 		v1=graph3->edges[curr_edge].vertices[1];
437 		v2=graph3->edges[curr_edge].vertices[2];
438 		DEBUGP("B:%u %u %u -- %u %u %u edge %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2), curr_edge);
439 		if(!GETBIT(marked_vertices, v0)){
440 			if(!GETBIT(marked_vertices,v1))
441 			{
442 				SETVALUE1(bdz->g, v1, UNASSIGNED);
443 				SETBIT(marked_vertices, v1);
444 			}
445 			if(!GETBIT(marked_vertices,v2))
446 			{
447 				SETVALUE1(bdz->g, v2, UNASSIGNED);
448 				SETBIT(marked_vertices, v2);
449 			}
450 			SETVALUE1(bdz->g, v0, (6-(GETVALUE(bdz->g, v1) + GETVALUE(bdz->g,v2)))%3);
451 			SETBIT(marked_vertices, v0);
452 		} else if(!GETBIT(marked_vertices, v1)) {
453 			if(!GETBIT(marked_vertices, v2))
454 			{
455 				SETVALUE1(bdz->g, v2, UNASSIGNED);
456 				SETBIT(marked_vertices, v2);
457 			}
458 			SETVALUE1(bdz->g, v1, (7-(GETVALUE(bdz->g, v0)+GETVALUE(bdz->g, v2)))%3);
459 			SETBIT(marked_vertices, v1);
460 		}else {
461 			SETVALUE1(bdz->g, v2, (8-(GETVALUE(bdz->g,v0)+GETVALUE(bdz->g, v1)))%3);
462 			SETBIT(marked_vertices, v2);
463 		}
464 		DEBUGP("A:%u %u %u -- %u %u %u\n", v0, v1, v2, GETVALUE(bdz->g, v0), GETVALUE(bdz->g, v1), GETVALUE(bdz->g, v2));
465 	};
466 	free(marked_vertices);
467 }
468 
469 
ranking(bdz_config_data_t * bdz)470 static void ranking(bdz_config_data_t *bdz)
471 {
472 	cmph_uint32 i, j, offset = 0U, count = 0U, size = (bdz->k >> 2U), nbytes_total = (cmph_uint32)ceil(bdz->n/4.0), nbytes;
473 	bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
474 	// ranktable computation
475 	bdz->ranktable[0] = 0;
476 	i = 1;
477 	while(1)
478 	{
479 		if(i == bdz->ranktablesize) break;
480 		nbytes = size < nbytes_total? size : nbytes_total;
481 		for(j = 0; j < nbytes; j++)
482 		{
483 			count += bdz_lookup_table[*(bdz->g + offset + j)];
484 		}
485 		bdz->ranktable[i] = count;
486 		offset += nbytes;
487 		nbytes_total -= size;
488 		i++;
489 	}
490 }
491 
492 
bdz_dump(cmph_t * mphf,FILE * fd)493 int bdz_dump(cmph_t *mphf, FILE *fd)
494 {
495 	char *buf = NULL;
496 	cmph_uint32 buflen;
497 	register size_t nbytes;
498 	bdz_data_t *data = (bdz_data_t *)mphf->data;
499 	__cmph_dump(mphf, fd);
500 
501 	hash_state_dump(data->hl, &buf, &buflen);
502 	DEBUGP("Dumping hash state with %u bytes to disk\n", buflen);
503 	nbytes = fwrite(&buflen, sizeof(cmph_uint32), (size_t)1, fd);
504 	nbytes = fwrite(buf, (size_t)buflen, (size_t)1, fd);
505 	free(buf);
506 
507 	nbytes = fwrite(&(data->n), sizeof(cmph_uint32), (size_t)1, fd);
508 	nbytes = fwrite(&(data->m), sizeof(cmph_uint32), (size_t)1, fd);
509 	nbytes = fwrite(&(data->r), sizeof(cmph_uint32), (size_t)1, fd);
510 
511 	cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0);
512 	nbytes = fwrite(data->g, sizeof(cmph_uint8)*sizeg, (size_t)1, fd);
513 
514 	nbytes = fwrite(&(data->k), sizeof(cmph_uint32), (size_t)1, fd);
515 	nbytes = fwrite(&(data->b), sizeof(cmph_uint8), (size_t)1, fd);
516 	nbytes = fwrite(&(data->ranktablesize), sizeof(cmph_uint32), (size_t)1, fd);
517 
518 	nbytes = fwrite(data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize), (size_t)1, fd);
519 	#ifdef DEBUG
520 	cmph_uint32 i;
521 	fprintf(stderr, "G: ");
522 	for (i = 0; i < data->n; ++i) fprintf(stderr, "%u ", GETVALUE(data->g, i));
523 	fprintf(stderr, "\n");
524 	#endif
525 	return 1;
526 }
527 
bdz_load(FILE * f,cmph_t * mphf)528 void bdz_load(FILE *f, cmph_t *mphf)
529 {
530 	char *buf = NULL;
531 	cmph_uint32 buflen, sizeg;
532 	register size_t nbytes;
533 	bdz_data_t *bdz = (bdz_data_t *)malloc(sizeof(bdz_data_t));
534 
535 	DEBUGP("Loading bdz mphf\n");
536 	mphf->data = bdz;
537 
538 	nbytes = fread(&buflen, sizeof(cmph_uint32), (size_t)1, f);
539 	DEBUGP("Hash state has %u bytes\n", buflen);
540 	buf = (char *)malloc((size_t)buflen);
541 	nbytes = fread(buf, (size_t)buflen, (size_t)1, f);
542 	bdz->hl = hash_state_load(buf, buflen);
543 	free(buf);
544 
545 
546 	DEBUGP("Reading m and n\n");
547 	nbytes = fread(&(bdz->n), sizeof(cmph_uint32), (size_t)1, f);
548 	nbytes = fread(&(bdz->m), sizeof(cmph_uint32), (size_t)1, f);
549 	nbytes = fread(&(bdz->r), sizeof(cmph_uint32), (size_t)1, f);
550 	sizeg = (cmph_uint32)ceil(bdz->n/4.0);
551 	bdz->g = (cmph_uint8 *)calloc((size_t)(sizeg), sizeof(cmph_uint8));
552 	nbytes = fread(bdz->g, sizeg*sizeof(cmph_uint8), (size_t)1, f);
553 
554 	nbytes = fread(&(bdz->k), sizeof(cmph_uint32), (size_t)1, f);
555 	nbytes = fread(&(bdz->b), sizeof(cmph_uint8), (size_t)1, f);
556 	nbytes = fread(&(bdz->ranktablesize), sizeof(cmph_uint32), (size_t)1, f);
557 
558 	bdz->ranktable = (cmph_uint32 *)calloc((size_t)bdz->ranktablesize, sizeof(cmph_uint32));
559 	nbytes = fread(bdz->ranktable, sizeof(cmph_uint32)*(bdz->ranktablesize), (size_t)1, f);
560 
561 	#ifdef DEBUG
562 	cmph_uint32  i = 0;
563 	fprintf(stderr, "G: ");
564 	for (i = 0; i < bdz->n; ++i) fprintf(stderr, "%u ", GETVALUE(bdz->g,i));
565 	fprintf(stderr, "\n");
566 	#endif
567 	return;
568 }
569 
570 
rank(cmph_uint32 b,cmph_uint32 * ranktable,cmph_uint8 * g,cmph_uint32 vertex)571 static inline cmph_uint32 rank(cmph_uint32 b, cmph_uint32 * ranktable, cmph_uint8 * g, cmph_uint32 vertex)
572 {
573 	register cmph_uint32 index = vertex >> b;
574 	register cmph_uint32 base_rank = ranktable[index];
575 	register cmph_uint32 beg_idx_v = index << b;
576 	register cmph_uint32 beg_idx_b = beg_idx_v >> 2;
577 	register cmph_uint32 end_idx_b = vertex >> 2;
578 	while(beg_idx_b < end_idx_b)
579 	{
580 		base_rank += bdz_lookup_table[*(g + beg_idx_b++)];
581 
582 	}
583         DEBUGP("base rank %u\n", base_rank);
584 	beg_idx_v = beg_idx_b << 2;
585         DEBUGP("beg_idx_v %u\n", beg_idx_v);
586 	while(beg_idx_v < vertex)
587 	{
588 		if(GETVALUE(g, beg_idx_v) != UNASSIGNED) base_rank++;
589 		beg_idx_v++;
590 	}
591 
592 	return base_rank;
593 }
594 
bdz_search(cmph_t * mphf,const char * key,cmph_uint32 keylen)595 cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen)
596 {
597 	register cmph_uint32 vertex;
598 	register bdz_data_t *bdz = (bdz_data_t *)mphf->data;
599 	cmph_uint32 hl[3];
600 	hash_vector(bdz->hl, key, keylen, hl);
601 	hl[0] = hl[0] % bdz->r;
602 	hl[1] = hl[1] % bdz->r + bdz->r;
603 	hl[2] = hl[2] % bdz->r + (bdz->r << 1);
604 	vertex = hl[(GETVALUE(bdz->g, hl[0]) + GETVALUE(bdz->g, hl[1]) + GETVALUE(bdz->g, hl[2])) % 3];
605         DEBUGP("Search found vertex %u\n", vertex);
606 	return rank(bdz->b, bdz->ranktable, bdz->g, vertex);
607 }
608 
609 
bdz_destroy(cmph_t * mphf)610 void bdz_destroy(cmph_t *mphf)
611 {
612 	bdz_data_t *data = (bdz_data_t *)mphf->data;
613 	free(data->g);
614 	hash_state_destroy(data->hl);
615 	free(data->ranktable);
616 	free(data);
617 	free(mphf);
618 }
619 
620 /** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
621  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
622  *  \param mphf pointer to the resulting mphf
623  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size()
624  */
bdz_pack(cmph_t * mphf,void * packed_mphf)625 void bdz_pack(cmph_t *mphf, void *packed_mphf)
626 {
627 	bdz_data_t *data = (bdz_data_t *)mphf->data;
628 	cmph_uint8 * ptr = (cmph_uint8 *)packed_mphf;
629 
630 	// packing hl type
631 	CMPH_HASH hl_type = hash_get_type(data->hl);
632 	*((cmph_uint32 *) ptr) = hl_type;
633 	ptr += sizeof(cmph_uint32);
634 
635 	// packing hl
636 	hash_state_pack(data->hl, ptr);
637 	ptr += hash_state_packed_size(hl_type);
638 
639 	// packing r
640 	*((cmph_uint32 *) ptr) = data->r;
641 	ptr += sizeof(data->r);
642 
643 	// packing ranktablesize
644 	*((cmph_uint32 *) ptr) = data->ranktablesize;
645 	ptr += sizeof(data->ranktablesize);
646 
647 	// packing ranktable
648 	memcpy(ptr, data->ranktable, sizeof(cmph_uint32)*(data->ranktablesize));
649 	ptr += sizeof(cmph_uint32)*(data->ranktablesize);
650 
651 	// packing b
652 	*ptr++ = data->b;
653 
654 	// packing g
655 	cmph_uint32 sizeg = (cmph_uint32)ceil(data->n/4.0);
656 	memcpy(ptr, data->g,  sizeof(cmph_uint8)*sizeg);
657 }
658 
659 /** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
660  *  \brief Return the amount of space needed to pack mphf.
661  *  \param mphf pointer to a mphf
662  *  \return the size of the packed function or zero for failures
663  */
bdz_packed_size(cmph_t * mphf)664 cmph_uint32 bdz_packed_size(cmph_t *mphf)
665 {
666 	bdz_data_t *data = (bdz_data_t *)mphf->data;
667 
668 	CMPH_HASH hl_type = hash_get_type(data->hl);
669 
670 	return (cmph_uint32)(sizeof(CMPH_ALGO) + hash_state_packed_size(hl_type) + 3*sizeof(cmph_uint32) + sizeof(cmph_uint32)*(data->ranktablesize) + sizeof(cmph_uint8) + sizeof(cmph_uint8)* (cmph_uint32)(ceil(data->n/4.0)));
671 }
672 
673 /** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen);
674  *  \brief Use the packed mphf to do a search.
675  *  \param  packed_mphf pointer to the packed mphf
676  *  \param key key to be hashed
677  *  \param keylen key legth in bytes
678  *  \return The mphf value
679  */
bdz_search_packed(void * packed_mphf,const char * key,cmph_uint32 keylen)680 cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen)
681 {
682 
683 	register cmph_uint32 vertex;
684 	register CMPH_HASH hl_type  = (CMPH_HASH)(*(cmph_uint32 *)packed_mphf);
685 	register cmph_uint8 *hl_ptr = (cmph_uint8 *)(packed_mphf) + 4;
686 
687 	register cmph_uint32 *ranktable = (cmph_uint32*)(hl_ptr + hash_state_packed_size(hl_type));
688 
689 	register cmph_uint32 r = *ranktable++;
690 	register cmph_uint32 ranktablesize = *ranktable++;
691 	register cmph_uint8 * g = (cmph_uint8 *)(ranktable + ranktablesize);
692 	register cmph_uint8 b = *g++;
693 
694 	cmph_uint32 hl[3];
695 	hash_vector_packed(hl_ptr, hl_type, key, keylen, hl);
696 	hl[0] = hl[0] % r;
697 	hl[1] = hl[1] % r + r;
698 	hl[2] = hl[2] % r + (r << 1);
699 	vertex = hl[(GETVALUE(g, hl[0]) + GETVALUE(g, hl[1]) + GETVALUE(g, hl[2])) % 3];
700 	return rank(b, ranktable, g, vertex);
701 }
702