1 #include<stdlib.h>
2 #include<stdio.h>
3 #include<limits.h>
4 #include<string.h>
5 #include"compressed_rank.h"
6 #include"bitbool.h"
7 // #define DEBUG
8 #include"debug.h"
compressed_rank_i_log2(cmph_uint32 x)9 static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
10 {
11 	register cmph_uint32 res = 0;
12 
13 	while(x > 1)
14 	{
15 		x >>= 1;
16 		res++;
17 	}
18 	return res;
19 };
20 
compressed_rank_init(compressed_rank_t * cr)21 void compressed_rank_init(compressed_rank_t * cr)
22 {
23 	cr->max_val = 0;
24 	cr->n = 0;
25 	cr->rem_r = 0;
26 	select_init(&cr->sel);
27 	cr->vals_rems = 0;
28 }
29 
compressed_rank_destroy(compressed_rank_t * cr)30 void compressed_rank_destroy(compressed_rank_t * cr)
31 {
32 	free(cr->vals_rems);
33 	cr->vals_rems = 0;
34 	select_destroy(&cr->sel);
35 }
36 
compressed_rank_generate(compressed_rank_t * cr,cmph_uint32 * vals_table,cmph_uint32 n)37 void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
38 {
39 	register cmph_uint32 i,j;
40 	register cmph_uint32 rems_mask;
41 	register cmph_uint32 * select_vec = 0;
42 	cr->n = n;
43 	cr->max_val = vals_table[cr->n - 1];
44 	cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
45 	if(cr->rem_r == 0)
46 	{
47 		cr->rem_r = 1;
48 	}
49 	select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
50 	cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
51 	rems_mask = (1U << cr->rem_r) - 1U;
52 
53 	for(i = 0; i < cr->n; i++)
54 	{
55 		set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
56 	}
57 
58 	for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
59 	{
60 		while(i > (vals_table[j] >> cr->rem_r))
61 		{
62 			j++;
63 		}
64 		select_vec[i - 1] = j;
65 	};
66 
67 
68 	// FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
69 	// I changed the select structure to work up to m, instead of up to m - 1.
70 	select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
71 
72 	free(select_vec);
73 }
74 
compressed_rank_query(compressed_rank_t * cr,cmph_uint32 idx)75 cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
76 {
77 	register cmph_uint32 rems_mask;
78 	register cmph_uint32 val_quot, val_rem;
79 	register cmph_uint32 sel_res, rank;
80 
81 	if(idx > cr->max_val)
82 	{
83 		return cr->n;
84 	}
85 
86 	val_quot = idx >> cr->rem_r;
87 	rems_mask = (1U << cr->rem_r) - 1U;
88 	val_rem = idx & rems_mask;
89 	if(val_quot == 0)
90 	{
91 		rank = sel_res = 0;
92 	}
93 	else
94 	{
95 		sel_res = select_query(&cr->sel, val_quot - 1) + 1;
96 		rank = sel_res - val_quot;
97 	}
98 
99 	do
100 	{
101 		if(GETBIT32(cr->sel.bits_vec, sel_res))
102 		{
103 			break;
104 		}
105 		if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
106 		{
107 			break;
108 		}
109 		sel_res++;
110 		rank++;
111 	} while(1);
112 
113 	return rank;
114 }
115 
compressed_rank_get_space_usage(compressed_rank_t * cr)116 cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
117 {
118 	register cmph_uint32 space_usage = select_get_space_usage(&cr->sel);
119 	space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
120 	space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8;
121 	return space_usage;
122 }
123 
compressed_rank_dump(compressed_rank_t * cr,char ** buf,cmph_uint32 * buflen)124 void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
125 {
126 	register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
127 	register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
128 	register cmph_uint32 pos = 0;
129 	char * buf_sel = 0;
130 	cmph_uint32 buflen_sel = 0;
131 #ifdef DEBUG
132 	cmph_uint32 i;
133 #endif
134 
135 	*buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size +  vals_rems_size;
136 
137 	DEBUGP("sel_size = %u\n", sel_size);
138 	DEBUGP("vals_rems_size = %u\n", vals_rems_size);
139 
140 	*buf = (char *)calloc(*buflen, sizeof(char));
141 
142 	if (!*buf)
143 	{
144 		*buflen = UINT_MAX;
145 		return;
146 	}
147 
148 	// dumping max_val, n and rem_r
149 	memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
150 	pos += (cmph_uint32)sizeof(cmph_uint32);
151 	DEBUGP("max_val = %u\n", cr->max_val);
152 
153 	memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
154 	pos += (cmph_uint32)sizeof(cmph_uint32);
155 	DEBUGP("n = %u\n", cr->n);
156 
157 	memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
158 	pos += (cmph_uint32)sizeof(cmph_uint32);
159 	DEBUGP("rem_r = %u\n", cr->rem_r);
160 
161 	// dumping sel
162 	select_dump(&cr->sel, &buf_sel, &buflen_sel);
163 	memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
164 	pos += (cmph_uint32)sizeof(cmph_uint32);
165 	DEBUGP("buflen_sel = %u\n", buflen_sel);
166 
167 	memcpy(*buf + pos, buf_sel, buflen_sel);
168 
169 	#ifdef DEBUG
170 	i = 0;
171 	for(i = 0; i < buflen_sel; i++)
172 	{
173 	    DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
174 	}
175 	#endif
176 	pos += buflen_sel;
177 
178 	free(buf_sel);
179 
180 	// dumping vals_rems
181 	memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
182 	#ifdef DEBUG
183 	for(i = 0; i < vals_rems_size; i++)
184 	{
185 	    DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
186 	}
187 	#endif
188 	pos += vals_rems_size;
189 
190 	DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
191 }
192 
compressed_rank_load(compressed_rank_t * cr,const char * buf,cmph_uint32 buflen)193 void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
194 {
195 	register cmph_uint32 pos = 0;
196 	cmph_uint32 buflen_sel = 0;
197 	register cmph_uint32 vals_rems_size = 0;
198 #ifdef DEBUG
199 	cmph_uint32 i;
200 #endif
201 
202 	// loading max_val, n, and rem_r
203 	memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
204 	pos += (cmph_uint32)sizeof(cmph_uint32);
205 	DEBUGP("max_val = %u\n", cr->max_val);
206 
207 	memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
208 	pos += (cmph_uint32)sizeof(cmph_uint32);
209 	DEBUGP("n = %u\n", cr->n);
210 
211 	memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
212 	pos += (cmph_uint32)sizeof(cmph_uint32);
213 	DEBUGP("rem_r = %u\n", cr->rem_r);
214 
215 	// loading sel
216 	memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
217 	pos += (cmph_uint32)sizeof(cmph_uint32);
218 	DEBUGP("buflen_sel = %u\n", buflen_sel);
219 
220 	select_load(&cr->sel, buf + pos, buflen_sel);
221 	#ifdef DEBUG
222 	i = 0;
223 	for(i = 0; i < buflen_sel; i++)
224 	{
225 	    DEBUGP("pos = %u  -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
226 	}
227 	#endif
228 	pos += buflen_sel;
229 
230 	// loading vals_rems
231 	if(cr->vals_rems)
232 	{
233 		free(cr->vals_rems);
234 	}
235 	vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
236 	cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
237 	vals_rems_size *= 4;
238 	memcpy(cr->vals_rems, buf + pos, vals_rems_size);
239 
240 	#ifdef DEBUG
241 	for(i = 0; i < vals_rems_size; i++)
242 	{
243 	    DEBUGP("pos = %u -- vals_rems_size = %u  -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
244 	}
245 	#endif
246 	pos += vals_rems_size;
247 
248 	DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
249 }
250 
251 
252 
compressed_rank_pack(compressed_rank_t * cr,void * cr_packed)253 void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
254 {
255 	if (cr && cr_packed)
256 	{
257 		char *buf = NULL;
258 		cmph_uint32 buflen = 0;
259 		compressed_rank_dump(cr, &buf, &buflen);
260 		memcpy(cr_packed, buf, buflen);
261 		free(buf);
262 	}
263 }
264 
compressed_rank_packed_size(compressed_rank_t * cr)265 cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
266 {
267 	register cmph_uint32 sel_size = select_packed_size(&cr->sel);
268 	register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
269 	return 4 * (cmph_uint32)sizeof(cmph_uint32)  + sel_size +  vals_rems_size;
270 }
271 
compressed_rank_query_packed(void * cr_packed,cmph_uint32 idx)272 cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
273 {
274 	// unpacking cr_packed
275 	register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
276 	register cmph_uint32 max_val = *ptr++;
277 	register cmph_uint32 n = *ptr++;
278 	register cmph_uint32 rem_r = *ptr++;
279 	register cmph_uint32 buflen_sel = *ptr++;
280 	register cmph_uint32 * sel_packed = ptr;
281 
282 	register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
283 
284 	register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2));
285 
286 	// compressed sequence query computation
287 	register cmph_uint32 rems_mask;
288 	register cmph_uint32 val_quot, val_rem;
289 	register cmph_uint32 sel_res, rank;
290 
291 	if(idx > max_val)
292 	{
293 		return n;
294 	}
295 
296 	val_quot = idx >> rem_r;
297 	rems_mask = (1U << rem_r) - 1U;
298 	val_rem = idx & rems_mask;
299 	if(val_quot == 0)
300 	{
301 		rank = sel_res = 0;
302 	}
303 	else
304 	{
305 		sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
306 		rank = sel_res - val_quot;
307 	}
308 
309 	do
310 	{
311 		if(GETBIT32(bits_vec, sel_res))
312 		{
313 			break;
314 		}
315 		if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)
316 		{
317 			break;
318 		}
319 		sel_res++;
320 		rank++;
321 	} while(1);
322 
323 	return rank;
324 }
325 
326 
327 
328