1 #include<stdlib.h>
2 #include<stdio.h>
3 #include<limits.h>
4 #include<string.h>
5 #include"compressed_rank.h"
6 #include"bitbool.h"
7 // #define DEBUG
8 #include"debug.h"
compressed_rank_i_log2(cmph_uint32 x)9 static inline cmph_uint32 compressed_rank_i_log2(cmph_uint32 x)
10 {
11 register cmph_uint32 res = 0;
12
13 while(x > 1)
14 {
15 x >>= 1;
16 res++;
17 }
18 return res;
19 };
20
compressed_rank_init(compressed_rank_t * cr)21 void compressed_rank_init(compressed_rank_t * cr)
22 {
23 cr->max_val = 0;
24 cr->n = 0;
25 cr->rem_r = 0;
26 select_init(&cr->sel);
27 cr->vals_rems = 0;
28 }
29
compressed_rank_destroy(compressed_rank_t * cr)30 void compressed_rank_destroy(compressed_rank_t * cr)
31 {
32 free(cr->vals_rems);
33 cr->vals_rems = 0;
34 select_destroy(&cr->sel);
35 }
36
compressed_rank_generate(compressed_rank_t * cr,cmph_uint32 * vals_table,cmph_uint32 n)37 void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n)
38 {
39 register cmph_uint32 i,j;
40 register cmph_uint32 rems_mask;
41 register cmph_uint32 * select_vec = 0;
42 cr->n = n;
43 cr->max_val = vals_table[cr->n - 1];
44 cr->rem_r = compressed_rank_i_log2(cr->max_val/cr->n);
45 if(cr->rem_r == 0)
46 {
47 cr->rem_r = 1;
48 }
49 select_vec = (cmph_uint32 *) calloc(cr->max_val >> cr->rem_r, sizeof(cmph_uint32));
50 cr->vals_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cr->n, cr->rem_r), sizeof(cmph_uint32));
51 rems_mask = (1U << cr->rem_r) - 1U;
52
53 for(i = 0; i < cr->n; i++)
54 {
55 set_bits_value(cr->vals_rems, i, vals_table[i] & rems_mask, cr->rem_r, rems_mask);
56 }
57
58 for(i = 1, j = 0; i <= cr->max_val >> cr->rem_r; i++)
59 {
60 while(i > (vals_table[j] >> cr->rem_r))
61 {
62 j++;
63 }
64 select_vec[i - 1] = j;
65 };
66
67
68 // FABIANO: before it was (cr->total_length >> cr->rem_r) + 1. But I wiped out the + 1 because
69 // I changed the select structure to work up to m, instead of up to m - 1.
70 select_generate(&cr->sel, select_vec, cr->max_val >> cr->rem_r, cr->n);
71
72 free(select_vec);
73 }
74
compressed_rank_query(compressed_rank_t * cr,cmph_uint32 idx)75 cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx)
76 {
77 register cmph_uint32 rems_mask;
78 register cmph_uint32 val_quot, val_rem;
79 register cmph_uint32 sel_res, rank;
80
81 if(idx > cr->max_val)
82 {
83 return cr->n;
84 }
85
86 val_quot = idx >> cr->rem_r;
87 rems_mask = (1U << cr->rem_r) - 1U;
88 val_rem = idx & rems_mask;
89 if(val_quot == 0)
90 {
91 rank = sel_res = 0;
92 }
93 else
94 {
95 sel_res = select_query(&cr->sel, val_quot - 1) + 1;
96 rank = sel_res - val_quot;
97 }
98
99 do
100 {
101 if(GETBIT32(cr->sel.bits_vec, sel_res))
102 {
103 break;
104 }
105 if(get_bits_value(cr->vals_rems, rank, cr->rem_r, rems_mask) >= val_rem)
106 {
107 break;
108 }
109 sel_res++;
110 rank++;
111 } while(1);
112
113 return rank;
114 }
115
compressed_rank_get_space_usage(compressed_rank_t * cr)116 cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr)
117 {
118 register cmph_uint32 space_usage = select_get_space_usage(&cr->sel);
119 space_usage += BITS_TABLE_SIZE(cr->n, cr->rem_r)*(cmph_uint32)sizeof(cmph_uint32)*8;
120 space_usage += 3*(cmph_uint32)sizeof(cmph_uint32)*8;
121 return space_usage;
122 }
123
compressed_rank_dump(compressed_rank_t * cr,char ** buf,cmph_uint32 * buflen)124 void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen)
125 {
126 register cmph_uint32 sel_size = select_packed_size(&(cr->sel));
127 register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
128 register cmph_uint32 pos = 0;
129 char * buf_sel = 0;
130 cmph_uint32 buflen_sel = 0;
131 #ifdef DEBUG
132 cmph_uint32 i;
133 #endif
134
135 *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
136
137 DEBUGP("sel_size = %u\n", sel_size);
138 DEBUGP("vals_rems_size = %u\n", vals_rems_size);
139
140 *buf = (char *)calloc(*buflen, sizeof(char));
141
142 if (!*buf)
143 {
144 *buflen = UINT_MAX;
145 return;
146 }
147
148 // dumping max_val, n and rem_r
149 memcpy(*buf, &(cr->max_val), sizeof(cmph_uint32));
150 pos += (cmph_uint32)sizeof(cmph_uint32);
151 DEBUGP("max_val = %u\n", cr->max_val);
152
153 memcpy(*buf + pos, &(cr->n), sizeof(cmph_uint32));
154 pos += (cmph_uint32)sizeof(cmph_uint32);
155 DEBUGP("n = %u\n", cr->n);
156
157 memcpy(*buf + pos, &(cr->rem_r), sizeof(cmph_uint32));
158 pos += (cmph_uint32)sizeof(cmph_uint32);
159 DEBUGP("rem_r = %u\n", cr->rem_r);
160
161 // dumping sel
162 select_dump(&cr->sel, &buf_sel, &buflen_sel);
163 memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
164 pos += (cmph_uint32)sizeof(cmph_uint32);
165 DEBUGP("buflen_sel = %u\n", buflen_sel);
166
167 memcpy(*buf + pos, buf_sel, buflen_sel);
168
169 #ifdef DEBUG
170 i = 0;
171 for(i = 0; i < buflen_sel; i++)
172 {
173 DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
174 }
175 #endif
176 pos += buflen_sel;
177
178 free(buf_sel);
179
180 // dumping vals_rems
181 memcpy(*buf + pos, cr->vals_rems, vals_rems_size);
182 #ifdef DEBUG
183 for(i = 0; i < vals_rems_size; i++)
184 {
185 DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(*buf + pos + i));
186 }
187 #endif
188 pos += vals_rems_size;
189
190 DEBUGP("Dumped compressed rank structure with size %u bytes\n", *buflen);
191 }
192
compressed_rank_load(compressed_rank_t * cr,const char * buf,cmph_uint32 buflen)193 void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen)
194 {
195 register cmph_uint32 pos = 0;
196 cmph_uint32 buflen_sel = 0;
197 register cmph_uint32 vals_rems_size = 0;
198 #ifdef DEBUG
199 cmph_uint32 i;
200 #endif
201
202 // loading max_val, n, and rem_r
203 memcpy(&(cr->max_val), buf, sizeof(cmph_uint32));
204 pos += (cmph_uint32)sizeof(cmph_uint32);
205 DEBUGP("max_val = %u\n", cr->max_val);
206
207 memcpy(&(cr->n), buf + pos, sizeof(cmph_uint32));
208 pos += (cmph_uint32)sizeof(cmph_uint32);
209 DEBUGP("n = %u\n", cr->n);
210
211 memcpy(&(cr->rem_r), buf + pos, sizeof(cmph_uint32));
212 pos += (cmph_uint32)sizeof(cmph_uint32);
213 DEBUGP("rem_r = %u\n", cr->rem_r);
214
215 // loading sel
216 memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
217 pos += (cmph_uint32)sizeof(cmph_uint32);
218 DEBUGP("buflen_sel = %u\n", buflen_sel);
219
220 select_load(&cr->sel, buf + pos, buflen_sel);
221 #ifdef DEBUG
222 i = 0;
223 for(i = 0; i < buflen_sel; i++)
224 {
225 DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
226 }
227 #endif
228 pos += buflen_sel;
229
230 // loading vals_rems
231 if(cr->vals_rems)
232 {
233 free(cr->vals_rems);
234 }
235 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r);
236 cr->vals_rems = (cmph_uint32 *) calloc(vals_rems_size, sizeof(cmph_uint32));
237 vals_rems_size *= 4;
238 memcpy(cr->vals_rems, buf + pos, vals_rems_size);
239
240 #ifdef DEBUG
241 for(i = 0; i < vals_rems_size; i++)
242 {
243 DEBUGP("pos = %u -- vals_rems_size = %u -- vals_rems[%u] = %u\n", pos, vals_rems_size, i, *(buf + pos + i));
244 }
245 #endif
246 pos += vals_rems_size;
247
248 DEBUGP("Loaded compressed rank structure with size %u bytes\n", buflen);
249 }
250
251
252
compressed_rank_pack(compressed_rank_t * cr,void * cr_packed)253 void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed)
254 {
255 if (cr && cr_packed)
256 {
257 char *buf = NULL;
258 cmph_uint32 buflen = 0;
259 compressed_rank_dump(cr, &buf, &buflen);
260 memcpy(cr_packed, buf, buflen);
261 free(buf);
262 }
263 }
264
compressed_rank_packed_size(compressed_rank_t * cr)265 cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr)
266 {
267 register cmph_uint32 sel_size = select_packed_size(&cr->sel);
268 register cmph_uint32 vals_rems_size = BITS_TABLE_SIZE(cr->n, cr->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
269 return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + vals_rems_size;
270 }
271
compressed_rank_query_packed(void * cr_packed,cmph_uint32 idx)272 cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx)
273 {
274 // unpacking cr_packed
275 register cmph_uint32 *ptr = (cmph_uint32 *)cr_packed;
276 register cmph_uint32 max_val = *ptr++;
277 register cmph_uint32 n = *ptr++;
278 register cmph_uint32 rem_r = *ptr++;
279 register cmph_uint32 buflen_sel = *ptr++;
280 register cmph_uint32 * sel_packed = ptr;
281
282 register cmph_uint32 * bits_vec = sel_packed + 2; // skipping n and m
283
284 register cmph_uint32 * vals_rems = (ptr += (buflen_sel >> 2));
285
286 // compressed sequence query computation
287 register cmph_uint32 rems_mask;
288 register cmph_uint32 val_quot, val_rem;
289 register cmph_uint32 sel_res, rank;
290
291 if(idx > max_val)
292 {
293 return n;
294 }
295
296 val_quot = idx >> rem_r;
297 rems_mask = (1U << rem_r) - 1U;
298 val_rem = idx & rems_mask;
299 if(val_quot == 0)
300 {
301 rank = sel_res = 0;
302 }
303 else
304 {
305 sel_res = select_query_packed(sel_packed, val_quot - 1) + 1;
306 rank = sel_res - val_quot;
307 }
308
309 do
310 {
311 if(GETBIT32(bits_vec, sel_res))
312 {
313 break;
314 }
315 if(get_bits_value(vals_rems, rank, rem_r, rems_mask) >= val_rem)
316 {
317 break;
318 }
319 sel_res++;
320 rank++;
321 } while(1);
322
323 return rank;
324 }
325
326
327
328