1 #include "compressed_seq.h"
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <assert.h>
5 #include <limits.h>
6 #include <string.h>
7
8 #include "bitbool.h"
9
10 // #define DEBUG
11 #include "debug.h"
12
compressed_seq_i_log2(cmph_uint32 x)13 static inline cmph_uint32 compressed_seq_i_log2(cmph_uint32 x)
14 {
15 register cmph_uint32 res = 0;
16
17 while(x > 1)
18 {
19 x >>= 1;
20 res++;
21 }
22 return res;
23 };
24
compressed_seq_init(compressed_seq_t * cs)25 void compressed_seq_init(compressed_seq_t * cs)
26 {
27 select_init(&cs->sel);
28 cs->n = 0;
29 cs->rem_r = 0;
30 cs->length_rems = 0;
31 cs->total_length = 0;
32 cs->store_table = 0;
33 }
34
compressed_seq_destroy(compressed_seq_t * cs)35 void compressed_seq_destroy(compressed_seq_t * cs)
36 {
37 free(cs->store_table);
38 cs->store_table = 0;
39 free(cs->length_rems);
40 cs->length_rems = 0;
41 select_destroy(&cs->sel);
42 };
43
44
compressed_seq_generate(compressed_seq_t * cs,cmph_uint32 * vals_table,cmph_uint32 n)45 void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n)
46 {
47 register cmph_uint32 i;
48 // lengths: represents lengths of encoded values
49 register cmph_uint32 * lengths = (cmph_uint32 *)calloc(n, sizeof(cmph_uint32));
50 register cmph_uint32 rems_mask;
51 register cmph_uint32 stored_value;
52
53 cs->n = n;
54 cs->total_length = 0;
55
56 for(i = 0; i < cs->n; i++)
57 {
58 if(vals_table[i] == 0)
59 {
60 lengths[i] = 0;
61 }
62 else
63 {
64 lengths[i] = compressed_seq_i_log2(vals_table[i] + 1);
65 cs->total_length += lengths[i];
66 };
67 };
68
69 if(cs->store_table)
70 {
71 free(cs->store_table);
72 }
73 cs->store_table = (cmph_uint32 *) calloc(((cs->total_length + 31) >> 5), sizeof(cmph_uint32));
74 cs->total_length = 0;
75
76 for(i = 0; i < cs->n; i++)
77 {
78 if(vals_table[i] == 0)
79 continue;
80 stored_value = vals_table[i] - ((1U << lengths[i]) - 1U);
81 set_bits_at_pos(cs->store_table, cs->total_length, stored_value, lengths[i]);
82 cs->total_length += lengths[i];
83 };
84
85 cs->rem_r = compressed_seq_i_log2(cs->total_length/cs->n);
86
87 if(cs->rem_r == 0)
88 {
89 cs->rem_r = 1;
90 }
91
92 if(cs->length_rems)
93 {
94 free(cs->length_rems);
95 }
96
97 cs->length_rems = (cmph_uint32 *) calloc(BITS_TABLE_SIZE(cs->n, cs->rem_r), sizeof(cmph_uint32));
98
99 rems_mask = (1U << cs->rem_r) - 1U;
100 cs->total_length = 0;
101
102 for(i = 0; i < cs->n; i++)
103 {
104 cs->total_length += lengths[i];
105 set_bits_value(cs->length_rems, i, cs->total_length & rems_mask, cs->rem_r, rems_mask);
106 lengths[i] = cs->total_length >> cs->rem_r;
107 };
108
109 select_init(&cs->sel);
110
111 // FABIANO: before it was (cs->total_length >> cs->rem_r) + 1. But I wiped out the + 1 because
112 // I changed the select structure to work up to m, instead of up to m - 1.
113 select_generate(&cs->sel, lengths, cs->n, (cs->total_length >> cs->rem_r));
114
115 free(lengths);
116 };
117
compressed_seq_get_space_usage(compressed_seq_t * cs)118 cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs)
119 {
120 register cmph_uint32 space_usage = select_get_space_usage(&cs->sel);
121 space_usage += ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32) * 8;
122 space_usage += BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32) * 8;
123 return 4 * (cmph_uint32)sizeof(cmph_uint32) * 8 + space_usage;
124 }
125
compressed_seq_query(compressed_seq_t * cs,cmph_uint32 idx)126 cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx)
127 {
128 register cmph_uint32 enc_idx, enc_length;
129 register cmph_uint32 rems_mask;
130 register cmph_uint32 stored_value;
131 register cmph_uint32 sel_res;
132
133 assert(idx < cs->n); // FABIANO ADDED
134
135 rems_mask = (1U << cs->rem_r) - 1U;
136
137 if(idx == 0)
138 {
139 enc_idx = 0;
140 sel_res = select_query(&cs->sel, idx);
141 }
142 else
143 {
144 sel_res = select_query(&cs->sel, idx - 1);
145
146 enc_idx = (sel_res - (idx - 1)) << cs->rem_r;
147 enc_idx += get_bits_value(cs->length_rems, idx-1, cs->rem_r, rems_mask);
148
149 sel_res = select_next_query(&cs->sel, sel_res);
150 };
151
152 enc_length = (sel_res - idx) << cs->rem_r;
153 enc_length += get_bits_value(cs->length_rems, idx, cs->rem_r, rems_mask);
154 enc_length -= enc_idx;
155 if(enc_length == 0)
156 return 0;
157
158 stored_value = get_bits_at_pos(cs->store_table, enc_idx, enc_length);
159 return stored_value + ((1U << enc_length) - 1U);
160 };
161
compressed_seq_dump(compressed_seq_t * cs,char ** buf,cmph_uint32 * buflen)162 void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen)
163 {
164 register cmph_uint32 sel_size = select_packed_size(&(cs->sel));
165 register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * 4;
166 register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * 4;
167 register cmph_uint32 pos = 0;
168 char * buf_sel = 0;
169 cmph_uint32 buflen_sel = 0;
170
171 *buflen = 4*(cmph_uint32)sizeof(cmph_uint32) + sel_size + length_rems_size + store_table_size;
172
173 DEBUGP("sel_size = %u\n", sel_size);
174 DEBUGP("length_rems_size = %u\n", length_rems_size);
175 DEBUGP("store_table_size = %u\n", store_table_size);
176 *buf = (char *)calloc(*buflen, sizeof(char));
177
178 if (!*buf)
179 {
180 *buflen = UINT_MAX;
181 return;
182 }
183
184 // dumping n, rem_r and total_length
185 memcpy(*buf, &(cs->n), sizeof(cmph_uint32));
186 pos += (cmph_uint32)sizeof(cmph_uint32);
187 DEBUGP("n = %u\n", cs->n);
188
189 memcpy(*buf + pos, &(cs->rem_r), sizeof(cmph_uint32));
190 pos += (cmph_uint32)sizeof(cmph_uint32);
191 DEBUGP("rem_r = %u\n", cs->rem_r);
192
193 memcpy(*buf + pos, &(cs->total_length), sizeof(cmph_uint32));
194 pos += (cmph_uint32)sizeof(cmph_uint32);
195 DEBUGP("total_length = %u\n", cs->total_length);
196
197
198 // dumping sel
199 select_dump(&cs->sel, &buf_sel, &buflen_sel);
200 memcpy(*buf + pos, &buflen_sel, sizeof(cmph_uint32));
201 pos += (cmph_uint32)sizeof(cmph_uint32);
202 DEBUGP("buflen_sel = %u\n", buflen_sel);
203
204 memcpy(*buf + pos, buf_sel, buflen_sel);
205 #ifdef DEBUG
206 cmph_uint32 i = 0;
207 for(i = 0; i < buflen_sel; i++)
208 {
209 DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(*buf + pos + i));
210 }
211 #endif
212 pos += buflen_sel;
213
214 free(buf_sel);
215
216 // dumping length_rems
217 memcpy(*buf + pos, cs->length_rems, length_rems_size);
218 #ifdef DEBUG
219 for(i = 0; i < length_rems_size; i++)
220 {
221 DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(*buf + pos + i));
222 }
223 #endif
224 pos += length_rems_size;
225
226 // dumping store_table
227 memcpy(*buf + pos, cs->store_table, store_table_size);
228
229 #ifdef DEBUG
230 for(i = 0; i < store_table_size; i++)
231 {
232 DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(*buf + pos + i));
233 }
234 #endif
235 DEBUGP("Dumped compressed sequence structure with size %u bytes\n", *buflen);
236 }
237
compressed_seq_load(compressed_seq_t * cs,const char * buf,cmph_uint32 buflen)238 void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen)
239 {
240 register cmph_uint32 pos = 0;
241 cmph_uint32 buflen_sel = 0;
242 register cmph_uint32 length_rems_size = 0;
243 register cmph_uint32 store_table_size = 0;
244
245 // loading n, rem_r and total_length
246 memcpy(&(cs->n), buf, sizeof(cmph_uint32));
247 pos += (cmph_uint32)sizeof(cmph_uint32);
248 DEBUGP("n = %u\n", cs->n);
249
250 memcpy(&(cs->rem_r), buf + pos, sizeof(cmph_uint32));
251 pos += (cmph_uint32)sizeof(cmph_uint32);
252 DEBUGP("rem_r = %u\n", cs->rem_r);
253
254 memcpy(&(cs->total_length), buf + pos, sizeof(cmph_uint32));
255 pos += (cmph_uint32)sizeof(cmph_uint32);
256 DEBUGP("total_length = %u\n", cs->total_length);
257
258 // loading sel
259 memcpy(&buflen_sel, buf + pos, sizeof(cmph_uint32));
260 pos += (cmph_uint32)sizeof(cmph_uint32);
261 DEBUGP("buflen_sel = %u\n", buflen_sel);
262
263 select_load(&cs->sel, buf + pos, buflen_sel);
264 #ifdef DEBUG
265 cmph_uint32 i = 0;
266 for(i = 0; i < buflen_sel; i++)
267 {
268 DEBUGP("pos = %u -- buf_sel[%u] = %u\n", pos, i, *(buf + pos + i));
269 }
270 #endif
271 pos += buflen_sel;
272
273 // loading length_rems
274 if(cs->length_rems)
275 {
276 free(cs->length_rems);
277 }
278 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r);
279 cs->length_rems = (cmph_uint32 *) calloc(length_rems_size, sizeof(cmph_uint32));
280 length_rems_size *= 4;
281 memcpy(cs->length_rems, buf + pos, length_rems_size);
282
283 #ifdef DEBUG
284 for(i = 0; i < length_rems_size; i++)
285 {
286 DEBUGP("pos = %u -- length_rems_size = %u -- length_rems[%u] = %u\n", pos, length_rems_size, i, *(buf + pos + i));
287 }
288 #endif
289 pos += length_rems_size;
290
291 // loading store_table
292 store_table_size = ((cs->total_length + 31) >> 5);
293 if(cs->store_table)
294 {
295 free(cs->store_table);
296 }
297 cs->store_table = (cmph_uint32 *) calloc(store_table_size, sizeof(cmph_uint32));
298 store_table_size *= 4;
299 memcpy(cs->store_table, buf + pos, store_table_size);
300
301 #ifdef DEBUG
302 for(i = 0; i < store_table_size; i++)
303 {
304 DEBUGP("pos = %u -- store_table_size = %u -- store_table[%u] = %u\n", pos, store_table_size, i, *(buf + pos + i));
305 }
306 #endif
307
308 DEBUGP("Loaded compressed sequence structure with size %u bytes\n", buflen);
309 }
310
compressed_seq_pack(compressed_seq_t * cs,void * cs_packed)311 void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed)
312 {
313 if (cs && cs_packed)
314 {
315 char *buf = NULL;
316 cmph_uint32 buflen = 0;
317 compressed_seq_dump(cs, &buf, &buflen);
318 memcpy(cs_packed, buf, buflen);
319 free(buf);
320 }
321
322 }
323
compressed_seq_packed_size(compressed_seq_t * cs)324 cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs)
325 {
326 register cmph_uint32 sel_size = select_packed_size(&cs->sel);
327 register cmph_uint32 store_table_size = ((cs->total_length + 31) >> 5) * (cmph_uint32)sizeof(cmph_uint32);
328 register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(cs->n, cs->rem_r) * (cmph_uint32)sizeof(cmph_uint32);
329 return 4 * (cmph_uint32)sizeof(cmph_uint32) + sel_size + store_table_size + length_rems_size;
330 }
331
332
compressed_seq_query_packed(void * cs_packed,cmph_uint32 idx)333 cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx)
334 {
335 // unpacking cs_packed
336 register cmph_uint32 *ptr = (cmph_uint32 *)cs_packed;
337 register cmph_uint32 n = *ptr++;
338 register cmph_uint32 rem_r = *ptr++;
339 ptr++; // skipping total_length
340 // register cmph_uint32 total_length = *ptr++;
341 register cmph_uint32 buflen_sel = *ptr++;
342 register cmph_uint32 * sel_packed = ptr;
343 register cmph_uint32 * length_rems = (ptr += (buflen_sel >> 2));
344 register cmph_uint32 length_rems_size = BITS_TABLE_SIZE(n, rem_r);
345 register cmph_uint32 * store_table = (ptr += length_rems_size);
346
347 // compressed sequence query computation
348 register cmph_uint32 enc_idx, enc_length;
349 register cmph_uint32 rems_mask;
350 register cmph_uint32 stored_value;
351 register cmph_uint32 sel_res;
352
353 rems_mask = (1U << rem_r) - 1U;
354
355 if(idx == 0)
356 {
357 enc_idx = 0;
358 sel_res = select_query_packed(sel_packed, idx);
359 }
360 else
361 {
362 sel_res = select_query_packed(sel_packed, idx - 1);
363
364 enc_idx = (sel_res - (idx - 1)) << rem_r;
365 enc_idx += get_bits_value(length_rems, idx-1, rem_r, rems_mask);
366
367 sel_res = select_next_query_packed(sel_packed, sel_res);
368 };
369
370 enc_length = (sel_res - idx) << rem_r;
371 enc_length += get_bits_value(length_rems, idx, rem_r, rems_mask);
372 enc_length -= enc_idx;
373 if(enc_length == 0)
374 return 0;
375
376 stored_value = get_bits_at_pos(store_table, enc_idx, enc_length);
377 return stored_value + ((1U << enc_length) - 1U);
378 }
379