1 /*
2 * Copyright (c) 2014-2019 Genome Research Ltd.
3 * Author(s): James Bonfield
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17 * Institute nor the names of its contributors may be used to endorse
18 * or promote products derived from this software without specific
19 * prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Author: James Bonfield, Wellcome Trust Sanger Institute. 2014
36 */
37
38 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
39 #include <config.h>
40
41 #include <stdint.h>
42 #include <stdlib.h>
43 #include <stdio.h>
44 #include <unistd.h>
45 #include <assert.h>
46 #include <string.h>
47 #include <sys/time.h>
48 #include <limits.h>
49
50 #include "rANS_static.h"
51 #include "rANS_byte.h"
52
53 #define TF_SHIFT 12
54 #define TOTFREQ (1<<TF_SHIFT)
55
56 #define ABS(a) ((a)>0?(a):-(a))
57 #ifndef BLK_SIZE
58 # define BLK_SIZE 1024*1024
59 #endif
60
61 // Room to allow for expanded BLK_SIZE on worst case compression.
62 #define BLK_SIZE2 ((int)(1.05*BLK_SIZE))
63
64 /*-----------------------------------------------------------------------------
65 * Memory to memory compression functions.
66 *
67 * These are original versions without any manual loop unrolling. They
68 * are easier to understand, but can be up to 2x slower.
69 */
70
rans_compress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)71 unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,
72 unsigned int *out_size) {
73 unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);
74 unsigned char *cp, *out_end;
75 RansEncSymbol syms[256];
76 RansState rans0, rans1, rans2, rans3;
77 uint8_t* ptr;
78 int F[256] = {0}, i, j, tab_size, rle, x, fsum = 0;
79 int m = 0, M = 0;
80 uint64_t tr;
81
82 if (!out_buf)
83 return NULL;
84
85 ptr = out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
86
87 // Compute statistics
88 for (i = 0; i < in_size; i++) {
89 F[in[i]]++;
90 }
91 tr = ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size;
92 normalise_harder:
93 // Normalise so T[i] == TOTFREQ
94 for (fsum = m = M = j = 0; j < 256; j++) {
95 if (!F[j])
96 continue;
97
98 if (m < F[j])
99 m = F[j], M = j;
100
101 if ((F[j] = (F[j]*tr)>>31) == 0)
102 F[j] = 1;
103 fsum += F[j];
104 }
105
106 fsum++;
107 if (fsum < TOTFREQ) {
108 F[M] += TOTFREQ-fsum;
109 } else if (fsum-TOTFREQ > F[M]/2) {
110 // Corner case to avoid excessive frequency reduction
111 tr = 2104533975; goto normalise_harder; // equiv to *0.98.
112 } else {
113 F[M] -= fsum-TOTFREQ;
114 }
115
116 //printf("F[%d]=%d\n", M, F[M]);
117 assert(F[M]>0);
118
119 // Encode statistics.
120 cp = out_buf+9;
121
122 for (x = rle = j = 0; j < 256; j++) {
123 if (F[j]) {
124 // j
125 if (rle) {
126 rle--;
127 } else {
128 *cp++ = j;
129 if (!rle && j && F[j-1]) {
130 for(rle=j+1; rle<256 && F[rle]; rle++)
131 ;
132 rle -= j+1;
133 *cp++ = rle;
134 }
135 //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]);
136 }
137
138 // F[j]
139 if (F[j]<128) {
140 *cp++ = F[j];
141 } else {
142 *cp++ = 128 | (F[j]>>8);
143 *cp++ = F[j]&0xff;
144 }
145 RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
146 x += F[j];
147 }
148 }
149 *cp++ = 0;
150
151 //write(1, out_buf+4, cp-(out_buf+4));
152 tab_size = cp-out_buf;
153
154 RansEncInit(&rans0);
155 RansEncInit(&rans1);
156 RansEncInit(&rans2);
157 RansEncInit(&rans3);
158
159 switch (i=(in_size&3)) {
160 case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); // fall through
161 case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); // fall through
162 case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); // fall through
163 case 0:
164 break;
165 }
166 for (i=(in_size &~3); i>0; i-=4) {
167 RansEncSymbol *s3 = &syms[in[i-1]];
168 RansEncSymbol *s2 = &syms[in[i-2]];
169 RansEncSymbol *s1 = &syms[in[i-3]];
170 RansEncSymbol *s0 = &syms[in[i-4]];
171
172 RansEncPutSymbol(&rans3, &ptr, s3);
173 RansEncPutSymbol(&rans2, &ptr, s2);
174 RansEncPutSymbol(&rans1, &ptr, s1);
175 RansEncPutSymbol(&rans0, &ptr, s0);
176 }
177
178 RansEncFlush(&rans3, &ptr);
179 RansEncFlush(&rans2, &ptr);
180 RansEncFlush(&rans1, &ptr);
181 RansEncFlush(&rans0, &ptr);
182
183 // Finalise block size and return it
184 *out_size = (out_end - ptr) + tab_size;
185
186 cp = out_buf;
187
188 *cp++ = 0; // order
189 *cp++ = ((*out_size-9)>> 0) & 0xff;
190 *cp++ = ((*out_size-9)>> 8) & 0xff;
191 *cp++ = ((*out_size-9)>>16) & 0xff;
192 *cp++ = ((*out_size-9)>>24) & 0xff;
193
194 *cp++ = (in_size>> 0) & 0xff;
195 *cp++ = (in_size>> 8) & 0xff;
196 *cp++ = (in_size>>16) & 0xff;
197 *cp++ = (in_size>>24) & 0xff;
198
199 memmove(out_buf + tab_size, ptr, out_end-ptr);
200
201 return out_buf;
202 }
203
204 typedef struct {
205 unsigned char R[TOTFREQ];
206 } ari_decoder;
207
rans_uncompress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)208 unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size,
209 unsigned int *out_size) {
210 /* Load in the static tables */
211 unsigned char *cp = in + 9;
212 unsigned char *cp_end = in + in_size;
213 int i, j, x, rle;
214 unsigned int out_sz, in_sz;
215 char *out_buf;
216 ari_decoder D;
217 RansDecSymbol syms[256];
218
219 if (in_size < 26) // Need at least this many bytes just to start
220 return NULL;
221
222 if (*in++ != 0) // Order-0 check
223 return NULL;
224
225 in_sz = ((((uint32_t) in[0])<<0) | (((uint32_t) in[1])<<8) |
226 (((uint32_t) in[2])<<16) | (((uint32_t) in[3])<<24));
227 out_sz = ((((uint32_t) in[4])<<0) | (((uint32_t) in[5])<<8) |
228 (((uint32_t) in[6])<<16) | (((uint32_t) in[7])<<24));
229 if (in_sz != in_size-9)
230 return NULL;
231
232 if (out_sz >= INT_MAX)
233 return NULL; // protect against some overflow cases
234
235 // Precompute reverse lookup of frequency.
236 rle = x = 0;
237 j = *cp++;
238 do {
239 int F, C;
240 if (cp > cp_end - 16) return NULL; // Not enough input bytes left
241 if ((F = *cp++) >= 128) {
242 F &= ~128;
243 F = ((F & 127) << 8) | *cp++;
244 }
245 C = x;
246
247 RansDecSymbolInit(&syms[j], C, F);
248
249 /* Build reverse lookup table */
250 if (x + F > TOTFREQ)
251 return NULL;
252 memset(&D.R[x], j, F);
253
254 x += F;
255
256 if (!rle && j+1 == *cp) {
257 j = *cp++;
258 rle = *cp++;
259 } else if (rle) {
260 rle--;
261 j++;
262 if (j > 255)
263 return NULL;
264 } else {
265 j = *cp++;
266 }
267 } while(j);
268
269 if (x < TOTFREQ-1 || x > TOTFREQ)
270 return NULL;
271 if (x < TOTFREQ) // historically we fill 4095, not 4096
272 D.R[x] = D.R[x-1];
273
274 if (cp > cp_end - 16) return NULL; // Not enough input bytes left
275
276 RansState rans0, rans1, rans2, rans3;
277 uint8_t *ptr = cp;
278 RansDecInit(&rans0, &ptr);
279 RansDecInit(&rans1, &ptr);
280 RansDecInit(&rans2, &ptr);
281 RansDecInit(&rans3, &ptr);
282
283 out_buf = malloc(out_sz);
284 if (!out_buf)
285 return NULL;
286
287 int out_end = (out_sz&~3);
288
289 RansState R[4];
290 R[0] = rans0;
291 R[1] = rans1;
292 R[2] = rans2;
293 R[3] = rans3;
294 uint32_t mask = (1u << TF_SHIFT)-1;
295
296 for (i=0; i < out_end; i+=4) {
297 uint32_t m[4] = {R[0] & mask,
298 R[1] & mask,
299 R[2] & mask,
300 R[3] & mask};
301 uint8_t c[4] = {D.R[m[0]],
302 D.R[m[1]],
303 D.R[m[2]],
304 D.R[m[3]]};
305 out_buf[i+0] = c[0];
306 out_buf[i+1] = c[1];
307 out_buf[i+2] = c[2];
308 out_buf[i+3] = c[3];
309
310 // In theory all TOTFREQ elements of D.R are filled out, but it's
311 // possible this may not be true (invalid input). We could
312 // check with x == TOTFREQ after filling out D.R matrix, but
313 // for historical reasons this sums to TOTFREQ-1 leaving one
314 // byte in D.R uninitialised. Or we could check here that
315 // syms[c[0..3]].freq > 0 and initialising syms, but that is
316 // slow.
317 //
318 // We take the former approach and accept a potential for garbage in
319 // -> garbage out in the rare 1 in TOTFREQ case as the overhead of
320 // continuous validation of freq > 0 is steep on this tight loop.
321
322 // RansDecAdvanceSymbolStep(&R[0], &syms[c[0]], TF_SHIFT);
323 // RansDecAdvanceSymbolStep(&R[1], &syms[c[1]], TF_SHIFT);
324 // RansDecAdvanceSymbolStep(&R[2], &syms[c[2]], TF_SHIFT);
325 // RansDecAdvanceSymbolStep(&R[3], &syms[c[3]], TF_SHIFT);
326 R[0] = syms[c[0]].freq * (R[0]>>TF_SHIFT);
327 R[0] += m[0] - syms[c[0]].start;
328 R[1] = syms[c[1]].freq * (R[1]>>TF_SHIFT);
329 R[1] += m[1] - syms[c[1]].start;
330 R[2] = syms[c[2]].freq * (R[2]>>TF_SHIFT);
331 R[2] += m[2] - syms[c[2]].start;
332 R[3] = syms[c[3]].freq * (R[3]>>TF_SHIFT);
333 R[3] += m[3] - syms[c[3]].start;
334
335 if (ptr < cp_end - 8) { // Each renorm reads no more than 2 bytes
336 RansDecRenorm(&R[0], &ptr);
337 RansDecRenorm(&R[1], &ptr);
338 RansDecRenorm(&R[2], &ptr);
339 RansDecRenorm(&R[3], &ptr);
340 } else {
341 RansDecRenormSafe(&R[0], &ptr, cp_end);
342 RansDecRenormSafe(&R[1], &ptr, cp_end);
343 RansDecRenormSafe(&R[2], &ptr, cp_end);
344 RansDecRenormSafe(&R[3], &ptr, cp_end);
345 }
346 }
347
348 switch(out_sz&3) {
349 case 3:
350 out_buf[out_end+2] = D.R[RansDecGet(&R[2], TF_SHIFT)];
351 // fall through
352 case 2:
353 out_buf[out_end+1] = D.R[RansDecGet(&R[1], TF_SHIFT)];
354 // fall through
355 case 1:
356 out_buf[out_end] = D.R[RansDecGet(&R[0], TF_SHIFT)];
357 // fall through
358 default:
359 break;
360 }
361
362 *out_size = out_sz;
363
364 return (unsigned char *)out_buf;
365 }
366
rans_compress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)367 unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size,
368 unsigned int *out_size) {
369 unsigned char *out_buf = NULL, *out_end, *cp;
370 unsigned int last_i, tab_size, rle_i, rle_j;
371 RansEncSymbol (*syms)[256] = NULL; /* syms[256][256] */
372 int (*F)[256] = NULL; /* F[256][256] */
373 int *T = NULL; /* T[256] */
374 int i, j;
375 unsigned char c;
376
377 if (in_size < 4)
378 return rans_compress_O0(in, in_size, out_size);
379
380 syms = malloc(256 * sizeof(*syms));
381 if (!syms) goto cleanup;
382 F = calloc(256, sizeof(*F));
383 if (!F) goto cleanup;
384 T = calloc(256, sizeof(*T));
385 if (!T) goto cleanup;
386 out_buf = malloc(1.05*in_size + 257*257*3 + 9);
387 if (!out_buf) goto cleanup;
388
389 out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
390 cp = out_buf+9;
391
392 //for (last = 0, i=in_size-1; i>=0; i--) {
393 // F[last][c = in[i]]++;
394 // T[last]++;
395 // last = c;
396 //}
397
398 for (last_i=i=0; i<in_size; i++) {
399 F[last_i][c = in[i]]++;
400 T[last_i]++;
401 last_i = c;
402 }
403 F[0][in[1*(in_size>>2)]]++;
404 F[0][in[2*(in_size>>2)]]++;
405 F[0][in[3*(in_size>>2)]]++;
406 T[0]+=3;
407
408 // Normalise so T[i] == TOTFREQ
409 for (rle_i = i = 0; i < 256; i++) {
410 int t2, m, M;
411 unsigned int x;
412
413 if (T[i] == 0)
414 continue;
415
416 //uint64_t p = (TOTFREQ * TOTFREQ) / t;
417 double p = ((double)TOTFREQ)/T[i];
418 normalise_harder:
419 for (t2 = m = M = j = 0; j < 256; j++) {
420 if (!F[i][j])
421 continue;
422
423 if (m < F[i][j])
424 m = F[i][j], M = j;
425
426 //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0)
427 if ((F[i][j] *= p) == 0)
428 F[i][j] = 1;
429 t2 += F[i][j];
430 }
431
432 t2++;
433 if (t2 < TOTFREQ) {
434 F[i][M] += TOTFREQ-t2;
435 } else if (t2-TOTFREQ >= F[i][M]/2) {
436 // Corner case to avoid excessive frequency reduction
437 p = .98; goto normalise_harder;
438 } else {
439 F[i][M] -= t2-TOTFREQ;
440 }
441
442 // Store frequency table
443 // i
444 if (rle_i) {
445 rle_i--;
446 } else {
447 *cp++ = i;
448 // FIXME: could use order-0 statistics to observe which alphabet
449 // symbols are present and base RLE on that ordering instead.
450 if (i && T[i-1]) {
451 for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++)
452 ;
453 rle_i -= i+1;
454 *cp++ = rle_i;
455 }
456 }
457
458 int *F_i_ = F[i];
459 x = 0;
460 rle_j = 0;
461 for (j = 0; j < 256; j++) {
462 if (F_i_[j]) {
463 //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x);
464
465 // j
466 if (rle_j) {
467 rle_j--;
468 } else {
469 *cp++ = j;
470 if (!rle_j && j && F_i_[j-1]) {
471 for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++)
472 ;
473 rle_j -= j+1;
474 *cp++ = rle_j;
475 }
476 }
477
478 // F_i_[j]
479 if (F_i_[j]<128) {
480 *cp++ = F_i_[j];
481 } else {
482 *cp++ = 128 | (F_i_[j]>>8);
483 *cp++ = F_i_[j]&0xff;
484 }
485
486 RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT);
487 x += F_i_[j];
488 }
489 }
490 *cp++ = 0;
491 }
492 *cp++ = 0;
493
494 //write(1, out_buf+4, cp-(out_buf+4));
495 tab_size = cp - out_buf;
496 assert(tab_size < 257*257*3);
497
498 RansState rans0, rans1, rans2, rans3;
499 RansEncInit(&rans0);
500 RansEncInit(&rans1);
501 RansEncInit(&rans2);
502 RansEncInit(&rans3);
503
504 uint8_t* ptr = out_end;
505
506 int isz4 = in_size>>2;
507 int i0 = 1*isz4-2;
508 int i1 = 2*isz4-2;
509 int i2 = 3*isz4-2;
510 int i3 = 4*isz4-2;
511
512 unsigned char l0 = in[i0+1];
513 unsigned char l1 = in[i1+1];
514 unsigned char l2 = in[i2+1];
515 unsigned char l3 = in[i3+1];
516
517 // Deal with the remainder
518 l3 = in[in_size-1];
519 for (i3 = in_size-2; i3 > 4*isz4-2; i3--) {
520 unsigned char c3 = in[i3];
521 RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]);
522 l3 = c3;
523 }
524
525 for (; i0 >= 0; i0--, i1--, i2--, i3--) {
526 unsigned char c0, c1, c2, c3;
527 RansEncSymbol *s3 = &syms[c3 = in[i3]][l3];
528 RansEncSymbol *s2 = &syms[c2 = in[i2]][l2];
529 RansEncSymbol *s1 = &syms[c1 = in[i1]][l1];
530 RansEncSymbol *s0 = &syms[c0 = in[i0]][l0];
531
532 RansEncPutSymbol(&rans3, &ptr, s3);
533 RansEncPutSymbol(&rans2, &ptr, s2);
534 RansEncPutSymbol(&rans1, &ptr, s1);
535 RansEncPutSymbol(&rans0, &ptr, s0);
536
537 l0 = c0;
538 l1 = c1;
539 l2 = c2;
540 l3 = c3;
541 }
542
543 RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]);
544 RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]);
545 RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]);
546 RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]);
547
548 RansEncFlush(&rans3, &ptr);
549 RansEncFlush(&rans2, &ptr);
550 RansEncFlush(&rans1, &ptr);
551 RansEncFlush(&rans0, &ptr);
552
553 *out_size = (out_end - ptr) + tab_size;
554
555 cp = out_buf;
556 *cp++ = 1; // order
557
558 *cp++ = ((*out_size-9)>> 0) & 0xff;
559 *cp++ = ((*out_size-9)>> 8) & 0xff;
560 *cp++ = ((*out_size-9)>>16) & 0xff;
561 *cp++ = ((*out_size-9)>>24) & 0xff;
562
563 *cp++ = (in_size>> 0) & 0xff;
564 *cp++ = (in_size>> 8) & 0xff;
565 *cp++ = (in_size>>16) & 0xff;
566 *cp++ = (in_size>>24) & 0xff;
567
568 memmove(out_buf + tab_size, ptr, out_end-ptr);
569
570 cleanup:
571 free(syms);
572 free(F);
573 free(T);
574
575 return out_buf;
576 }
577
rans_uncompress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)578 unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size,
579 unsigned int *out_size) {
580 /* Load in the static tables */
581 unsigned char *cp = in + 9;
582 unsigned char *ptr_end = in + in_size;
583 int i, j = -999, x, rle_i, rle_j;
584 unsigned int out_sz, in_sz;
585 char *out_buf = NULL;
586 ari_decoder *D = NULL; /* D[256] */
587 RansDecSymbol (*syms)[256] = NULL; /* syms[256][256] */
588
589 if (in_size < 27) // Need at least this many bytes to start
590 return NULL;
591
592 if (*in++ != 1) // Order-1 check
593 return NULL;
594
595 in_sz = ((((uint32_t) in[0])<<0) | (((uint32_t) in[1])<<8) |
596 (((uint32_t) in[2])<<16) | (((uint32_t) in[3])<<24));
597 out_sz = ((((uint32_t) in[4])<<0) | (((uint32_t) in[5])<<8) |
598 (((uint32_t) in[6])<<16) | (((uint32_t) in[7])<<24));
599 if (in_sz != in_size-9)
600 return NULL;
601
602 if (out_sz >= INT_MAX)
603 return NULL; // protect against some overflow cases
604
605 // calloc may add 2% overhead to CRAM decode, but on linux with glibc it's
606 // often the same thing due to using mmap.
607 D = calloc(256, sizeof(*D));
608 if (!D) goto cleanup;
609 syms = malloc(256 * sizeof(*syms));
610 if (!syms) goto cleanup;
611 /* These memsets prevent illegal memory access in syms due to
612 broken compressed data. As D is calloc'd, all illegal transitions
613 will end up in either row or column 0 of syms. */
614 memset(&syms[0], 0, sizeof(syms[0]));
615 for (i = 1; i < 256; i++) memset(&syms[i][0], 0, sizeof(syms[0][0]));
616
617 //fprintf(stderr, "out_sz=%d\n", out_sz);
618
619 //i = *cp++;
620 rle_i = 0;
621 i = *cp++;
622 do {
623 rle_j = x = 0;
624 j = *cp++;
625 do {
626 int F, C;
627 if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
628 if ((F = *cp++) >= 128) {
629 F &= ~128;
630 F = ((F & 127) << 8) | *cp++;
631 }
632 C = x;
633
634 //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C);
635
636 if (!F)
637 F = TOTFREQ;
638
639 RansDecSymbolInit(&syms[i][j], C, F);
640
641 /* Build reverse lookup table */
642 if (x + F > TOTFREQ)
643 goto cleanup;
644 memset(&D[i].R[x], j, F);
645
646 x += F;
647
648 if (!rle_j && j+1 == *cp) {
649 j = *cp++;
650 rle_j = *cp++;
651 } else if (rle_j) {
652 rle_j--;
653 j++;
654 if (j > 255)
655 goto cleanup;
656 } else {
657 j = *cp++;
658 }
659 } while(j);
660
661 if (x < TOTFREQ-1 || x > TOTFREQ)
662 goto cleanup;
663 if (x < TOTFREQ) // historically we fill 4095, not 4096
664 D[i].R[x] = D[i].R[x-1];
665
666 if (!rle_i && i+1 == *cp) {
667 i = *cp++;
668 rle_i = *cp++;
669 } else if (rle_i) {
670 rle_i--;
671 i++;
672 if (i > 255)
673 goto cleanup;
674 } else {
675 i = *cp++;
676 }
677 } while (i);
678
679 // Precompute reverse lookup of frequency.
680
681 RansState rans0, rans1, rans2, rans3;
682 uint8_t *ptr = cp;
683 if (ptr > ptr_end - 16) goto cleanup; // Not enough input bytes left
684 RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup;
685 RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup;
686 RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup;
687 RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup;
688
689 int isz4 = out_sz>>2;
690 int l0 = 0;
691 int l1 = 0;
692 int l2 = 0;
693 int l3 = 0;
694 int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4};
695
696 RansState R[4];
697 R[0] = rans0;
698 R[1] = rans1;
699 R[2] = rans2;
700 R[3] = rans3;
701
702 /* Allocate output buffer */
703 out_buf = malloc(out_sz);
704 if (!out_buf) goto cleanup;
705
706 for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) {
707 uint32_t m[4] = {R[0] & ((1u << TF_SHIFT)-1),
708 R[1] & ((1u << TF_SHIFT)-1),
709 R[2] & ((1u << TF_SHIFT)-1),
710 R[3] & ((1u << TF_SHIFT)-1)};
711
712 uint8_t c[4] = {D[l0].R[m[0]],
713 D[l1].R[m[1]],
714 D[l2].R[m[2]],
715 D[l3].R[m[3]]};
716
717 out_buf[i4[0]] = c[0];
718 out_buf[i4[1]] = c[1];
719 out_buf[i4[2]] = c[2];
720 out_buf[i4[3]] = c[3];
721
722 //RansDecAdvanceSymbolStep(&R[0], &syms[l0][c[0]], TF_SHIFT);
723 //RansDecAdvanceSymbolStep(&R[1], &syms[l1][c[1]], TF_SHIFT);
724 //RansDecAdvanceSymbolStep(&R[2], &syms[l2][c[2]], TF_SHIFT);
725 //RansDecAdvanceSymbolStep(&R[3], &syms[l3][c[3]], TF_SHIFT);
726
727 R[0] = syms[l0][c[0]].freq * (R[0]>>TF_SHIFT);
728 R[0] += m[0] - syms[l0][c[0]].start;
729 R[1] = syms[l1][c[1]].freq * (R[1]>>TF_SHIFT);
730 R[1] += m[1] - syms[l1][c[1]].start;
731 R[2] = syms[l2][c[2]].freq * (R[2]>>TF_SHIFT);
732 R[2] += m[2] - syms[l2][c[2]].start;
733 R[3] = syms[l3][c[3]].freq * (R[3]>>TF_SHIFT);
734 R[3] += m[3] - syms[l3][c[3]].start;
735
736 if (ptr < ptr_end - 8) { // Each renorm reads no more than 2 bytes
737 RansDecRenorm(&R[0], &ptr);
738 RansDecRenorm(&R[1], &ptr);
739 RansDecRenorm(&R[2], &ptr);
740 RansDecRenorm(&R[3], &ptr);
741 } else {
742 RansDecRenormSafe(&R[0], &ptr, ptr_end);
743 RansDecRenormSafe(&R[1], &ptr, ptr_end);
744 RansDecRenormSafe(&R[2], &ptr, ptr_end);
745 RansDecRenormSafe(&R[3], &ptr, ptr_end);
746 }
747
748 l0 = c[0];
749 l1 = c[1];
750 l2 = c[2];
751 l3 = c[3];
752 }
753
754 // Remainder
755 for (; i4[3] < out_sz; i4[3]++) {
756 unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)];
757 out_buf[i4[3]] = c3;
758
759 uint32_t m = R[3] & ((1u << TF_SHIFT)-1);
760 R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start;
761 RansDecRenormSafe(&R[3], &ptr, ptr_end);
762 l3 = c3;
763 }
764
765 *out_size = out_sz;
766
767 cleanup:
768 if (D)
769 free(D);
770 free(syms);
771
772 return (unsigned char *)out_buf;
773 }
774
775 /*-----------------------------------------------------------------------------
776 * Simple interface to the order-0 vs order-1 encoders and decoders.
777 */
rans_compress(unsigned char * in,unsigned int in_size,unsigned int * out_size,int order)778 unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
779 unsigned int *out_size, int order) {
780 return order
781 ? rans_compress_O1(in, in_size, out_size)
782 : rans_compress_O0(in, in_size, out_size);
783 }
784
rans_uncompress(unsigned char * in,unsigned int in_size,unsigned int * out_size)785 unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
786 unsigned int *out_size) {
787 /* Both rans_uncompress functions need to be able to read at least 9
788 bytes. */
789 if (in_size < 9)
790 return NULL;
791 return in[0]
792 ? rans_uncompress_O1(in, in_size, out_size)
793 : rans_uncompress_O0(in, in_size, out_size);
794 }
795
796
797 #ifdef TEST_MAIN
798 /*-----------------------------------------------------------------------------
799 * Main.
800 *
801 * This is a simple command line tool for testing order-0 and order-1
802 * compression using the rANS codec. Simply compile with
803 *
804 * gcc -DTEST_MAIN -O3 -I. cram/rANS_static.c -o cram/rANS_static
805 *
806 * Usage: cram/rANS_static -o0 < file > file.o0
807 * cram/rANS_static -d < file.o0 > file2
808 *
809 * cram/rANS_static -o1 < file > file.o1
810 * cram/rANS_static -d < file.o1 > file2
811 */
main(int argc,char ** argv)812 int main(int argc, char **argv) {
813 int opt, order = 0;
814 unsigned char in_buf[BLK_SIZE2+257*257*3];
815 int decode = 0;
816 FILE *infp = stdin, *outfp = stdout;
817 struct timeval tv1, tv2;
818 size_t bytes = 0;
819
820 extern char *optarg;
821 extern int optind;
822
823 while ((opt = getopt(argc, argv, "o:d")) != -1) {
824 switch (opt) {
825 case 'o':
826 order = atoi(optarg);
827 break;
828
829 case 'd':
830 decode = 1;
831 break;
832 }
833 }
834
835 order = order ? 1 : 0; // Only support O(0) and O(1)
836
837 if (optind < argc) {
838 if (!(infp = fopen(argv[optind], "rb"))) {
839 perror(argv[optind]);
840 return 1;
841 }
842 optind++;
843 }
844
845 if (optind < argc) {
846 if (!(outfp = fopen(argv[optind], "wb"))) {
847 perror(argv[optind]);
848 fclose(infp);
849 return 1;
850 }
851 optind++;
852 }
853
854 gettimeofday(&tv1, NULL);
855
856 if (decode) {
857 // Only used in some test implementations of RC_GetFreq()
858 //RC_init();
859 //RC_init2();
860
861 for (;;) {
862 uint32_t in_size, out_size;
863 unsigned char *out;
864
865 if (9 != fread(in_buf, 1, 9, infp))
866 break;
867 in_size = *(int *)&in_buf[1];
868 if (in_size != fread(in_buf+9, 1, in_size, infp)) {
869 fprintf(stderr, "Truncated input\n");
870 exit(1);
871 }
872 out = rans_uncompress(in_buf, in_size+9, &out_size);
873 if (!out)
874 abort();
875
876 fwrite(out, 1, out_size, outfp);
877 free(out);
878
879 bytes += out_size;
880 }
881 } else {
882 for (;;) {
883 uint32_t in_size, out_size;
884 unsigned char *out;
885
886 in_size = fread(in_buf, 1, BLK_SIZE, infp);
887 if (in_size <= 0)
888 break;
889
890 out = rans_compress(in_buf, in_size, &out_size, order);
891
892 fwrite(out, 1, out_size, outfp);
893 free(out);
894
895 bytes += in_size;
896 }
897 }
898
899 gettimeofday(&tv2, NULL);
900
901 fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n",
902 (long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
903 tv2.tv_usec - tv1.tv_usec,
904 (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
905 tv2.tv_usec - tv1.tv_usec));
906
907 if (infp != stdin) fclose(infp);
908 if (outfp != stdout) fclose(outfp);
909
910 return 0;
911 }
912 #endif
913