1 /*
2 * Copyright (c) 2014 Genome Research Ltd.
3 * Author(s): James Bonfield
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17 * Institute nor the names of its contributors may be used to endorse
18 * or promote products derived from this software without specific
19 * prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Author: James Bonfield, Wellcome Trust Sanger Institute. 2014
36 */
37
38 #include <config.h>
39
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <unistd.h>
44 #include <assert.h>
45 #include <string.h>
46 #include <sys/time.h>
47
48 #include "cram/rANS_static.h"
49 #include "cram/rANS_byte.h"
50
51 #define TF_SHIFT 12
52 #define TOTFREQ (1<<TF_SHIFT)
53
54 #define ABS(a) ((a)>0?(a):-(a))
55 #ifndef BLK_SIZE
56 # define BLK_SIZE 1024*1024
57 #endif
58
59 // Room to allow for expanded BLK_SIZE on worst case compression.
60 #define BLK_SIZE2 ((int)(1.05*BLK_SIZE))
61
62 /*-----------------------------------------------------------------------------
63 * Memory to memory compression functions.
64 *
65 * These are original versions without any manual loop unrolling. They
66 * are easier to understand, but can be up to 2x slower.
67 */
68
rans_compress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)69 unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,
70 unsigned int *out_size) {
71 unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);
72 unsigned char *cp, *out_end;
73 RansEncSymbol syms[256];
74 RansState rans0, rans1, rans2, rans3;
75 uint8_t* ptr;
76 int F[256] = {0}, i, j, tab_size, rle, x, fsum = 0;
77 int m = 0, M = 0;
78 uint64_t tr;
79
80 if (!out_buf)
81 return NULL;
82
83 ptr = out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
84
85 // Compute statistics
86 for (i = 0; i < in_size; i++) {
87 F[in[i]]++;
88 }
89 tr = ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size;
90 normalise_harder:
91 // Normalise so T[i] == TOTFREQ
92 for (m = M = j = 0; j < 256; j++) {
93 if (!F[j])
94 continue;
95
96 if (m < F[j])
97 m = F[j], M = j;
98
99 if ((F[j] = (F[j]*tr)>>31) == 0)
100 F[j] = 1;
101 fsum += F[j];
102 }
103
104 fsum++;
105 if (fsum < TOTFREQ) {
106 F[M] += TOTFREQ-fsum;
107 } else if (fsum-TOTFREQ > F[M]/2) {
108 // Corner case to avoid excessive frequency reduction
109 tr = 2104533975; goto normalise_harder; // equiv to *0.98.
110 } else {
111 F[M] -= fsum-TOTFREQ;
112 }
113
114 //printf("F[%d]=%d\n", M, F[M]);
115 assert(F[M]>0);
116
117 // Encode statistics.
118 cp = out_buf+9;
119
120 for (x = rle = j = 0; j < 256; j++) {
121 if (F[j]) {
122 // j
123 if (rle) {
124 rle--;
125 } else {
126 *cp++ = j;
127 if (!rle && j && F[j-1]) {
128 for(rle=j+1; rle<256 && F[rle]; rle++)
129 ;
130 rle -= j+1;
131 *cp++ = rle;
132 }
133 //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]);
134 }
135
136 // F[j]
137 if (F[j]<128) {
138 *cp++ = F[j];
139 } else {
140 *cp++ = 128 | (F[j]>>8);
141 *cp++ = F[j]&0xff;
142 }
143 RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
144 x += F[j];
145 }
146 }
147 *cp++ = 0;
148
149 //write(1, out_buf+4, cp-(out_buf+4));
150 tab_size = cp-out_buf;
151
152 RansEncInit(&rans0);
153 RansEncInit(&rans1);
154 RansEncInit(&rans2);
155 RansEncInit(&rans3);
156
157 switch (i=(in_size&3)) {
158 case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]);
159 case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]);
160 case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]);
161 case 0:
162 break;
163 }
164 for (i=(in_size &~3); i>0; i-=4) {
165 RansEncSymbol *s3 = &syms[in[i-1]];
166 RansEncSymbol *s2 = &syms[in[i-2]];
167 RansEncSymbol *s1 = &syms[in[i-3]];
168 RansEncSymbol *s0 = &syms[in[i-4]];
169
170 RansEncPutSymbol(&rans3, &ptr, s3);
171 RansEncPutSymbol(&rans2, &ptr, s2);
172 RansEncPutSymbol(&rans1, &ptr, s1);
173 RansEncPutSymbol(&rans0, &ptr, s0);
174 }
175
176 RansEncFlush(&rans3, &ptr);
177 RansEncFlush(&rans2, &ptr);
178 RansEncFlush(&rans1, &ptr);
179 RansEncFlush(&rans0, &ptr);
180
181 // Finalise block size and return it
182 *out_size = (out_end - ptr) + tab_size;
183
184 cp = out_buf;
185
186 *cp++ = 0; // order
187 *cp++ = ((*out_size-9)>> 0) & 0xff;
188 *cp++ = ((*out_size-9)>> 8) & 0xff;
189 *cp++ = ((*out_size-9)>>16) & 0xff;
190 *cp++ = ((*out_size-9)>>24) & 0xff;
191
192 *cp++ = (in_size>> 0) & 0xff;
193 *cp++ = (in_size>> 8) & 0xff;
194 *cp++ = (in_size>>16) & 0xff;
195 *cp++ = (in_size>>24) & 0xff;
196
197 memmove(out_buf + tab_size, ptr, out_end-ptr);
198
199 return out_buf;
200 }
201
202 typedef struct {
203 unsigned char R[TOTFREQ];
204 } ari_decoder;
205
rans_uncompress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)206 unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size,
207 unsigned int *out_size) {
208 /* Load in the static tables */
209 unsigned char *cp = in + 9;
210 unsigned char *cp_end = in + in_size;
211 int i, j, x, rle;
212 unsigned int out_sz, in_sz;
213 char *out_buf;
214 ari_decoder D;
215 RansDecSymbol syms[256];
216
217 if (in_size < 26) // Need at least this many bytes just to start
218 return NULL;
219
220 if (*in++ != 0) // Order-0 check
221 return NULL;
222
223 in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | ((in[3])<<24);
224 out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | ((in[7])<<24);
225 if (in_sz != in_size-9)
226 return NULL;
227
228 // Precompute reverse lookup of frequency.
229 rle = x = 0;
230 j = *cp++;
231 do {
232 int F, C;
233 if (cp > cp_end - 16) return NULL; // Not enough input bytes left
234 if ((F = *cp++) >= 128) {
235 F &= ~128;
236 F = ((F & 127) << 8) | *cp++;
237 }
238 C = x;
239
240 RansDecSymbolInit(&syms[j], C, F);
241
242 /* Build reverse lookup table */
243 if (x + F > TOTFREQ)
244 return NULL;
245 memset(&D.R[x], j, F);
246
247 x += F;
248
249 if (!rle && j+1 == *cp) {
250 j = *cp++;
251 rle = *cp++;
252 } else if (rle) {
253 rle--;
254 j++;
255 if (j > 255)
256 return NULL;
257 } else {
258 j = *cp++;
259 }
260 } while(j);
261
262 if (x < TOTFREQ-1 || x > TOTFREQ)
263 return NULL;
264 if (x < TOTFREQ) // historically we fill 4095, not 4096
265 D.R[x] = D.R[x-1];
266
267 if (cp > cp_end - 16) return NULL; // Not enough input bytes left
268
269 RansState rans0, rans1, rans2, rans3;
270 uint8_t *ptr = cp;
271 RansDecInit(&rans0, &ptr);
272 RansDecInit(&rans1, &ptr);
273 RansDecInit(&rans2, &ptr);
274 RansDecInit(&rans3, &ptr);
275
276 out_buf = malloc(out_sz);
277 if (!out_buf)
278 return NULL;
279
280 int out_end = (out_sz&~3);
281
282 RansState R[4];
283 R[0] = rans0;
284 R[1] = rans1;
285 R[2] = rans2;
286 R[3] = rans3;
287 uint32_t mask = (1u << TF_SHIFT)-1;
288
289 for (i=0; i < out_end; i+=4) {
290 uint32_t m[4] = {R[0] & mask,
291 R[1] & mask,
292 R[2] & mask,
293 R[3] & mask};
294 uint8_t c[4] = {D.R[m[0]],
295 D.R[m[1]],
296 D.R[m[2]],
297 D.R[m[3]]};
298 out_buf[i+0] = c[0];
299 out_buf[i+1] = c[1];
300 out_buf[i+2] = c[2];
301 out_buf[i+3] = c[3];
302
303 // In theory all TOTFREQ elements of D.R are filled out, but it's
304 // possible this may not be true (invalid input). We could
305 // check with x == TOTFREQ after filling out D.R matrix, but
306 // for historical reasons this sums to TOTFREQ-1 leaving one
307 // byte in D.R uninitialised. Or we could check here that
308 // syms[c[0..3]].freq > 0 and initialising syms, but that is
309 // slow.
310 //
311 // We take the former approach and accept a potential for garbage in
312 // -> garbage out in the rare 1 in TOTFREQ case as the overhead of
313 // continuous validation of freq > 0 is steep on this tight loop.
314
315 // RansDecAdvanceSymbolStep(&R[0], &syms[c[0]], TF_SHIFT);
316 // RansDecAdvanceSymbolStep(&R[1], &syms[c[1]], TF_SHIFT);
317 // RansDecAdvanceSymbolStep(&R[2], &syms[c[2]], TF_SHIFT);
318 // RansDecAdvanceSymbolStep(&R[3], &syms[c[3]], TF_SHIFT);
319 R[0] = syms[c[0]].freq * (R[0]>>TF_SHIFT);
320 R[0] += m[0] - syms[c[0]].start;
321 R[1] = syms[c[1]].freq * (R[1]>>TF_SHIFT);
322 R[1] += m[1] - syms[c[1]].start;
323 R[2] = syms[c[2]].freq * (R[2]>>TF_SHIFT);
324 R[2] += m[2] - syms[c[2]].start;
325 R[3] = syms[c[3]].freq * (R[3]>>TF_SHIFT);
326 R[3] += m[3] - syms[c[3]].start;
327
328 if (ptr < cp_end - 8) { // Each renorm reads no more than 2 bytes
329 RansDecRenorm(&R[0], &ptr);
330 RansDecRenorm(&R[1], &ptr);
331 RansDecRenorm(&R[2], &ptr);
332 RansDecRenorm(&R[3], &ptr);
333 } else {
334 RansDecRenormSafe(&R[0], &ptr, cp_end);
335 RansDecRenormSafe(&R[1], &ptr, cp_end);
336 RansDecRenormSafe(&R[2], &ptr, cp_end);
337 RansDecRenormSafe(&R[3], &ptr, cp_end);
338 }
339 }
340
341 switch(out_sz&3) {
342 case 3:
343 out_buf[out_end+2] = D.R[RansDecGet(&R[2], TF_SHIFT)];
344 case 2:
345 out_buf[out_end+1] = D.R[RansDecGet(&R[1], TF_SHIFT)];
346 case 1:
347 out_buf[out_end] = D.R[RansDecGet(&R[0], TF_SHIFT)];
348 default:
349 break;
350 }
351
352 *out_size = out_sz;
353
354 return (unsigned char *)out_buf;
355 }
356
rans_compress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)357 unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size,
358 unsigned int *out_size) {
359 unsigned char *out_buf = NULL, *out_end, *cp;
360 unsigned int last_i, tab_size, rle_i, rle_j;
361 RansEncSymbol (*syms)[256] = NULL; /* syms[256][256] */
362 int (*F)[256] = NULL; /* F[256][256] */
363 int *T = NULL; /* T[256] */
364 int i, j;
365 unsigned char c;
366
367 if (in_size < 4)
368 return rans_compress_O0(in, in_size, out_size);
369
370 syms = malloc(256 * sizeof(*syms));
371 if (!syms) goto cleanup;
372 F = calloc(256, sizeof(*F));
373 if (!F) goto cleanup;
374 T = calloc(256, sizeof(*T));
375 if (!T) goto cleanup;
376 out_buf = malloc(1.05*in_size + 257*257*3 + 9);
377 if (!out_buf) goto cleanup;
378
379 out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
380 cp = out_buf+9;
381
382 //for (last = 0, i=in_size-1; i>=0; i--) {
383 // F[last][c = in[i]]++;
384 // T[last]++;
385 // last = c;
386 //}
387
388 for (last_i=i=0; i<in_size; i++) {
389 F[last_i][c = in[i]]++;
390 T[last_i]++;
391 last_i = c;
392 }
393 F[0][in[1*(in_size>>2)]]++;
394 F[0][in[2*(in_size>>2)]]++;
395 F[0][in[3*(in_size>>2)]]++;
396 T[0]+=3;
397
398 // Normalise so T[i] == TOTFREQ
399 for (rle_i = i = 0; i < 256; i++) {
400 int t2, m, M;
401 unsigned int x;
402
403 if (T[i] == 0)
404 continue;
405
406 //uint64_t p = (TOTFREQ * TOTFREQ) / t;
407 double p = ((double)TOTFREQ)/T[i];
408 normalise_harder:
409 for (t2 = m = M = j = 0; j < 256; j++) {
410 if (!F[i][j])
411 continue;
412
413 if (m < F[i][j])
414 m = F[i][j], M = j;
415
416 //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0)
417 if ((F[i][j] *= p) == 0)
418 F[i][j] = 1;
419 t2 += F[i][j];
420 }
421
422 t2++;
423 if (t2 < TOTFREQ) {
424 F[i][M] += TOTFREQ-t2;
425 } else if (t2-TOTFREQ >= F[i][M]/2) {
426 // Corner case to avoid excessive frequency reduction
427 p = .98; goto normalise_harder;
428 } else {
429 F[i][M] -= t2-TOTFREQ;
430 }
431
432 // Store frequency table
433 // i
434 if (rle_i) {
435 rle_i--;
436 } else {
437 *cp++ = i;
438 // FIXME: could use order-0 statistics to observe which alphabet
439 // symbols are present and base RLE on that ordering instead.
440 if (i && T[i-1]) {
441 for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++)
442 ;
443 rle_i -= i+1;
444 *cp++ = rle_i;
445 }
446 }
447
448 int *F_i_ = F[i];
449 x = 0;
450 rle_j = 0;
451 for (j = 0; j < 256; j++) {
452 if (F_i_[j]) {
453 //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x);
454
455 // j
456 if (rle_j) {
457 rle_j--;
458 } else {
459 *cp++ = j;
460 if (!rle_j && j && F_i_[j-1]) {
461 for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++)
462 ;
463 rle_j -= j+1;
464 *cp++ = rle_j;
465 }
466 }
467
468 // F_i_[j]
469 if (F_i_[j]<128) {
470 *cp++ = F_i_[j];
471 } else {
472 *cp++ = 128 | (F_i_[j]>>8);
473 *cp++ = F_i_[j]&0xff;
474 }
475
476 RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT);
477 x += F_i_[j];
478 }
479 }
480 *cp++ = 0;
481 }
482 *cp++ = 0;
483
484 //write(1, out_buf+4, cp-(out_buf+4));
485 tab_size = cp - out_buf;
486 assert(tab_size < 257*257*3);
487
488 RansState rans0, rans1, rans2, rans3;
489 RansEncInit(&rans0);
490 RansEncInit(&rans1);
491 RansEncInit(&rans2);
492 RansEncInit(&rans3);
493
494 uint8_t* ptr = out_end;
495
496 int isz4 = in_size>>2;
497 int i0 = 1*isz4-2;
498 int i1 = 2*isz4-2;
499 int i2 = 3*isz4-2;
500 int i3 = 4*isz4-2;
501
502 unsigned char l0 = in[i0+1];
503 unsigned char l1 = in[i1+1];
504 unsigned char l2 = in[i2+1];
505 unsigned char l3 = in[i3+1];
506
507 // Deal with the remainder
508 l3 = in[in_size-1];
509 for (i3 = in_size-2; i3 > 4*isz4-2; i3--) {
510 unsigned char c3 = in[i3];
511 RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]);
512 l3 = c3;
513 }
514
515 for (; i0 >= 0; i0--, i1--, i2--, i3--) {
516 unsigned char c0, c1, c2, c3;
517 RansEncSymbol *s3 = &syms[c3 = in[i3]][l3];
518 RansEncSymbol *s2 = &syms[c2 = in[i2]][l2];
519 RansEncSymbol *s1 = &syms[c1 = in[i1]][l1];
520 RansEncSymbol *s0 = &syms[c0 = in[i0]][l0];
521
522 RansEncPutSymbol(&rans3, &ptr, s3);
523 RansEncPutSymbol(&rans2, &ptr, s2);
524 RansEncPutSymbol(&rans1, &ptr, s1);
525 RansEncPutSymbol(&rans0, &ptr, s0);
526
527 l0 = c0;
528 l1 = c1;
529 l2 = c2;
530 l3 = c3;
531 }
532
533 RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]);
534 RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]);
535 RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]);
536 RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]);
537
538 RansEncFlush(&rans3, &ptr);
539 RansEncFlush(&rans2, &ptr);
540 RansEncFlush(&rans1, &ptr);
541 RansEncFlush(&rans0, &ptr);
542
543 *out_size = (out_end - ptr) + tab_size;
544
545 cp = out_buf;
546 *cp++ = 1; // order
547
548 *cp++ = ((*out_size-9)>> 0) & 0xff;
549 *cp++ = ((*out_size-9)>> 8) & 0xff;
550 *cp++ = ((*out_size-9)>>16) & 0xff;
551 *cp++ = ((*out_size-9)>>24) & 0xff;
552
553 *cp++ = (in_size>> 0) & 0xff;
554 *cp++ = (in_size>> 8) & 0xff;
555 *cp++ = (in_size>>16) & 0xff;
556 *cp++ = (in_size>>24) & 0xff;
557
558 memmove(out_buf + tab_size, ptr, out_end-ptr);
559
560 cleanup:
561 free(syms);
562 free(F);
563 free(T);
564
565 return out_buf;
566 }
567
rans_uncompress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)568 unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size,
569 unsigned int *out_size) {
570 /* Load in the static tables */
571 unsigned char *cp = in + 9;
572 unsigned char *ptr_end = in + in_size;
573 int i, j = -999, x, rle_i, rle_j;
574 unsigned int out_sz, in_sz;
575 char *out_buf = NULL;
576 ari_decoder *D = NULL; /* D[256] */
577 RansDecSymbol (*syms)[256] = NULL; /* syms[256][256] */
578
579 if (in_size < 27) // Need at least this many bytes to start
580 return NULL;
581
582 if (*in++ != 1) // Order-1 check
583 return NULL;
584
585 in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | ((in[3])<<24);
586 out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | ((in[7])<<24);
587 if (in_sz != in_size-9)
588 return NULL;
589
590 // calloc may add 2% overhead to CRAM decode, but on linux with glibc it's
591 // often the same thing due to using mmap.
592 D = calloc(256, sizeof(*D));
593 if (!D) goto cleanup;
594 syms = malloc(256 * sizeof(*syms));
595 if (!syms) goto cleanup;
596 /* These memsets prevent illegal memory access in syms due to
597 broken compressed data. As D is calloc'd, all illegal transitions
598 will end up in either row or column 0 of syms. */
599 memset(&syms[0], 0, sizeof(syms[0]));
600 for (i = 1; i < 256; i++) memset(&syms[i][0], 0, sizeof(syms[0][0]));
601
602 //fprintf(stderr, "out_sz=%d\n", out_sz);
603
604 //i = *cp++;
605 rle_i = 0;
606 i = *cp++;
607 do {
608 rle_j = x = 0;
609 j = *cp++;
610 do {
611 int F, C;
612 if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
613 if ((F = *cp++) >= 128) {
614 F &= ~128;
615 F = ((F & 127) << 8) | *cp++;
616 }
617 C = x;
618
619 //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C);
620
621 if (!F)
622 F = TOTFREQ;
623
624 RansDecSymbolInit(&syms[i][j], C, F);
625
626 /* Build reverse lookup table */
627 if (x + F > TOTFREQ)
628 goto cleanup;
629 memset(&D[i].R[x], j, F);
630
631 x += F;
632
633 if (!rle_j && j+1 == *cp) {
634 j = *cp++;
635 rle_j = *cp++;
636 } else if (rle_j) {
637 rle_j--;
638 j++;
639 if (j > 255)
640 goto cleanup;
641 } else {
642 j = *cp++;
643 }
644 } while(j);
645
646 if (x < TOTFREQ-1 || x > TOTFREQ)
647 goto cleanup;
648 if (x < TOTFREQ) // historically we fill 4095, not 4096
649 D[i].R[x] = D[i].R[x-1];
650
651 if (!rle_i && i+1 == *cp) {
652 i = *cp++;
653 rle_i = *cp++;
654 } else if (rle_i) {
655 rle_i--;
656 i++;
657 if (i > 255)
658 goto cleanup;
659 } else {
660 i = *cp++;
661 }
662 } while (i);
663
664 // Precompute reverse lookup of frequency.
665
666 RansState rans0, rans1, rans2, rans3;
667 uint8_t *ptr = cp;
668 if (ptr > ptr_end - 16) goto cleanup; // Not enough input bytes left
669 RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup;
670 RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup;
671 RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup;
672 RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup;
673
674 int isz4 = out_sz>>2;
675 int l0 = 0;
676 int l1 = 0;
677 int l2 = 0;
678 int l3 = 0;
679 int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4};
680
681 RansState R[4];
682 R[0] = rans0;
683 R[1] = rans1;
684 R[2] = rans2;
685 R[3] = rans3;
686
687 /* Allocate output buffer */
688 out_buf = malloc(out_sz);
689 if (!out_buf) goto cleanup;
690
691 for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) {
692 uint32_t m[4] = {R[0] & ((1u << TF_SHIFT)-1),
693 R[1] & ((1u << TF_SHIFT)-1),
694 R[2] & ((1u << TF_SHIFT)-1),
695 R[3] & ((1u << TF_SHIFT)-1)};
696
697 uint8_t c[4] = {D[l0].R[m[0]],
698 D[l1].R[m[1]],
699 D[l2].R[m[2]],
700 D[l3].R[m[3]]};
701
702 out_buf[i4[0]] = c[0];
703 out_buf[i4[1]] = c[1];
704 out_buf[i4[2]] = c[2];
705 out_buf[i4[3]] = c[3];
706
707 //RansDecAdvanceSymbolStep(&R[0], &syms[l0][c[0]], TF_SHIFT);
708 //RansDecAdvanceSymbolStep(&R[1], &syms[l1][c[1]], TF_SHIFT);
709 //RansDecAdvanceSymbolStep(&R[2], &syms[l2][c[2]], TF_SHIFT);
710 //RansDecAdvanceSymbolStep(&R[3], &syms[l3][c[3]], TF_SHIFT);
711
712 R[0] = syms[l0][c[0]].freq * (R[0]>>TF_SHIFT);
713 R[0] += m[0] - syms[l0][c[0]].start;
714 R[1] = syms[l1][c[1]].freq * (R[1]>>TF_SHIFT);
715 R[1] += m[1] - syms[l1][c[1]].start;
716 R[2] = syms[l2][c[2]].freq * (R[2]>>TF_SHIFT);
717 R[2] += m[2] - syms[l2][c[2]].start;
718 R[3] = syms[l3][c[3]].freq * (R[3]>>TF_SHIFT);
719 R[3] += m[3] - syms[l3][c[3]].start;
720
721 if (ptr < ptr_end - 8) { // Each renorm reads no more than 2 bytes
722 RansDecRenorm(&R[0], &ptr);
723 RansDecRenorm(&R[1], &ptr);
724 RansDecRenorm(&R[2], &ptr);
725 RansDecRenorm(&R[3], &ptr);
726 } else {
727 RansDecRenormSafe(&R[0], &ptr, ptr_end);
728 RansDecRenormSafe(&R[1], &ptr, ptr_end);
729 RansDecRenormSafe(&R[2], &ptr, ptr_end);
730 RansDecRenormSafe(&R[3], &ptr, ptr_end);
731 }
732
733 l0 = c[0];
734 l1 = c[1];
735 l2 = c[2];
736 l3 = c[3];
737 }
738
739 // Remainder
740 for (; i4[3] < out_sz; i4[3]++) {
741 unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)];
742 out_buf[i4[3]] = c3;
743
744 uint32_t m = R[3] & ((1u << TF_SHIFT)-1);
745 R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start;
746 RansDecRenormSafe(&R[3], &ptr, ptr_end);
747 l3 = c3;
748 }
749
750 *out_size = out_sz;
751
752 cleanup:
753 if (D)
754 free(D);
755 free(syms);
756
757 return (unsigned char *)out_buf;
758 }
759
760 /*-----------------------------------------------------------------------------
761 * Simple interface to the order-0 vs order-1 encoders and decoders.
762 */
rans_compress(unsigned char * in,unsigned int in_size,unsigned int * out_size,int order)763 unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
764 unsigned int *out_size, int order) {
765 return order
766 ? rans_compress_O1(in, in_size, out_size)
767 : rans_compress_O0(in, in_size, out_size);
768 }
769
rans_uncompress(unsigned char * in,unsigned int in_size,unsigned int * out_size)770 unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
771 unsigned int *out_size) {
772 /* Both rans_uncompress functions need to be able to read at least 9
773 bytes. */
774 if (in_size < 9)
775 return NULL;
776 return in[0]
777 ? rans_uncompress_O1(in, in_size, out_size)
778 : rans_uncompress_O0(in, in_size, out_size);
779 }
780
781
782 #ifdef TEST_MAIN
783 /*-----------------------------------------------------------------------------
784 * Main.
785 *
786 * This is a simple command line tool for testing order-0 and order-1
787 * compression using the rANS codec. Simply compile with
788 *
789 * gcc -DTEST_MAIN -O3 -I. cram/rANS_static.c -o cram/rANS_static
790 *
791 * Usage: cram/rANS_static -o0 < file > file.o0
792 * cram/rANS_static -d < file.o0 > file2
793 *
794 * cram/rANS_static -o1 < file > file.o1
795 * cram/rANS_static -d < file.o1 > file2
796 */
main(int argc,char ** argv)797 int main(int argc, char **argv) {
798 int opt, order = 0;
799 unsigned char in_buf[BLK_SIZE2+257*257*3];
800 int decode = 0;
801 FILE *infp = stdin, *outfp = stdout;
802 struct timeval tv1, tv2;
803 size_t bytes = 0;
804
805 extern char *optarg;
806 extern int optind;
807
808 while ((opt = getopt(argc, argv, "o:d")) != -1) {
809 switch (opt) {
810 case 'o':
811 order = atoi(optarg);
812 break;
813
814 case 'd':
815 decode = 1;
816 break;
817 }
818 }
819
820 order = order ? 1 : 0; // Only support O(0) and O(1)
821
822 if (optind < argc) {
823 if (!(infp = fopen(argv[optind], "rb"))) {
824 perror(argv[optind]);
825 return 1;
826 }
827 optind++;
828 }
829
830 if (optind < argc) {
831 if (!(outfp = fopen(argv[optind], "wb"))) {
832 perror(argv[optind]);
833 return 1;
834 }
835 optind++;
836 }
837
838 gettimeofday(&tv1, NULL);
839
840 if (decode) {
841 // Only used in some test implementations of RC_GetFreq()
842 //RC_init();
843 //RC_init2();
844
845 for (;;) {
846 uint32_t in_size, out_size;
847 unsigned char *out;
848
849 if (9 != fread(in_buf, 1, 9, infp))
850 break;
851 in_size = *(int *)&in_buf[1];
852 if (in_size != fread(in_buf+9, 1, in_size, infp)) {
853 fprintf(stderr, "Truncated input\n");
854 exit(1);
855 }
856 out = rans_uncompress(in_buf, in_size+9, &out_size);
857 if (!out)
858 abort();
859
860 fwrite(out, 1, out_size, outfp);
861 free(out);
862
863 bytes += out_size;
864 }
865 } else {
866 for (;;) {
867 uint32_t in_size, out_size;
868 unsigned char *out;
869
870 in_size = fread(in_buf, 1, BLK_SIZE, infp);
871 if (in_size <= 0)
872 break;
873
874 out = rans_compress(in_buf, in_size, &out_size, order);
875
876 fwrite(out, 1, out_size, outfp);
877 free(out);
878
879 bytes += in_size;
880 }
881 }
882
883 gettimeofday(&tv2, NULL);
884
885 fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n",
886 (long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
887 tv2.tv_usec - tv1.tv_usec,
888 (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
889 tv2.tv_usec - tv1.tv_usec));
890 return 0;
891 }
892 #endif
893