1 /*
2  * Copyright (c) 2014 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *       Institute nor the names of its contributors may be used to endorse
18  *       or promote products derived from this software without specific
19  *       prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Author: James Bonfield, Wellcome Trust Sanger Institute. 2014
36  */
37 
38 #include <config.h>
39 
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <unistd.h>
44 #include <assert.h>
45 #include <string.h>
46 #include <sys/time.h>
47 
48 #include "cram/rANS_static.h"
49 #include "cram/rANS_byte.h"
50 
51 #define TF_SHIFT 12
52 #define TOTFREQ (1<<TF_SHIFT)
53 
54 #define ABS(a) ((a)>0?(a):-(a))
55 #ifndef BLK_SIZE
56 #  define BLK_SIZE 1024*1024
57 #endif
58 
59 // Room to allow for expanded BLK_SIZE on worst case compression.
60 #define BLK_SIZE2 ((int)(1.05*BLK_SIZE))
61 
62 /*-----------------------------------------------------------------------------
63  * Memory to memory compression functions.
64  *
65  * These are original versions without any manual loop unrolling. They
66  * are easier to understand, but can be up to 2x slower.
67  */
68 
rans_compress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)69 unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,
70                                 unsigned int *out_size) {
71     unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);
72     unsigned char *cp, *out_end;
73     RansEncSymbol syms[256];
74     RansState rans0, rans1, rans2, rans3;
75     uint8_t* ptr;
76     int F[256] = {0}, i, j, tab_size, rle, x, fsum = 0;
77     int m = 0, M = 0;
78     uint64_t tr;
79 
80     if (!out_buf)
81         return NULL;
82 
83     ptr = out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
84 
85     // Compute statistics
86     for (i = 0; i < in_size; i++) {
87         F[in[i]]++;
88     }
89     tr = ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size;
90  normalise_harder:
91     // Normalise so T[i] == TOTFREQ
92     for (m = M = j = 0; j < 256; j++) {
93         if (!F[j])
94             continue;
95 
96         if (m < F[j])
97             m = F[j], M = j;
98 
99         if ((F[j] = (F[j]*tr)>>31) == 0)
100             F[j] = 1;
101         fsum += F[j];
102     }
103 
104     fsum++;
105     if (fsum < TOTFREQ) {
106         F[M] += TOTFREQ-fsum;
107     } else if (fsum-TOTFREQ > F[M]/2) {
108         // Corner case to avoid excessive frequency reduction
109         tr = 2104533975; goto normalise_harder; // equiv to *0.98.
110     } else {
111         F[M] -= fsum-TOTFREQ;
112     }
113 
114     //printf("F[%d]=%d\n", M, F[M]);
115     assert(F[M]>0);
116 
117     // Encode statistics.
118     cp = out_buf+9;
119 
120     for (x = rle = j = 0; j < 256; j++) {
121         if (F[j]) {
122             // j
123             if (rle) {
124                 rle--;
125             } else {
126                 *cp++ = j;
127                 if (!rle && j && F[j-1])  {
128                     for(rle=j+1; rle<256 && F[rle]; rle++)
129                         ;
130                     rle -= j+1;
131                     *cp++ = rle;
132                 }
133                 //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]);
134             }
135 
136             // F[j]
137             if (F[j]<128) {
138                 *cp++ = F[j];
139             } else {
140                 *cp++ = 128 | (F[j]>>8);
141                 *cp++ = F[j]&0xff;
142             }
143             RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
144             x += F[j];
145         }
146     }
147     *cp++ = 0;
148 
149     //write(1, out_buf+4, cp-(out_buf+4));
150     tab_size = cp-out_buf;
151 
152     RansEncInit(&rans0);
153     RansEncInit(&rans1);
154     RansEncInit(&rans2);
155     RansEncInit(&rans3);
156 
157     switch (i=(in_size&3)) {
158     case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]);
159     case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]);
160     case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]);
161     case 0:
162         break;
163     }
164     for (i=(in_size &~3); i>0; i-=4) {
165         RansEncSymbol *s3 = &syms[in[i-1]];
166         RansEncSymbol *s2 = &syms[in[i-2]];
167         RansEncSymbol *s1 = &syms[in[i-3]];
168         RansEncSymbol *s0 = &syms[in[i-4]];
169 
170         RansEncPutSymbol(&rans3, &ptr, s3);
171         RansEncPutSymbol(&rans2, &ptr, s2);
172         RansEncPutSymbol(&rans1, &ptr, s1);
173         RansEncPutSymbol(&rans0, &ptr, s0);
174     }
175 
176     RansEncFlush(&rans3, &ptr);
177     RansEncFlush(&rans2, &ptr);
178     RansEncFlush(&rans1, &ptr);
179     RansEncFlush(&rans0, &ptr);
180 
181     // Finalise block size and return it
182     *out_size = (out_end - ptr) + tab_size;
183 
184     cp = out_buf;
185 
186     *cp++ = 0; // order
187     *cp++ = ((*out_size-9)>> 0) & 0xff;
188     *cp++ = ((*out_size-9)>> 8) & 0xff;
189     *cp++ = ((*out_size-9)>>16) & 0xff;
190     *cp++ = ((*out_size-9)>>24) & 0xff;
191 
192     *cp++ = (in_size>> 0) & 0xff;
193     *cp++ = (in_size>> 8) & 0xff;
194     *cp++ = (in_size>>16) & 0xff;
195     *cp++ = (in_size>>24) & 0xff;
196 
197     memmove(out_buf + tab_size, ptr, out_end-ptr);
198 
199     return out_buf;
200 }
201 
202 typedef struct {
203     unsigned char R[TOTFREQ];
204 } ari_decoder;
205 
rans_uncompress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)206 unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size,
207                                   unsigned int *out_size) {
208     /* Load in the static tables */
209     unsigned char *cp = in + 9;
210     unsigned char *cp_end = in + in_size;
211     int i, j, x, rle;
212     unsigned int out_sz, in_sz;
213     char *out_buf;
214     ari_decoder D;
215     RansDecSymbol syms[256];
216 
217     if (in_size < 26) // Need at least this many bytes just to start
218         return NULL;
219 
220     if (*in++ != 0) // Order-0 check
221         return NULL;
222 
223     in_sz  = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | ((in[3])<<24);
224     out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | ((in[7])<<24);
225     if (in_sz != in_size-9)
226         return NULL;
227 
228     // Precompute reverse lookup of frequency.
229     rle = x = 0;
230     j = *cp++;
231     do {
232         int F, C;
233         if (cp > cp_end - 16) return NULL; // Not enough input bytes left
234         if ((F = *cp++) >= 128) {
235             F &= ~128;
236             F = ((F & 127) << 8) | *cp++;
237         }
238         C = x;
239 
240         RansDecSymbolInit(&syms[j], C, F);
241 
242         /* Build reverse lookup table */
243         if (x + F > TOTFREQ)
244             return NULL;
245         memset(&D.R[x], j, F);
246 
247         x += F;
248 
249         if (!rle && j+1 == *cp) {
250             j = *cp++;
251             rle = *cp++;
252         } else if (rle) {
253             rle--;
254             j++;
255             if (j > 255)
256                 return NULL;
257         } else {
258             j = *cp++;
259         }
260     } while(j);
261 
262     if (x < TOTFREQ-1 || x > TOTFREQ)
263         return NULL;
264     if (x < TOTFREQ) // historically we fill 4095, not 4096
265         D.R[x] = D.R[x-1];
266 
267     if (cp > cp_end - 16) return NULL; // Not enough input bytes left
268 
269     RansState rans0, rans1, rans2, rans3;
270     uint8_t *ptr = cp;
271     RansDecInit(&rans0, &ptr);
272     RansDecInit(&rans1, &ptr);
273     RansDecInit(&rans2, &ptr);
274     RansDecInit(&rans3, &ptr);
275 
276     out_buf = malloc(out_sz);
277     if (!out_buf)
278         return NULL;
279 
280     int out_end = (out_sz&~3);
281 
282     RansState R[4];
283     R[0] = rans0;
284     R[1] = rans1;
285     R[2] = rans2;
286     R[3] = rans3;
287     uint32_t mask = (1u << TF_SHIFT)-1;
288 
289     for (i=0; i < out_end; i+=4) {
290         uint32_t m[4] = {R[0] & mask,
291                          R[1] & mask,
292                          R[2] & mask,
293                          R[3] & mask};
294         uint8_t c[4] = {D.R[m[0]],
295                         D.R[m[1]],
296                         D.R[m[2]],
297                         D.R[m[3]]};
298         out_buf[i+0] = c[0];
299         out_buf[i+1] = c[1];
300         out_buf[i+2] = c[2];
301         out_buf[i+3] = c[3];
302 
303         // In theory all TOTFREQ elements of D.R are filled out, but it's
304         // possible this may not be true (invalid input).  We could
305         // check with x == TOTFREQ after filling out D.R matrix, but
306         // for historical reasons this sums to TOTFREQ-1 leaving one
307         // byte in D.R uninitialised. Or we could check here that
308         // syms[c[0..3]].freq > 0 and initialising syms, but that is
309         // slow.
310         //
311         // We take the former approach and accept a potential for garbage in
312         // -> garbage out in the rare 1 in TOTFREQ case as the overhead of
313         // continuous validation of freq > 0 is steep on this tight loop.
314 
315         // RansDecAdvanceSymbolStep(&R[0], &syms[c[0]], TF_SHIFT);
316         // RansDecAdvanceSymbolStep(&R[1], &syms[c[1]], TF_SHIFT);
317         // RansDecAdvanceSymbolStep(&R[2], &syms[c[2]], TF_SHIFT);
318         // RansDecAdvanceSymbolStep(&R[3], &syms[c[3]], TF_SHIFT);
319         R[0] = syms[c[0]].freq * (R[0]>>TF_SHIFT);
320         R[0] += m[0] - syms[c[0]].start;
321         R[1] = syms[c[1]].freq * (R[1]>>TF_SHIFT);
322         R[1] += m[1] - syms[c[1]].start;
323         R[2] = syms[c[2]].freq * (R[2]>>TF_SHIFT);
324         R[2] += m[2] - syms[c[2]].start;
325         R[3] = syms[c[3]].freq * (R[3]>>TF_SHIFT);
326         R[3] += m[3] - syms[c[3]].start;
327 
328         if (ptr < cp_end - 8) { // Each renorm reads no more than 2 bytes
329             RansDecRenorm(&R[0], &ptr);
330             RansDecRenorm(&R[1], &ptr);
331             RansDecRenorm(&R[2], &ptr);
332             RansDecRenorm(&R[3], &ptr);
333         } else {
334             RansDecRenormSafe(&R[0], &ptr, cp_end);
335             RansDecRenormSafe(&R[1], &ptr, cp_end);
336             RansDecRenormSafe(&R[2], &ptr, cp_end);
337             RansDecRenormSafe(&R[3], &ptr, cp_end);
338         }
339     }
340 
341     switch(out_sz&3) {
342     case 3:
343         out_buf[out_end+2] = D.R[RansDecGet(&R[2], TF_SHIFT)];
344     case 2:
345         out_buf[out_end+1] = D.R[RansDecGet(&R[1], TF_SHIFT)];
346     case 1:
347         out_buf[out_end] = D.R[RansDecGet(&R[0], TF_SHIFT)];
348     default:
349         break;
350     }
351 
352     *out_size = out_sz;
353 
354     return (unsigned char *)out_buf;
355 }
356 
rans_compress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)357 unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size,
358                                 unsigned int *out_size) {
359     unsigned char *out_buf = NULL, *out_end, *cp;
360     unsigned int last_i, tab_size, rle_i, rle_j;
361     RansEncSymbol (*syms)[256] = NULL;  /* syms[256][256] */
362     int (*F)[256] = NULL;               /* F[256][256]    */
363     int *T = NULL;                      /* T[256]         */
364     int i, j;
365     unsigned char c;
366 
367     if (in_size < 4)
368         return rans_compress_O0(in, in_size, out_size);
369 
370     syms = malloc(256 * sizeof(*syms));
371     if (!syms) goto cleanup;
372     F = calloc(256, sizeof(*F));
373     if (!F) goto cleanup;
374     T = calloc(256, sizeof(*T));
375     if (!T) goto cleanup;
376     out_buf = malloc(1.05*in_size + 257*257*3 + 9);
377     if (!out_buf) goto cleanup;
378 
379     out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
380     cp = out_buf+9;
381 
382     //for (last = 0, i=in_size-1; i>=0; i--) {
383     //  F[last][c = in[i]]++;
384     //  T[last]++;
385     //  last = c;
386     //}
387 
388     for (last_i=i=0; i<in_size; i++) {
389         F[last_i][c = in[i]]++;
390         T[last_i]++;
391         last_i = c;
392     }
393     F[0][in[1*(in_size>>2)]]++;
394     F[0][in[2*(in_size>>2)]]++;
395     F[0][in[3*(in_size>>2)]]++;
396     T[0]+=3;
397 
398     // Normalise so T[i] == TOTFREQ
399     for (rle_i = i = 0; i < 256; i++) {
400         int t2, m, M;
401         unsigned int x;
402 
403         if (T[i] == 0)
404             continue;
405 
406         //uint64_t p = (TOTFREQ * TOTFREQ) / t;
407         double p = ((double)TOTFREQ)/T[i];
408     normalise_harder:
409         for (t2 = m = M = j = 0; j < 256; j++) {
410             if (!F[i][j])
411                 continue;
412 
413             if (m < F[i][j])
414                 m = F[i][j], M = j;
415 
416             //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0)
417             if ((F[i][j] *= p) == 0)
418                 F[i][j] = 1;
419             t2 += F[i][j];
420         }
421 
422         t2++;
423         if (t2 < TOTFREQ) {
424             F[i][M] += TOTFREQ-t2;
425         } else if (t2-TOTFREQ >= F[i][M]/2) {
426             // Corner case to avoid excessive frequency reduction
427             p = .98; goto normalise_harder;
428         } else {
429             F[i][M] -= t2-TOTFREQ;
430         }
431 
432         // Store frequency table
433         // i
434         if (rle_i) {
435             rle_i--;
436         } else {
437             *cp++ = i;
438             // FIXME: could use order-0 statistics to observe which alphabet
439             // symbols are present and base RLE on that ordering instead.
440             if (i && T[i-1]) {
441                 for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++)
442                     ;
443                 rle_i -= i+1;
444                 *cp++ = rle_i;
445             }
446         }
447 
448         int *F_i_ = F[i];
449         x = 0;
450         rle_j = 0;
451         for (j = 0; j < 256; j++) {
452             if (F_i_[j]) {
453                 //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x);
454 
455                 // j
456                 if (rle_j) {
457                     rle_j--;
458                 } else {
459                     *cp++ = j;
460                     if (!rle_j && j && F_i_[j-1]) {
461                         for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++)
462                             ;
463                         rle_j -= j+1;
464                         *cp++ = rle_j;
465                     }
466                 }
467 
468                 // F_i_[j]
469                 if (F_i_[j]<128) {
470                     *cp++ = F_i_[j];
471                 } else {
472                     *cp++ = 128 | (F_i_[j]>>8);
473                     *cp++ = F_i_[j]&0xff;
474                 }
475 
476                 RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT);
477                 x += F_i_[j];
478             }
479         }
480         *cp++ = 0;
481     }
482     *cp++ = 0;
483 
484     //write(1, out_buf+4, cp-(out_buf+4));
485     tab_size = cp - out_buf;
486     assert(tab_size < 257*257*3);
487 
488     RansState rans0, rans1, rans2, rans3;
489     RansEncInit(&rans0);
490     RansEncInit(&rans1);
491     RansEncInit(&rans2);
492     RansEncInit(&rans3);
493 
494     uint8_t* ptr = out_end;
495 
496     int isz4 = in_size>>2;
497     int i0 = 1*isz4-2;
498     int i1 = 2*isz4-2;
499     int i2 = 3*isz4-2;
500     int i3 = 4*isz4-2;
501 
502     unsigned char l0 = in[i0+1];
503     unsigned char l1 = in[i1+1];
504     unsigned char l2 = in[i2+1];
505     unsigned char l3 = in[i3+1];
506 
507     // Deal with the remainder
508     l3 = in[in_size-1];
509     for (i3 = in_size-2; i3 > 4*isz4-2; i3--) {
510         unsigned char c3 = in[i3];
511         RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]);
512         l3 = c3;
513     }
514 
515     for (; i0 >= 0; i0--, i1--, i2--, i3--) {
516         unsigned char c0, c1, c2, c3;
517         RansEncSymbol *s3 = &syms[c3 = in[i3]][l3];
518         RansEncSymbol *s2 = &syms[c2 = in[i2]][l2];
519         RansEncSymbol *s1 = &syms[c1 = in[i1]][l1];
520         RansEncSymbol *s0 = &syms[c0 = in[i0]][l0];
521 
522         RansEncPutSymbol(&rans3, &ptr, s3);
523         RansEncPutSymbol(&rans2, &ptr, s2);
524         RansEncPutSymbol(&rans1, &ptr, s1);
525         RansEncPutSymbol(&rans0, &ptr, s0);
526 
527         l0 = c0;
528         l1 = c1;
529         l2 = c2;
530         l3 = c3;
531     }
532 
533     RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]);
534     RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]);
535     RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]);
536     RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]);
537 
538     RansEncFlush(&rans3, &ptr);
539     RansEncFlush(&rans2, &ptr);
540     RansEncFlush(&rans1, &ptr);
541     RansEncFlush(&rans0, &ptr);
542 
543     *out_size = (out_end - ptr) + tab_size;
544 
545     cp = out_buf;
546     *cp++ = 1; // order
547 
548     *cp++ = ((*out_size-9)>> 0) & 0xff;
549     *cp++ = ((*out_size-9)>> 8) & 0xff;
550     *cp++ = ((*out_size-9)>>16) & 0xff;
551     *cp++ = ((*out_size-9)>>24) & 0xff;
552 
553     *cp++ = (in_size>> 0) & 0xff;
554     *cp++ = (in_size>> 8) & 0xff;
555     *cp++ = (in_size>>16) & 0xff;
556     *cp++ = (in_size>>24) & 0xff;
557 
558     memmove(out_buf + tab_size, ptr, out_end-ptr);
559 
560  cleanup:
561     free(syms);
562     free(F);
563     free(T);
564 
565     return out_buf;
566 }
567 
rans_uncompress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)568 unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size,
569                                   unsigned int *out_size) {
570     /* Load in the static tables */
571     unsigned char *cp = in + 9;
572     unsigned char *ptr_end = in + in_size;
573     int i, j = -999, x, rle_i, rle_j;
574     unsigned int out_sz, in_sz;
575     char *out_buf = NULL;
576     ari_decoder *D = NULL;              /* D[256] */
577     RansDecSymbol (*syms)[256] = NULL;  /* syms[256][256] */
578 
579     if (in_size < 27) // Need at least this many bytes to start
580         return NULL;
581 
582     if (*in++ != 1) // Order-1 check
583         return NULL;
584 
585     in_sz  = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | ((in[3])<<24);
586     out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | ((in[7])<<24);
587     if (in_sz != in_size-9)
588         return NULL;
589 
590     // calloc may add 2% overhead to CRAM decode, but on linux with glibc it's
591     // often the same thing due to using mmap.
592     D = calloc(256, sizeof(*D));
593     if (!D) goto cleanup;
594     syms = malloc(256 * sizeof(*syms));
595     if (!syms) goto cleanup;
596     /* These memsets prevent illegal memory access in syms due to
597        broken compressed data.  As D is calloc'd, all illegal transitions
598        will end up in either row or column 0 of syms. */
599     memset(&syms[0], 0, sizeof(syms[0]));
600     for (i = 1; i < 256; i++) memset(&syms[i][0], 0, sizeof(syms[0][0]));
601 
602     //fprintf(stderr, "out_sz=%d\n", out_sz);
603 
604     //i = *cp++;
605     rle_i = 0;
606     i = *cp++;
607     do {
608         rle_j = x = 0;
609         j = *cp++;
610         do {
611             int F, C;
612             if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
613             if ((F = *cp++) >= 128) {
614                 F &= ~128;
615                 F = ((F & 127) << 8) | *cp++;
616             }
617             C = x;
618 
619             //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C);
620 
621             if (!F)
622                 F = TOTFREQ;
623 
624             RansDecSymbolInit(&syms[i][j], C, F);
625 
626             /* Build reverse lookup table */
627             if (x + F > TOTFREQ)
628                 goto cleanup;
629             memset(&D[i].R[x], j, F);
630 
631             x += F;
632 
633             if (!rle_j && j+1 == *cp) {
634                 j = *cp++;
635                 rle_j = *cp++;
636             } else if (rle_j) {
637                 rle_j--;
638                 j++;
639                 if (j > 255)
640                     goto cleanup;
641             } else {
642                 j = *cp++;
643             }
644         } while(j);
645 
646         if (x < TOTFREQ-1 || x > TOTFREQ)
647             goto cleanup;
648         if (x < TOTFREQ) // historically we fill 4095, not 4096
649             D[i].R[x] = D[i].R[x-1];
650 
651         if (!rle_i && i+1 == *cp) {
652             i = *cp++;
653             rle_i = *cp++;
654         } else if (rle_i) {
655             rle_i--;
656             i++;
657             if (i > 255)
658                 goto cleanup;
659         } else {
660             i = *cp++;
661         }
662     } while (i);
663 
664     // Precompute reverse lookup of frequency.
665 
666     RansState rans0, rans1, rans2, rans3;
667     uint8_t *ptr = cp;
668     if (ptr > ptr_end - 16) goto cleanup; // Not enough input bytes left
669     RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup;
670     RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup;
671     RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup;
672     RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup;
673 
674     int isz4 = out_sz>>2;
675     int l0 = 0;
676     int l1 = 0;
677     int l2 = 0;
678     int l3 = 0;
679     int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4};
680 
681     RansState R[4];
682     R[0] = rans0;
683     R[1] = rans1;
684     R[2] = rans2;
685     R[3] = rans3;
686 
687     /* Allocate output buffer */
688     out_buf = malloc(out_sz);
689     if (!out_buf) goto cleanup;
690 
691     for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) {
692         uint32_t m[4] = {R[0] & ((1u << TF_SHIFT)-1),
693                          R[1] & ((1u << TF_SHIFT)-1),
694                          R[2] & ((1u << TF_SHIFT)-1),
695                          R[3] & ((1u << TF_SHIFT)-1)};
696 
697         uint8_t c[4] = {D[l0].R[m[0]],
698                         D[l1].R[m[1]],
699                         D[l2].R[m[2]],
700                         D[l3].R[m[3]]};
701 
702         out_buf[i4[0]] = c[0];
703         out_buf[i4[1]] = c[1];
704         out_buf[i4[2]] = c[2];
705         out_buf[i4[3]] = c[3];
706 
707         //RansDecAdvanceSymbolStep(&R[0], &syms[l0][c[0]], TF_SHIFT);
708         //RansDecAdvanceSymbolStep(&R[1], &syms[l1][c[1]], TF_SHIFT);
709         //RansDecAdvanceSymbolStep(&R[2], &syms[l2][c[2]], TF_SHIFT);
710         //RansDecAdvanceSymbolStep(&R[3], &syms[l3][c[3]], TF_SHIFT);
711 
712         R[0] = syms[l0][c[0]].freq * (R[0]>>TF_SHIFT);
713         R[0] += m[0] - syms[l0][c[0]].start;
714         R[1] = syms[l1][c[1]].freq * (R[1]>>TF_SHIFT);
715         R[1] += m[1] - syms[l1][c[1]].start;
716         R[2] = syms[l2][c[2]].freq * (R[2]>>TF_SHIFT);
717         R[2] += m[2] - syms[l2][c[2]].start;
718         R[3] = syms[l3][c[3]].freq * (R[3]>>TF_SHIFT);
719         R[3] += m[3] - syms[l3][c[3]].start;
720 
721         if (ptr < ptr_end - 8) { // Each renorm reads no more than 2 bytes
722             RansDecRenorm(&R[0], &ptr);
723             RansDecRenorm(&R[1], &ptr);
724             RansDecRenorm(&R[2], &ptr);
725             RansDecRenorm(&R[3], &ptr);
726         } else {
727             RansDecRenormSafe(&R[0], &ptr, ptr_end);
728             RansDecRenormSafe(&R[1], &ptr, ptr_end);
729             RansDecRenormSafe(&R[2], &ptr, ptr_end);
730             RansDecRenormSafe(&R[3], &ptr, ptr_end);
731         }
732 
733         l0 = c[0];
734         l1 = c[1];
735         l2 = c[2];
736         l3 = c[3];
737     }
738 
739     // Remainder
740     for (; i4[3] < out_sz; i4[3]++) {
741         unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)];
742         out_buf[i4[3]] = c3;
743 
744         uint32_t m = R[3] & ((1u << TF_SHIFT)-1);
745         R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start;
746         RansDecRenormSafe(&R[3], &ptr, ptr_end);
747         l3 = c3;
748     }
749 
750     *out_size = out_sz;
751 
752  cleanup:
753     if (D)
754         free(D);
755     free(syms);
756 
757     return (unsigned char *)out_buf;
758 }
759 
760 /*-----------------------------------------------------------------------------
761  * Simple interface to the order-0 vs order-1 encoders and decoders.
762  */
rans_compress(unsigned char * in,unsigned int in_size,unsigned int * out_size,int order)763 unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
764                              unsigned int *out_size, int order) {
765     return order
766         ? rans_compress_O1(in, in_size, out_size)
767         : rans_compress_O0(in, in_size, out_size);
768 }
769 
rans_uncompress(unsigned char * in,unsigned int in_size,unsigned int * out_size)770 unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
771                                unsigned int *out_size) {
772     /* Both rans_uncompress functions need to be able to read at least 9
773        bytes. */
774     if (in_size < 9)
775         return NULL;
776     return in[0]
777         ? rans_uncompress_O1(in, in_size, out_size)
778         : rans_uncompress_O0(in, in_size, out_size);
779 }
780 
781 
782 #ifdef TEST_MAIN
783 /*-----------------------------------------------------------------------------
784  * Main.
785  *
786  * This is a simple command line tool for testing order-0 and order-1
787  * compression using the rANS codec. Simply compile with
788  *
789  * gcc -DTEST_MAIN -O3 -I. cram/rANS_static.c -o cram/rANS_static
790  *
791  * Usage: cram/rANS_static -o0 < file    > file.o0
792  *        cram/rANS_static -d  < file.o0 > file2
793  *
794  *        cram/rANS_static -o1 < file    > file.o1
795  *        cram/rANS_static -d  < file.o1 > file2
796  */
main(int argc,char ** argv)797 int main(int argc, char **argv) {
798     int opt, order = 0;
799     unsigned char in_buf[BLK_SIZE2+257*257*3];
800     int decode = 0;
801     FILE *infp = stdin, *outfp = stdout;
802     struct timeval tv1, tv2;
803     size_t bytes = 0;
804 
805     extern char *optarg;
806     extern int optind;
807 
808     while ((opt = getopt(argc, argv, "o:d")) != -1) {
809         switch (opt) {
810         case 'o':
811             order = atoi(optarg);
812             break;
813 
814         case 'd':
815             decode = 1;
816             break;
817         }
818     }
819 
820     order = order ? 1 : 0; // Only support O(0) and O(1)
821 
822     if (optind < argc) {
823         if (!(infp = fopen(argv[optind], "rb"))) {
824             perror(argv[optind]);
825             return 1;
826         }
827         optind++;
828     }
829 
830     if (optind < argc) {
831         if (!(outfp = fopen(argv[optind], "wb"))) {
832             perror(argv[optind]);
833             return 1;
834         }
835         optind++;
836     }
837 
838     gettimeofday(&tv1, NULL);
839 
840     if (decode) {
841         // Only used in some test implementations of RC_GetFreq()
842         //RC_init();
843         //RC_init2();
844 
845         for (;;) {
846             uint32_t in_size, out_size;
847             unsigned char *out;
848 
849             if (9 != fread(in_buf, 1, 9, infp))
850                 break;
851             in_size = *(int *)&in_buf[1];
852             if (in_size != fread(in_buf+9, 1, in_size, infp)) {
853                 fprintf(stderr, "Truncated input\n");
854                 exit(1);
855             }
856             out = rans_uncompress(in_buf, in_size+9, &out_size);
857             if (!out)
858                 abort();
859 
860             fwrite(out, 1, out_size, outfp);
861             free(out);
862 
863             bytes += out_size;
864         }
865     } else {
866         for (;;) {
867             uint32_t in_size, out_size;
868             unsigned char *out;
869 
870             in_size = fread(in_buf, 1, BLK_SIZE, infp);
871             if (in_size <= 0)
872                 break;
873 
874             out = rans_compress(in_buf, in_size, &out_size, order);
875 
876             fwrite(out, 1, out_size, outfp);
877             free(out);
878 
879             bytes += in_size;
880         }
881     }
882 
883     gettimeofday(&tv2, NULL);
884 
885     fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n",
886             (long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
887             tv2.tv_usec - tv1.tv_usec,
888             (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
889                              tv2.tv_usec - tv1.tv_usec));
890     return 0;
891 }
892 #endif
893