1 /*
2  * Copyright (c) 2014-2019 Genome Research Ltd.
3  * Author(s): James Bonfield
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  *    1. Redistributions of source code must retain the above copyright notice,
9  *       this list of conditions and the following disclaimer.
10  *
11  *    2. Redistributions in binary form must reproduce the above
12  *       copyright notice, this list of conditions and the following
13  *       disclaimer in the documentation and/or other materials provided
14  *       with the distribution.
15  *
16  *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17  *       Institute nor the names of its contributors may be used to endorse
18  *       or promote products derived from this software without specific
19  *       prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24  * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25  * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Author: James Bonfield, Wellcome Trust Sanger Institute. 2014
36  */
37 
38 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
39 #include <config.h>
40 
41 #include <stdint.h>
42 #include <stdlib.h>
43 #include <stdio.h>
44 #include <unistd.h>
45 #include <assert.h>
46 #include <string.h>
47 #include <sys/time.h>
48 #include <limits.h>
49 
50 #include "rANS_static.h"
51 #include "rANS_byte.h"
52 
53 #define TF_SHIFT 12
54 #define TOTFREQ (1<<TF_SHIFT)
55 
56 #define ABS(a) ((a)>0?(a):-(a))
57 #ifndef BLK_SIZE
58 #  define BLK_SIZE 1024*1024
59 #endif
60 
61 // Room to allow for expanded BLK_SIZE on worst case compression.
62 #define BLK_SIZE2 ((int)(1.05*BLK_SIZE))
63 
64 /*-----------------------------------------------------------------------------
65  * Memory to memory compression functions.
66  *
67  * These are original versions without any manual loop unrolling. They
68  * are easier to understand, but can be up to 2x slower.
69  */
70 
rans_compress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)71 unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,
72                                 unsigned int *out_size) {
73     unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);
74     unsigned char *cp, *out_end;
75     RansEncSymbol syms[256];
76     RansState rans0, rans1, rans2, rans3;
77     uint8_t* ptr;
78     int F[256] = {0}, i, j, tab_size, rle, x, fsum = 0;
79     int m = 0, M = 0;
80     uint64_t tr;
81 
82     if (!out_buf)
83         return NULL;
84 
85     ptr = out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
86 
87     // Compute statistics
88     for (i = 0; i < in_size; i++) {
89         F[in[i]]++;
90     }
91     tr = ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size;
92  normalise_harder:
93     // Normalise so T[i] == TOTFREQ
94     for (fsum = m = M = j = 0; j < 256; j++) {
95         if (!F[j])
96             continue;
97 
98         if (m < F[j])
99             m = F[j], M = j;
100 
101         if ((F[j] = (F[j]*tr)>>31) == 0)
102             F[j] = 1;
103         fsum += F[j];
104     }
105 
106     fsum++;
107     if (fsum < TOTFREQ) {
108         F[M] += TOTFREQ-fsum;
109     } else if (fsum-TOTFREQ > F[M]/2) {
110         // Corner case to avoid excessive frequency reduction
111         tr = 2104533975; goto normalise_harder; // equiv to *0.98.
112     } else {
113         F[M] -= fsum-TOTFREQ;
114     }
115 
116     //printf("F[%d]=%d\n", M, F[M]);
117     assert(F[M]>0);
118 
119     // Encode statistics.
120     cp = out_buf+9;
121 
122     for (x = rle = j = 0; j < 256; j++) {
123         if (F[j]) {
124             // j
125             if (rle) {
126                 rle--;
127             } else {
128                 *cp++ = j;
129                 if (!rle && j && F[j-1])  {
130                     for(rle=j+1; rle<256 && F[rle]; rle++)
131                         ;
132                     rle -= j+1;
133                     *cp++ = rle;
134                 }
135                 //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]);
136             }
137 
138             // F[j]
139             if (F[j]<128) {
140                 *cp++ = F[j];
141             } else {
142                 *cp++ = 128 | (F[j]>>8);
143                 *cp++ = F[j]&0xff;
144             }
145             RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
146             x += F[j];
147         }
148     }
149     *cp++ = 0;
150 
151     //write(1, out_buf+4, cp-(out_buf+4));
152     tab_size = cp-out_buf;
153 
154     RansEncInit(&rans0);
155     RansEncInit(&rans1);
156     RansEncInit(&rans2);
157     RansEncInit(&rans3);
158 
159     switch (i=(in_size&3)) {
160     case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); // fall through
161     case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); // fall through
162     case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); // fall through
163     case 0:
164         break;
165     }
166     for (i=(in_size &~3); i>0; i-=4) {
167         RansEncSymbol *s3 = &syms[in[i-1]];
168         RansEncSymbol *s2 = &syms[in[i-2]];
169         RansEncSymbol *s1 = &syms[in[i-3]];
170         RansEncSymbol *s0 = &syms[in[i-4]];
171 
172         RansEncPutSymbol(&rans3, &ptr, s3);
173         RansEncPutSymbol(&rans2, &ptr, s2);
174         RansEncPutSymbol(&rans1, &ptr, s1);
175         RansEncPutSymbol(&rans0, &ptr, s0);
176     }
177 
178     RansEncFlush(&rans3, &ptr);
179     RansEncFlush(&rans2, &ptr);
180     RansEncFlush(&rans1, &ptr);
181     RansEncFlush(&rans0, &ptr);
182 
183     // Finalise block size and return it
184     *out_size = (out_end - ptr) + tab_size;
185 
186     cp = out_buf;
187 
188     *cp++ = 0; // order
189     *cp++ = ((*out_size-9)>> 0) & 0xff;
190     *cp++ = ((*out_size-9)>> 8) & 0xff;
191     *cp++ = ((*out_size-9)>>16) & 0xff;
192     *cp++ = ((*out_size-9)>>24) & 0xff;
193 
194     *cp++ = (in_size>> 0) & 0xff;
195     *cp++ = (in_size>> 8) & 0xff;
196     *cp++ = (in_size>>16) & 0xff;
197     *cp++ = (in_size>>24) & 0xff;
198 
199     memmove(out_buf + tab_size, ptr, out_end-ptr);
200 
201     return out_buf;
202 }
203 
204 typedef struct {
205     unsigned char R[TOTFREQ];
206 } ari_decoder;
207 
rans_uncompress_O0(unsigned char * in,unsigned int in_size,unsigned int * out_size)208 unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size,
209                                   unsigned int *out_size) {
210     /* Load in the static tables */
211     unsigned char *cp = in + 9;
212     unsigned char *cp_end = in + in_size;
213     int i, j, x, rle;
214     unsigned int out_sz, in_sz;
215     char *out_buf;
216     ari_decoder D;
217     RansDecSymbol syms[256];
218 
219     if (in_size < 26) // Need at least this many bytes just to start
220         return NULL;
221 
222     if (*in++ != 0) // Order-0 check
223         return NULL;
224 
225     in_sz  = ((((uint32_t) in[0])<<0) | (((uint32_t) in[1])<<8) |
226               (((uint32_t) in[2])<<16) | (((uint32_t) in[3])<<24));
227     out_sz = ((((uint32_t) in[4])<<0) | (((uint32_t) in[5])<<8) |
228               (((uint32_t) in[6])<<16) | (((uint32_t) in[7])<<24));
229     if (in_sz != in_size-9)
230         return NULL;
231 
232     if (out_sz >= INT_MAX)
233         return NULL; // protect against some overflow cases
234 
235     // Precompute reverse lookup of frequency.
236     rle = x = 0;
237     j = *cp++;
238     do {
239         int F, C;
240         if (cp > cp_end - 16) return NULL; // Not enough input bytes left
241         if ((F = *cp++) >= 128) {
242             F &= ~128;
243             F = ((F & 127) << 8) | *cp++;
244         }
245         C = x;
246 
247         RansDecSymbolInit(&syms[j], C, F);
248 
249         /* Build reverse lookup table */
250         if (x + F > TOTFREQ)
251             return NULL;
252         memset(&D.R[x], j, F);
253 
254         x += F;
255 
256         if (!rle && j+1 == *cp) {
257             j = *cp++;
258             rle = *cp++;
259         } else if (rle) {
260             rle--;
261             j++;
262             if (j > 255)
263                 return NULL;
264         } else {
265             j = *cp++;
266         }
267     } while(j);
268 
269     if (x < TOTFREQ-1 || x > TOTFREQ)
270         return NULL;
271     if (x < TOTFREQ) // historically we fill 4095, not 4096
272         D.R[x] = D.R[x-1];
273 
274     if (cp > cp_end - 16) return NULL; // Not enough input bytes left
275 
276     RansState rans0, rans1, rans2, rans3;
277     uint8_t *ptr = cp;
278     RansDecInit(&rans0, &ptr);
279     RansDecInit(&rans1, &ptr);
280     RansDecInit(&rans2, &ptr);
281     RansDecInit(&rans3, &ptr);
282 
283     out_buf = malloc(out_sz);
284     if (!out_buf)
285         return NULL;
286 
287     int out_end = (out_sz&~3);
288 
289     RansState R[4];
290     R[0] = rans0;
291     R[1] = rans1;
292     R[2] = rans2;
293     R[3] = rans3;
294     uint32_t mask = (1u << TF_SHIFT)-1;
295 
296     for (i=0; i < out_end; i+=4) {
297         uint32_t m[4] = {R[0] & mask,
298                          R[1] & mask,
299                          R[2] & mask,
300                          R[3] & mask};
301         uint8_t c[4] = {D.R[m[0]],
302                         D.R[m[1]],
303                         D.R[m[2]],
304                         D.R[m[3]]};
305         out_buf[i+0] = c[0];
306         out_buf[i+1] = c[1];
307         out_buf[i+2] = c[2];
308         out_buf[i+3] = c[3];
309 
310         // In theory all TOTFREQ elements of D.R are filled out, but it's
311         // possible this may not be true (invalid input).  We could
312         // check with x == TOTFREQ after filling out D.R matrix, but
313         // for historical reasons this sums to TOTFREQ-1 leaving one
314         // byte in D.R uninitialised. Or we could check here that
315         // syms[c[0..3]].freq > 0 and initialising syms, but that is
316         // slow.
317         //
318         // We take the former approach and accept a potential for garbage in
319         // -> garbage out in the rare 1 in TOTFREQ case as the overhead of
320         // continuous validation of freq > 0 is steep on this tight loop.
321 
322         // RansDecAdvanceSymbolStep(&R[0], &syms[c[0]], TF_SHIFT);
323         // RansDecAdvanceSymbolStep(&R[1], &syms[c[1]], TF_SHIFT);
324         // RansDecAdvanceSymbolStep(&R[2], &syms[c[2]], TF_SHIFT);
325         // RansDecAdvanceSymbolStep(&R[3], &syms[c[3]], TF_SHIFT);
326         R[0] = syms[c[0]].freq * (R[0]>>TF_SHIFT);
327         R[0] += m[0] - syms[c[0]].start;
328         R[1] = syms[c[1]].freq * (R[1]>>TF_SHIFT);
329         R[1] += m[1] - syms[c[1]].start;
330         R[2] = syms[c[2]].freq * (R[2]>>TF_SHIFT);
331         R[2] += m[2] - syms[c[2]].start;
332         R[3] = syms[c[3]].freq * (R[3]>>TF_SHIFT);
333         R[3] += m[3] - syms[c[3]].start;
334 
335         if (ptr < cp_end - 8) { // Each renorm reads no more than 2 bytes
336             RansDecRenorm(&R[0], &ptr);
337             RansDecRenorm(&R[1], &ptr);
338             RansDecRenorm(&R[2], &ptr);
339             RansDecRenorm(&R[3], &ptr);
340         } else {
341             RansDecRenormSafe(&R[0], &ptr, cp_end);
342             RansDecRenormSafe(&R[1], &ptr, cp_end);
343             RansDecRenormSafe(&R[2], &ptr, cp_end);
344             RansDecRenormSafe(&R[3], &ptr, cp_end);
345         }
346     }
347 
348     switch(out_sz&3) {
349     case 3:
350         out_buf[out_end+2] = D.R[RansDecGet(&R[2], TF_SHIFT)];
351         // fall through
352     case 2:
353         out_buf[out_end+1] = D.R[RansDecGet(&R[1], TF_SHIFT)];
354         // fall through
355     case 1:
356         out_buf[out_end] = D.R[RansDecGet(&R[0], TF_SHIFT)];
357         // fall through
358     default:
359         break;
360     }
361 
362     *out_size = out_sz;
363 
364     return (unsigned char *)out_buf;
365 }
366 
rans_compress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)367 unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size,
368                                 unsigned int *out_size) {
369     unsigned char *out_buf = NULL, *out_end, *cp;
370     unsigned int last_i, tab_size, rle_i, rle_j;
371     RansEncSymbol (*syms)[256] = NULL;  /* syms[256][256] */
372     int (*F)[256] = NULL;               /* F[256][256]    */
373     int *T = NULL;                      /* T[256]         */
374     int i, j;
375     unsigned char c;
376 
377     if (in_size < 4)
378         return rans_compress_O0(in, in_size, out_size);
379 
380     syms = malloc(256 * sizeof(*syms));
381     if (!syms) goto cleanup;
382     F = calloc(256, sizeof(*F));
383     if (!F) goto cleanup;
384     T = calloc(256, sizeof(*T));
385     if (!T) goto cleanup;
386     out_buf = malloc(1.05*in_size + 257*257*3 + 9);
387     if (!out_buf) goto cleanup;
388 
389     out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;
390     cp = out_buf+9;
391 
392     //for (last = 0, i=in_size-1; i>=0; i--) {
393     //  F[last][c = in[i]]++;
394     //  T[last]++;
395     //  last = c;
396     //}
397 
398     for (last_i=i=0; i<in_size; i++) {
399         F[last_i][c = in[i]]++;
400         T[last_i]++;
401         last_i = c;
402     }
403     F[0][in[1*(in_size>>2)]]++;
404     F[0][in[2*(in_size>>2)]]++;
405     F[0][in[3*(in_size>>2)]]++;
406     T[0]+=3;
407 
408     // Normalise so T[i] == TOTFREQ
409     for (rle_i = i = 0; i < 256; i++) {
410         int t2, m, M;
411         unsigned int x;
412 
413         if (T[i] == 0)
414             continue;
415 
416         //uint64_t p = (TOTFREQ * TOTFREQ) / t;
417         double p = ((double)TOTFREQ)/T[i];
418     normalise_harder:
419         for (t2 = m = M = j = 0; j < 256; j++) {
420             if (!F[i][j])
421                 continue;
422 
423             if (m < F[i][j])
424                 m = F[i][j], M = j;
425 
426             //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0)
427             if ((F[i][j] *= p) == 0)
428                 F[i][j] = 1;
429             t2 += F[i][j];
430         }
431 
432         t2++;
433         if (t2 < TOTFREQ) {
434             F[i][M] += TOTFREQ-t2;
435         } else if (t2-TOTFREQ >= F[i][M]/2) {
436             // Corner case to avoid excessive frequency reduction
437             p = .98; goto normalise_harder;
438         } else {
439             F[i][M] -= t2-TOTFREQ;
440         }
441 
442         // Store frequency table
443         // i
444         if (rle_i) {
445             rle_i--;
446         } else {
447             *cp++ = i;
448             // FIXME: could use order-0 statistics to observe which alphabet
449             // symbols are present and base RLE on that ordering instead.
450             if (i && T[i-1]) {
451                 for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++)
452                     ;
453                 rle_i -= i+1;
454                 *cp++ = rle_i;
455             }
456         }
457 
458         int *F_i_ = F[i];
459         x = 0;
460         rle_j = 0;
461         for (j = 0; j < 256; j++) {
462             if (F_i_[j]) {
463                 //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x);
464 
465                 // j
466                 if (rle_j) {
467                     rle_j--;
468                 } else {
469                     *cp++ = j;
470                     if (!rle_j && j && F_i_[j-1]) {
471                         for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++)
472                             ;
473                         rle_j -= j+1;
474                         *cp++ = rle_j;
475                     }
476                 }
477 
478                 // F_i_[j]
479                 if (F_i_[j]<128) {
480                     *cp++ = F_i_[j];
481                 } else {
482                     *cp++ = 128 | (F_i_[j]>>8);
483                     *cp++ = F_i_[j]&0xff;
484                 }
485 
486                 RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT);
487                 x += F_i_[j];
488             }
489         }
490         *cp++ = 0;
491     }
492     *cp++ = 0;
493 
494     //write(1, out_buf+4, cp-(out_buf+4));
495     tab_size = cp - out_buf;
496     assert(tab_size < 257*257*3);
497 
498     RansState rans0, rans1, rans2, rans3;
499     RansEncInit(&rans0);
500     RansEncInit(&rans1);
501     RansEncInit(&rans2);
502     RansEncInit(&rans3);
503 
504     uint8_t* ptr = out_end;
505 
506     int isz4 = in_size>>2;
507     int i0 = 1*isz4-2;
508     int i1 = 2*isz4-2;
509     int i2 = 3*isz4-2;
510     int i3 = 4*isz4-2;
511 
512     unsigned char l0 = in[i0+1];
513     unsigned char l1 = in[i1+1];
514     unsigned char l2 = in[i2+1];
515     unsigned char l3 = in[i3+1];
516 
517     // Deal with the remainder
518     l3 = in[in_size-1];
519     for (i3 = in_size-2; i3 > 4*isz4-2; i3--) {
520         unsigned char c3 = in[i3];
521         RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]);
522         l3 = c3;
523     }
524 
525     for (; i0 >= 0; i0--, i1--, i2--, i3--) {
526         unsigned char c0, c1, c2, c3;
527         RansEncSymbol *s3 = &syms[c3 = in[i3]][l3];
528         RansEncSymbol *s2 = &syms[c2 = in[i2]][l2];
529         RansEncSymbol *s1 = &syms[c1 = in[i1]][l1];
530         RansEncSymbol *s0 = &syms[c0 = in[i0]][l0];
531 
532         RansEncPutSymbol(&rans3, &ptr, s3);
533         RansEncPutSymbol(&rans2, &ptr, s2);
534         RansEncPutSymbol(&rans1, &ptr, s1);
535         RansEncPutSymbol(&rans0, &ptr, s0);
536 
537         l0 = c0;
538         l1 = c1;
539         l2 = c2;
540         l3 = c3;
541     }
542 
543     RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]);
544     RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]);
545     RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]);
546     RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]);
547 
548     RansEncFlush(&rans3, &ptr);
549     RansEncFlush(&rans2, &ptr);
550     RansEncFlush(&rans1, &ptr);
551     RansEncFlush(&rans0, &ptr);
552 
553     *out_size = (out_end - ptr) + tab_size;
554 
555     cp = out_buf;
556     *cp++ = 1; // order
557 
558     *cp++ = ((*out_size-9)>> 0) & 0xff;
559     *cp++ = ((*out_size-9)>> 8) & 0xff;
560     *cp++ = ((*out_size-9)>>16) & 0xff;
561     *cp++ = ((*out_size-9)>>24) & 0xff;
562 
563     *cp++ = (in_size>> 0) & 0xff;
564     *cp++ = (in_size>> 8) & 0xff;
565     *cp++ = (in_size>>16) & 0xff;
566     *cp++ = (in_size>>24) & 0xff;
567 
568     memmove(out_buf + tab_size, ptr, out_end-ptr);
569 
570  cleanup:
571     free(syms);
572     free(F);
573     free(T);
574 
575     return out_buf;
576 }
577 
rans_uncompress_O1(unsigned char * in,unsigned int in_size,unsigned int * out_size)578 unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size,
579                                   unsigned int *out_size) {
580     /* Load in the static tables */
581     unsigned char *cp = in + 9;
582     unsigned char *ptr_end = in + in_size;
583     int i, j = -999, x, rle_i, rle_j;
584     unsigned int out_sz, in_sz;
585     char *out_buf = NULL;
586     ari_decoder *D = NULL;              /* D[256] */
587     RansDecSymbol (*syms)[256] = NULL;  /* syms[256][256] */
588 
589     if (in_size < 27) // Need at least this many bytes to start
590         return NULL;
591 
592     if (*in++ != 1) // Order-1 check
593         return NULL;
594 
595     in_sz  = ((((uint32_t) in[0])<<0) | (((uint32_t) in[1])<<8) |
596               (((uint32_t) in[2])<<16) | (((uint32_t) in[3])<<24));
597     out_sz = ((((uint32_t) in[4])<<0) | (((uint32_t) in[5])<<8) |
598               (((uint32_t) in[6])<<16) | (((uint32_t) in[7])<<24));
599     if (in_sz != in_size-9)
600         return NULL;
601 
602     if (out_sz >= INT_MAX)
603         return NULL; // protect against some overflow cases
604 
605     // calloc may add 2% overhead to CRAM decode, but on linux with glibc it's
606     // often the same thing due to using mmap.
607     D = calloc(256, sizeof(*D));
608     if (!D) goto cleanup;
609     syms = malloc(256 * sizeof(*syms));
610     if (!syms) goto cleanup;
611     /* These memsets prevent illegal memory access in syms due to
612        broken compressed data.  As D is calloc'd, all illegal transitions
613        will end up in either row or column 0 of syms. */
614     memset(&syms[0], 0, sizeof(syms[0]));
615     for (i = 1; i < 256; i++) memset(&syms[i][0], 0, sizeof(syms[0][0]));
616 
617     //fprintf(stderr, "out_sz=%d\n", out_sz);
618 
619     //i = *cp++;
620     rle_i = 0;
621     i = *cp++;
622     do {
623         rle_j = x = 0;
624         j = *cp++;
625         do {
626             int F, C;
627             if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
628             if ((F = *cp++) >= 128) {
629                 F &= ~128;
630                 F = ((F & 127) << 8) | *cp++;
631             }
632             C = x;
633 
634             //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C);
635 
636             if (!F)
637                 F = TOTFREQ;
638 
639             RansDecSymbolInit(&syms[i][j], C, F);
640 
641             /* Build reverse lookup table */
642             if (x + F > TOTFREQ)
643                 goto cleanup;
644             memset(&D[i].R[x], j, F);
645 
646             x += F;
647 
648             if (!rle_j && j+1 == *cp) {
649                 j = *cp++;
650                 rle_j = *cp++;
651             } else if (rle_j) {
652                 rle_j--;
653                 j++;
654                 if (j > 255)
655                     goto cleanup;
656             } else {
657                 j = *cp++;
658             }
659         } while(j);
660 
661         if (x < TOTFREQ-1 || x > TOTFREQ)
662             goto cleanup;
663         if (x < TOTFREQ) // historically we fill 4095, not 4096
664             D[i].R[x] = D[i].R[x-1];
665 
666         if (!rle_i && i+1 == *cp) {
667             i = *cp++;
668             rle_i = *cp++;
669         } else if (rle_i) {
670             rle_i--;
671             i++;
672             if (i > 255)
673                 goto cleanup;
674         } else {
675             i = *cp++;
676         }
677     } while (i);
678 
679     // Precompute reverse lookup of frequency.
680 
681     RansState rans0, rans1, rans2, rans3;
682     uint8_t *ptr = cp;
683     if (ptr > ptr_end - 16) goto cleanup; // Not enough input bytes left
684     RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup;
685     RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup;
686     RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup;
687     RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup;
688 
689     int isz4 = out_sz>>2;
690     int l0 = 0;
691     int l1 = 0;
692     int l2 = 0;
693     int l3 = 0;
694     int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4};
695 
696     RansState R[4];
697     R[0] = rans0;
698     R[1] = rans1;
699     R[2] = rans2;
700     R[3] = rans3;
701 
702     /* Allocate output buffer */
703     out_buf = malloc(out_sz);
704     if (!out_buf) goto cleanup;
705 
706     for (; i4[0] < isz4; i4[0]++, i4[1]++, i4[2]++, i4[3]++) {
707         uint32_t m[4] = {R[0] & ((1u << TF_SHIFT)-1),
708                          R[1] & ((1u << TF_SHIFT)-1),
709                          R[2] & ((1u << TF_SHIFT)-1),
710                          R[3] & ((1u << TF_SHIFT)-1)};
711 
712         uint8_t c[4] = {D[l0].R[m[0]],
713                         D[l1].R[m[1]],
714                         D[l2].R[m[2]],
715                         D[l3].R[m[3]]};
716 
717         out_buf[i4[0]] = c[0];
718         out_buf[i4[1]] = c[1];
719         out_buf[i4[2]] = c[2];
720         out_buf[i4[3]] = c[3];
721 
722         //RansDecAdvanceSymbolStep(&R[0], &syms[l0][c[0]], TF_SHIFT);
723         //RansDecAdvanceSymbolStep(&R[1], &syms[l1][c[1]], TF_SHIFT);
724         //RansDecAdvanceSymbolStep(&R[2], &syms[l2][c[2]], TF_SHIFT);
725         //RansDecAdvanceSymbolStep(&R[3], &syms[l3][c[3]], TF_SHIFT);
726 
727         R[0] = syms[l0][c[0]].freq * (R[0]>>TF_SHIFT);
728         R[0] += m[0] - syms[l0][c[0]].start;
729         R[1] = syms[l1][c[1]].freq * (R[1]>>TF_SHIFT);
730         R[1] += m[1] - syms[l1][c[1]].start;
731         R[2] = syms[l2][c[2]].freq * (R[2]>>TF_SHIFT);
732         R[2] += m[2] - syms[l2][c[2]].start;
733         R[3] = syms[l3][c[3]].freq * (R[3]>>TF_SHIFT);
734         R[3] += m[3] - syms[l3][c[3]].start;
735 
736         if (ptr < ptr_end - 8) { // Each renorm reads no more than 2 bytes
737             RansDecRenorm(&R[0], &ptr);
738             RansDecRenorm(&R[1], &ptr);
739             RansDecRenorm(&R[2], &ptr);
740             RansDecRenorm(&R[3], &ptr);
741         } else {
742             RansDecRenormSafe(&R[0], &ptr, ptr_end);
743             RansDecRenormSafe(&R[1], &ptr, ptr_end);
744             RansDecRenormSafe(&R[2], &ptr, ptr_end);
745             RansDecRenormSafe(&R[3], &ptr, ptr_end);
746         }
747 
748         l0 = c[0];
749         l1 = c[1];
750         l2 = c[2];
751         l3 = c[3];
752     }
753 
754     // Remainder
755     for (; i4[3] < out_sz; i4[3]++) {
756         unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)];
757         out_buf[i4[3]] = c3;
758 
759         uint32_t m = R[3] & ((1u << TF_SHIFT)-1);
760         R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start;
761         RansDecRenormSafe(&R[3], &ptr, ptr_end);
762         l3 = c3;
763     }
764 
765     *out_size = out_sz;
766 
767  cleanup:
768     if (D)
769         free(D);
770     free(syms);
771 
772     return (unsigned char *)out_buf;
773 }
774 
775 /*-----------------------------------------------------------------------------
776  * Simple interface to the order-0 vs order-1 encoders and decoders.
777  */
rans_compress(unsigned char * in,unsigned int in_size,unsigned int * out_size,int order)778 unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
779                              unsigned int *out_size, int order) {
780     return order
781         ? rans_compress_O1(in, in_size, out_size)
782         : rans_compress_O0(in, in_size, out_size);
783 }
784 
rans_uncompress(unsigned char * in,unsigned int in_size,unsigned int * out_size)785 unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
786                                unsigned int *out_size) {
787     /* Both rans_uncompress functions need to be able to read at least 9
788        bytes. */
789     if (in_size < 9)
790         return NULL;
791     return in[0]
792         ? rans_uncompress_O1(in, in_size, out_size)
793         : rans_uncompress_O0(in, in_size, out_size);
794 }
795 
796 
797 #ifdef TEST_MAIN
798 /*-----------------------------------------------------------------------------
799  * Main.
800  *
801  * This is a simple command line tool for testing order-0 and order-1
802  * compression using the rANS codec. Simply compile with
803  *
804  * gcc -DTEST_MAIN -O3 -I. cram/rANS_static.c -o cram/rANS_static
805  *
806  * Usage: cram/rANS_static -o0 < file    > file.o0
807  *        cram/rANS_static -d  < file.o0 > file2
808  *
809  *        cram/rANS_static -o1 < file    > file.o1
810  *        cram/rANS_static -d  < file.o1 > file2
811  */
main(int argc,char ** argv)812 int main(int argc, char **argv) {
813     int opt, order = 0;
814     unsigned char in_buf[BLK_SIZE2+257*257*3];
815     int decode = 0;
816     FILE *infp = stdin, *outfp = stdout;
817     struct timeval tv1, tv2;
818     size_t bytes = 0;
819 
820     extern char *optarg;
821     extern int optind;
822 
823     while ((opt = getopt(argc, argv, "o:d")) != -1) {
824         switch (opt) {
825         case 'o':
826             order = atoi(optarg);
827             break;
828 
829         case 'd':
830             decode = 1;
831             break;
832         }
833     }
834 
835     order = order ? 1 : 0; // Only support O(0) and O(1)
836 
837     if (optind < argc) {
838         if (!(infp = fopen(argv[optind], "rb"))) {
839             perror(argv[optind]);
840             return 1;
841         }
842         optind++;
843     }
844 
845     if (optind < argc) {
846         if (!(outfp = fopen(argv[optind], "wb"))) {
847             perror(argv[optind]);
848             fclose(infp);
849             return 1;
850         }
851         optind++;
852     }
853 
854     gettimeofday(&tv1, NULL);
855 
856     if (decode) {
857         // Only used in some test implementations of RC_GetFreq()
858         //RC_init();
859         //RC_init2();
860 
861         for (;;) {
862             uint32_t in_size, out_size;
863             unsigned char *out;
864 
865             if (9 != fread(in_buf, 1, 9, infp))
866                 break;
867             in_size = *(int *)&in_buf[1];
868             if (in_size != fread(in_buf+9, 1, in_size, infp)) {
869                 fprintf(stderr, "Truncated input\n");
870                 exit(1);
871             }
872             out = rans_uncompress(in_buf, in_size+9, &out_size);
873             if (!out)
874                 abort();
875 
876             fwrite(out, 1, out_size, outfp);
877             free(out);
878 
879             bytes += out_size;
880         }
881     } else {
882         for (;;) {
883             uint32_t in_size, out_size;
884             unsigned char *out;
885 
886             in_size = fread(in_buf, 1, BLK_SIZE, infp);
887             if (in_size <= 0)
888                 break;
889 
890             out = rans_compress(in_buf, in_size, &out_size, order);
891 
892             fwrite(out, 1, out_size, outfp);
893             free(out);
894 
895             bytes += in_size;
896         }
897     }
898 
899     gettimeofday(&tv2, NULL);
900 
901     fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\n",
902             (long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
903             tv2.tv_usec - tv1.tv_usec,
904             (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 +
905                              tv2.tv_usec - tv1.tv_usec));
906 
907     if (infp  != stdin)  fclose(infp);
908     if (outfp != stdout) fclose(outfp);
909 
910     return 0;
911 }
912 #endif
913