1 /*
2 * Copyright (c) 2019,2021 Genome Research Ltd.
3 * Author(s): James Bonfield
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following
13 * disclaimer in the documentation and/or other materials provided
14 * with the distribution.
15 *
16 * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17 * Institute nor the names of its contributors may be used to endorse
18 * or promote products derived from this software without specific
19 * prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <string.h>
35
36 /*
37 * Data transpose by N. Common to rANS4x16 and arith_dynamic decoders.
38 *
39 * Tuned for specific common cases of N.
40 */
unstripe(unsigned char * out,unsigned char * outN,unsigned int ulen,unsigned int N,unsigned int idxN[256])41 static inline void unstripe(unsigned char *out, unsigned char *outN,
42 unsigned int ulen, unsigned int N,
43 unsigned int idxN[256]) {
44 int j = 0, k;
45
46 if (ulen >= N) {
47 switch (N) {
48 case 4:
49 while (j < ulen-4) {
50 for (k = 0; k < 4; k++)
51 out[j++] = outN[idxN[k]++];
52 }
53 break;
54
55 case 2:
56 while (j < ulen-2) {
57 for (k = 0; k < 2; k++)
58 out[j++] = outN[idxN[k]++];
59 }
60 break;
61
62 default:
63 // General case, around 25% slower overall decode
64 while (j < ulen-N) {
65 for (k = 0; k < N; k++)
66 out[j++] = outN[idxN[k]++];
67 }
68 break;
69 }
70 }
71 for (k = 0; j < ulen; k++)
72 out[j++] = outN[idxN[k]++];
73 }
74
75 #define MAGIC 8
76
77 /*
78 * Order 0 histogram construction. 8-way unrolled to avoid cache collisions.
79 */
80 static inline
hist8(unsigned char * in,unsigned int in_size,uint32_t F0[256])81 void hist8(unsigned char *in, unsigned int in_size, uint32_t F0[256]) {
82 uint32_t F1[256+MAGIC] = {0}, F2[256+MAGIC] = {0}, F3[256+MAGIC] = {0};
83 uint32_t F4[256+MAGIC] = {0}, F5[256+MAGIC] = {0}, F6[256+MAGIC] = {0};
84 uint32_t F7[256+MAGIC] = {0};
85
86 unsigned int i, i8 = in_size & ~7;
87 for (i = 0; i < i8; i+=8) {
88 F0[in[i+0]]++;
89 F1[in[i+1]]++;
90 F2[in[i+2]]++;
91 F3[in[i+3]]++;
92 F4[in[i+4]]++;
93 F5[in[i+5]]++;
94 F6[in[i+6]]++;
95 F7[in[i+7]]++;
96 }
97 while (i < in_size)
98 F0[in[i++]]++;
99
100 for (i = 0; i < 256; i++)
101 F0[i] += F1[i] + F2[i] + F3[i] + F4[i] + F5[i] + F6[i] + F7[i];
102 }
103
104 /*
105 * A variant of hist8 that simply marks the presence of a symbol rather
106 * than its frequency.
107 */
108 static inline
present8(unsigned char * in,unsigned int in_size,uint32_t F0[256])109 void present8(unsigned char *in, unsigned int in_size,
110 uint32_t F0[256]) {
111 uint32_t F1[256+MAGIC] = {0}, F2[256+MAGIC] = {0}, F3[256+MAGIC] = {0};
112 uint32_t F4[256+MAGIC] = {0}, F5[256+MAGIC] = {0}, F6[256+MAGIC] = {0};
113 uint32_t F7[256+MAGIC] = {0};
114
115 unsigned int i, i8 = in_size & ~7;
116 for (i = 0; i < i8; i+=8) {
117 F0[in[i+0]]=1;
118 F1[in[i+1]]=1;
119 F2[in[i+2]]=1;
120 F3[in[i+3]]=1;
121 F4[in[i+4]]=1;
122 F5[in[i+5]]=1;
123 F6[in[i+6]]=1;
124 F7[in[i+7]]=1;
125 }
126 while (i < in_size)
127 F0[in[i++]]=1;
128
129 for (i = 0; i < 256; i++)
130 F0[i] += F1[i] + F2[i] + F3[i] + F4[i] + F5[i] + F6[i] + F7[i];
131 }
132
133 /*
134 * Order 1 histogram construction. 4-way unrolled to avoid cache collisions.
135 */
136 static inline
hist1_4(unsigned char * in,unsigned int in_size,uint32_t F0[256][256],uint32_t * T0)137 void hist1_4(unsigned char *in, unsigned int in_size,
138 uint32_t F0[256][256], uint32_t *T0) {
139 uint32_t T1[256+MAGIC] = {0}, T2[256+MAGIC] = {0}, T3[256+MAGIC] = {0};
140
141 unsigned char l = 0, c;
142 unsigned char *in_end = in + in_size;
143
144 unsigned char cc[5] = {0};
145 if (in_size > 500000) {
146 uint32_t F1[256][259] = {{0}};
147 while (in < in_end-8) {
148 memcpy(cc, in, 4); in += 4;
149 T0[cc[4]]++; F0[cc[4]][cc[0]]++;
150 T1[cc[0]]++; F1[cc[0]][cc[1]]++;
151 T2[cc[1]]++; F0[cc[1]][cc[2]]++;
152 T3[cc[2]]++; F1[cc[2]][cc[3]]++;
153 cc[4] = cc[3];
154
155 memcpy(cc, in, 4); in += 4;
156 T0[cc[4]]++; F0[cc[4]][cc[0]]++;
157 T1[cc[0]]++; F1[cc[0]][cc[1]]++;
158 T2[cc[1]]++; F0[cc[1]][cc[2]]++;
159 T3[cc[2]]++; F1[cc[2]][cc[3]]++;
160 cc[4] = cc[3];
161 }
162 l = cc[3];
163
164 while (in < in_end) {
165 F0[l][c = *in++]++;
166 T0[l]++;
167 l = c;
168 }
169
170 int i, j;
171 for (i = 0; i < 256; i++)
172 for (j = 0; j < 256; j++)
173 F0[i][j] += F1[i][j];
174 } else {
175 while (in < in_end-8) {
176 memcpy(cc, in, 4); in += 4;
177 T0[cc[4]]++; F0[cc[4]][cc[0]]++;
178 T1[cc[0]]++; F0[cc[0]][cc[1]]++;
179 T2[cc[1]]++; F0[cc[1]][cc[2]]++;
180 T3[cc[2]]++; F0[cc[2]][cc[3]]++;
181 cc[4] = cc[3];
182
183 memcpy(cc, in, 4); in += 4;
184 T0[cc[4]]++; F0[cc[4]][cc[0]]++;
185 T1[cc[0]]++; F0[cc[0]][cc[1]]++;
186 T2[cc[1]]++; F0[cc[1]][cc[2]]++;
187 T3[cc[2]]++; F0[cc[2]][cc[3]]++;
188 cc[4] = cc[3];
189 }
190 l = cc[3];
191
192 while (in < in_end) {
193 F0[l][c = *in++]++;
194 T0[l]++;
195 l = c;
196 }
197 }
198
199 int i;
200 for (i = 0; i < 256; i++)
201 T0[i]+=T1[i]+T2[i]+T3[i];
202 }
203