1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 /*
22 File: ag_enc.c
23
24 Contains: Adaptive Golomb encode routines.
25
26 Copyright: (c) 2001-2011 Apple, Inc.
27 */
28
29 #include "aglib.h"
30 #include "ALACBitUtilities.h"
31 #include "EndianPortable.h"
32 #include "ALACAudioTypes.h"
33
34 #include <math.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #if __GNUC__ && TARGET_OS_MAC
39 #if __POWERPC__
40 #include <ppc_intrinsics.h>
41 #else
42 #include <libkern/OSByteOrder.h>
43 #endif
44 #endif
45
46 #define CODE_TO_LONG_MAXBITS 32
47 #define N_MAX_MEAN_CLAMP 0xffff
48 #define N_MEAN_CLAMP_VAL 0xffff
49 #define REPORT_VAL 40
50
51 #if __GNUC__
52 #define ALWAYS_INLINE __attribute__((always_inline))
53 #else
54 #define ALWAYS_INLINE
55 #endif
56
57
58 /* And on the subject of the CodeWarrior x86 compiler and inlining, I reworked a lot of this
59 to help the compiler out. In many cases this required manual inlining or a macro. Sorry
60 if it is ugly but the performance gains are well worth it.
61 - WSK 5/19/04
62 */
63
64 // note: implementing this with some kind of "count leading zeros" assembly is a big performance win
lead(int32_t m)65 static inline int32_t lead( int32_t m )
66 {
67 long j;
68 unsigned long c = (1ul << 31);
69
70 for(j=0; j < 32; j++)
71 {
72 if((c & m) != 0)
73 break;
74 c >>= 1;
75 }
76 return (j);
77 }
78
79 #define arithmin(a, b) ((a) < (b) ? (a) : (b))
80
lg3a(int32_t x)81 static inline int32_t ALWAYS_INLINE lg3a( int32_t x)
82 {
83 int32_t result;
84
85 x += 3;
86 result = lead(x);
87
88 return 31 - result;
89 }
90
abs_func(int32_t a)91 static inline int32_t ALWAYS_INLINE abs_func( int32_t a )
92 {
93 // note: the CW PPC intrinsic __abs() turns into these instructions so no need to try and use it
94 int32_t isneg = a >> 31;
95 int32_t xorval = a ^ isneg;
96 int32_t result = xorval-isneg;
97
98 return result;
99 }
100
read32bit(uint8_t * buffer)101 static inline uint32_t ALWAYS_INLINE read32bit( uint8_t * buffer )
102 {
103 // embedded CPUs typically can't read unaligned 32-bit words so just read the bytes
104 uint32_t value;
105
106 value = ((uint32_t)buffer[0] << 24) | ((uint32_t)buffer[1] << 16) |
107 ((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3];
108 return value;
109 }
110
111 #if PRAGMA_MARK
112 #pragma mark -
113 #endif
114
dyn_code(int32_t m,int32_t k,int32_t n,uint32_t * outNumBits)115 static inline int32_t dyn_code(int32_t m, int32_t k, int32_t n, uint32_t *outNumBits)
116 {
117 uint32_t div, mod, de;
118 uint32_t numBits;
119 uint32_t value;
120
121 //Assert( n >= 0 );
122
123 div = n/m;
124
125 if(div >= MAX_PREFIX_16)
126 {
127 numBits = MAX_PREFIX_16 + MAX_DATATYPE_BITS_16;
128 value = (((1<<MAX_PREFIX_16)-1)<<MAX_DATATYPE_BITS_16) + n;
129 }
130 else
131 {
132 mod = n%m;
133 de = (mod == 0);
134 numBits = div + k + 1 - de;
135 value = (((1<<div)-1)<<(numBits-div)) + mod + 1 - de;
136
137 // if coding this way is bigger than doing escape, then do escape
138 if (numBits > MAX_PREFIX_16 + MAX_DATATYPE_BITS_16)
139 {
140 numBits = MAX_PREFIX_16 + MAX_DATATYPE_BITS_16;
141 value = (((1<<MAX_PREFIX_16)-1)<<MAX_DATATYPE_BITS_16) + n;
142 }
143 }
144
145 *outNumBits = numBits;
146
147 return (int32_t) value;
148 }
149
150
dyn_code_32bit(int32_t maxbits,uint32_t m,uint32_t k,uint32_t n,uint32_t * outNumBits,uint32_t * outValue,uint32_t * overflow,uint32_t * overflowbits)151 static inline int32_t dyn_code_32bit(int32_t maxbits, uint32_t m, uint32_t k, uint32_t n, uint32_t *outNumBits, uint32_t *outValue, uint32_t *overflow, uint32_t *overflowbits)
152 {
153 uint32_t div, mod, de;
154 uint32_t numBits;
155 uint32_t value;
156 int32_t didOverflow = 0;
157
158 div = n/m;
159
160 if (div < MAX_PREFIX_32)
161 {
162 mod = n - (m * div);
163
164 de = (mod == 0);
165 numBits = div + k + 1 - de;
166 value = (((1<<div)-1)<<(numBits-div)) + mod + 1 - de;
167 if (numBits > 25)
168 goto codeasescape;
169 }
170 else
171 {
172 codeasescape:
173 numBits = MAX_PREFIX_32;
174 value = (((1<<MAX_PREFIX_32)-1));
175 *overflow = n;
176 *overflowbits = maxbits;
177 didOverflow = 1;
178 }
179
180 *outNumBits = numBits;
181 *outValue = value;
182
183 return didOverflow;
184 }
185
186
dyn_jam_noDeref(unsigned char * out,uint32_t bitPos,uint32_t numBits,uint32_t value)187 static inline void ALWAYS_INLINE dyn_jam_noDeref(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
188 {
189 uint32_t *i = (uint32_t *)(out + (bitPos >> 3));
190 uint32_t mask;
191 uint32_t curr;
192 uint32_t shift;
193
194 //Assert( numBits <= 32 );
195
196 curr = *i;
197 curr = Swap32NtoB( curr );
198
199 shift = 32 - (bitPos & 7) - numBits;
200
201 mask = ~0u >> (32 - numBits); // mask must be created in two steps to avoid compiler sequencing ambiguity
202 mask <<= shift;
203
204 value = (value << shift) & mask;
205 value |= curr & ~mask;
206
207 *i = Swap32BtoN( value );
208 }
209
210
dyn_jam_noDeref_large(unsigned char * out,uint32_t bitPos,uint32_t numBits,uint32_t value)211 static inline void ALWAYS_INLINE dyn_jam_noDeref_large(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
212 {
213 uint32_t * i = (uint32_t *)(out + (bitPos>>3));
214 uint32_t w;
215 uint32_t curr;
216 uint32_t mask;
217 int32_t shiftvalue = (32 - (bitPos&7) - numBits);
218
219 //Assert(numBits <= 32);
220
221 curr = *i;
222 curr = Swap32NtoB( curr );
223
224 if (shiftvalue < 0)
225 {
226 uint8_t tailbyte;
227 uint8_t *tailptr;
228
229 w = value >> -shiftvalue;
230 mask = ~0u >> -shiftvalue;
231 w |= (curr & ~mask);
232
233 tailptr = ((uint8_t *)i) + 4;
234 tailbyte = (value << ((8+shiftvalue))) & 0xff;
235 *tailptr = (uint8_t)tailbyte;
236 }
237 else
238 {
239 mask = ~0u >> (32 - numBits);
240 mask <<= shiftvalue; // mask must be created in two steps to avoid compiler sequencing ambiguity
241
242 w = (value << shiftvalue) & mask;
243 w |= curr & ~mask;
244 }
245
246 *i = Swap32BtoN( w );
247 }
248
249
dyn_comp(AGParamRecPtr params,int32_t * pc,BitBuffer * bitstream,int32_t numSamples,int32_t bitSize,uint32_t * outNumBits)250 int32_t dyn_comp( AGParamRecPtr params, int32_t * pc, BitBuffer * bitstream, int32_t numSamples, int32_t bitSize, uint32_t * outNumBits )
251 {
252 unsigned char * out;
253 uint32_t bitPos, startPos;
254 uint32_t m, k, n, c, mz, nz;
255 uint32_t numBits;
256 uint32_t value;
257 int32_t del, zmode;
258 uint32_t overflow, overflowbits;
259 int32_t status;
260
261 // shadow the variables in params so there's not the dereferencing overhead
262 uint32_t mb, pb, kb, wb;
263 int32_t rowPos = 0;
264 int32_t rowSize = params->sw;
265 int32_t rowJump = (params->fw) - rowSize;
266 int32_t * inPtr = pc;
267
268 *outNumBits = 0;
269 RequireAction( (bitSize >= 1) && (bitSize <= 32), return kALAC_ParamError; );
270
271 out = bitstream->cur;
272 startPos = bitstream->bitIndex;
273 bitPos = startPos;
274
275 mb = params->mb = params->mb0;
276 pb = params->pb;
277 kb = params->kb;
278 wb = params->wb;
279 zmode = 0;
280
281 c=0;
282 status = ALAC_noErr;
283
284 while (c < numSamples)
285 {
286 m = mb >> QBSHIFT;
287 k = lg3a(m);
288 if ( k > kb)
289 {
290 k = kb;
291 }
292 m = (1<<k)-1;
293
294 del = *inPtr++;
295 rowPos++;
296
297 n = (abs_func(del) << 1) - ((del >> 31) & 1) - zmode;
298 //Assert( 32-lead(n) <= bitSize );
299
300 if ( dyn_code_32bit(bitSize, m, k, n, &numBits, &value, &overflow, &overflowbits) )
301 {
302 dyn_jam_noDeref(out, bitPos, numBits, value);
303 bitPos += numBits;
304 dyn_jam_noDeref_large(out, bitPos, overflowbits, overflow);
305 bitPos += overflowbits;
306 }
307 else
308 {
309 dyn_jam_noDeref(out, bitPos, numBits, value);
310 bitPos += numBits;
311 }
312
313 c++;
314 if ( rowPos >= rowSize)
315 {
316 rowPos = 0;
317 inPtr += rowJump;
318 }
319
320 mb = pb * (n + zmode) + mb - ((pb *mb)>>QBSHIFT);
321
322 // update mean tracking if it's overflowed
323 if (n > N_MAX_MEAN_CLAMP)
324 mb = N_MEAN_CLAMP_VAL;
325
326 zmode = 0;
327
328 RequireAction(c <= numSamples, status = kALAC_ParamError; goto Exit; );
329
330 if (((mb << MMULSHIFT) < QB) && (c < numSamples))
331 {
332 zmode = 1;
333 nz = 0;
334
335 while(c<numSamples && *inPtr == 0)
336 {
337 /* Take care of wrap-around globals. */
338 ++inPtr;
339 ++nz;
340 ++c;
341 if ( ++rowPos >= rowSize)
342 {
343 rowPos = 0;
344 inPtr += rowJump;
345 }
346
347 if(nz >= 65535)
348 {
349 zmode = 0;
350 break;
351 }
352 }
353
354 k = lead(mb) - BITOFF+((mb+MOFF)>>MDENSHIFT);
355 mz = ((1<<k)-1) & wb;
356
357 value = dyn_code(mz, k, nz, &numBits);
358 dyn_jam_noDeref(out, bitPos, numBits, value);
359 bitPos += numBits;
360
361 mb = 0;
362 }
363 }
364
365 *outNumBits = (bitPos - startPos);
366 BitBufferAdvance( bitstream, *outNumBits );
367
368 Exit:
369 return status;
370 }
371