1 /*
2  * Copyright (c) 2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 /*
22 	File:		ag_dec.c
23 
24 	Contains:   Adaptive Golomb decode routines.
25 
26 	Copyright:	(c) 2001-2011 Apple, Inc.
27 */
28 
29 #include "aglib.h"
30 #include "ALACBitUtilities.h"
31 #include "ALACAudioTypes.h"
32 
33 #include <math.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #if __GNUC__ && TARGET_OS_MAC
38 	#if __POWERPC__
39 		#include <ppc_intrinsics.h>
40 	#else
41 		#include <libkern/OSByteOrder.h>
42 	#endif
43 #endif
44 
45 #define CODE_TO_LONG_MAXBITS	32
46 #define N_MAX_MEAN_CLAMP		0xffff
47 #define N_MEAN_CLAMP_VAL		0xffff
48 #define REPORT_VAL  40
49 
50 #if __GNUC__
51 #define ALWAYS_INLINE		__attribute__((always_inline))
52 #else
53 #define ALWAYS_INLINE
54 #endif
55 
56 /*	And on the subject of the CodeWarrior x86 compiler and inlining, I reworked a lot of this
57 	to help the compiler out.   In many cases this required manual inlining or a macro.  Sorry
58 	if it is ugly but the performance gains are well worth it.
59 	- WSK 5/19/04
60 */
61 
set_standard_ag_params(AGParamRecPtr params,uint32_t fullwidth,uint32_t sectorwidth)62 void set_standard_ag_params(AGParamRecPtr params, uint32_t fullwidth, uint32_t sectorwidth)
63 {
64 	/* Use
65 		fullwidth = sectorwidth = numOfSamples, for analog 1-dimensional type-short data,
66 		but use
67 		fullwidth = full image width, sectorwidth = sector (patch) width
68 		for such as image (2-dim.) data.
69 	*/
70 	set_ag_params( params, MB0, PB0, KB0, fullwidth, sectorwidth, MAX_RUN_DEFAULT );
71 }
72 
set_ag_params(AGParamRecPtr params,uint32_t m,uint32_t p,uint32_t k,uint32_t f,uint32_t s,uint32_t maxrun)73 void set_ag_params(AGParamRecPtr params, uint32_t m, uint32_t p, uint32_t k, uint32_t f, uint32_t s, uint32_t maxrun)
74 {
75 	params->mb = params->mb0 = m;
76 	params->pb = p;
77 	params->kb = k;
78 	params->wb = (1u<<params->kb)-1;
79 	params->qb = QB-params->pb;
80 	params->fw = f;
81 	params->sw = s;
82 	params->maxrun = maxrun;
83 }
84 
85 #if PRAGMA_MARK
86 #pragma mark -
87 #endif
88 
89 
90 // note: implementing this with some kind of "count leading zeros" assembly is a big performance win
lead(int32_t m)91 static inline int32_t lead( int32_t m )
92 {
93 	long j;
94 	unsigned long c = (1ul << 31);
95 
96 	for(j=0; j < 32; j++)
97 	{
98 		if((c & m) != 0)
99 			break;
100 		c >>= 1;
101 	}
102 	return (j);
103 }
104 
105 #define arithmin(a, b) ((a) < (b) ? (a) : (b))
106 
lg3a(int32_t x)107 static inline int32_t ALWAYS_INLINE lg3a( int32_t x)
108 {
109     int32_t result;
110 
111     x += 3;
112     result = lead(x);
113 
114     return 31 - result;
115 }
116 
read32bit(uint8_t * buffer)117 static inline uint32_t ALWAYS_INLINE read32bit( uint8_t * buffer )
118 {
119 	// embedded CPUs typically can't read unaligned 32-bit words so just read the bytes
120 	uint32_t		value;
121 
122 	value = ((uint32_t)buffer[0] << 24) | ((uint32_t)buffer[1] << 16) |
123 			 ((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3];
124 	return value;
125 
126 }
127 
128 #if PRAGMA_MARK
129 #pragma mark -
130 #endif
131 
132 #define get_next_fromlong(inlong, suff)		((inlong) >> (32 - (suff)))
133 
134 
135 static inline uint32_t ALWAYS_INLINE
getstreambits(uint8_t * in,int32_t bitoffset,int32_t numbits)136 getstreambits( uint8_t *in, int32_t bitoffset, int32_t numbits )
137 {
138 	uint32_t	load1, load2;
139 	uint32_t	byteoffset = bitoffset / 8;
140 	uint32_t	result;
141 
142 	//Assert( numbits <= 32 );
143 
144 	load1 = read32bit( in + byteoffset );
145 
146 	if ( (numbits + (bitoffset & 0x7)) > 32)
147 	{
148 		int32_t load2shift;
149 
150 		result = load1 << (bitoffset & 0x7);
151 		load2 = (uint32_t) in[byteoffset+4];
152 		load2shift = (8-(numbits + (bitoffset & 0x7)-32));
153 		load2 >>= load2shift;
154 		result >>= (32-numbits);
155 		result |= load2;
156 	}
157 	else
158 	{
159 		result = load1 >> (32-numbits-(bitoffset & 7));
160 	}
161 
162 	// a shift of >= "the number of bits in the type of the value being shifted" results in undefined
163 	// behavior so don't try to shift by 32
164 	if ( numbits != (sizeof(result) * 8) )
165 		result &= ~(0xfffffffful << numbits);
166 
167 	return result;
168 }
169 
170 
dyn_get(unsigned char * in,uint32_t * bitPos,uint32_t m,uint32_t k)171 static inline int32_t dyn_get(unsigned char *in, uint32_t *bitPos, uint32_t m, uint32_t k)
172 {
173     uint32_t	tempbits = *bitPos;
174     uint32_t		result;
175     uint32_t		pre = 0, v;
176     uint32_t		streamlong;
177 
178 	streamlong = read32bit( in + (tempbits >> 3) );
179     streamlong <<= (tempbits & 7);
180 
181     /* find the number of bits in the prefix */
182     {
183         uint32_t	notI = ~streamlong;
184     	pre = lead( notI);
185     }
186 
187     if(pre >= MAX_PREFIX_16)
188     {
189         pre = MAX_PREFIX_16;
190         tempbits += pre;
191         streamlong <<= pre;
192         result = get_next_fromlong(streamlong,MAX_DATATYPE_BITS_16);
193         tempbits += MAX_DATATYPE_BITS_16;
194 
195     }
196     else
197     {
198         // all of the bits must fit within the long we have loaded
199         //Assert(pre+1+k <= 32);
200 
201         tempbits += pre;
202         tempbits += 1;
203         streamlong <<= pre+1;
204         v = get_next_fromlong(streamlong, k);
205         tempbits += k;
206 
207         result = pre*m + v-1;
208 
209         if(v<2) {
210             result -= (v-1);
211             tempbits -= 1;
212         }
213     }
214 
215     *bitPos = tempbits;
216     return result;
217 }
218 
219 
dyn_get_32bit(uint8_t * in,uint32_t * bitPos,int32_t m,int32_t k,int32_t maxbits)220 static inline int32_t dyn_get_32bit( uint8_t * in, uint32_t * bitPos, int32_t m, int32_t k, int32_t maxbits )
221 {
222 	uint32_t	tempbits = *bitPos;
223 	uint32_t		v;
224 	uint32_t		streamlong;
225 	uint32_t		result;
226 
227 	streamlong = read32bit( in + (tempbits >> 3) );
228 	streamlong <<= (tempbits & 7);
229 
230 	/* find the number of bits in the prefix */
231 	{
232 		uint32_t notI = ~streamlong;
233 		result = lead( notI);
234 	}
235 
236 	if(result >= MAX_PREFIX_32)
237 	{
238 		result = getstreambits(in, tempbits+MAX_PREFIX_32, maxbits);
239 		tempbits += MAX_PREFIX_32 + maxbits;
240 	}
241 	else
242 	{
243 		/* all of the bits must fit within the long we have loaded*/
244 		//Assert(k<=14);
245 		//Assert(result<MAX_PREFIX_32);
246 		//Assert(result+1+k <= 32);
247 
248 		tempbits += result;
249 		tempbits += 1;
250 
251 		if (k != 1)
252 		{
253 			streamlong <<= result+1;
254 			v = get_next_fromlong(streamlong, k);
255 			tempbits += k;
256 			tempbits -= 1;
257 			result = result*m;
258 
259 			if(v>=2)
260 			{
261 				result += (v-1);
262 				tempbits += 1;
263 			}
264 		}
265 	}
266 
267 	*bitPos = tempbits;
268 
269 	return result;
270 }
271 
dyn_decomp(AGParamRecPtr params,BitBuffer * bitstream,int32_t * pc,int32_t numSamples,int32_t maxSize,uint32_t * outNumBits)272 int32_t dyn_decomp( AGParamRecPtr params, BitBuffer * bitstream, int32_t * pc, int32_t numSamples, int32_t maxSize, uint32_t * outNumBits )
273 {
274     uint8_t 		*in;
275     int32_t			*outPtr = pc;
276     uint32_t 	bitPos, startPos, maxPos;
277     uint32_t		j, m, k, n, c, mz;
278     int32_t			del, zmode;
279     uint32_t 	mb;
280     uint32_t	pb_local = params->pb;
281     uint32_t	kb_local = params->kb;
282     uint32_t	wb_local = params->wb;
283     int32_t				status;
284 
285 	RequireAction( (bitstream != nil) && (pc != nil) && (outNumBits != nil), return kALAC_ParamError; );
286 	*outNumBits = 0;
287 
288 	in = bitstream->cur;
289 	startPos = bitstream->bitIndex;
290 	maxPos = bitstream->byteSize * 8;
291 	bitPos = startPos;
292 
293     mb = params->mb0;
294     zmode = 0;
295 
296     c = 0;
297 	status = ALAC_noErr;
298 
299     while (c < numSamples)
300     {
301 		// bail if we've run off the end of the buffer
302     	RequireAction( bitPos < maxPos, status = kALAC_ParamError; goto Exit; );
303 
304         m = (mb)>>QBSHIFT;
305         k = lg3a(m);
306 
307         k = arithmin(k, kb_local);
308         m = (1<<k)-1;
309 
310 		n = dyn_get_32bit( in, &bitPos, m, k, maxSize );
311 
312         // least significant bit is sign bit
313         {
314         	uint32_t	ndecode = n + zmode;
315             int32_t		multiplier = (- (ndecode&1));
316 
317             multiplier |= 1;
318             del = ((ndecode+1) >> 1) * (multiplier);
319         }
320 
321         *outPtr++ = del;
322 
323         c++;
324 
325         mb = pb_local*(n+zmode) + mb - ((pb_local*mb)>>QBSHIFT);
326 
327 		// update mean tracking
328 		if (n > N_MAX_MEAN_CLAMP)
329 			mb = N_MEAN_CLAMP_VAL;
330 
331         zmode = 0;
332 
333         if (((mb << MMULSHIFT) < QB) && (c < numSamples))
334         {
335             zmode = 1;
336             k = lead(mb) - BITOFF+((mb+MOFF)>>MDENSHIFT);
337             mz = ((1<<k)-1) & wb_local;
338 
339             n = dyn_get(in, &bitPos, mz, k);
340 
341             RequireAction(c+n <= numSamples, status = kALAC_ParamError; goto Exit; );
342 
343             for(j=0; j < n; j++)
344             {
345                 *outPtr++ = 0;
346                 ++c;
347             }
348 
349             if(n >= 65535)
350             	zmode = 0;
351 
352             mb = 0;
353         }
354     }
355 
356 Exit:
357 	*outNumBits = (bitPos - startPos);
358 	BitBufferAdvance( bitstream, *outNumBits );
359 	RequireAction( bitstream->cur <= bitstream->end, status = kALAC_ParamError; );
360 
361     return status;
362 }
363