1 /*
2  * Copyright (c) 2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 /*
22 	File:		ag_enc.c
23 
24 	Contains:   Adaptive Golomb encode routines.
25 
26 	Copyright:	(c) 2001-2011 Apple, Inc.
27 */
28 
29 #include "aglib.h"
30 #include "ALACBitUtilities.h"
31 #include "EndianPortable.h"
32 #include "ALACAudioTypes.h"
33 
34 #include <math.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #if __GNUC__ && TARGET_OS_MAC
39 	#if __POWERPC__
40 		#include <ppc_intrinsics.h>
41 	#else
42 		#include <libkern/OSByteOrder.h>
43 	#endif
44 #endif
45 
46 #define CODE_TO_LONG_MAXBITS	32
47 #define N_MAX_MEAN_CLAMP		0xffff
48 #define N_MEAN_CLAMP_VAL		0xffff
49 #define REPORT_VAL  40
50 
51 #if __GNUC__
52 #define ALWAYS_INLINE		__attribute__((always_inline))
53 #else
54 #define ALWAYS_INLINE
55 #endif
56 
57 
58 /*	And on the subject of the CodeWarrior x86 compiler and inlining, I reworked a lot of this
59 	to help the compiler out.   In many cases this required manual inlining or a macro.  Sorry
60 	if it is ugly but the performance gains are well worth it.
61 	- WSK 5/19/04
62 */
63 
64 // note: implementing this with some kind of "count leading zeros" assembly is a big performance win
lead(int32_t m)65 static inline int32_t lead( int32_t m )
66 {
67 	long j;
68 	unsigned long c = (1ul << 31);
69 
70 	for(j=0; j < 32; j++)
71 	{
72 		if((c & m) != 0)
73 			break;
74 		c >>= 1;
75 	}
76 	return (j);
77 }
78 
79 #define arithmin(a, b) ((a) < (b) ? (a) : (b))
80 
lg3a(int32_t x)81 static inline int32_t ALWAYS_INLINE lg3a( int32_t x)
82 {
83     int32_t result;
84 
85     x += 3;
86     result = lead(x);
87 
88     return 31 - result;
89 }
90 
abs_func(int32_t a)91 static inline int32_t ALWAYS_INLINE abs_func( int32_t a )
92 {
93 	// note: the CW PPC intrinsic __abs() turns into these instructions so no need to try and use it
94 	int32_t isneg  = a >> 31;
95 	int32_t xorval = a ^ isneg;
96 	int32_t result = xorval-isneg;
97 
98 	return result;
99 }
100 
read32bit(uint8_t * buffer)101 static inline uint32_t ALWAYS_INLINE read32bit( uint8_t * buffer )
102 {
103 	// embedded CPUs typically can't read unaligned 32-bit words so just read the bytes
104 	uint32_t		value;
105 
106 	value = ((uint32_t)buffer[0] << 24) | ((uint32_t)buffer[1] << 16) |
107 			 ((uint32_t)buffer[2] << 8) | (uint32_t)buffer[3];
108 	return value;
109 }
110 
111 #if PRAGMA_MARK
112 #pragma mark -
113 #endif
114 
dyn_code(int32_t m,int32_t k,int32_t n,uint32_t * outNumBits)115 static inline int32_t dyn_code(int32_t m, int32_t k, int32_t n, uint32_t *outNumBits)
116 {
117 	uint32_t 	div, mod, de;
118 	uint32_t	numBits;
119 	uint32_t	value;
120 
121 	//Assert( n >= 0 );
122 
123 	div = n/m;
124 
125 	if(div >= MAX_PREFIX_16)
126 	{
127 		numBits = MAX_PREFIX_16 + MAX_DATATYPE_BITS_16;
128 		value = (((1<<MAX_PREFIX_16)-1)<<MAX_DATATYPE_BITS_16) + n;
129 	}
130 	else
131 	{
132 		mod = n%m;
133 		de = (mod == 0);
134 		numBits = div + k + 1 - de;
135 		value = (((1<<div)-1)<<(numBits-div)) + mod + 1 - de;
136 
137 		// if coding this way is bigger than doing escape, then do escape
138 		if (numBits > MAX_PREFIX_16 + MAX_DATATYPE_BITS_16)
139 		{
140 		    numBits = MAX_PREFIX_16 + MAX_DATATYPE_BITS_16;
141 		    value = (((1<<MAX_PREFIX_16)-1)<<MAX_DATATYPE_BITS_16) + n;
142 		}
143 	}
144 
145 	*outNumBits = numBits;
146 
147 	return (int32_t) value;
148 }
149 
150 
dyn_code_32bit(int32_t maxbits,uint32_t m,uint32_t k,uint32_t n,uint32_t * outNumBits,uint32_t * outValue,uint32_t * overflow,uint32_t * overflowbits)151 static inline int32_t dyn_code_32bit(int32_t maxbits, uint32_t m, uint32_t k, uint32_t n, uint32_t *outNumBits, uint32_t *outValue, uint32_t *overflow, uint32_t *overflowbits)
152 {
153 	uint32_t 	div, mod, de;
154 	uint32_t	numBits;
155 	uint32_t	value;
156 	int32_t			didOverflow = 0;
157 
158 	div = n/m;
159 
160 	if (div < MAX_PREFIX_32)
161 	{
162 		mod = n - (m * div);
163 
164 		de = (mod == 0);
165 		numBits = div + k + 1 - de;
166 		value = (((1<<div)-1)<<(numBits-div)) + mod + 1 - de;
167 		if (numBits > 25)
168 			goto codeasescape;
169 	}
170 	else
171 	{
172 codeasescape:
173 		numBits = MAX_PREFIX_32;
174 		value = (((1<<MAX_PREFIX_32)-1));
175 		*overflow = n;
176 		*overflowbits = maxbits;
177 		didOverflow = 1;
178 	}
179 
180 	*outNumBits = numBits;
181 	*outValue = value;
182 
183 	return didOverflow;
184 }
185 
186 
dyn_jam_noDeref(unsigned char * out,uint32_t bitPos,uint32_t numBits,uint32_t value)187 static inline void ALWAYS_INLINE dyn_jam_noDeref(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
188 {
189 	uint32_t	*i = (uint32_t *)(out + (bitPos >> 3));
190 	uint32_t	mask;
191 	uint32_t	curr;
192 	uint32_t	shift;
193 
194 	//Assert( numBits <= 32 );
195 
196 	curr = *i;
197 	curr = Swap32NtoB( curr );
198 
199 	shift = 32 - (bitPos & 7) - numBits;
200 
201 	mask = ~0u >> (32 - numBits);		// mask must be created in two steps to avoid compiler sequencing ambiguity
202 	mask <<= shift;
203 
204 	value  = (value << shift) & mask;
205 	value |= curr & ~mask;
206 
207 	*i = Swap32BtoN( value );
208 }
209 
210 
dyn_jam_noDeref_large(unsigned char * out,uint32_t bitPos,uint32_t numBits,uint32_t value)211 static inline void ALWAYS_INLINE dyn_jam_noDeref_large(unsigned char *out, uint32_t bitPos, uint32_t numBits, uint32_t value)
212 {
213 	uint32_t *	i = (uint32_t *)(out + (bitPos>>3));
214 	uint32_t	w;
215 	uint32_t	curr;
216 	uint32_t	mask;
217 	int32_t			shiftvalue = (32 - (bitPos&7) - numBits);
218 
219 	//Assert(numBits <= 32);
220 
221 	curr = *i;
222 	curr = Swap32NtoB( curr );
223 
224 	if (shiftvalue < 0)
225 	{
226 		uint8_t 	tailbyte;
227 		uint8_t 	*tailptr;
228 
229 		w = value >> -shiftvalue;
230 		mask = ~0u >> -shiftvalue;
231 		w |= (curr & ~mask);
232 
233 		tailptr = ((uint8_t *)i) + 4;
234 		tailbyte = (value << ((8+shiftvalue))) & 0xff;
235 		*tailptr = (uint8_t)tailbyte;
236 	}
237 	else
238 	{
239 		mask = ~0u >> (32 - numBits);
240 		mask <<= shiftvalue;			// mask must be created in two steps to avoid compiler sequencing ambiguity
241 
242 		w  = (value << shiftvalue) & mask;
243 		w |= curr & ~mask;
244 	}
245 
246 	*i = Swap32BtoN( w );
247 }
248 
249 
dyn_comp(AGParamRecPtr params,int32_t * pc,BitBuffer * bitstream,int32_t numSamples,int32_t bitSize,uint32_t * outNumBits)250 int32_t dyn_comp( AGParamRecPtr params, int32_t * pc, BitBuffer * bitstream, int32_t numSamples, int32_t bitSize, uint32_t * outNumBits )
251 {
252     unsigned char *		out;
253     uint32_t		bitPos, startPos;
254     uint32_t			m, k, n, c, mz, nz;
255     uint32_t		numBits;
256     uint32_t			value;
257     int32_t				del, zmode;
258 	uint32_t		overflow, overflowbits;
259     int32_t					status;
260 
261     // shadow the variables in params so there's not the dereferencing overhead
262     uint32_t		mb, pb, kb, wb;
263     int32_t					rowPos = 0;
264     int32_t					rowSize = params->sw;
265     int32_t					rowJump = (params->fw) - rowSize;
266     int32_t *			inPtr = pc;
267 
268 	*outNumBits = 0;
269 	RequireAction( (bitSize >= 1) && (bitSize <= 32), return kALAC_ParamError; );
270 
271 	out = bitstream->cur;
272 	startPos = bitstream->bitIndex;
273     bitPos = startPos;
274 
275     mb = params->mb = params->mb0;
276     pb = params->pb;
277     kb = params->kb;
278     wb = params->wb;
279     zmode = 0;
280 
281     c=0;
282 	status = ALAC_noErr;
283 
284     while (c < numSamples)
285     {
286         m  = mb >> QBSHIFT;
287         k = lg3a(m);
288         if ( k > kb)
289         {
290         	k = kb;
291         }
292         m = (1<<k)-1;
293 
294         del = *inPtr++;
295         rowPos++;
296 
297         n = (abs_func(del) << 1) - ((del >> 31) & 1) - zmode;
298 		//Assert( 32-lead(n) <= bitSize );
299 
300 		if ( dyn_code_32bit(bitSize, m, k, n, &numBits, &value, &overflow, &overflowbits) )
301 		{
302 			dyn_jam_noDeref(out, bitPos, numBits, value);
303 			bitPos += numBits;
304 			dyn_jam_noDeref_large(out, bitPos, overflowbits, overflow);
305 			bitPos += overflowbits;
306 		}
307 		else
308 		{
309 			dyn_jam_noDeref(out, bitPos, numBits, value);
310 			bitPos += numBits;
311 		}
312 
313         c++;
314         if ( rowPos >= rowSize)
315         {
316         	rowPos = 0;
317         	inPtr += rowJump;
318         }
319 
320         mb = pb * (n + zmode) + mb - ((pb *mb)>>QBSHIFT);
321 
322 		// update mean tracking if it's overflowed
323 		if (n > N_MAX_MEAN_CLAMP)
324 			mb = N_MEAN_CLAMP_VAL;
325 
326         zmode = 0;
327 
328         RequireAction(c <= numSamples, status = kALAC_ParamError; goto Exit; );
329 
330         if (((mb << MMULSHIFT) < QB) && (c < numSamples))
331         {
332             zmode = 1;
333             nz = 0;
334 
335             while(c<numSamples && *inPtr == 0)
336             {
337             	/* Take care of wrap-around globals. */
338                 ++inPtr;
339                 ++nz;
340                 ++c;
341                 if ( ++rowPos >= rowSize)
342                 {
343                 	rowPos = 0;
344                 	inPtr += rowJump;
345                 }
346 
347                 if(nz >= 65535)
348                 {
349                 	zmode = 0;
350                 	break;
351                 }
352             }
353 
354             k = lead(mb) - BITOFF+((mb+MOFF)>>MDENSHIFT);
355             mz = ((1<<k)-1) & wb;
356 
357             value = dyn_code(mz, k, nz, &numBits);
358             dyn_jam_noDeref(out, bitPos, numBits, value);
359             bitPos += numBits;
360 
361             mb = 0;
362         }
363     }
364 
365     *outNumBits = (bitPos - startPos);
366 	BitBufferAdvance( bitstream, *outNumBits );
367 
368 Exit:
369 	return status;
370 }
371