1 /*
2  * Copyright (c) 2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 /*
22 	File:		dp_dec.c
23 
24 	Contains:	Dynamic Predictor decode routines
25 
26 	Copyright:	(c) 2001-2011 Apple, Inc.
27 */
28 
29 
30 #include "dplib.h"
31 #include <string.h>
32 
// ALWAYS_INLINE is appended to the declaration of small helpers in this file.
// When __GNUC__ is non-zero it expands to the always_inline function attribute;
// otherwise it expands to nothing, so the declaration is left untouched.
#if __GNUC__
#define ALWAYS_INLINE		__attribute__((always_inline))
#else
#define ALWAYS_INLINE
#endif

// LOOP_ALIGN is placed as the first statement of the per-sample loop bodies below.
// It only emits anything when TARGET_CPU_PPC is non-zero and __MWERKS__ >= 0x3200
// (the Metrowerks compiler version macro); in every other configuration it expands
// to nothing.
#if TARGET_CPU_PPC && (__MWERKS__ >= 0x3200)
// align loops to a 16 byte boundary to make the G5 happy
#pragma function_align 16
#define LOOP_ALIGN			asm { align 16 }
#else
#define LOOP_ALIGN
#endif
46 
sign_of_int(int32_t i)47 static inline int32_t ALWAYS_INLINE sign_of_int( int32_t i )
48 {
49     int32_t negishift;
50 
51     negishift = ((uint32_t)-i) >> 31;
52     return negishift | (i >> 31);
53 }
54 
unpc_block(int32_t * pc1,int32_t * out,int32_t num,int16_t * coefs,int32_t numactive,uint32_t chanbits,uint32_t denshift)55 void unpc_block( int32_t * pc1, int32_t * out, int32_t num, int16_t * coefs, int32_t numactive, uint32_t chanbits, uint32_t denshift )
56 {
57 	register int16_t	a0, a1, a2, a3;
58 	register int32_t	b0, b1, b2, b3;
59 	int32_t					j, k, lim;
60 	int32_t				sum1, sg, sgn, top, dd;
61 	int32_t *			pout;
62 	int32_t				del, del0;
63 	uint32_t			chanshift = 32 - chanbits;
64 	int32_t				denhalf = 1<<(denshift-1);
65 
66 	out[0] = pc1[0];
67 	if ( numactive == 0 )
68 	{
69 		// just copy if numactive == 0 (but don't bother if in/out pointers the same)
70 		if ( (num > 1)  && (pc1 != out) )
71 			memcpy( &out[1], &pc1[1], (num - 1) * sizeof(int32_t) );
72 		return;
73 	}
74 	if ( numactive == 31 )
75 	{
76 		// short-circuit if numactive == 31
77 		int32_t		prev;
78 
79 		/*	this code is written such that the in/out buffers can be the same
80 			to conserve buffer space on embedded devices like the iPod
81 
82 			(original code)
83 			for ( j = 1; j < num; j++ )
84 				del = pc1[j] + out[j-1];
85 				out[j] = (del << chanshift) >> chanshift;
86 		*/
87 		prev = out[0];
88 		for ( j = 1; j < num; j++ )
89 		{
90 			del = pc1[j] + prev;
91 			prev = (del << chanshift) >> chanshift;
92 			out[j] = prev;
93 		}
94 		return;
95 	}
96 
97 	for ( j = 1; j <= numactive; j++ )
98 	{
99 		del = pc1[j] + out[j-1];
100 		out[j] = (del << chanshift) >> chanshift;
101 	}
102 
103 	lim = numactive + 1;
104 
105 	if ( numactive == 4 )
106 	{
107 		// optimization for numactive == 4
108 		register int16_t	a0, a1, a2, a3;
109 		register int32_t	b0, b1, b2, b3;
110 
111 		a0 = coefs[0];
112 		a1 = coefs[1];
113 		a2 = coefs[2];
114 		a3 = coefs[3];
115 
116 		for ( j = lim; j < num; j++ )
117 		{
118 			LOOP_ALIGN
119 
120 			top = out[j - lim];
121 			pout = out + j - 1;
122 
123 			b0 = top - pout[0];
124 			b1 = top - pout[-1];
125 			b2 = top - pout[-2];
126 			b3 = top - pout[-3];
127 
128 			sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3) >> denshift;
129 
130 			del = pc1[j];
131 			del0 = del;
132 			sg = sign_of_int(del);
133 			del += top + sum1;
134 
135 			out[j] = (del << chanshift) >> chanshift;
136 
137 			if ( sg > 0 )
138 			{
139 				sgn = sign_of_int( b3 );
140 				a3 -= sgn;
141 				del0 -= (4 - 3) * ((sgn * b3) >> denshift);
142 				if ( del0 <= 0 )
143 					continue;
144 
145 				sgn = sign_of_int( b2 );
146 				a2 -= sgn;
147 				del0 -= (4 - 2) * ((sgn * b2) >> denshift);
148 				if ( del0 <= 0 )
149 					continue;
150 
151 				sgn = sign_of_int( b1 );
152 				a1 -= sgn;
153 				del0 -= (4 - 1) * ((sgn * b1) >> denshift);
154 				if ( del0 <= 0 )
155 					continue;
156 
157 				a0 -= sign_of_int( b0 );
158 			}
159 			else if ( sg < 0 )
160 			{
161 				// note: to avoid unnecessary negations, we flip the value of "sgn"
162 				sgn = -sign_of_int( b3 );
163 				a3 -= sgn;
164 				del0 -= (4 - 3) * ((sgn * b3) >> denshift);
165 				if ( del0 >= 0 )
166 					continue;
167 
168 				sgn = -sign_of_int( b2 );
169 				a2 -= sgn;
170 				del0 -= (4 - 2) * ((sgn * b2) >> denshift);
171 				if ( del0 >= 0 )
172 					continue;
173 
174 				sgn = -sign_of_int( b1 );
175 				a1 -= sgn;
176 				del0 -= (4 - 1) * ((sgn * b1) >> denshift);
177 				if ( del0 >= 0 )
178 					continue;
179 
180 				a0 += sign_of_int( b0 );
181 			}
182 		}
183 
184 		coefs[0] = a0;
185 		coefs[1] = a1;
186 		coefs[2] = a2;
187 		coefs[3] = a3;
188 	}
189 	else if ( numactive == 8 )
190 	{
191 		register int16_t	a4, a5, a6, a7;
192 		register int32_t	b4, b5, b6, b7;
193 
194 		// optimization for numactive == 8
195 		a0 = coefs[0];
196 		a1 = coefs[1];
197 		a2 = coefs[2];
198 		a3 = coefs[3];
199 		a4 = coefs[4];
200 		a5 = coefs[5];
201 		a6 = coefs[6];
202 		a7 = coefs[7];
203 
204 		for ( j = lim; j < num; j++ )
205 		{
206 			LOOP_ALIGN
207 
208 			top = out[j - lim];
209 			pout = out + j - 1;
210 
211 			b0 = top - (*pout--);
212 			b1 = top - (*pout--);
213 			b2 = top - (*pout--);
214 			b3 = top - (*pout--);
215 			b4 = top - (*pout--);
216 			b5 = top - (*pout--);
217 			b6 = top - (*pout--);
218 			b7 = top - (*pout);
219 			pout += 8;
220 
221 			sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3
222 					- a4 * b4 - a5 * b5 - a6 * b6 - a7 * b7) >> denshift;
223 
224 			del = pc1[j];
225 			del0 = del;
226 			sg = sign_of_int(del);
227 			del += top + sum1;
228 
229 			out[j] = (del << chanshift) >> chanshift;
230 
231 			if ( sg > 0 )
232 			{
233 				sgn = sign_of_int( b7 );
234 				a7 -= sgn;
235 				del0 -= 1 * ((sgn * b7) >> denshift);
236 				if ( del0 <= 0 )
237 					continue;
238 
239 				sgn = sign_of_int( b6 );
240 				a6 -= sgn;
241 				del0 -= 2 * ((sgn * b6) >> denshift);
242 				if ( del0 <= 0 )
243 					continue;
244 
245 				sgn = sign_of_int( b5 );
246 				a5 -= sgn;
247 				del0 -= 3 * ((sgn * b5) >> denshift);
248 				if ( del0 <= 0 )
249 					continue;
250 
251 				sgn = sign_of_int( b4 );
252 				a4 -= sgn;
253 				del0 -= 4 * ((sgn * b4) >> denshift);
254 				if ( del0 <= 0 )
255 					continue;
256 
257 				sgn = sign_of_int( b3 );
258 				a3 -= sgn;
259 				del0 -= 5 * ((sgn * b3) >> denshift);
260 				if ( del0 <= 0 )
261 					continue;
262 
263 				sgn = sign_of_int( b2 );
264 				a2 -= sgn;
265 				del0 -= 6 * ((sgn * b2) >> denshift);
266 				if ( del0 <= 0 )
267 					continue;
268 
269 				sgn = sign_of_int( b1 );
270 				a1 -= sgn;
271 				del0 -= 7 * ((sgn * b1) >> denshift);
272 				if ( del0 <= 0 )
273 					continue;
274 
275 				a0 -= sign_of_int( b0 );
276 			}
277 			else if ( sg < 0 )
278 			{
279 				// note: to avoid unnecessary negations, we flip the value of "sgn"
280 				sgn = -sign_of_int( b7 );
281 				a7 -= sgn;
282 				del0 -= 1 * ((sgn * b7) >> denshift);
283 				if ( del0 >= 0 )
284 					continue;
285 
286 				sgn = -sign_of_int( b6 );
287 				a6 -= sgn;
288 				del0 -= 2 * ((sgn * b6) >> denshift);
289 				if ( del0 >= 0 )
290 					continue;
291 
292 				sgn = -sign_of_int( b5 );
293 				a5 -= sgn;
294 				del0 -= 3 * ((sgn * b5) >> denshift);
295 				if ( del0 >= 0 )
296 					continue;
297 
298 				sgn = -sign_of_int( b4 );
299 				a4 -= sgn;
300 				del0 -= 4 * ((sgn * b4) >> denshift);
301 				if ( del0 >= 0 )
302 					continue;
303 
304 				sgn = -sign_of_int( b3 );
305 				a3 -= sgn;
306 				del0 -= 5 * ((sgn * b3) >> denshift);
307 				if ( del0 >= 0 )
308 					continue;
309 
310 				sgn = -sign_of_int( b2 );
311 				a2 -= sgn;
312 				del0 -= 6 * ((sgn * b2) >> denshift);
313 				if ( del0 >= 0 )
314 					continue;
315 
316 				sgn = -sign_of_int( b1 );
317 				a1 -= sgn;
318 				del0 -= 7 * ((sgn * b1) >> denshift);
319 				if ( del0 >= 0 )
320 					continue;
321 
322 				a0 += sign_of_int( b0 );
323 			}
324 		}
325 
326 		coefs[0] = a0;
327 		coefs[1] = a1;
328 		coefs[2] = a2;
329 		coefs[3] = a3;
330 		coefs[4] = a4;
331 		coefs[5] = a5;
332 		coefs[6] = a6;
333 		coefs[7] = a7;
334 	}
335 	else
336 	{
337 		// general case
338 		for ( j = lim; j < num; j++ )
339 		{
340 			LOOP_ALIGN
341 
342 			sum1 = 0;
343 			pout = out + j - 1;
344 			top = out[j-lim];
345 
346 			for ( k = 0; k < numactive; k++ )
347 				sum1 += coefs[k] * (pout[-k] - top);
348 
349 			del = pc1[j];
350 			del0 = del;
351 			sg = sign_of_int( del );
352 			del += top + ((sum1 + denhalf) >> denshift);
353 			out[j] = (del << chanshift) >> chanshift;
354 
355 			if ( sg > 0 )
356 			{
357 				for ( k = (numactive - 1); k >= 0; k-- )
358 				{
359 					dd = top - pout[-k];
360 					sgn = sign_of_int( dd );
361 					coefs[k] -= sgn;
362 					del0 -= (numactive - k) * ((sgn * dd) >> denshift);
363 					if ( del0 <= 0 )
364 						break;
365 				}
366 			}
367 			else if ( sg < 0 )
368 			{
369 				for ( k = (numactive - 1); k >= 0; k-- )
370 				{
371 					dd = top - pout[-k];
372 					sgn = sign_of_int( dd );
373 					coefs[k] += sgn;
374 					del0 -= (numactive - k) * ((-sgn * dd) >> denshift);
375 					if ( del0 >= 0 )
376 						break;
377 				}
378 			}
379 		}
380 	}
381 }
382