/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */
20 
/*
	File:		dp_enc.c

	Contains:	Dynamic Predictor encode routines

	Copyright:	(c) 2001-2011 Apple, Inc.
*/
28 
29 #include "dplib.h"
30 #include <string.h>
31 
// ALWAYS_INLINE: forces inlining on GCC-compatible compilers; expands to nothing elsewhere.
// defined() is used so that compilers which do not predefine __GNUC__ never evaluate it.
#if defined(__GNUC__)
#define ALWAYS_INLINE		__attribute__((always_inline))
#else
#define ALWAYS_INLINE
#endif

// LOOP_ALIGN: placed at the top of the hot loop bodies in this file.
// On PowerPC builds with CodeWarrior 0x3200 or later it emits a 16-byte alignment
// directive (and functions are aligned to 16 bytes as well) to make the G5 happy;
// on every other toolchain it expands to nothing.
#if defined(TARGET_CPU_PPC) && TARGET_CPU_PPC && defined(__MWERKS__) && (__MWERKS__ >= 0x3200)
#pragma function_align 16
#define LOOP_ALIGN			asm { align 16 }
#else
#define LOOP_ALIGN
#endif
45 
init_coefs(int16_t * coefs,uint32_t denshift,int32_t numPairs)46 void init_coefs( int16_t * coefs, uint32_t denshift, int32_t numPairs )
47 {
48 	int32_t		k;
49 	int32_t		den = 1 << denshift;
50 
51 	coefs[0] = (AINIT * den) >> 4;
52 	coefs[1] = (BINIT * den) >> 4;
53 	coefs[2] = (CINIT * den) >> 4;
54 	for ( k = 3; k < numPairs; k++ )
55 		coefs[k]  = 0;
56 }
57 
// copy_coefs
// Copies the first numPairs coefficients from srcCoefs into dstCoefs.
//
//	srcCoefs	coefficients to read; not modified
//	dstCoefs	destination array; must hold at least numPairs entries
//	numPairs	number of entries to copy; zero or negative copies nothing
//
// Entries are copied one at a time in ascending index order.
void copy_coefs( int16_t * srcCoefs, int16_t * dstCoefs, int32_t numPairs )
{
	int32_t k;

	// nothing to do for an empty request or when both pointers name the same array
	if ( (numPairs <= 0) || (srcCoefs == dstCoefs) )
		return;

	for ( k = 0; k < numPairs; k++ )
		dstCoefs[k] = srcCoefs[k];
}
65 
sign_of_int(int32_t i)66 static inline int32_t ALWAYS_INLINE sign_of_int( int32_t i )
67 {
68     int32_t negishift;
69 
70     negishift = ((uint32_t)-i) >> 31;
71     return negishift | (i >> 31);
72 }
73 
pc_block(int32_t * in,int32_t * pc1,int32_t num,int16_t * coefs,int32_t numactive,uint32_t chanbits,uint32_t denshift)74 void pc_block( int32_t * in, int32_t * pc1, int32_t num, int16_t * coefs, int32_t numactive, uint32_t chanbits, uint32_t denshift )
75 {
76 	register int16_t	a0, a1, a2, a3;
77 	register int32_t	b0, b1, b2, b3;
78 	int32_t					j, k, lim;
79 	int32_t *			pin;
80 	int32_t				sum1, dd;
81 	int32_t				sg, sgn;
82 	int32_t				top;
83 	int32_t				del, del0;
84 	uint32_t			chanshift = 32 - chanbits;
85 	int32_t				denhalf = 1 << (denshift - 1);
86 
87 	pc1[0] = in[0];
88 	if ( numactive == 0 )
89 	{
90 		// just copy if numactive == 0 (but don't bother if in/out pointers the same)
91 		if ( (num > 1) && (in != pc1) )
92 			memcpy( &pc1[1], &in[1], (num - 1) * sizeof(int32_t) );
93 		return;
94 	}
95 	if ( numactive == 31 )
96 	{
97 		// short-circuit if numactive == 31
98 		for( j = 1; j < num; j++ )
99 		{
100 			del = in[j] - in[j-1];
101 			pc1[j] = (del << chanshift) >> chanshift;
102 		}
103 		return;
104 	}
105 
106 	for ( j = 1; j <= numactive; j++ )
107 	{
108 		del = in[j] - in[j-1];
109 		pc1[j] = (del << chanshift) >> chanshift;
110 	}
111 
112 	lim = numactive + 1;
113 
114 	if ( numactive == 4 )
115 	{
116 		// optimization for numactive == 4
117 		a0 = coefs[0];
118 		a1 = coefs[1];
119 		a2 = coefs[2];
120 		a3 = coefs[3];
121 
122 		for ( j = lim; j < num; j++ )
123 		{
124 			LOOP_ALIGN
125 
126 			top = in[j - lim];
127 			pin = in + j - 1;
128 
129 			b0 = top - pin[0];
130 			b1 = top - pin[-1];
131 			b2 = top - pin[-2];
132 			b3 = top - pin[-3];
133 
134 			sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3) >> denshift;
135 
136 			del = in[j] - top - sum1;
137 			del = (del << chanshift) >> chanshift;
138 			pc1[j] = del;
139 			del0 = del;
140 
141 			sg = sign_of_int(del);
142 			if ( sg > 0 )
143 			{
144 				sgn = sign_of_int( b3 );
145 				a3 -= sgn;
146 				del0 -= (4 - 3) * ((sgn * b3) >> denshift);
147 				if ( del0 <= 0 )
148 					continue;
149 
150 				sgn = sign_of_int( b2 );
151 				a2 -= sgn;
152 				del0 -= (4 - 2) * ((sgn * b2) >> denshift);
153 				if ( del0 <= 0 )
154 					continue;
155 
156 				sgn = sign_of_int( b1 );
157 				a1 -= sgn;
158 				del0 -= (4 - 1) * ((sgn * b1) >> denshift);
159 				if ( del0 <= 0 )
160 					continue;
161 
162 				a0 -= sign_of_int( b0 );
163 			}
164 			else if ( sg < 0 )
165 			{
166 				// note: to avoid unnecessary negations, we flip the value of "sgn"
167 				sgn = -sign_of_int( b3 );
168 				a3 -= sgn;
169 				del0 -= (4 - 3) * ((sgn * b3) >> denshift);
170 				if ( del0 >= 0 )
171 					continue;
172 
173 				sgn = -sign_of_int( b2 );
174 				a2 -= sgn;
175 				del0 -= (4 - 2) * ((sgn * b2) >> denshift);
176 				if ( del0 >= 0 )
177 					continue;
178 
179 				sgn = -sign_of_int( b1 );
180 				a1 -= sgn;
181 				del0 -= (4 - 1) * ((sgn * b1) >> denshift);
182 				if ( del0 >= 0 )
183 					continue;
184 
185 				a0 += sign_of_int( b0 );
186 			}
187 		}
188 
189 		coefs[0] = a0;
190 		coefs[1] = a1;
191 		coefs[2] = a2;
192 		coefs[3] = a3;
193 	}
194 	else if ( numactive == 8 )
195 	{
196 		// optimization for numactive == 8
197 		register int16_t	a4, a5, a6, a7;
198 		register int32_t	b4, b5, b6, b7;
199 
200 		a0 = coefs[0];
201 		a1 = coefs[1];
202 		a2 = coefs[2];
203 		a3 = coefs[3];
204 		a4 = coefs[4];
205 		a5 = coefs[5];
206 		a6 = coefs[6];
207 		a7 = coefs[7];
208 
209 		for ( j = lim; j < num; j++ )
210 		{
211 			LOOP_ALIGN
212 
213 			top = in[j - lim];
214 			pin = in + j - 1;
215 
216 			b0 = top - (*pin--);
217 			b1 = top - (*pin--);
218 			b2 = top - (*pin--);
219 			b3 = top - (*pin--);
220 			b4 = top - (*pin--);
221 			b5 = top - (*pin--);
222 			b6 = top - (*pin--);
223 			b7 = top - (*pin);
224 			pin += 8;
225 
226 			sum1 = (denhalf - a0 * b0 - a1 * b1 - a2 * b2 - a3 * b3
227 					- a4 * b4 - a5 * b5 - a6 * b6 - a7 * b7) >> denshift;
228 
229 			del = in[j] - top - sum1;
230 			del = (del << chanshift) >> chanshift;
231 			pc1[j] = del;
232 			del0 = del;
233 
234 			sg = sign_of_int(del);
235 			if ( sg > 0 )
236 			{
237 				sgn = sign_of_int( b7 );
238 				a7 -= sgn;
239 				del0 -= 1 * ((sgn * b7) >> denshift);
240 				if ( del0 <= 0 )
241 					continue;
242 
243 				sgn = sign_of_int( b6 );
244 				a6 -= sgn;
245 				del0 -= 2 * ((sgn * b6) >> denshift);
246 				if ( del0 <= 0 )
247 					continue;
248 
249 				sgn = sign_of_int( b5 );
250 				a5 -= sgn;
251 				del0 -= 3 * ((sgn * b5) >> denshift);
252 				if ( del0 <= 0 )
253 					continue;
254 
255 				sgn = sign_of_int( b4 );
256 				a4 -= sgn;
257 				del0 -= 4 * ((sgn * b4) >> denshift);
258 				if ( del0 <= 0 )
259 					continue;
260 
261 				sgn = sign_of_int( b3 );
262 				a3 -= sgn;
263 				del0 -= 5 * ((sgn * b3) >> denshift);
264 				if ( del0 <= 0 )
265 					continue;
266 
267 				sgn = sign_of_int( b2 );
268 				a2 -= sgn;
269 				del0 -= 6 * ((sgn * b2) >> denshift);
270 				if ( del0 <= 0 )
271 					continue;
272 
273 				sgn = sign_of_int( b1 );
274 				a1 -= sgn;
275 				del0 -= 7 * ((sgn * b1) >> denshift);
276 				if ( del0 <= 0 )
277 					continue;
278 
279 				a0 -= sign_of_int( b0 );
280 			}
281 			else if ( sg < 0 )
282 			{
283 				// note: to avoid unnecessary negations, we flip the value of "sgn"
284 				sgn = -sign_of_int( b7 );
285 				a7 -= sgn;
286 				del0 -= 1 * ((sgn * b7) >> denshift);
287 				if ( del0 >= 0 )
288 					continue;
289 
290 				sgn = -sign_of_int( b6 );
291 				a6 -= sgn;
292 				del0 -= 2 * ((sgn * b6) >> denshift);
293 				if ( del0 >= 0 )
294 					continue;
295 
296 				sgn = -sign_of_int( b5 );
297 				a5 -= sgn;
298 				del0 -= 3 * ((sgn * b5) >> denshift);
299 				if ( del0 >= 0 )
300 					continue;
301 
302 				sgn = -sign_of_int( b4 );
303 				a4 -= sgn;
304 				del0 -= 4 * ((sgn * b4) >> denshift);
305 				if ( del0 >= 0 )
306 					continue;
307 
308 				sgn = -sign_of_int( b3 );
309 				a3 -= sgn;
310 				del0 -= 5 * ((sgn * b3) >> denshift);
311 				if ( del0 >= 0 )
312 					continue;
313 
314 				sgn = -sign_of_int( b2 );
315 				a2 -= sgn;
316 				del0 -= 6 * ((sgn * b2) >> denshift);
317 				if ( del0 >= 0 )
318 					continue;
319 
320 				sgn = -sign_of_int( b1 );
321 				a1 -= sgn;
322 				del0 -= 7 * ((sgn * b1) >> denshift);
323 				if ( del0 >= 0 )
324 					continue;
325 
326 				a0 += sign_of_int( b0 );
327 			}
328 		}
329 
330 		coefs[0] = a0;
331 		coefs[1] = a1;
332 		coefs[2] = a2;
333 		coefs[3] = a3;
334 		coefs[4] = a4;
335 		coefs[5] = a5;
336 		coefs[6] = a6;
337 		coefs[7] = a7;
338 	}
339 	else
340 	{
341 //pc_block_general:
342 		// general case
343 		for ( j = lim; j < num; j++ )
344 		{
345 			LOOP_ALIGN
346 
347 			top = in[j - lim];
348 			pin = in + j - 1;
349 
350 			sum1 = 0;
351 			for ( k = 0; k < numactive; k++ )
352 				sum1 -= coefs[k] * (top - pin[-k]);
353 
354 			del = in[j] - top - ((sum1 + denhalf) >> denshift);
355 			del = (del << chanshift) >> chanshift;
356 			pc1[j] = del;
357 			del0 = del;
358 
359 			sg = sign_of_int( del );
360 			if ( sg > 0 )
361 			{
362 				for ( k = (numactive - 1); k >= 0; k-- )
363 				{
364 					dd = top - pin[-k];
365 					sgn = sign_of_int( dd );
366 					coefs[k] -= sgn;
367 					del0 -= (numactive - k) * ((sgn * dd) >> denshift);
368 					if ( del0 <= 0 )
369 						break;
370 				}
371 			}
372 			else if ( sg < 0 )
373 			{
374 				for ( k = (numactive - 1); k >= 0; k-- )
375 				{
376 					dd = top - pin[-k];
377 					sgn = sign_of_int( dd );
378 					coefs[k] += sgn;
379 					del0 -= (numactive - k) * ((-sgn * dd) >> denshift);
380 					if ( del0 >= 0 )
381 						break;
382 				}
383 			}
384 		}
385 	}
386 }
387