1 /*
2  * Copyright (c) 2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 /*
22 	File:		matrix_dec.c
23 
24 	Contains:	ALAC mixing/matrixing decode routines.
25 
26 	Copyright:	(c) 2004-2011 Apple, Inc.
27 */
28 
29 #include "matrixlib.h"
30 #include "ALACAudioTypes.h"
31 
32 // up to 24-bit "offset" macros for the individual bytes of a 20/24-bit word
33 #if TARGET_RT_BIG_ENDIAN
34 	#define LBYTE	2
35 	#define MBYTE	1
36 	#define HBYTE	0
37 #else
38 	#define LBYTE	0
39 	#define MBYTE	1
40 	#define HBYTE	2
41 #endif
42 
43 /*
44     There is no plain middle-side option; instead there are various mixing
45     modes including middle-side, each lossless, as embodied in the mix()
46     and unmix() functions.  These functions exploit a generalized middle-side
47     transformation:
48 
49     u := [(rL + (m-r)R)/m];
50     v := L - R;
51 
52     where [ ] denotes integer floor.  The (lossless) inverse is
53 
    L = u + v - [rv/m];
55     R = L - v;
56 */
57 
58 // 16-bit routines
59 
// Reconstruct a 16-bit interleaved stereo pair from the decoded u/v channels.
// When mixres is non-zero, u/v carry the generalized middle-side transform
// described at the top of this file; otherwise they are already plain L/R.
//   u, v        - decoded channel buffers
//   out         - interleaved 16-bit output buffer
//   stride      - output channel count (distance between successive L samples)
//   numSamples  - number of stereo sample pairs to produce
//   mixbits/mixres - matrix parameters (mixres == 0 => not matrixed)
void unmix16( int32_t * u, int32_t * v, int16_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres )
{
	int16_t *	dst = out;
	int32_t		sample;

	if ( mixres == 0 )
	{
		/* channels were stored as plain left/right -- just interleave */
		for ( sample = 0; sample < numSamples; sample++ )
		{
			dst[0] = (int16_t) u[sample];
			dst[1] = (int16_t) v[sample];
			dst += stride;
		}
		return;
	}

	/* invert the generalized middle-side transform */
	for ( sample = 0; sample < numSamples; sample++ )
	{
		int32_t		left  = u[sample] + v[sample] - ((mixres * v[sample]) >> mixbits);
		int32_t		right = left - v[sample];

		dst[0] = (int16_t) left;
		dst[1] = (int16_t) right;
		dst += stride;
	}
}
91 
92 // 20-bit routines
93 // - the 20 bits of data are left-justified in 3 bytes of storage but right-aligned for input/output predictor buffers
94 
unmix20(int32_t * u,int32_t * v,uint8_t * out,uint32_t stride,int32_t numSamples,int32_t mixbits,int32_t mixres)95 void unmix20( int32_t * u, int32_t * v, uint8_t * out, uint32_t stride, int32_t numSamples, int32_t mixbits, int32_t mixres )
96 {
97 	uint8_t *	op = out;
98 	int32_t 		j;
99 
100 	if ( mixres != 0 )
101 	{
102 		/* matrixed stereo */
103 		for ( j = 0; j < numSamples; j++ )
104 		{
105 			int32_t		l, r;
106 
107 			l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
108 			r = l - v[j];
109 
110 			l <<= 4;
111 			r <<= 4;
112 
113 			op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
114 			op[MBYTE] = (uint8_t)((l >>  8) & 0xffu);
115 			op[LBYTE] = (uint8_t)((l >>  0) & 0xffu);
116 			op += 3;
117 
118 			op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
119 			op[MBYTE] = (uint8_t)((r >>  8) & 0xffu);
120 			op[LBYTE] = (uint8_t)((r >>  0) & 0xffu);
121 
122 			op += (stride - 1) * 3;
123 		}
124 	}
125 	else
126 	{
127 		/* Conventional separated stereo. */
128 		for ( j = 0; j < numSamples; j++ )
129 		{
130 			int32_t		val;
131 
132 			val = u[j] << 4;
133 			op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
134 			op[MBYTE] = (uint8_t)((val >>  8) & 0xffu);
135 			op[LBYTE] = (uint8_t)((val >>  0) & 0xffu);
136 			op += 3;
137 
138 			val = v[j] << 4;
139 			op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
140 			op[MBYTE] = (uint8_t)((val >>  8) & 0xffu);
141 			op[LBYTE] = (uint8_t)((val >>  0) & 0xffu);
142 
143 			op += (stride - 1) * 3;
144 		}
145 	}
146 }
147 
148 // 24-bit routines
149 // - the 24 bits of data are right-justified in the input/output predictor buffers
150 
unmix24(int32_t * u,int32_t * v,uint8_t * out,uint32_t stride,int32_t numSamples,int32_t mixbits,int32_t mixres,uint16_t * shiftUV,int32_t bytesShifted)151 void unmix24( int32_t * u, int32_t * v, uint8_t * out, uint32_t stride, int32_t numSamples,
152 				int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
153 {
154 	uint8_t *	op = out;
155 	int32_t			shift = bytesShifted * 8;
156 	int32_t		l, r;
157 	int32_t 		j, k;
158 
159 	if ( mixres != 0 )
160 	{
161 		/* matrixed stereo */
162 		if ( bytesShifted != 0 )
163 		{
164 			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
165 			{
166 				l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
167 				r = l - v[j];
168 
169 				l = (l << shift) | (uint32_t) shiftUV[k + 0];
170 				r = (r << shift) | (uint32_t) shiftUV[k + 1];
171 
172 				op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
173 				op[MBYTE] = (uint8_t)((l >>  8) & 0xffu);
174 				op[LBYTE] = (uint8_t)((l >>  0) & 0xffu);
175 				op += 3;
176 
177 				op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
178 				op[MBYTE] = (uint8_t)((r >>  8) & 0xffu);
179 				op[LBYTE] = (uint8_t)((r >>  0) & 0xffu);
180 
181 				op += (stride - 1) * 3;
182 			}
183 		}
184 		else
185 		{
186 			for ( j = 0; j < numSamples; j++ )
187 			{
188 				l = u[j] + v[j] - ((mixres * v[j]) >> mixbits);
189 				r = l - v[j];
190 
191 				op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
192 				op[MBYTE] = (uint8_t)((l >>  8) & 0xffu);
193 				op[LBYTE] = (uint8_t)((l >>  0) & 0xffu);
194 				op += 3;
195 
196 				op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
197 				op[MBYTE] = (uint8_t)((r >>  8) & 0xffu);
198 				op[LBYTE] = (uint8_t)((r >>  0) & 0xffu);
199 
200 				op += (stride - 1) * 3;
201 			}
202 		}
203 	}
204 	else
205 	{
206 		/* Conventional separated stereo. */
207 		if ( bytesShifted != 0 )
208 		{
209 			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
210 			{
211 				l = u[j];
212 				r = v[j];
213 
214 				l = (l << shift) | (uint32_t) shiftUV[k + 0];
215 				r = (r << shift) | (uint32_t) shiftUV[k + 1];
216 
217 				op[HBYTE] = (uint8_t)((l >> 16) & 0xffu);
218 				op[MBYTE] = (uint8_t)((l >>  8) & 0xffu);
219 				op[LBYTE] = (uint8_t)((l >>  0) & 0xffu);
220 				op += 3;
221 
222 				op[HBYTE] = (uint8_t)((r >> 16) & 0xffu);
223 				op[MBYTE] = (uint8_t)((r >>  8) & 0xffu);
224 				op[LBYTE] = (uint8_t)((r >>  0) & 0xffu);
225 
226 				op += (stride - 1) * 3;
227 			}
228 		}
229 		else
230 		{
231 			for ( j = 0; j < numSamples; j++ )
232 			{
233 				int32_t		val;
234 
235 				val = u[j];
236 				op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
237 				op[MBYTE] = (uint8_t)((val >>  8) & 0xffu);
238 				op[LBYTE] = (uint8_t)((val >>  0) & 0xffu);
239 				op += 3;
240 
241 				val = v[j];
242 				op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
243 				op[MBYTE] = (uint8_t)((val >>  8) & 0xffu);
244 				op[LBYTE] = (uint8_t)((val >>  0) & 0xffu);
245 
246 				op += (stride - 1) * 3;
247 			}
248 		}
249 	}
250 }
251 
252 // 32-bit routines
253 // - note that these really expect the internal data width to be < 32 but the arrays are 32-bit
254 // - otherwise, the calculations might overflow into the 33rd bit and be lost
255 // - therefore, these routines deal with the specified "unused lower" bytes in the "shift" buffers
256 
// Reconstruct 32-bit-container stereo samples (internal width < 32 bits --
// see section comment above).  When mixres is non-zero, u/v carry the
// generalized middle-side transform; otherwise they are plain left/right.
// When bytesShifted is non-zero, the low bytes split off before prediction
// are re-attached from shiftUV (interleaved L/R, 2 entries per sample pair).
//   u, v        - decoded channel buffers
//   out         - interleaved 32-bit output buffer
//   stride      - output channel count (in samples)
//   numSamples  - number of stereo sample pairs to produce
//   mixbits/mixres - matrix parameters (mixres == 0 => not matrixed)
//   shiftUV     - shifted-out low bits, read only when bytesShifted != 0
//   bytesShifted - number of low bytes to re-attach per sample
// Fix: re-attaching the low bits now shifts in the unsigned domain --
// left-shifting a negative signed value is undefined behavior in C.
void unmix32( int32_t * u, int32_t * v, int32_t * out, uint32_t stride, int32_t numSamples,
				int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
{
	int32_t *	op = out;
	int32_t		shift = bytesShifted * 8;
	int32_t		l, r;
	int32_t 	j, k;

	if ( mixres != 0 )
	{
		//Assert( bytesShifted != 0 );

		/* matrixed stereo with shift */
		for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
		{
			int32_t		lt, rt;

			lt = u[j];
			rt = v[j];

			l = lt + rt - ((mixres * rt) >> mixbits);
			r = l - rt;

			// unsigned shift: same bit pattern, no signed-shift UB
			op[0] = (int32_t)(((uint32_t) l << shift) | (uint32_t) shiftUV[k + 0]);
			op[1] = (int32_t)(((uint32_t) r << shift) | (uint32_t) shiftUV[k + 1]);
			op += stride;
		}
	}
	else
	{
		if ( bytesShifted == 0 )
		{
			/* interleaving w/o shift */
			for ( j = 0; j < numSamples; j++ )
			{
				op[0] = u[j];
				op[1] = v[j];
				op += stride;
			}
		}
		else
		{
			/* interleaving with shift */
			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
			{
				op[0] = (int32_t)(((uint32_t) u[j] << shift) | (uint32_t) shiftUV[k + 0]);
				op[1] = (int32_t)(((uint32_t) v[j] << shift) | (uint32_t) shiftUV[k + 1]);
				op += stride;
			}
		}
	}
}
309 
310 // 20/24-bit <-> 32-bit helper routines (not really matrixing but convenient to put here)
311 
copyPredictorTo24(int32_t * in,uint8_t * out,uint32_t stride,int32_t numSamples)312 void copyPredictorTo24( int32_t * in, uint8_t * out, uint32_t stride, int32_t numSamples )
313 {
314 	uint8_t *	op = out;
315 	int32_t			j;
316 
317 	for ( j = 0; j < numSamples; j++ )
318 	{
319 		int32_t		val = in[j];
320 
321 		op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
322 		op[MBYTE] = (uint8_t)((val >>  8) & 0xffu);
323 		op[LBYTE] = (uint8_t)((val >>  0) & 0xffu);
324 		op += (stride * 3);
325 	}
326 }
327 
copyPredictorTo24Shift(int32_t * in,uint16_t * shift,uint8_t * out,uint32_t stride,int32_t numSamples,int32_t bytesShifted)328 void copyPredictorTo24Shift( int32_t * in, uint16_t * shift, uint8_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted )
329 {
330 	uint8_t *	op = out;
331 	int32_t			shiftVal = bytesShifted * 8;
332 	int32_t			j;
333 
334 	//Assert( bytesShifted != 0 );
335 
336 	for ( j = 0; j < numSamples; j++ )
337 	{
338 		int32_t		val = in[j];
339 
340 		val = (val << shiftVal) | (uint32_t) shift[j];
341 
342 		op[HBYTE] = (uint8_t)((val >> 16) & 0xffu);
343 		op[MBYTE] = (uint8_t)((val >>  8) & 0xffu);
344 		op[LBYTE] = (uint8_t)((val >>  0) & 0xffu);
345 		op += (stride * 3);
346 	}
347 }
348 
copyPredictorTo20(int32_t * in,uint8_t * out,uint32_t stride,int32_t numSamples)349 void copyPredictorTo20( int32_t * in, uint8_t * out, uint32_t stride, int32_t numSamples )
350 {
351 	uint8_t *	op = out;
352 	int32_t			j;
353 
354 	// 32-bit predictor values are right-aligned but 20-bit output values should be left-aligned
355 	// in the 24-bit output buffer
356 	for ( j = 0; j < numSamples; j++ )
357 	{
358 		int32_t		val = in[j];
359 
360 		op[HBYTE] = (uint8_t)((val >> 12) & 0xffu);
361 		op[MBYTE] = (uint8_t)((val >>  4) & 0xffu);
362 		op[LBYTE] = (uint8_t)((val <<  4) & 0xffu);
363 		op += (stride * 3);
364 	}
365 }
366 
// Copy predictor samples straight into an interleaved 32-bit output buffer.
// (Only a subroutine to abstract the "iPod can only output 16-bit data"
// problem -- see original comment.)
void copyPredictorTo32( int32_t * in, int32_t * out, uint32_t stride, int32_t numSamples )
{
	int32_t *	dst = out;
	int32_t		n;

	for ( n = 0; n < numSamples; n++ )
	{
		*dst = in[n];
		dst += stride;
	}
}
375 
// Merge each predictor sample with its shifted-out low bits and store the
// result into an interleaved 32-bit output buffer.
//   in          - right-aligned predictor values
//   shift       - low bits split off before prediction, one entry per sample
//   out         - interleaved 32-bit output buffer
//   stride      - output channel count (in samples)
//   numSamples  - number of samples to produce
//   bytesShifted - number of low bytes to re-attach (expected non-zero)
// Fix: the merge now shifts in the unsigned domain -- left-shifting a
// negative signed value is undefined behavior in C.
void copyPredictorTo32Shift( int32_t * in, uint16_t * shift, int32_t * out, uint32_t stride, int32_t numSamples, int32_t bytesShifted )
{
	int32_t *	op = out;
	uint32_t	shiftVal = bytesShifted * 8;
	int32_t		j;

	//Assert( bytesShifted != 0 );

	// this is only a subroutine to abstract the "iPod can only output 16-bit data" problem
	for ( j = 0; j < numSamples; j++ )
	{
		// unsigned shift: same bit pattern, no signed-shift UB
		op[0] = (int32_t)(((uint32_t) in[j] << shiftVal) | (uint32_t) shift[j]);
		op += stride;
	}
}
391