/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_APACHE_LICENSE_HEADER_START@
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @APPLE_APACHE_LICENSE_HEADER_END@
 */

/*
	File:		matrix_enc.c

	Contains:	ALAC mixing/matrixing encode routines.

	Copyright:	(c) 2004-2011 Apple, Inc.
*/

#include "matrixlib.h"
#include "ALACAudioTypes.h"

// up to 24-bit "offset" macros for the individual bytes of a 20/24-bit word
#if TARGET_RT_BIG_ENDIAN
	#define LBYTE	2
	#define MBYTE	1
	#define HBYTE	0
#else
	#define LBYTE	0
	#define MBYTE	1
	#define HBYTE	2
#endif

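/*
    Illustration (not part of the codec): the macros let the routines below
    assemble a packed 3-byte sample into a 32-bit value with one endian-neutral
    expression.  With a hypothetical pointer "bp" at one packed sample:

        int32_t val = (int32_t)( ((uint32_t)bp[HBYTE] << 16) |
                                 ((uint32_t)bp[MBYTE] <<  8) |
                                  (uint32_t)bp[LBYTE] );

    On a little-endian host the high byte is in storage byte 2; on a
    big-endian host it is in storage byte 0.  Only the macro values change.
*/
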
/*
    There is no plain middle-side option; instead there are various mixing
    modes including middle-side, each lossless, as embodied in the mix()
    and unmix() functions.  These functions exploit a generalized middle-side
    transformation:

    u := [(rL + (m-r)R)/m];
    v := L - R;

    where [ ] denotes integer floor.  The (lossless) inverse is

    L = u + v - [rv/m];
    R = L - v;
*/

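/*
    Worked example (illustrative only): with m = 4 (mixbits = 2), r = 1
    (mixres = 1), L = 1000, R = -3:

        v = L - R                    = 1003
        u = [(1*1000 + 3*(-3)) / 4]  = [991/4] = 247

    and the inverse recovers both samples exactly:

        L = u + v - [r*v/m] = 247 + 1003 - [1003/4] = 1250 - 250 = 1000
        R = L - v           = 1000 - 1003           = -3

    In the code below the divisions by m are arithmetic right shifts by
    mixbits, which compute the same floor.
*/
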
// 16-bit routines

void mix16( int16_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres )
{
	int16_t *	ip = in;
	int32_t		j;

	if ( mixres != 0 )
	{
		int32_t		mod = 1 << mixbits;
		int32_t		m2;

		/* matrixed stereo */
		m2 = mod - mixres;
		for ( j = 0; j < numSamples; j++ )
		{
			int32_t		l, r;

			l = (int32_t) ip[0];
			r = (int32_t) ip[1];
			ip += stride;
			u[j] = (mixres * l + m2 * r) >> mixbits;
			v[j] = l - r;
		}
	}
	else
	{
		/* Conventional separated stereo. */
		for ( j = 0; j < numSamples; j++ )
		{
			u[j] = (int32_t) ip[0];
			v[j] = (int32_t) ip[1];
			ip += stride;
		}
	}
}

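/*
    Usage sketch (hypothetical buffer names, not from this file): mixing one
    packet of interleaved 16-bit stereo into the predictor input buffers.

        int16_t		pcm[2 * 4096];              // interleaved L/R samples
        int32_t		mixU[4096], mixV[4096];

        mix16( pcm, 2, mixU, mixV, 4096, 2, 1 );

    The mixbits/mixres arguments (2 and 1 here) are example values; the
    encoder chooses them per packet.
*/
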
// 20-bit routines
// - the 20 bits of data are left-justified in 3 bytes of storage but right-aligned for input/output predictor buffers

void mix20( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples, int32_t mixbits, int32_t mixres )
{
	int32_t		l, r;
	uint8_t *	ip = in;
	int32_t		j;

	if ( mixres != 0 )
	{
		/* matrixed stereo */
		int32_t		mod = 1 << mixbits;
		int32_t		m2 = mod - mixres;

		for ( j = 0; j < numSamples; j++ )
		{
			l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
			l = (l << 8) >> 12;
			ip += 3;

			r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
			r = (r << 8) >> 12;
			ip += (stride - 1) * 3;

			u[j] = (mixres * l + m2 * r) >> mixbits;
			v[j] = l - r;
		}
	}
	else
	{
		/* Conventional separated stereo. */
		for ( j = 0; j < numSamples; j++ )
		{
			l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
			u[j] = (l << 8) >> 12;
			ip += 3;

			r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
			v[j] = (r << 8) >> 12;
			ip += (stride - 1) * 3;
		}
	}
}

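/*
    Why "(l << 8) >> 12" works (illustrative): the 20-bit sample arrives
    left-justified in 24 bits.  Shifting left by 8 moves its sign bit to
    bit 31; the arithmetic shift right by 12 then sign-extends it down to a
    right-aligned 20-bit value.  For example, the 20-bit value -1 is stored
    as the bytes FF FF F0:

        l = 0x00FFFFF0;
        l = (l << 8) >> 12;     // 0xFFFFF000 >> 12 == 0xFFFFFFFF == -1
*/
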
// 24-bit routines
// - the 24 bits of data are right-justified in the input/output predictor buffers

void mix24( uint8_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples,
			int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
{
	int32_t		l, r;
	uint8_t *	ip = in;
	int32_t		shift = bytesShifted * 8;
	uint32_t	mask  = (1ul << shift) - 1;
	int32_t		j, k;

	if ( mixres != 0 )
	{
		/* matrixed stereo */
		int32_t		mod = 1 << mixbits;
		int32_t		m2 = mod - mixres;

		if ( bytesShifted != 0 )
		{
			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
			{
				l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				l = (l << 8) >> 8;
				ip += 3;

				r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				r = (r << 8) >> 8;
				ip += (stride - 1) * 3;

				shiftUV[k + 0] = (uint16_t)(l & mask);
				shiftUV[k + 1] = (uint16_t)(r & mask);

				l >>= shift;
				r >>= shift;

				u[j] = (mixres * l + m2 * r) >> mixbits;
				v[j] = l - r;
			}
		}
		else
		{
			for ( j = 0; j < numSamples; j++ )
			{
				l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				l = (l << 8) >> 8;
				ip += 3;

				r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				r = (r << 8) >> 8;
				ip += (stride - 1) * 3;

				u[j] = (mixres * l + m2 * r) >> mixbits;
				v[j] = l - r;
			}
		}
	}
	else
	{
		/* Conventional separated stereo. */
		if ( bytesShifted != 0 )
		{
			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
			{
				l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				l = (l << 8) >> 8;
				ip += 3;

				r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				r = (r << 8) >> 8;
				ip += (stride - 1) * 3;

				shiftUV[k + 0] = (uint16_t)(l & mask);
				shiftUV[k + 1] = (uint16_t)(r & mask);

				l >>= shift;
				r >>= shift;

				u[j] = l;
				v[j] = r;
			}
		}
		else
		{
			for ( j = 0; j < numSamples; j++ )
			{
				l = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				u[j] = (l << 8) >> 8;
				ip += 3;

				r = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
				v[j] = (r << 8) >> 8;
				ip += (stride - 1) * 3;
			}
		}
	}
}

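/*
    How the shift buffer works (illustrative): with bytesShifted == 1, each
    sample's low byte is parked in shiftUV and only the top bits are mixed,
    e.g. for one 24-bit sample

        l              = 0x123456;
        shiftUV[k + 0] = l & 0xFF;      // 0x56, re-emitted losslessly later
        l            >>= 8;             // 0x1234 goes on to the mixer

    keeping the matrixed values small enough for the predictor arithmetic.
*/
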
// 32-bit routines
// - note that these really expect the internal data width to be < 32 but the arrays are 32-bit
// - otherwise, the calculations might overflow into the 33rd bit and be lost
// - therefore, these routines deal with the specified "unused lower" bytes in the "shift" buffers

void mix32( int32_t * in, uint32_t stride, int32_t * u, int32_t * v, int32_t numSamples,
			int32_t mixbits, int32_t mixres, uint16_t * shiftUV, int32_t bytesShifted )
{
	int32_t *	ip = in;
	int32_t		shift = bytesShifted * 8;
	uint32_t	mask  = (1ul << shift) - 1;
	int32_t		l, r;
	int32_t		j, k;

	if ( mixres != 0 )
	{
		int32_t		mod = 1 << mixbits;
		int32_t		m2;

		//Assert( bytesShifted != 0 );

		/* matrixed stereo with shift */
		m2 = mod - mixres;
		for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
		{
			l = ip[0];
			r = ip[1];
			ip += stride;

			shiftUV[k + 0] = (uint16_t)(l & mask);
			shiftUV[k + 1] = (uint16_t)(r & mask);

			l >>= shift;
			r >>= shift;

			u[j] = (mixres * l + m2 * r) >> mixbits;
			v[j] = l - r;
		}
	}
	else
	{
		if ( bytesShifted == 0 )
		{
			/* de-interleaving w/o shift */
			for ( j = 0; j < numSamples; j++ )
			{
				u[j] = ip[0];
				v[j] = ip[1];
				ip += stride;
			}
		}
		else
		{
			/* de-interleaving with shift */
			for ( j = 0, k = 0; j < numSamples; j++, k += 2 )
			{
				l = ip[0];
				r = ip[1];
				ip += stride;

				shiftUV[k + 0] = (uint16_t)(l & mask);
				shiftUV[k + 1] = (uint16_t)(r & mask);

				l >>= shift;
				r >>= shift;

				u[j] = l;
				v[j] = r;
			}
		}
	}
}

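/*
    Why the width must stay below 32 bits (illustrative): the matrixed term
    "mixres * l + m2 * r" is evaluated in 32-bit arithmetic.  With full
    32-bit samples and, say, mixres = 1, m2 = 3 (mixbits = 2), samples near
    INT32_MAX would push the sum past 2^31; the carry into the 33rd bit
    would be lost.  Shifting the low bytes out into shiftUV first keeps l
    and r small enough that the products and their sum fit in an int32_t.
*/
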
// 20/24-bit <-> 32-bit helper routines (not really matrixing but convenient to put here)

void copy20ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples )
{
	uint8_t *	ip = in;
	int32_t		j;

	for ( j = 0; j < numSamples; j++ )
	{
		int32_t		val;

		// 20-bit values are left-aligned in the 24-bit input buffer but right-aligned in the 32-bit output buffer
		val = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
		out[j] = (val << 8) >> 12;
		ip += stride * 3;
	}
}

void copy24ToPredictor( uint8_t * in, uint32_t stride, int32_t * out, int32_t numSamples )
{
	uint8_t *	ip = in;
	int32_t		j;

	for ( j = 0; j < numSamples; j++ )
	{
		int32_t		val;

		val = (int32_t)( ((uint32_t)ip[HBYTE] << 16) | ((uint32_t)ip[MBYTE] << 8) | (uint32_t)ip[LBYTE] );
		out[j] = (val << 8) >> 8;
		ip += stride * 3;
	}
}
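
/*
    Usage sketch (hypothetical names): these helpers feed a single unmixed
    packed channel straight into a 32-bit predictor buffer, e.g.

        uint8_t		packed24[3 * 4096];     // one channel, 3 bytes per sample
        int32_t		predictor[4096];

        copy24ToPredictor( packed24, 1, predictor, 4096 );

    A stride of 1 walks a contiguous mono buffer; stride is measured in
    samples, not bytes.
*/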