1 /*
2  * Copyright (c) 2011 Apple Inc. All rights reserved.
3  *
4  * @APPLE_APACHE_LICENSE_HEADER_START@
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * @APPLE_APACHE_LICENSE_HEADER_END@
19  */
20 
21 /*
22 	File:		ALACEncoder.cpp
23 */
24 
25 // build stuff
26 #define VERBOSE_DEBUG		0
27 
28 // headers
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 #include "ALACEncoder.h"
34 
35 #include "aglib.h"
36 #include "dplib.h"
37 #include "matrixlib.h"
38 
39 #include "ALACBitUtilities.h"
40 #include "ALACAudioTypes.h"
41 #include "EndianPortable.h"
42 
43 // Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
44 // picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
45 typedef int16_t (*SearchCoefs)[kALACMaxCoefs];
46 
// encoder constants
const uint32_t kALACEncoderMagic = 'dpge';	// four-character tag for this encoder
const uint32_t kMaxSampleSize    = 32;		// max allowed bit width is 32
const uint32_t kDefaultMixBits   = 2;		// mixBits used by both stereo encode paths
const uint32_t kDefaultMixRes    = 0;		// mixRes used when no mixRes search is run
const uint32_t kMaxRes           = 4;		// last mixRes value tried by the search loop
const uint32_t kDefaultNumUV     = 8;		// default number of predictor coefficients
const uint32_t kMinUV            = 4;		// first coefficient count tried by the search
const uint32_t kMaxUV            = 8;		// last coefficient count tried by the search

// file-local helpers (the prototype is only compiled into verbose debug builds)
#if VERBOSE_DEBUG
static void AddFiller( BitBuffer * bits, int32_t numBytes );
#endif
61 
62 
63 /*
64 	Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
65 				at the beginning of the frame for that channel.  Indicates whether SCE, CPE, or LFE.
66 				Each particular field is accessed via the current channel index.  Note that the channel
67 				index increments by two for channel pairs.
68 
69 	For example:
70 
71 			C L R 3-channel input		= (ID_CPE << 3) | (ID_SCE)
72 				index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
73 				index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
74 
75 			C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
76 				index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
77 				index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
78 				index 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
79 				index 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
80 				index 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
81 */
82 static const uint32_t	sChannelMaps[kALACMaxChannels] =
83 {
84 	ID_SCE,
85 	ID_CPE,
86 	(ID_CPE << 3) | (ID_SCE),
87 	(ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
88 	(ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
89 	(ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
90 	(ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
91 	(ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
92 };
93 
// sample rates listed as supported, in ascending order
static const uint32_t sSupportediPodSampleRates[] =
{
	 8000, 11025, 12000,
	16000, 22050, 24000,
	32000, 44100, 48000
};
98 
99 /*
100 	Constructor
101 */
ALACEncoder()102 ALACEncoder::ALACEncoder() :
103 	mBitDepth( 0 ),
104     mFastMode( 0 ),
105 	mMixBufferU( nil ),
106 	mMixBufferV( nil ),
107 	mPredictorU( nil ),
108 	mPredictorV( nil ),
109 	mShiftBufferUV( nil ),
110 	mWorkBuffer( nil ),
111 
112 
113 	mTotalBytesGenerated( 0 ),
114 	mAvgBitRate( 0 ),
115 	mMaxFrameBytes( 0 )
116 {
117 	// overrides
118 	mFrameSize = kALACDefaultFrameSize;
119 }
120 
121 /*
122 	Destructor
123 */
~ALACEncoder()124 ALACEncoder::~ALACEncoder()
125 {
126 	// delete the matrix mixing buffers
127 	if ( mMixBufferU )
128     {
129 		free(mMixBufferU);
130         mMixBufferU = NULL;
131     }
132 	if ( mMixBufferV )
133     {
134 		free(mMixBufferV);
135         mMixBufferV = NULL;
136     }
137 
138 	// delete the dynamic predictor's "corrector" buffers
139 	if ( mPredictorU )
140     {
141 		free(mPredictorU);
142         mPredictorU = NULL;
143     }
144 	if ( mPredictorV )
145     {
146 		free(mPredictorV);
147         mPredictorV = NULL;
148     }
149 
150 	// delete the unused byte shift buffer
151 	if ( mShiftBufferUV )
152     {
153 		free(mShiftBufferUV);
154         mShiftBufferUV = NULL;
155     }
156 
157 	// delete the work buffer
158 	if ( mWorkBuffer )
159     {
160 		free(mWorkBuffer);
161         mWorkBuffer = NULL;
162     }
163 }
164 
165 #if PRAGMA_MARK
166 #pragma mark -
167 #endif
168 
169 /*
170 	HEADER SPECIFICATION
171 
172         For every segment we adopt the following header:
173 
174 			1 byte reserved			(always 0)
175 			1 byte flags			(see below)
176 			[4 byte frame length]	(optional, see below)
177 			     ---Next, the per-segment ALAC parameters---
178 			1 byte mixBits			(middle-side parameter)
179 			1 byte mixRes			(middle-side parameter, interpreted as signed char)
180 
181 			1 byte shiftU			(4 bits modeU, 4 bits denShiftU)
182 			1 byte filterU			(3 bits pbFactorU, 5 bits numU)
			(numU) shorts			(signed DP coefficients for U channel)
184 			     ---Next, 2nd-channel ALAC parameters in case of stereo mode---
185 			1 byte shiftV			(4 bits modeV, 4 bits denShiftV)
186 			1 byte filterV			(3 bits pbFactorV, 5 bits numV)
187 			(numV) shorts			(signed DP coefficients for V channel)
188 			     ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
189 			     ---Then comes the AG-compressor bitstream---
190 
191 
192         FLAGS
193         -----
194 
195 		The presence of certain flag bits changes the header format such that the parameters might
196 		not even be sent.  The currently defined flags format is:
197 
198 			0000psse
199 
200 			where		0 	= reserved, must be 0
201 						p	= 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
202 						ss	= 2-bit field indicating "number of shift-off bytes ignored by compression"
203 						e	= 1-bit field indicating "escape"
204 
205 		The "partial frame" flag means that the following segment is not equal to the frame length specified
206 		in the out-of-band decoder configuration.  This allows the decoder to deal with end-of-file partial
207 		segments without incurring the 32-bit overhead for each segment.
208 
209 		The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
210 		uncompressed.  The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
211 		quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
212 		uncompressed size.  However, by shifting the input values down and running the remaining bits through
213 		the normal compression algorithm, a net win can be achieved.  If this field is non-zero, it means that
214 		the shifted-off bytes follow after the parameter section of the header and before the compressed
215 		bitstream.  Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
216 		bytes are shifted off the bottom which helps the eventual compression ratio.  For stereo channels,
217 		the shifted off bytes are interleaved.
218 
219         The "escape" flag means that this segment was not compressed b/c the compressed size would be
220         >= uncompressed size.  In that case, the audio data was passed through uncompressed after the header.
221         The other header parameter bytes will not be sent.
222 
223 
224 		PARAMETERS
225 		----------
226 
227 		If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
228 
229 			4 + (2 + 2 * numU)                   (mono mode)
			4 + (2 + 2 * numU) + (2 + 2 * numV)  (stereo mode)
231 
232         where the ALAC filter-lengths numU, numV are bounded by a
233         constant (in the current source, numU, numV <= NUMCOEPAIRS), and
234         this forces an absolute upper bound on header size.
235 
236         Each segment-decode process loads up these bytes from the front of the
237         local stream, in the above order, then follows with the entropy-encoded
238         bits for the given segment.
239 
240         To generalize middle-side, there are various mixing modes including middle-side, each lossless,
241         as embodied in the mix() and unmix() functions.  These functions exploit a generalized middle-side
242         transformation:
243 
244         u := [(rL + (m-r)R)/m];
245         v := L - R;
246 
247         where [ ] denotes integer floor.  The (lossless) inverse is
248 
        L = u + v - [rv/m];
250         R = L - v;
251 
252         In the segment header, m and r are encoded in mixBits and mixRes.
253         Classical "middle-side" is obtained with m = 2, r = 1, but now
254         we have more generalized mixes.
255 
256         NOTES
257         -----
258         The relevance of the ALAC coefficients is explained in detail
259         in patent documents.
260 */
261 
262 /*
263 	EncodeStereo()
264 	- encode a channel pair
265 */
EncodeStereo(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)266 int32_t ALACEncoder::EncodeStereo( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
267 {
268 	BitBuffer		workBits;
269 	BitBuffer		startBits = *bitstream;			// squirrel away copy of current state in case we need to go back and do an escape packet
270 	AGParamRec		agParams;
271 	uint32_t          bits1, bits2;
272 	uint32_t			dilate;
273 	int32_t			mixBits, mixRes, maxRes;
274 	uint32_t			minBits, minBits1, minBits2;
275 	uint32_t			numU, numV;
276 	uint32_t			mode;
277 	uint32_t			pbFactor;
278 	uint32_t			chanBits;
279 	uint32_t			denShift;
280 	uint8_t			bytesShifted;
281 	SearchCoefs		coefsU;
282 	SearchCoefs		coefsV;
283 	uint32_t			index;
284 	uint8_t			partialFrame;
285 	uint32_t			escapeBits;
286 	bool			doEscape;
287 	int32_t		status = ALAC_noErr;
288 
289 	// make sure we handle this bit-depth before we get going
290 	RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
291 
292 	// reload coefs pointers for this channel pair
293 	// - note that, while you might think they should be re-initialized per block, retaining state across blocks
294 	//	 actually results in better overall compression
295 	// - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
296 	//	 different coefs for the different passes of "mixRes" results in even better compression
297 	coefsU = (SearchCoefs) mCoefsU[channelIndex];
298 	coefsV = (SearchCoefs) mCoefsV[channelIndex];
299 
300 	// matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
301 	// so enable 16-bit "shift off" and encode in 17-bit mode
302 	// - in addition, 24-bit mode really improves with one byte shifted off
303 	if ( mBitDepth == 32 )
304 		bytesShifted = 2;
305 	else if ( mBitDepth >= 24 )
306 		bytesShifted = 1;
307 	else
308 		bytesShifted = 0;
309 
310 	chanBits = mBitDepth - (bytesShifted * 8) + 1;
311 
312 	// flag whether or not this is a partial frame
313 	partialFrame = (numSamples == mFrameSize) ? 0 : 1;
314 
315 	// brute-force encode optimization loop
316 	// - run over variations of the encoding params to find the best choice
317 	mixBits		= kDefaultMixBits;
318 	maxRes		= kMaxRes;
319 	numU = numV = kDefaultNumUV;
320 	denShift	= DENSHIFT_DEFAULT;
321 	mode		= 0;
322 	pbFactor	= 4;
323 	dilate		= 8;
324 
325 	minBits	= minBits1 = minBits2 = 1ul << 31;
326 
327     int32_t		bestRes = mLastMixRes[channelIndex];
328 
329     for ( mixRes = 0; mixRes <= maxRes; mixRes++ )
330     {
331         // mix the stereo inputs
332         switch ( mBitDepth )
333         {
334             case 16:
335                 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
336                 break;
337             case 20:
338                 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
339                 break;
340             case 24:
341                 // includes extraction of shifted-off bytes
342                 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
343                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
344                 break;
345             case 32:
346                 // includes extraction of shifted-off bytes
347                 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
348                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
349                 break;
350         }
351 
352         BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
353 
354         // run the dynamic predictors
355         pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
356         pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
357 
358         // run the lossless compressor on each channel
359         set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
360         status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
361         RequireNoErr( status, goto Exit; );
362 
363         set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
364         status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
365         RequireNoErr( status, goto Exit; );
366 
367         // look for best match
368         if ( (bits1 + bits2) < minBits1 )
369         {
370             minBits1 = bits1 + bits2;
371             bestRes = mixRes;
372         }
373     }
374 
375     mLastMixRes[channelIndex] = (int16_t)bestRes;
376 
377 	// mix the stereo inputs with the current best mixRes
378 	mixRes = mLastMixRes[channelIndex];
379 	switch ( mBitDepth )
380 	{
381 		case 16:
382 			mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
383 			break;
384 		case 20:
385 			mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
386 			break;
387 		case 24:
388 			// also extracts the shifted off bytes into the shift buffers
389 			mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
390 					mixBits, mixRes, mShiftBufferUV, bytesShifted );
391 			break;
392 		case 32:
393 			// also extracts the shifted off bytes into the shift buffers
394 			mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
395 					mixBits, mixRes, mShiftBufferUV, bytesShifted );
396 			break;
397 	}
398 
399 	// now it's time for the predictor coefficient search loop
400 	numU = numV = kMinUV;
401 	minBits1 = minBits2 = 1ul << 31;
402 
403 	for ( uint32_t numUV = kMinUV; numUV <= kMaxUV; numUV += 4 )
404 	{
405 		BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
406 
407 		dilate = 32;
408 
409 		// run the predictor over the same data multiple times to help it converge
410 		for ( uint32_t converge = 0; converge < 8; converge++ )
411 		{
412 		    pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
413 		    pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
414 		}
415 
416 		dilate = 8;
417 
418 		set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
419 		status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
420 
421 		if ( (bits1 * dilate + 16 * numUV) < minBits1 )
422 		{
423 			minBits1 = bits1 * dilate + 16 * numUV;
424 			numU = numUV;
425 		}
426 
427 		set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
428 		status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
429 
430 		if ( (bits2 * dilate + 16 * numUV) < minBits2 )
431 		{
432 			minBits2 = bits2 * dilate + 16 * numUV;
433 			numV = numUV;
434 		}
435 	}
436 
437 	// test for escape hatch if best calculated compressed size turns out to be more than the input size
438 	minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
439 	if ( bytesShifted != 0 )
440 		minBits += (numSamples * (bytesShifted * 8) * 2);
441 
442 	escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8);	/* 2 common header bytes */
443 
444 	doEscape = (minBits >= escapeBits) ? true : false;
445 
446 	if ( doEscape == false )
447 	{
448 		// write bitstream header and coefs
449 		BitBufferWrite( bitstream, 0, 12 );
450 		BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
451 		if ( partialFrame )
452 			BitBufferWrite( bitstream, numSamples, 32 );
453 		BitBufferWrite( bitstream, mixBits, 8 );
454 		BitBufferWrite( bitstream, mixRes, 8 );
455 
456 		//Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
457 		//Assert( (pbFactor < 8) && (numU < 32) );
458 		//Assert( (pbFactor < 8) && (numV < 32) );
459 
460 		BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
461 		BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
462 		for ( index = 0; index < numU; index++ )
463 			BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
464 
465 		BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
466 		BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
467 		for ( index = 0; index < numV; index++ )
468 			BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
469 
470 		// if shift active, write the interleaved shift buffers
471 		if ( bytesShifted != 0 )
472 		{
473 			uint32_t		bitShift = bytesShifted * 8;
474 
475 			//Assert( bitShift <= 16 );
476 
477 			for ( index = 0; index < (numSamples * 2); index += 2 )
478 			{
479 				uint32_t			shiftedVal;
480 
481 				shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
482 				BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
483 			}
484 		}
485 
486 		// run the dynamic predictor and lossless compression for the "left" channel
487 		// - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
488 		//		   of only using "U" buffers for the U-channel and "V" buffers for the V-channel
489 		if ( mode == 0 )
490 		{
491 			pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
492 		}
493 		else
494 		{
495 			pc_block( mMixBufferU, mPredictorV, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
496 			pc_block( mPredictorV, mPredictorU, numSamples, nil, 31, chanBits, 0 );
497 		}
498 
499 		set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
500 		status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
501 		RequireNoErr( status, goto Exit; );
502 
503 		// run the dynamic predictor and lossless compression for the "right" channel
504 		if ( mode == 0 )
505 		{
506 			pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
507 		}
508 		else
509 		{
510 			pc_block( mMixBufferV, mPredictorU, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
511 			pc_block( mPredictorU, mPredictorV, numSamples, nil, 31, chanBits, 0 );
512 		}
513 
514 		set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
515 		status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
516 		RequireNoErr( status, goto Exit; );
517 
518 		/*	if we happened to create a compressed packet that was actually bigger than an escape packet would be,
519 			chuck it and do an escape packet
520 		*/
521 		minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
522 		if ( minBits >= escapeBits )
523 		{
524 			*bitstream = startBits;		// reset bitstream state
525 			doEscape = true;
526 			printf( "compressed frame too big: %u vs. %u \n", minBits, escapeBits );
527 		}
528 	}
529 
530 	if ( doEscape == true )
531 	{
532 		/* escape */
533 		status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
534 
535 #if VERBOSE_DEBUG
536 		DebugMsg( "escape!: %lu vs %lu", minBits, escapeBits );
537 #endif
538 	}
539 
540 Exit:
541 	return status;
542 }
543 
544 /*
545 	EncodeStereoFast()
546 	- encode a channel pair without the search loop for maximum possible speed
547 */
EncodeStereoFast(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)548 int32_t ALACEncoder::EncodeStereoFast( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
549 {
550 	BitBuffer		startBits = *bitstream;			// squirrel away current bit position in case we decide to use escape hatch
551 	AGParamRec		agParams;
552 	uint32_t	bits1, bits2;
553 	int32_t			mixBits, mixRes;
554 	uint32_t			minBits, minBits1, minBits2;
555 	uint32_t			numU, numV;
556 	uint32_t			mode;
557 	uint32_t			pbFactor;
558 	uint32_t			chanBits;
559 	uint32_t			denShift;
560 	uint8_t			bytesShifted;
561 	SearchCoefs		coefsU;
562 	SearchCoefs		coefsV;
563 	uint32_t			index;
564 	uint8_t			partialFrame;
565 	uint32_t			escapeBits;
566 	bool			doEscape;
567 	int32_t		status;
568 
569 	// make sure we handle this bit-depth before we get going
570 	RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
571 
572 	// reload coefs pointers for this channel pair
573 	// - note that, while you might think they should be re-initialized per block, retaining state across blocks
574 	//	 actually results in better overall compression
575 	// - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
576 	//	 different coefs for the different passes of "mixRes" results in even better compression
577 	coefsU = (SearchCoefs) mCoefsU[channelIndex];
578 	coefsV = (SearchCoefs) mCoefsV[channelIndex];
579 
580 	// matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
581 	// so enable 16-bit "shift off" and encode in 17-bit mode
582 	// - in addition, 24-bit mode really improves with one byte shifted off
583 	if ( mBitDepth == 32 )
584 		bytesShifted = 2;
585 	else if ( mBitDepth >= 24 )
586 		bytesShifted = 1;
587 	else
588 		bytesShifted = 0;
589 
590 	chanBits = mBitDepth - (bytesShifted * 8) + 1;
591 
592 	// flag whether or not this is a partial frame
593 	partialFrame = (numSamples == mFrameSize) ? 0 : 1;
594 
595 	// set up default encoding parameters for "fast" mode
596 	mixBits		= kDefaultMixBits;
597 	mixRes		= kDefaultMixRes;
598 	numU = numV = kDefaultNumUV;
599 	denShift	= DENSHIFT_DEFAULT;
600 	mode		= 0;
601 	pbFactor	= 4;
602 
603 	minBits	= minBits1 = minBits2 = 1ul << 31;
604 
605 	// mix the stereo inputs with default mixBits/mixRes
606 	switch ( mBitDepth )
607 	{
608 		case 16:
609 			mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
610 			break;
611 		case 20:
612 			mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
613 			break;
614 		case 24:
615 			// also extracts the shifted off bytes into the shift buffers
616 			mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
617 					mixBits, mixRes, mShiftBufferUV, bytesShifted );
618 			break;
619 		case 32:
620 			// also extracts the shifted off bytes into the shift buffers
621 			mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
622 					mixBits, mixRes, mShiftBufferUV, bytesShifted );
623 			break;
624 	}
625 
626 	/* speculatively write the bitstream assuming the compressed version will be smaller */
627 
628 	// write bitstream header and coefs
629 	BitBufferWrite( bitstream, 0, 12 );
630 	BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
631 	if ( partialFrame )
632 		BitBufferWrite( bitstream, numSamples, 32 );
633 	BitBufferWrite( bitstream, mixBits, 8 );
634 	BitBufferWrite( bitstream, mixRes, 8 );
635 
636 	//Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
637 	//Assert( (pbFactor < 8) && (numU < 32) );
638 	//Assert( (pbFactor < 8) && (numV < 32) );
639 
640 	BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
641 	BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
642 	for ( index = 0; index < numU; index++ )
643 		BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
644 
645 	BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
646 	BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
647 	for ( index = 0; index < numV; index++ )
648 		BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
649 
650 	// if shift active, write the interleaved shift buffers
651 	if ( bytesShifted != 0 )
652 	{
653 		uint32_t		bitShift = bytesShifted * 8;
654 
655 		//Assert( bitShift <= 16 );
656 
657 		for ( index = 0; index < (numSamples * 2); index += 2 )
658 		{
659 			uint32_t			shiftedVal;
660 
661 			shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
662 			BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
663 		}
664 	}
665 
666 	// run the dynamic predictor and lossless compression for the "left" channel
667 	// - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
668 	pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
669 
670 	set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
671 	status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
672 	RequireNoErr( status, goto Exit; );
673 
674 	// run the dynamic predictor and lossless compression for the "right" channel
675 	pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
676 
677 	set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
678 	status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
679 	RequireNoErr( status, goto Exit; );
680 
681 	// do bit requirement calculations
682 	minBits1 = bits1 + (numU * sizeof(int16_t) * 8);
683 	minBits2 = bits2 + (numV * sizeof(int16_t) * 8);
684 
685 	// test for escape hatch if best calculated compressed size turns out to be more than the input size
686 	minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
687 	if ( bytesShifted != 0 )
688 		minBits += (numSamples * (bytesShifted * 8) * 2);
689 
690 	escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8);	/* 2 common header bytes */
691 
692 	doEscape = (minBits >= escapeBits) ? true : false;
693 
694 	if ( doEscape == false )
695 	{
696 		/*	if we happened to create a compressed packet that was actually bigger than an escape packet would be,
697 			chuck it and do an escape packet
698 		*/
699 		minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
700 		if ( minBits >= escapeBits )
701 		{
702 			doEscape = true;
703 			printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
704 		}
705 
706 	}
707 
708 	if ( doEscape == true )
709 	{
710 		/* escape */
711 
712 		// reset bitstream position since we speculatively wrote the compressed version
713 		*bitstream = startBits;
714 
715 		// write escape frame
716 		status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
717 
718 #if VERBOSE_DEBUG
719 		DebugMsg( "escape!: %u vs %u", minBits, (numSamples * mBitDepth * 2) );
720 #endif
721 	}
722 
723 Exit:
724 	return status;
725 }
726 
727 /*
728 	EncodeStereoEscape()
729 	- encode stereo escape frame
730 */
EncodeStereoEscape(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t numSamples)731 int32_t ALACEncoder::EncodeStereoEscape( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t numSamples )
732 {
733 	int16_t *		input16;
734 	int32_t *		input32;
735 	uint8_t			partialFrame;
736 	uint32_t			index;
737 
738 	// flag whether or not this is a partial frame
739 	partialFrame = (numSamples == mFrameSize) ? 0 : 1;
740 
741 	// write bitstream header
742 	BitBufferWrite( bitstream, 0, 12 );
743 	BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 );	// LSB = 1 means "frame not compressed"
744 	if ( partialFrame )
745 		BitBufferWrite( bitstream, numSamples, 32 );
746 
747 	// just copy the input data to the output buffer
748 	switch ( mBitDepth )
749 	{
750 		case 16:
751 			input16 = (int16_t *) inputBuffer;
752 
753 			for ( index = 0; index < (numSamples * stride); index += stride )
754 			{
755 				BitBufferWrite( bitstream, input16[index + 0], 16 );
756 				BitBufferWrite( bitstream, input16[index + 1], 16 );
757 			}
758 			break;
759 		case 20:
760 			// mix20() with mixres param = 0 means de-interleave so use it to simplify things
761 			mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0 );
762 			for ( index = 0; index < numSamples; index++ )
763 			{
764 				BitBufferWrite( bitstream, mMixBufferU[index], 20 );
765 				BitBufferWrite( bitstream, mMixBufferV[index], 20 );
766 			}
767 			break;
768 		case 24:
769 			// mix24() with mixres param = 0 means de-interleave so use it to simplify things
770 			mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0, mShiftBufferUV, 0 );
771 			for ( index = 0; index < numSamples; index++ )
772 			{
773 				BitBufferWrite( bitstream, mMixBufferU[index], 24 );
774 				BitBufferWrite( bitstream, mMixBufferV[index], 24 );
775 			}
776 			break;
777 		case 32:
778 			input32 = (int32_t *) inputBuffer;
779 
780 			for ( index = 0; index < (numSamples * stride); index += stride )
781 			{
782 				BitBufferWrite( bitstream, input32[index + 0], 32 );
783 				BitBufferWrite( bitstream, input32[index + 1], 32 );
784 			}
785 			break;
786 	}
787 
788 	return ALAC_noErr;
789 }
790 
791 /*
792 	EncodeMono()
793 	- encode a mono input buffer
794 */
EncodeMono(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)795 int32_t ALACEncoder::EncodeMono( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
796 {
797 	BitBuffer		startBits = *bitstream;			// squirrel away copy of current state in case we need to go back and do an escape packet
798 	AGParamRec		agParams;
799 	uint32_t	bits1;
800 	uint32_t			numU;
801 	SearchCoefs		coefsU;
802 	uint32_t			dilate;
803 	uint32_t			minBits, bestU;
804 	uint32_t			minU, maxU;
805 	uint32_t			index, index2;
806 	uint8_t			bytesShifted;
807 	uint32_t			shift;
808 	uint32_t			mask;
809 	uint32_t			chanBits;
810 	uint8_t			pbFactor;
811 	uint8_t			partialFrame;
812 	int16_t *		input16;
813 	int32_t *		input32;
814 	uint32_t			escapeBits;
815 	bool			doEscape;
816 	int32_t		status;
817 
818 	// make sure we handle this bit-depth before we get going
819 	RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
820 
821 	status = ALAC_noErr;
822 
823 	// reload coefs array from previous frame
824 	coefsU = (SearchCoefs) mCoefsU[channelIndex];
825 
826 	// pick bit depth for actual encoding
827 	// - we lop off the lower byte(s) for 24-/32-bit encodings
828 	if ( mBitDepth == 32 )
829 		bytesShifted = 2;
830 	else if ( mBitDepth >= 24 )
831 		bytesShifted = 1;
832 	else
833 		bytesShifted = 0;
834 
835 	shift = bytesShifted * 8;
836 	mask = (1ul << shift) - 1;
837 	chanBits = mBitDepth - (bytesShifted * 8);
838 
839 	// flag whether or not this is a partial frame
840 	partialFrame = (numSamples == mFrameSize) ? 0 : 1;
841 
842 	// convert N-bit data to 32-bit for predictor
843 	switch ( mBitDepth )
844 	{
845 		case 16:
846 		{
847 			// convert 16-bit data to 32-bit for predictor
848 			input16 = (int16_t *) inputBuffer;
849 			for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
850 				mMixBufferU[index] = (int32_t) input16[index2];
851 			break;
852 		}
853 		case 20:
854 			// convert 20-bit data to 32-bit for predictor
855 			copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
856 			break;
857 		case 24:
858 			// convert 24-bit data to 32-bit for the predictor and extract the shifted off byte(s)
859 			copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
860 			for ( index = 0; index < numSamples; index++ )
861 			{
862 				mShiftBufferUV[index] = (uint16_t)(mMixBufferU[index] & mask);
863 				mMixBufferU[index] >>= shift;
864 			}
865 			break;
866 		case 32:
867 		{
868 			// just copy the 32-bit input data for the predictor and extract the shifted off byte(s)
869 			input32 = (int32_t *) inputBuffer;
870 
871 			for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
872 			{
873 				int32_t			val = input32[index2];
874 
875 				mShiftBufferUV[index] = (uint16_t)(val & mask);
876 				mMixBufferU[index] = val >> shift;
877 			}
878 			break;
879 		}
880 	}
881 
882 	// brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
883 	// - run over variations of the encoding params to find the best choice
884 	minU		= 4;
885 	maxU		= 8;
886 	minBits		= 1ul << 31;
887 	pbFactor	= 4;
888 
889 	minBits	= 1ul << 31;
890 	bestU	= minU;
891 
892 	for ( numU = minU; numU <= maxU; numU += 4 )
893 	{
894 		BitBuffer		workBits;
895 		uint32_t			numBits;
896 
897 		BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
898 
899 		dilate = 32;
900 		for ( uint32_t converge = 0; converge < 7; converge++ )
901 			pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
902 
903 		dilate = 8;
904 		pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
905 
906 		set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
907 		status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
908 		RequireNoErr( status, goto Exit; );
909 
910 		numBits = (dilate * bits1) + (16 * numU);
911 		if ( numBits < minBits )
912 		{
913 			bestU	= numU;
914 			minBits = numBits;
915 		}
916 	}
917 
918 	// test for escape hatch if best calculated compressed size turns out to be more than the input size
919 	// - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU
920 	minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
921 	if ( bytesShifted != 0 )
922 		minBits += (numSamples * (bytesShifted * 8));
923 
924 	escapeBits = (numSamples * mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8);	/* 2 common header bytes */
925 
926 	doEscape = (minBits >= escapeBits) ? true : false;
927 
928 	if ( doEscape == false )
929 	{
930 		// write bitstream header
931 		BitBufferWrite( bitstream, 0, 12 );
932 		BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
933 		if ( partialFrame )
934 			BitBufferWrite( bitstream, numSamples, 32 );
935 		BitBufferWrite( bitstream, 0, 16 );								// mixBits = mixRes = 0
936 
937 		// write the params and predictor coefs
938 		numU = bestU;
939 		BitBufferWrite( bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8 );	// modeU = 0
940 		BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
941 		for ( index = 0; index < numU; index++ )
942 			BitBufferWrite( bitstream, coefsU[numU-1][index], 16 );
943 
944 		// if shift active, write the interleaved shift buffers
945 		if ( bytesShifted != 0 )
946 		{
947 			for ( index = 0; index < numSamples; index++ )
948 				BitBufferWrite( bitstream, mShiftBufferUV[index], shift );
949 		}
950 
951 		// run the dynamic predictor with the best result
952 		pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
953 
954 		// do lossless compression
955 		set_standard_ag_params( &agParams, numSamples, numSamples );
956 		status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
957 		//AssertNoErr( status );
958 
959 
960 		/*	if we happened to create a compressed packet that was actually bigger than an escape packet would be,
961 			chuck it and do an escape packet
962 		*/
963 		minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
964 		if ( minBits >= escapeBits )
965 		{
966 			*bitstream = startBits;		// reset bitstream state
967 			doEscape = true;
968 			printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
969 		}
970 	}
971 
972 	if ( doEscape == true )
973 	{
974 		// write bitstream header and coefs
975 		BitBufferWrite( bitstream, 0, 12 );
976 		BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 );	// LSB = 1 means "frame not compressed"
977 		if ( partialFrame )
978 			BitBufferWrite( bitstream, numSamples, 32 );
979 
980 		// just copy the input data to the output buffer
981 		switch ( mBitDepth )
982 		{
983 			case 16:
984 				input16 = (int16_t *) inputBuffer;
985 				for ( index = 0; index < (numSamples * stride); index += stride )
986 					BitBufferWrite( bitstream, input16[index], 16 );
987 				break;
988 			case 20:
989 				// convert 20-bit data to 32-bit for simplicity
990 				copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
991 				for ( index = 0; index < numSamples; index++ )
992 					BitBufferWrite( bitstream, mMixBufferU[index], 20 );
993 				break;
994 			case 24:
995 				// convert 24-bit data to 32-bit for simplicity
996 				copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
997 				for ( index = 0; index < numSamples; index++ )
998 					BitBufferWrite( bitstream, mMixBufferU[index], 24 );
999 				break;
1000 			case 32:
1001 				input32 = (int32_t *) inputBuffer;
1002 				for ( index = 0; index < (numSamples * stride); index += stride )
1003 					BitBufferWrite( bitstream, input32[index], 32 );
1004 				break;
1005 		}
1006 #if VERBOSE_DEBUG
1007 		DebugMsg( "escape!: %lu vs %lu", minBits, (numSamples * mBitDepth) );
1008 #endif
1009 	}
1010 
1011 Exit:
1012 	return status;
1013 }
1014 
1015 #if PRAGMA_MARK
1016 #pragma mark -
1017 #endif
1018 
1019 /*
1020 	Encode()
1021 	- encode the next block of samples
1022 */
Encode(AudioFormatDescription theInputFormat,AudioFormatDescription theOutputFormat,unsigned char * theReadBuffer,unsigned char * theWriteBuffer,int32_t * ioNumBytes)1023 int32_t ALACEncoder::Encode(AudioFormatDescription theInputFormat, AudioFormatDescription theOutputFormat,
1024                              unsigned char * theReadBuffer, unsigned char * theWriteBuffer, int32_t * ioNumBytes)
1025 {
1026 	uint32_t				numFrames;
1027 	uint32_t				outputSize;
1028 	BitBuffer			bitstream;
1029 	int32_t			status;
1030 
1031 	numFrames = *ioNumBytes/theInputFormat.mBytesPerPacket;
1032 
1033 	// create a bit buffer structure pointing to our output buffer
1034 	BitBufferInit( &bitstream, theWriteBuffer, mMaxOutputBytes );
1035 
1036 	if ( theInputFormat.mChannelsPerFrame == 2 )
1037 	{
1038 		// add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
1039 		BitBufferWrite( &bitstream, ID_CPE, 3 );
1040 		BitBufferWrite( &bitstream, 0, 4 );
1041 
1042 		// encode stereo input buffer
1043 		if ( mFastMode == false )
1044 			status = this->EncodeStereo( &bitstream, theReadBuffer, 2, 0, numFrames );
1045 		else
1046 			status = this->EncodeStereoFast( &bitstream, theReadBuffer, 2, 0, numFrames );
1047 		RequireNoErr( status, goto Exit; );
1048 	}
1049 	else if ( theInputFormat.mChannelsPerFrame == 1 )
1050 	{
1051 		// add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
1052 		BitBufferWrite( &bitstream, ID_SCE, 3 );
1053 		BitBufferWrite( &bitstream, 0, 4 );
1054 
1055 		// encode mono input buffer
1056 		status = this->EncodeMono( &bitstream, theReadBuffer, 1, 0, numFrames );
1057 		RequireNoErr( status, goto Exit; );
1058 	}
1059 	else
1060 	{
1061 		char *					inputBuffer;
1062 		uint32_t				tag;
1063 		uint32_t				channelIndex;
1064 		uint32_t				inputIncrement;
1065 		uint8_t				stereoElementTag;
1066 		uint8_t				monoElementTag;
1067 		uint8_t				lfeElementTag;
1068 
1069 		inputBuffer		= (char *) theReadBuffer;
1070 		inputIncrement	= ((mBitDepth + 7) / 8);
1071 
1072 		stereoElementTag	= 0;
1073 		monoElementTag		= 0;
1074 		lfeElementTag		= 0;
1075 
1076 		for ( channelIndex = 0; channelIndex < theInputFormat.mChannelsPerFrame; )
1077 		{
1078 			tag = (sChannelMaps[theInputFormat.mChannelsPerFrame - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3);
1079 
1080 			BitBufferWrite( &bitstream, tag, 3 );
1081 			switch ( tag )
1082 			{
1083 				case ID_SCE:
1084 					// mono
1085 					BitBufferWrite( &bitstream, monoElementTag, 4 );
1086 
1087 					status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1088 
1089 					inputBuffer += inputIncrement;
1090 					channelIndex++;
1091 					monoElementTag++;
1092 					break;
1093 
1094 				case ID_CPE:
1095 					// stereo
1096 					BitBufferWrite( &bitstream, stereoElementTag, 4 );
1097 
1098 					status = this->EncodeStereo( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1099 
1100 					inputBuffer += (inputIncrement * 2);
1101 					channelIndex += 2;
1102 					stereoElementTag++;
1103 					break;
1104 
1105 				case ID_LFE:
1106 					// LFE channel (subwoofer)
1107 					BitBufferWrite( &bitstream, lfeElementTag, 4 );
1108 
1109 					status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1110 
1111 					inputBuffer += inputIncrement;
1112 					channelIndex++;
1113 					lfeElementTag++;
1114 					break;
1115 
1116 				default:
1117 					printf( "That ain't right! (%u)\n", tag );
1118 					status = kALAC_ParamError;
1119 					goto Exit;
1120 			}
1121 
1122 			RequireNoErr( status, goto Exit; );
1123 		}
1124 	}
1125 
1126 #if VERBOSE_DEBUG
1127 {
1128 	// if there is room left in the output buffer, add some random fill data to test decoder
1129 	int32_t			bitsLeft;
1130 	int32_t			bytesLeft;
1131 
1132 	bitsLeft = BitBufferGetPosition( &bitstream ) - 3;	// - 3 for ID_END tag
1133 	bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8);
1134 
1135 	if ( (bytesLeft > 20) && ((bytesLeft & 0x4u) != 0) )
1136 		AddFiller( &bitstream, bytesLeft );
1137 }
1138 #endif
1139 
1140 	// add 3-bit frame end tag: ID_END
1141 	BitBufferWrite( &bitstream, ID_END, 3 );
1142 
1143 	// byte-align the output data
1144 	BitBufferByteAlign( &bitstream, true );
1145 
1146 	outputSize = BitBufferGetPosition( &bitstream ) / 8;
1147 	//Assert( outputSize <= mMaxOutputBytes );
1148 
1149 
1150 	// all good, let iTunes know what happened and remember the total number of input sample frames
1151 	*ioNumBytes = outputSize;
1152 	//mEncodedFrames		   	   += encodeMsg->numInputSamples;
1153 
1154 	// gather encoding stats
1155 	mTotalBytesGenerated += outputSize;
1156 	mMaxFrameBytes = MAX( mMaxFrameBytes, outputSize );
1157 
1158 	status = ALAC_noErr;
1159 
1160 Exit:
1161 	return status;
1162 }
1163 
1164 /*
1165 	Finish()
1166 	- drain out any leftover samples
1167 */
1168 
Finish()1169 int32_t ALACEncoder::Finish()
1170 {
1171 /*	// finalize bit rate statistics
1172 	if ( mSampleSize.numEntries != 0 )
1173 		mAvgBitRate = (uint32_t)( (((float)mTotalBytesGenerated * 8.0f) / (float)mSampleSize.numEntries) * ((float)mSampleRate / (float)mFrameSize) );
1174 	else
1175 		mAvgBitRate = 0;
1176 */
1177 	return ALAC_noErr;
1178 }
1179 
1180 #if PRAGMA_MARK
1181 #pragma mark -
1182 #endif
1183 
1184 /*
1185 	GetConfig()
1186 */
GetConfig(ALACSpecificConfig & config)1187 void ALACEncoder::GetConfig( ALACSpecificConfig & config )
1188 {
1189 	config.frameLength			= Swap32NtoB(mFrameSize);
1190 	config.compatibleVersion	= (uint8_t) kALACCompatibleVersion;
1191 	config.bitDepth				= (uint8_t) mBitDepth;
1192 	config.pb					= (uint8_t) PB0;
1193 	config.kb					= (uint8_t) KB0;
1194 	config.mb					= (uint8_t) MB0;
1195 	config.numChannels			= (uint8_t) mNumChannels;
1196 	config.maxRun				= Swap16NtoB((uint16_t) MAX_RUN_DEFAULT);
1197 	config.maxFrameBytes		= Swap32NtoB(mMaxFrameBytes);
1198 	config.avgBitRate			= Swap32NtoB(mAvgBitRate);
1199 	config.sampleRate			= Swap32NtoB(mOutputSampleRate);
1200 }
1201 
GetMagicCookieSize(uint32_t inNumChannels)1202 uint32_t ALACEncoder::GetMagicCookieSize(uint32_t inNumChannels)
1203 {
1204     if (inNumChannels > 2)
1205     {
1206         return sizeof(ALACSpecificConfig) + kChannelAtomSize + sizeof(ALACAudioChannelLayout);
1207     }
1208     else
1209     {
1210         return sizeof(ALACSpecificConfig);
1211     }
1212 }
1213 
GetMagicCookie(void * outCookie,uint32_t * ioSize)1214 void ALACEncoder::GetMagicCookie(void * outCookie, uint32_t * ioSize)
1215 {
1216     ALACSpecificConfig theConfig = {0};
1217     ALACAudioChannelLayout theChannelLayout = {0};
1218     uint8_t theChannelAtom[kChannelAtomSize] = {0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0};
1219     uint32_t theCookieSize = sizeof(ALACSpecificConfig);
1220     uint8_t * theCookiePointer = (uint8_t *)outCookie;
1221 
1222     GetConfig(theConfig);
1223     if (theConfig.numChannels > 2)
1224     {
1225         theChannelLayout.mChannelLayoutTag = ALACChannelLayoutTags[theConfig.numChannels - 1];
1226         theCookieSize += (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1227     }
1228      if (*ioSize >= theCookieSize)
1229     {
1230         memcpy(theCookiePointer, &theConfig, sizeof(ALACSpecificConfig));
1231         theChannelAtom[3] = (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1232         if (theConfig.numChannels > 2)
1233         {
1234             theCookiePointer += sizeof(ALACSpecificConfig);
1235             memcpy(theCookiePointer, theChannelAtom, kChannelAtomSize);
1236             theCookiePointer += kChannelAtomSize;
1237             memcpy(theCookiePointer, &theChannelLayout, sizeof(ALACAudioChannelLayout));
1238         }
1239         *ioSize = theCookieSize;
1240     }
1241     else
1242     {
1243         *ioSize = 0; // no incomplete cookies
1244     }
1245 }
1246 
1247 /*
1248 	InitializeEncoder()
1249 	- initialize the encoder component with the current config
1250 */
InitializeEncoder(AudioFormatDescription theOutputFormat)1251 int32_t ALACEncoder::InitializeEncoder(AudioFormatDescription theOutputFormat)
1252 {
1253 	int32_t			status;
1254 
1255     mOutputSampleRate = theOutputFormat.mSampleRate;
1256     mNumChannels = theOutputFormat.mChannelsPerFrame;
1257     switch(theOutputFormat.mFormatFlags)
1258     {
1259         case 1:
1260             mBitDepth = 16;
1261             break;
1262         case 2:
1263             mBitDepth = 20;
1264             break;
1265         case 3:
1266             mBitDepth = 24;
1267             break;
1268         case 4:
1269             mBitDepth = 32;
1270             break;
1271         default:
1272             break;
1273     }
1274 
1275 	// set up default encoding parameters and state
1276 	// - note: mFrameSize is set in the constructor or via SetFrameSize() which must be called before this routine
1277 	for ( uint32_t index = 0; index < kALACMaxChannels; index++ )
1278 		mLastMixRes[index] = kDefaultMixRes;
1279 
1280 	// the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1281 	// but note that this can be bigger than the input size!
1282 	// - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
1283 	mMaxOutputBytes = mFrameSize * mNumChannels * ((10 + kMaxSampleSize) / 8)  + 1;
1284 
1285 	// allocate mix buffers
1286 	mMixBufferU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1287 	mMixBufferV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1288 
1289 	// allocate dynamic predictor buffers
1290 	mPredictorU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1291 	mPredictorV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1292 
1293 	// allocate combined shift buffer
1294 	mShiftBufferUV = (uint16_t *) calloc( mFrameSize * 2 * sizeof(uint16_t),1 );
1295 
1296 	// allocate work buffer for search loop
1297 	mWorkBuffer = (uint8_t *) calloc( mMaxOutputBytes, 1 );
1298 
1299 	RequireAction( (mMixBufferU != nil) && (mMixBufferV != nil) &&
1300 					(mPredictorU != nil) && (mPredictorV != nil) &&
1301 					(mShiftBufferUV != nil) && (mWorkBuffer != nil ),
1302 					status = kALAC_MemFullError; goto Exit; );
1303 
1304 	status = ALAC_noErr;
1305 
1306 
1307 	// initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
1308 	for ( int32_t channel = 0; channel < (int32_t)mNumChannels; channel++ )
1309 	{
1310 		for ( int32_t search = 0; search < kALACMaxSearches; search++ )
1311 		{
1312 			init_coefs( mCoefsU[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1313 			init_coefs( mCoefsV[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1314 		}
1315 	}
1316 
1317 Exit:
1318 	return status;
1319 }
1320 
1321 /*
1322 	GetSourceFormat()
1323 	- given the input format, return one of our supported formats
1324 */
GetSourceFormat(const AudioFormatDescription * source,AudioFormatDescription * output)1325 void ALACEncoder::GetSourceFormat( const AudioFormatDescription * source, AudioFormatDescription * output )
1326 {
1327 	// default is 16-bit native endian
1328 	// - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1329 	//		   to encode to 16-bit since the source was lossy in the first place
1330 	// - note: if not a supported bit depth, find the closest supported bit depth to the input one
1331 	if ( (source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) ||
1332 		( source->mBitsPerChannel <= 16 ) )
1333 		mBitDepth = 16;
1334 	else if ( source->mBitsPerChannel <= 20 )
1335 		mBitDepth = 20;
1336 	else if ( source->mBitsPerChannel <= 24 )
1337 		mBitDepth = 24;
1338 	else
1339 		mBitDepth = 32;
1340 
1341 	// we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1342 	// and sample rate were specified when we were configured
1343 	/*
1344     MakeUncompressedAudioFormat( mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output );
1345      */
1346 }
1347 
1348 
1349 
1350 #if VERBOSE_DEBUG
1351 
1352 #if PRAGMA_MARK
1353 #pragma mark -
1354 #endif
1355 
1356 /*
1357 	AddFiller()
1358 	- add fill and data stream elements to the bitstream to test the decoder
1359 */
AddFiller(BitBuffer * bits,int32_t numBytes)1360 static void AddFiller( BitBuffer * bits, int32_t numBytes )
1361 {
1362 	uint8_t		tag;
1363 	uint32_t		index;
1364 
1365 	// out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1366 	numBytes -= 6;
1367 	if ( numBytes <= 0 )
1368 		return;
1369 
1370 	// randomly pick Fill or Data Stream Element based on numBytes requested
1371 	tag = (numBytes & 0x8) ? ID_FIL : ID_DSE;
1372 
1373 	BitBufferWrite( bits, tag, 3 );
1374 	if ( tag == ID_FIL )
1375 	{
1376 		// can't write more than 269 bytes in a fill element
1377 		numBytes = (numBytes > 269) ? 269 : numBytes;
1378 
1379 		// fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1380 		if ( numBytes >= 15 )
1381 		{
1382 			uint16_t			extensionSize;
1383 
1384 			BitBufferWrite( bits, 15, 4 );
1385 
1386 			// 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1387 			// - otherwise, there's no way to represent 15
1388 			// - for example, to really mean 15 bytes you must encode extensionSize = 1
1389 			// - why it's not like data stream elements I have no idea
1390 			extensionSize = (numBytes - 15) + 1;
1391 			Assert( extensionSize <= 255 );
1392 			BitBufferWrite( bits, extensionSize, 8 );
1393 		}
1394 		else
1395 			BitBufferWrite( bits, numBytes, 4 );
1396 
1397 		BitBufferWrite( bits, 0x10, 8 );		// extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1398 		for ( index = 0; index < (numBytes - 1); index++ )
1399 			BitBufferWrite( bits, 0xa5, 8 );	// fill_byte = b10100101 = 0xa5
1400 	}
1401 	else
1402 	{
1403 		// can't write more than 510 bytes in a data stream element
1404 		numBytes = (numBytes > 510) ? 510 : numBytes;
1405 
1406 		BitBufferWrite( bits, 0, 4 );			// element instance tag
1407 		BitBufferWrite( bits, 1, 1 );			// byte-align flag = true
1408 
1409 		// data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1410 		if ( numBytes >= 255 )
1411 		{
1412 			BitBufferWrite( bits, 255, 8 );
1413 			BitBufferWrite( bits, numBytes - 255, 8 );
1414 		}
1415 		else
1416 			BitBufferWrite( bits, numBytes, 8 );
1417 
1418 		BitBufferByteAlign( bits, true );		// byte-align with zeros
1419 
1420 		for ( index = 0; index < numBytes; index++ )
1421 			BitBufferWrite( bits, 0x5a, 8 );
1422 	}
1423 }
1424 
1425 #endif	/* VERBOSE_DEBUG */
1426