1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 /*
22 File: ALACEncoder.cpp
23 */
24
25 // build stuff
26 #define VERBOSE_DEBUG 0
27
28 // headers
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "ALACEncoder.h"
34
35 #include "aglib.h"
36 #include "dplib.h"
37 #include "matrixlib.h"
38
39 #include "ALACBitUtilities.h"
40 #include "ALACAudioTypes.h"
41 #include "EndianPortable.h"
42
// SearchCoefs: pointer to a row of kALACMaxCoefs int16_t predictor coefficients.
// Note: in C you can't typecast directly to a 2-dimensional array pointer, but that's what we
// need when picking which coefs to use, so we declare this typedef b/c we *can* typecast to
// this type. The encode methods in this file index such a pointer as coefs[n - 1] to select
// the coefficient row that is passed to pc_block() together with a coefficient count of n.
typedef int16_t (*SearchCoefs)[kALACMaxCoefs];
46
// encoder-wide constants
const uint32_t kALACEncoderMagic = 'dpge';  // four-character code
const uint32_t kMaxSampleSize    = 32u;     // max allowed bit width is 32

// stereo mixing parameters
const uint32_t kDefaultMixBits   = 2u;      // mixBits used by both stereo encode paths
const uint32_t kDefaultMixRes    = 0u;      // mixRes used by the "fast" stereo path
const uint32_t kMaxRes           = 4u;      // inclusive upper bound of the mixRes search

// predictor coefficient counts
const uint32_t kDefaultNumUV     = 8u;      // count used when no search is performed
const uint32_t kMinUV            = 4u;      // first count tried by the search loop
const uint32_t kMaxUV            = 8u;      // last count tried by the search loop
56
57 // static functions
#if VERBOSE_DEBUG
// Debug-only helper: this prototype is compiled only when VERBOSE_DEBUG is non-zero
// (it is defined as 0 at the top of this file). Presumably the definition sits further
// down in this file under the same guard -- confirm before enabling VERBOSE_DEBUG.
static void AddFiller( BitBuffer * bits, int32_t numBytes );
#endif
61
62
63 /*
64 Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
65 at the beginning of the frame for that channel. Indicates whether SCE, CPE, or LFE.
66 Each particular field is accessed via the current channel index. Note that the channel
67 index increments by two for channel pairs.
68
69 For example:
70
71 C L R 3-channel input = (ID_CPE << 3) | (ID_SCE)
72 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
73 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
74
75 C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
76 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
77 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
78 index 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
79 index 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
80 index 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
81 */
82 static const uint32_t sChannelMaps[kALACMaxChannels] =
83 {
84 ID_SCE,
85 ID_CPE,
86 (ID_CPE << 3) | (ID_SCE),
87 (ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
88 (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
89 (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
90 (ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
91 (ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
92 };
93
// Table of sample rates, in ascending order (presumably in Hz -- confirm against the user
// of this table, which is not visible from here).
static const uint32_t sSupportediPodSampleRates[] =
{
    8000,  11025, 12000,
    16000, 22050, 24000,
    32000, 44100, 48000
};
98
99 /*
100 Constructor
101 */
ALACEncoder()102 ALACEncoder::ALACEncoder() :
103 mBitDepth( 0 ),
104 mFastMode( 0 ),
105 mMixBufferU( nil ),
106 mMixBufferV( nil ),
107 mPredictorU( nil ),
108 mPredictorV( nil ),
109 mShiftBufferUV( nil ),
110 mWorkBuffer( nil ),
111
112
113 mTotalBytesGenerated( 0 ),
114 mAvgBitRate( 0 ),
115 mMaxFrameBytes( 0 )
116 {
117 // overrides
118 mFrameSize = kALACDefaultFrameSize;
119 }
120
121 /*
122 Destructor
123 */
~ALACEncoder()124 ALACEncoder::~ALACEncoder()
125 {
126 // delete the matrix mixing buffers
127 if ( mMixBufferU )
128 {
129 free(mMixBufferU);
130 mMixBufferU = NULL;
131 }
132 if ( mMixBufferV )
133 {
134 free(mMixBufferV);
135 mMixBufferV = NULL;
136 }
137
138 // delete the dynamic predictor's "corrector" buffers
139 if ( mPredictorU )
140 {
141 free(mPredictorU);
142 mPredictorU = NULL;
143 }
144 if ( mPredictorV )
145 {
146 free(mPredictorV);
147 mPredictorV = NULL;
148 }
149
150 // delete the unused byte shift buffer
151 if ( mShiftBufferUV )
152 {
153 free(mShiftBufferUV);
154 mShiftBufferUV = NULL;
155 }
156
157 // delete the work buffer
158 if ( mWorkBuffer )
159 {
160 free(mWorkBuffer);
161 mWorkBuffer = NULL;
162 }
163 }
164
165 #if PRAGMA_MARK
166 #pragma mark -
167 #endif
168
169 /*
170 HEADER SPECIFICATION
171
172 For every segment we adopt the following header:
173
174 1 byte reserved (always 0)
175 1 byte flags (see below)
176 [4 byte frame length] (optional, see below)
177 ---Next, the per-segment ALAC parameters---
178 1 byte mixBits (middle-side parameter)
179 1 byte mixRes (middle-side parameter, interpreted as signed char)
180
181 1 byte shiftU (4 bits modeU, 4 bits denShiftU)
182 1 byte filterU (3 bits pbFactorU, 5 bits numU)
(numU) shorts (signed DP coefficients for U channel)
184 ---Next, 2nd-channel ALAC parameters in case of stereo mode---
185 1 byte shiftV (4 bits modeV, 4 bits denShiftV)
186 1 byte filterV (3 bits pbFactorV, 5 bits numV)
187 (numV) shorts (signed DP coefficients for V channel)
188 ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
189 ---Then comes the AG-compressor bitstream---
190
191
192 FLAGS
193 -----
194
195 The presence of certain flag bits changes the header format such that the parameters might
196 not even be sent. The currently defined flags format is:
197
198 0000psse
199
200 where 0 = reserved, must be 0
201 p = 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
202 ss = 2-bit field indicating "number of shift-off bytes ignored by compression"
203 e = 1-bit field indicating "escape"
204
205 The "partial frame" flag means that the following segment is not equal to the frame length specified
206 in the out-of-band decoder configuration. This allows the decoder to deal with end-of-file partial
207 segments without incurring the 32-bit overhead for each segment.
208
209 The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
210 uncompressed. The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
211 quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
212 uncompressed size. However, by shifting the input values down and running the remaining bits through
213 the normal compression algorithm, a net win can be achieved. If this field is non-zero, it means that
214 the shifted-off bytes follow after the parameter section of the header and before the compressed
215 bitstream. Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
216 bytes are shifted off the bottom which helps the eventual compression ratio. For stereo channels,
217 the shifted off bytes are interleaved.
218
219 The "escape" flag means that this segment was not compressed b/c the compressed size would be
220 >= uncompressed size. In that case, the audio data was passed through uncompressed after the header.
221 The other header parameter bytes will not be sent.
222
223
224 PARAMETERS
225 ----------
226
227 If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
228
229 4 + (2 + 2 * numU) (mono mode)
4 + (2 + 2 * numU) + (2 + 2 * numV) (stereo mode)
231
232 where the ALAC filter-lengths numU, numV are bounded by a
233 constant (in the current source, numU, numV <= NUMCOEPAIRS), and
234 this forces an absolute upper bound on header size.
235
236 Each segment-decode process loads up these bytes from the front of the
237 local stream, in the above order, then follows with the entropy-encoded
238 bits for the given segment.
239
240 To generalize middle-side, there are various mixing modes including middle-side, each lossless,
241 as embodied in the mix() and unmix() functions. These functions exploit a generalized middle-side
242 transformation:
243
244 u := [(rL + (m-r)R)/m];
245 v := L - R;
246
247 where [ ] denotes integer floor. The (lossless) inverse is
248
L = u + v - [rv/m];
250 R = L - v;
251
252 In the segment header, m and r are encoded in mixBits and mixRes.
253 Classical "middle-side" is obtained with m = 2, r = 1, but now
254 we have more generalized mixes.
255
256 NOTES
257 -----
258 The relevance of the ALAC coefficients is explained in detail
259 in patent documents.
260 */
261
262 /*
263 EncodeStereo()
264 - encode a channel pair
265 */
EncodeStereo(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)266 int32_t ALACEncoder::EncodeStereo( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
267 {
268 BitBuffer workBits;
269 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
270 AGParamRec agParams;
271 uint32_t bits1, bits2;
272 uint32_t dilate;
273 int32_t mixBits, mixRes, maxRes;
274 uint32_t minBits, minBits1, minBits2;
275 uint32_t numU, numV;
276 uint32_t mode;
277 uint32_t pbFactor;
278 uint32_t chanBits;
279 uint32_t denShift;
280 uint8_t bytesShifted;
281 SearchCoefs coefsU;
282 SearchCoefs coefsV;
283 uint32_t index;
284 uint8_t partialFrame;
285 uint32_t escapeBits;
286 bool doEscape;
287 int32_t status = ALAC_noErr;
288
289 // make sure we handle this bit-depth before we get going
290 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
291
292 // reload coefs pointers for this channel pair
293 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
294 // actually results in better overall compression
295 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
296 // different coefs for the different passes of "mixRes" results in even better compression
297 coefsU = (SearchCoefs) mCoefsU[channelIndex];
298 coefsV = (SearchCoefs) mCoefsV[channelIndex];
299
300 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
301 // so enable 16-bit "shift off" and encode in 17-bit mode
302 // - in addition, 24-bit mode really improves with one byte shifted off
303 if ( mBitDepth == 32 )
304 bytesShifted = 2;
305 else if ( mBitDepth >= 24 )
306 bytesShifted = 1;
307 else
308 bytesShifted = 0;
309
310 chanBits = mBitDepth - (bytesShifted * 8) + 1;
311
312 // flag whether or not this is a partial frame
313 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
314
315 // brute-force encode optimization loop
316 // - run over variations of the encoding params to find the best choice
317 mixBits = kDefaultMixBits;
318 maxRes = kMaxRes;
319 numU = numV = kDefaultNumUV;
320 denShift = DENSHIFT_DEFAULT;
321 mode = 0;
322 pbFactor = 4;
323 dilate = 8;
324
325 minBits = minBits1 = minBits2 = 1ul << 31;
326
327 int32_t bestRes = mLastMixRes[channelIndex];
328
329 for ( mixRes = 0; mixRes <= maxRes; mixRes++ )
330 {
331 // mix the stereo inputs
332 switch ( mBitDepth )
333 {
334 case 16:
335 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
336 break;
337 case 20:
338 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
339 break;
340 case 24:
341 // includes extraction of shifted-off bytes
342 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
343 mixBits, mixRes, mShiftBufferUV, bytesShifted );
344 break;
345 case 32:
346 // includes extraction of shifted-off bytes
347 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
348 mixBits, mixRes, mShiftBufferUV, bytesShifted );
349 break;
350 }
351
352 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
353
354 // run the dynamic predictors
355 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
356 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
357
358 // run the lossless compressor on each channel
359 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
360 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
361 RequireNoErr( status, goto Exit; );
362
363 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
364 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
365 RequireNoErr( status, goto Exit; );
366
367 // look for best match
368 if ( (bits1 + bits2) < minBits1 )
369 {
370 minBits1 = bits1 + bits2;
371 bestRes = mixRes;
372 }
373 }
374
375 mLastMixRes[channelIndex] = (int16_t)bestRes;
376
377 // mix the stereo inputs with the current best mixRes
378 mixRes = mLastMixRes[channelIndex];
379 switch ( mBitDepth )
380 {
381 case 16:
382 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
383 break;
384 case 20:
385 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
386 break;
387 case 24:
388 // also extracts the shifted off bytes into the shift buffers
389 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
390 mixBits, mixRes, mShiftBufferUV, bytesShifted );
391 break;
392 case 32:
393 // also extracts the shifted off bytes into the shift buffers
394 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
395 mixBits, mixRes, mShiftBufferUV, bytesShifted );
396 break;
397 }
398
399 // now it's time for the predictor coefficient search loop
400 numU = numV = kMinUV;
401 minBits1 = minBits2 = 1ul << 31;
402
403 for ( uint32_t numUV = kMinUV; numUV <= kMaxUV; numUV += 4 )
404 {
405 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
406
407 dilate = 32;
408
409 // run the predictor over the same data multiple times to help it converge
410 for ( uint32_t converge = 0; converge < 8; converge++ )
411 {
412 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
413 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
414 }
415
416 dilate = 8;
417
418 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
419 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
420
421 if ( (bits1 * dilate + 16 * numUV) < minBits1 )
422 {
423 minBits1 = bits1 * dilate + 16 * numUV;
424 numU = numUV;
425 }
426
427 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
428 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
429
430 if ( (bits2 * dilate + 16 * numUV) < minBits2 )
431 {
432 minBits2 = bits2 * dilate + 16 * numUV;
433 numV = numUV;
434 }
435 }
436
437 // test for escape hatch if best calculated compressed size turns out to be more than the input size
438 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
439 if ( bytesShifted != 0 )
440 minBits += (numSamples * (bytesShifted * 8) * 2);
441
442 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
443
444 doEscape = (minBits >= escapeBits) ? true : false;
445
446 if ( doEscape == false )
447 {
448 // write bitstream header and coefs
449 BitBufferWrite( bitstream, 0, 12 );
450 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
451 if ( partialFrame )
452 BitBufferWrite( bitstream, numSamples, 32 );
453 BitBufferWrite( bitstream, mixBits, 8 );
454 BitBufferWrite( bitstream, mixRes, 8 );
455
456 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
457 //Assert( (pbFactor < 8) && (numU < 32) );
458 //Assert( (pbFactor < 8) && (numV < 32) );
459
460 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
461 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
462 for ( index = 0; index < numU; index++ )
463 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
464
465 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
466 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
467 for ( index = 0; index < numV; index++ )
468 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
469
470 // if shift active, write the interleaved shift buffers
471 if ( bytesShifted != 0 )
472 {
473 uint32_t bitShift = bytesShifted * 8;
474
475 //Assert( bitShift <= 16 );
476
477 for ( index = 0; index < (numSamples * 2); index += 2 )
478 {
479 uint32_t shiftedVal;
480
481 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
482 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
483 }
484 }
485
486 // run the dynamic predictor and lossless compression for the "left" channel
487 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
488 // of only using "U" buffers for the U-channel and "V" buffers for the V-channel
489 if ( mode == 0 )
490 {
491 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
492 }
493 else
494 {
495 pc_block( mMixBufferU, mPredictorV, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
496 pc_block( mPredictorV, mPredictorU, numSamples, nil, 31, chanBits, 0 );
497 }
498
499 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
500 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
501 RequireNoErr( status, goto Exit; );
502
503 // run the dynamic predictor and lossless compression for the "right" channel
504 if ( mode == 0 )
505 {
506 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
507 }
508 else
509 {
510 pc_block( mMixBufferV, mPredictorU, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
511 pc_block( mPredictorU, mPredictorV, numSamples, nil, 31, chanBits, 0 );
512 }
513
514 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
515 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
516 RequireNoErr( status, goto Exit; );
517
518 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
519 chuck it and do an escape packet
520 */
521 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
522 if ( minBits >= escapeBits )
523 {
524 *bitstream = startBits; // reset bitstream state
525 doEscape = true;
526 printf( "compressed frame too big: %u vs. %u \n", minBits, escapeBits );
527 }
528 }
529
530 if ( doEscape == true )
531 {
532 /* escape */
533 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
534
535 #if VERBOSE_DEBUG
536 DebugMsg( "escape!: %lu vs %lu", minBits, escapeBits );
537 #endif
538 }
539
540 Exit:
541 return status;
542 }
543
544 /*
545 EncodeStereoFast()
546 - encode a channel pair without the search loop for maximum possible speed
547 */
EncodeStereoFast(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)548 int32_t ALACEncoder::EncodeStereoFast( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
549 {
550 BitBuffer startBits = *bitstream; // squirrel away current bit position in case we decide to use escape hatch
551 AGParamRec agParams;
552 uint32_t bits1, bits2;
553 int32_t mixBits, mixRes;
554 uint32_t minBits, minBits1, minBits2;
555 uint32_t numU, numV;
556 uint32_t mode;
557 uint32_t pbFactor;
558 uint32_t chanBits;
559 uint32_t denShift;
560 uint8_t bytesShifted;
561 SearchCoefs coefsU;
562 SearchCoefs coefsV;
563 uint32_t index;
564 uint8_t partialFrame;
565 uint32_t escapeBits;
566 bool doEscape;
567 int32_t status;
568
569 // make sure we handle this bit-depth before we get going
570 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
571
572 // reload coefs pointers for this channel pair
573 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
574 // actually results in better overall compression
575 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
576 // different coefs for the different passes of "mixRes" results in even better compression
577 coefsU = (SearchCoefs) mCoefsU[channelIndex];
578 coefsV = (SearchCoefs) mCoefsV[channelIndex];
579
580 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
581 // so enable 16-bit "shift off" and encode in 17-bit mode
582 // - in addition, 24-bit mode really improves with one byte shifted off
583 if ( mBitDepth == 32 )
584 bytesShifted = 2;
585 else if ( mBitDepth >= 24 )
586 bytesShifted = 1;
587 else
588 bytesShifted = 0;
589
590 chanBits = mBitDepth - (bytesShifted * 8) + 1;
591
592 // flag whether or not this is a partial frame
593 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
594
595 // set up default encoding parameters for "fast" mode
596 mixBits = kDefaultMixBits;
597 mixRes = kDefaultMixRes;
598 numU = numV = kDefaultNumUV;
599 denShift = DENSHIFT_DEFAULT;
600 mode = 0;
601 pbFactor = 4;
602
603 minBits = minBits1 = minBits2 = 1ul << 31;
604
605 // mix the stereo inputs with default mixBits/mixRes
606 switch ( mBitDepth )
607 {
608 case 16:
609 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
610 break;
611 case 20:
612 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
613 break;
614 case 24:
615 // also extracts the shifted off bytes into the shift buffers
616 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
617 mixBits, mixRes, mShiftBufferUV, bytesShifted );
618 break;
619 case 32:
620 // also extracts the shifted off bytes into the shift buffers
621 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
622 mixBits, mixRes, mShiftBufferUV, bytesShifted );
623 break;
624 }
625
626 /* speculatively write the bitstream assuming the compressed version will be smaller */
627
628 // write bitstream header and coefs
629 BitBufferWrite( bitstream, 0, 12 );
630 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
631 if ( partialFrame )
632 BitBufferWrite( bitstream, numSamples, 32 );
633 BitBufferWrite( bitstream, mixBits, 8 );
634 BitBufferWrite( bitstream, mixRes, 8 );
635
636 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
637 //Assert( (pbFactor < 8) && (numU < 32) );
638 //Assert( (pbFactor < 8) && (numV < 32) );
639
640 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
641 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
642 for ( index = 0; index < numU; index++ )
643 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
644
645 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
646 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
647 for ( index = 0; index < numV; index++ )
648 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
649
650 // if shift active, write the interleaved shift buffers
651 if ( bytesShifted != 0 )
652 {
653 uint32_t bitShift = bytesShifted * 8;
654
655 //Assert( bitShift <= 16 );
656
657 for ( index = 0; index < (numSamples * 2); index += 2 )
658 {
659 uint32_t shiftedVal;
660
661 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
662 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
663 }
664 }
665
666 // run the dynamic predictor and lossless compression for the "left" channel
667 // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
668 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
669
670 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
671 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
672 RequireNoErr( status, goto Exit; );
673
674 // run the dynamic predictor and lossless compression for the "right" channel
675 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
676
677 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
678 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
679 RequireNoErr( status, goto Exit; );
680
681 // do bit requirement calculations
682 minBits1 = bits1 + (numU * sizeof(int16_t) * 8);
683 minBits2 = bits2 + (numV * sizeof(int16_t) * 8);
684
685 // test for escape hatch if best calculated compressed size turns out to be more than the input size
686 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
687 if ( bytesShifted != 0 )
688 minBits += (numSamples * (bytesShifted * 8) * 2);
689
690 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
691
692 doEscape = (minBits >= escapeBits) ? true : false;
693
694 if ( doEscape == false )
695 {
696 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
697 chuck it and do an escape packet
698 */
699 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
700 if ( minBits >= escapeBits )
701 {
702 doEscape = true;
703 printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
704 }
705
706 }
707
708 if ( doEscape == true )
709 {
710 /* escape */
711
712 // reset bitstream position since we speculatively wrote the compressed version
713 *bitstream = startBits;
714
715 // write escape frame
716 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
717
718 #if VERBOSE_DEBUG
719 DebugMsg( "escape!: %u vs %u", minBits, (numSamples * mBitDepth * 2) );
720 #endif
721 }
722
723 Exit:
724 return status;
725 }
726
727 /*
728 EncodeStereoEscape()
729 - encode stereo escape frame
730 */
EncodeStereoEscape(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t numSamples)731 int32_t ALACEncoder::EncodeStereoEscape( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t numSamples )
732 {
733 int16_t * input16;
734 int32_t * input32;
735 uint8_t partialFrame;
736 uint32_t index;
737
738 // flag whether or not this is a partial frame
739 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
740
741 // write bitstream header
742 BitBufferWrite( bitstream, 0, 12 );
743 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
744 if ( partialFrame )
745 BitBufferWrite( bitstream, numSamples, 32 );
746
747 // just copy the input data to the output buffer
748 switch ( mBitDepth )
749 {
750 case 16:
751 input16 = (int16_t *) inputBuffer;
752
753 for ( index = 0; index < (numSamples * stride); index += stride )
754 {
755 BitBufferWrite( bitstream, input16[index + 0], 16 );
756 BitBufferWrite( bitstream, input16[index + 1], 16 );
757 }
758 break;
759 case 20:
760 // mix20() with mixres param = 0 means de-interleave so use it to simplify things
761 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0 );
762 for ( index = 0; index < numSamples; index++ )
763 {
764 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
765 BitBufferWrite( bitstream, mMixBufferV[index], 20 );
766 }
767 break;
768 case 24:
769 // mix24() with mixres param = 0 means de-interleave so use it to simplify things
770 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0, mShiftBufferUV, 0 );
771 for ( index = 0; index < numSamples; index++ )
772 {
773 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
774 BitBufferWrite( bitstream, mMixBufferV[index], 24 );
775 }
776 break;
777 case 32:
778 input32 = (int32_t *) inputBuffer;
779
780 for ( index = 0; index < (numSamples * stride); index += stride )
781 {
782 BitBufferWrite( bitstream, input32[index + 0], 32 );
783 BitBufferWrite( bitstream, input32[index + 1], 32 );
784 }
785 break;
786 }
787
788 return ALAC_noErr;
789 }
790
791 /*
792 EncodeMono()
793 - encode a mono input buffer
794 */
EncodeMono(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)795 int32_t ALACEncoder::EncodeMono( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
796 {
797 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
798 AGParamRec agParams;
799 uint32_t bits1;
800 uint32_t numU;
801 SearchCoefs coefsU;
802 uint32_t dilate;
803 uint32_t minBits, bestU;
804 uint32_t minU, maxU;
805 uint32_t index, index2;
806 uint8_t bytesShifted;
807 uint32_t shift;
808 uint32_t mask;
809 uint32_t chanBits;
810 uint8_t pbFactor;
811 uint8_t partialFrame;
812 int16_t * input16;
813 int32_t * input32;
814 uint32_t escapeBits;
815 bool doEscape;
816 int32_t status;
817
818 // make sure we handle this bit-depth before we get going
819 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
820
821 status = ALAC_noErr;
822
823 // reload coefs array from previous frame
824 coefsU = (SearchCoefs) mCoefsU[channelIndex];
825
826 // pick bit depth for actual encoding
827 // - we lop off the lower byte(s) for 24-/32-bit encodings
828 if ( mBitDepth == 32 )
829 bytesShifted = 2;
830 else if ( mBitDepth >= 24 )
831 bytesShifted = 1;
832 else
833 bytesShifted = 0;
834
835 shift = bytesShifted * 8;
836 mask = (1ul << shift) - 1;
837 chanBits = mBitDepth - (bytesShifted * 8);
838
839 // flag whether or not this is a partial frame
840 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
841
842 // convert N-bit data to 32-bit for predictor
843 switch ( mBitDepth )
844 {
845 case 16:
846 {
847 // convert 16-bit data to 32-bit for predictor
848 input16 = (int16_t *) inputBuffer;
849 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
850 mMixBufferU[index] = (int32_t) input16[index2];
851 break;
852 }
853 case 20:
854 // convert 20-bit data to 32-bit for predictor
855 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
856 break;
857 case 24:
858 // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte(s)
859 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
860 for ( index = 0; index < numSamples; index++ )
861 {
862 mShiftBufferUV[index] = (uint16_t)(mMixBufferU[index] & mask);
863 mMixBufferU[index] >>= shift;
864 }
865 break;
866 case 32:
867 {
868 // just copy the 32-bit input data for the predictor and extract the shifted off byte(s)
869 input32 = (int32_t *) inputBuffer;
870
871 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
872 {
873 int32_t val = input32[index2];
874
875 mShiftBufferUV[index] = (uint16_t)(val & mask);
876 mMixBufferU[index] = val >> shift;
877 }
878 break;
879 }
880 }
881
882 // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
883 // - run over variations of the encoding params to find the best choice
884 minU = 4;
885 maxU = 8;
886 minBits = 1ul << 31;
887 pbFactor = 4;
888
889 minBits = 1ul << 31;
890 bestU = minU;
891
892 for ( numU = minU; numU <= maxU; numU += 4 )
893 {
894 BitBuffer workBits;
895 uint32_t numBits;
896
897 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
898
899 dilate = 32;
900 for ( uint32_t converge = 0; converge < 7; converge++ )
901 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
902
903 dilate = 8;
904 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
905
906 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
907 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
908 RequireNoErr( status, goto Exit; );
909
910 numBits = (dilate * bits1) + (16 * numU);
911 if ( numBits < minBits )
912 {
913 bestU = numU;
914 minBits = numBits;
915 }
916 }
917
918 // test for escape hatch if best calculated compressed size turns out to be more than the input size
919 // - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU
920 minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
921 if ( bytesShifted != 0 )
922 minBits += (numSamples * (bytesShifted * 8));
923
924 escapeBits = (numSamples * mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
925
926 doEscape = (minBits >= escapeBits) ? true : false;
927
928 if ( doEscape == false )
929 {
930 // write bitstream header
931 BitBufferWrite( bitstream, 0, 12 );
932 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
933 if ( partialFrame )
934 BitBufferWrite( bitstream, numSamples, 32 );
935 BitBufferWrite( bitstream, 0, 16 ); // mixBits = mixRes = 0
936
937 // write the params and predictor coefs
938 numU = bestU;
939 BitBufferWrite( bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8 ); // modeU = 0
940 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
941 for ( index = 0; index < numU; index++ )
942 BitBufferWrite( bitstream, coefsU[numU-1][index], 16 );
943
944 // if shift active, write the interleaved shift buffers
945 if ( bytesShifted != 0 )
946 {
947 for ( index = 0; index < numSamples; index++ )
948 BitBufferWrite( bitstream, mShiftBufferUV[index], shift );
949 }
950
951 // run the dynamic predictor with the best result
952 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
953
954 // do lossless compression
955 set_standard_ag_params( &agParams, numSamples, numSamples );
956 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
957 //AssertNoErr( status );
958
959
960 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
961 chuck it and do an escape packet
962 */
963 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
964 if ( minBits >= escapeBits )
965 {
966 *bitstream = startBits; // reset bitstream state
967 doEscape = true;
968 printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
969 }
970 }
971
972 if ( doEscape == true )
973 {
974 // write bitstream header and coefs
975 BitBufferWrite( bitstream, 0, 12 );
976 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
977 if ( partialFrame )
978 BitBufferWrite( bitstream, numSamples, 32 );
979
980 // just copy the input data to the output buffer
981 switch ( mBitDepth )
982 {
983 case 16:
984 input16 = (int16_t *) inputBuffer;
985 for ( index = 0; index < (numSamples * stride); index += stride )
986 BitBufferWrite( bitstream, input16[index], 16 );
987 break;
988 case 20:
989 // convert 20-bit data to 32-bit for simplicity
990 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
991 for ( index = 0; index < numSamples; index++ )
992 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
993 break;
994 case 24:
995 // convert 24-bit data to 32-bit for simplicity
996 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
997 for ( index = 0; index < numSamples; index++ )
998 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
999 break;
1000 case 32:
1001 input32 = (int32_t *) inputBuffer;
1002 for ( index = 0; index < (numSamples * stride); index += stride )
1003 BitBufferWrite( bitstream, input32[index], 32 );
1004 break;
1005 }
1006 #if VERBOSE_DEBUG
1007 DebugMsg( "escape!: %lu vs %lu", minBits, (numSamples * mBitDepth) );
1008 #endif
1009 }
1010
1011 Exit:
1012 return status;
1013 }
1014
1015 #if PRAGMA_MARK
1016 #pragma mark -
1017 #endif
1018
1019 /*
1020 Encode()
1021 - encode the next block of samples
1022 */
Encode(AudioFormatDescription theInputFormat,AudioFormatDescription theOutputFormat,unsigned char * theReadBuffer,unsigned char * theWriteBuffer,int32_t * ioNumBytes)1023 int32_t ALACEncoder::Encode(AudioFormatDescription theInputFormat, AudioFormatDescription theOutputFormat,
1024 unsigned char * theReadBuffer, unsigned char * theWriteBuffer, int32_t * ioNumBytes)
1025 {
1026 uint32_t numFrames;
1027 uint32_t outputSize;
1028 BitBuffer bitstream;
1029 int32_t status;
1030
1031 numFrames = *ioNumBytes/theInputFormat.mBytesPerPacket;
1032
1033 // create a bit buffer structure pointing to our output buffer
1034 BitBufferInit( &bitstream, theWriteBuffer, mMaxOutputBytes );
1035
1036 if ( theInputFormat.mChannelsPerFrame == 2 )
1037 {
1038 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
1039 BitBufferWrite( &bitstream, ID_CPE, 3 );
1040 BitBufferWrite( &bitstream, 0, 4 );
1041
1042 // encode stereo input buffer
1043 if ( mFastMode == false )
1044 status = this->EncodeStereo( &bitstream, theReadBuffer, 2, 0, numFrames );
1045 else
1046 status = this->EncodeStereoFast( &bitstream, theReadBuffer, 2, 0, numFrames );
1047 RequireNoErr( status, goto Exit; );
1048 }
1049 else if ( theInputFormat.mChannelsPerFrame == 1 )
1050 {
1051 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
1052 BitBufferWrite( &bitstream, ID_SCE, 3 );
1053 BitBufferWrite( &bitstream, 0, 4 );
1054
1055 // encode mono input buffer
1056 status = this->EncodeMono( &bitstream, theReadBuffer, 1, 0, numFrames );
1057 RequireNoErr( status, goto Exit; );
1058 }
1059 else
1060 {
1061 char * inputBuffer;
1062 uint32_t tag;
1063 uint32_t channelIndex;
1064 uint32_t inputIncrement;
1065 uint8_t stereoElementTag;
1066 uint8_t monoElementTag;
1067 uint8_t lfeElementTag;
1068
1069 inputBuffer = (char *) theReadBuffer;
1070 inputIncrement = ((mBitDepth + 7) / 8);
1071
1072 stereoElementTag = 0;
1073 monoElementTag = 0;
1074 lfeElementTag = 0;
1075
1076 for ( channelIndex = 0; channelIndex < theInputFormat.mChannelsPerFrame; )
1077 {
1078 tag = (sChannelMaps[theInputFormat.mChannelsPerFrame - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3);
1079
1080 BitBufferWrite( &bitstream, tag, 3 );
1081 switch ( tag )
1082 {
1083 case ID_SCE:
1084 // mono
1085 BitBufferWrite( &bitstream, monoElementTag, 4 );
1086
1087 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1088
1089 inputBuffer += inputIncrement;
1090 channelIndex++;
1091 monoElementTag++;
1092 break;
1093
1094 case ID_CPE:
1095 // stereo
1096 BitBufferWrite( &bitstream, stereoElementTag, 4 );
1097
1098 status = this->EncodeStereo( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1099
1100 inputBuffer += (inputIncrement * 2);
1101 channelIndex += 2;
1102 stereoElementTag++;
1103 break;
1104
1105 case ID_LFE:
1106 // LFE channel (subwoofer)
1107 BitBufferWrite( &bitstream, lfeElementTag, 4 );
1108
1109 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1110
1111 inputBuffer += inputIncrement;
1112 channelIndex++;
1113 lfeElementTag++;
1114 break;
1115
1116 default:
1117 printf( "That ain't right! (%u)\n", tag );
1118 status = kALAC_ParamError;
1119 goto Exit;
1120 }
1121
1122 RequireNoErr( status, goto Exit; );
1123 }
1124 }
1125
1126 #if VERBOSE_DEBUG
1127 {
1128 // if there is room left in the output buffer, add some random fill data to test decoder
1129 int32_t bitsLeft;
1130 int32_t bytesLeft;
1131
1132 bitsLeft = BitBufferGetPosition( &bitstream ) - 3; // - 3 for ID_END tag
1133 bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8);
1134
1135 if ( (bytesLeft > 20) && ((bytesLeft & 0x4u) != 0) )
1136 AddFiller( &bitstream, bytesLeft );
1137 }
1138 #endif
1139
1140 // add 3-bit frame end tag: ID_END
1141 BitBufferWrite( &bitstream, ID_END, 3 );
1142
1143 // byte-align the output data
1144 BitBufferByteAlign( &bitstream, true );
1145
1146 outputSize = BitBufferGetPosition( &bitstream ) / 8;
1147 //Assert( outputSize <= mMaxOutputBytes );
1148
1149
1150 // all good, let iTunes know what happened and remember the total number of input sample frames
1151 *ioNumBytes = outputSize;
1152 //mEncodedFrames += encodeMsg->numInputSamples;
1153
1154 // gather encoding stats
1155 mTotalBytesGenerated += outputSize;
1156 mMaxFrameBytes = MAX( mMaxFrameBytes, outputSize );
1157
1158 status = ALAC_noErr;
1159
1160 Exit:
1161 return status;
1162 }
1163
1164 /*
1165 Finish()
1166 - drain out any leftover samples
1167 */
1168
Finish()1169 int32_t ALACEncoder::Finish()
1170 {
1171 /* // finalize bit rate statistics
1172 if ( mSampleSize.numEntries != 0 )
1173 mAvgBitRate = (uint32_t)( (((float)mTotalBytesGenerated * 8.0f) / (float)mSampleSize.numEntries) * ((float)mSampleRate / (float)mFrameSize) );
1174 else
1175 mAvgBitRate = 0;
1176 */
1177 return ALAC_noErr;
1178 }
1179
1180 #if PRAGMA_MARK
1181 #pragma mark -
1182 #endif
1183
1184 /*
1185 GetConfig()
1186 */
GetConfig(ALACSpecificConfig & config)1187 void ALACEncoder::GetConfig( ALACSpecificConfig & config )
1188 {
1189 config.frameLength = Swap32NtoB(mFrameSize);
1190 config.compatibleVersion = (uint8_t) kALACCompatibleVersion;
1191 config.bitDepth = (uint8_t) mBitDepth;
1192 config.pb = (uint8_t) PB0;
1193 config.kb = (uint8_t) KB0;
1194 config.mb = (uint8_t) MB0;
1195 config.numChannels = (uint8_t) mNumChannels;
1196 config.maxRun = Swap16NtoB((uint16_t) MAX_RUN_DEFAULT);
1197 config.maxFrameBytes = Swap32NtoB(mMaxFrameBytes);
1198 config.avgBitRate = Swap32NtoB(mAvgBitRate);
1199 config.sampleRate = Swap32NtoB(mOutputSampleRate);
1200 }
1201
GetMagicCookieSize(uint32_t inNumChannels)1202 uint32_t ALACEncoder::GetMagicCookieSize(uint32_t inNumChannels)
1203 {
1204 if (inNumChannels > 2)
1205 {
1206 return sizeof(ALACSpecificConfig) + kChannelAtomSize + sizeof(ALACAudioChannelLayout);
1207 }
1208 else
1209 {
1210 return sizeof(ALACSpecificConfig);
1211 }
1212 }
1213
GetMagicCookie(void * outCookie,uint32_t * ioSize)1214 void ALACEncoder::GetMagicCookie(void * outCookie, uint32_t * ioSize)
1215 {
1216 ALACSpecificConfig theConfig = {0};
1217 ALACAudioChannelLayout theChannelLayout = {0};
1218 uint8_t theChannelAtom[kChannelAtomSize] = {0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0};
1219 uint32_t theCookieSize = sizeof(ALACSpecificConfig);
1220 uint8_t * theCookiePointer = (uint8_t *)outCookie;
1221
1222 GetConfig(theConfig);
1223 if (theConfig.numChannels > 2)
1224 {
1225 theChannelLayout.mChannelLayoutTag = ALACChannelLayoutTags[theConfig.numChannels - 1];
1226 theCookieSize += (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1227 }
1228 if (*ioSize >= theCookieSize)
1229 {
1230 memcpy(theCookiePointer, &theConfig, sizeof(ALACSpecificConfig));
1231 theChannelAtom[3] = (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1232 if (theConfig.numChannels > 2)
1233 {
1234 theCookiePointer += sizeof(ALACSpecificConfig);
1235 memcpy(theCookiePointer, theChannelAtom, kChannelAtomSize);
1236 theCookiePointer += kChannelAtomSize;
1237 memcpy(theCookiePointer, &theChannelLayout, sizeof(ALACAudioChannelLayout));
1238 }
1239 *ioSize = theCookieSize;
1240 }
1241 else
1242 {
1243 *ioSize = 0; // no incomplete cookies
1244 }
1245 }
1246
1247 /*
1248 InitializeEncoder()
1249 - initialize the encoder component with the current config
1250 */
InitializeEncoder(AudioFormatDescription theOutputFormat)1251 int32_t ALACEncoder::InitializeEncoder(AudioFormatDescription theOutputFormat)
1252 {
1253 int32_t status;
1254
1255 mOutputSampleRate = theOutputFormat.mSampleRate;
1256 mNumChannels = theOutputFormat.mChannelsPerFrame;
1257 switch(theOutputFormat.mFormatFlags)
1258 {
1259 case 1:
1260 mBitDepth = 16;
1261 break;
1262 case 2:
1263 mBitDepth = 20;
1264 break;
1265 case 3:
1266 mBitDepth = 24;
1267 break;
1268 case 4:
1269 mBitDepth = 32;
1270 break;
1271 default:
1272 break;
1273 }
1274
1275 // set up default encoding parameters and state
1276 // - note: mFrameSize is set in the constructor or via SetFrameSize() which must be called before this routine
1277 for ( uint32_t index = 0; index < kALACMaxChannels; index++ )
1278 mLastMixRes[index] = kDefaultMixRes;
1279
1280 // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1281 // but note that this can be bigger than the input size!
1282 // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
1283 mMaxOutputBytes = mFrameSize * mNumChannels * ((10 + kMaxSampleSize) / 8) + 1;
1284
1285 // allocate mix buffers
1286 mMixBufferU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1287 mMixBufferV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1288
1289 // allocate dynamic predictor buffers
1290 mPredictorU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1291 mPredictorV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1292
1293 // allocate combined shift buffer
1294 mShiftBufferUV = (uint16_t *) calloc( mFrameSize * 2 * sizeof(uint16_t),1 );
1295
1296 // allocate work buffer for search loop
1297 mWorkBuffer = (uint8_t *) calloc( mMaxOutputBytes, 1 );
1298
1299 RequireAction( (mMixBufferU != nil) && (mMixBufferV != nil) &&
1300 (mPredictorU != nil) && (mPredictorV != nil) &&
1301 (mShiftBufferUV != nil) && (mWorkBuffer != nil ),
1302 status = kALAC_MemFullError; goto Exit; );
1303
1304 status = ALAC_noErr;
1305
1306
1307 // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
1308 for ( int32_t channel = 0; channel < (int32_t)mNumChannels; channel++ )
1309 {
1310 for ( int32_t search = 0; search < kALACMaxSearches; search++ )
1311 {
1312 init_coefs( mCoefsU[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1313 init_coefs( mCoefsV[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1314 }
1315 }
1316
1317 Exit:
1318 return status;
1319 }
1320
1321 /*
1322 GetSourceFormat()
1323 - given the input format, return one of our supported formats
1324 */
GetSourceFormat(const AudioFormatDescription * source,AudioFormatDescription * output)1325 void ALACEncoder::GetSourceFormat( const AudioFormatDescription * source, AudioFormatDescription * output )
1326 {
1327 // default is 16-bit native endian
1328 // - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1329 // to encode to 16-bit since the source was lossy in the first place
1330 // - note: if not a supported bit depth, find the closest supported bit depth to the input one
1331 if ( (source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) ||
1332 ( source->mBitsPerChannel <= 16 ) )
1333 mBitDepth = 16;
1334 else if ( source->mBitsPerChannel <= 20 )
1335 mBitDepth = 20;
1336 else if ( source->mBitsPerChannel <= 24 )
1337 mBitDepth = 24;
1338 else
1339 mBitDepth = 32;
1340
1341 // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1342 // and sample rate were specified when we were configured
1343 /*
1344 MakeUncompressedAudioFormat( mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output );
1345 */
1346 }
1347
1348
1349
1350 #if VERBOSE_DEBUG
1351
1352 #if PRAGMA_MARK
1353 #pragma mark -
1354 #endif
1355
1356 /*
1357 AddFiller()
1358 - add fill and data stream elements to the bitstream to test the decoder
1359 */
AddFiller(BitBuffer * bits,int32_t numBytes)1360 static void AddFiller( BitBuffer * bits, int32_t numBytes )
1361 {
1362 uint8_t tag;
1363 uint32_t index;
1364
1365 // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1366 numBytes -= 6;
1367 if ( numBytes <= 0 )
1368 return;
1369
1370 // randomly pick Fill or Data Stream Element based on numBytes requested
1371 tag = (numBytes & 0x8) ? ID_FIL : ID_DSE;
1372
1373 BitBufferWrite( bits, tag, 3 );
1374 if ( tag == ID_FIL )
1375 {
1376 // can't write more than 269 bytes in a fill element
1377 numBytes = (numBytes > 269) ? 269 : numBytes;
1378
1379 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1380 if ( numBytes >= 15 )
1381 {
1382 uint16_t extensionSize;
1383
1384 BitBufferWrite( bits, 15, 4 );
1385
1386 // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1387 // - otherwise, there's no way to represent 15
1388 // - for example, to really mean 15 bytes you must encode extensionSize = 1
1389 // - why it's not like data stream elements I have no idea
1390 extensionSize = (numBytes - 15) + 1;
1391 Assert( extensionSize <= 255 );
1392 BitBufferWrite( bits, extensionSize, 8 );
1393 }
1394 else
1395 BitBufferWrite( bits, numBytes, 4 );
1396
1397 BitBufferWrite( bits, 0x10, 8 ); // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1398 for ( index = 0; index < (numBytes - 1); index++ )
1399 BitBufferWrite( bits, 0xa5, 8 ); // fill_byte = b10100101 = 0xa5
1400 }
1401 else
1402 {
1403 // can't write more than 510 bytes in a data stream element
1404 numBytes = (numBytes > 510) ? 510 : numBytes;
1405
1406 BitBufferWrite( bits, 0, 4 ); // element instance tag
1407 BitBufferWrite( bits, 1, 1 ); // byte-align flag = true
1408
1409 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1410 if ( numBytes >= 255 )
1411 {
1412 BitBufferWrite( bits, 255, 8 );
1413 BitBufferWrite( bits, numBytes - 255, 8 );
1414 }
1415 else
1416 BitBufferWrite( bits, numBytes, 8 );
1417
1418 BitBufferByteAlign( bits, true ); // byte-align with zeros
1419
1420 for ( index = 0; index < numBytes; index++ )
1421 BitBufferWrite( bits, 0x5a, 8 );
1422 }
1423 }
1424
1425 #endif /* VERBOSE_DEBUG */
1426