1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 /*
22 File: ALACEncoder.cpp
23 */
24
25 // build stuff
26 #define VERBOSE_DEBUG 0
27
28 // headers
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "ALACEncoder.h"
34
35 #include "aglib.h"
36 #include "dplib.h"
37 #include "matrixlib.h"
38
39 #include "ALACBitUtilities.h"
40 #include "ALACAudioTypes.h"
41 #include "EndianPortable.h"
42
43 // Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
44 // picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
45 typedef int16_t (*SearchCoefs)[kALACMaxCoefs];
46
// defines/constants
// - all of these are private to this translation unit
static const uint32_t	kALACEncoderMagic	= 'dpge';	// four-char tag; not referenced in the visible part of this file
static const uint32_t	kMaxSampleSize		= 32;		// max allowed bit width is 32
static const uint32_t	kDefaultMixBits		= 2;		// mixBits value the stereo encoders always start from
static const uint32_t	kDefaultMixRes		= 0;		// mixRes used by the "fast" stereo path (no search)
static const uint32_t	kMaxRes				= 4;		// inclusive upper bound of the mixRes search loop
static const uint32_t	kDefaultNumUV		= 8;		// predictor order used while searching mixRes and in fast mode
static const uint32_t	kMinUV				= 4;		// smallest predictor order tried by the order search
static const uint32_t	kMaxUV				= 8;		// largest predictor order tried by the order search

// static functions
// - AddFiller() is only declared when verbose debugging is compiled in
#if VERBOSE_DEBUG
static void AddFiller( BitBuffer * bits, int32_t numBytes );
#endif
61
62
63 /*
64 Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
65 at the beginning of the frame for that channel. Indicates whether SCE, CPE, or LFE.
66 Each particular field is accessed via the current channel index. Note that the channel
67 index increments by two for channel pairs.
68
69 For example:
70
71 C L R 3-channel input = (ID_CPE << 3) | (ID_SCE)
72 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
73 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
74
75 C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
76 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
77 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
78 index 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
79 index 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
80 index 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
81 */
82 static const uint32_t sChannelMaps[kALACMaxChannels] =
83 {
84 ID_SCE,
85 ID_CPE,
86 (ID_CPE << 3) | (ID_SCE),
87 (ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
88 (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
89 (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
90 (ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
91 (ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
92 };
93
// Table of the sample rates this file names as iPod-supported.
// - not referenced in the visible part of this file
static const uint32_t	sSupportediPodSampleRates[] =
{
	 8000, 11025, 12000,
	16000, 22050, 24000,
	32000, 44100, 48000
};
98
99 /*
100 Constructor
101 */
ALACEncoder()102 ALACEncoder::ALACEncoder() :
103 mBitDepth( 0 ),
104 mFastMode( 0 ),
105 mMixBufferU( nil ),
106 mMixBufferV( nil ),
107 mPredictorU( nil ),
108 mPredictorV( nil ),
109 mShiftBufferUV( nil ),
110 mWorkBuffer( nil ),
111
112
113 mTotalBytesGenerated( 0 ),
114 mAvgBitRate( 0 ),
115 mMaxFrameBytes( 0 )
116 {
117 // overrides
118 mFrameSize = kALACDefaultFrameSize;
119 }
120
121 /*
122 Destructor
123 */
~ALACEncoder()124 ALACEncoder::~ALACEncoder()
125 {
126 // delete the matrix mixing buffers
127 if ( mMixBufferU )
128 {
129 free(mMixBufferU);
130 mMixBufferU = NULL;
131 }
132 if ( mMixBufferV )
133 {
134 free(mMixBufferV);
135 mMixBufferV = NULL;
136 }
137
138 // delete the dynamic predictor's "corrector" buffers
139 if ( mPredictorU )
140 {
141 free(mPredictorU);
142 mPredictorU = NULL;
143 }
144 if ( mPredictorV )
145 {
146 free(mPredictorV);
147 mPredictorV = NULL;
148 }
149
150 // delete the unused byte shift buffer
151 if ( mShiftBufferUV )
152 {
153 free(mShiftBufferUV);
154 mShiftBufferUV = NULL;
155 }
156
157 // delete the work buffer
158 if ( mWorkBuffer )
159 {
160 free(mWorkBuffer);
161 mWorkBuffer = NULL;
162 }
163 }
164
165 #if PRAGMA_MARK
166 #pragma mark -
167 #endif
168
169 /*
170 HEADER SPECIFICATION
171
172 For every segment we adopt the following header:
173
174 1 byte reserved (always 0)
175 1 byte flags (see below)
176 [4 byte frame length] (optional, see below)
177 ---Next, the per-segment ALAC parameters---
178 1 byte mixBits (middle-side parameter)
179 1 byte mixRes (middle-side parameter, interpreted as signed char)
180
181 1 byte shiftU (4 bits modeU, 4 bits denShiftU)
182 1 byte filterU (3 bits pbFactorU, 5 bits numU)
		(numU) shorts (signed DP coefficients for U channel)
184 ---Next, 2nd-channel ALAC parameters in case of stereo mode---
185 1 byte shiftV (4 bits modeV, 4 bits denShiftV)
186 1 byte filterV (3 bits pbFactorV, 5 bits numV)
187 (numV) shorts (signed DP coefficients for V channel)
188 ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
189 ---Then comes the AG-compressor bitstream---
190
191
192 FLAGS
193 -----
194
195 The presence of certain flag bits changes the header format such that the parameters might
196 not even be sent. The currently defined flags format is:
197
198 0000psse
199
200 where 0 = reserved, must be 0
201 p = 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
202 ss = 2-bit field indicating "number of shift-off bytes ignored by compression"
203 e = 1-bit field indicating "escape"
204
205 The "partial frame" flag means that the following segment is not equal to the frame length specified
206 in the out-of-band decoder configuration. This allows the decoder to deal with end-of-file partial
207 segments without incurring the 32-bit overhead for each segment.
208
209 The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
210 uncompressed. The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
211 quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
212 uncompressed size. However, by shifting the input values down and running the remaining bits through
213 the normal compression algorithm, a net win can be achieved. If this field is non-zero, it means that
214 the shifted-off bytes follow after the parameter section of the header and before the compressed
215 bitstream. Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
216 bytes are shifted off the bottom which helps the eventual compression ratio. For stereo channels,
217 the shifted off bytes are interleaved.
218
219 The "escape" flag means that this segment was not compressed b/c the compressed size would be
220 >= uncompressed size. In that case, the audio data was passed through uncompressed after the header.
221 The other header parameter bytes will not be sent.
222
223
224 PARAMETERS
225 ----------
226
227 If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
228
229 4 + (2 + 2 * numU) (mono mode)
		4 + (2 + 2 * numU) + (2 + 2 * numV)		(stereo mode)
231
232 where the ALAC filter-lengths numU, numV are bounded by a
233 constant (in the current source, numU, numV <= NUMCOEPAIRS), and
234 this forces an absolute upper bound on header size.
235
236 Each segment-decode process loads up these bytes from the front of the
237 local stream, in the above order, then follows with the entropy-encoded
238 bits for the given segment.
239
240 To generalize middle-side, there are various mixing modes including middle-side, each lossless,
241 as embodied in the mix() and unmix() functions. These functions exploit a generalized middle-side
242 transformation:
243
244 u := [(rL + (m-r)R)/m];
245 v := L - R;
246
247 where [ ] denotes integer floor. The (lossless) inverse is
248
		L = u + v - [rv/m];
250 R = L - v;
251
252 In the segment header, m and r are encoded in mixBits and mixRes.
253 Classical "middle-side" is obtained with m = 2, r = 1, but now
254 we have more generalized mixes.
255
256 NOTES
257 -----
258 The relevance of the ALAC coefficients is explained in detail
259 in patent documents.
260 */
261
262 /*
263 EncodeStereo()
264 - encode a channel pair
265 */
EncodeStereo(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)266 int32_t ALACEncoder::EncodeStereo( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
267 {
268 BitBuffer workBits;
269 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
270 AGParamRec agParams;
271 uint32_t bits1, bits2;
272 uint32_t dilate;
273 int32_t mixBits, mixRes, maxRes;
274 uint32_t minBits, minBits1, minBits2;
275 uint32_t numU, numV;
276 uint32_t mode;
277 uint32_t pbFactor;
278 uint32_t chanBits;
279 uint32_t denShift;
280 uint8_t bytesShifted;
281 SearchCoefs coefsU;
282 SearchCoefs coefsV;
283 uint32_t index;
284 uint8_t partialFrame;
285 uint32_t escapeBits;
286 bool doEscape;
287 int32_t status = ALAC_noErr;
288
289 // make sure we handle this bit-depth before we get going
290 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
291
292 // reload coefs pointers for this channel pair
293 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
294 // actually results in better overall compression
295 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
296 // different coefs for the different passes of "mixRes" results in even better compression
297 coefsU = (SearchCoefs) mCoefsU[channelIndex];
298 coefsV = (SearchCoefs) mCoefsV[channelIndex];
299
300 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
301 // so enable 16-bit "shift off" and encode in 17-bit mode
302 // - in addition, 24-bit mode really improves with one byte shifted off
303 if ( mBitDepth == 32 )
304 bytesShifted = 2;
305 else if ( mBitDepth >= 24 )
306 bytesShifted = 1;
307 else
308 bytesShifted = 0;
309
310 chanBits = mBitDepth - (bytesShifted * 8) + 1;
311
312 // flag whether or not this is a partial frame
313 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
314
315 // brute-force encode optimization loop
316 // - run over variations of the encoding params to find the best choice
317 mixBits = kDefaultMixBits;
318 maxRes = kMaxRes;
319 numU = numV = kDefaultNumUV;
320 denShift = DENSHIFT_DEFAULT;
321 mode = 0;
322 pbFactor = 4;
323 dilate = 8;
324
325 minBits = minBits1 = minBits2 = 1ul << 31;
326
327 int32_t bestRes = mLastMixRes[channelIndex];
328
329 for ( mixRes = 0; mixRes <= maxRes; mixRes++ )
330 {
331 // mix the stereo inputs
332 switch ( mBitDepth )
333 {
334 case 16:
335 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
336 break;
337 case 20:
338 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
339 break;
340 case 24:
341 // includes extraction of shifted-off bytes
342 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
343 mixBits, mixRes, mShiftBufferUV, bytesShifted );
344 break;
345 case 32:
346 // includes extraction of shifted-off bytes
347 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
348 mixBits, mixRes, mShiftBufferUV, bytesShifted );
349 break;
350 }
351
352 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
353
354 // run the dynamic predictors
355 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
356 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
357
358 // run the lossless compressor on each channel
359 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
360 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
361 RequireNoErr( status, goto Exit; );
362
363 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
364 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
365 RequireNoErr( status, goto Exit; );
366
367 // look for best match
368 if ( (bits1 + bits2) < minBits1 )
369 {
370 minBits1 = bits1 + bits2;
371 bestRes = mixRes;
372 }
373 }
374
375 mLastMixRes[channelIndex] = (int16_t)bestRes;
376
377 // mix the stereo inputs with the current best mixRes
378 mixRes = mLastMixRes[channelIndex];
379 switch ( mBitDepth )
380 {
381 case 16:
382 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
383 break;
384 case 20:
385 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
386 break;
387 case 24:
388 // also extracts the shifted off bytes into the shift buffers
389 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
390 mixBits, mixRes, mShiftBufferUV, bytesShifted );
391 break;
392 case 32:
393 // also extracts the shifted off bytes into the shift buffers
394 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
395 mixBits, mixRes, mShiftBufferUV, bytesShifted );
396 break;
397 }
398
399 // now it's time for the predictor coefficient search loop
400 numU = numV = kMinUV;
401 minBits1 = minBits2 = 1ul << 31;
402
403 for ( uint32_t numUV = kMinUV; numUV <= kMaxUV; numUV += 4 )
404 {
405 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
406
407 dilate = 32;
408
409 // run the predictor over the same data multiple times to help it converge
410 for ( uint32_t converge = 0; converge < 8; converge++ )
411 {
412 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
413 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
414 }
415
416 dilate = 8;
417
418 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
419 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
420
421 if ( (bits1 * dilate + 16 * numUV) < minBits1 )
422 {
423 minBits1 = bits1 * dilate + 16 * numUV;
424 numU = numUV;
425 }
426
427 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
428 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
429
430 if ( (bits2 * dilate + 16 * numUV) < minBits2 )
431 {
432 minBits2 = bits2 * dilate + 16 * numUV;
433 numV = numUV;
434 }
435 }
436
437 // test for escape hatch if best calculated compressed size turns out to be more than the input size
438 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
439 if ( bytesShifted != 0 )
440 minBits += (numSamples * (bytesShifted * 8) * 2);
441
442 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
443
444 doEscape = (minBits >= escapeBits) ? true : false;
445
446 if ( doEscape == false )
447 {
448 // write bitstream header and coefs
449 BitBufferWrite( bitstream, 0, 12 );
450 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
451 if ( partialFrame )
452 BitBufferWrite( bitstream, numSamples, 32 );
453 BitBufferWrite( bitstream, mixBits, 8 );
454 BitBufferWrite( bitstream, mixRes, 8 );
455
456 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
457 //Assert( (pbFactor < 8) && (numU < 32) );
458 //Assert( (pbFactor < 8) && (numV < 32) );
459
460 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
461 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
462 for ( index = 0; index < numU; index++ )
463 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
464
465 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
466 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
467 for ( index = 0; index < numV; index++ )
468 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
469
470 // if shift active, write the interleaved shift buffers
471 if ( bytesShifted != 0 )
472 {
473 uint32_t bitShift = bytesShifted * 8;
474
475 //Assert( bitShift <= 16 );
476
477 for ( index = 0; index < (numSamples * 2); index += 2 )
478 {
479 uint32_t shiftedVal;
480
481 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
482 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
483 }
484 }
485
486 // run the dynamic predictor and lossless compression for the "left" channel
487 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
488 // of only using "U" buffers for the U-channel and "V" buffers for the V-channel
489 if ( mode == 0 )
490 {
491 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
492 }
493 else
494 {
495 pc_block( mMixBufferU, mPredictorV, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
496 pc_block( mPredictorV, mPredictorU, numSamples, nil, 31, chanBits, 0 );
497 }
498
499 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
500 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
501 RequireNoErr( status, goto Exit; );
502
503 // run the dynamic predictor and lossless compression for the "right" channel
504 if ( mode == 0 )
505 {
506 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
507 }
508 else
509 {
510 pc_block( mMixBufferV, mPredictorU, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
511 pc_block( mPredictorU, mPredictorV, numSamples, nil, 31, chanBits, 0 );
512 }
513
514 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
515 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
516 RequireNoErr( status, goto Exit; );
517
518 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
519 chuck it and do an escape packet
520 */
521 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
522 if ( minBits >= escapeBits )
523 {
524 *bitstream = startBits; // reset bitstream state
525 doEscape = true;
526 }
527 }
528
529 if ( doEscape == true )
530 {
531 /* escape */
532 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
533
534 #if VERBOSE_DEBUG
535 DebugMsg( "escape!: %lu vs %lu", minBits, escapeBits );
536 #endif
537 }
538
539 Exit:
540 return status;
541 }
542
543 /*
544 EncodeStereoFast()
545 - encode a channel pair without the search loop for maximum possible speed
546 */
EncodeStereoFast(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)547 int32_t ALACEncoder::EncodeStereoFast( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
548 {
549 BitBuffer startBits = *bitstream; // squirrel away current bit position in case we decide to use escape hatch
550 AGParamRec agParams;
551 uint32_t bits1, bits2;
552 int32_t mixBits, mixRes;
553 uint32_t minBits, minBits1, minBits2;
554 uint32_t numU, numV;
555 uint32_t mode;
556 uint32_t pbFactor;
557 uint32_t chanBits;
558 uint32_t denShift;
559 uint8_t bytesShifted;
560 SearchCoefs coefsU;
561 SearchCoefs coefsV;
562 uint32_t index;
563 uint8_t partialFrame;
564 uint32_t escapeBits;
565 bool doEscape;
566 int32_t status;
567
568 // make sure we handle this bit-depth before we get going
569 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
570
571 // reload coefs pointers for this channel pair
572 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
573 // actually results in better overall compression
574 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
575 // different coefs for the different passes of "mixRes" results in even better compression
576 coefsU = (SearchCoefs) mCoefsU[channelIndex];
577 coefsV = (SearchCoefs) mCoefsV[channelIndex];
578
579 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
580 // so enable 16-bit "shift off" and encode in 17-bit mode
581 // - in addition, 24-bit mode really improves with one byte shifted off
582 if ( mBitDepth == 32 )
583 bytesShifted = 2;
584 else if ( mBitDepth >= 24 )
585 bytesShifted = 1;
586 else
587 bytesShifted = 0;
588
589 chanBits = mBitDepth - (bytesShifted * 8) + 1;
590
591 // flag whether or not this is a partial frame
592 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
593
594 // set up default encoding parameters for "fast" mode
595 mixBits = kDefaultMixBits;
596 mixRes = kDefaultMixRes;
597 numU = numV = kDefaultNumUV;
598 denShift = DENSHIFT_DEFAULT;
599 mode = 0;
600 pbFactor = 4;
601
602 minBits = minBits1 = minBits2 = 1ul << 31;
603
604 // mix the stereo inputs with default mixBits/mixRes
605 switch ( mBitDepth )
606 {
607 case 16:
608 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
609 break;
610 case 20:
611 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
612 break;
613 case 24:
614 // also extracts the shifted off bytes into the shift buffers
615 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
616 mixBits, mixRes, mShiftBufferUV, bytesShifted );
617 break;
618 case 32:
619 // also extracts the shifted off bytes into the shift buffers
620 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
621 mixBits, mixRes, mShiftBufferUV, bytesShifted );
622 break;
623 }
624
625 /* speculatively write the bitstream assuming the compressed version will be smaller */
626
627 // write bitstream header and coefs
628 BitBufferWrite( bitstream, 0, 12 );
629 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
630 if ( partialFrame )
631 BitBufferWrite( bitstream, numSamples, 32 );
632 BitBufferWrite( bitstream, mixBits, 8 );
633 BitBufferWrite( bitstream, mixRes, 8 );
634
635 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
636 //Assert( (pbFactor < 8) && (numU < 32) );
637 //Assert( (pbFactor < 8) && (numV < 32) );
638
639 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
640 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
641 for ( index = 0; index < numU; index++ )
642 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
643
644 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
645 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
646 for ( index = 0; index < numV; index++ )
647 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
648
649 // if shift active, write the interleaved shift buffers
650 if ( bytesShifted != 0 )
651 {
652 uint32_t bitShift = bytesShifted * 8;
653
654 //Assert( bitShift <= 16 );
655
656 for ( index = 0; index < (numSamples * 2); index += 2 )
657 {
658 uint32_t shiftedVal;
659
660 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
661 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
662 }
663 }
664
665 // run the dynamic predictor and lossless compression for the "left" channel
666 // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
667 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
668
669 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
670 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
671 RequireNoErr( status, goto Exit; );
672
673 // run the dynamic predictor and lossless compression for the "right" channel
674 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
675
676 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
677 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
678 RequireNoErr( status, goto Exit; );
679
680 // do bit requirement calculations
681 minBits1 = bits1 + (numU * sizeof(int16_t) * 8);
682 minBits2 = bits2 + (numV * sizeof(int16_t) * 8);
683
684 // test for escape hatch if best calculated compressed size turns out to be more than the input size
685 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
686 if ( bytesShifted != 0 )
687 minBits += (numSamples * (bytesShifted * 8) * 2);
688
689 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
690
691 doEscape = (minBits >= escapeBits) ? true : false;
692
693 if ( doEscape == false )
694 {
695 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
696 chuck it and do an escape packet
697 */
698 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
699 if ( minBits >= escapeBits )
700 {
701 doEscape = true;
702 }
703
704 }
705
706 if ( doEscape == true )
707 {
708 /* escape */
709
710 // reset bitstream position since we speculatively wrote the compressed version
711 *bitstream = startBits;
712
713 // write escape frame
714 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
715
716 #if VERBOSE_DEBUG
717 DebugMsg( "escape!: %u vs %u", minBits, (numSamples * mBitDepth * 2) );
718 #endif
719 }
720
721 Exit:
722 return status;
723 }
724
725 /*
726 EncodeStereoEscape()
727 - encode stereo escape frame
728 */
EncodeStereoEscape(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t numSamples)729 int32_t ALACEncoder::EncodeStereoEscape( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t numSamples )
730 {
731 int16_t * input16;
732 int32_t * input32;
733 uint8_t partialFrame;
734 uint32_t index;
735
736 // flag whether or not this is a partial frame
737 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
738
739 // write bitstream header
740 BitBufferWrite( bitstream, 0, 12 );
741 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
742 if ( partialFrame )
743 BitBufferWrite( bitstream, numSamples, 32 );
744
745 // just copy the input data to the output buffer
746 switch ( mBitDepth )
747 {
748 case 16:
749 input16 = (int16_t *) inputBuffer;
750
751 for ( index = 0; index < (numSamples * stride); index += stride )
752 {
753 BitBufferWrite( bitstream, input16[index + 0], 16 );
754 BitBufferWrite( bitstream, input16[index + 1], 16 );
755 }
756 break;
757 case 20:
758 // mix20() with mixres param = 0 means de-interleave so use it to simplify things
759 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0 );
760 for ( index = 0; index < numSamples; index++ )
761 {
762 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
763 BitBufferWrite( bitstream, mMixBufferV[index], 20 );
764 }
765 break;
766 case 24:
767 // mix24() with mixres param = 0 means de-interleave so use it to simplify things
768 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0, mShiftBufferUV, 0 );
769 for ( index = 0; index < numSamples; index++ )
770 {
771 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
772 BitBufferWrite( bitstream, mMixBufferV[index], 24 );
773 }
774 break;
775 case 32:
776 input32 = (int32_t *) inputBuffer;
777
778 for ( index = 0; index < (numSamples * stride); index += stride )
779 {
780 BitBufferWrite( bitstream, input32[index + 0], 32 );
781 BitBufferWrite( bitstream, input32[index + 1], 32 );
782 }
783 break;
784 }
785
786 return ALAC_noErr;
787 }
788
789 /*
790 EncodeMono()
791 - encode a mono input buffer
792 */
EncodeMono(BitBuffer * bitstream,void * inputBuffer,uint32_t stride,uint32_t channelIndex,uint32_t numSamples)793 int32_t ALACEncoder::EncodeMono( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
794 {
795 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
796 AGParamRec agParams;
797 uint32_t bits1;
798 uint32_t numU;
799 SearchCoefs coefsU;
800 uint32_t dilate;
801 uint32_t minBits, bestU;
802 uint32_t minU, maxU;
803 uint32_t index, index2;
804 uint8_t bytesShifted;
805 uint32_t shift;
806 uint32_t mask;
807 uint32_t chanBits;
808 uint8_t pbFactor;
809 uint8_t partialFrame;
810 int16_t * input16;
811 int32_t * input32;
812 uint32_t escapeBits;
813 bool doEscape;
814 int32_t status;
815
816 // make sure we handle this bit-depth before we get going
817 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
818
819 status = ALAC_noErr;
820
821 // reload coefs array from previous frame
822 coefsU = (SearchCoefs) mCoefsU[channelIndex];
823
824 // pick bit depth for actual encoding
825 // - we lop off the lower byte(s) for 24-/32-bit encodings
826 if ( mBitDepth == 32 )
827 bytesShifted = 2;
828 else if ( mBitDepth >= 24 )
829 bytesShifted = 1;
830 else
831 bytesShifted = 0;
832
833 shift = bytesShifted * 8;
834 mask = (1ul << shift) - 1;
835 chanBits = mBitDepth - (bytesShifted * 8);
836
837 // flag whether or not this is a partial frame
838 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
839
840 // convert N-bit data to 32-bit for predictor
841 switch ( mBitDepth )
842 {
843 case 16:
844 {
845 // convert 16-bit data to 32-bit for predictor
846 input16 = (int16_t *) inputBuffer;
847 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
848 mMixBufferU[index] = (int32_t) input16[index2];
849 break;
850 }
851 case 20:
852 // convert 20-bit data to 32-bit for predictor
853 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
854 break;
855 case 24:
856 // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte(s)
857 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
858 for ( index = 0; index < numSamples; index++ )
859 {
860 mShiftBufferUV[index] = (uint16_t)(mMixBufferU[index] & mask);
861 mMixBufferU[index] >>= shift;
862 }
863 break;
864 case 32:
865 {
866 // just copy the 32-bit input data for the predictor and extract the shifted off byte(s)
867 input32 = (int32_t *) inputBuffer;
868
869 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
870 {
871 int32_t val = input32[index2];
872
873 mShiftBufferUV[index] = (uint16_t)(val & mask);
874 mMixBufferU[index] = val >> shift;
875 }
876 break;
877 }
878 }
879
880 // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
881 // - run over variations of the encoding params to find the best choice
882 minU = 4;
883 maxU = 8;
884 minBits = 1ul << 31;
885 pbFactor = 4;
886
887 minBits = 1ul << 31;
888 bestU = minU;
889
890 for ( numU = minU; numU <= maxU; numU += 4 )
891 {
892 BitBuffer workBits;
893 uint32_t numBits;
894
895 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
896
897 dilate = 32;
898 for ( uint32_t converge = 0; converge < 7; converge++ )
899 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
900
901 dilate = 8;
902 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
903
904 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
905 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
906 RequireNoErr( status, goto Exit; );
907
908 numBits = (dilate * bits1) + (16 * numU);
909 if ( numBits < minBits )
910 {
911 bestU = numU;
912 minBits = numBits;
913 }
914 }
915
916 // test for escape hatch if best calculated compressed size turns out to be more than the input size
917 // - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU
918 minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
919 if ( bytesShifted != 0 )
920 minBits += (numSamples * (bytesShifted * 8));
921
922 escapeBits = (numSamples * mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
923
924 doEscape = (minBits >= escapeBits) ? true : false;
925
926 if ( doEscape == false )
927 {
928 // write bitstream header
929 BitBufferWrite( bitstream, 0, 12 );
930 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
931 if ( partialFrame )
932 BitBufferWrite( bitstream, numSamples, 32 );
933 BitBufferWrite( bitstream, 0, 16 ); // mixBits = mixRes = 0
934
935 // write the params and predictor coefs
936 numU = bestU;
937 BitBufferWrite( bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8 ); // modeU = 0
938 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
939 for ( index = 0; index < numU; index++ )
940 BitBufferWrite( bitstream, coefsU[numU-1][index], 16 );
941
942 // if shift active, write the interleaved shift buffers
943 if ( bytesShifted != 0 )
944 {
945 for ( index = 0; index < numSamples; index++ )
946 BitBufferWrite( bitstream, mShiftBufferUV[index], shift );
947 }
948
949 // run the dynamic predictor with the best result
950 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
951
952 // do lossless compression
953 set_standard_ag_params( &agParams, numSamples, numSamples );
954 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
955 //AssertNoErr( status );
956
957
958 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
959 chuck it and do an escape packet
960 */
961 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
962 if ( minBits >= escapeBits )
963 {
964 *bitstream = startBits; // reset bitstream state
965 doEscape = true;
966 }
967 }
968
969 if ( doEscape == true )
970 {
971 // write bitstream header and coefs
972 BitBufferWrite( bitstream, 0, 12 );
973 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
974 if ( partialFrame )
975 BitBufferWrite( bitstream, numSamples, 32 );
976
977 // just copy the input data to the output buffer
978 switch ( mBitDepth )
979 {
980 case 16:
981 input16 = (int16_t *) inputBuffer;
982 for ( index = 0; index < (numSamples * stride); index += stride )
983 BitBufferWrite( bitstream, input16[index], 16 );
984 break;
985 case 20:
986 // convert 20-bit data to 32-bit for simplicity
987 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
988 for ( index = 0; index < numSamples; index++ )
989 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
990 break;
991 case 24:
992 // convert 24-bit data to 32-bit for simplicity
993 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
994 for ( index = 0; index < numSamples; index++ )
995 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
996 break;
997 case 32:
998 input32 = (int32_t *) inputBuffer;
999 for ( index = 0; index < (numSamples * stride); index += stride )
1000 BitBufferWrite( bitstream, input32[index], 32 );
1001 break;
1002 }
1003 #if VERBOSE_DEBUG
1004 DebugMsg( "escape!: %lu vs %lu", minBits, (numSamples * mBitDepth) );
1005 #endif
1006 }
1007
1008 Exit:
1009 return status;
1010 }
1011
1012 #if PRAGMA_MARK
1013 #pragma mark -
1014 #endif
1015
1016 /*
1017 Encode()
1018 - encode the next block of samples
1019 */
Encode(AudioFormatDescription theInputFormat,AudioFormatDescription theOutputFormat,unsigned char * theReadBuffer,unsigned char * theWriteBuffer,int32_t * ioNumBytes)1020 int32_t ALACEncoder::Encode(AudioFormatDescription theInputFormat, AudioFormatDescription theOutputFormat,
1021 unsigned char * theReadBuffer, unsigned char * theWriteBuffer, int32_t * ioNumBytes)
1022 {
1023 uint32_t numFrames;
1024 uint32_t outputSize;
1025 BitBuffer bitstream;
1026 int32_t status;
1027
1028 numFrames = *ioNumBytes/theInputFormat.mBytesPerPacket;
1029
1030 // create a bit buffer structure pointing to our output buffer
1031 BitBufferInit( &bitstream, theWriteBuffer, mMaxOutputBytes );
1032
1033 if ( theInputFormat.mChannelsPerFrame == 2 )
1034 {
1035 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
1036 BitBufferWrite( &bitstream, ID_CPE, 3 );
1037 BitBufferWrite( &bitstream, 0, 4 );
1038
1039 // encode stereo input buffer
1040 if ( mFastMode == false )
1041 status = this->EncodeStereo( &bitstream, theReadBuffer, 2, 0, numFrames );
1042 else
1043 status = this->EncodeStereoFast( &bitstream, theReadBuffer, 2, 0, numFrames );
1044 RequireNoErr( status, goto Exit; );
1045 }
1046 else if ( theInputFormat.mChannelsPerFrame == 1 )
1047 {
1048 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
1049 BitBufferWrite( &bitstream, ID_SCE, 3 );
1050 BitBufferWrite( &bitstream, 0, 4 );
1051
1052 // encode mono input buffer
1053 status = this->EncodeMono( &bitstream, theReadBuffer, 1, 0, numFrames );
1054 RequireNoErr( status, goto Exit; );
1055 }
1056 else
1057 {
1058 char * inputBuffer;
1059 uint32_t tag;
1060 uint32_t channelIndex;
1061 uint32_t inputIncrement;
1062 uint8_t stereoElementTag;
1063 uint8_t monoElementTag;
1064 uint8_t lfeElementTag;
1065
1066 inputBuffer = (char *) theReadBuffer;
1067 inputIncrement = ((mBitDepth + 7) / 8);
1068
1069 stereoElementTag = 0;
1070 monoElementTag = 0;
1071 lfeElementTag = 0;
1072
1073 for ( channelIndex = 0; channelIndex < theInputFormat.mChannelsPerFrame; )
1074 {
1075 tag = (sChannelMaps[theInputFormat.mChannelsPerFrame - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3);
1076
1077 BitBufferWrite( &bitstream, tag, 3 );
1078 switch ( tag )
1079 {
1080 case ID_SCE:
1081 // mono
1082 BitBufferWrite( &bitstream, monoElementTag, 4 );
1083
1084 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1085
1086 inputBuffer += inputIncrement;
1087 channelIndex++;
1088 monoElementTag++;
1089 break;
1090
1091 case ID_CPE:
1092 // stereo
1093 BitBufferWrite( &bitstream, stereoElementTag, 4 );
1094
1095 status = this->EncodeStereo( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1096
1097 inputBuffer += (inputIncrement * 2);
1098 channelIndex += 2;
1099 stereoElementTag++;
1100 break;
1101
1102 case ID_LFE:
1103 // LFE channel (subwoofer)
1104 BitBufferWrite( &bitstream, lfeElementTag, 4 );
1105
1106 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1107
1108 inputBuffer += inputIncrement;
1109 channelIndex++;
1110 lfeElementTag++;
1111 break;
1112
1113 default:
1114 status = kALAC_ParamError;
1115 goto Exit;
1116 }
1117
1118 RequireNoErr( status, goto Exit; );
1119 }
1120 }
1121
1122 #if VERBOSE_DEBUG
1123 {
1124 // if there is room left in the output buffer, add some random fill data to test decoder
1125 int32_t bitsLeft;
1126 int32_t bytesLeft;
1127
1128 bitsLeft = BitBufferGetPosition( &bitstream ) - 3; // - 3 for ID_END tag
1129 bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8);
1130
1131 if ( (bytesLeft > 20) && ((bytesLeft & 0x4u) != 0) )
1132 AddFiller( &bitstream, bytesLeft );
1133 }
1134 #endif
1135
1136 // add 3-bit frame end tag: ID_END
1137 BitBufferWrite( &bitstream, ID_END, 3 );
1138
1139 // byte-align the output data
1140 BitBufferByteAlign( &bitstream, true );
1141
1142 outputSize = BitBufferGetPosition( &bitstream ) / 8;
1143 //Assert( outputSize <= mMaxOutputBytes );
1144
1145
1146 // all good, let iTunes know what happened and remember the total number of input sample frames
1147 *ioNumBytes = outputSize;
1148 //mEncodedFrames += encodeMsg->numInputSamples;
1149
1150 // gather encoding stats
1151 mTotalBytesGenerated += outputSize;
1152 mMaxFrameBytes = MAX( mMaxFrameBytes, outputSize );
1153
1154 status = ALAC_noErr;
1155
1156 Exit:
1157 return status;
1158 }
1159
1160 /*
1161 Finish()
1162 - drain out any leftover samples
1163 */
1164
Finish()1165 int32_t ALACEncoder::Finish()
1166 {
1167 /* // finalize bit rate statistics
1168 if ( mSampleSize.numEntries != 0 )
1169 mAvgBitRate = (uint32_t)( (((float)mTotalBytesGenerated * 8.0f) / (float)mSampleSize.numEntries) * ((float)mSampleRate / (float)mFrameSize) );
1170 else
1171 mAvgBitRate = 0;
1172 */
1173 return ALAC_noErr;
1174 }
1175
1176 #if PRAGMA_MARK
1177 #pragma mark -
1178 #endif
1179
1180 /*
1181 GetConfig()
1182 */
GetConfig(ALACSpecificConfig & config)1183 void ALACEncoder::GetConfig( ALACSpecificConfig & config )
1184 {
1185 config.frameLength = Swap32NtoB(mFrameSize);
1186 config.compatibleVersion = (uint8_t) kALACCompatibleVersion;
1187 config.bitDepth = (uint8_t) mBitDepth;
1188 config.pb = (uint8_t) PB0;
1189 config.kb = (uint8_t) KB0;
1190 config.mb = (uint8_t) MB0;
1191 config.numChannels = (uint8_t) mNumChannels;
1192 config.maxRun = Swap16NtoB((uint16_t) MAX_RUN_DEFAULT);
1193 config.maxFrameBytes = Swap32NtoB(mMaxFrameBytes);
1194 config.avgBitRate = Swap32NtoB(mAvgBitRate);
1195 config.sampleRate = Swap32NtoB(mOutputSampleRate);
1196 }
1197
GetMagicCookieSize(uint32_t inNumChannels)1198 uint32_t ALACEncoder::GetMagicCookieSize(uint32_t inNumChannels)
1199 {
1200 if (inNumChannels > 2)
1201 {
1202 return sizeof(ALACSpecificConfig) + kChannelAtomSize + sizeof(ALACAudioChannelLayout);
1203 }
1204 else
1205 {
1206 return sizeof(ALACSpecificConfig);
1207 }
1208 }
1209
GetMagicCookie(void * outCookie,uint32_t * ioSize)1210 void ALACEncoder::GetMagicCookie(void * outCookie, uint32_t * ioSize)
1211 {
1212 ALACSpecificConfig theConfig = {0};
1213 ALACAudioChannelLayout theChannelLayout = {0};
1214 uint8_t theChannelAtom[kChannelAtomSize] = {0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0};
1215 uint32_t theCookieSize = sizeof(ALACSpecificConfig);
1216 uint8_t * theCookiePointer = (uint8_t *)outCookie;
1217
1218 GetConfig(theConfig);
1219 if (theConfig.numChannels > 2)
1220 {
1221 theChannelLayout.mChannelLayoutTag = Swap32NtoB(ALACChannelLayoutTags[theConfig.numChannels - 1]);
1222 theCookieSize += (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1223 }
1224 if (*ioSize >= theCookieSize)
1225 {
1226 memcpy(theCookiePointer, &theConfig, sizeof(ALACSpecificConfig));
1227 theChannelAtom[3] = (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1228 if (theConfig.numChannels > 2)
1229 {
1230 theCookiePointer += sizeof(ALACSpecificConfig);
1231 memcpy(theCookiePointer, theChannelAtom, kChannelAtomSize);
1232 theCookiePointer += kChannelAtomSize;
1233 memcpy(theCookiePointer, &theChannelLayout, sizeof(ALACAudioChannelLayout));
1234 }
1235 *ioSize = theCookieSize;
1236 }
1237 else
1238 {
1239 *ioSize = 0; // no incomplete cookies
1240 }
1241 }
1242
1243 /*
1244 InitializeEncoder()
1245 - initialize the encoder component with the current config
1246 */
InitializeEncoder(AudioFormatDescription theOutputFormat)1247 int32_t ALACEncoder::InitializeEncoder(AudioFormatDescription theOutputFormat)
1248 {
1249 int32_t status;
1250
1251 mOutputSampleRate = theOutputFormat.mSampleRate;
1252 mNumChannels = theOutputFormat.mChannelsPerFrame;
1253 switch(theOutputFormat.mFormatFlags)
1254 {
1255 case 1:
1256 mBitDepth = 16;
1257 break;
1258 case 2:
1259 mBitDepth = 20;
1260 break;
1261 case 3:
1262 mBitDepth = 24;
1263 break;
1264 case 4:
1265 mBitDepth = 32;
1266 break;
1267 default:
1268 break;
1269 }
1270
1271 // set up default encoding parameters and state
1272 // - note: mFrameSize is set in the constructor or via SetFrameSize() which must be called before this routine
1273 for ( uint32_t index = 0; index < kALACMaxChannels; index++ )
1274 mLastMixRes[index] = kDefaultMixRes;
1275
1276 // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1277 // but note that this can be bigger than the input size!
1278 // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
1279 mMaxOutputBytes = mFrameSize * mNumChannels * ((10 + kMaxSampleSize) / 8) + 1;
1280
1281 // allocate mix buffers
1282 mMixBufferU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1283 mMixBufferV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1284
1285 // allocate dynamic predictor buffers
1286 mPredictorU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1287 mPredictorV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1288
1289 // allocate combined shift buffer
1290 mShiftBufferUV = (uint16_t *) calloc( mFrameSize * 2 * sizeof(uint16_t),1 );
1291
1292 // allocate work buffer for search loop
1293 mWorkBuffer = (uint8_t *) calloc( mMaxOutputBytes, 1 );
1294
1295 RequireAction( (mMixBufferU != nil) && (mMixBufferV != nil) &&
1296 (mPredictorU != nil) && (mPredictorV != nil) &&
1297 (mShiftBufferUV != nil) && (mWorkBuffer != nil ),
1298 status = kALAC_MemFullError; goto Exit; );
1299
1300 status = ALAC_noErr;
1301
1302
1303 // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
1304 for ( int32_t channel = 0; channel < (int32_t)mNumChannels; channel++ )
1305 {
1306 for ( int32_t search = 0; search < kALACMaxSearches; search++ )
1307 {
1308 init_coefs( mCoefsU[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1309 init_coefs( mCoefsV[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1310 }
1311 }
1312
1313 Exit:
1314 return status;
1315 }
1316
1317 /*
1318 GetSourceFormat()
1319 - given the input format, return one of our supported formats
1320 */
GetSourceFormat(const AudioFormatDescription * source,AudioFormatDescription * output)1321 void ALACEncoder::GetSourceFormat( const AudioFormatDescription * source, AudioFormatDescription * output )
1322 {
1323 // default is 16-bit native endian
1324 // - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1325 // to encode to 16-bit since the source was lossy in the first place
1326 // - note: if not a supported bit depth, find the closest supported bit depth to the input one
1327 if ( (source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) ||
1328 ( source->mBitsPerChannel <= 16 ) )
1329 mBitDepth = 16;
1330 else if ( source->mBitsPerChannel <= 20 )
1331 mBitDepth = 20;
1332 else if ( source->mBitsPerChannel <= 24 )
1333 mBitDepth = 24;
1334 else
1335 mBitDepth = 32;
1336
1337 // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1338 // and sample rate were specified when we were configured
1339 /*
1340 MakeUncompressedAudioFormat( mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output );
1341 */
1342 }
1343
1344
1345
1346 #if VERBOSE_DEBUG
1347
1348 #if PRAGMA_MARK
1349 #pragma mark -
1350 #endif
1351
1352 /*
1353 AddFiller()
1354 - add fill and data stream elements to the bitstream to test the decoder
1355 */
AddFiller(BitBuffer * bits,int32_t numBytes)1356 static void AddFiller( BitBuffer * bits, int32_t numBytes )
1357 {
1358 uint8_t tag;
1359 uint32_t index;
1360
1361 // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1362 numBytes -= 6;
1363 if ( numBytes <= 0 )
1364 return;
1365
1366 // randomly pick Fill or Data Stream Element based on numBytes requested
1367 tag = (numBytes & 0x8) ? ID_FIL : ID_DSE;
1368
1369 BitBufferWrite( bits, tag, 3 );
1370 if ( tag == ID_FIL )
1371 {
1372 // can't write more than 269 bytes in a fill element
1373 numBytes = (numBytes > 269) ? 269 : numBytes;
1374
1375 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1376 if ( numBytes >= 15 )
1377 {
1378 uint16_t extensionSize;
1379
1380 BitBufferWrite( bits, 15, 4 );
1381
1382 // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1383 // - otherwise, there's no way to represent 15
1384 // - for example, to really mean 15 bytes you must encode extensionSize = 1
1385 // - why it's not like data stream elements I have no idea
1386 extensionSize = (numBytes - 15) + 1;
1387 Assert( extensionSize <= 255 );
1388 BitBufferWrite( bits, extensionSize, 8 );
1389 }
1390 else
1391 BitBufferWrite( bits, numBytes, 4 );
1392
1393 BitBufferWrite( bits, 0x10, 8 ); // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1394 for ( index = 0; index < (numBytes - 1); index++ )
1395 BitBufferWrite( bits, 0xa5, 8 ); // fill_byte = b10100101 = 0xa5
1396 }
1397 else
1398 {
1399 // can't write more than 510 bytes in a data stream element
1400 numBytes = (numBytes > 510) ? 510 : numBytes;
1401
1402 BitBufferWrite( bits, 0, 4 ); // element instance tag
1403 BitBufferWrite( bits, 1, 1 ); // byte-align flag = true
1404
1405 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1406 if ( numBytes >= 255 )
1407 {
1408 BitBufferWrite( bits, 255, 8 );
1409 BitBufferWrite( bits, numBytes - 255, 8 );
1410 }
1411 else
1412 BitBufferWrite( bits, numBytes, 8 );
1413
1414 BitBufferByteAlign( bits, true ); // byte-align with zeros
1415
1416 for ( index = 0; index < numBytes; index++ )
1417 BitBufferWrite( bits, 0x5a, 8 );
1418 }
1419 }
1420
1421 #endif /* VERBOSE_DEBUG */
1422