1 /**
2 * Aften: A/52 audio encoder
3 * Copyright (c) 2006 Justin Ruggles
4 * 2007 Prakash Punnoor <prakash@punnoor.de>
5 *
6 * Based on "The simplest AC3 encoder" from FFmpeg
7 * Copyright (c) 2000 Fabrice Bellard.
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 /**
25 * @file a52enc.c
26 * A/52 encoder
27 */
28
29 #include "common.h"
30
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <assert.h>
35
36 #include "a52.h"
37 #include "bitalloc.h"
38 #include "crc.h"
39 #include "mdct.h"
40 #include "window.h"
41 #include "exponent.h"
42 #include "dynrng.h"
43 #include "cpu_caps.h"
44
/**
 * Lookup table for the number of exponent groups present.
 * Indexed as nexpgrptab[exponent strategy][number of coefficients].
 * Not filled in here; it starts out zero-initialized like any global.
 */
int nexpgrptab[3][256];
50
51 /**
52 * Pre-defined sets of exponent strategies. A strategy set is selected for
53 * each channel in a frame. All sets 1 to 5 use the same number of exponent
54 * bits. Set 0 is only used as the reference of optimal accuracy.
55 * TODO: more options and other sets which use greater or fewer bits
56 */
57 uint8_t str_predef[6][6] = {
58 { EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
59 { EXP_D15, EXP_REUSE, EXP_REUSE, EXP_REUSE, EXP_REUSE, EXP_REUSE },
60 { EXP_D25, EXP_REUSE, EXP_REUSE, EXP_D25, EXP_REUSE, EXP_REUSE },
61 { EXP_D25, EXP_REUSE, EXP_REUSE, EXP_D45, EXP_REUSE, EXP_D45 },
62 { EXP_D25, EXP_REUSE, EXP_D45, EXP_REUSE, EXP_D45, EXP_REUSE },
63 { EXP_D45, EXP_D45, EXP_REUSE, EXP_D45, EXP_REUSE, EXP_D45 }
64 };
65
/*
 * Four pairs of values, one pair per table row.
 * NOTE(review): presumably the first/last coefficient index of each
 * rematrixing band -- the table is not referenced in the visible part of
 * this file, confirm against its users.
 */
static const uint8_t rematbndtab[4][2] = {
    { 13,  24 },
    { 25,  36 },
    { 37,  60 },
    { 61, 252 }
};
69
/*
 * Base sample rates accepted by the encoder. aften_encode_init() also
 * accepts each of these shifted right by 1 or 2 (half / quarter rate).
 */
const uint16_t a52_freqs[3] = {
    48000,
    44100,
    32000
};
72
/*
 * Possible bitrates, in ascending order (values presumably in kbit/s).
 * The index of an entry, times two, is used as the frame size code by
 * aften_encode_init().
 */
const uint16_t a52_bitratetab[19] = {
     32,  40,  48,  56,  64,
     80,  96, 112, 128, 160,
    192, 224, 256, 320, 384,
    448, 512, 576, 640
};
78
#if !defined(NO_THREADS)
/* Worker thread entry point; forward-declared here, defined later on. */
static int threaded_encode(void *vtctx);
#endif
82
83 const char *
aften_get_version(void)84 aften_get_version(void)
85 {
86 #ifdef SVN_VERSION
87 static const char *const str = AFTEN_VERSION "-r" SVN_VERSION;
88 #else
89 static const char *const str = AFTEN_VERSION;
90 #endif
91
92 return str;
93 }
94
95 static void
set_available_simd_instructions(AftenSimdInstructions * simd_instructions)96 set_available_simd_instructions(AftenSimdInstructions *simd_instructions)
97 {
98 cpu_caps_detect();
99
100 memset(simd_instructions, 0, sizeof(AftenSimdInstructions));
101
102 #ifdef HAVE_MMX
103 simd_instructions->mmx = cpu_caps_have_mmx();
104 #endif
105 #ifdef HAVE_SSE
106 simd_instructions->sse = cpu_caps_have_sse();
107 #endif
108 #ifdef HAVE_SSE2
109 simd_instructions->sse2 = cpu_caps_have_sse2();
110 #endif
111 #ifdef HAVE_SSE3
112 simd_instructions->sse3 = cpu_caps_have_sse3();
113 #endif
114 /* Following SIMD code doesn't exist yet, so don't set it available */
115 #if 0
116 #ifdef HAVE_SSSE3
117 simd_instructions->ssse3 = cpu_caps_have_ssse3();
118 #endif
119 #ifdef HAVE_HAVE_3DNOW
120 simd_instructions->amd_3dnow = cpu_caps_have_3dnow();
121 #endif
122 #ifdef HAVE_HAVE_SSE_MMX
123 simd_instructions->amd_sse_mmx = cpu_caps_have_sse_mmx();
124 #endif
125 #ifdef HAVE_HAVE_3DNOWEXT
126 simd_instructions->amd_3dnowext = cpu_caps_have_3dnowext();
127 #endif
128 #endif
129 #ifdef HAVE_ALTIVEC
130 simd_instructions->altivec = cpu_caps_have_altivec();
131 #endif
132 }
133
134 void
aften_set_defaults(AftenContext * s)135 aften_set_defaults(AftenContext *s)
136 {
137 if(s == NULL) {
138 fprintf(stderr, "NULL parameter passed to aften_set_defaults\n");
139 return;
140 }
141
142
143 /**
144 * These 5 must be set explicitly before initialization.
145 * There are utility functions to help setting acmod and lfe.
146 */
147
148 /* Tell the context which SIMD instruction sets are available. */
149 set_available_simd_instructions(&s->system.available_simd_instructions);
150 s->system.wanted_simd_instructions = s->system.available_simd_instructions;
151 s->system.n_threads = 0;
152
153 s->verbose = 1;
154 s->channels = -1;
155 s->samplerate = -1;
156 s->acmod = -1;
157 s->lfe = -1;
158
159 s->sample_format = A52_SAMPLE_FMT_S16;
160 s->private_context = NULL;
161 s->params.encoding_mode = AFTEN_ENC_MODE_CBR;
162 s->params.bitrate = 0;
163 s->params.quality = 240;
164 s->params.bwcode = -1;
165 s->params.use_rematrixing = 1;
166 s->params.use_block_switching = 0;
167 s->params.use_bw_filter = 0;
168 s->params.use_dc_filter = 0;
169 s->params.use_lfe_filter = 0;
170 s->params.bitalloc_fast = 0;
171 s->params.expstr_fast = 0;
172 s->params.dynrng_profile = DYNRNG_PROFILE_NONE;
173 s->params.min_bwcode = 0;
174 s->params.max_bwcode = 60;
175
176 s->meta.cmixlev = 0;
177 s->meta.surmixlev = 0;
178 s->meta.dsurmod = 0;
179 s->meta.dialnorm = 31;
180 s->meta.xbsi1e = 0;
181 s->meta.dmixmod = 0;
182 s->meta.ltrtcmixlev = 4;
183 s->meta.ltrtsmixlev = 4;
184 s->meta.lorocmixlev = 4;
185 s->meta.lorosmixlev = 4;
186 s->meta.xbsi2e = 0;
187 s->meta.dsurexmod = 0;
188 s->meta.dheadphonmod = 0;
189 s->meta.adconvtyp = 0;
190
191 s->status.quality = 0;
192 s->status.bit_rate = 0;
193 s->status.bwcode = 0;
194 }
195
196 static void
fmt_convert_from_u8(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)197 fmt_convert_from_u8(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
198 const void *vsrc, int nch, int n)
199 {
200 int i, j, ch;
201 const uint8_t *src = vsrc;
202
203 for(ch=0; ch<nch; ch++) {
204 FLOAT *dest_ch = dest[ch];
205 const uint8_t *src_ch = src + ch;
206 for(i=0, j=0; i<n; i++, j+=nch) {
207 dest_ch[i] = (src_ch[j]-FCONST(128.0)) / FCONST(128.0);
208 }
209 }
210 }
211
212 static void
fmt_convert_from_s16(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)213 fmt_convert_from_s16(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
214 const void *vsrc, int nch, int n)
215 {
216 int i, j, ch;
217 const int16_t *src = vsrc;
218
219 for(ch=0; ch<nch; ch++) {
220 FLOAT *dest_ch = dest[ch];
221 const int16_t *src_ch = src + ch;
222 for(i=0, j=0; i<n; i++, j+=nch) {
223 dest_ch[i] = src_ch[j] / FCONST(32768.0);
224 }
225 }
226 }
227
228 static void
fmt_convert_from_s20(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)229 fmt_convert_from_s20(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
230 const void *vsrc, int nch, int n)
231 {
232 int i, j, ch;
233 const int32_t *src = vsrc;
234
235 for(ch=0; ch<nch; ch++) {
236 FLOAT *dest_ch = dest[ch];
237 const int32_t *src_ch = src + ch;
238 for(i=0, j=0; i<n; i++, j+=nch) {
239 dest_ch[i] = src_ch[j] / FCONST(524288.0);
240 }
241 }
242 }
243
244 static void
fmt_convert_from_s24(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)245 fmt_convert_from_s24(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
246 const void *vsrc, int nch, int n)
247 {
248 int i, j, ch;
249 const int32_t *src = vsrc;
250
251 for(ch=0; ch<nch; ch++) {
252 FLOAT *dest_ch = dest[ch];
253 const int32_t *src_ch = src + ch;
254 for(i=0, j=0; i<n; i++, j+=nch) {
255 dest_ch[i] = src_ch[j] / FCONST(8388608.0);
256 }
257 }
258 }
259
260 static void
fmt_convert_from_s32(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)261 fmt_convert_from_s32(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
262 const void *vsrc, int nch, int n)
263 {
264 int i, j, ch;
265 const int32_t *src = vsrc;
266
267 for(ch=0; ch<nch; ch++) {
268 FLOAT *dest_ch = dest[ch];
269 const int32_t *src_ch = src + ch;
270 for(i=0, j=0; i<n; i++, j+=nch) {
271 dest_ch[i] = src_ch[j] / FCONST(2147483648.0);
272 }
273 }
274 }
275
276 static void
fmt_convert_from_float(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)277 fmt_convert_from_float(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
278 const void *vsrc, int nch, int n)
279 {
280 int i, j, ch;
281 const float *src = vsrc;
282
283 for(ch=0; ch<nch; ch++) {
284 FLOAT *dest_ch = dest[ch];
285 const float *src_ch = src + ch;
286 for(i=0, j=0; i<n; i++, j+=nch) {
287 dest_ch[i] = src_ch[j];
288 }
289 }
290 }
291
292 static void
fmt_convert_from_double(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)293 fmt_convert_from_double(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
294 const void *vsrc, int nch, int n)
295 {
296 int i, j, ch;
297 const double *src = vsrc;
298
299 for(ch=0; ch<nch; ch++) {
300 FLOAT *dest_ch = dest[ch];
301 const double *src_ch = src + ch;
302 for(i=0, j=0; i<n; i++, j+=nch) {
303 dest_ch[i] = (FLOAT)src_ch[j];
304 }
305 }
306 }
307
308 static void
select_mdct(A52Context * ctx)309 select_mdct(A52Context *ctx)
310 {
311 #ifndef CONFIG_DOUBLE
312 #ifdef HAVE_SSE3
313 if (cpu_caps_have_sse3()) {
314 sse3_mdct_init(ctx);
315 return;
316 }
317 #endif
318 #ifdef HAVE_SSE
319 if (cpu_caps_have_sse()) {
320 sse_mdct_init(ctx);
321 return;
322 }
323 #endif
324 #ifdef HAVE_ALTIVEC
325 if (cpu_caps_have_altivec()) {
326 mdct_init_altivec(ctx);
327 return;
328 }
329 #endif
330 #endif /* CONFIG_DOUBLE */
331 mdct_init(ctx);
332 }
333
334 static void
select_mdct_thread(A52ThreadContext * tctx)335 select_mdct_thread(A52ThreadContext *tctx)
336 {
337 #ifndef CONFIG_DOUBLE
338 #ifdef HAVE_SSE3
339 if (cpu_caps_have_sse3()) {
340 sse3_mdct_thread_init(tctx);
341 return;
342 }
343 #endif
344 #ifdef HAVE_SSE
345 if (cpu_caps_have_sse()) {
346 sse_mdct_thread_init(tctx);
347 return;
348 }
349 #endif
350 #ifdef HAVE_ALTIVEC
351 if (cpu_caps_have_altivec()) {
352 mdct_thread_init_altivec(tctx);
353 return;
354 }
355 #endif
356 #endif /* CONFIG_DOUBLE */
357 mdct_thread_init(tctx);
358 }
359
360 int
aften_encode_init(AftenContext * s)361 aften_encode_init(AftenContext *s)
362 {
363 A52Context *ctx;
364 A52ThreadContext *tctx;
365 int i, j, brate;
366 int last_quality;
367
368 if(s == NULL) {
369 fprintf(stderr, "NULL parameter passed to aften_encode_init\n");
370 return -1;
371 }
372 cpu_caps_detect();
373 apply_simd_restrictions(&s->system.wanted_simd_instructions);
374
375 ctx = calloc(sizeof(A52Context), 1);
376 if(!ctx) {
377 fprintf(stderr, "error allocating memory for A52Context\n");
378 return -1;
379 }
380 select_mdct(ctx);
381 s->private_context = ctx;
382
383 switch(s->sample_format) {
384 case A52_SAMPLE_FMT_U8: ctx->fmt_convert_from_src = fmt_convert_from_u8;
385 break;
386 case A52_SAMPLE_FMT_S16: ctx->fmt_convert_from_src = fmt_convert_from_s16;
387 break;
388 case A52_SAMPLE_FMT_S20: ctx->fmt_convert_from_src = fmt_convert_from_s20;
389 break;
390 case A52_SAMPLE_FMT_S24: ctx->fmt_convert_from_src = fmt_convert_from_s24;
391 break;
392 case A52_SAMPLE_FMT_S32: ctx->fmt_convert_from_src = fmt_convert_from_s32;
393 break;
394 case A52_SAMPLE_FMT_FLT: ctx->fmt_convert_from_src = fmt_convert_from_float;
395 break;
396 case A52_SAMPLE_FMT_DBL: ctx->fmt_convert_from_src = fmt_convert_from_double;
397 break;
398 default: break;
399 }
400
401 // channel configuration
402 if(s->channels < 1 || s->channels > 6) {
403 fprintf(stderr, "invalid number of channels\n");
404 return -1;
405 }
406 if(s->acmod < 0 || s->acmod > 7) {
407 fprintf(stderr, "invalid acmod\n");
408 return -1;
409 }
410 if(s->channels == 6 && !s->lfe) {
411 fprintf(stderr, "6-channel audio must have LFE channel\n");
412 return -1;
413 }
414 if(s->channels == 1 && s->lfe) {
415 fprintf(stderr, "cannot encode stand-alone LFE channel\n");
416 return -1;
417 }
418 ctx->acmod = s->acmod;
419 ctx->lfe = s->lfe;
420 ctx->n_all_channels = s->channels;
421 ctx->n_channels = s->channels - s->lfe;
422 ctx->lfe_channel = s->lfe ? (s->channels - 1) : -1;
423
424 ctx->params = s->params;
425 ctx->meta = s->meta;
426
427 // frequency
428 for(i=0;i<3;i++) {
429 for(j=0;j<3;j++)
430 if((a52_freqs[j] >> i) == s->samplerate)
431 goto found;
432 }
433 fprintf(stderr, "invalid sample rate\n");
434 return -1;
435 found:
436 ctx->sample_rate = s->samplerate;
437 ctx->halfratecod = i;
438 ctx->fscod = j;
439 if(ctx->halfratecod) {
440 // DolbyNet
441 ctx->bsid = 8 + ctx->halfratecod;
442 } else if(ctx->meta.xbsi1e || ctx->meta.xbsi2e) {
443 // alternate bit stream syntax
444 ctx->bsid = 6;
445 } else {
446 // normal AC-3
447 ctx->bsid = 8;
448 }
449 ctx->bsmod = 0;
450
451 // bitrate & frame size
452 brate = s->params.bitrate;
453 if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
454 if(brate == 0) {
455 switch(ctx->n_channels) {
456 case 1: brate = 96; break;
457 case 2: brate = 192; break;
458 case 3: brate = 256; break;
459 case 4: brate = 384; break;
460 case 5: brate = 448; break;
461 }
462 }
463 } else if(ctx->params.encoding_mode == AFTEN_ENC_MODE_VBR) {
464 if(s->params.quality < 0 || s->params.quality > 1023) {
465 fprintf(stderr, "invalid quality setting\n");
466 return -1;
467 }
468 } else {
469 return -1;
470 }
471
472 for(i=0; i<19; i++) {
473 if((a52_bitratetab[i] >> ctx->halfratecod) == brate)
474 break;
475 }
476 if(i == 19) {
477 if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
478 fprintf(stderr, "invalid bitrate\n");
479 return -1;
480 }
481 i = 18;
482 }
483 ctx->frmsizecod = i*2;
484 ctx->target_bitrate = a52_bitratetab[i] >> ctx->halfratecod;
485
486 bitalloc_init();
487 crc_init();
488 a52_window_init(ctx);
489 exponent_init(ctx);
490 dynrng_init();
491
492 // can't do block switching with low sample rate due to the high-pass filter
493 if(ctx->sample_rate <= 16000) {
494 ctx->params.use_block_switching = 0;
495 }
496
497 last_quality = 240;
498 if(ctx->params.encoding_mode == AFTEN_ENC_MODE_VBR) {
499 last_quality = ctx->params.quality;
500 } else if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
501 last_quality = ((((ctx->target_bitrate/ctx->n_channels)*35)/24)+95)+(25*ctx->halfratecod);
502 }
503
504 // Initialize thread specific contexts
505 ctx->n_threads = (s->system.n_threads > 0) ? s->system.n_threads : get_ncpus();
506 ctx->n_threads = MIN(ctx->n_threads, MAX_NUM_THREADS);
507 s->system.n_threads = ctx->n_threads;
508 tctx = calloc(sizeof(A52ThreadContext), ctx->n_threads);
509 ctx->tctx = tctx;
510
511 for (j=0; j<ctx->n_threads; ++j) {
512 A52ThreadContext *cur_tctx = &ctx->tctx[j];
513 cur_tctx->ctx = ctx;
514 cur_tctx->thread_num = j;
515
516 select_mdct_thread(cur_tctx);
517
518 cur_tctx->bit_cnt = 0;
519 cur_tctx->sample_cnt = 0;
520
521 cur_tctx->last_quality = last_quality;
522
523 if (ctx->n_threads > 1) {
524 cur_tctx->state = START;
525
526 posix_cond_init(&cur_tctx->ts.enter_cond);
527 posix_cond_init(&cur_tctx->ts.confirm_cond);
528 posix_cond_init(&cur_tctx->ts.samples_cond);
529
530 posix_mutex_init(&cur_tctx->ts.enter_mutex);
531 posix_mutex_init(&cur_tctx->ts.confirm_mutex);
532
533 windows_event_init(&cur_tctx->ts.ready_event);
534 windows_event_init(&cur_tctx->ts.enter_event);
535 windows_event_init(&cur_tctx->ts.samples_event);
536
537 posix_mutex_lock(&cur_tctx->ts.enter_mutex);
538 thread_create(&cur_tctx->ts.thread, threaded_encode, cur_tctx);
539 posix_cond_wait(&cur_tctx->ts.enter_cond, &cur_tctx->ts.enter_mutex);
540 posix_mutex_unlock(&cur_tctx->ts.enter_mutex);
541 }
542 }
543 for (j=0; j<ctx->n_threads; ++j) {
544 #ifdef HAVE_POSIX_THREADS
545 ctx->tctx[j].ts.next_samples_cond = &ctx->tctx[(j + 1) % ctx->n_threads].ts.samples_cond;
546 #endif
547 #ifdef HAVE_WINDOWS_THREADS
548 ctx->tctx[j].ts.next_samples_event = &ctx->tctx[(j + 1) % ctx->n_threads].ts.samples_event;
549 #endif
550 }
551 posix_mutex_init(&ctx->ts.samples_mutex);
552 windows_cs_init(&ctx->ts.samples_cs);
553
554 if(s->params.bwcode < -2 || s->params.bwcode > 60) {
555 fprintf(stderr, "invalid bandwidth code\n");
556 return -1;
557 }
558 if(ctx->params.bwcode < 0) {
559 int cutoff = ((last_quality-120) * 120) + 4000;
560 ctx->fixed_bwcode = ((cutoff * 512 / ctx->sample_rate) - 73) / 3;
561 if(ctx->params.bwcode == -2) {
562 if(ctx->params.min_bwcode < 0 || ctx->params.min_bwcode > 60 ||
563 ctx->params.max_bwcode < 0 || ctx->params.max_bwcode > 60 ||
564 ctx->params.min_bwcode > ctx->params.max_bwcode) {
565 fprintf(stderr, "invalid min/max bandwidth code\n");
566 return -1;
567 }
568 if(ctx->params.encoding_mode == AFTEN_ENC_MODE_VBR) {
569 fprintf(stderr, "variable bandwidth mode cannot be used with variable bitrate mode\n");
570 return -1;
571 }
572 }
573 ctx->fixed_bwcode = CLIP(ctx->fixed_bwcode, ctx->params.min_bwcode,
574 ctx->params.max_bwcode);
575 } else {
576 ctx->fixed_bwcode = ctx->params.bwcode;
577 }
578
579 // initialize transient-detect filters (one for each channel)
580 // cascaded biquad direct form I high-pass w/ cutoff of 8 kHz
581 if(ctx->params.use_block_switching) {
582 for(i=0; i<ctx->n_all_channels; i++) {
583 ctx->bs_filter[i].type = FILTER_TYPE_HIGHPASS;
584 ctx->bs_filter[i].cascaded = 1;
585 ctx->bs_filter[i].cutoff = 8000;
586 ctx->bs_filter[i].samplerate = (FLOAT)ctx->sample_rate;
587 if(filter_init(&ctx->bs_filter[i], FILTER_ID_BIQUAD_I)) {
588 fprintf(stderr, "error initializing transient-detect filter\n");
589 return -1;
590 }
591 }
592 }
593
594 // initialize DC filters (one for each channel)
595 // one-pole high-pass w/ cutoff of 3 Hz
596 if(ctx->params.use_dc_filter) {
597 for(i=0; i<ctx->n_all_channels; i++) {
598 ctx->dc_filter[i].type = FILTER_TYPE_HIGHPASS;
599 ctx->dc_filter[i].cascaded = 0;
600 ctx->dc_filter[i].cutoff = 3;
601 ctx->dc_filter[i].samplerate = (FLOAT)ctx->sample_rate;
602 if(filter_init(&ctx->dc_filter[i], FILTER_ID_ONEPOLE)) {
603 fprintf(stderr, "error initializing dc filter\n");
604 return -1;
605 }
606 }
607 }
608
609 // initialize bandwidth filters (one for each channel)
610 // butterworth 2nd order cascaded direct form II low-pass
611 if(ctx->params.use_bw_filter) {
612 int cutoff;
613 if(ctx->params.bwcode == -2) {
614 fprintf(stderr, "cannot use bandwidth filter with variable bandwidth\n");
615 return -1;
616 }
617 cutoff = (((ctx->fixed_bwcode * 3) + 73) * ctx->sample_rate) / 512;
618 if(cutoff < 4000) {
619 // disable bandwidth filter if cutoff is below 4000 Hz
620 ctx->params.use_bw_filter = 0;
621 } else {
622 for(i=0; i<ctx->n_channels; i++) {
623 ctx->bw_filter[i].type = FILTER_TYPE_LOWPASS;
624 ctx->bw_filter[i].cascaded = 1;
625 ctx->bw_filter[i].cutoff = (FLOAT)cutoff;
626 ctx->bw_filter[i].samplerate = (FLOAT)ctx->sample_rate;
627 if(filter_init(&ctx->bw_filter[i], FILTER_ID_BUTTERWORTH_II)) {
628 fprintf(stderr, "error initializing bandwidth filter\n");
629 return -1;
630 }
631 }
632 }
633 }
634
635 // initialize LFE filter
636 // butterworth 2nd order cascaded direct form II low-pass w/ cutoff of 120 Hz
637 if(ctx->params.use_lfe_filter) {
638 if(!ctx->lfe) {
639 fprintf(stderr, "cannot use lfe filter. no lfe channel\n");
640 return -1;
641 }
642 ctx->lfe_filter.type = FILTER_TYPE_LOWPASS;
643 ctx->lfe_filter.cascaded = 1;
644 ctx->lfe_filter.cutoff = 120;
645 ctx->lfe_filter.samplerate = (FLOAT)ctx->sample_rate;
646 if(filter_init(&ctx->lfe_filter, FILTER_ID_BUTTERWORTH_II)) {
647 fprintf(stderr, "error initializing lfe filter\n");
648 return -1;
649 }
650 }
651
652 return 0;
653 }
654
655 static int
frame_init(A52ThreadContext * tctx)656 frame_init(A52ThreadContext *tctx)
657 {
658 A52Context *ctx = tctx->ctx;
659 A52Frame *frame = &tctx->frame;
660 A52Block *block;
661 int blk, bnd, ch;
662
663 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
664 block = &frame->blocks[blk];
665 block->block_num = blk;
666 block->rematstr = 0;
667 if(blk == 0) {
668 block->rematstr = 1;
669 for(bnd=0; bnd<4; bnd++) {
670 block->rematflg[bnd] = 0;
671 }
672 }
673 for(ch=0; ch<ctx->n_channels; ch++) {
674 block->blksw[ch] = 0;
675 block->dithflag[ch] = 1;
676
677 // input_samples will be null if context is not initialized
678 if(block->input_samples[ch] == NULL) {
679 return -1;
680 }
681 }
682 }
683
684 if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
685 frame->bit_rate = ctx->target_bitrate;
686 frame->frmsizecod = ctx->frmsizecod;
687 frame->frame_size_min = frame->bit_rate * 96000 / ctx->sample_rate;
688 frame->frame_size = frame->frame_size_min;
689 }
690
691 if(ctx->params.bwcode == -2) {
692 frame->bwcode = 60;
693 } else {
694 frame->bwcode = ctx->fixed_bwcode;
695 }
696 for(ch=0; ch<ctx->n_channels; ch++) {
697 frame->ncoefs[ch] = (frame->bwcode * 3) + 73;
698 }
699 if(ctx->lfe) {
700 frame->ncoefs[ctx->lfe_channel] = 7;
701 }
702
703 frame->frame_bits = 0;
704 frame->exp_bits = 0;
705 frame->mant_bits = 0;
706
707 // default bit allocation params
708 frame->sdecaycod = 2;
709 frame->fdecaycod = 1;
710 frame->sgaincod = 1;
711 frame->dbkneecod = 2;
712 frame->floorcod = 7;
713 frame->fgaincod = 4;
714
715 return 0;
716 }
717
718 /* output the A52 frame header */
719 static void
output_frame_header(A52ThreadContext * tctx,uint8_t * frame_buffer)720 output_frame_header(A52ThreadContext *tctx, uint8_t *frame_buffer)
721 {
722 A52Context *ctx = tctx->ctx;
723 A52Frame *f = &tctx->frame;
724 BitWriter *bw = &tctx->bw;
725 int frmsizecod = f->frmsizecod+(f->frame_size-f->frame_size_min);
726
727 bitwriter_init(bw, frame_buffer, A52_MAX_CODED_FRAME_SIZE);
728
729 bitwriter_writebits(bw, 16, 0x0B77); /* frame header */
730 bitwriter_writebits(bw, 16, 0); /* crc1: will be filled later */
731 bitwriter_writebits(bw, 2, ctx->fscod);
732 bitwriter_writebits(bw, 6, frmsizecod);
733 bitwriter_writebits(bw, 5, ctx->bsid);
734 bitwriter_writebits(bw, 3, ctx->bsmod);
735 bitwriter_writebits(bw, 3, ctx->acmod);
736 if((ctx->acmod & 0x01) && (ctx->acmod != A52_ACMOD_MONO))
737 bitwriter_writebits(bw, 2, ctx->meta.cmixlev);
738 if(ctx->acmod & 0x04)
739 bitwriter_writebits(bw, 2, ctx->meta.surmixlev);
740 if(ctx->acmod == A52_ACMOD_STEREO)
741 bitwriter_writebits(bw, 2, ctx->meta.dsurmod);
742 bitwriter_writebits(bw, 1, ctx->lfe);
743 bitwriter_writebits(bw, 5, ctx->meta.dialnorm);
744 bitwriter_writebits(bw, 1, 0); /* no compression control word */
745 bitwriter_writebits(bw, 1, 0); /* no lang code */
746 bitwriter_writebits(bw, 1, 0); /* no audio production info */
747 if(ctx->acmod == A52_ACMOD_DUAL_MONO) {
748 bitwriter_writebits(bw, 5, ctx->meta.dialnorm);
749 bitwriter_writebits(bw, 1, 0); /* no compression control word 2 */
750 bitwriter_writebits(bw, 1, 0); /* no lang code 2 */
751 bitwriter_writebits(bw, 1, 0); /* no audio production info 2 */
752 }
753 bitwriter_writebits(bw, 1, 0); /* no copyright */
754 bitwriter_writebits(bw, 1, 1); /* original bitstream */
755 if(ctx->bsid == 6) {
756 // alternate bit stream syntax
757 bitwriter_writebits(bw, 1, ctx->meta.xbsi1e);
758 if(ctx->meta.xbsi1e) {
759 bitwriter_writebits(bw, 2, ctx->meta.dmixmod);
760 bitwriter_writebits(bw, 3, ctx->meta.ltrtcmixlev);
761 bitwriter_writebits(bw, 3, ctx->meta.ltrtsmixlev);
762 bitwriter_writebits(bw, 3, ctx->meta.lorocmixlev);
763 bitwriter_writebits(bw, 3, ctx->meta.lorosmixlev);
764 }
765 bitwriter_writebits(bw, 1, ctx->meta.xbsi2e);
766 if(ctx->meta.xbsi2e) {
767 bitwriter_writebits(bw, 2, ctx->meta.dsurexmod);
768 bitwriter_writebits(bw, 2, ctx->meta.dheadphonmod);
769 bitwriter_writebits(bw, 1, ctx->meta.adconvtyp);
770 bitwriter_writebits(bw, 9, 0);
771 }
772 } else {
773 bitwriter_writebits(bw, 1, 0); // timecod1e
774 bitwriter_writebits(bw, 1, 0); // timecod2e
775 }
776 bitwriter_writebits(bw, 1, 0); /* no addtional bit stream info */
777 }
778
/*
 * Symmetric quantization of coefficient 'c' (with exponent 'e') on 'levels'
 * levels: scale by 'levels', shift right by (24 - e), round to nearest via
 * "+1 then >>1", and offset by levels/2 so the result is non-negative.
 * Arguments are evaluated more than once.
 */
#define sym_quant(c, e, levels) \
    (((levels) >> 1) + ((((((levels) * (c)) >> (24 - (e)))) + 1) >> 1))
782
/**
 * Asymmetric quantization on 2^qbits levels.
 *
 * The coefficient is scaled by 2^(e + qbits - 1 - 24), clamped to the
 * signed range [-2^(qbits-1), 2^(qbits-1) - 1] and returned as an unsigned
 * qbits-wide two's complement bit pattern.
 *
 * @param c      coefficient (the caller passes the MDCT value times 2^24)
 * @param e      exponent of the coefficient
 * @param qbits  number of bits of the quantized value
 * @return       quantized value in the range 0 .. 2^qbits - 1
 */
static inline int
asym_quant(int c, int e, int qbits)
{
    const int lshift = e + (qbits - 1) - 24;
    const int m = 1 << (qbits - 1);
    int v;

    if(lshift >= 0) {
        /* multiply rather than "c << lshift": left-shifting a negative
         * value is undefined behavior in C */
        v = c * (1 << lshift);
    } else {
        v = c >> (-lshift);
    }

    /* clamp to the representable signed range */
    if(v < -m) {
        v = -m;
    } else if(v > m - 1) {
        v = m - 1;
    }

    /* keep only the low qbits bits (two's complement pattern) */
    return v & ((1 << qbits) - 1);
}
798
799 static void
quant_mant_ch(FLOAT * mdct_coef,uint8_t * exp,uint8_t * bap,uint16_t * qmant,int ncoefs,uint16_t * qmant_ptr[3],int mant_cnt[3])800 quant_mant_ch(FLOAT *mdct_coef, uint8_t *exp, uint8_t *bap, uint16_t *qmant,
801 int ncoefs, uint16_t *qmant_ptr[3], int mant_cnt[3])
802 {
803 int i, c, e, b, v;
804
805 for(i=0; i<ncoefs; i++) {
806 c = (int)(mdct_coef[i] * (1 << 24));
807 e = exp[i];
808 b = bap[i];
809 switch(b) {
810 case 0:
811 v = 0;
812 break;
813 case 1:
814 v = sym_quant(c, e, 3);
815 if(mant_cnt[0] == 0) {
816 qmant_ptr[0] = &qmant[i];
817 v = 9 * v;
818 } else if(mant_cnt[0] == 1) {
819 *qmant_ptr[0] += 3 * v;
820 v = 128;
821 } else {
822 *qmant_ptr[0] += v;
823 v = 128;
824 }
825 mant_cnt[0] = (mant_cnt[0] + 1) % 3;
826 break;
827 case 2:
828 v = sym_quant(c, e, 5);
829 if(mant_cnt[1] == 0) {
830 qmant_ptr[1] = &qmant[i];
831 v = 25 * v;
832 } else if(mant_cnt[1] == 1) {
833 *qmant_ptr[1] += 5 * v;
834 v = 128;
835 } else {
836 *qmant_ptr[1] += v;
837 v = 128;
838 }
839 mant_cnt[1] = (mant_cnt[1] + 1) % 3;
840 break;
841 case 3:
842 v = sym_quant(c, e, 7);
843 break;
844 case 4:
845 v = sym_quant(c, e, 11);
846 if(mant_cnt[2]== 0) {
847 qmant_ptr[2] = &qmant[i];
848 v = 11 * v;
849 } else {
850 *qmant_ptr[2] += v;
851 v = 128;
852 }
853 mant_cnt[2] = (mant_cnt[2] + 1) % 2;
854 break;
855 case 5:
856 v = sym_quant(c, e, 15);
857 break;
858 case 14:
859 v = asym_quant(c, e, 14);
860 break;
861 case 15:
862 v = asym_quant(c, e, 16);
863 break;
864 default:
865 v = asym_quant(c, e, b - 1);
866 }
867 qmant[i] = v;
868 }
869 }
870
871 static void
quantize_mantissas(A52ThreadContext * tctx)872 quantize_mantissas(A52ThreadContext *tctx)
873 {
874 A52Context *ctx = tctx->ctx;
875 A52Frame *frame = &tctx->frame;
876 A52Block *block;
877 uint16_t *qmant_ptr[3];
878 int blk, ch;
879 int mant_cnt[3];
880
881 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
882 block = &frame->blocks[blk];
883 mant_cnt[0] = mant_cnt[1] = mant_cnt[2] = 0;
884 qmant_ptr[0] = qmant_ptr[1] = qmant_ptr[2] = NULL;
885 for(ch=0; ch<ctx->n_all_channels; ch++) {
886 quant_mant_ch(block->mdct_coef[ch], block->exp[ch], block->bap[ch],
887 block->qmant[ch], frame->ncoefs[ch], qmant_ptr,
888 mant_cnt);
889 }
890 }
891 }
892
893 /* Output each audio block. */
894 static void
output_audio_blocks(A52ThreadContext * tctx)895 output_audio_blocks(A52ThreadContext *tctx)
896 {
897 A52Context *ctx = tctx->ctx;
898 A52Frame *frame = &tctx->frame;
899 A52Block *block;
900 BitWriter *bw;
901 int blk, ch, i, baie, rbnd;
902
903 bw = &tctx->bw;
904 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
905 block = &frame->blocks[blk];
906 for(ch=0; ch<ctx->n_channels; ch++) {
907 bitwriter_writebits(bw, 1, block->blksw[ch]);
908 }
909 for(ch=0; ch<ctx->n_channels; ch++) {
910 bitwriter_writebits(bw, 1, block->dithflag[ch]);
911 }
912 if(ctx->params.dynrng_profile == DYNRNG_PROFILE_NONE) {
913 bitwriter_writebits(bw, 1, 0); // no dynamic range
914 if(ctx->acmod == A52_ACMOD_DUAL_MONO) {
915 bitwriter_writebits(bw, 1, 0); // no dynamic range 2
916 }
917 } else {
918 bitwriter_writebits(bw, 1, 1);
919 bitwriter_writebits(bw, 8, block->dynrng);
920 if(ctx->acmod == A52_ACMOD_DUAL_MONO) {
921 bitwriter_writebits(bw, 1, 1);
922 bitwriter_writebits(bw, 8, block->dynrng);
923 }
924 }
925 if(block->block_num == 0) {
926 // must define coupling strategy in block 0
927 bitwriter_writebits(bw, 1, 1); // new coupling strategy
928 bitwriter_writebits(bw, 1, 0); // no coupling in use
929 } else {
930 bitwriter_writebits(bw, 1, 0); // no new coupling strategy
931 }
932
933 if(ctx->acmod == A52_ACMOD_STEREO) {
934 bitwriter_writebits(bw, 1, block->rematstr);
935 if(block->rematstr) {
936 for(rbnd=0; rbnd<4; rbnd++) {
937 bitwriter_writebits(bw, 1, block->rematflg[rbnd]);
938 }
939 }
940 }
941
942 // exponent strategy
943 for(ch=0; ch<ctx->n_channels; ch++) {
944 bitwriter_writebits(bw, 2, block->exp_strategy[ch]);
945 }
946
947 if(ctx->lfe) {
948 bitwriter_writebits(bw, 1, block->exp_strategy[ctx->lfe_channel]);
949 }
950
951 for(ch=0; ch<ctx->n_channels; ch++) {
952 if(block->exp_strategy[ch] != EXP_REUSE)
953 bitwriter_writebits(bw, 6, frame->bwcode);
954 }
955
956 // exponents
957 for(ch=0; ch<ctx->n_all_channels; ch++) {
958 if(block->exp_strategy[ch] != EXP_REUSE) {
959 // first exponent
960 bitwriter_writebits(bw, 4, block->grp_exp[ch][0]);
961
962 // delta-encoded exponent groups
963 for(i=1; i<=block->nexpgrps[ch]; i++) {
964 bitwriter_writebits(bw, 7, block->grp_exp[ch][i]);
965 }
966
967 // gain range info
968 if(ch != ctx->lfe_channel) {
969 bitwriter_writebits(bw, 2, 0);
970 }
971 }
972 }
973
974 // bit allocation info
975 baie = (block->block_num == 0);
976 bitwriter_writebits(bw, 1, baie);
977 if(baie) {
978 bitwriter_writebits(bw, 2, frame->sdecaycod);
979 bitwriter_writebits(bw, 2, frame->fdecaycod);
980 bitwriter_writebits(bw, 2, frame->sgaincod);
981 bitwriter_writebits(bw, 2, frame->dbkneecod);
982 bitwriter_writebits(bw, 3, frame->floorcod);
983 }
984
985 // snr offset
986 bitwriter_writebits(bw, 1, baie);
987 if(baie) {
988 bitwriter_writebits(bw, 6, frame->csnroffst);
989 for(ch=0; ch<ctx->n_all_channels; ch++) {
990 bitwriter_writebits(bw, 4, frame->fsnroffst);
991 bitwriter_writebits(bw, 3, frame->fgaincod);
992 }
993 }
994
995 bitwriter_writebits(bw, 1, 0); // no delta bit allocation
996 bitwriter_writebits(bw, 1, 0); // no data to skip
997
998 // mantissas
999 for(ch=0; ch<ctx->n_all_channels; ch++) {
1000 int b, q;
1001 for(i=0; i<frame->ncoefs[ch]; i++) {
1002 q = block->qmant[ch][i];
1003 b = block->bap[ch][i];
1004 switch(b) {
1005 case 0: break;
1006 case 1: if(q != 128) bitwriter_writebits(bw, 5, q);
1007 break;
1008 case 2: if(q != 128) bitwriter_writebits(bw, 7, q);
1009 break;
1010 case 3: bitwriter_writebits(bw, 3, q);
1011 break;
1012 case 4: if(q != 128) bitwriter_writebits(bw, 7, q);
1013 break;
1014 case 14: bitwriter_writebits(bw, 14, q);
1015 break;
1016 case 15: bitwriter_writebits(bw, 16, q);
1017 break;
1018 default: bitwriter_writebits(bw, b - 1, q);
1019 }
1020 }
1021 }
1022 }
1023 }
1024
1025 static int
output_frame_end(A52ThreadContext * tctx)1026 output_frame_end(A52ThreadContext *tctx)
1027 {
1028 uint8_t *frame;
1029 int fs, fs58, n, crc1, crc2, bitcount;
1030
1031 fs = tctx->frame.frame_size;
1032 // align to 8 bits
1033 bitwriter_flushbits(&tctx->bw);
1034 // add zero bytes to reach the frame size
1035 frame = tctx->bw.buffer;
1036 bitcount = bitwriter_bitcount(&tctx->bw);
1037 n = (fs << 1) - 2 - (bitcount >> 3);
1038 if(n < 0) {
1039 fprintf(stderr, "data size exceeds frame size (frame=%d data=%d)\n",
1040 (fs << 1) - 2, bitcount >> 3);
1041 return -1;
1042 }
1043 if(n > 0) memset(&tctx->bw.buffer[bitcount>>3], 0, n);
1044
1045 // compute crc1 for 1st 5/8 of frame
1046 fs58 = (fs >> 1) + (fs >> 3);
1047 crc1 = calc_crc16(&frame[4], (fs58<<1)-4);
1048 crc1 = crc16_zero(crc1, (fs58<<1)-2);
1049 frame[2] = crc1 >> 8;
1050 frame[3] = crc1;
1051 // double-check
1052 crc1 = calc_crc16(&frame[2], (fs58<<1)-2);
1053 if(crc1 != 0) fprintf(stderr, "CRC ERROR\n");
1054
1055 // compute crc2 for final 3/8 of frame
1056 crc2 = calc_crc16(&frame[fs58<<1], ((fs - fs58) << 1) - 2);
1057 frame[(fs<<1)-2] = crc2 >> 8;
1058 frame[(fs<<1)-1] = crc2;
1059
1060 return (fs << 1);
1061 }
1062
1063 static void
copy_samples(A52ThreadContext * tctx)1064 copy_samples(A52ThreadContext *tctx)
1065 {
1066 A52Context *ctx = tctx->ctx;
1067 A52Frame *frame = &tctx->frame;
1068 FLOAT buffer[A52_SAMPLES_PER_FRAME];
1069 FLOAT *in_audio;
1070 FLOAT *out_audio;
1071 FLOAT *temp;
1072 int ch, blk;
1073 #define SWAP_BUFFERS temp=in_audio;in_audio=out_audio;out_audio=temp;
1074
1075 #ifndef NO_THREADS
1076 if (ctx->n_threads > 1) {
1077 posix_mutex_lock(&ctx->ts.samples_mutex);
1078
1079 windows_cs_enter(&ctx->ts.samples_cs);
1080
1081 while (ctx->ts.samples_thread_num != tctx->thread_num) {
1082 posix_cond_wait(&tctx->ts.samples_cond, &ctx->ts.samples_mutex);
1083
1084 windows_cs_leave(&ctx->ts.samples_cs);
1085 windows_event_wait(&tctx->ts.samples_event);
1086 windows_cs_enter(&ctx->ts.samples_cs);
1087 }
1088 windows_event_reset(&tctx->ts.samples_event);
1089 }
1090 #endif
1091 for(ch=0; ch<ctx->n_all_channels; ch++) {
1092 out_audio = buffer;
1093 in_audio = frame->input_audio[ch];
1094 // DC-removal high-pass filter
1095 if(ctx->params.use_dc_filter) {
1096 filter_run(&ctx->dc_filter[ch], out_audio, in_audio,
1097 A52_SAMPLES_PER_FRAME);
1098 SWAP_BUFFERS
1099 }
1100 if (ch < ctx->n_channels) {
1101 // channel bandwidth filter
1102 if(ctx->params.use_bw_filter) {
1103 filter_run(&ctx->bw_filter[ch], out_audio, in_audio,
1104 A52_SAMPLES_PER_FRAME);
1105 SWAP_BUFFERS
1106 }
1107 // block-switching high-pass filter
1108 if(ctx->params.use_block_switching) {
1109 filter_run(&ctx->bs_filter[ch], out_audio, in_audio,
1110 A52_SAMPLES_PER_FRAME);
1111 memcpy(frame->blocks[0].transient_samples[ch],
1112 ctx->last_transient_samples[ch], 256 * sizeof(FLOAT));
1113 memcpy(&frame->blocks[0].transient_samples[ch][256], out_audio,
1114 256 * sizeof(FLOAT));
1115 for(blk=1; blk<A52_NUM_BLOCKS; blk++) {
1116 memcpy(frame->blocks[blk].transient_samples[ch],
1117 &out_audio[256*(blk-1)], 512 * sizeof(FLOAT));
1118 }
1119 memcpy(ctx->last_transient_samples[ch],
1120 &out_audio[256*5], 256 * sizeof(FLOAT));
1121 }
1122 } else {
1123 // LFE bandwidth low-pass filter
1124 if(ctx->params.use_lfe_filter) {
1125 assert(ch == ctx->lfe_channel);
1126 filter_run(&ctx->lfe_filter, out_audio, in_audio,
1127 A52_SAMPLES_PER_FRAME);
1128 SWAP_BUFFERS
1129 }
1130 }
1131
1132 memcpy(frame->blocks[0].input_samples[ch], ctx->last_samples[ch],
1133 256 * sizeof(FLOAT));
1134 memcpy(&frame->blocks[0].input_samples[ch][256], in_audio,
1135 256 * sizeof(FLOAT));
1136 for(blk=1; blk<A52_NUM_BLOCKS; blk++) {
1137 memcpy(frame->blocks[blk].input_samples[ch], &in_audio[256*(blk-1)],
1138 512 * sizeof(FLOAT));
1139 }
1140 memcpy(ctx->last_samples[ch],
1141 &in_audio[256*5], 256 * sizeof(FLOAT));
1142 }
1143 #ifndef NO_THREADS
1144 if (ctx->n_threads > 1) {
1145 ++ctx->ts.samples_thread_num;
1146 ctx->ts.samples_thread_num %= ctx->n_threads;
1147
1148 posix_cond_signal(tctx->ts.next_samples_cond);
1149 posix_mutex_unlock(&ctx->ts.samples_mutex);
1150
1151 windows_event_set(tctx->ts.next_samples_event);
1152 windows_cs_leave(&ctx->ts.samples_cs);
1153 }
1154 #endif
1155 #undef SWAP_BUFFERS
1156 }
1157
1158 /* determines block length by detecting transients */
1159 static int
detect_transient(FLOAT * in)1160 detect_transient(FLOAT *in)
1161 {
1162 FLOAT *xx = in;
1163 int i, j;
1164 FLOAT level1[2];
1165 FLOAT level2[4];
1166 FLOAT level3[8];
1167 FLOAT tmax = FCONST(100.0) / FCONST(32768.0);
1168 FLOAT t1 = FCONST(0.100);
1169 FLOAT t2 = FCONST(0.075);
1170 FLOAT t3 = FCONST(0.050);
1171
1172 // level 1 (2 x 256)
1173 for(i=0; i<2; i++) {
1174 level1[i] = 0;
1175 for(j=0; j<256; j++) {
1176 level1[i] = MAX(AFT_FABS(xx[i*256+j]), level1[i]);
1177 }
1178 if(level1[i] < tmax) {
1179 return 0;
1180 }
1181 if((i > 0) && (level1[i] * t1 > level1[i-1])) {
1182 return 1;
1183 }
1184 }
1185
1186 // level 2 (4 x 128)
1187 for(i=1; i<4; i++) {
1188 level2[i] = 0;
1189 for(j=0; j<128; j++) {
1190 level2[i] = MAX(AFT_FABS(xx[i*128+j]), level2[i]);
1191 }
1192 if((i > 1) && (level2[i] * t2 > level2[i-1])) {
1193 return 1;
1194 }
1195 }
1196
1197 // level 3 (8 x 64)
1198 for(i=3; i<8; i++) {
1199 level3[i] = 0;
1200 for(j=0; j<64; j++) {
1201 level3[i] = MAX(AFT_FABS(xx[i*64+j]), level3[i]);
1202 }
1203 if((i > 3) && (level3[i] * t3 > level3[i-1])) {
1204 return 1;
1205 }
1206 }
1207
1208 return 0;
1209 }
1210
1211 static void
generate_coefs(A52ThreadContext * tctx)1212 generate_coefs(A52ThreadContext *tctx)
1213 {
1214 A52Context *ctx = tctx->ctx;
1215 A52Block *block;
1216 void (*mdct_256)(struct A52ThreadContext *tctx, FLOAT *out, FLOAT *in) =
1217 ctx->mdct_ctx_256.mdct;
1218 void (*mdct_512)(struct A52ThreadContext *tctx, FLOAT *out, FLOAT *in) =
1219 ctx->mdct_ctx_512.mdct;
1220 int blk, ch, i;
1221
1222 for(ch=0; ch<ctx->n_all_channels; ch++) {
1223 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1224 block = &tctx->frame.blocks[blk];
1225 if(ctx->params.use_block_switching) {
1226 block->blksw[ch] = detect_transient(block->transient_samples[ch]);
1227 } else {
1228 block->blksw[ch] = 0;
1229 }
1230 ctx->apply_a52_window(block->input_samples[ch]);
1231 if(block->blksw[ch]) {
1232 mdct_256(tctx, block->mdct_coef[ch], block->input_samples[ch]);
1233 } else {
1234 mdct_512(tctx, block->mdct_coef[ch], block->input_samples[ch]);
1235 }
1236 for(i=tctx->frame.ncoefs[ch]; i<256; i++) {
1237 block->mdct_coef[ch][i] = 0.0;
1238 }
1239 }
1240 }
1241 }
1242
1243 static void
calc_rematrixing(A52ThreadContext * tctx)1244 calc_rematrixing(A52ThreadContext *tctx)
1245 {
1246 A52Context *ctx = tctx->ctx;
1247 A52Frame *frame = &tctx->frame;
1248 A52Block *block;
1249 FLOAT sum[4][4];
1250 FLOAT lt, rt, ctmp1, ctmp2;
1251 int blk, bnd, i;
1252
1253
1254 if(!ctx->params.use_rematrixing) {
1255 frame->blocks[0].rematstr = 1;
1256 for(bnd=0; bnd<4; bnd++) {
1257 frame->blocks[0].rematflg[bnd] = 0;
1258 }
1259 for(blk=1; blk<A52_NUM_BLOCKS; blk++) {
1260 frame->blocks[blk].rematstr = 0;
1261 }
1262 return;
1263 }
1264
1265 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1266 block = &frame->blocks[blk];
1267
1268 block->rematstr = 0;
1269 if(blk == 0) block->rematstr = 1;
1270
1271 for(bnd=0; bnd<4; bnd++) {
1272 block->rematflg[bnd] = 0;
1273 sum[bnd][0] = sum[bnd][1] = sum[bnd][2] = sum[bnd][3] = 0;
1274 for(i=rematbndtab[bnd][0]; i<=rematbndtab[bnd][1]; i++) {
1275 if(i == frame->ncoefs[0]) break;
1276 lt = block->mdct_coef[0][i];
1277 rt = block->mdct_coef[1][i];
1278 sum[bnd][0] += lt * lt;
1279 sum[bnd][1] += rt * rt;
1280 sum[bnd][2] += (lt + rt) * (lt + rt) / FCONST(4.0);
1281 sum[bnd][3] += (lt - rt) * (lt - rt) / FCONST(4.0);
1282 }
1283 if(sum[bnd][0]+sum[bnd][1] >= (sum[bnd][2]+sum[bnd][3])/FCONST(2.0)) {
1284 block->rematflg[bnd] = 1;
1285 for(i=rematbndtab[bnd][0]; i<=rematbndtab[bnd][1]; i++) {
1286 if(i == frame->ncoefs[0]) break;
1287 ctmp1 = block->mdct_coef[0][i] * FCONST(0.5);
1288 ctmp2 = block->mdct_coef[1][i] * FCONST(0.5);
1289 block->mdct_coef[0][i] = ctmp1 + ctmp2;
1290 block->mdct_coef[1][i] = ctmp1 - ctmp2;
1291 }
1292 }
1293 if(blk != 0 && block->rematstr == 0 &&
1294 block->rematflg[bnd] != frame->blocks[blk-1].rematflg[bnd]) {
1295 block->rematstr = 1;
1296 }
1297 }
1298 }
1299 }
1300
1301 /** Adjust for fractional frame sizes in CBR mode */
1302 static void
adjust_frame_size(A52ThreadContext * tctx)1303 adjust_frame_size(A52ThreadContext *tctx)
1304 {
1305 A52Context *ctx = tctx->ctx;
1306 A52Frame *f = &tctx->frame;
1307 uint32_t kbps = f->bit_rate * 1000;
1308 uint32_t srate = ctx->sample_rate;
1309 int add;
1310
1311 while(tctx->bit_cnt >= kbps && tctx->sample_cnt >= srate) {
1312 tctx->bit_cnt -= kbps;
1313 tctx->sample_cnt -= srate;
1314 }
1315 add = !!(tctx->bit_cnt * srate < tctx->sample_cnt * kbps);
1316 f->frame_size = f->frame_size_min + add;
1317 }
1318
1319 static void
compute_dither_strategy(A52ThreadContext * tctx)1320 compute_dither_strategy(A52ThreadContext *tctx)
1321 {
1322 A52Block *block0;
1323 A52Block *block1;
1324 int channels = tctx->ctx->n_channels;
1325 int blk, ch;
1326
1327 block0 = NULL;
1328 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1329 block1 = &tctx->frame.blocks[blk];
1330 for(ch=0; ch<channels; ch++) {
1331 if(block1->blksw[ch] || ((blk>0) && block0->blksw[ch])) {
1332 block1->dithflag[ch] = 0;
1333 } else {
1334 block1->dithflag[ch] = 1;
1335 }
1336 }
1337 block0 = block1;
1338 }
1339 }
1340
1341 static void
calculate_dynrng(A52ThreadContext * tctx)1342 calculate_dynrng(A52ThreadContext *tctx)
1343 {
1344 A52Context *ctx = tctx->ctx;
1345 A52Block *block;
1346 int blk;
1347
1348 if(ctx->params.dynrng_profile == DYNRNG_PROFILE_NONE)
1349 return;
1350
1351 for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1352 block = &tctx->frame.blocks[blk];
1353 block->dynrng = calculate_block_dynrng(block->input_samples,
1354 ctx->n_all_channels,
1355 -ctx->meta.dialnorm,
1356 ctx->params.dynrng_profile);
1357 }
1358 }
1359
1360 static int
encode_frame(A52ThreadContext * tctx,uint8_t * frame_buffer)1361 encode_frame(A52ThreadContext *tctx, uint8_t *frame_buffer)
1362 {
1363 A52Context *ctx = tctx->ctx;
1364 A52Frame *frame = &tctx->frame;
1365
1366 if(frame_init(tctx)) {
1367 fprintf(stderr, "Encoding has not properly initialized\n");
1368 return -1;
1369 }
1370
1371 copy_samples(tctx);
1372
1373 calculate_dynrng(tctx);
1374
1375 generate_coefs(tctx);
1376
1377 compute_dither_strategy(tctx);
1378
1379 if(ctx->acmod == A52_ACMOD_STEREO) {
1380 calc_rematrixing(tctx);
1381 }
1382
1383 // variable bandwidth
1384 if(ctx->params.bwcode == -2) {
1385 // process exponents at full bandwidth
1386 ctx->process_exponents(tctx);
1387 // run bit allocation at q=240 to calculate bandwidth
1388 vbw_bit_allocation(tctx);
1389 }
1390
1391 ctx->process_exponents(tctx);
1392
1393 if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
1394 adjust_frame_size(tctx);
1395 }
1396
1397 if(compute_bit_allocation(tctx)) {
1398 fprintf(stderr, "Error in bit allocation\n");
1399 tctx->framesize = 0;
1400 return -1;
1401 }
1402
1403 quantize_mantissas(tctx);
1404
1405 // increment counters
1406 tctx->bit_cnt += frame->frame_size * 16;
1407 tctx->sample_cnt += A52_SAMPLES_PER_FRAME;
1408
1409 // update encoding status
1410 tctx->status.quality = frame->quality;
1411 tctx->status.bit_rate = frame->bit_rate;
1412 tctx->status.bwcode = frame->bwcode;
1413
1414 output_frame_header(tctx, frame_buffer);
1415 output_audio_blocks(tctx);
1416 tctx->framesize = output_frame_end(tctx);
1417
1418 return 0;
1419 }
1420
1421 #ifndef NO_THREADS
1422 static int
threaded_encode(void * vtctx)1423 threaded_encode(void* vtctx)
1424 {
1425 A52ThreadContext *tctx;
1426
1427 #ifdef MINGW_ALIGN_STACK_HACK
1428 asm volatile (
1429 "movl %%esp, %%ecx\n"
1430 "andl $15, %%ecx\n"
1431 "subl %%ecx, %%esp\n"
1432 "pushl %%ecx\n"
1433 "pushl %%ecx\n"
1434 "pushl %%ecx\n"
1435 "pushl %%ecx\n"
1436 : : : "%esp","%ecx");
1437 #endif
1438
1439 tctx = vtctx;
1440
1441 posix_mutex_lock(&tctx->ts.enter_mutex);
1442 posix_cond_signal(&tctx->ts.enter_cond);
1443 while(1) {
1444 posix_cond_wait(&tctx->ts.enter_cond, &tctx->ts.enter_mutex);
1445 posix_mutex_lock(&tctx->ts.confirm_mutex);
1446 posix_cond_signal(&tctx->ts.confirm_cond);
1447 posix_mutex_unlock(&tctx->ts.confirm_mutex);
1448
1449 windows_event_set(&tctx->ts.ready_event);
1450 windows_event_wait(&tctx->ts.enter_event);
1451 /* end thread if nothing to encode */
1452 if (tctx->state == END) {
1453 tctx->framesize = 0;
1454 break;
1455 }
1456 if (tctx->state == ABORT) {
1457 tctx->framesize = -1;
1458 break;
1459 }
1460 if (encode_frame(tctx, tctx->frame_buffer))
1461 tctx->state = ABORT;
1462 }
1463 posix_mutex_unlock(&tctx->ts.enter_mutex);
1464
1465 windows_event_set(&tctx->ts.ready_event);
1466
1467 #ifdef MINGW_ALIGN_STACK_HACK
1468 asm volatile (
1469 "popl %%ecx\n"
1470 "popl %%ecx\n"
1471 "popl %%ecx\n"
1472 "popl %%ecx\n"
1473 "addl %%ecx, %%esp\n"
1474 : : : "%esp", "%ecx");
1475 #endif
1476
1477 return 0;
1478 }
1479
1480 static int
encode_frame_parallel(AftenContext * s,uint8_t * frame_buffer,const void * samples)1481 encode_frame_parallel(AftenContext *s, uint8_t *frame_buffer, const void *samples)
1482 {
1483 A52Context *ctx = s->private_context;
1484 int framesize = 0;
1485
1486 do {
1487 A52ThreadContext *tctx = &ctx->tctx[ctx->ts.current_thread_num];
1488
1489 posix_mutex_lock(&tctx->ts.enter_mutex);
1490
1491 windows_event_wait(&tctx->ts.ready_event);
1492
1493 if (tctx->state == ABORT || ctx->ts.threads_to_abort) {
1494 tctx->state = ABORT;
1495 framesize = -1;
1496 if (!ctx->ts.threads_to_abort)
1497 ctx->ts.threads_to_abort = ctx->n_threads;
1498 --ctx->ts.threads_to_abort;
1499 } else {
1500 if (tctx->state == START)
1501 tctx->state = WORK;
1502 else {
1503 if(tctx->framesize > 0) {
1504 framesize = tctx->framesize;
1505 memcpy(frame_buffer, tctx->frame_buffer, framesize);
1506 // update encoding status
1507 s->status.quality = tctx->status.quality;
1508 s->status.bit_rate = tctx->status.bit_rate;
1509 s->status.bwcode = tctx->status.bwcode;
1510 } else {
1511 posix_mutex_unlock(&tctx->ts.enter_mutex);
1512 goto end;
1513 }
1514 }
1515 if(!samples)
1516 tctx->state = END;
1517 else
1518 // convert sample format and de-interleave channels
1519 ctx->fmt_convert_from_src(tctx->frame.input_audio, samples,
1520 ctx->n_all_channels,
1521 A52_SAMPLES_PER_FRAME);
1522 }
1523 posix_mutex_lock(&tctx->ts.confirm_mutex);
1524 posix_cond_signal(&tctx->ts.enter_cond);
1525 posix_mutex_unlock(&tctx->ts.enter_mutex);
1526 posix_cond_wait(&tctx->ts.confirm_cond, &tctx->ts.confirm_mutex);
1527 posix_mutex_unlock(&tctx->ts.confirm_mutex);
1528
1529 windows_event_set(&tctx->ts.enter_event);
1530 end:
1531 ++ctx->ts.current_thread_num;
1532 ctx->ts.current_thread_num %= ctx->n_threads;
1533 } while(ctx->ts.threads_to_abort);
1534
1535 return framesize;
1536 }
1537 #endif
1538
1539 int
aften_encode_frame(AftenContext * s,uint8_t * frame_buffer,const void * samples)1540 aften_encode_frame(AftenContext *s, uint8_t *frame_buffer, const void *samples)
1541 {
1542 A52Context *ctx;
1543 A52ThreadContext *tctx;
1544 A52Frame *frame;
1545
1546 if(s == NULL || frame_buffer == NULL) {
1547 fprintf(stderr, "One or more NULL parameters passed to aften_encode_frame\n");
1548 return -1;
1549 }
1550 ctx = s->private_context;
1551 #ifndef NO_THREADS
1552 if (ctx->n_threads > 1)
1553 return encode_frame_parallel(s, frame_buffer, samples);
1554 #endif
1555 if (!samples)
1556 return 0;
1557
1558 tctx = ctx->tctx;
1559 frame = &tctx->frame;
1560
1561 ctx->fmt_convert_from_src(frame->input_audio, samples, ctx->n_all_channels,
1562 A52_SAMPLES_PER_FRAME);
1563
1564 encode_frame(tctx, frame_buffer);
1565
1566 s->status.quality = tctx->status.quality;
1567 s->status.bit_rate = tctx->status.bit_rate;
1568 s->status.bwcode = tctx->status.bwcode;
1569
1570 return tctx->framesize;
1571 }
1572
1573 void
aften_encode_close(AftenContext * s)1574 aften_encode_close(AftenContext *s)
1575 {
1576 if(s != NULL && s->private_context != NULL) {
1577 A52Context *ctx = s->private_context;
1578 /* mdct_close deinits both mdcts */
1579 ctx->mdct_ctx_512.mdct_close(ctx);
1580
1581 posix_mutex_destroy(&ctx->ts.samples_mutex);
1582
1583 windows_cs_destroy(&ctx->ts.samples_cs);
1584 if (ctx->tctx) {
1585 if (ctx->n_threads == 1)
1586 ctx->tctx[0].mdct_tctx_512.mdct_thread_close(&ctx->tctx[0]);
1587 else {
1588 int i;
1589 for (i=0; i<ctx->n_threads; ++i) {
1590 A52ThreadContext cur_tctx = ctx->tctx[i];
1591 thread_join(cur_tctx.ts.thread);
1592 cur_tctx.mdct_tctx_512.mdct_thread_close(&cur_tctx);
1593 posix_cond_destroy(&cur_tctx.ts.enter_cond);
1594 posix_cond_destroy(&cur_tctx.ts.confirm_cond);
1595 posix_cond_destroy(&cur_tctx.ts.samples_cond);
1596
1597 posix_mutex_destroy(&cur_tctx.ts.enter_mutex);
1598 posix_mutex_destroy(&cur_tctx.ts.confirm_mutex);
1599
1600 windows_event_destroy(&cur_tctx.ts.ready_event);
1601 windows_event_destroy(&cur_tctx.ts.enter_event);
1602 windows_event_destroy(&cur_tctx.ts.samples_event);
1603 }
1604 }
1605 free(ctx->tctx);
1606 }
1607 free(ctx);
1608 s->private_context = NULL;
1609 }
1610 }
1611