1 /**
2  * Aften: A/52 audio encoder
3  * Copyright (c) 2006 Justin Ruggles
4  *               2007 Prakash Punnoor <prakash@punnoor.de>
5  *
6  * Based on "The simplest AC3 encoder" from FFmpeg
7  * Copyright (c) 2000 Fabrice Bellard.
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 /**
25  * @file a52enc.c
26  * A/52 encoder
27  */
28 
29 #include "common.h"
30 
31 #include <stdlib.h>
32 #include <stdio.h>
33 #include <string.h>
34 #include <assert.h>
35 
36 #include "a52.h"
37 #include "bitalloc.h"
38 #include "crc.h"
39 #include "mdct.h"
40 #include "window.h"
41 #include "exponent.h"
42 #include "dynrng.h"
43 #include "cpu_caps.h"
44 
/**
 * LUT for number of exponent groups present.
 * nexpgrptab[exponent strategy][number of coefficients]
 *
 * The table is only defined here (zero-initialized as a global); it is
 * filled in elsewhere.
 * NOTE(review): presumably populated by exponent_init() -- confirm.
 */
int nexpgrptab[3][256];
50 
/**
 * Pre-defined sets of exponent strategies. A strategy set is selected for
 * each channel in a frame.  All sets 1 to 5 use the same number of exponent
 * bits.  Set 0 is only used as the reference of optimal accuracy.
 *
 * Each row is one set; each column holds the strategy for one position in
 * the frame (NOTE(review): presumably one entry per audio block -- confirm
 * against A52_NUM_BLOCKS).
 *
 * TODO: more options and other sets which use greater or fewer bits
 */
uint8_t str_predef[6][6] = {
    { EXP_D15,   EXP_D15,   EXP_D15,   EXP_D15,   EXP_D15,   EXP_D15 },
    { EXP_D15, EXP_REUSE, EXP_REUSE, EXP_REUSE, EXP_REUSE, EXP_REUSE },
    { EXP_D25, EXP_REUSE, EXP_REUSE,   EXP_D25, EXP_REUSE, EXP_REUSE },
    { EXP_D25, EXP_REUSE, EXP_REUSE,   EXP_D45, EXP_REUSE,   EXP_D45 },
    { EXP_D25, EXP_REUSE,   EXP_D45, EXP_REUSE,   EXP_D45, EXP_REUSE },
    { EXP_D45,   EXP_D45, EXP_REUSE,   EXP_D45, EXP_REUSE,   EXP_D45 }
};
65 
/*
 * Four pairs of values, one pair per rematrixing band.
 * NOTE(review): presumably the inclusive {first, last} coefficient index of
 * each band -- the table is not referenced in this part of the file, so
 * confirm against its user.
 */
static const uint8_t rematbndtab[4][2] = {
    {13, 24}, {25, 36}, {37, 60}, {61, 252}
};
69 
/*
 * possible frequencies (base sample rates, in Hz).
 * aften_encode_init() also accepts each of these shifted right by 1 or 2
 * (half and quarter rate); the index into this table becomes fscod.
 */
const uint16_t a52_freqs[3] = { 48000, 44100, 32000 };

/*
 * possible bitrates.
 * aften_encode_init() compares the requested bitrate against each entry
 * shifted right by halfratecod; twice the matching index becomes frmsizecod.
 */
const uint16_t a52_bitratetab[19] = {
    32, 40, 48, 56, 64, 80, 96, 112, 128,
    160, 192, 224, 256, 320, 384, 448, 512, 576, 640
};
78 
#ifndef NO_THREADS
/*
 * Forward declaration of the routine handed to thread_create() in
 * aften_encode_init(); only declared when threading is compiled in.
 */
static int threaded_encode(void* vtctx);
#endif
82 
83 const char *
aften_get_version(void)84 aften_get_version(void)
85 {
86 #ifdef SVN_VERSION
87     static const char *const str = AFTEN_VERSION "-r" SVN_VERSION;
88 #else
89     static const char *const str = AFTEN_VERSION;
90 #endif
91 
92     return str;
93 }
94 
95 static void
set_available_simd_instructions(AftenSimdInstructions * simd_instructions)96 set_available_simd_instructions(AftenSimdInstructions *simd_instructions)
97 {
98     cpu_caps_detect();
99 
100     memset(simd_instructions, 0, sizeof(AftenSimdInstructions));
101 
102 #ifdef HAVE_MMX
103     simd_instructions->mmx = cpu_caps_have_mmx();
104 #endif
105 #ifdef HAVE_SSE
106     simd_instructions->sse = cpu_caps_have_sse();
107 #endif
108 #ifdef HAVE_SSE2
109     simd_instructions->sse2 = cpu_caps_have_sse2();
110 #endif
111 #ifdef HAVE_SSE3
112     simd_instructions->sse3 = cpu_caps_have_sse3();
113 #endif
114 /* Following SIMD code doesn't exist yet, so don't set it available */
115 #if 0
116 #ifdef HAVE_SSSE3
117     simd_instructions->ssse3 = cpu_caps_have_ssse3();
118 #endif
119 #ifdef HAVE_HAVE_3DNOW
120     simd_instructions->amd_3dnow = cpu_caps_have_3dnow();
121 #endif
122 #ifdef HAVE_HAVE_SSE_MMX
123     simd_instructions->amd_sse_mmx = cpu_caps_have_sse_mmx();
124 #endif
125 #ifdef HAVE_HAVE_3DNOWEXT
126     simd_instructions->amd_3dnowext = cpu_caps_have_3dnowext();
127 #endif
128 #endif
129 #ifdef HAVE_ALTIVEC
130     simd_instructions->altivec = cpu_caps_have_altivec();
131 #endif
132 }
133 
134 void
aften_set_defaults(AftenContext * s)135 aften_set_defaults(AftenContext *s)
136 {
137     if(s == NULL) {
138         fprintf(stderr, "NULL parameter passed to aften_set_defaults\n");
139         return;
140     }
141 
142 
143     /**
144      * These 5 must be set explicitly before initialization.
145      * There are utility functions to help setting acmod and lfe.
146      */
147 
148     /* Tell the context which SIMD instruction sets are available. */
149     set_available_simd_instructions(&s->system.available_simd_instructions);
150     s->system.wanted_simd_instructions = s->system.available_simd_instructions;
151     s->system.n_threads = 0;
152 
153     s->verbose = 1;
154     s->channels = -1;
155     s->samplerate = -1;
156     s->acmod = -1;
157     s->lfe = -1;
158 
159     s->sample_format = A52_SAMPLE_FMT_S16;
160     s->private_context = NULL;
161     s->params.encoding_mode = AFTEN_ENC_MODE_CBR;
162     s->params.bitrate = 0;
163     s->params.quality = 240;
164     s->params.bwcode = -1;
165     s->params.use_rematrixing = 1;
166     s->params.use_block_switching = 0;
167     s->params.use_bw_filter = 0;
168     s->params.use_dc_filter = 0;
169     s->params.use_lfe_filter = 0;
170     s->params.bitalloc_fast = 0;
171     s->params.expstr_fast = 0;
172     s->params.dynrng_profile = DYNRNG_PROFILE_NONE;
173     s->params.min_bwcode = 0;
174     s->params.max_bwcode = 60;
175 
176     s->meta.cmixlev = 0;
177     s->meta.surmixlev = 0;
178     s->meta.dsurmod = 0;
179     s->meta.dialnorm = 31;
180     s->meta.xbsi1e = 0;
181     s->meta.dmixmod = 0;
182     s->meta.ltrtcmixlev = 4;
183     s->meta.ltrtsmixlev = 4;
184     s->meta.lorocmixlev = 4;
185     s->meta.lorosmixlev = 4;
186     s->meta.xbsi2e = 0;
187     s->meta.dsurexmod = 0;
188     s->meta.dheadphonmod = 0;
189     s->meta.adconvtyp = 0;
190 
191     s->status.quality = 0;
192     s->status.bit_rate = 0;
193     s->status.bwcode = 0;
194 }
195 
196 static void
fmt_convert_from_u8(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)197 fmt_convert_from_u8(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
198                     const void *vsrc, int nch, int n)
199 {
200     int i, j, ch;
201     const uint8_t *src = vsrc;
202 
203     for(ch=0; ch<nch; ch++) {
204         FLOAT *dest_ch = dest[ch];
205         const uint8_t *src_ch = src + ch;
206         for(i=0, j=0; i<n; i++, j+=nch) {
207             dest_ch[i] = (src_ch[j]-FCONST(128.0)) / FCONST(128.0);
208         }
209     }
210 }
211 
212 static void
fmt_convert_from_s16(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)213 fmt_convert_from_s16(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
214                      const void *vsrc, int nch, int n)
215 {
216     int i, j, ch;
217     const int16_t *src = vsrc;
218 
219     for(ch=0; ch<nch; ch++) {
220         FLOAT *dest_ch = dest[ch];
221         const int16_t *src_ch = src + ch;
222         for(i=0, j=0; i<n; i++, j+=nch) {
223             dest_ch[i] = src_ch[j] / FCONST(32768.0);
224         }
225     }
226 }
227 
228 static void
fmt_convert_from_s20(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)229 fmt_convert_from_s20(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
230                      const void *vsrc, int nch, int n)
231 {
232     int i, j, ch;
233     const int32_t *src = vsrc;
234 
235     for(ch=0; ch<nch; ch++) {
236         FLOAT *dest_ch = dest[ch];
237         const int32_t *src_ch = src + ch;
238         for(i=0, j=0; i<n; i++, j+=nch) {
239             dest_ch[i] = src_ch[j] / FCONST(524288.0);
240         }
241     }
242 }
243 
244 static void
fmt_convert_from_s24(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)245 fmt_convert_from_s24(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
246                      const void *vsrc, int nch, int n)
247 {
248     int i, j, ch;
249     const int32_t *src = vsrc;
250 
251     for(ch=0; ch<nch; ch++) {
252         FLOAT *dest_ch = dest[ch];
253         const int32_t *src_ch = src + ch;
254         for(i=0, j=0; i<n; i++, j+=nch) {
255             dest_ch[i] = src_ch[j] / FCONST(8388608.0);
256         }
257     }
258 }
259 
260 static void
fmt_convert_from_s32(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)261 fmt_convert_from_s32(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
262                      const void *vsrc, int nch, int n)
263 {
264     int i, j, ch;
265     const int32_t *src = vsrc;
266 
267     for(ch=0; ch<nch; ch++) {
268         FLOAT *dest_ch = dest[ch];
269         const int32_t *src_ch = src + ch;
270         for(i=0, j=0; i<n; i++, j+=nch) {
271             dest_ch[i] = src_ch[j] / FCONST(2147483648.0);
272         }
273     }
274 }
275 
276 static void
fmt_convert_from_float(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)277 fmt_convert_from_float(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
278                        const void *vsrc, int nch, int n)
279 {
280     int i, j, ch;
281     const float *src = vsrc;
282 
283     for(ch=0; ch<nch; ch++) {
284         FLOAT *dest_ch = dest[ch];
285         const float *src_ch = src + ch;
286         for(i=0, j=0; i<n; i++, j+=nch) {
287             dest_ch[i] = src_ch[j];
288         }
289     }
290 }
291 
292 static void
fmt_convert_from_double(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],const void * vsrc,int nch,int n)293 fmt_convert_from_double(FLOAT dest[A52_MAX_CHANNELS][A52_SAMPLES_PER_FRAME],
294                         const void *vsrc, int nch, int n)
295 {
296     int i, j, ch;
297     const double *src = vsrc;
298 
299     for(ch=0; ch<nch; ch++) {
300         FLOAT *dest_ch = dest[ch];
301         const double *src_ch = src + ch;
302         for(i=0, j=0; i<n; i++, j+=nch) {
303             dest_ch[i] = (FLOAT)src_ch[j];
304         }
305     }
306 }
307 
308 static void
select_mdct(A52Context * ctx)309 select_mdct(A52Context *ctx)
310 {
311 #ifndef CONFIG_DOUBLE
312 #ifdef HAVE_SSE3
313     if (cpu_caps_have_sse3()) {
314         sse3_mdct_init(ctx);
315         return;
316     }
317 #endif
318 #ifdef HAVE_SSE
319     if (cpu_caps_have_sse()) {
320         sse_mdct_init(ctx);
321         return;
322     }
323 #endif
324 #ifdef HAVE_ALTIVEC
325     if (cpu_caps_have_altivec()) {
326         mdct_init_altivec(ctx);
327         return;
328     }
329 #endif
330 #endif /* CONFIG_DOUBLE */
331     mdct_init(ctx);
332 }
333 
334 static void
select_mdct_thread(A52ThreadContext * tctx)335 select_mdct_thread(A52ThreadContext *tctx)
336 {
337 #ifndef CONFIG_DOUBLE
338 #ifdef HAVE_SSE3
339     if (cpu_caps_have_sse3()) {
340         sse3_mdct_thread_init(tctx);
341         return;
342     }
343 #endif
344 #ifdef HAVE_SSE
345     if (cpu_caps_have_sse()) {
346         sse_mdct_thread_init(tctx);
347         return;
348     }
349 #endif
350 #ifdef HAVE_ALTIVEC
351     if (cpu_caps_have_altivec()) {
352         mdct_thread_init_altivec(tctx);
353         return;
354     }
355 #endif
356 #endif /* CONFIG_DOUBLE */
357     mdct_thread_init(tctx);
358 }
359 
360 int
aften_encode_init(AftenContext * s)361 aften_encode_init(AftenContext *s)
362 {
363     A52Context *ctx;
364     A52ThreadContext *tctx;
365     int i, j, brate;
366     int last_quality;
367 
368     if(s == NULL) {
369         fprintf(stderr, "NULL parameter passed to aften_encode_init\n");
370         return -1;
371     }
372     cpu_caps_detect();
373     apply_simd_restrictions(&s->system.wanted_simd_instructions);
374 
375     ctx = calloc(sizeof(A52Context), 1);
376     if(!ctx) {
377         fprintf(stderr, "error allocating memory for A52Context\n");
378         return -1;
379     }
380     select_mdct(ctx);
381     s->private_context = ctx;
382 
383     switch(s->sample_format) {
384         case A52_SAMPLE_FMT_U8:  ctx->fmt_convert_from_src = fmt_convert_from_u8;
385                                  break;
386         case A52_SAMPLE_FMT_S16: ctx->fmt_convert_from_src = fmt_convert_from_s16;
387                                  break;
388         case A52_SAMPLE_FMT_S20: ctx->fmt_convert_from_src = fmt_convert_from_s20;
389                                  break;
390         case A52_SAMPLE_FMT_S24: ctx->fmt_convert_from_src = fmt_convert_from_s24;
391                                  break;
392         case A52_SAMPLE_FMT_S32: ctx->fmt_convert_from_src = fmt_convert_from_s32;
393                                  break;
394         case A52_SAMPLE_FMT_FLT: ctx->fmt_convert_from_src = fmt_convert_from_float;
395                                  break;
396         case A52_SAMPLE_FMT_DBL: ctx->fmt_convert_from_src = fmt_convert_from_double;
397                                  break;
398         default: break;
399     }
400 
401     // channel configuration
402     if(s->channels < 1 || s->channels > 6) {
403         fprintf(stderr, "invalid number of channels\n");
404         return -1;
405     }
406     if(s->acmod < 0 || s->acmod > 7) {
407         fprintf(stderr, "invalid acmod\n");
408         return -1;
409     }
410     if(s->channels == 6 && !s->lfe) {
411         fprintf(stderr, "6-channel audio must have LFE channel\n");
412         return -1;
413     }
414     if(s->channels == 1 && s->lfe) {
415         fprintf(stderr, "cannot encode stand-alone LFE channel\n");
416         return -1;
417     }
418     ctx->acmod = s->acmod;
419     ctx->lfe = s->lfe;
420     ctx->n_all_channels = s->channels;
421     ctx->n_channels = s->channels - s->lfe;
422     ctx->lfe_channel = s->lfe ? (s->channels - 1) : -1;
423 
424     ctx->params = s->params;
425     ctx->meta = s->meta;
426 
427     // frequency
428     for(i=0;i<3;i++) {
429         for(j=0;j<3;j++)
430             if((a52_freqs[j] >> i) == s->samplerate)
431                 goto found;
432     }
433     fprintf(stderr, "invalid sample rate\n");
434     return -1;
435  found:
436     ctx->sample_rate = s->samplerate;
437     ctx->halfratecod = i;
438     ctx->fscod = j;
439     if(ctx->halfratecod) {
440         // DolbyNet
441         ctx->bsid = 8 + ctx->halfratecod;
442     } else if(ctx->meta.xbsi1e || ctx->meta.xbsi2e) {
443         // alternate bit stream syntax
444         ctx->bsid = 6;
445     } else {
446         // normal AC-3
447         ctx->bsid = 8;
448     }
449     ctx->bsmod = 0;
450 
451     // bitrate & frame size
452     brate = s->params.bitrate;
453     if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
454         if(brate == 0) {
455             switch(ctx->n_channels) {
456                 case 1: brate =  96; break;
457                 case 2: brate = 192; break;
458                 case 3: brate = 256; break;
459                 case 4: brate = 384; break;
460                 case 5: brate = 448; break;
461             }
462         }
463     } else if(ctx->params.encoding_mode == AFTEN_ENC_MODE_VBR) {
464         if(s->params.quality < 0 || s->params.quality > 1023) {
465             fprintf(stderr, "invalid quality setting\n");
466             return -1;
467         }
468     } else {
469         return -1;
470     }
471 
472     for(i=0; i<19; i++) {
473         if((a52_bitratetab[i] >> ctx->halfratecod) == brate)
474             break;
475     }
476     if(i == 19) {
477         if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
478             fprintf(stderr, "invalid bitrate\n");
479             return -1;
480         }
481         i = 18;
482     }
483     ctx->frmsizecod = i*2;
484     ctx->target_bitrate = a52_bitratetab[i] >> ctx->halfratecod;
485 
486     bitalloc_init();
487     crc_init();
488     a52_window_init(ctx);
489     exponent_init(ctx);
490     dynrng_init();
491 
492     // can't do block switching with low sample rate due to the high-pass filter
493     if(ctx->sample_rate <= 16000) {
494         ctx->params.use_block_switching = 0;
495     }
496 
497     last_quality = 240;
498     if(ctx->params.encoding_mode == AFTEN_ENC_MODE_VBR) {
499         last_quality = ctx->params.quality;
500     } else if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
501         last_quality = ((((ctx->target_bitrate/ctx->n_channels)*35)/24)+95)+(25*ctx->halfratecod);
502     }
503 
504     // Initialize thread specific contexts
505     ctx->n_threads = (s->system.n_threads > 0) ? s->system.n_threads : get_ncpus();
506     ctx->n_threads = MIN(ctx->n_threads, MAX_NUM_THREADS);
507     s->system.n_threads = ctx->n_threads;
508     tctx = calloc(sizeof(A52ThreadContext), ctx->n_threads);
509     ctx->tctx = tctx;
510 
511     for (j=0; j<ctx->n_threads; ++j) {
512         A52ThreadContext *cur_tctx = &ctx->tctx[j];
513         cur_tctx->ctx = ctx;
514         cur_tctx->thread_num = j;
515 
516         select_mdct_thread(cur_tctx);
517 
518         cur_tctx->bit_cnt = 0;
519         cur_tctx->sample_cnt = 0;
520 
521         cur_tctx->last_quality = last_quality;
522 
523         if (ctx->n_threads > 1) {
524             cur_tctx->state = START;
525 
526             posix_cond_init(&cur_tctx->ts.enter_cond);
527             posix_cond_init(&cur_tctx->ts.confirm_cond);
528             posix_cond_init(&cur_tctx->ts.samples_cond);
529 
530             posix_mutex_init(&cur_tctx->ts.enter_mutex);
531             posix_mutex_init(&cur_tctx->ts.confirm_mutex);
532 
533             windows_event_init(&cur_tctx->ts.ready_event);
534             windows_event_init(&cur_tctx->ts.enter_event);
535             windows_event_init(&cur_tctx->ts.samples_event);
536 
537             posix_mutex_lock(&cur_tctx->ts.enter_mutex);
538             thread_create(&cur_tctx->ts.thread, threaded_encode, cur_tctx);
539             posix_cond_wait(&cur_tctx->ts.enter_cond, &cur_tctx->ts.enter_mutex);
540             posix_mutex_unlock(&cur_tctx->ts.enter_mutex);
541         }
542     }
543     for (j=0; j<ctx->n_threads; ++j) {
544 #ifdef HAVE_POSIX_THREADS
545         ctx->tctx[j].ts.next_samples_cond = &ctx->tctx[(j + 1) % ctx->n_threads].ts.samples_cond;
546 #endif
547 #ifdef HAVE_WINDOWS_THREADS
548         ctx->tctx[j].ts.next_samples_event = &ctx->tctx[(j + 1) % ctx->n_threads].ts.samples_event;
549 #endif
550     }
551     posix_mutex_init(&ctx->ts.samples_mutex);
552     windows_cs_init(&ctx->ts.samples_cs);
553 
554     if(s->params.bwcode < -2 || s->params.bwcode > 60) {
555         fprintf(stderr, "invalid bandwidth code\n");
556         return -1;
557     }
558     if(ctx->params.bwcode < 0) {
559         int cutoff = ((last_quality-120) * 120) + 4000;
560         ctx->fixed_bwcode = ((cutoff * 512 / ctx->sample_rate) - 73) / 3;
561         if(ctx->params.bwcode == -2) {
562             if(ctx->params.min_bwcode < 0 || ctx->params.min_bwcode > 60 ||
563                ctx->params.max_bwcode < 0 || ctx->params.max_bwcode > 60 ||
564                ctx->params.min_bwcode > ctx->params.max_bwcode) {
565                 fprintf(stderr, "invalid min/max bandwidth code\n");
566                 return -1;
567             }
568             if(ctx->params.encoding_mode == AFTEN_ENC_MODE_VBR) {
569                 fprintf(stderr, "variable bandwidth mode cannot be used with variable bitrate mode\n");
570                 return -1;
571             }
572         }
573         ctx->fixed_bwcode = CLIP(ctx->fixed_bwcode, ctx->params.min_bwcode,
574                                  ctx->params.max_bwcode);
575     } else {
576         ctx->fixed_bwcode = ctx->params.bwcode;
577     }
578 
579     // initialize transient-detect filters (one for each channel)
580     // cascaded biquad direct form I high-pass w/ cutoff of 8 kHz
581     if(ctx->params.use_block_switching) {
582         for(i=0; i<ctx->n_all_channels; i++) {
583             ctx->bs_filter[i].type = FILTER_TYPE_HIGHPASS;
584             ctx->bs_filter[i].cascaded = 1;
585             ctx->bs_filter[i].cutoff = 8000;
586             ctx->bs_filter[i].samplerate = (FLOAT)ctx->sample_rate;
587             if(filter_init(&ctx->bs_filter[i], FILTER_ID_BIQUAD_I)) {
588                 fprintf(stderr, "error initializing transient-detect filter\n");
589                 return -1;
590             }
591         }
592     }
593 
594     // initialize DC filters (one for each channel)
595     // one-pole high-pass w/ cutoff of 3 Hz
596     if(ctx->params.use_dc_filter) {
597         for(i=0; i<ctx->n_all_channels; i++) {
598             ctx->dc_filter[i].type = FILTER_TYPE_HIGHPASS;
599             ctx->dc_filter[i].cascaded = 0;
600             ctx->dc_filter[i].cutoff = 3;
601             ctx->dc_filter[i].samplerate = (FLOAT)ctx->sample_rate;
602             if(filter_init(&ctx->dc_filter[i], FILTER_ID_ONEPOLE)) {
603                 fprintf(stderr, "error initializing dc filter\n");
604                 return -1;
605             }
606         }
607     }
608 
609     // initialize bandwidth filters (one for each channel)
610     // butterworth 2nd order cascaded direct form II low-pass
611     if(ctx->params.use_bw_filter) {
612         int cutoff;
613         if(ctx->params.bwcode == -2) {
614             fprintf(stderr, "cannot use bandwidth filter with variable bandwidth\n");
615             return -1;
616         }
617         cutoff = (((ctx->fixed_bwcode * 3) + 73) * ctx->sample_rate) / 512;
618         if(cutoff < 4000) {
619             // disable bandwidth filter if cutoff is below 4000 Hz
620             ctx->params.use_bw_filter = 0;
621         } else {
622             for(i=0; i<ctx->n_channels; i++) {
623                 ctx->bw_filter[i].type = FILTER_TYPE_LOWPASS;
624                 ctx->bw_filter[i].cascaded = 1;
625                 ctx->bw_filter[i].cutoff = (FLOAT)cutoff;
626                 ctx->bw_filter[i].samplerate = (FLOAT)ctx->sample_rate;
627                 if(filter_init(&ctx->bw_filter[i], FILTER_ID_BUTTERWORTH_II)) {
628                     fprintf(stderr, "error initializing bandwidth filter\n");
629                     return -1;
630                 }
631             }
632         }
633     }
634 
635     // initialize LFE filter
636     // butterworth 2nd order cascaded direct form II low-pass w/ cutoff of 120 Hz
637     if(ctx->params.use_lfe_filter) {
638         if(!ctx->lfe) {
639             fprintf(stderr, "cannot use lfe filter. no lfe channel\n");
640             return -1;
641         }
642         ctx->lfe_filter.type = FILTER_TYPE_LOWPASS;
643         ctx->lfe_filter.cascaded = 1;
644         ctx->lfe_filter.cutoff = 120;
645         ctx->lfe_filter.samplerate = (FLOAT)ctx->sample_rate;
646         if(filter_init(&ctx->lfe_filter, FILTER_ID_BUTTERWORTH_II)) {
647             fprintf(stderr, "error initializing lfe filter\n");
648             return -1;
649         }
650     }
651 
652     return 0;
653 }
654 
655 static int
frame_init(A52ThreadContext * tctx)656 frame_init(A52ThreadContext *tctx)
657 {
658     A52Context *ctx = tctx->ctx;
659     A52Frame *frame = &tctx->frame;
660     A52Block *block;
661     int blk, bnd, ch;
662 
663     for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
664         block = &frame->blocks[blk];
665         block->block_num = blk;
666         block->rematstr = 0;
667         if(blk == 0) {
668             block->rematstr = 1;
669             for(bnd=0; bnd<4; bnd++) {
670                 block->rematflg[bnd] = 0;
671             }
672         }
673         for(ch=0; ch<ctx->n_channels; ch++) {
674             block->blksw[ch] = 0;
675             block->dithflag[ch] = 1;
676 
677             // input_samples will be null if context is not initialized
678             if(block->input_samples[ch] == NULL) {
679                 return -1;
680             }
681         }
682     }
683 
684     if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
685         frame->bit_rate = ctx->target_bitrate;
686         frame->frmsizecod = ctx->frmsizecod;
687         frame->frame_size_min = frame->bit_rate * 96000 / ctx->sample_rate;
688         frame->frame_size = frame->frame_size_min;
689     }
690 
691     if(ctx->params.bwcode == -2) {
692         frame->bwcode = 60;
693     } else {
694         frame->bwcode = ctx->fixed_bwcode;
695     }
696     for(ch=0; ch<ctx->n_channels; ch++) {
697         frame->ncoefs[ch] = (frame->bwcode * 3) + 73;
698     }
699     if(ctx->lfe) {
700         frame->ncoefs[ctx->lfe_channel] = 7;
701     }
702 
703     frame->frame_bits = 0;
704     frame->exp_bits = 0;
705     frame->mant_bits = 0;
706 
707     // default bit allocation params
708     frame->sdecaycod = 2;
709     frame->fdecaycod = 1;
710     frame->sgaincod = 1;
711     frame->dbkneecod = 2;
712     frame->floorcod = 7;
713     frame->fgaincod = 4;
714 
715     return 0;
716 }
717 
718 /* output the A52 frame header */
719 static void
output_frame_header(A52ThreadContext * tctx,uint8_t * frame_buffer)720 output_frame_header(A52ThreadContext *tctx, uint8_t *frame_buffer)
721 {
722     A52Context *ctx = tctx->ctx;
723     A52Frame *f = &tctx->frame;
724     BitWriter *bw = &tctx->bw;
725     int frmsizecod = f->frmsizecod+(f->frame_size-f->frame_size_min);
726 
727     bitwriter_init(bw, frame_buffer, A52_MAX_CODED_FRAME_SIZE);
728 
729     bitwriter_writebits(bw, 16, 0x0B77); /* frame header */
730     bitwriter_writebits(bw, 16, 0); /* crc1: will be filled later */
731     bitwriter_writebits(bw, 2, ctx->fscod);
732     bitwriter_writebits(bw, 6, frmsizecod);
733     bitwriter_writebits(bw, 5, ctx->bsid);
734     bitwriter_writebits(bw, 3, ctx->bsmod);
735     bitwriter_writebits(bw, 3, ctx->acmod);
736     if((ctx->acmod & 0x01) && (ctx->acmod != A52_ACMOD_MONO))
737         bitwriter_writebits(bw, 2, ctx->meta.cmixlev);
738     if(ctx->acmod & 0x04)
739         bitwriter_writebits(bw, 2, ctx->meta.surmixlev);
740     if(ctx->acmod == A52_ACMOD_STEREO)
741         bitwriter_writebits(bw, 2, ctx->meta.dsurmod);
742     bitwriter_writebits(bw, 1, ctx->lfe);
743     bitwriter_writebits(bw, 5, ctx->meta.dialnorm);
744     bitwriter_writebits(bw, 1, 0); /* no compression control word */
745     bitwriter_writebits(bw, 1, 0); /* no lang code */
746     bitwriter_writebits(bw, 1, 0); /* no audio production info */
747     if(ctx->acmod == A52_ACMOD_DUAL_MONO) {
748         bitwriter_writebits(bw, 5, ctx->meta.dialnorm);
749         bitwriter_writebits(bw, 1, 0); /* no compression control word 2 */
750         bitwriter_writebits(bw, 1, 0); /* no lang code 2 */
751         bitwriter_writebits(bw, 1, 0); /* no audio production info 2 */
752     }
753     bitwriter_writebits(bw, 1, 0); /* no copyright */
754     bitwriter_writebits(bw, 1, 1); /* original bitstream */
755     if(ctx->bsid == 6) {
756         // alternate bit stream syntax
757         bitwriter_writebits(bw, 1, ctx->meta.xbsi1e);
758         if(ctx->meta.xbsi1e) {
759             bitwriter_writebits(bw, 2, ctx->meta.dmixmod);
760             bitwriter_writebits(bw, 3, ctx->meta.ltrtcmixlev);
761             bitwriter_writebits(bw, 3, ctx->meta.ltrtsmixlev);
762             bitwriter_writebits(bw, 3, ctx->meta.lorocmixlev);
763             bitwriter_writebits(bw, 3, ctx->meta.lorosmixlev);
764         }
765         bitwriter_writebits(bw, 1, ctx->meta.xbsi2e);
766         if(ctx->meta.xbsi2e) {
767             bitwriter_writebits(bw, 2, ctx->meta.dsurexmod);
768             bitwriter_writebits(bw, 2, ctx->meta.dheadphonmod);
769             bitwriter_writebits(bw, 1, ctx->meta.adconvtyp);
770             bitwriter_writebits(bw, 9, 0);
771         }
772     } else {
773         bitwriter_writebits(bw, 1, 0); // timecod1e
774         bitwriter_writebits(bw, 1, 0); // timecod2e
775     }
776     bitwriter_writebits(bw, 1, 0); /* no addtional bit stream info */
777 }
778 
/**
 * symmetric quantization on 'levels' levels
 *
 * c      coefficient as an integer (quant_mant_ch() passes the MDCT
 *        coefficient multiplied by 2^24)
 * e      exponent of the coefficient
 * levels number of quantization levels
 *
 * The product levels*c is shifted right by (24-e), keeping one extra bit
 * that is used for rounding (+1, then >>1); (levels >> 1) is then added as
 * an offset. Note that 'levels' is evaluated twice.
 */
#define sym_quant(c, e, levels) \
    ((((((levels) * (c)) >> (24-(e))) + 1) >> 1) + ((levels) >> 1))
782 
/**
 * asymmetric quantization on 2^qbits levels
 *
 * @param c      coefficient as an integer (quant_mant_ch() passes the MDCT
 *               coefficient multiplied by 2^24)
 * @param e      exponent of the coefficient
 * @param qbits  number of bits in the quantized value
 * @return the quantized value as a qbits-wide two's complement bit pattern
 *         (the upper bits are masked off)
 *
 * The coefficient is scaled by 2^(e + qbits - 1 - 24) and saturated to
 * [-2^(qbits-1), 2^(qbits-1) - 1]. Scaling up is done as a 64-bit multiply:
 * left-shifting a negative int is undefined behaviour, and a coefficient
 * that is too large for its exponent would overflow a 32-bit shift instead
 * of saturating.
 */
static inline int
asym_quant(int c, int e, int qbits)
{
    const int lshift = e + (qbits-1) - 24;
    const int m = (1 << (qbits-1));
    int64_t v;

    if(lshift >= 0) {
        v = (int64_t)c * ((int64_t)1 << lshift);
    } else {
        /* scaling down; relies on >> of a negative int being an
           arithmetic shift, as the original code did */
        v = c >> (-lshift);
    }

    /* saturate to the signed qbits range */
    if(v < -m) {
        v = -m;
    } else if(v > m-1) {
        v = m-1;
    }

    return (int)v & ((1 << qbits)-1);
}
798 
799 static void
quant_mant_ch(FLOAT * mdct_coef,uint8_t * exp,uint8_t * bap,uint16_t * qmant,int ncoefs,uint16_t * qmant_ptr[3],int mant_cnt[3])800 quant_mant_ch(FLOAT *mdct_coef, uint8_t *exp, uint8_t *bap, uint16_t *qmant,
801               int ncoefs, uint16_t *qmant_ptr[3], int mant_cnt[3])
802 {
803     int i, c, e, b, v;
804 
805     for(i=0; i<ncoefs; i++) {
806         c = (int)(mdct_coef[i] * (1 << 24));
807         e = exp[i];
808         b = bap[i];
809         switch(b) {
810             case 0:
811                 v = 0;
812                 break;
813             case 1:
814                 v = sym_quant(c, e, 3);
815                 if(mant_cnt[0] == 0) {
816                     qmant_ptr[0] = &qmant[i];
817                     v = 9 * v;
818                 } else if(mant_cnt[0] == 1) {
819                     *qmant_ptr[0] += 3 * v;
820                     v = 128;
821                 } else {
822                     *qmant_ptr[0] += v;
823                     v = 128;
824                 }
825                 mant_cnt[0] = (mant_cnt[0] + 1) % 3;
826                 break;
827             case 2:
828                 v = sym_quant(c, e, 5);
829                 if(mant_cnt[1] == 0) {
830                     qmant_ptr[1] = &qmant[i];
831                     v = 25 * v;
832                 } else if(mant_cnt[1] == 1) {
833                     *qmant_ptr[1] += 5 * v;
834                     v = 128;
835                 } else {
836                     *qmant_ptr[1] += v;
837                     v = 128;
838                 }
839                 mant_cnt[1] = (mant_cnt[1] + 1) % 3;
840                 break;
841             case 3:
842                 v = sym_quant(c, e, 7);
843                 break;
844             case 4:
845                 v = sym_quant(c, e, 11);
846                 if(mant_cnt[2]== 0) {
847                     qmant_ptr[2] = &qmant[i];
848                     v = 11 * v;
849                 } else {
850                     *qmant_ptr[2] += v;
851                     v = 128;
852                 }
853                 mant_cnt[2] = (mant_cnt[2] + 1) % 2;
854                 break;
855             case 5:
856                 v = sym_quant(c, e, 15);
857                 break;
858             case 14:
859                 v = asym_quant(c, e, 14);
860                 break;
861             case 15:
862                 v = asym_quant(c, e, 16);
863                 break;
864             default:
865                 v = asym_quant(c, e, b - 1);
866         }
867         qmant[i] = v;
868     }
869 }
870 
871 static void
quantize_mantissas(A52ThreadContext * tctx)872 quantize_mantissas(A52ThreadContext *tctx)
873 {
874     A52Context *ctx = tctx->ctx;
875     A52Frame *frame = &tctx->frame;
876     A52Block *block;
877     uint16_t *qmant_ptr[3];
878     int blk, ch;
879     int mant_cnt[3];
880 
881     for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
882         block = &frame->blocks[blk];
883         mant_cnt[0] = mant_cnt[1] = mant_cnt[2] = 0;
884         qmant_ptr[0] = qmant_ptr[1] = qmant_ptr[2] = NULL;
885         for(ch=0; ch<ctx->n_all_channels; ch++) {
886             quant_mant_ch(block->mdct_coef[ch], block->exp[ch], block->bap[ch],
887                           block->qmant[ch], frame->ncoefs[ch], qmant_ptr,
888                           mant_cnt);
889         }
890     }
891 }
892 
893 /* Output each audio block. */
894 static void
output_audio_blocks(A52ThreadContext * tctx)895 output_audio_blocks(A52ThreadContext *tctx)
896 {
897     A52Context *ctx = tctx->ctx;
898     A52Frame *frame = &tctx->frame;
899     A52Block *block;
900     BitWriter *bw;
901     int blk, ch, i, baie, rbnd;
902 
903     bw = &tctx->bw;
904     for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
905         block = &frame->blocks[blk];
906         for(ch=0; ch<ctx->n_channels; ch++) {
907             bitwriter_writebits(bw, 1, block->blksw[ch]);
908         }
909         for(ch=0; ch<ctx->n_channels; ch++) {
910             bitwriter_writebits(bw, 1, block->dithflag[ch]);
911         }
912         if(ctx->params.dynrng_profile == DYNRNG_PROFILE_NONE) {
913             bitwriter_writebits(bw, 1, 0); // no dynamic range
914             if(ctx->acmod == A52_ACMOD_DUAL_MONO) {
915                 bitwriter_writebits(bw, 1, 0); // no dynamic range 2
916             }
917         } else {
918             bitwriter_writebits(bw, 1, 1);
919             bitwriter_writebits(bw, 8, block->dynrng);
920             if(ctx->acmod == A52_ACMOD_DUAL_MONO) {
921                 bitwriter_writebits(bw, 1, 1);
922                 bitwriter_writebits(bw, 8, block->dynrng);
923             }
924         }
925         if(block->block_num == 0) {
926             // must define coupling strategy in block 0
927             bitwriter_writebits(bw, 1, 1); // new coupling strategy
928             bitwriter_writebits(bw, 1, 0); // no coupling in use
929         } else {
930             bitwriter_writebits(bw, 1, 0); // no new coupling strategy
931         }
932 
933         if(ctx->acmod == A52_ACMOD_STEREO) {
934             bitwriter_writebits(bw, 1, block->rematstr);
935             if(block->rematstr) {
936                 for(rbnd=0; rbnd<4; rbnd++) {
937                     bitwriter_writebits(bw, 1, block->rematflg[rbnd]);
938                 }
939             }
940         }
941 
942         // exponent strategy
943         for(ch=0; ch<ctx->n_channels; ch++) {
944             bitwriter_writebits(bw, 2, block->exp_strategy[ch]);
945         }
946 
947         if(ctx->lfe) {
948             bitwriter_writebits(bw, 1, block->exp_strategy[ctx->lfe_channel]);
949         }
950 
951         for(ch=0; ch<ctx->n_channels; ch++) {
952             if(block->exp_strategy[ch] != EXP_REUSE)
953                 bitwriter_writebits(bw, 6, frame->bwcode);
954         }
955 
956         // exponents
957         for(ch=0; ch<ctx->n_all_channels; ch++) {
958             if(block->exp_strategy[ch] != EXP_REUSE) {
959                 // first exponent
960                 bitwriter_writebits(bw, 4, block->grp_exp[ch][0]);
961 
962                 // delta-encoded exponent groups
963                 for(i=1; i<=block->nexpgrps[ch]; i++) {
964                     bitwriter_writebits(bw, 7, block->grp_exp[ch][i]);
965                 }
966 
967                 // gain range info
968                 if(ch != ctx->lfe_channel) {
969                     bitwriter_writebits(bw, 2, 0);
970                 }
971             }
972         }
973 
974         // bit allocation info
975         baie = (block->block_num == 0);
976         bitwriter_writebits(bw, 1, baie);
977         if(baie) {
978             bitwriter_writebits(bw, 2, frame->sdecaycod);
979             bitwriter_writebits(bw, 2, frame->fdecaycod);
980             bitwriter_writebits(bw, 2, frame->sgaincod);
981             bitwriter_writebits(bw, 2, frame->dbkneecod);
982             bitwriter_writebits(bw, 3, frame->floorcod);
983         }
984 
985         // snr offset
986         bitwriter_writebits(bw, 1, baie);
987         if(baie) {
988             bitwriter_writebits(bw, 6, frame->csnroffst);
989             for(ch=0; ch<ctx->n_all_channels; ch++) {
990                 bitwriter_writebits(bw, 4, frame->fsnroffst);
991                 bitwriter_writebits(bw, 3, frame->fgaincod);
992             }
993         }
994 
995         bitwriter_writebits(bw, 1, 0); // no delta bit allocation
996         bitwriter_writebits(bw, 1, 0); // no data to skip
997 
998         // mantissas
999         for(ch=0; ch<ctx->n_all_channels; ch++) {
1000             int b, q;
1001             for(i=0; i<frame->ncoefs[ch]; i++) {
1002                 q = block->qmant[ch][i];
1003                 b = block->bap[ch][i];
1004                 switch(b) {
1005                     case 0:  break;
1006                     case 1:  if(q != 128) bitwriter_writebits(bw, 5, q);
1007                              break;
1008                     case 2:  if(q != 128) bitwriter_writebits(bw, 7, q);
1009                              break;
1010                     case 3:  bitwriter_writebits(bw, 3, q);
1011                              break;
1012                     case 4:  if(q != 128) bitwriter_writebits(bw, 7, q);
1013                              break;
1014                     case 14: bitwriter_writebits(bw, 14, q);
1015                              break;
1016                     case 15: bitwriter_writebits(bw, 16, q);
1017                              break;
1018                     default: bitwriter_writebits(bw, b - 1, q);
1019                 }
1020             }
1021         }
1022     }
1023 }
1024 
1025 static int
output_frame_end(A52ThreadContext * tctx)1026 output_frame_end(A52ThreadContext *tctx)
1027 {
1028     uint8_t *frame;
1029     int fs, fs58, n, crc1, crc2, bitcount;
1030 
1031     fs = tctx->frame.frame_size;
1032     // align to 8 bits
1033     bitwriter_flushbits(&tctx->bw);
1034     // add zero bytes to reach the frame size
1035     frame = tctx->bw.buffer;
1036     bitcount = bitwriter_bitcount(&tctx->bw);
1037     n = (fs << 1) - 2 - (bitcount >> 3);
1038     if(n < 0) {
1039         fprintf(stderr, "data size exceeds frame size (frame=%d data=%d)\n",
1040                 (fs << 1) - 2, bitcount >> 3);
1041         return -1;
1042     }
1043     if(n > 0) memset(&tctx->bw.buffer[bitcount>>3], 0, n);
1044 
1045     // compute crc1 for 1st 5/8 of frame
1046     fs58 = (fs >> 1) + (fs >> 3);
1047     crc1 = calc_crc16(&frame[4], (fs58<<1)-4);
1048     crc1 = crc16_zero(crc1, (fs58<<1)-2);
1049     frame[2] = crc1 >> 8;
1050     frame[3] = crc1;
1051     // double-check
1052     crc1 = calc_crc16(&frame[2], (fs58<<1)-2);
1053     if(crc1 != 0) fprintf(stderr, "CRC ERROR\n");
1054 
1055     // compute crc2 for final 3/8 of frame
1056     crc2 = calc_crc16(&frame[fs58<<1], ((fs - fs58) << 1) - 2);
1057     frame[(fs<<1)-2] = crc2 >> 8;
1058     frame[(fs<<1)-1] = crc2;
1059 
1060     return (fs << 1);
1061 }
1062 
1063 static void
copy_samples(A52ThreadContext * tctx)1064 copy_samples(A52ThreadContext *tctx)
1065 {
1066     A52Context *ctx = tctx->ctx;
1067     A52Frame *frame = &tctx->frame;
1068     FLOAT buffer[A52_SAMPLES_PER_FRAME];
1069     FLOAT *in_audio;
1070     FLOAT *out_audio;
1071     FLOAT *temp;
1072     int ch, blk;
1073 #define SWAP_BUFFERS temp=in_audio;in_audio=out_audio;out_audio=temp;
1074 
1075 #ifndef NO_THREADS
1076     if (ctx->n_threads > 1) {
1077         posix_mutex_lock(&ctx->ts.samples_mutex);
1078 
1079         windows_cs_enter(&ctx->ts.samples_cs);
1080 
1081         while (ctx->ts.samples_thread_num != tctx->thread_num) {
1082             posix_cond_wait(&tctx->ts.samples_cond, &ctx->ts.samples_mutex);
1083 
1084             windows_cs_leave(&ctx->ts.samples_cs);
1085             windows_event_wait(&tctx->ts.samples_event);
1086             windows_cs_enter(&ctx->ts.samples_cs);
1087         }
1088         windows_event_reset(&tctx->ts.samples_event);
1089     }
1090 #endif
1091     for(ch=0; ch<ctx->n_all_channels; ch++) {
1092         out_audio = buffer;
1093         in_audio = frame->input_audio[ch];
1094         // DC-removal high-pass filter
1095         if(ctx->params.use_dc_filter) {
1096             filter_run(&ctx->dc_filter[ch], out_audio, in_audio,
1097                        A52_SAMPLES_PER_FRAME);
1098             SWAP_BUFFERS
1099         }
1100         if (ch < ctx->n_channels) {
1101             // channel bandwidth filter
1102             if(ctx->params.use_bw_filter) {
1103                 filter_run(&ctx->bw_filter[ch], out_audio, in_audio,
1104                            A52_SAMPLES_PER_FRAME);
1105                 SWAP_BUFFERS
1106             }
1107             // block-switching high-pass filter
1108             if(ctx->params.use_block_switching) {
1109                 filter_run(&ctx->bs_filter[ch], out_audio, in_audio,
1110                            A52_SAMPLES_PER_FRAME);
1111                 memcpy(frame->blocks[0].transient_samples[ch],
1112                        ctx->last_transient_samples[ch], 256 * sizeof(FLOAT));
1113                 memcpy(&frame->blocks[0].transient_samples[ch][256], out_audio,
1114                        256 * sizeof(FLOAT));
1115                 for(blk=1; blk<A52_NUM_BLOCKS; blk++) {
1116                     memcpy(frame->blocks[blk].transient_samples[ch],
1117                            &out_audio[256*(blk-1)], 512 * sizeof(FLOAT));
1118                 }
1119                 memcpy(ctx->last_transient_samples[ch],
1120                        &out_audio[256*5], 256 * sizeof(FLOAT));
1121             }
1122         } else {
1123             // LFE bandwidth low-pass filter
1124             if(ctx->params.use_lfe_filter) {
1125                 assert(ch == ctx->lfe_channel);
1126                 filter_run(&ctx->lfe_filter, out_audio, in_audio,
1127                            A52_SAMPLES_PER_FRAME);
1128                 SWAP_BUFFERS
1129             }
1130         }
1131 
1132         memcpy(frame->blocks[0].input_samples[ch], ctx->last_samples[ch],
1133                256 * sizeof(FLOAT));
1134         memcpy(&frame->blocks[0].input_samples[ch][256], in_audio,
1135                256 * sizeof(FLOAT));
1136         for(blk=1; blk<A52_NUM_BLOCKS; blk++) {
1137             memcpy(frame->blocks[blk].input_samples[ch], &in_audio[256*(blk-1)],
1138                    512 * sizeof(FLOAT));
1139         }
1140         memcpy(ctx->last_samples[ch],
1141                &in_audio[256*5], 256 * sizeof(FLOAT));
1142     }
1143 #ifndef NO_THREADS
1144     if (ctx->n_threads > 1) {
1145         ++ctx->ts.samples_thread_num;
1146         ctx->ts.samples_thread_num %= ctx->n_threads;
1147 
1148         posix_cond_signal(tctx->ts.next_samples_cond);
1149         posix_mutex_unlock(&ctx->ts.samples_mutex);
1150 
1151         windows_event_set(tctx->ts.next_samples_event);
1152         windows_cs_leave(&ctx->ts.samples_cs);
1153     }
1154 #endif
1155 #undef SWAP_BUFFERS
1156 }
1157 
1158 /* determines block length by detecting transients */
1159 static int
detect_transient(FLOAT * in)1160 detect_transient(FLOAT *in)
1161 {
1162     FLOAT *xx = in;
1163     int i, j;
1164     FLOAT level1[2];
1165     FLOAT level2[4];
1166     FLOAT level3[8];
1167     FLOAT tmax = FCONST(100.0) / FCONST(32768.0);
1168     FLOAT t1 = FCONST(0.100);
1169     FLOAT t2 = FCONST(0.075);
1170     FLOAT t3 = FCONST(0.050);
1171 
1172     // level 1 (2 x 256)
1173     for(i=0; i<2; i++) {
1174         level1[i] = 0;
1175         for(j=0; j<256; j++) {
1176             level1[i] = MAX(AFT_FABS(xx[i*256+j]), level1[i]);
1177         }
1178         if(level1[i] < tmax) {
1179             return 0;
1180         }
1181         if((i > 0) && (level1[i] * t1 > level1[i-1])) {
1182             return 1;
1183         }
1184     }
1185 
1186     // level 2 (4 x 128)
1187     for(i=1; i<4; i++) {
1188         level2[i] = 0;
1189         for(j=0; j<128; j++) {
1190             level2[i] = MAX(AFT_FABS(xx[i*128+j]), level2[i]);
1191         }
1192         if((i > 1) && (level2[i] * t2 > level2[i-1])) {
1193             return 1;
1194         }
1195     }
1196 
1197     // level 3 (8 x 64)
1198     for(i=3; i<8; i++) {
1199         level3[i] = 0;
1200         for(j=0; j<64; j++) {
1201             level3[i] = MAX(AFT_FABS(xx[i*64+j]), level3[i]);
1202         }
1203         if((i > 3) && (level3[i] * t3 > level3[i-1])) {
1204             return 1;
1205         }
1206     }
1207 
1208     return 0;
1209 }
1210 
1211 static void
generate_coefs(A52ThreadContext * tctx)1212 generate_coefs(A52ThreadContext *tctx)
1213 {
1214     A52Context *ctx = tctx->ctx;
1215     A52Block *block;
1216     void (*mdct_256)(struct A52ThreadContext *tctx, FLOAT *out, FLOAT *in) =
1217         ctx->mdct_ctx_256.mdct;
1218     void (*mdct_512)(struct A52ThreadContext *tctx, FLOAT *out, FLOAT *in) =
1219         ctx->mdct_ctx_512.mdct;
1220     int blk, ch, i;
1221 
1222     for(ch=0; ch<ctx->n_all_channels; ch++) {
1223         for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1224             block = &tctx->frame.blocks[blk];
1225             if(ctx->params.use_block_switching) {
1226                 block->blksw[ch] = detect_transient(block->transient_samples[ch]);
1227             } else {
1228                 block->blksw[ch] = 0;
1229             }
1230             ctx->apply_a52_window(block->input_samples[ch]);
1231             if(block->blksw[ch]) {
1232                 mdct_256(tctx, block->mdct_coef[ch], block->input_samples[ch]);
1233             } else {
1234                 mdct_512(tctx, block->mdct_coef[ch], block->input_samples[ch]);
1235             }
1236             for(i=tctx->frame.ncoefs[ch]; i<256; i++) {
1237                 block->mdct_coef[ch][i] = 0.0;
1238             }
1239         }
1240     }
1241 }
1242 
1243 static void
calc_rematrixing(A52ThreadContext * tctx)1244 calc_rematrixing(A52ThreadContext *tctx)
1245 {
1246     A52Context *ctx = tctx->ctx;
1247     A52Frame *frame = &tctx->frame;
1248     A52Block *block;
1249     FLOAT sum[4][4];
1250     FLOAT lt, rt, ctmp1, ctmp2;
1251     int blk, bnd, i;
1252 
1253 
1254     if(!ctx->params.use_rematrixing) {
1255         frame->blocks[0].rematstr = 1;
1256         for(bnd=0; bnd<4; bnd++) {
1257             frame->blocks[0].rematflg[bnd] = 0;
1258         }
1259         for(blk=1; blk<A52_NUM_BLOCKS; blk++) {
1260             frame->blocks[blk].rematstr = 0;
1261         }
1262         return;
1263     }
1264 
1265     for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1266         block = &frame->blocks[blk];
1267 
1268         block->rematstr = 0;
1269         if(blk == 0) block->rematstr = 1;
1270 
1271         for(bnd=0; bnd<4; bnd++) {
1272             block->rematflg[bnd] = 0;
1273             sum[bnd][0] = sum[bnd][1] = sum[bnd][2] = sum[bnd][3] = 0;
1274             for(i=rematbndtab[bnd][0]; i<=rematbndtab[bnd][1]; i++) {
1275                 if(i == frame->ncoefs[0]) break;
1276                 lt = block->mdct_coef[0][i];
1277                 rt = block->mdct_coef[1][i];
1278                 sum[bnd][0] += lt * lt;
1279                 sum[bnd][1] += rt * rt;
1280                 sum[bnd][2] += (lt + rt) * (lt + rt) / FCONST(4.0);
1281                 sum[bnd][3] += (lt - rt) * (lt - rt) / FCONST(4.0);
1282             }
1283             if(sum[bnd][0]+sum[bnd][1] >= (sum[bnd][2]+sum[bnd][3])/FCONST(2.0)) {
1284                 block->rematflg[bnd] = 1;
1285                 for(i=rematbndtab[bnd][0]; i<=rematbndtab[bnd][1]; i++) {
1286                     if(i == frame->ncoefs[0]) break;
1287                     ctmp1 = block->mdct_coef[0][i] * FCONST(0.5);
1288                     ctmp2 = block->mdct_coef[1][i] * FCONST(0.5);
1289                     block->mdct_coef[0][i] = ctmp1 + ctmp2;
1290                     block->mdct_coef[1][i] = ctmp1 - ctmp2;
1291                 }
1292             }
1293             if(blk != 0 && block->rematstr == 0 &&
1294                     block->rematflg[bnd] != frame->blocks[blk-1].rematflg[bnd]) {
1295                 block->rematstr = 1;
1296             }
1297         }
1298     }
1299 }
1300 
1301 /** Adjust for fractional frame sizes in CBR mode */
1302 static void
adjust_frame_size(A52ThreadContext * tctx)1303 adjust_frame_size(A52ThreadContext *tctx)
1304 {
1305     A52Context *ctx = tctx->ctx;
1306     A52Frame *f = &tctx->frame;
1307     uint32_t kbps = f->bit_rate * 1000;
1308     uint32_t srate = ctx->sample_rate;
1309     int add;
1310 
1311     while(tctx->bit_cnt >= kbps && tctx->sample_cnt >= srate) {
1312         tctx->bit_cnt -= kbps;
1313         tctx->sample_cnt -= srate;
1314     }
1315     add = !!(tctx->bit_cnt * srate < tctx->sample_cnt * kbps);
1316     f->frame_size = f->frame_size_min + add;
1317 }
1318 
1319 static void
compute_dither_strategy(A52ThreadContext * tctx)1320 compute_dither_strategy(A52ThreadContext *tctx)
1321 {
1322     A52Block *block0;
1323     A52Block *block1;
1324     int channels = tctx->ctx->n_channels;
1325     int blk, ch;
1326 
1327     block0 = NULL;
1328     for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1329         block1 = &tctx->frame.blocks[blk];
1330         for(ch=0; ch<channels; ch++) {
1331             if(block1->blksw[ch] || ((blk>0) && block0->blksw[ch])) {
1332                 block1->dithflag[ch] = 0;
1333             } else {
1334                 block1->dithflag[ch] = 1;
1335             }
1336         }
1337         block0 = block1;
1338     }
1339 }
1340 
1341 static void
calculate_dynrng(A52ThreadContext * tctx)1342 calculate_dynrng(A52ThreadContext *tctx)
1343 {
1344     A52Context *ctx = tctx->ctx;
1345     A52Block *block;
1346     int blk;
1347 
1348     if(ctx->params.dynrng_profile == DYNRNG_PROFILE_NONE)
1349         return;
1350 
1351     for(blk=0; blk<A52_NUM_BLOCKS; blk++) {
1352         block = &tctx->frame.blocks[blk];
1353         block->dynrng = calculate_block_dynrng(block->input_samples,
1354                                                ctx->n_all_channels,
1355                                                -ctx->meta.dialnorm,
1356                                                ctx->params.dynrng_profile);
1357     }
1358 }
1359 
1360 static int
encode_frame(A52ThreadContext * tctx,uint8_t * frame_buffer)1361 encode_frame(A52ThreadContext *tctx, uint8_t *frame_buffer)
1362 {
1363     A52Context *ctx = tctx->ctx;
1364     A52Frame *frame = &tctx->frame;
1365 
1366     if(frame_init(tctx)) {
1367         fprintf(stderr, "Encoding has not properly initialized\n");
1368         return -1;
1369     }
1370 
1371     copy_samples(tctx);
1372 
1373     calculate_dynrng(tctx);
1374 
1375     generate_coefs(tctx);
1376 
1377     compute_dither_strategy(tctx);
1378 
1379     if(ctx->acmod == A52_ACMOD_STEREO) {
1380         calc_rematrixing(tctx);
1381     }
1382 
1383     // variable bandwidth
1384     if(ctx->params.bwcode == -2) {
1385         // process exponents at full bandwidth
1386         ctx->process_exponents(tctx);
1387         // run bit allocation at q=240 to calculate bandwidth
1388         vbw_bit_allocation(tctx);
1389     }
1390 
1391     ctx->process_exponents(tctx);
1392 
1393     if(ctx->params.encoding_mode == AFTEN_ENC_MODE_CBR) {
1394         adjust_frame_size(tctx);
1395     }
1396 
1397     if(compute_bit_allocation(tctx)) {
1398         fprintf(stderr, "Error in bit allocation\n");
1399         tctx->framesize = 0;
1400         return -1;
1401     }
1402 
1403     quantize_mantissas(tctx);
1404 
1405     // increment counters
1406     tctx->bit_cnt += frame->frame_size * 16;
1407     tctx->sample_cnt += A52_SAMPLES_PER_FRAME;
1408 
1409     // update encoding status
1410     tctx->status.quality = frame->quality;
1411     tctx->status.bit_rate = frame->bit_rate;
1412     tctx->status.bwcode = frame->bwcode;
1413 
1414     output_frame_header(tctx, frame_buffer);
1415     output_audio_blocks(tctx);
1416     tctx->framesize = output_frame_end(tctx);
1417 
1418     return 0;
1419 }
1420 
1421 #ifndef NO_THREADS
/**
 * Entry point of an encoder worker thread.
 *
 * The thread loops: it waits to be woken through enter_cond / enter_event,
 * acknowledges the wake-up through confirm_cond / ready_event, and then
 * encodes the frame stored in its own thread context.  It leaves the loop
 * when its state is END (framesize is set to 0) or ABORT (framesize is set
 * to -1).  A failing encode_frame() switches the state to ABORT, which is
 * acted upon at the next wake-up.
 *
 * NOTE(review): both posix_* and windows_* primitives are called
 * unconditionally; presumably each family expands to nothing on the other
 * platform -- confirm in the threading header.
 *
 * @param vtctx  the thread's A52ThreadContext, passed as void*
 * @return always 0
 */
static int
threaded_encode(void* vtctx)
{
    A52ThreadContext *tctx;

#ifdef MINGW_ALIGN_STACK_HACK
    /* Round esp down to a multiple of 16 and push the adjustment four times
       (16 bytes, so the alignment is kept) for the epilogue to undo. */
    asm volatile (
        "movl %%esp, %%ecx\n"
        "andl $15, %%ecx\n"
        "subl %%ecx, %%esp\n"
        "pushl %%ecx\n"
        "pushl %%ecx\n"
        "pushl %%ecx\n"
        "pushl %%ecx\n"
        : : : "%esp","%ecx");
#endif

    tctx = vtctx;

    /* enter_mutex is held for the whole loop, except while blocked inside
       posix_cond_wait(); the initial signal is sent once at start-up */
    posix_mutex_lock(&tctx->ts.enter_mutex);
    posix_cond_signal(&tctx->ts.enter_cond);
    while(1) {
        /* wait to be woken, then confirm the wake-up */
        posix_cond_wait(&tctx->ts.enter_cond, &tctx->ts.enter_mutex);
        posix_mutex_lock(&tctx->ts.confirm_mutex);
        posix_cond_signal(&tctx->ts.confirm_cond);
        posix_mutex_unlock(&tctx->ts.confirm_mutex);

        windows_event_set(&tctx->ts.ready_event);
        windows_event_wait(&tctx->ts.enter_event);
        /* end thread if nothing to encode */
        if (tctx->state == END) {
            tctx->framesize = 0;
            break;
        }
        if (tctx->state == ABORT) {
            tctx->framesize = -1;
            break;
        }
        /* a failed frame makes the thread abort at its next wake-up */
        if (encode_frame(tctx, tctx->frame_buffer))
            tctx->state = ABORT;
    }
    posix_mutex_unlock(&tctx->ts.enter_mutex);

    windows_event_set(&tctx->ts.ready_event);

#ifdef MINGW_ALIGN_STACK_HACK
    /* Undo the prologue: drop the four saved copies and restore esp. */
    asm volatile (
        "popl %%ecx\n"
        "popl %%ecx\n"
        "popl %%ecx\n"
        "popl %%ecx\n"
        "addl %%ecx, %%esp\n"
        : : : "%esp", "%ecx");
#endif

    return 0;
}
1479 
1480 static int
encode_frame_parallel(AftenContext * s,uint8_t * frame_buffer,const void * samples)1481 encode_frame_parallel(AftenContext *s, uint8_t *frame_buffer, const void *samples)
1482 {
1483     A52Context *ctx = s->private_context;
1484     int framesize = 0;
1485 
1486     do {
1487         A52ThreadContext *tctx = &ctx->tctx[ctx->ts.current_thread_num];
1488 
1489         posix_mutex_lock(&tctx->ts.enter_mutex);
1490 
1491         windows_event_wait(&tctx->ts.ready_event);
1492 
1493         if (tctx->state == ABORT || ctx->ts.threads_to_abort) {
1494             tctx->state = ABORT;
1495             framesize = -1;
1496             if (!ctx->ts.threads_to_abort)
1497                 ctx->ts.threads_to_abort = ctx->n_threads;
1498             --ctx->ts.threads_to_abort;
1499         } else {
1500             if (tctx->state == START)
1501                 tctx->state = WORK;
1502             else {
1503                 if(tctx->framesize > 0) {
1504                     framesize = tctx->framesize;
1505                     memcpy(frame_buffer, tctx->frame_buffer, framesize);
1506                    // update encoding status
1507                     s->status.quality   = tctx->status.quality;
1508                     s->status.bit_rate  = tctx->status.bit_rate;
1509                     s->status.bwcode    = tctx->status.bwcode;
1510                 } else {
1511                     posix_mutex_unlock(&tctx->ts.enter_mutex);
1512                     goto end;
1513                 }
1514             }
1515             if(!samples)
1516                 tctx->state = END;
1517             else
1518                 // convert sample format and de-interleave channels
1519                 ctx->fmt_convert_from_src(tctx->frame.input_audio, samples,
1520                                           ctx->n_all_channels,
1521                                           A52_SAMPLES_PER_FRAME);
1522         }
1523         posix_mutex_lock(&tctx->ts.confirm_mutex);
1524         posix_cond_signal(&tctx->ts.enter_cond);
1525         posix_mutex_unlock(&tctx->ts.enter_mutex);
1526         posix_cond_wait(&tctx->ts.confirm_cond, &tctx->ts.confirm_mutex);
1527         posix_mutex_unlock(&tctx->ts.confirm_mutex);
1528 
1529         windows_event_set(&tctx->ts.enter_event);
1530 end:
1531         ++ctx->ts.current_thread_num;
1532         ctx->ts.current_thread_num %= ctx->n_threads;
1533     } while(ctx->ts.threads_to_abort);
1534 
1535     return framesize;
1536 }
1537 #endif
1538 
1539 int
aften_encode_frame(AftenContext * s,uint8_t * frame_buffer,const void * samples)1540 aften_encode_frame(AftenContext *s, uint8_t *frame_buffer, const void *samples)
1541 {
1542     A52Context *ctx;
1543     A52ThreadContext *tctx;
1544     A52Frame *frame;
1545 
1546     if(s == NULL || frame_buffer == NULL) {
1547         fprintf(stderr, "One or more NULL parameters passed to aften_encode_frame\n");
1548         return -1;
1549     }
1550     ctx = s->private_context;
1551 #ifndef NO_THREADS
1552     if (ctx->n_threads > 1)
1553         return encode_frame_parallel(s, frame_buffer, samples);
1554 #endif
1555     if (!samples)
1556         return 0;
1557 
1558     tctx = ctx->tctx;
1559     frame = &tctx->frame;
1560 
1561     ctx->fmt_convert_from_src(frame->input_audio, samples, ctx->n_all_channels,
1562                               A52_SAMPLES_PER_FRAME);
1563 
1564     encode_frame(tctx, frame_buffer);
1565 
1566     s->status.quality   = tctx->status.quality;
1567     s->status.bit_rate  = tctx->status.bit_rate;
1568     s->status.bwcode    = tctx->status.bwcode;
1569 
1570     return tctx->framesize;
1571 }
1572 
1573 void
aften_encode_close(AftenContext * s)1574 aften_encode_close(AftenContext *s)
1575 {
1576     if(s != NULL && s->private_context != NULL) {
1577         A52Context *ctx = s->private_context;
1578         /* mdct_close deinits both mdcts */
1579         ctx->mdct_ctx_512.mdct_close(ctx);
1580 
1581         posix_mutex_destroy(&ctx->ts.samples_mutex);
1582 
1583         windows_cs_destroy(&ctx->ts.samples_cs);
1584         if (ctx->tctx) {
1585             if (ctx->n_threads == 1)
1586                 ctx->tctx[0].mdct_tctx_512.mdct_thread_close(&ctx->tctx[0]);
1587             else {
1588                 int i;
1589                 for (i=0; i<ctx->n_threads; ++i) {
1590                     A52ThreadContext cur_tctx = ctx->tctx[i];
1591                     thread_join(cur_tctx.ts.thread);
1592                     cur_tctx.mdct_tctx_512.mdct_thread_close(&cur_tctx);
1593                     posix_cond_destroy(&cur_tctx.ts.enter_cond);
1594                     posix_cond_destroy(&cur_tctx.ts.confirm_cond);
1595                     posix_cond_destroy(&cur_tctx.ts.samples_cond);
1596 
1597                     posix_mutex_destroy(&cur_tctx.ts.enter_mutex);
1598                     posix_mutex_destroy(&cur_tctx.ts.confirm_mutex);
1599 
1600                     windows_event_destroy(&cur_tctx.ts.ready_event);
1601                     windows_event_destroy(&cur_tctx.ts.enter_event);
1602                     windows_event_destroy(&cur_tctx.ts.samples_event);
1603                 }
1604             }
1605             free(ctx->tctx);
1606         }
1607         free(ctx);
1608         s->private_context = NULL;
1609     }
1610 }
1611