1 /*
2  *      LAME MP3 encoding engine
3  *
4  *      Copyright (c) 1999 Mark Taylor
5  *      Copyright (c) 2000-2002 Takehiro Tominaga
6  *      Copyright (c) 2000-2011 Robert Hegemann
7  *      Copyright (c) 2001 Gabriel Bouvigne
8  *      Copyright (c) 2001 John Dahlstrom
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Library General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Library General Public License for more details.
19  *
20  * You should have received a copy of the GNU Library General Public
21  * License along with this library; if not, write to the
22  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23  * Boston, MA 02111-1307, USA.
24  */
25 
26 /* $Id: encoder.c,v 1.111 2011/05/07 16:05:17 rbrito Exp $ */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 
32 
33 #include "lame.h"
34 #include "lame-machine.h"
35 #include "encoder.h"
36 #include "util.h"
37 #include "lame_global_flags.h"
38 #include "newmdct.h"
39 #include "psymodel.h"
40 #include "lame-analysis.h"
41 #include "bitstream.h"
42 #include "VbrTag.h"
43 #include "quantize_pvt.h"
44 
45 
46 
47 /*
48  * auto-adjust of ATH, useful for low volume
49  * Gabriel Bouvigne 3 feb 2001
50  *
51  * modifies some values in
52  *   gfp->internal_flags->ATH
53  *   (gfc->ATH)
54  */
55 static void
adjust_ATH(lame_internal_flags const * const gfc)56 adjust_ATH(lame_internal_flags const *const gfc)
57 {
58     SessionConfig_t const *const cfg = &gfc->cfg;
59     FLOAT   gr2_max, max_pow;
60 
61     if (gfc->ATH->use_adjust == 0) {
62         gfc->ATH->adjust_factor = 1.0; /* no adjustment */
63         return;
64     }
65 
66     /* jd - 2001 mar 12, 27, jun 30 */
67     /* loudness based on equal loudness curve; */
68     /* use granule with maximum combined loudness */
69     max_pow = gfc->ov_psy.loudness_sq[0][0];
70     gr2_max = gfc->ov_psy.loudness_sq[1][0];
71     if (cfg->channels_out == 2) {
72         max_pow += gfc->ov_psy.loudness_sq[0][1];
73         gr2_max += gfc->ov_psy.loudness_sq[1][1];
74     }
75     else {
76         max_pow += max_pow;
77         gr2_max += gr2_max;
78     }
79     if (cfg->mode_gr == 2) {
80         max_pow = Max(max_pow, gr2_max);
81     }
82     max_pow *= 0.5;     /* max_pow approaches 1.0 for full band noise */
83 
84     /* jd - 2001 mar 31, jun 30 */
85     /* user tuning of ATH adjustment region */
86     max_pow *= gfc->ATH->aa_sensitivity_p;
87 
88     /*  adjust ATH depending on range of maximum value
89      */
90 
91     /* jd - 2001 feb27, mar12,20, jun30, jul22 */
92     /* continuous curves based on approximation */
93     /* to GB's original values. */
94     /* For an increase in approximate loudness, */
95     /* set ATH adjust to adjust_limit immediately */
96     /* after a delay of one frame. */
97     /* For a loudness decrease, reduce ATH adjust */
98     /* towards adjust_limit gradually. */
99     /* max_pow is a loudness squared or a power. */
100     if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
101         if (gfc->ATH->adjust_factor >= 1.0) {
102             gfc->ATH->adjust_factor = 1.0;
103         }
104         else {
105             /* preceding frame has lower ATH adjust; */
106             /* ascend only to the preceding adjust_limit */
107             /* in case there is leading low volume */
108             if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
109                 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
110             }
111         }
112         gfc->ATH->adjust_limit = 1.0;
113     }
114     else {              /* adjustment curve */
115         /* about 32 dB maximum adjust (0.000625) */
116         FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
117         if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
118             gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
119             if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
120                 gfc->ATH->adjust_factor = adj_lim_new;
121             }
122         }
123         else {          /* ascend */
124             if (gfc->ATH->adjust_limit >= adj_lim_new) {
125                 gfc->ATH->adjust_factor = adj_lim_new;
126             }
127             else {      /* preceding frame has lower ATH adjust; */
128                 /* ascend only to the preceding adjust_limit */
129                 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
130                     gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
131                 }
132             }
133         }
134         gfc->ATH->adjust_limit = adj_lim_new;
135     }
136 }
137 
138 /***********************************************************************
139  *
140  *  some simple statistics
141  *
142  *  bitrate index 0: free bitrate -> not allowed in VBR mode
143  *  : bitrates, kbps depending on MPEG version
144  *  bitrate index 15: forbidden
145  *
146  *  mode_ext:
147  *  0:  LR
148  *  1:  LR-i
149  *  2:  MS
150  *  3:  MS-i
151  *
152  ***********************************************************************/
153 
154 static void
updateStats(lame_internal_flags * const gfc)155 updateStats(lame_internal_flags * const gfc)
156 {
157     SessionConfig_t const *const cfg = &gfc->cfg;
158     EncResult_t *eov = &gfc->ov_enc;
159     int     gr, ch;
160     assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
161     assert(0 <= eov->mode_ext && eov->mode_ext < 4);
162 
163     /* count bitrate indices */
164     eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
165     eov->bitrate_channelmode_hist[15][4]++;
166 
167     /* count 'em for every mode extension in case of 2 channel encoding */
168     if (cfg->channels_out == 2) {
169         eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
170         eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
171     }
172     for (gr = 0; gr < cfg->mode_gr; ++gr) {
173         for (ch = 0; ch < cfg->channels_out; ++ch) {
174             int     bt = gfc->l3_side.tt[gr][ch].block_type;
175             if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
176                 bt = 4;
177             eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
178             eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
179             eov->bitrate_blocktype_hist[15][bt]++;
180             eov->bitrate_blocktype_hist[15][5]++;
181         }
182     }
183 }
184 
185 
186 
187 
188 static void
lame_encode_frame_init(lame_internal_flags * gfc,const sample_t * const inbuf[2])189 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
190 {
191     SessionConfig_t const *const cfg = &gfc->cfg;
192 
193     int     ch, gr;
194 
195     if (gfc->lame_encode_frame_init == 0) {
196         sample_t primebuff0[286 + 1152 + 576];
197         sample_t primebuff1[286 + 1152 + 576];
198         int const framesize = 576 * cfg->mode_gr;
199         /* prime the MDCT/polyphase filterbank with a short block */
200         int     i, j;
201         gfc->lame_encode_frame_init = 1;
202         memset(primebuff0, 0, sizeof(primebuff0));
203         memset(primebuff1, 0, sizeof(primebuff1));
204         for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
205             if (i < framesize) {
206                 primebuff0[i] = 0;
207                 if (cfg->channels_out == 2)
208                     primebuff1[i] = 0;
209             }
210             else {
211                 primebuff0[i] = inbuf[0][j];
212                 if (cfg->channels_out == 2)
213                     primebuff1[i] = inbuf[1][j];
214                 ++j;
215             }
216         }
217         /* polyphase filtering / mdct */
218         for (gr = 0; gr < cfg->mode_gr; gr++) {
219             for (ch = 0; ch < cfg->channels_out; ch++) {
220                 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
221             }
222         }
223         mdct_sub48(gfc, primebuff0, primebuff1);
224 
225         /* check FFT will not use a negative starting offset */
226 #if 576 < FFTOFFSET
227 # error FFTOFFSET greater than 576: FFT uses a negative offset
228 #endif
229         /* check if we have enough data for FFT */
230         assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
231         /* check if we have enough data for polyphase filterbank */
232         assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
233     }
234 
235 }
236 
237 
238 
239 
240 
241 
242 
243 /************************************************************************
244 *
245 * encodeframe()           Layer 3
246 *
247 * encode a single frame
248 *
249 ************************************************************************
250 lame_encode_frame()
251 
252 
253                        gr 0            gr 1
254 inbuf:           |--------------|--------------|--------------|
255 
256 
257 Polyphase (18 windows, each shifted 32)
258 gr 0:
259 window1          <----512---->
260 window18                 <----512---->
261 
262 gr 1:
263 window1                         <----512---->
264 window18                                <----512---->
265 
266 
267 
268 MDCT output:  |--------------|--------------|--------------|
269 
270 FFT's                    <---------1024---------->
271                                          <---------1024-------->
272 
273 
274 
275     inbuf = buffer of PCM data size=MP3 framesize
276     encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
    so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
278 
279     psy-model FFT has a 1 granule delay, so we feed it data for the
280     next granule.
281     FFT is centered over granule:  224+576+224
282     So FFT starts at:   576-224-MDCTDELAY
283 
284     MPEG2:  FFT ends at:  BLKSIZE+576-224-MDCTDELAY      (1328)
285     MPEG1:  FFT ends at:  BLKSIZE+2*576-224-MDCTDELAY    (1904)
286 
287     MPEG2:  polyphase first window:  [0..511]
288                       18th window:   [544..1055]          (1056)
289     MPEG1:            36th window:   [1120..1631]         (1632)
290             data needed:  512+framesize-32
291 
    A close look at newmdct.c shows that the polyphase filterbank
293     only uses data from [0..510] for each window.  Perhaps because the window
294     used by the filterbank is zero for the last point, so Takehiro's
295     code doesn't bother to compute with it.
296 
297     FFT starts at 576-224-MDCTDELAY (304)  = 576-FFTOFFSET
298 
299 */
300 
301 typedef FLOAT chgrdata[2][2];
302 
303 
304 int
lame_encode_mp3_frame(lame_internal_flags * gfc,sample_t const * inbuf_l,sample_t const * inbuf_r,unsigned char * mp3buf,int mp3buf_size)305 lame_encode_mp3_frame(       /* Output */
306                          lame_internal_flags * gfc, /* Context */
307                          sample_t const *inbuf_l, /* Input */
308                          sample_t const *inbuf_r, /* Input */
309                          unsigned char *mp3buf, /* Output */
310                          int mp3buf_size)
311 {                       /* Output */
312     SessionConfig_t const *const cfg = &gfc->cfg;
313     int     mp3count;
314     III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
315     III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
316     const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
317     const sample_t *inbuf[2];
318 
319     FLOAT   tot_ener[2][4];
320     FLOAT   ms_ener_ratio[2] = { .5, .5 };
321     FLOAT   pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
322     0., 0.}, {
323     0., 0.}};
324     FLOAT (*pe_use)[2];
325 
326     int     ch, gr;
327 
328     inbuf[0] = inbuf_l;
329     inbuf[1] = inbuf_r;
330 
331     if (gfc->lame_encode_frame_init == 0) {
332         /*first run? */
333         lame_encode_frame_init(gfc, inbuf);
334 
335     }
336 
337 
338     /********************** padding *****************************/
339     /* padding method as described in
340      * "MPEG-Layer3 / Bitstream Syntax and Decoding"
341      * by Martin Sieler, Ralph Sperschneider
342      *
343      * note: there is no padding for the very first frame
344      *
345      * Robert Hegemann 2000-06-22
346      */
347     gfc->ov_enc.padding = FALSE;
348     if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
349         gfc->sv_enc.slot_lag += cfg->samplerate_out;
350         gfc->ov_enc.padding = TRUE;
351     }
352 
353 
354 
355     /****************************************
356     *   Stage 1: psychoacoustic model       *
357     ****************************************/
358 
359     {
360         /* psychoacoustic model
361          * psy model has a 1 granule (576) delay that we must compensate for
362          * (mt 6/99).
363          */
364         int     ret;
365         const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
366         int     blocktype[2];
367 
368         for (gr = 0; gr < cfg->mode_gr; gr++) {
369 
370             for (ch = 0; ch < cfg->channels_out; ch++) {
371                 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
372             }
373             ret = L3psycho_anal_vbr(gfc, bufp, gr,
374                                     masking_LR, masking_MS,
375                                     pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
376             if (ret != 0)
377                 return -4;
378 
379             if (cfg->mode == JOINT_STEREO) {
380                 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
381                 if (ms_ener_ratio[gr] > 0)
382                     ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
383             }
384 
385             /* block type flags */
386             for (ch = 0; ch < cfg->channels_out; ch++) {
387                 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
388                 cod_info->block_type = blocktype[ch];
389                 cod_info->mixed_block_flag = 0;
390             }
391         }
392     }
393 
394 
395     /* auto-adjust of ATH, useful for low volume */
396     adjust_ATH(gfc);
397 
398 
399     /****************************************
400     *   Stage 2: MDCT                       *
401     ****************************************/
402 
403     /* polyphase filtering / mdct */
404     mdct_sub48(gfc, inbuf[0], inbuf[1]);
405 
406 
407     /****************************************
408     *   Stage 3: MS/LR decision             *
409     ****************************************/
410 
411     /* Here will be selected MS or LR coding of the 2 stereo channels */
412     gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
413 
414     if (cfg->force_ms) {
415         gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
416     }
417     else if (cfg->mode == JOINT_STEREO) {
418         /* ms_ratio = is scaled, for historical reasons, to look like
419            a ratio of side_channel / total.
420            0 = signal is 100% mono
421            .5 = L & R uncorrelated
422          */
423 
424         /* [0] and [1] are the results for the two granules in MPEG-1,
425          * in MPEG-2 it's only a faked averaging of the same value
426          * _prev is the value of the last granule of the previous frame
427          * _next is the value of the first granule of the next frame
428          */
429 
430         FLOAT   sum_pe_MS = 0;
431         FLOAT   sum_pe_LR = 0;
432         for (gr = 0; gr < cfg->mode_gr; gr++) {
433             for (ch = 0; ch < cfg->channels_out; ch++) {
434                 sum_pe_MS += pe_MS[gr][ch];
435                 sum_pe_LR += pe[gr][ch];
436             }
437         }
438 
439         /* based on PE: M/S coding would not use much more bits than L/R */
440         if (sum_pe_MS <= 1.00 * sum_pe_LR) {
441 
442             gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
443             gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
444 
445             if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
446 
447                 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
448             }
449         }
450     }
451 
452     /* bit and noise allocation */
453     if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
454         masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
455         pe_use = pe_MS;
456     }
457     else {
458         masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
459         pe_use = pe;
460     }
461 
462 
463     /* copy data for MP3 frame analyzer */
464     if (cfg->analysis && gfc->pinfo != NULL) {
465         for (gr = 0; gr < cfg->mode_gr; gr++) {
466             for (ch = 0; ch < cfg->channels_out; ch++) {
467                 gfc->pinfo->ms_ratio[gr] = 0;
468                 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
469                 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
470                 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
471                 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
472                 /* in psymodel, LR and MS data was stored in pinfo.
473                    switch to MS data: */
474                 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
475                     gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
476                     memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
477                            sizeof(gfc->pinfo->energy[gr][ch]));
478                 }
479             }
480         }
481     }
482 
483 
484     /****************************************
485     *   Stage 4: quantization loop          *
486     ****************************************/
487 
488     if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
489         static FLOAT const fircoef[9] = {
490             -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
491             7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
492             0.187098 * 5
493         };
494 
495         int     i;
496         FLOAT   f;
497 
498         for (i = 0; i < 18; i++)
499             gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
500 
501         f = 0.0;
502         for (gr = 0; gr < cfg->mode_gr; gr++)
503             for (ch = 0; ch < cfg->channels_out; ch++)
504                 f += pe_use[gr][ch];
505         gfc->sv_enc.pefirbuf[18] = f;
506 
507         f = gfc->sv_enc.pefirbuf[9];
508         for (i = 0; i < 9; i++)
509             f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
510 
511         f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
512         for (gr = 0; gr < cfg->mode_gr; gr++) {
513             for (ch = 0; ch < cfg->channels_out; ch++) {
514                 pe_use[gr][ch] *= f;
515             }
516         }
517     }
518     gfc->iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
519 
520 
521     /****************************************
522     *   Stage 5: bitstream formatting       *
523     ****************************************/
524 
525 
526     /*  write the frame to the bitstream  */
527     (void) format_bitstream(gfc);
528 
529     /* copy mp3 bit buffer into array */
530     mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
531 
532 
533     if (cfg->write_lame_tag) {
534         AddVbrFrame(gfc);
535     }
536 
537     if (cfg->analysis && gfc->pinfo != NULL) {
538         int     framesize = 576 * cfg->mode_gr;
539         for (ch = 0; ch < cfg->channels_out; ch++) {
540             int     j;
541             for (j = 0; j < FFTOFFSET; j++)
542                 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
543             for (j = FFTOFFSET; j < 1600; j++) {
544                 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
545             }
546         }
547         gfc->sv_qnt.masking_lower = 1.0;
548 
549         set_frame_pinfo(gfc, masking);
550     }
551 
552     ++gfc->ov_enc.frame_number;
553 
554     updateStats(gfc);
555 
556     return mp3count;
557 }
558