1 /*
2 * LAME MP3 encoding engine
3 *
4 * Copyright (c) 1999 Mark Taylor
5 * Copyright (c) 2000-2002 Takehiro Tominaga
6 * Copyright (c) 2000-2011 Robert Hegemann
7 * Copyright (c) 2001 Gabriel Bouvigne
8 * Copyright (c) 2001 John Dahlstrom
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU Library General Public
21 * License along with this library; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 02111-1307, USA.
24 */
25
26 /* $Id: encoder.c,v 1.111 2011/05/07 16:05:17 rbrito Exp $ */
27
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31
32
33 #include "lame.h"
34 #include "lame-machine.h"
35 #include "encoder.h"
36 #include "util.h"
37 #include "lame_global_flags.h"
38 #include "newmdct.h"
39 #include "psymodel.h"
40 #include "lame-analysis.h"
41 #include "bitstream.h"
42 #include "VbrTag.h"
43 #include "quantize_pvt.h"
44
45
46
47 /*
48 * auto-adjust of ATH, useful for low volume
49 * Gabriel Bouvigne 3 feb 2001
50 *
51 * modifies some values in
52 * gfp->internal_flags->ATH
53 * (gfc->ATH)
54 */
55 static void
adjust_ATH(lame_internal_flags const * const gfc)56 adjust_ATH(lame_internal_flags const *const gfc)
57 {
58 SessionConfig_t const *const cfg = &gfc->cfg;
59 FLOAT gr2_max, max_pow;
60
61 if (gfc->ATH->use_adjust == 0) {
62 gfc->ATH->adjust_factor = 1.0; /* no adjustment */
63 return;
64 }
65
66 /* jd - 2001 mar 12, 27, jun 30 */
67 /* loudness based on equal loudness curve; */
68 /* use granule with maximum combined loudness */
69 max_pow = gfc->ov_psy.loudness_sq[0][0];
70 gr2_max = gfc->ov_psy.loudness_sq[1][0];
71 if (cfg->channels_out == 2) {
72 max_pow += gfc->ov_psy.loudness_sq[0][1];
73 gr2_max += gfc->ov_psy.loudness_sq[1][1];
74 }
75 else {
76 max_pow += max_pow;
77 gr2_max += gr2_max;
78 }
79 if (cfg->mode_gr == 2) {
80 max_pow = Max(max_pow, gr2_max);
81 }
82 max_pow *= 0.5; /* max_pow approaches 1.0 for full band noise */
83
84 /* jd - 2001 mar 31, jun 30 */
85 /* user tuning of ATH adjustment region */
86 max_pow *= gfc->ATH->aa_sensitivity_p;
87
88 /* adjust ATH depending on range of maximum value
89 */
90
91 /* jd - 2001 feb27, mar12,20, jun30, jul22 */
92 /* continuous curves based on approximation */
93 /* to GB's original values. */
94 /* For an increase in approximate loudness, */
95 /* set ATH adjust to adjust_limit immediately */
96 /* after a delay of one frame. */
97 /* For a loudness decrease, reduce ATH adjust */
98 /* towards adjust_limit gradually. */
99 /* max_pow is a loudness squared or a power. */
100 if (max_pow > 0.03125) { /* ((1 - 0.000625)/ 31.98) from curve below */
101 if (gfc->ATH->adjust_factor >= 1.0) {
102 gfc->ATH->adjust_factor = 1.0;
103 }
104 else {
105 /* preceding frame has lower ATH adjust; */
106 /* ascend only to the preceding adjust_limit */
107 /* in case there is leading low volume */
108 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
109 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
110 }
111 }
112 gfc->ATH->adjust_limit = 1.0;
113 }
114 else { /* adjustment curve */
115 /* about 32 dB maximum adjust (0.000625) */
116 FLOAT const adj_lim_new = 31.98 * max_pow + 0.000625;
117 if (gfc->ATH->adjust_factor >= adj_lim_new) { /* descend gradually */
118 gfc->ATH->adjust_factor *= adj_lim_new * 0.075 + 0.925;
119 if (gfc->ATH->adjust_factor < adj_lim_new) { /* stop descent */
120 gfc->ATH->adjust_factor = adj_lim_new;
121 }
122 }
123 else { /* ascend */
124 if (gfc->ATH->adjust_limit >= adj_lim_new) {
125 gfc->ATH->adjust_factor = adj_lim_new;
126 }
127 else { /* preceding frame has lower ATH adjust; */
128 /* ascend only to the preceding adjust_limit */
129 if (gfc->ATH->adjust_factor < gfc->ATH->adjust_limit) {
130 gfc->ATH->adjust_factor = gfc->ATH->adjust_limit;
131 }
132 }
133 }
134 gfc->ATH->adjust_limit = adj_lim_new;
135 }
136 }
137
138 /***********************************************************************
139 *
140 * some simple statistics
141 *
142 * bitrate index 0: free bitrate -> not allowed in VBR mode
143 * : bitrates, kbps depending on MPEG version
144 * bitrate index 15: forbidden
145 *
146 * mode_ext:
147 * 0: LR
148 * 1: LR-i
149 * 2: MS
150 * 3: MS-i
151 *
152 ***********************************************************************/
153
154 static void
updateStats(lame_internal_flags * const gfc)155 updateStats(lame_internal_flags * const gfc)
156 {
157 SessionConfig_t const *const cfg = &gfc->cfg;
158 EncResult_t *eov = &gfc->ov_enc;
159 int gr, ch;
160 assert(0 <= eov->bitrate_index && eov->bitrate_index < 16);
161 assert(0 <= eov->mode_ext && eov->mode_ext < 4);
162
163 /* count bitrate indices */
164 eov->bitrate_channelmode_hist[eov->bitrate_index][4]++;
165 eov->bitrate_channelmode_hist[15][4]++;
166
167 /* count 'em for every mode extension in case of 2 channel encoding */
168 if (cfg->channels_out == 2) {
169 eov->bitrate_channelmode_hist[eov->bitrate_index][eov->mode_ext]++;
170 eov->bitrate_channelmode_hist[15][eov->mode_ext]++;
171 }
172 for (gr = 0; gr < cfg->mode_gr; ++gr) {
173 for (ch = 0; ch < cfg->channels_out; ++ch) {
174 int bt = gfc->l3_side.tt[gr][ch].block_type;
175 if (gfc->l3_side.tt[gr][ch].mixed_block_flag)
176 bt = 4;
177 eov->bitrate_blocktype_hist[eov->bitrate_index][bt]++;
178 eov->bitrate_blocktype_hist[eov->bitrate_index][5]++;
179 eov->bitrate_blocktype_hist[15][bt]++;
180 eov->bitrate_blocktype_hist[15][5]++;
181 }
182 }
183 }
184
185
186
187
188 static void
lame_encode_frame_init(lame_internal_flags * gfc,const sample_t * const inbuf[2])189 lame_encode_frame_init(lame_internal_flags * gfc, const sample_t *const inbuf[2])
190 {
191 SessionConfig_t const *const cfg = &gfc->cfg;
192
193 int ch, gr;
194
195 if (gfc->lame_encode_frame_init == 0) {
196 sample_t primebuff0[286 + 1152 + 576];
197 sample_t primebuff1[286 + 1152 + 576];
198 int const framesize = 576 * cfg->mode_gr;
199 /* prime the MDCT/polyphase filterbank with a short block */
200 int i, j;
201 gfc->lame_encode_frame_init = 1;
202 memset(primebuff0, 0, sizeof(primebuff0));
203 memset(primebuff1, 0, sizeof(primebuff1));
204 for (i = 0, j = 0; i < 286 + 576 * (1 + cfg->mode_gr); ++i) {
205 if (i < framesize) {
206 primebuff0[i] = 0;
207 if (cfg->channels_out == 2)
208 primebuff1[i] = 0;
209 }
210 else {
211 primebuff0[i] = inbuf[0][j];
212 if (cfg->channels_out == 2)
213 primebuff1[i] = inbuf[1][j];
214 ++j;
215 }
216 }
217 /* polyphase filtering / mdct */
218 for (gr = 0; gr < cfg->mode_gr; gr++) {
219 for (ch = 0; ch < cfg->channels_out; ch++) {
220 gfc->l3_side.tt[gr][ch].block_type = SHORT_TYPE;
221 }
222 }
223 mdct_sub48(gfc, primebuff0, primebuff1);
224
225 /* check FFT will not use a negative starting offset */
226 #if 576 < FFTOFFSET
227 # error FFTOFFSET greater than 576: FFT uses a negative offset
228 #endif
229 /* check if we have enough data for FFT */
230 assert(gfc->sv_enc.mf_size >= (BLKSIZE + framesize - FFTOFFSET));
231 /* check if we have enough data for polyphase filterbank */
232 assert(gfc->sv_enc.mf_size >= (512 + framesize - 32));
233 }
234
235 }
236
237
238
239
240
241
242
243 /************************************************************************
244 *
245 * encodeframe() Layer 3
246 *
247 * encode a single frame
248 *
249 ************************************************************************
250 lame_encode_frame()
251
252
253 gr 0 gr 1
254 inbuf: |--------------|--------------|--------------|
255
256
257 Polyphase (18 windows, each shifted 32)
258 gr 0:
259 window1 <----512---->
260 window18 <----512---->
261
262 gr 1:
263 window1 <----512---->
264 window18 <----512---->
265
266
267
268 MDCT output: |--------------|--------------|--------------|
269
270 FFT's <---------1024---------->
271 <---------1024-------->
272
273
274
275 inbuf = buffer of PCM data size=MP3 framesize
276 encoder acts on inbuf[ch][0], but output is delayed by MDCTDELAY
277 so the MDCT coefficients are from inbuf[ch][-MDCTDELAY]
278
279 psy-model FFT has a 1 granule delay, so we feed it data for the
280 next granule.
281 FFT is centered over granule: 224+576+224
282 So FFT starts at: 576-224-MDCTDELAY
283
284 MPEG2: FFT ends at: BLKSIZE+576-224-MDCTDELAY (1328)
285 MPEG1: FFT ends at: BLKSIZE+2*576-224-MDCTDELAY (1904)
286
287 MPEG2: polyphase first window: [0..511]
288 18th window: [544..1055] (1056)
289 MPEG1: 36th window: [1120..1631] (1632)
290 data needed: 512+framesize-32
291
292 A close look at newmdct.c shows that the polyphase filterbank
293 only uses data from [0..510] for each window. Perhaps because the window
294 used by the filterbank is zero for the last point, so Takehiro's
295 code doesn't bother to compute with it.
296
297 FFT starts at 576-224-MDCTDELAY (304) = 576-FFTOFFSET
298
299 */
300
301 typedef FLOAT chgrdata[2][2];
302
303
304 int
lame_encode_mp3_frame(lame_internal_flags * gfc,sample_t const * inbuf_l,sample_t const * inbuf_r,unsigned char * mp3buf,int mp3buf_size)305 lame_encode_mp3_frame( /* Output */
306 lame_internal_flags * gfc, /* Context */
307 sample_t const *inbuf_l, /* Input */
308 sample_t const *inbuf_r, /* Input */
309 unsigned char *mp3buf, /* Output */
310 int mp3buf_size)
311 { /* Output */
312 SessionConfig_t const *const cfg = &gfc->cfg;
313 int mp3count;
314 III_psy_ratio masking_LR[2][2]; /*LR masking & energy */
315 III_psy_ratio masking_MS[2][2]; /*MS masking & energy */
316 const III_psy_ratio (*masking)[2]; /*pointer to selected maskings */
317 const sample_t *inbuf[2];
318
319 FLOAT tot_ener[2][4];
320 FLOAT ms_ener_ratio[2] = { .5, .5 };
321 FLOAT pe[2][2] = { {0., 0.}, {0., 0.} }, pe_MS[2][2] = { {
322 0., 0.}, {
323 0., 0.}};
324 FLOAT (*pe_use)[2];
325
326 int ch, gr;
327
328 inbuf[0] = inbuf_l;
329 inbuf[1] = inbuf_r;
330
331 if (gfc->lame_encode_frame_init == 0) {
332 /*first run? */
333 lame_encode_frame_init(gfc, inbuf);
334
335 }
336
337
338 /********************** padding *****************************/
339 /* padding method as described in
340 * "MPEG-Layer3 / Bitstream Syntax and Decoding"
341 * by Martin Sieler, Ralph Sperschneider
342 *
343 * note: there is no padding for the very first frame
344 *
345 * Robert Hegemann 2000-06-22
346 */
347 gfc->ov_enc.padding = FALSE;
348 if ((gfc->sv_enc.slot_lag -= gfc->sv_enc.frac_SpF) < 0) {
349 gfc->sv_enc.slot_lag += cfg->samplerate_out;
350 gfc->ov_enc.padding = TRUE;
351 }
352
353
354
355 /****************************************
356 * Stage 1: psychoacoustic model *
357 ****************************************/
358
359 {
360 /* psychoacoustic model
361 * psy model has a 1 granule (576) delay that we must compensate for
362 * (mt 6/99).
363 */
364 int ret;
365 const sample_t *bufp[2] = {0, 0}; /* address of beginning of left & right granule */
366 int blocktype[2];
367
368 for (gr = 0; gr < cfg->mode_gr; gr++) {
369
370 for (ch = 0; ch < cfg->channels_out; ch++) {
371 bufp[ch] = &inbuf[ch][576 + gr * 576 - FFTOFFSET];
372 }
373 ret = L3psycho_anal_vbr(gfc, bufp, gr,
374 masking_LR, masking_MS,
375 pe[gr], pe_MS[gr], tot_ener[gr], blocktype);
376 if (ret != 0)
377 return -4;
378
379 if (cfg->mode == JOINT_STEREO) {
380 ms_ener_ratio[gr] = tot_ener[gr][2] + tot_ener[gr][3];
381 if (ms_ener_ratio[gr] > 0)
382 ms_ener_ratio[gr] = tot_ener[gr][3] / ms_ener_ratio[gr];
383 }
384
385 /* block type flags */
386 for (ch = 0; ch < cfg->channels_out; ch++) {
387 gr_info *const cod_info = &gfc->l3_side.tt[gr][ch];
388 cod_info->block_type = blocktype[ch];
389 cod_info->mixed_block_flag = 0;
390 }
391 }
392 }
393
394
395 /* auto-adjust of ATH, useful for low volume */
396 adjust_ATH(gfc);
397
398
399 /****************************************
400 * Stage 2: MDCT *
401 ****************************************/
402
403 /* polyphase filtering / mdct */
404 mdct_sub48(gfc, inbuf[0], inbuf[1]);
405
406
407 /****************************************
408 * Stage 3: MS/LR decision *
409 ****************************************/
410
411 /* Here will be selected MS or LR coding of the 2 stereo channels */
412 gfc->ov_enc.mode_ext = MPG_MD_LR_LR;
413
414 if (cfg->force_ms) {
415 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
416 }
417 else if (cfg->mode == JOINT_STEREO) {
418 /* ms_ratio = is scaled, for historical reasons, to look like
419 a ratio of side_channel / total.
420 0 = signal is 100% mono
421 .5 = L & R uncorrelated
422 */
423
424 /* [0] and [1] are the results for the two granules in MPEG-1,
425 * in MPEG-2 it's only a faked averaging of the same value
426 * _prev is the value of the last granule of the previous frame
427 * _next is the value of the first granule of the next frame
428 */
429
430 FLOAT sum_pe_MS = 0;
431 FLOAT sum_pe_LR = 0;
432 for (gr = 0; gr < cfg->mode_gr; gr++) {
433 for (ch = 0; ch < cfg->channels_out; ch++) {
434 sum_pe_MS += pe_MS[gr][ch];
435 sum_pe_LR += pe[gr][ch];
436 }
437 }
438
439 /* based on PE: M/S coding would not use much more bits than L/R */
440 if (sum_pe_MS <= 1.00 * sum_pe_LR) {
441
442 gr_info const *const gi0 = &gfc->l3_side.tt[0][0];
443 gr_info const *const gi1 = &gfc->l3_side.tt[cfg->mode_gr - 1][0];
444
445 if (gi0[0].block_type == gi0[1].block_type && gi1[0].block_type == gi1[1].block_type) {
446
447 gfc->ov_enc.mode_ext = MPG_MD_MS_LR;
448 }
449 }
450 }
451
452 /* bit and noise allocation */
453 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
454 masking = (const III_psy_ratio (*)[2])masking_MS; /* use MS masking */
455 pe_use = pe_MS;
456 }
457 else {
458 masking = (const III_psy_ratio (*)[2])masking_LR; /* use LR masking */
459 pe_use = pe;
460 }
461
462
463 /* copy data for MP3 frame analyzer */
464 if (cfg->analysis && gfc->pinfo != NULL) {
465 for (gr = 0; gr < cfg->mode_gr; gr++) {
466 for (ch = 0; ch < cfg->channels_out; ch++) {
467 gfc->pinfo->ms_ratio[gr] = 0;
468 gfc->pinfo->ms_ener_ratio[gr] = ms_ener_ratio[gr];
469 gfc->pinfo->blocktype[gr][ch] = gfc->l3_side.tt[gr][ch].block_type;
470 gfc->pinfo->pe[gr][ch] = pe_use[gr][ch];
471 memcpy(gfc->pinfo->xr[gr][ch], &gfc->l3_side.tt[gr][ch].xr[0], sizeof(FLOAT) * 576);
472 /* in psymodel, LR and MS data was stored in pinfo.
473 switch to MS data: */
474 if (gfc->ov_enc.mode_ext == MPG_MD_MS_LR) {
475 gfc->pinfo->ers[gr][ch] = gfc->pinfo->ers[gr][ch + 2];
476 memcpy(gfc->pinfo->energy[gr][ch], gfc->pinfo->energy[gr][ch + 2],
477 sizeof(gfc->pinfo->energy[gr][ch]));
478 }
479 }
480 }
481 }
482
483
484 /****************************************
485 * Stage 4: quantization loop *
486 ****************************************/
487
488 if (cfg->vbr == vbr_off || cfg->vbr == vbr_abr) {
489 static FLOAT const fircoef[9] = {
490 -0.0207887 * 5, -0.0378413 * 5, -0.0432472 * 5, -0.031183 * 5,
491 7.79609e-18 * 5, 0.0467745 * 5, 0.10091 * 5, 0.151365 * 5,
492 0.187098 * 5
493 };
494
495 int i;
496 FLOAT f;
497
498 for (i = 0; i < 18; i++)
499 gfc->sv_enc.pefirbuf[i] = gfc->sv_enc.pefirbuf[i + 1];
500
501 f = 0.0;
502 for (gr = 0; gr < cfg->mode_gr; gr++)
503 for (ch = 0; ch < cfg->channels_out; ch++)
504 f += pe_use[gr][ch];
505 gfc->sv_enc.pefirbuf[18] = f;
506
507 f = gfc->sv_enc.pefirbuf[9];
508 for (i = 0; i < 9; i++)
509 f += (gfc->sv_enc.pefirbuf[i] + gfc->sv_enc.pefirbuf[18 - i]) * fircoef[i];
510
511 f = (670 * 5 * cfg->mode_gr * cfg->channels_out) / f;
512 for (gr = 0; gr < cfg->mode_gr; gr++) {
513 for (ch = 0; ch < cfg->channels_out; ch++) {
514 pe_use[gr][ch] *= f;
515 }
516 }
517 }
518 gfc->iteration_loop(gfc, (const FLOAT (*)[2])pe_use, ms_ener_ratio, masking);
519
520
521 /****************************************
522 * Stage 5: bitstream formatting *
523 ****************************************/
524
525
526 /* write the frame to the bitstream */
527 (void) format_bitstream(gfc);
528
529 /* copy mp3 bit buffer into array */
530 mp3count = copy_buffer(gfc, mp3buf, mp3buf_size, 1);
531
532
533 if (cfg->write_lame_tag) {
534 AddVbrFrame(gfc);
535 }
536
537 if (cfg->analysis && gfc->pinfo != NULL) {
538 int framesize = 576 * cfg->mode_gr;
539 for (ch = 0; ch < cfg->channels_out; ch++) {
540 int j;
541 for (j = 0; j < FFTOFFSET; j++)
542 gfc->pinfo->pcmdata[ch][j] = gfc->pinfo->pcmdata[ch][j + framesize];
543 for (j = FFTOFFSET; j < 1600; j++) {
544 gfc->pinfo->pcmdata[ch][j] = inbuf[ch][j - FFTOFFSET];
545 }
546 }
547 gfc->sv_qnt.masking_lower = 1.0;
548
549 set_frame_pinfo(gfc, masking);
550 }
551
552 ++gfc->ov_enc.frame_number;
553
554 updateStats(gfc);
555
556 return mp3count;
557 }
558