1 /*
2 * psymodel.c
3 *
4 * Copyright (c) 1999-2000 Mark Taylor
5 * Copyright (c) 2001-2002 Naoki Shibata
6 * Copyright (c) 2000-2003 Takehiro Tominaga
7 * Copyright (c) 2000-2012 Robert Hegemann
8 * Copyright (c) 2000-2005 Gabriel Bouvigne
9 * Copyright (c) 2000-2005 Alexander Leidinger
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Library General Public License for more details.
20 *
21 * You should have received a copy of the GNU Library General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
25 */
26
27 /* $Id: psymodel.c,v 1.216 2017/09/06 19:38:23 aleidinger Exp $ */
28
29
30 /*
31 PSYCHO ACOUSTICS
32
33
34 This routine computes the psycho acoustics, delayed by one granule.
35
36 Input: buffer of PCM data (1024 samples).
37
38 This window should be centered over the 576 sample granule window.
39 The routine will compute the psycho acoustics for
40 this granule, but return the psycho acoustics computed
41 for the *previous* granule. This is because the block
42 type of the previous granule can only be determined
43 after we have computed the psycho acoustics for the following
44 granule.
45
46 Output: maskings and energies for each scalefactor band.
47 block type, PE, and some correlation measures.
48 The PE is used by CBR modes to determine if extra bits
49 from the bit reservoir should be used. The correlation
50 measures are used to determine mid/side or regular stereo.
51 */
52 /*
53 Notation:
54
55 barks: a non-linear frequency scale. Mapping from frequency to
56 barks is given by freq2bark()
57
58 scalefactor bands: The spectrum (frequencies) are broken into
59 SBMAX "scalefactor bands". Thes bands
60 are determined by the MPEG ISO spec. In
61 the noise shaping/quantization code, we allocate
62 bits among the partition bands to achieve the
63 best possible quality
64
65 partition bands: The spectrum is also broken into about
66 64 "partition bands". Each partition
67 band is about .34 barks wide. There are about 2-5
68 partition bands for each scalefactor band.
69
70 LAME computes all psycho acoustic information for each partition
71 band. Then at the end of the computations, this information
72 is mapped to scalefactor bands. The energy in each scalefactor
73 band is taken as the sum of the energy in all partition bands
74 which overlap the scalefactor band. The maskings can be computed
75 in the same way (and thus represent the average masking in that band)
76 or by taking the minmum value multiplied by the number of
77 partition bands used (which represents a minimum masking in that band).
78 */
79 /*
80 The general outline is as follows:
81
82 1. compute the energy in each partition band
83 2. compute the tonality in each partition band
84 3. compute the strength of each partion band "masker"
85 4. compute the masking (via the spreading function applied to each masker)
86 5. Modifications for mid/side masking.
87
88 Each partition band is considiered a "masker". The strength
89 of the i'th masker in band j is given by:
90
91 s3(bark(i)-bark(j))*strength(i)
92
93 The strength of the masker is a function of the energy and tonality.
94 The more tonal, the less masking. LAME uses a simple linear formula
95 (controlled by NMT and TMN) which says the strength is given by the
96 energy divided by a linear function of the tonality.
97 */
98 /*
99 s3() is the "spreading function". It is given by a formula
100 determined via listening tests.
101
102 The total masking in the j'th partition band is the sum over
103 all maskings i. It is thus given by the convolution of
104 the strength with s3(), the "spreading function."
105
106 masking(j) = sum_over_i s3(i-j)*strength(i) = s3 o strength
107
108 where "o" = convolution operator. s3 is given by a formula determined
109 via listening tests. It is normalized so that s3 o 1 = 1.
110
111 Note: instead of a simple convolution, LAME also has the
112 option of using "additive masking"
113
114 The most critical part is step 2, computing the tonality of each
115 partition band. LAME has two tonality estimators. The first
116 is based on the ISO spec, and measures how predictiable the
117 signal is over time. The more predictable, the more tonal.
118 The second measure is based on looking at the spectrum of
119 a single granule. The more peaky the spectrum, the more
120 tonal. By most indications, the latter approach is better.
121
122 Finally, in step 5, the maskings for the mid and side
123 channel are possibly increased. Under certain circumstances,
124 noise in the mid & side channels is assumed to also
125 be masked by strong maskers in the L or R channels.
126
127
128 Other data computed by the psy-model:
129
130 ms_ratio side-channel / mid-channel masking ratio (for previous granule)
131 ms_ratio_next side-channel / mid-channel masking ratio for this granule
132
133 percep_entropy[2] L and R values (prev granule) of PE - A measure of how
134 much pre-echo is in the previous granule
135 percep_entropy_MS[2] mid and side channel values (prev granule) of percep_entropy
136 energy[4] L,R,M,S energy in each channel, prev granule
137 blocktype_d[2] block type to use for previous granule
138 */
139
140
141
142
143 #ifdef HAVE_CONFIG_H
144 # include <config.h>
145 #endif
146
147 #include <float.h>
148
149 #include "lame.h"
150 #include "machine.h"
151 #include "encoder.h"
152 #include "util.h"
153 #include "psymodel.h"
154 #include "lame_global_flags.h"
155 #include "fft.h"
156 #include "lame-analysis.h"
157
158
159 #define NSFIRLEN 21
160
161 #ifdef M_LN10
162 #define LN_TO_LOG10 (M_LN10/10)
163 #else
164 #define LN_TO_LOG10 0.2302585093
165 #endif
166
167
168 /*
169 L3psycho_anal. Compute psycho acoustics.
170
171 Data returned to the calling program must be delayed by one
172 granule.
173
174 This is done in two places.
175 If we do not need to know the blocktype, the copying
176 can be done here at the top of the program: we copy the data for
177 the last granule (computed during the last call) before it is
178 overwritten with the new data. It looks like this:
179
180 0. static psymodel_data
181 1. calling_program_data = psymodel_data
182 2. compute psymodel_data
183
184 For data which needs to know the blocktype, the copying must be
185 done at the end of this loop, and the old values must be saved:
186
187 0. static psymodel_data_old
188 1. compute psymodel_data
189 2. compute possible block type of this granule
190 3. compute final block type of previous granule based on #2.
191 4. calling_program_data = psymodel_data_old
192 5. psymodel_data_old = psymodel_data
193 */
194
195
196
197
198
199 /* psycho_loudness_approx
200 jd - 2001 mar 12
201 in: energy - BLKSIZE/2 elements of frequency magnitudes ^ 2
202 gfp - uses out_samplerate, ATHtype (also needed for ATHformula)
203 returns: loudness^2 approximation, a positive value roughly tuned for a value
204 of 1.0 for signals near clipping.
205 notes: When calibrated, feeding this function binary white noise at sample
206 values +32767 or -32768 should return values that approach 3.
207 ATHformula is used to approximate an equal loudness curve.
208 future: Data indicates that the shape of the equal loudness curve varies
209 with intensity. This function might be improved by using an equal
210 loudness curve shaped for typical playback levels (instead of the
211 ATH, that is shaped for the threshold). A flexible realization might
212 simply bend the existing ATH curve to achieve the desired shape.
213 However, the potential gain may not be enough to justify an effort.
214 */
215 static FLOAT
psycho_loudness_approx(FLOAT const * energy,FLOAT const * eql_w)216 psycho_loudness_approx(FLOAT const *energy, FLOAT const *eql_w)
217 {
218 int i;
219 FLOAT loudness_power;
220
221 loudness_power = 0.0;
222 /* apply weights to power in freq. bands */
223 for (i = 0; i < BLKSIZE / 2; ++i)
224 loudness_power += energy[i] * eql_w[i];
225 loudness_power *= VO_SCALE;
226
227 return loudness_power;
228 }
229
230 /* mask_add optimization */
231 /* init the limit values used to avoid computing log in mask_add when it is not necessary */
232
233 /* For example, with i = 10*log10(m2/m1)/10*16 (= log10(m2/m1)*16)
234 *
235 * abs(i)>8 is equivalent (as i is an integer) to
236 * abs(i)>=9
237 * i>=9 || i<=-9
238 * equivalent to (as i is the biggest integer smaller than log10(m2/m1)*16
239 * or the smallest integer bigger than log10(m2/m1)*16 depending on the sign of log10(m2/m1)*16)
240 * log10(m2/m1)>=9/16 || log10(m2/m1)<=-9/16
241 * exp10 is strictly increasing thus this is equivalent to
242 * m2/m1 >= 10^(9/16) || m2/m1<=10^(-9/16) which are comparisons to constants
243 */
244
245
246 #define I1LIMIT 8 /* as in if(i>8) */
247 #define I2LIMIT 23 /* as in if(i>24) -> changed 23 */
248 #define MLIMIT 15 /* as in if(m<15) */
249
250 /* pow(10, (I1LIMIT + 1) / 16.0); */
251 static const FLOAT ma_max_i1 = 3.6517412725483771;
252 /* pow(10, (I2LIMIT + 1) / 16.0); */
253 static const FLOAT ma_max_i2 = 31.622776601683793;
254 /* pow(10, (MLIMIT) / 10.0); */
255 static const FLOAT ma_max_m = 31.622776601683793;
256
257 /*This is the masking table:
258 According to tonality, values are going from 0dB (TMN)
259 to 9.3dB (NMT).
260 After additive masking computation, 8dB are added, so
261 final values are going from 8dB to 17.3dB
262 */
263 static const FLOAT tab[] = {
264 1.0 /*pow(10, -0) */ ,
265 0.79433 /*pow(10, -0.1) */ ,
266 0.63096 /*pow(10, -0.2) */ ,
267 0.63096 /*pow(10, -0.2) */ ,
268 0.63096 /*pow(10, -0.2) */ ,
269 0.63096 /*pow(10, -0.2) */ ,
270 0.63096 /*pow(10, -0.2) */ ,
271 0.25119 /*pow(10, -0.6) */ ,
272 0.11749 /*pow(10, -0.93) */
273 };
274
275 static const int tab_mask_add_delta[] = { 2, 2, 2, 1, 1, 1, 0, 0, -1 };
276 #define STATIC_ASSERT_EQUAL_DIMENSION(A,B) enum{static_assert_##A=1/((dimension_of(A) == dimension_of(B))?1:0)}
277
278 inline static int
mask_add_delta(int i)279 mask_add_delta(int i)
280 {
281 STATIC_ASSERT_EQUAL_DIMENSION(tab_mask_add_delta,tab);
282 assert(i < (int)dimension_of(tab));
283 return tab_mask_add_delta[i];
284 }
285
286
287 static void
init_mask_add_max_values(void)288 init_mask_add_max_values(void)
289 {
290 #ifndef NDEBUG
291 FLOAT const _ma_max_i1 = pow(10, (I1LIMIT + 1) / 16.0);
292 FLOAT const _ma_max_i2 = pow(10, (I2LIMIT + 1) / 16.0);
293 FLOAT const _ma_max_m = pow(10, (MLIMIT) / 10.0);
294 assert(fabs(ma_max_i1 - _ma_max_i1) <= FLT_EPSILON);
295 assert(fabs(ma_max_i2 - _ma_max_i2) <= FLT_EPSILON);
296 assert(fabs(ma_max_m - _ma_max_m ) <= FLT_EPSILON);
297 #endif
298 }
299
300
301
302
303 /* addition of simultaneous masking Naoki Shibata 2000/7 */
304 inline static FLOAT
vbrpsy_mask_add(FLOAT m1,FLOAT m2,int b,int delta)305 vbrpsy_mask_add(FLOAT m1, FLOAT m2, int b, int delta)
306 {
307 static const FLOAT table2[] = {
308 1.33352 * 1.33352, 1.35879 * 1.35879, 1.38454 * 1.38454, 1.39497 * 1.39497,
309 1.40548 * 1.40548, 1.3537 * 1.3537, 1.30382 * 1.30382, 1.22321 * 1.22321,
310 1.14758 * 1.14758,
311 1
312 };
313
314 FLOAT ratio;
315
316 if (m1 < 0) {
317 m1 = 0;
318 }
319 if (m2 < 0) {
320 m2 = 0;
321 }
322 if (m1 <= 0) {
323 return m2;
324 }
325 if (m2 <= 0) {
326 return m1;
327 }
328 if (m2 > m1) {
329 ratio = m2 / m1;
330 }
331 else {
332 ratio = m1 / m2;
333 }
334 if (abs(b) <= delta) { /* approximately, 1 bark = 3 partitions */
335 /* originally 'if(i > 8)' */
336 if (ratio >= ma_max_i1) {
337 return m1 + m2;
338 }
339 else {
340 int i = (int) (FAST_LOG10_X(ratio, 16.0f));
341 return (m1 + m2) * table2[i];
342 }
343 }
344 if (ratio < ma_max_i2) {
345 return m1 + m2;
346 }
347 if (m1 < m2) {
348 m1 = m2;
349 }
350 return m1;
351 }
352
353
354 /* short block threshold calculation (part 2)
355
356 partition band bo_s[sfb] is at the transition from scalefactor
357 band sfb to the next one sfb+1; enn and thmm have to be split
358 between them
359 */
360 static void
convert_partition2scalefac(PsyConst_CB2SB_t const * const gd,FLOAT const * eb,FLOAT const * thr,FLOAT enn_out[],FLOAT thm_out[])361 convert_partition2scalefac(PsyConst_CB2SB_t const *const gd, FLOAT const *eb, FLOAT const *thr,
362 FLOAT enn_out[], FLOAT thm_out[])
363 {
364 FLOAT enn, thmm;
365 int sb, b, n = gd->n_sb;
366 enn = thmm = 0.0f;
367 for (sb = b = 0; sb < n; ++b, ++sb) {
368 int const bo_sb = gd->bo[sb];
369 int const npart = gd->npart;
370 int const b_lim = bo_sb < npart ? bo_sb : npart;
371 while (b < b_lim) {
372 assert(eb[b] >= 0); /* iff failed, it may indicate some index error elsewhere */
373 assert(thr[b] >= 0);
374 enn += eb[b];
375 thmm += thr[b];
376 b++;
377 }
378 if (b >= npart) {
379 enn_out[sb] = enn;
380 thm_out[sb] = thmm;
381 ++sb;
382 break;
383 }
384 assert(eb[b] >= 0); /* iff failed, it may indicate some index error elsewhere */
385 assert(thr[b] >= 0);
386 {
387 /* at transition sfb -> sfb+1 */
388 FLOAT const w_curr = gd->bo_weight[sb];
389 FLOAT const w_next = 1.0f - w_curr;
390 enn += w_curr * eb[b];
391 thmm += w_curr * thr[b];
392 enn_out[sb] = enn;
393 thm_out[sb] = thmm;
394 enn = w_next * eb[b];
395 thmm = w_next * thr[b];
396 }
397 }
398 /* zero initialize the rest */
399 for (; sb < n; ++sb) {
400 enn_out[sb] = 0;
401 thm_out[sb] = 0;
402 }
403 }
404
405 static void
convert_partition2scalefac_s(lame_internal_flags * gfc,FLOAT const * eb,FLOAT const * thr,int chn,int sblock)406 convert_partition2scalefac_s(lame_internal_flags * gfc, FLOAT const *eb, FLOAT const *thr, int chn,
407 int sblock)
408 {
409 PsyStateVar_t *const psv = &gfc->sv_psy;
410 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
411 FLOAT enn[SBMAX_s], thm[SBMAX_s];
412 int sb;
413 convert_partition2scalefac(gds, eb, thr, enn, thm);
414 for (sb = 0; sb < SBMAX_s; ++sb) {
415 psv->en[chn].s[sb][sblock] = enn[sb];
416 psv->thm[chn].s[sb][sblock] = thm[sb];
417 }
418 }
419
420 /* longblock threshold calculation (part 2) */
421 static void
convert_partition2scalefac_l(lame_internal_flags * gfc,FLOAT const * eb,FLOAT const * thr,int chn)422 convert_partition2scalefac_l(lame_internal_flags * gfc, FLOAT const *eb, FLOAT const *thr, int chn)
423 {
424 PsyStateVar_t *const psv = &gfc->sv_psy;
425 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
426 FLOAT *enn = &psv->en[chn].l[0];
427 FLOAT *thm = &psv->thm[chn].l[0];
428 convert_partition2scalefac(gdl, eb, thr, enn, thm);
429 }
430
431 static void
convert_partition2scalefac_l_to_s(lame_internal_flags * gfc,FLOAT const * eb,FLOAT const * thr,int chn)432 convert_partition2scalefac_l_to_s(lame_internal_flags * gfc, FLOAT const *eb, FLOAT const *thr,
433 int chn)
434 {
435 PsyStateVar_t *const psv = &gfc->sv_psy;
436 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->l_to_s;
437 FLOAT enn[SBMAX_s], thm[SBMAX_s];
438 int sb, sblock;
439 convert_partition2scalefac(gds, eb, thr, enn, thm);
440 for (sb = 0; sb < SBMAX_s; ++sb) {
441 FLOAT const scale = 1. / 64.f;
442 FLOAT const tmp_enn = enn[sb];
443 FLOAT const tmp_thm = thm[sb] * scale;
444 for (sblock = 0; sblock < 3; ++sblock) {
445 psv->en[chn].s[sb][sblock] = tmp_enn;
446 psv->thm[chn].s[sb][sblock] = tmp_thm;
447 }
448 }
449 }
450
451
452
453 static inline FLOAT
NS_INTERP(FLOAT x,FLOAT y,FLOAT r)454 NS_INTERP(FLOAT x, FLOAT y, FLOAT r)
455 {
456 /* was pow((x),(r))*pow((y),1-(r)) */
457 if (r >= 1.0f)
458 return x; /* 99.7% of the time */
459 if (r <= 0.0f)
460 return y;
461 if (y > 0.0f)
462 return powf(x / y, r) * y; /* rest of the time */
463 return 0.0f; /* never happens */
464 }
465
466
467
468 static FLOAT
pecalc_s(III_psy_ratio const * mr,FLOAT masking_lower)469 pecalc_s(III_psy_ratio const *mr, FLOAT masking_lower)
470 {
471 FLOAT pe_s;
472 static const FLOAT regcoef_s[] = {
473 11.8, /* these values are tuned only for 44.1kHz... */
474 13.6,
475 17.2,
476 32,
477 46.5,
478 51.3,
479 57.5,
480 67.1,
481 71.5,
482 84.6,
483 97.6,
484 130,
485 /* 255.8 */
486 };
487 unsigned int sb, sblock;
488
489 pe_s = 1236.28f / 4;
490 for (sb = 0; sb < SBMAX_s - 1; sb++) {
491 for (sblock = 0; sblock < 3; sblock++) {
492 FLOAT const thm = mr->thm.s[sb][sblock];
493 assert(sb < dimension_of(regcoef_s));
494 if (thm > 0.0f) {
495 FLOAT const x = thm * masking_lower;
496 FLOAT const en = mr->en.s[sb][sblock];
497 if (en > x) {
498 if (en > x * 1e10f) {
499 pe_s += regcoef_s[sb] * (10.0f * LOG10);
500 }
501 else {
502 assert(x > 0);
503 pe_s += regcoef_s[sb] * FAST_LOG10(en / x);
504 }
505 }
506 }
507 }
508 }
509
510 return pe_s;
511 }
512
513 static FLOAT
pecalc_l(III_psy_ratio const * mr,FLOAT masking_lower)514 pecalc_l(III_psy_ratio const *mr, FLOAT masking_lower)
515 {
516 FLOAT pe_l;
517 static const FLOAT regcoef_l[] = {
518 6.8, /* these values are tuned only for 44.1kHz... */
519 5.8,
520 5.8,
521 6.4,
522 6.5,
523 9.9,
524 12.1,
525 14.4,
526 15,
527 18.9,
528 21.6,
529 26.9,
530 34.2,
531 40.2,
532 46.8,
533 56.5,
534 60.7,
535 73.9,
536 85.7,
537 93.4,
538 126.1,
539 /* 241.3 */
540 };
541 unsigned int sb;
542
543 pe_l = 1124.23f / 4;
544 for (sb = 0; sb < SBMAX_l - 1; sb++) {
545 FLOAT const thm = mr->thm.l[sb];
546 assert(sb < dimension_of(regcoef_l));
547 if (thm > 0.0f) {
548 FLOAT const x = thm * masking_lower;
549 FLOAT const en = mr->en.l[sb];
550 if (en > x) {
551 if (en > x * 1e10f) {
552 pe_l += regcoef_l[sb] * (10.0f * LOG10);
553 }
554 else {
555 assert(x > 0);
556 pe_l += regcoef_l[sb] * FAST_LOG10(en / x);
557 }
558 }
559 }
560 }
561
562 return pe_l;
563 }
564
565
566 static void
calc_energy(PsyConst_CB2SB_t const * l,FLOAT const * fftenergy,FLOAT * eb,FLOAT * max,FLOAT * avg)567 calc_energy(PsyConst_CB2SB_t const *l, FLOAT const *fftenergy, FLOAT * eb, FLOAT * max, FLOAT * avg)
568 {
569 int b, j;
570
571 for (b = j = 0; b < l->npart; ++b) {
572 FLOAT ebb = 0, m = 0;
573 int i;
574 for (i = 0; i < l->numlines[b]; ++i, ++j) {
575 FLOAT const el = fftenergy[j];
576 assert(el >= 0);
577 ebb += el;
578 if (m < el)
579 m = el;
580 }
581 eb[b] = ebb;
582 max[b] = m;
583 avg[b] = ebb * l->rnumlines[b];
584 assert(l->rnumlines[b] >= 0);
585 assert(ebb >= 0);
586 assert(eb[b] >= 0);
587 assert(max[b] >= 0);
588 assert(avg[b] >= 0);
589 }
590 }
591
592
593 static void
calc_mask_index_l(lame_internal_flags const * gfc,FLOAT const * max,FLOAT const * avg,unsigned char * mask_idx)594 calc_mask_index_l(lame_internal_flags const *gfc, FLOAT const *max,
595 FLOAT const *avg, unsigned char *mask_idx)
596 {
597 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
598 FLOAT m, a;
599 int b, k;
600 int const last_tab_entry = sizeof(tab) / sizeof(tab[0]) - 1;
601 b = 0;
602 a = avg[b] + avg[b + 1];
603 assert(a >= 0);
604 if (a > 0.0f) {
605 m = max[b];
606 if (m < max[b + 1])
607 m = max[b + 1];
608 assert((gdl->numlines[b] + gdl->numlines[b + 1] - 1) > 0);
609 a = 20.0f * (m * 2.0f - a)
610 / (a * (gdl->numlines[b] + gdl->numlines[b + 1] - 1));
611 k = (int) a;
612 if (k > last_tab_entry)
613 k = last_tab_entry;
614 mask_idx[b] = k;
615 }
616 else {
617 mask_idx[b] = 0;
618 }
619
620 for (b = 1; b < gdl->npart - 1; b++) {
621 a = avg[b - 1] + avg[b] + avg[b + 1];
622 assert(a >= 0);
623 if (a > 0.0f) {
624 m = max[b - 1];
625 if (m < max[b])
626 m = max[b];
627 if (m < max[b + 1])
628 m = max[b + 1];
629 assert((gdl->numlines[b - 1] + gdl->numlines[b] + gdl->numlines[b + 1] - 1) > 0);
630 a = 20.0f * (m * 3.0f - a)
631 / (a * (gdl->numlines[b - 1] + gdl->numlines[b] + gdl->numlines[b + 1] - 1));
632 k = (int) a;
633 if (k > last_tab_entry)
634 k = last_tab_entry;
635 mask_idx[b] = k;
636 }
637 else {
638 mask_idx[b] = 0;
639 }
640 }
641 assert(b > 0);
642 assert(b == gdl->npart - 1);
643
644 a = avg[b - 1] + avg[b];
645 assert(a >= 0);
646 if (a > 0.0f) {
647 m = max[b - 1];
648 if (m < max[b])
649 m = max[b];
650 assert((gdl->numlines[b - 1] + gdl->numlines[b] - 1) > 0);
651 a = 20.0f * (m * 2.0f - a)
652 / (a * (gdl->numlines[b - 1] + gdl->numlines[b] - 1));
653 k = (int) a;
654 if (k > last_tab_entry)
655 k = last_tab_entry;
656 mask_idx[b] = k;
657 }
658 else {
659 mask_idx[b] = 0;
660 }
661 assert(b == (gdl->npart - 1));
662 }
663
664
665 static void
vbrpsy_compute_fft_l(lame_internal_flags * gfc,const sample_t * const buffer[2],int chn,int gr_out,FLOAT fftenergy[HBLKSIZE],FLOAT (* wsamp_l)[BLKSIZE])666 vbrpsy_compute_fft_l(lame_internal_flags * gfc, const sample_t * const buffer[2], int chn,
667 int gr_out, FLOAT fftenergy[HBLKSIZE], FLOAT(*wsamp_l)[BLKSIZE])
668 {
669 SessionConfig_t const *const cfg = &gfc->cfg;
670 PsyStateVar_t *psv = &gfc->sv_psy;
671 plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
672 int j;
673
674 if (chn < 2) {
675 fft_long(gfc, *wsamp_l, chn, buffer);
676 }
677 else if (chn == 2) {
678 FLOAT const sqrt2_half = SQRT2 * 0.5f;
679 /* FFT data for mid and side channel is derived from L & R */
680 for (j = BLKSIZE - 1; j >= 0; --j) {
681 FLOAT const l = wsamp_l[0][j];
682 FLOAT const r = wsamp_l[1][j];
683 wsamp_l[0][j] = (l + r) * sqrt2_half;
684 wsamp_l[1][j] = (l - r) * sqrt2_half;
685 }
686 }
687
688 /*********************************************************************
689 * compute energies
690 *********************************************************************/
691 fftenergy[0] = wsamp_l[0][0];
692 fftenergy[0] *= fftenergy[0];
693
694 for (j = BLKSIZE / 2 - 1; j >= 0; --j) {
695 FLOAT const re = (*wsamp_l)[BLKSIZE / 2 - j];
696 FLOAT const im = (*wsamp_l)[BLKSIZE / 2 + j];
697 fftenergy[BLKSIZE / 2 - j] = (re * re + im * im) * 0.5f;
698 }
699 /* total energy */
700 {
701 FLOAT totalenergy = 0.0f;
702 for (j = 11; j < HBLKSIZE; j++)
703 totalenergy += fftenergy[j];
704
705 psv->tot_ener[chn] = totalenergy;
706 }
707
708 if (plt) {
709 for (j = 0; j < HBLKSIZE; j++) {
710 plt->energy[gr_out][chn][j] = plt->energy_save[chn][j];
711 plt->energy_save[chn][j] = fftenergy[j];
712 }
713 }
714 }
715
716
717 static void
vbrpsy_compute_fft_s(lame_internal_flags const * gfc,const sample_t * const buffer[2],int chn,int sblock,FLOAT (* fftenergy_s)[HBLKSIZE_s],FLOAT (* wsamp_s)[3][BLKSIZE_s])718 vbrpsy_compute_fft_s(lame_internal_flags const *gfc, const sample_t * const buffer[2], int chn,
719 int sblock, FLOAT(*fftenergy_s)[HBLKSIZE_s], FLOAT(*wsamp_s)[3][BLKSIZE_s])
720 {
721 int j;
722
723 if (sblock == 0 && chn < 2) {
724 fft_short(gfc, *wsamp_s, chn, buffer);
725 }
726 if (chn == 2) {
727 FLOAT const sqrt2_half = SQRT2 * 0.5f;
728 /* FFT data for mid and side channel is derived from L & R */
729 for (j = BLKSIZE_s - 1; j >= 0; --j) {
730 FLOAT const l = wsamp_s[0][sblock][j];
731 FLOAT const r = wsamp_s[1][sblock][j];
732 wsamp_s[0][sblock][j] = (l + r) * sqrt2_half;
733 wsamp_s[1][sblock][j] = (l - r) * sqrt2_half;
734 }
735 }
736
737 /*********************************************************************
738 * compute energies
739 *********************************************************************/
740 fftenergy_s[sblock][0] = (*wsamp_s)[sblock][0];
741 fftenergy_s[sblock][0] *= fftenergy_s[sblock][0];
742 for (j = BLKSIZE_s / 2 - 1; j >= 0; --j) {
743 FLOAT const re = (*wsamp_s)[sblock][BLKSIZE_s / 2 - j];
744 FLOAT const im = (*wsamp_s)[sblock][BLKSIZE_s / 2 + j];
745 fftenergy_s[sblock][BLKSIZE_s / 2 - j] = (re * re + im * im) * 0.5f;
746 }
747 }
748
749
750 /*********************************************************************
751 * compute loudness approximation (used for ATH auto-level adjustment)
752 *********************************************************************/
753 static void
vbrpsy_compute_loudness_approximation_l(lame_internal_flags * gfc,int gr_out,int chn,const FLOAT fftenergy[HBLKSIZE])754 vbrpsy_compute_loudness_approximation_l(lame_internal_flags * gfc, int gr_out, int chn,
755 const FLOAT fftenergy[HBLKSIZE])
756 {
757 PsyStateVar_t *psv = &gfc->sv_psy;
758 if (chn < 2) { /*no loudness for mid/side ch */
759 gfc->ov_psy.loudness_sq[gr_out][chn] = psv->loudness_sq_save[chn];
760 psv->loudness_sq_save[chn] = psycho_loudness_approx(fftenergy, gfc->ATH->eql_w);
761 }
762 }
763
764
765 /**********************************************************************
766 * Apply HPF of fs/4 to the input signal.
767 * This is used for attack detection / handling.
768 **********************************************************************/
769 static void
vbrpsy_attack_detection(lame_internal_flags * gfc,const sample_t * const buffer[2],int gr_out,III_psy_ratio masking_ratio[2][2],III_psy_ratio masking_MS_ratio[2][2],FLOAT energy[4],FLOAT sub_short_factor[4][3],int ns_attacks[4][4],int uselongblock[2])770 vbrpsy_attack_detection(lame_internal_flags * gfc, const sample_t * const buffer[2], int gr_out,
771 III_psy_ratio masking_ratio[2][2], III_psy_ratio masking_MS_ratio[2][2],
772 FLOAT energy[4], FLOAT sub_short_factor[4][3], int ns_attacks[4][4],
773 int uselongblock[2])
774 {
775 FLOAT ns_hpfsmpl[2][576];
776 SessionConfig_t const *const cfg = &gfc->cfg;
777 PsyStateVar_t *const psv = &gfc->sv_psy;
778 plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
779 int const n_chn_out = cfg->channels_out;
780 /* chn=2 and 3 = Mid and Side channels */
781 int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : n_chn_out;
782 int chn, i, j;
783
784 memset(&ns_hpfsmpl[0][0], 0, sizeof(ns_hpfsmpl));
785 /* Don't copy the input buffer into a temporary buffer */
786 /* unroll the loop 2 times */
787 for (chn = 0; chn < n_chn_out; chn++) {
788 static const FLOAT fircoef[] = {
789 -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
790 -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
791 -5.52212e-17 * 2, -0.313819 * 2
792 };
793 /* apply high pass filter of fs/4 */
794 const sample_t *const firbuf = &buffer[chn][576 - 350 - NSFIRLEN + 192];
795 assert(dimension_of(fircoef) == ((NSFIRLEN - 1) / 2));
796 for (i = 0; i < 576; i++) {
797 FLOAT sum1, sum2;
798 sum1 = firbuf[i + 10];
799 sum2 = 0.0;
800 for (j = 0; j < ((NSFIRLEN - 1) / 2) - 1; j += 2) {
801 sum1 += fircoef[j] * (firbuf[i + j] + firbuf[i + NSFIRLEN - j]);
802 sum2 += fircoef[j + 1] * (firbuf[i + j + 1] + firbuf[i + NSFIRLEN - j - 1]);
803 }
804 ns_hpfsmpl[chn][i] = sum1 + sum2;
805 }
806 masking_ratio[gr_out][chn].en = psv->en[chn];
807 masking_ratio[gr_out][chn].thm = psv->thm[chn];
808 if (n_chn_psy > 2) {
809 /* MS maskings */
810 /*percep_MS_entropy [chn-2] = gfc -> pe [chn]; */
811 masking_MS_ratio[gr_out][chn].en = psv->en[chn + 2];
812 masking_MS_ratio[gr_out][chn].thm = psv->thm[chn + 2];
813 }
814 }
815 for (chn = 0; chn < n_chn_psy; chn++) {
816 FLOAT attack_intensity[12];
817 FLOAT en_subshort[12];
818 FLOAT en_short[4] = { 0, 0, 0, 0 };
819 FLOAT const *pf = ns_hpfsmpl[chn & 1];
820 int ns_uselongblock = 1;
821
822 if (chn == 2) {
823 for (i = 0, j = 576; j > 0; ++i, --j) {
824 FLOAT const l = ns_hpfsmpl[0][i];
825 FLOAT const r = ns_hpfsmpl[1][i];
826 ns_hpfsmpl[0][i] = l + r;
827 ns_hpfsmpl[1][i] = l - r;
828 }
829 }
830 /***************************************************************
831 * determine the block type (window type)
832 ***************************************************************/
833 /* calculate energies of each sub-shortblocks */
834 for (i = 0; i < 3; i++) {
835 en_subshort[i] = psv->last_en_subshort[chn][i + 6];
836 assert(psv->last_en_subshort[chn][i + 4] > 0);
837 attack_intensity[i] = en_subshort[i] / psv->last_en_subshort[chn][i + 4];
838 en_short[0] += en_subshort[i];
839 }
840
841 for (i = 0; i < 9; i++) {
842 FLOAT const *const pfe = pf + 576 / 9;
843 FLOAT p = 1.;
844 for (; pf < pfe; pf++)
845 if (p < fabs(*pf))
846 p = fabs(*pf);
847 psv->last_en_subshort[chn][i] = en_subshort[i + 3] = p;
848 en_short[1 + i / 3] += p;
849 if (p > en_subshort[i + 3 - 2]) {
850 assert(en_subshort[i + 3 - 2] > 0);
851 p = p / en_subshort[i + 3 - 2];
852 }
853 else if (en_subshort[i + 3 - 2] > p * 10.0f) {
854 assert(p > 0);
855 p = en_subshort[i + 3 - 2] / (p * 10.0f);
856 }
857 else {
858 p = 0.0;
859 }
860 attack_intensity[i + 3] = p;
861 }
862
863 /* pulse like signal detection for fatboy.wav and so on */
864 for (i = 0; i < 3; ++i) {
865 FLOAT const enn =
866 en_subshort[i * 3 + 3] + en_subshort[i * 3 + 4] + en_subshort[i * 3 + 5];
867 FLOAT factor = 1.f;
868 if (en_subshort[i * 3 + 5] * 6 < enn) {
869 factor *= 0.5f;
870 if (en_subshort[i * 3 + 4] * 6 < enn) {
871 factor *= 0.5f;
872 }
873 }
874 sub_short_factor[chn][i] = factor;
875 }
876
877 if (plt) {
878 FLOAT x = attack_intensity[0];
879 for (i = 1; i < 12; i++) {
880 if (x < attack_intensity[i]) {
881 x = attack_intensity[i];
882 }
883 }
884 plt->ers[gr_out][chn] = plt->ers_save[chn];
885 plt->ers_save[chn] = x;
886 }
887
888 /* compare energies between sub-shortblocks */
889 {
890 FLOAT x = gfc->cd_psy->attack_threshold[chn];
891 for (i = 0; i < 12; i++) {
892 if (ns_attacks[chn][i / 3] == 0) {
893 if (attack_intensity[i] > x) {
894 ns_attacks[chn][i / 3] = (i % 3) + 1;
895 }
896 }
897 }
898 }
899 /* should have energy change between short blocks, in order to avoid periodic signals */
900 /* Good samples to show the effect are Trumpet test songs */
901 /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
902 /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
903 for (i = 1; i < 4; i++) {
904 FLOAT const u = en_short[i - 1];
905 FLOAT const v = en_short[i];
906 FLOAT const m = Max(u, v);
907 if (m < 40000) { /* (2) */
908 if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
909 if (i == 1 && ns_attacks[chn][0] <= ns_attacks[chn][i]) {
910 ns_attacks[chn][0] = 0;
911 }
912 ns_attacks[chn][i] = 0;
913 }
914 }
915 }
916
917 if (ns_attacks[chn][0] <= psv->last_attacks[chn]) {
918 ns_attacks[chn][0] = 0;
919 }
920
921 if (psv->last_attacks[chn] == 3 ||
922 ns_attacks[chn][0] + ns_attacks[chn][1] + ns_attacks[chn][2] + ns_attacks[chn][3]) {
923 ns_uselongblock = 0;
924
925 if (ns_attacks[chn][1] && ns_attacks[chn][0]) {
926 ns_attacks[chn][1] = 0;
927 }
928 if (ns_attacks[chn][2] && ns_attacks[chn][1]) {
929 ns_attacks[chn][2] = 0;
930 }
931 if (ns_attacks[chn][3] && ns_attacks[chn][2]) {
932 ns_attacks[chn][3] = 0;
933 }
934 }
935
936 if (chn < 2) {
937 uselongblock[chn] = ns_uselongblock;
938 }
939 else {
940 if (ns_uselongblock == 0) {
941 uselongblock[0] = uselongblock[1] = 0;
942 }
943 }
944
945 /* there is a one granule delay. Copy maskings computed last call
946 * into masking_ratio to return to calling program.
947 */
948 energy[chn] = psv->tot_ener[chn];
949 }
950 }
951
952
953 static void
vbrpsy_skip_masking_s(lame_internal_flags * gfc,int chn,int sblock)954 vbrpsy_skip_masking_s(lame_internal_flags * gfc, int chn, int sblock)
955 {
956 if (sblock == 0) {
957 FLOAT *nbs2 = &gfc->sv_psy.nb_s2[chn][0];
958 FLOAT *nbs1 = &gfc->sv_psy.nb_s1[chn][0];
959 int const n = gfc->cd_psy->s.npart;
960 int b;
961 for (b = 0; b < n; b++) {
962 nbs2[b] = nbs1[b];
963 }
964 }
965 }
966
967
968 static void
vbrpsy_calc_mask_index_s(lame_internal_flags const * gfc,FLOAT const * max,FLOAT const * avg,unsigned char * mask_idx)969 vbrpsy_calc_mask_index_s(lame_internal_flags const *gfc, FLOAT const *max,
970 FLOAT const *avg, unsigned char *mask_idx)
971 {
972 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
973 FLOAT m, a;
974 int b, k;
975 int const last_tab_entry = dimension_of(tab) - 1;
976 b = 0;
977 a = avg[b] + avg[b + 1];
978 assert(a >= 0);
979 if (a > 0.0f) {
980 m = max[b];
981 if (m < max[b + 1])
982 m = max[b + 1];
983 assert((gds->numlines[b] + gds->numlines[b + 1] - 1) > 0);
984 a = 20.0f * (m * 2.0f - a)
985 / (a * (gds->numlines[b] + gds->numlines[b + 1] - 1));
986 k = (int) a;
987 if (k > last_tab_entry)
988 k = last_tab_entry;
989 mask_idx[b] = k;
990 }
991 else {
992 mask_idx[b] = 0;
993 }
994
995 for (b = 1; b < gds->npart - 1; b++) {
996 a = avg[b - 1] + avg[b] + avg[b + 1];
997 assert(b + 1 < gds->npart);
998 assert(a >= 0);
999 if (a > 0.0) {
1000 m = max[b - 1];
1001 if (m < max[b])
1002 m = max[b];
1003 if (m < max[b + 1])
1004 m = max[b + 1];
1005 assert((gds->numlines[b - 1] + gds->numlines[b] + gds->numlines[b + 1] - 1) > 0);
1006 a = 20.0f * (m * 3.0f - a)
1007 / (a * (gds->numlines[b - 1] + gds->numlines[b] + gds->numlines[b + 1] - 1));
1008 k = (int) a;
1009 if (k > last_tab_entry)
1010 k = last_tab_entry;
1011 mask_idx[b] = k;
1012 }
1013 else {
1014 mask_idx[b] = 0;
1015 }
1016 }
1017 assert(b > 0);
1018 assert(b == gds->npart - 1);
1019
1020 a = avg[b - 1] + avg[b];
1021 assert(a >= 0);
1022 if (a > 0.0f) {
1023 m = max[b - 1];
1024 if (m < max[b])
1025 m = max[b];
1026 assert((gds->numlines[b - 1] + gds->numlines[b] - 1) > 0);
1027 a = 20.0f * (m * 2.0f - a)
1028 / (a * (gds->numlines[b - 1] + gds->numlines[b] - 1));
1029 k = (int) a;
1030 if (k > last_tab_entry)
1031 k = last_tab_entry;
1032 mask_idx[b] = k;
1033 }
1034 else {
1035 mask_idx[b] = 0;
1036 }
1037 assert(b == (gds->npart - 1));
1038 }
1039
1040
1041 static void
vbrpsy_compute_masking_s(lame_internal_flags * gfc,const FLOAT (* fftenergy_s)[HBLKSIZE_s],FLOAT * eb,FLOAT * thr,int chn,int sblock)1042 vbrpsy_compute_masking_s(lame_internal_flags * gfc, const FLOAT(*fftenergy_s)[HBLKSIZE_s],
1043 FLOAT * eb, FLOAT * thr, int chn, int sblock)
1044 {
1045 PsyStateVar_t *const psv = &gfc->sv_psy;
1046 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
1047 FLOAT max[CBANDS], avg[CBANDS];
1048 int i, j, b;
1049 unsigned char mask_idx_s[CBANDS];
1050
1051 memset(max, 0, sizeof(max));
1052 memset(avg, 0, sizeof(avg));
1053
1054 for (b = j = 0; b < gds->npart; ++b) {
1055 FLOAT ebb = 0, m = 0;
1056 int const n = gds->numlines[b];
1057 for (i = 0; i < n; ++i, ++j) {
1058 FLOAT const el = fftenergy_s[sblock][j];
1059 ebb += el;
1060 if (m < el)
1061 m = el;
1062 }
1063 eb[b] = ebb;
1064 assert(ebb >= 0);
1065 max[b] = m;
1066 assert(n > 0);
1067 avg[b] = ebb * gds->rnumlines[b];
1068 assert(avg[b] >= 0);
1069 }
1070 assert(b == gds->npart);
1071 assert(j == 129);
1072 vbrpsy_calc_mask_index_s(gfc, max, avg, mask_idx_s);
1073 for (j = b = 0; b < gds->npart; b++) {
1074 int kk = gds->s3ind[b][0];
1075 int const last = gds->s3ind[b][1];
1076 int const delta = mask_add_delta(mask_idx_s[b]);
1077 int dd, dd_n;
1078 FLOAT x, ecb, avg_mask;
1079 FLOAT const masking_lower = gds->masking_lower[b] * gfc->sv_qnt.masking_lower;
1080
1081 dd = mask_idx_s[kk];
1082 dd_n = 1;
1083 ecb = gds->s3[j] * eb[kk] * tab[mask_idx_s[kk]];
1084 ++j, ++kk;
1085 while (kk <= last) {
1086 dd += mask_idx_s[kk];
1087 dd_n += 1;
1088 x = gds->s3[j] * eb[kk] * tab[mask_idx_s[kk]];
1089 ecb = vbrpsy_mask_add(ecb, x, kk - b, delta);
1090 ++j, ++kk;
1091 }
1092 dd = (1 + 2 * dd) / (2 * dd_n);
1093 avg_mask = tab[dd] * 0.5f;
1094 ecb *= avg_mask;
1095 #if 0 /* we can do PRE ECHO control now here, or do it later */
1096 if (psv->blocktype_old[chn & 0x01] == SHORT_TYPE) {
1097 /* limit calculated threshold by even older granule */
1098 FLOAT const t1 = rpelev_s * psv->nb_s1[chn][b];
1099 FLOAT const t2 = rpelev2_s * psv->nb_s2[chn][b];
1100 FLOAT const tm = (t2 > 0) ? Min(ecb, t2) : ecb;
1101 thr[b] = (t1 > 0) ? NS_INTERP(Min(tm, t1), ecb, 0.6) : ecb;
1102 }
1103 else {
1104 /* limit calculated threshold by older granule */
1105 FLOAT const t1 = rpelev_s * psv->nb_s1[chn][b];
1106 thr[b] = (t1 > 0) ? NS_INTERP(Min(ecb, t1), ecb, 0.6) : ecb;
1107 }
1108 #else /* we do it later */
1109 thr[b] = ecb;
1110 #endif
1111 psv->nb_s2[chn][b] = psv->nb_s1[chn][b];
1112 psv->nb_s1[chn][b] = ecb;
1113 {
1114 /* if THR exceeds EB, the quantization routines will take the difference
1115 * from other bands. in case of strong tonal samples (tonaltest.wav)
1116 * this leads to heavy distortions. that's why we limit THR here.
1117 */
1118 x = max[b];
1119 x *= gds->minval[b];
1120 x *= avg_mask;
1121 if (thr[b] > x) {
1122 thr[b] = x;
1123 }
1124 }
1125 if (masking_lower > 1) {
1126 thr[b] *= masking_lower;
1127 }
1128 if (thr[b] > eb[b]) {
1129 thr[b] = eb[b];
1130 }
1131 if (masking_lower < 1) {
1132 thr[b] *= masking_lower;
1133 }
1134
1135 assert(thr[b] >= 0);
1136 }
1137 for (; b < CBANDS; ++b) {
1138 eb[b] = 0;
1139 thr[b] = 0;
1140 }
1141 }
1142
1143
1144 static void
vbrpsy_compute_masking_l(lame_internal_flags * gfc,const FLOAT fftenergy[HBLKSIZE],FLOAT eb_l[CBANDS],FLOAT thr[CBANDS],int chn)1145 vbrpsy_compute_masking_l(lame_internal_flags * gfc, const FLOAT fftenergy[HBLKSIZE],
1146 FLOAT eb_l[CBANDS], FLOAT thr[CBANDS], int chn)
1147 {
1148 PsyStateVar_t *const psv = &gfc->sv_psy;
1149 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
1150 FLOAT max[CBANDS], avg[CBANDS];
1151 unsigned char mask_idx_l[CBANDS + 2];
1152 int k, b;
1153
1154 /*********************************************************************
1155 * Calculate the energy and the tonality of each partition.
1156 *********************************************************************/
1157 calc_energy(gdl, fftenergy, eb_l, max, avg);
1158 calc_mask_index_l(gfc, max, avg, mask_idx_l);
1159
1160 /*********************************************************************
1161 * convolve the partitioned energy and unpredictability
1162 * with the spreading function, s3_l[b][k]
1163 ********************************************************************/
1164 k = 0;
1165 for (b = 0; b < gdl->npart; b++) {
1166 FLOAT x, ecb, avg_mask, t;
1167 FLOAT const masking_lower = gdl->masking_lower[b] * gfc->sv_qnt.masking_lower;
1168 /* convolve the partitioned energy with the spreading function */
1169 int kk = gdl->s3ind[b][0];
1170 int const last = gdl->s3ind[b][1];
1171 int const delta = mask_add_delta(mask_idx_l[b]);
1172 int dd = 0, dd_n = 0;
1173
1174 dd = mask_idx_l[kk];
1175 dd_n += 1;
1176 ecb = gdl->s3[k] * eb_l[kk] * tab[mask_idx_l[kk]];
1177 ++k, ++kk;
1178 while (kk <= last) {
1179 dd += mask_idx_l[kk];
1180 dd_n += 1;
1181 x = gdl->s3[k] * eb_l[kk] * tab[mask_idx_l[kk]];
1182 t = vbrpsy_mask_add(ecb, x, kk - b, delta);
1183 #if 0
1184 ecb += eb_l[kk];
1185 if (ecb > t) {
1186 ecb = t;
1187 }
1188 #else
1189 ecb = t;
1190 #endif
1191 ++k, ++kk;
1192 }
1193 dd = (1 + 2 * dd) / (2 * dd_n);
1194 avg_mask = tab[dd] * 0.5f;
1195 ecb *= avg_mask;
1196
1197 /**** long block pre-echo control ****/
1198 /* dont use long block pre-echo control if previous granule was
1199 * a short block. This is to avoid the situation:
1200 * frame0: quiet (very low masking)
1201 * frame1: surge (triggers short blocks)
1202 * frame2: regular frame. looks like pre-echo when compared to
1203 * frame0, but all pre-echo was in frame1.
1204 */
1205 /* chn=0,1 L and R channels
1206 chn=2,3 S and M channels.
1207 */
1208 if (psv->blocktype_old[chn & 0x01] == SHORT_TYPE) {
1209 FLOAT const ecb_limit = rpelev * psv->nb_l1[chn][b];
1210 if (ecb_limit > 0) {
1211 thr[b] = Min(ecb, ecb_limit);
1212 }
1213 else {
1214 /* Robert 071209:
1215 Because we don't calculate long block psy when we know a granule
1216 should be of short blocks, we don't have any clue how the granule
1217 before would have looked like as a long block. So we have to guess
1218 a little bit for this END_TYPE block.
1219 Most of the time we get away with this sloppyness. (fingers crossed :)
1220 The speed increase is worth it.
1221 */
1222 thr[b] = Min(ecb, eb_l[b] * NS_PREECHO_ATT2);
1223 }
1224 }
1225 else {
1226 FLOAT ecb_limit_2 = rpelev2 * psv->nb_l2[chn][b];
1227 FLOAT ecb_limit_1 = rpelev * psv->nb_l1[chn][b];
1228 FLOAT ecb_limit;
1229 if (ecb_limit_2 <= 0) {
1230 ecb_limit_2 = ecb;
1231 }
1232 if (ecb_limit_1 <= 0) {
1233 ecb_limit_1 = ecb;
1234 }
1235 if (psv->blocktype_old[chn & 0x01] == NORM_TYPE) {
1236 ecb_limit = Min(ecb_limit_1, ecb_limit_2);
1237 }
1238 else {
1239 ecb_limit = ecb_limit_1;
1240 }
1241 thr[b] = Min(ecb, ecb_limit);
1242 }
1243 psv->nb_l2[chn][b] = psv->nb_l1[chn][b];
1244 psv->nb_l1[chn][b] = ecb;
1245 {
1246 /* if THR exceeds EB, the quantization routines will take the difference
1247 * from other bands. in case of strong tonal samples (tonaltest.wav)
1248 * this leads to heavy distortions. that's why we limit THR here.
1249 */
1250 x = max[b];
1251 x *= gdl->minval[b];
1252 x *= avg_mask;
1253 if (thr[b] > x) {
1254 thr[b] = x;
1255 }
1256 }
1257 if (masking_lower > 1) {
1258 thr[b] *= masking_lower;
1259 }
1260 if (thr[b] > eb_l[b]) {
1261 thr[b] = eb_l[b];
1262 }
1263 if (masking_lower < 1) {
1264 thr[b] *= masking_lower;
1265 }
1266 assert(thr[b] >= 0);
1267 }
1268 for (; b < CBANDS; ++b) {
1269 eb_l[b] = 0;
1270 thr[b] = 0;
1271 }
1272 }
1273
1274
1275 static void
vbrpsy_compute_block_type(SessionConfig_t const * cfg,int * uselongblock)1276 vbrpsy_compute_block_type(SessionConfig_t const *cfg, int *uselongblock)
1277 {
1278 int chn;
1279
1280 if (cfg->short_blocks == short_block_coupled
1281 /* force both channels to use the same block type */
1282 /* this is necessary if the frame is to be encoded in ms_stereo. */
1283 /* But even without ms_stereo, FhG does this */
1284 && !(uselongblock[0] && uselongblock[1]))
1285 uselongblock[0] = uselongblock[1] = 0;
1286
1287 for (chn = 0; chn < cfg->channels_out; chn++) {
1288 /* disable short blocks */
1289 if (cfg->short_blocks == short_block_dispensed) {
1290 uselongblock[chn] = 1;
1291 }
1292 if (cfg->short_blocks == short_block_forced) {
1293 uselongblock[chn] = 0;
1294 }
1295 }
1296 }
1297
1298
1299 static void
vbrpsy_apply_block_type(PsyStateVar_t * psv,int nch,int const * uselongblock,int * blocktype_d)1300 vbrpsy_apply_block_type(PsyStateVar_t * psv, int nch, int const *uselongblock, int *blocktype_d)
1301 {
1302 int chn;
1303
1304 /* update the blocktype of the previous granule, since it depends on what
1305 * happend in this granule */
1306 for (chn = 0; chn < nch; chn++) {
1307 int blocktype = NORM_TYPE;
1308 /* disable short blocks */
1309
1310 if (uselongblock[chn]) {
1311 /* no attack : use long blocks */
1312 assert(psv->blocktype_old[chn] != START_TYPE);
1313 if (psv->blocktype_old[chn] == SHORT_TYPE)
1314 blocktype = STOP_TYPE;
1315 }
1316 else {
1317 /* attack : use short blocks */
1318 blocktype = SHORT_TYPE;
1319 if (psv->blocktype_old[chn] == NORM_TYPE) {
1320 psv->blocktype_old[chn] = START_TYPE;
1321 }
1322 if (psv->blocktype_old[chn] == STOP_TYPE)
1323 psv->blocktype_old[chn] = SHORT_TYPE;
1324 }
1325
1326 blocktype_d[chn] = psv->blocktype_old[chn]; /* value returned to calling program */
1327 psv->blocktype_old[chn] = blocktype; /* save for next call to l3psy_anal */
1328 }
1329 }
1330
1331
1332 /***************************************************************
1333 * compute M/S thresholds from Johnston & Ferreira 1992 ICASSP paper
1334 ***************************************************************/
1335
1336 static void
vbrpsy_compute_MS_thresholds(const FLOAT eb[4][CBANDS],FLOAT thr[4][CBANDS],const FLOAT cb_mld[CBANDS],const FLOAT ath_cb[CBANDS],FLOAT athlower,FLOAT msfix,int n)1337 vbrpsy_compute_MS_thresholds(const FLOAT eb[4][CBANDS], FLOAT thr[4][CBANDS],
1338 const FLOAT cb_mld[CBANDS], const FLOAT ath_cb[CBANDS], FLOAT athlower,
1339 FLOAT msfix, int n)
1340 {
1341 FLOAT const msfix2 = msfix * 2.f;
1342 FLOAT rside, rmid;
1343 int b;
1344 for (b = 0; b < n; ++b) {
1345 FLOAT const ebM = eb[2][b];
1346 FLOAT const ebS = eb[3][b];
1347 FLOAT const thmL = thr[0][b];
1348 FLOAT const thmR = thr[1][b];
1349 FLOAT thmM = thr[2][b];
1350 FLOAT thmS = thr[3][b];
1351
1352 /* use this fix if L & R masking differs by 2db or less */
1353 /* if db = 10*log10(x2/x1) < 2 */
1354 /* if (x2 < 1.58*x1) { */
1355 if (thmL <= 1.58f * thmR && thmR <= 1.58f * thmL) {
1356 FLOAT const mld_m = cb_mld[b] * ebS;
1357 FLOAT const mld_s = cb_mld[b] * ebM;
1358 FLOAT const tmp_m = Min(thmS, mld_m);
1359 FLOAT const tmp_s = Min(thmM, mld_s);
1360 rmid = Max(thmM, tmp_m);
1361 rside = Max(thmS, tmp_s);
1362 }
1363 else {
1364 rmid = thmM;
1365 rside = thmS;
1366 }
1367 if (msfix > 0.f) {
1368 /***************************************************************/
1369 /* Adjust M/S maskings if user set "msfix" */
1370 /***************************************************************/
1371 /* Naoki Shibata 2000 */
1372 FLOAT thmLR, thmMS;
1373 FLOAT const ath = ath_cb[b] * athlower;
1374 FLOAT const tmp_l = Max(thmL, ath);
1375 FLOAT const tmp_r = Max(thmR, ath);
1376 thmLR = Min(tmp_l, tmp_r);
1377 thmM = Max(rmid, ath);
1378 thmS = Max(rside, ath);
1379 thmMS = thmM + thmS;
1380 if (thmMS > 0.f && (thmLR * msfix2) < thmMS) {
1381 FLOAT const f = thmLR * msfix2 / thmMS;
1382 thmM *= f;
1383 thmS *= f;
1384 assert(thmMS > 0.f);
1385 }
1386 rmid = Min(thmM, rmid);
1387 rside = Min(thmS, rside);
1388 }
1389 if (rmid > ebM) {
1390 rmid = ebM;
1391 }
1392 if (rside > ebS) {
1393 rside = ebS;
1394 }
1395 thr[2][b] = rmid;
1396 thr[3][b] = rside;
1397 }
1398 }
1399
1400
1401 /*
1402 * NOTE: the bitrate reduction from the inter-channel masking effect is low
1403 * compared to the chance of getting annyoing artefacts. L3psycho_anal_vbr does
1404 * not use this feature. (Robert 071216)
1405 */
1406
1407 int
L3psycho_anal_vbr(lame_internal_flags * gfc,const sample_t * const buffer[2],int gr_out,III_psy_ratio masking_ratio[2][2],III_psy_ratio masking_MS_ratio[2][2],FLOAT percep_entropy[2],FLOAT percep_MS_entropy[2],FLOAT energy[4],int blocktype_d[2])1408 L3psycho_anal_vbr(lame_internal_flags * gfc,
1409 const sample_t * const buffer[2], int gr_out,
1410 III_psy_ratio masking_ratio[2][2],
1411 III_psy_ratio masking_MS_ratio[2][2],
1412 FLOAT percep_entropy[2], FLOAT percep_MS_entropy[2],
1413 FLOAT energy[4], int blocktype_d[2])
1414 {
1415 SessionConfig_t const *const cfg = &gfc->cfg;
1416 PsyStateVar_t *const psv = &gfc->sv_psy;
1417 PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
1418 PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
1419 plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;
1420
1421 III_psy_xmin last_thm[4];
1422
1423 /* fft and energy calculation */
1424 FLOAT(*wsamp_l)[BLKSIZE];
1425 FLOAT(*wsamp_s)[3][BLKSIZE_s];
1426 FLOAT fftenergy[HBLKSIZE];
1427 FLOAT fftenergy_s[3][HBLKSIZE_s];
1428 FLOAT wsamp_L[2][BLKSIZE];
1429 FLOAT wsamp_S[2][3][BLKSIZE_s];
1430 FLOAT eb[4][CBANDS], thr[4][CBANDS];
1431
1432 FLOAT sub_short_factor[4][3];
1433 FLOAT thmm;
1434 FLOAT const pcfact = 0.6f;
1435 FLOAT const ath_factor =
1436 (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;
1437
1438 const FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
1439 const FLOAT(*const_fftenergy_s)[HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;
1440
1441 /* block type */
1442 int ns_attacks[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
1443 int uselongblock[2];
1444
1445 /* usual variables like loop indices, etc.. */
1446 int chn, sb, sblock;
1447
1448 /* chn=2 and 3 = Mid and Side channels */
1449 int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : cfg->channels_out;
1450
1451 memcpy(&last_thm[0], &psv->thm[0], sizeof(last_thm));
1452
1453 vbrpsy_attack_detection(gfc, buffer, gr_out, masking_ratio, masking_MS_ratio, energy,
1454 sub_short_factor, ns_attacks, uselongblock);
1455
1456 vbrpsy_compute_block_type(cfg, uselongblock);
1457
1458 /* LONG BLOCK CASE */
1459 {
1460 for (chn = 0; chn < n_chn_psy; chn++) {
1461 int const ch01 = chn & 0x01;
1462
1463 wsamp_l = wsamp_L + ch01;
1464 vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l);
1465 vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy);
1466 vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn);
1467 }
1468 if (cfg->mode == JOINT_STEREO) {
1469 if ((uselongblock[0] + uselongblock[1]) == 2) {
1470 vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l,
1471 ath_factor, cfg->msfix, gdl->npart);
1472 }
1473 }
1474 /* TODO: apply adaptive ATH masking here ?? */
1475 for (chn = 0; chn < n_chn_psy; chn++) {
1476 convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn);
1477 convert_partition2scalefac_l_to_s(gfc, eb[chn], thr[chn], chn);
1478 }
1479 }
1480 /* SHORT BLOCKS CASE */
1481 {
1482 int const force_short_block_calc = gfc->cd_psy->force_short_block_calc;
1483 for (sblock = 0; sblock < 3; sblock++) {
1484 for (chn = 0; chn < n_chn_psy; ++chn) {
1485 int const ch01 = chn & 0x01;
1486 if (uselongblock[ch01] && !force_short_block_calc) {
1487 vbrpsy_skip_masking_s(gfc, chn, sblock);
1488 }
1489 else {
1490 /* compute masking thresholds for short blocks */
1491 wsamp_s = wsamp_S + ch01;
1492 vbrpsy_compute_fft_s(gfc, buffer, chn, sblock, fftenergy_s, wsamp_s);
1493 vbrpsy_compute_masking_s(gfc, const_fftenergy_s, eb[chn], thr[chn], chn,
1494 sblock);
1495 }
1496 }
1497 if (cfg->mode == JOINT_STEREO) {
1498 if ((uselongblock[0] + uselongblock[1]) == 0) {
1499 vbrpsy_compute_MS_thresholds(const_eb, thr, gds->mld_cb, gfc->ATH->cb_s,
1500 ath_factor, cfg->msfix, gds->npart);
1501 }
1502 }
1503 /* TODO: apply adaptive ATH masking here ?? */
1504 for (chn = 0; chn < n_chn_psy; ++chn) {
1505 int const ch01 = chn & 0x01;
1506 if (!uselongblock[ch01] || force_short_block_calc) {
1507 convert_partition2scalefac_s(gfc, eb[chn], thr[chn], chn, sblock);
1508 }
1509 }
1510 }
1511
1512 /**** short block pre-echo control ****/
1513 for (chn = 0; chn < n_chn_psy; chn++) {
1514 for (sb = 0; sb < SBMAX_s; sb++) {
1515 FLOAT new_thmm[3], prev_thm, t1, t2;
1516 for (sblock = 0; sblock < 3; sblock++) {
1517 thmm = psv->thm[chn].s[sb][sblock];
1518 thmm *= NS_PREECHO_ATT0;
1519
1520 t1 = t2 = thmm;
1521
1522 if (sblock > 0) {
1523 prev_thm = new_thmm[sblock - 1];
1524 }
1525 else {
1526 prev_thm = last_thm[chn].s[sb][2];
1527 }
1528 if (ns_attacks[chn][sblock] >= 2 || ns_attacks[chn][sblock + 1] == 1) {
1529 t1 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT1 * pcfact);
1530 }
1531 thmm = Min(t1, thmm);
1532 if (ns_attacks[chn][sblock] == 1) {
1533 t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
1534 }
1535 else if ((sblock == 0 && psv->last_attacks[chn] == 3)
1536 || (sblock > 0 && ns_attacks[chn][sblock - 1] == 3)) { /* 2nd preceeding block */
1537 switch (sblock) {
1538 case 0:
1539 prev_thm = last_thm[chn].s[sb][1];
1540 break;
1541 case 1:
1542 prev_thm = last_thm[chn].s[sb][2];
1543 break;
1544 case 2:
1545 prev_thm = new_thmm[0];
1546 break;
1547 }
1548 t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
1549 }
1550
1551 thmm = Min(t1, thmm);
1552 thmm = Min(t2, thmm);
1553
1554 /* pulse like signal detection for fatboy.wav and so on */
1555 thmm *= sub_short_factor[chn][sblock];
1556
1557 new_thmm[sblock] = thmm;
1558 }
1559 for (sblock = 0; sblock < 3; sblock++) {
1560 psv->thm[chn].s[sb][sblock] = new_thmm[sblock];
1561 }
1562 }
1563 }
1564 }
1565 for (chn = 0; chn < n_chn_psy; chn++) {
1566 psv->last_attacks[chn] = ns_attacks[chn][2];
1567 }
1568
1569
1570 /***************************************************************
1571 * determine final block type
1572 ***************************************************************/
1573 vbrpsy_apply_block_type(psv, cfg->channels_out, uselongblock, blocktype_d);
1574
1575 /*********************************************************************
1576 * compute the value of PE to return ... no delay and advance
1577 *********************************************************************/
1578 for (chn = 0; chn < n_chn_psy; chn++) {
1579 FLOAT *ppe;
1580 int type;
1581 III_psy_ratio const *mr;
1582
1583 if (chn > 1) {
1584 ppe = percep_MS_entropy - 2;
1585 type = NORM_TYPE;
1586 if (blocktype_d[0] == SHORT_TYPE || blocktype_d[1] == SHORT_TYPE)
1587 type = SHORT_TYPE;
1588 mr = &masking_MS_ratio[gr_out][chn - 2];
1589 }
1590 else {
1591 ppe = percep_entropy;
1592 type = blocktype_d[chn];
1593 mr = &masking_ratio[gr_out][chn];
1594 }
1595 if (type == SHORT_TYPE) {
1596 ppe[chn] = pecalc_s(mr, gfc->sv_qnt.masking_lower);
1597 }
1598 else {
1599 ppe[chn] = pecalc_l(mr, gfc->sv_qnt.masking_lower);
1600 }
1601
1602 if (plt) {
1603 plt->pe[gr_out][chn] = ppe[chn];
1604 }
1605 }
1606 return 0;
1607 }
1608
1609
1610
1611
1612 /*
1613 * The spreading function. Values returned in units of energy
1614 */
1615 static FLOAT
s3_func(FLOAT bark)1616 s3_func(FLOAT bark)
1617 {
1618 FLOAT tempx, x, tempy, temp;
1619 tempx = bark;
1620 if (tempx >= 0)
1621 tempx *= 3;
1622 else
1623 tempx *= 1.5;
1624
1625 if (tempx >= 0.5 && tempx <= 2.5) {
1626 temp = tempx - 0.5;
1627 x = 8.0 * (temp * temp - 2.0 * temp);
1628 }
1629 else
1630 x = 0.0;
1631 tempx += 0.474;
1632 tempy = 15.811389 + 7.5 * tempx - 17.5 * sqrt(1.0 + tempx * tempx);
1633
1634 if (tempy <= -60.0)
1635 return 0.0;
1636
1637 tempx = exp((x + tempy) * LN_TO_LOG10);
1638
1639 /* Normalization. The spreading function should be normalized so that:
1640 +inf
1641 /
1642 | s3 [ bark ] d(bark) = 1
1643 /
1644 -inf
1645 */
1646 tempx /= .6609193;
1647 return tempx;
1648 }
1649
1650 #if 0
1651 static FLOAT
1652 norm_s3_func(void)
1653 {
1654 double lim_a = 0, lim_b = 0;
1655 double x = 0, l, h;
1656 for (x = 0; s3_func(x) > 1e-20; x -= 1);
1657 l = x;
1658 h = 0;
1659 while (fabs(h - l) > 1e-12) {
1660 x = (h + l) / 2;
1661 if (s3_func(x) > 0) {
1662 h = x;
1663 }
1664 else {
1665 l = x;
1666 }
1667 }
1668 lim_a = l;
1669 for (x = 0; s3_func(x) > 1e-20; x += 1);
1670 l = 0;
1671 h = x;
1672 while (fabs(h - l) > 1e-12) {
1673 x = (h + l) / 2;
1674 if (s3_func(x) > 0) {
1675 l = x;
1676 }
1677 else {
1678 h = x;
1679 }
1680 }
1681 lim_b = h;
1682 {
1683 double sum = 0;
1684 int const m = 1000;
1685 int i;
1686 for (i = 0; i <= m; ++i) {
1687 double x = lim_a + i * (lim_b - lim_a) / m;
1688 double y = s3_func(x);
1689 sum += y;
1690 }
1691 {
1692 double norm = (m + 1) / (sum * (lim_b - lim_a));
1693 /*printf( "norm = %lf\n",norm); */
1694 return norm;
1695 }
1696 }
1697 }
1698 #endif
1699
1700 static FLOAT
stereo_demask(double f)1701 stereo_demask(double f)
1702 {
1703 /* setup stereo demasking thresholds */
1704 /* formula reverse enginerred from plot in paper */
1705 double arg = freq2bark(f);
1706 arg = (Min(arg, 15.5) / 15.5);
1707
1708 return pow(10.0, 1.25 * (1 - cos(PI * arg)) - 2.5);
1709 }
1710
1711 static void
init_numline(PsyConst_CB2SB_t * gd,FLOAT sfreq,int fft_size,int mdct_size,int sbmax,int const * scalepos)1712 init_numline(PsyConst_CB2SB_t * gd, FLOAT sfreq, int fft_size,
1713 int mdct_size, int sbmax, int const *scalepos)
1714 {
1715 FLOAT b_frq[CBANDS + 1];
1716 FLOAT const mdct_freq_frac = sfreq / (2.0f * mdct_size);
1717 FLOAT const deltafreq = fft_size / (2.0f * mdct_size);
1718 int partition[HBLKSIZE] = { 0 };
1719 int i, j, ni;
1720 int sfb;
1721 sfreq /= fft_size;
1722 j = 0;
1723 ni = 0;
1724 /* compute numlines, the number of spectral lines in each partition band */
1725 /* each partition band should be about DELBARK wide. */
1726 for (i = 0; i < CBANDS; i++) {
1727 FLOAT bark1;
1728 int j2, nl;
1729 bark1 = freq2bark(sfreq * j);
1730
1731 b_frq[i] = sfreq * j;
1732
1733 for (j2 = j; freq2bark(sfreq * j2) - bark1 < DELBARK && j2 <= fft_size / 2; j2++);
1734
1735 nl = j2 - j;
1736 gd->numlines[i] = nl;
1737 gd->rnumlines[i] = (nl > 0) ? (1.0f / nl) : 0;
1738
1739 ni = i + 1;
1740
1741 while (j < j2) {
1742 assert(j < HBLKSIZE);
1743 partition[j++] = i;
1744 }
1745 if (j > fft_size / 2) {
1746 j = fft_size / 2;
1747 ++i;
1748 break;
1749 }
1750 }
1751 assert(i < CBANDS);
1752 b_frq[i] = sfreq * j;
1753
1754 gd->n_sb = sbmax;
1755 gd->npart = ni;
1756
1757 {
1758 j = 0;
1759 for (i = 0; i < gd->npart; i++) {
1760 int const nl = gd->numlines[i];
1761 FLOAT const freq = sfreq * (j + nl / 2);
1762 gd->mld_cb[i] = stereo_demask(freq);
1763 j += nl;
1764 }
1765 for (; i < CBANDS; ++i) {
1766 gd->mld_cb[i] = 1;
1767 }
1768 }
1769 for (sfb = 0; sfb < sbmax; sfb++) {
1770 int i1, i2, bo;
1771 int start = scalepos[sfb];
1772 int end = scalepos[sfb + 1];
1773
1774 i1 = floor(.5 + deltafreq * (start - .5));
1775 if (i1 < 0)
1776 i1 = 0;
1777 i2 = floor(.5 + deltafreq * (end - .5));
1778
1779 if (i2 > fft_size / 2)
1780 i2 = fft_size / 2;
1781
1782 bo = partition[i2];
1783 gd->bm[sfb] = (partition[i1] + partition[i2]) / 2;
1784 gd->bo[sfb] = bo;
1785
1786 /* calculate how much of this band belongs to current scalefactor band */
1787 {
1788 FLOAT const f_tmp = mdct_freq_frac * end;
1789 FLOAT bo_w = (f_tmp - b_frq[bo]) / (b_frq[bo + 1] - b_frq[bo]);
1790 if (bo_w < 0) {
1791 bo_w = 0;
1792 }
1793 else {
1794 if (bo_w > 1) {
1795 bo_w = 1;
1796 }
1797 }
1798 gd->bo_weight[sfb] = bo_w;
1799 }
1800 gd->mld[sfb] = stereo_demask(mdct_freq_frac * start);
1801 }
1802 }
1803
1804 static void
compute_bark_values(PsyConst_CB2SB_t const * gd,FLOAT sfreq,int fft_size,FLOAT * bval,FLOAT * bval_width)1805 compute_bark_values(PsyConst_CB2SB_t const *gd, FLOAT sfreq, int fft_size,
1806 FLOAT * bval, FLOAT * bval_width)
1807 {
1808 /* compute bark values of each critical band */
1809 int k, j = 0, ni = gd->npart;
1810 sfreq /= fft_size;
1811 for (k = 0; k < ni; k++) {
1812 int const w = gd->numlines[k];
1813 FLOAT bark1, bark2;
1814
1815 bark1 = freq2bark(sfreq * (j));
1816 bark2 = freq2bark(sfreq * (j + w - 1));
1817 bval[k] = .5 * (bark1 + bark2);
1818
1819 bark1 = freq2bark(sfreq * (j - .5));
1820 bark2 = freq2bark(sfreq * (j + w - .5));
1821 bval_width[k] = bark2 - bark1;
1822 j += w;
1823 }
1824 }
1825
1826 static int
init_s3_values(FLOAT ** p,int (* s3ind)[2],int npart,FLOAT const * bval,FLOAT const * bval_width,FLOAT const * norm)1827 init_s3_values(FLOAT ** p, int (*s3ind)[2], int npart,
1828 FLOAT const *bval, FLOAT const *bval_width, FLOAT const *norm)
1829 {
1830 FLOAT s3[CBANDS][CBANDS];
1831 /* The s3 array is not linear in the bark scale.
1832 * bval[x] should be used to get the bark value.
1833 */
1834 int i, j, k;
1835 int numberOfNoneZero = 0;
1836
1837 memset(&s3[0][0], 0, sizeof(s3));
1838
1839 /* s[i][j], the value of the spreading function,
1840 * centered at band j (masker), for band i (maskee)
1841 *
1842 * i.e.: sum over j to spread into signal barkval=i
1843 * NOTE: i and j are used opposite as in the ISO docs
1844 */
1845 for (i = 0; i < npart; i++) {
1846 for (j = 0; j < npart; j++) {
1847 FLOAT v = s3_func(bval[i] - bval[j]) * bval_width[j];
1848 s3[i][j] = v * norm[i];
1849 }
1850 }
1851 for (i = 0; i < npart; i++) {
1852 for (j = 0; j < npart; j++) {
1853 if (s3[i][j] > 0.0f)
1854 break;
1855 }
1856 s3ind[i][0] = j;
1857
1858 for (j = npart - 1; j > 0; j--) {
1859 if (s3[i][j] > 0.0f)
1860 break;
1861 }
1862 s3ind[i][1] = j;
1863 numberOfNoneZero += (s3ind[i][1] - s3ind[i][0] + 1);
1864 }
1865 *p = lame_calloc(FLOAT, numberOfNoneZero);
1866 if (!*p)
1867 return -1;
1868
1869 k = 0;
1870 for (i = 0; i < npart; i++)
1871 for (j = s3ind[i][0]; j <= s3ind[i][1]; j++)
1872 (*p)[k++] = s3[i][j];
1873
1874 return 0;
1875 }
1876
1877 int
psymodel_init(lame_global_flags const * gfp)1878 psymodel_init(lame_global_flags const *gfp)
1879 {
1880 lame_internal_flags *const gfc = gfp->internal_flags;
1881 SessionConfig_t *const cfg = &gfc->cfg;
1882 PsyStateVar_t *const psv = &gfc->sv_psy;
1883 PsyConst_t *gd;
1884 int i, j, b, sb, k;
1885 FLOAT bvl_a = 13, bvl_b = 24;
1886 FLOAT snr_l_a = 0, snr_l_b = 0;
1887 FLOAT snr_s_a = -8.25, snr_s_b = -4.5;
1888
1889 FLOAT bval[CBANDS];
1890 FLOAT bval_width[CBANDS];
1891 FLOAT norm[CBANDS];
1892 FLOAT const sfreq = cfg->samplerate_out;
1893
1894 FLOAT xav = 10, xbv = 12;
1895 FLOAT const minval_low = (0.f - cfg->minval);
1896
1897 if (gfc->cd_psy != 0) {
1898 return 0;
1899 }
1900 memset(norm, 0, sizeof(norm));
1901
1902 gd = lame_calloc(PsyConst_t, 1);
1903 gfc->cd_psy = gd;
1904
1905 gd->force_short_block_calc = gfp->experimentalZ;
1906
1907 psv->blocktype_old[0] = psv->blocktype_old[1] = NORM_TYPE; /* the vbr header is long blocks */
1908
1909 for (i = 0; i < 4; ++i) {
1910 for (j = 0; j < CBANDS; ++j) {
1911 psv->nb_l1[i][j] = 1e20;
1912 psv->nb_l2[i][j] = 1e20;
1913 psv->nb_s1[i][j] = psv->nb_s2[i][j] = 1.0;
1914 }
1915 for (sb = 0; sb < SBMAX_l; sb++) {
1916 psv->en[i].l[sb] = 1e20;
1917 psv->thm[i].l[sb] = 1e20;
1918 }
1919 for (j = 0; j < 3; ++j) {
1920 for (sb = 0; sb < SBMAX_s; sb++) {
1921 psv->en[i].s[sb][j] = 1e20;
1922 psv->thm[i].s[sb][j] = 1e20;
1923 }
1924 psv->last_attacks[i] = 0;
1925 }
1926 for (j = 0; j < 9; j++)
1927 psv->last_en_subshort[i][j] = 10.;
1928 }
1929
1930
1931 /* init. for loudness approx. -jd 2001 mar 27 */
1932 psv->loudness_sq_save[0] = psv->loudness_sq_save[1] = 0.0;
1933
1934
1935
1936 /*************************************************************************
1937 * now compute the psychoacoustic model specific constants
1938 ************************************************************************/
1939 /* compute numlines, bo, bm, bval, bval_width, mld */
1940 init_numline(&gd->l, sfreq, BLKSIZE, 576, SBMAX_l, gfc->scalefac_band.l);
1941 assert(gd->l.npart < CBANDS);
1942 compute_bark_values(&gd->l, sfreq, BLKSIZE, bval, bval_width);
1943
1944 /* compute the spreading function */
1945 for (i = 0; i < gd->l.npart; i++) {
1946 double snr = snr_l_a;
1947 if (bval[i] >= bvl_a) {
1948 snr = snr_l_b * (bval[i] - bvl_a) / (bvl_b - bvl_a)
1949 + snr_l_a * (bvl_b - bval[i]) / (bvl_b - bvl_a);
1950 }
1951 norm[i] = pow(10.0, snr / 10.0);
1952 }
1953 i = init_s3_values(&gd->l.s3, gd->l.s3ind, gd->l.npart, bval, bval_width, norm);
1954 if (i)
1955 return i;
1956
1957 /* compute long block specific values, ATH and MINVAL */
1958 j = 0;
1959 for (i = 0; i < gd->l.npart; i++) {
1960 double x;
1961
1962 /* ATH */
1963 x = FLOAT_MAX;
1964 for (k = 0; k < gd->l.numlines[i]; k++, j++) {
1965 FLOAT const freq = sfreq * j / (1000.0 * BLKSIZE);
1966 FLOAT level;
1967 /* freq = Min(.1,freq); *//* ATH below 100 Hz constant, not further climbing */
1968 level = ATHformula(cfg, freq * 1000) - 20; /* scale to FFT units; returned value is in dB */
1969 level = pow(10., 0.1 * level); /* convert from dB -> energy */
1970 level *= gd->l.numlines[i];
1971 if (x > level)
1972 x = level;
1973 }
1974 gfc->ATH->cb_l[i] = x;
1975
1976 /* MINVAL.
1977 For low freq, the strength of the masking is limited by minval
1978 this is an ISO MPEG1 thing, dont know if it is really needed */
1979 /* FIXME: it does work to reduce low-freq problems in S53-Wind-Sax
1980 and lead-voice samples, but introduces some 3 kbps bit bloat too.
1981 TODO: Further refinement of the shape of this hack.
1982 */
1983 x = 20.0 * (bval[i] / xav - 1.0);
1984 if (x > 6) {
1985 x = 30;
1986 }
1987 if (x < minval_low) {
1988 x = minval_low;
1989 }
1990 if (cfg->samplerate_out < 44000) {
1991 x = 30;
1992 }
1993 x -= 8.;
1994 gd->l.minval[i] = pow(10.0, x / 10.) * gd->l.numlines[i];
1995 }
1996
1997 /************************************************************************
1998 * do the same things for short blocks
1999 ************************************************************************/
2000 init_numline(&gd->s, sfreq, BLKSIZE_s, 192, SBMAX_s, gfc->scalefac_band.s);
2001 assert(gd->s.npart < CBANDS);
2002 compute_bark_values(&gd->s, sfreq, BLKSIZE_s, bval, bval_width);
2003
2004 /* SNR formula. short block is normalized by SNR. is it still right ? */
2005 j = 0;
2006 for (i = 0; i < gd->s.npart; i++) {
2007 double x;
2008 double snr = snr_s_a;
2009 if (bval[i] >= bvl_a) {
2010 snr = snr_s_b * (bval[i] - bvl_a) / (bvl_b - bvl_a)
2011 + snr_s_a * (bvl_b - bval[i]) / (bvl_b - bvl_a);
2012 }
2013 norm[i] = pow(10.0, snr / 10.0);
2014
2015 /* ATH */
2016 x = FLOAT_MAX;
2017 for (k = 0; k < gd->s.numlines[i]; k++, j++) {
2018 FLOAT const freq = sfreq * j / (1000.0 * BLKSIZE_s);
2019 FLOAT level;
2020 /* freq = Min(.1,freq); *//* ATH below 100 Hz constant, not further climbing */
2021 level = ATHformula(cfg, freq * 1000) - 20; /* scale to FFT units; returned value is in dB */
2022 level = pow(10., 0.1 * level); /* convert from dB -> energy */
2023 level *= gd->s.numlines[i];
2024 if (x > level)
2025 x = level;
2026 }
2027 gfc->ATH->cb_s[i] = x;
2028
2029 /* MINVAL.
2030 For low freq, the strength of the masking is limited by minval
2031 this is an ISO MPEG1 thing, dont know if it is really needed */
2032 x = 7.0 * (bval[i] / xbv - 1.0);
2033 if (bval[i] > xbv) {
2034 x *= 1 + log(1 + x) * 3.1;
2035 }
2036 if (bval[i] < xbv) {
2037 x *= 1 + log(1 - x) * 2.3;
2038 }
2039 if (x > 6) {
2040 x = 30;
2041 }
2042 if (x < minval_low) {
2043 x = minval_low;
2044 }
2045 if (cfg->samplerate_out < 44000) {
2046 x = 30;
2047 }
2048 x -= 8;
2049 gd->s.minval[i] = pow(10.0, x / 10) * gd->s.numlines[i];
2050 }
2051
2052 i = init_s3_values(&gd->s.s3, gd->s.s3ind, gd->s.npart, bval, bval_width, norm);
2053 if (i)
2054 return i;
2055
2056
2057 init_mask_add_max_values();
2058 init_fft(gfc);
2059
2060 /* setup temporal masking */
2061 gd->decay = exp(-1.0 * LOG10 / (temporalmask_sustain_sec * sfreq / 192.0));
2062
2063 {
2064 FLOAT msfix;
2065 msfix = NS_MSFIX;
2066 if (cfg->use_safe_joint_stereo)
2067 msfix = 1.0;
2068 if (fabs(cfg->msfix) > 0.0)
2069 msfix = cfg->msfix;
2070 cfg->msfix = msfix;
2071
2072 /* spread only from npart_l bands. Normally, we use the spreading
2073 * function to convolve from npart_l down to npart_l bands
2074 */
2075 for (b = 0; b < gd->l.npart; b++)
2076 if (gd->l.s3ind[b][1] > gd->l.npart - 1)
2077 gd->l.s3ind[b][1] = gd->l.npart - 1;
2078 }
2079
2080 /* prepare for ATH auto adjustment:
2081 * we want to decrease the ATH by 12 dB per second
2082 */
2083 #define frame_duration (576. * cfg->mode_gr / sfreq)
2084 gfc->ATH->decay = pow(10., -12. / 10. * frame_duration);
2085 gfc->ATH->adjust_factor = 0.01; /* minimum, for leading low loudness */
2086 gfc->ATH->adjust_limit = 1.0; /* on lead, allow adjust up to maximum */
2087 #undef frame_duration
2088
2089 assert(gd->l.bo[SBMAX_l - 1] <= gd->l.npart);
2090 assert(gd->s.bo[SBMAX_s - 1] <= gd->s.npart);
2091
2092 if (cfg->ATHtype != -1) {
2093 /* compute equal loudness weights (eql_w) */
2094 FLOAT freq;
2095 FLOAT const freq_inc = (FLOAT) cfg->samplerate_out / (FLOAT) (BLKSIZE);
2096 FLOAT eql_balance = 0.0;
2097 freq = 0.0;
2098 for (i = 0; i < BLKSIZE / 2; ++i) {
2099 /* convert ATH dB to relative power (not dB) */
2100 /* to determine eql_w */
2101 freq += freq_inc;
2102 gfc->ATH->eql_w[i] = 1. / pow(10, ATHformula(cfg, freq) / 10);
2103 eql_balance += gfc->ATH->eql_w[i];
2104 }
2105 eql_balance = 1.0 / eql_balance;
2106 for (i = BLKSIZE / 2; --i >= 0;) { /* scale weights */
2107 gfc->ATH->eql_w[i] *= eql_balance;
2108 }
2109 }
2110 {
2111 for (b = j = 0; b < gd->s.npart; ++b) {
2112 for (i = 0; i < gd->s.numlines[b]; ++i) {
2113 ++j;
2114 }
2115 }
2116 assert(j == 129);
2117 for (b = j = 0; b < gd->l.npart; ++b) {
2118 for (i = 0; i < gd->l.numlines[b]; ++i) {
2119 ++j;
2120 }
2121 }
2122 assert(j == 513);
2123 }
2124 /* short block attack threshold */
2125 {
2126 float x = gfp->attackthre;
2127 float y = gfp->attackthre_s;
2128 if (x < 0) {
2129 x = NSATTACKTHRE;
2130 }
2131 if (y < 0) {
2132 y = NSATTACKTHRE_S;
2133 }
2134 gd->attack_threshold[0] = gd->attack_threshold[1] = gd->attack_threshold[2] = x;
2135 gd->attack_threshold[3] = y;
2136 }
2137 {
2138 float sk_s = -10.f, sk_l = -4.7f;
2139 static float const sk[] =
2140 { -7.4, -7.4, -7.4, -9.5, -7.4, -6.1, -5.5, -4.7, -4.7, -4.7, -4.7 };
2141 if (gfp->VBR_q < 4) {
2142 sk_l = sk_s = sk[0];
2143 }
2144 else {
2145 sk_l = sk_s = sk[gfp->VBR_q] + gfp->VBR_q_frac * (sk[gfp->VBR_q] - sk[gfp->VBR_q + 1]);
2146 }
2147 b = 0;
2148 for (; b < gd->s.npart; b++) {
2149 float m = (float) (gd->s.npart - b) / gd->s.npart;
2150 gd->s.masking_lower[b] = powf(10.f, sk_s * m * 0.1f);
2151 }
2152 for (; b < CBANDS; ++b) {
2153 gd->s.masking_lower[b] = 1.f;
2154 }
2155 b = 0;
2156 for (; b < gd->l.npart; b++) {
2157 float m = (float) (gd->l.npart - b) / gd->l.npart;
2158 gd->l.masking_lower[b] = powf(10.f, sk_l * m * 0.1f);
2159 }
2160 for (; b < CBANDS; ++b) {
2161 gd->l.masking_lower[b] = 1.f;
2162 }
2163 }
2164 memcpy(&gd->l_to_s, &gd->l, sizeof(gd->l_to_s));
2165 init_numline(&gd->l_to_s, sfreq, BLKSIZE, 192, SBMAX_s, gfc->scalefac_band.s);
2166 return 0;
2167 }
2168