1 /*
2 * TwoLAME: an optimized MPEG Audio Layer Two encoder
3 *
4 * Copyright (C) 2001-2004 Michael Cheng
5 * Copyright (C) 2004-2018 The TwoLAME Project
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this library; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 #include <string.h>
27
28 #include "twolame.h"
29 #include "common.h"
30 #include "mem.h"
31 #include "fft.h"
32 #include "ath.h"
33 #include "psycho_4.h"
34
35 /****************************************************************
36 PSYCHO_4 by MFC Feb 2003
37
38 This is a cleaned up implementation of psy model 2.
39 This is basically because I was sick of the inconsistencies between
40 the notation in the ISO docs and in the sourcecode.
41
42 I've nicked a bunch of stuff from LAME to make this a bit easier to grok
43 - ATH values (this also overcomes the lack of mpeg-2 tables
44 which meant that LSF never had proper values)
45 - ath_freq2bark() to convert frequencies directly to bark values.
46 - spreading_function() isolated the calculation of the spreading function.
47 Basically the same code as before, just isolated in its own function.
48 LAME seem to does some extra tweaks to the ISO1117s model.
49 Not really sure if they help or hinder, so I've commented them out (#ifdef LAME)
50
51 NB: Because of some of the tweaks to bark value calculation etc, it is now possible
52 to have 64 CBANDS. There's no real limit on the actual number of paritions.
53 I wonder if it's worth experimenting with really higher numbers? Probably won't make
54 that much difference to the final SNR values, but it's something worth trying
55 Maybe CBANDS should be a dynamic value, calculated by the psycho_init function
56 CBANDS definition has been changed in encoder.h from 63 to 64
57
58 ****************************************************************/
59
60
61 /* The static variables "r", "phi_sav", "new", "old" and "oldest" have
62 to be remembered for the unpredictability measure. For "r" and
63 "phi_sav", the first index from the left is the channel select and
64 the second index is the "age" of the data. */
65
66
67 /* NMT is a constant 5.5dB. ISO11172 Sec D.2.4.h */
68 static const FLOAT NMT = 5.5;
69
70 /* The index into this array is a bark value
71 This array gives the 'minval' values from ISO11172 Tables D.3.x */
72 static const FLOAT minval[27] = {
73 0.0, /* bark = 0 */
74 20.0, /* 1 */
75 20.0, /* 2 */
76 20.0, /* 3 */
77 20.0, /* 4 */
78 20.0, /* 5 */
79 17.0, /* 6 */
80 15.0, /* 7 */
81 10.0, /* 8 */
82 7.0, /* 9 */
83 4.4, /* 10 */
84 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, /* 11 - 25 */
85 3.5 /* 26 */
86 };
87
88
89 /* Table covers angles from 0 to TRIGTABLESIZE/TRIGTABLESCALE (3.142) radians
90 In steps of 1/TRIGTABLESCALE (0.0005) radians.
91 Largest absolute error: 0.0005
92 Only create a table for cos, and then use trig to work out sin.
93 sin(theta) = cos(PI/2 - theta)
94 MFC March 2003 */
psycho_4_trigtable_init(psycho_4_mem * p4mem)95 static void psycho_4_trigtable_init(psycho_4_mem * p4mem)
96 {
97
98 int i;
99 for (i = 0; i < TRIGTABLESIZE; i++) {
100 p4mem->cos_table[i] = cos((FLOAT) i / TRIGTABLESCALE);
101 }
102 }
103
104 #ifdef NEWTAN
psycho_4_cos(psycho_4_mem * p4mem,FLOAT phi)105 static inline FLOAT psycho_4_cos(psycho_4_mem * p4mem, FLOAT phi)
106 {
107 int index;
108 int sign = 1;
109
110 index = (int) (fabs(phi) * TRIGTABLESCALE);
111 while (index >= TRIGTABLESIZE) {
112 /* If we're larger than PI, then subtract PI until we aren't each time the sign will flip -
113 Year 11 trig again. MFC March 2003 */
114 index -= TRIGTABLESIZE;
115 sign *= -1;
116 }
117 return (sign * p4mem->cos_table[index]);
118 }
119 #endif
120
121 /* The spreading function. Values returned in units of energy
122 Argument 'bark' is the difference in bark values between the
123 centre of two partitions.
124 This has been taken from LAME. MFC Feb 2003 */
psycho_4_spreading_function(FLOAT bark)125 static FLOAT psycho_4_spreading_function(FLOAT bark)
126 {
127
128 FLOAT tempx, x, tempy, temp;
129 tempx = bark;
130 #ifdef LAME
131 /* MP3 standard actually spreads these values a little more */
132 if (tempx >= 0)
133 tempx *= 3;
134 else
135 tempx *= 1.5;
136 #endif
137
138 if (tempx >= 0.5 && tempx <= 2.5) {
139 temp = tempx - 0.5;
140 x = 8.0 * (temp * temp - 2.0 * temp);
141 } else
142 x = 0.0;
143 tempx += 0.474;
144 tempy = 15.811389 + 7.5 * tempx - 17.5 * sqrt(1.0 + tempx * tempx);
145
146 if (tempy <= -60.0)
147 return 0.0;
148
149 tempx = exp((x + tempy) * LN_TO_LOG10);
150
151 #ifdef LAME
152 /* I'm not sure where the magic value of 0.6609193 comes from. twolame will just keep using the
153 rnorm to normalise the spreading function MFC Feb 2003 */
154 /* Normalization. The spreading function should be normalized so that: +inf / | s3 [ bark ]
155 d(bark) = 1 / -inf */
156 tempx /= .6609193;
157 #endif
158 return tempx;
159
160 }
161
162 /********************************
163 * init psycho model 2
164 ********************************/
twolame_psycho_4_init(twolame_options * glopts,int sfreq)165 static psycho_4_mem *twolame_psycho_4_init(twolame_options * glopts, int sfreq)
166 {
167 psycho_4_mem *mem;
168 FLOAT *cbval, *rnorm;
169 FLOAT *window;
170 FLOAT bark[HBLKSIZE], *ath;
171 int *numlines;
172 int *partition;
173 FCB *s;
174 FLOAT *tmn;
175 int i, j;
176
177 {
178 mem = (psycho_4_mem *) TWOLAME_MALLOC(sizeof(psycho_4_mem));
179
180 mem->tmn = (FLOAT *) TWOLAME_MALLOC(sizeof(DCB));
181 mem->s = (FCB *) TWOLAME_MALLOC(sizeof(FCBCB));
182 mem->lthr = (FHBLK *) TWOLAME_MALLOC(sizeof(F2HBLK));
183 mem->r = (F2HBLK *) TWOLAME_MALLOC(sizeof(F22HBLK));
184 mem->phi_sav = (F2HBLK *) TWOLAME_MALLOC(sizeof(F22HBLK));
185
186 mem->new = 0;
187 mem->old = 1;
188 mem->oldest = 0;
189 }
190
191 {
192 cbval = mem->cbval;
193 rnorm = mem->rnorm;
194 window = mem->window;
195 // bark = mem->bark;
196 ath = mem->ath;
197 numlines = mem->numlines;
198 partition = mem->partition;
199 s = mem->s;
200 tmn = mem->tmn;
201 }
202
203
204 /* Set up the SIN/COS tables */
205 psycho_4_trigtable_init(mem);
206
207 /* calculate HANN window coefficients */
208 for (i = 0; i < BLKSIZE; i++)
209 window[i] = 0.5 * (1 - cos(2.0 * PI * (i - 0.5) / BLKSIZE));
210
211 /* For each FFT line from 0(DC) to 512(Nyquist) calculate - bark : the bark value of this fft
212 line - ath : the absolute threshold of hearing for this line [ATH]
213
214 Since it is a 1024 point FFT, each line in the fft corresponds to 1/1024 of the total
215 frequency. Line 0 should correspond to DC - which doesn't really have a ATH afaik Line 1
216 should be 1/1024th of the Sampling Freq Line 512 should be the nyquist freq */
217 for (i = 0; i < HBLKSIZE; i++) {
218 FLOAT freq = i * (FLOAT) sfreq / (FLOAT) BLKSIZE;
219 bark[i] = twolame_ath_freq2bark(freq);
220 /* The ath tables in the dist10 code seem to be a little out of kilter. they seem to start
221 with index 0 corresponding to (sampling freq)/1024. When in doubt, i'm going to assume
222 that the dist10 code is wrong. MFC Feb2003 */
223 ath[i] = twolame_ath_energy(freq, glopts->athlevel);
224 // fprintf(stderr,"%.2f ",ath[i]);
225 }
226
227
228 /* Work out the partitions Starting from line 0, all lines within 0.33 of the starting bark are
229 added to the same partition. When a line is greater by 0.33 of a bark, start a new
230 partition. */
231 {
232 int partition_count = 0; /* keep a count of the partitions */
233 int cbase = 0; /* current base index for the bark range calculation */
234 for (i = 0; i < HBLKSIZE; i++) {
235 if ((bark[i] - bark[cbase]) > 0.33) { /* 1/3 critical band? */
236 /* this frequency line is too different from the starting line, (in terms of the
237 bark distance) so close that previous partition, and make this line the first
238 member of the next partition */
239 cbase = i; /* Start the new partition from this frequency */
240 partition_count++;
241 }
242 /* partition[i] tells us which partition the i'th frequency line is in */
243 partition[i] = partition_count;
244 /* keep a count of how many frequency lines are in each partition */
245 numlines[partition_count]++;
246 }
247 }
248
249 /* For each partition within the frequency space, calculate the average bark value - cbval
250 [central bark value] */
251 for (i = 0; i < HBLKSIZE; i++)
252 cbval[partition[i]] += bark[i]; /* sum up all the bark values */
253 for (i = 0; i < CBANDS; i++) {
254 if (numlines[i] != 0)
255 cbval[i] /= numlines[i]; /* divide by the number of values */
256 else {
257 cbval[i] = 0; /* this isn't a partition */
258 }
259 }
260
261
262 /* Calculate the spreading function. ISO 11172 Section D.2.3 */
263 for (i = 0; i < CBANDS; i++) {
264 for (j = 0; j < CBANDS; j++) {
265 s[i][j] = psycho_4_spreading_function(1.05 * (cbval[i] - cbval[j]));
266 rnorm[i] += s[i][j]; /* sum the spreading function values for each partition so that
267 they can be normalised later on */
268 }
269 }
270
271 /* Calculate Tone Masking Noise values. ISO 11172 Tables D.3.x */
272 for (j = 0; j < CBANDS; j++)
273 tmn[j] = MAX(15.5 + cbval[j], 24.5);
274
275
276 if (glopts->verbosity > 6) {
277 /* Dump All the Values to STDERR */
278 int wlow, whigh = 0;
279 int ntot = 0;
280 fprintf(stderr, "psy model 4 init\n");
281 fprintf(stderr, "index \tnlines \twlow \twhigh \tbval \tminval \ttmn\n");
282 for (i = 0; i < CBANDS; i++)
283 if (numlines[i] != 0) {
284 wlow = whigh + 1;
285 whigh = wlow + numlines[i] - 1;
286 fprintf(stderr, "%i \t%i \t%i \t%i \t%5.2f \t%4.2f \t%4.2f\n", i + 1, numlines[i],
287 wlow, whigh, cbval[i], minval[(int) cbval[i]], tmn[i]);
288 ntot += numlines[i];
289 }
290 fprintf(stderr, "total lines %i\n", ntot);
291 }
292
293 return (mem);
294 }
295
296
twolame_psycho_4(twolame_options * glopts,short int buffer[2][1152],short int savebuf[2][1056],FLOAT smr[2][32])297 void twolame_psycho_4(twolame_options * glopts,
298 short int buffer[2][1152], short int savebuf[2][1056], FLOAT smr[2][32])
299 /* to match prototype : FLOAT args are always FLOAT */
300 {
301 psycho_4_mem *mem;
302 unsigned int run, i, j, k, ch;
303 FLOAT r_prime, phi_prime;
304 FLOAT npart, epart;
305 int new, old, oldest;
306 FLOAT *grouped_c, *grouped_e;
307 FLOAT *nb, *cb, *tb, *ecb, *bc;
308 FLOAT *cbval, *rnorm;
309 FLOAT *wsamp_r, *phi, *energy, *window;
310 FLOAT *ath, *thr, *c;
311
312 FLOAT *snrtmp[2];
313 int *numlines;
314 int *partition;
315 FLOAT *tmn;
316 FCB *s;
317 F2HBLK *r, *phi_sav;
318
319 int nch = glopts->num_channels_out;
320 int sfreq = glopts->samplerate_out;
321
322 if (!glopts->p4mem) {
323 glopts->p4mem = twolame_psycho_4_init(glopts, sfreq);
324 }
325
326 mem = glopts->p4mem;
327 {
328 grouped_c = mem->grouped_c;
329 grouped_e = mem->grouped_e;
330 nb = mem->nb;
331 cb = mem->cb;
332 tb = mem->tb;
333 ecb = mem->ecb;
334 bc = mem->bc;
335 rnorm = mem->rnorm;
336 cbval = mem->cbval;
337 wsamp_r = mem->wsamp_r;
338 phi = mem->phi;
339 energy = mem->energy;
340 window = mem->window;
341 ath = mem->ath;
342 thr = mem->thr;
343 c = mem->c;
344
345 snrtmp[0] = mem->snrtmp[0];
346 snrtmp[1] = mem->snrtmp[1];
347
348 numlines = mem->numlines;
349 partition = mem->partition;
350 tmn = mem->tmn;
351 s = mem->s;
352 r = mem->r;
353 phi_sav = mem->phi_sav;
354 }
355
356 for (ch = 0; ch < nch; ch++) {
357 for (run = 0; run < 2; run++) {
358 /* Net offset is 480 samples (1056-576) for layer 2; this is because one must stagger
359 input data by 256 samples to synchronize psychoacoustic model with filter bank
360 outputs, then stagger so that center of 1024 FFT window lines up with center of 576
361 "new" audio samples.
362
363 flush = 384*3.0/2.0; = 576 syncsize = 1056; sync_flush = syncsize - flush; 480
364 BLKSIZE = 1024 */
365 {
366 short int *bufferp = buffer[ch];
367 for (j = 0; j < 480; j++) {
368 savebuf[ch][j] = savebuf[ch][j + 576];
369 wsamp_r[j] = window[j] * ((FLOAT) savebuf[ch][j]);
370 }
371 for (; j < 1024; j++) {
372 savebuf[ch][j] = *bufferp++;
373 wsamp_r[j] = window[j] * ((FLOAT) savebuf[ch][j]);
374 }
375 for (; j < 1056; j++)
376 savebuf[ch][j] = *bufferp++;
377 }
378
379 /* Compute FFT */
380 twolame_psycho_2_fft(wsamp_r, energy, phi);
381
382 /* calculate the unpredictability measure, given energy[f] and phi[f] (the age pointers
383 [new/old/oldest] are reset automatically on the second pass */
384 {
385 if (mem->new == 0) {
386 mem->new = 1;
387 mem->oldest = 1;
388 } else {
389 mem->new = 0;
390 mem->oldest = 0;
391 }
392 if (mem->old == 0)
393 mem->old = 1;
394 else
395 mem->old = 0;
396 }
397 old = mem->old;
398 new = mem->new;
399 oldest = mem->oldest;
400
401
402 for (j = 0; j < HBLKSIZE; j++) {
403 #ifdef NEWATAN
404 FLOAT temp1, temp2, temp3;
405 r_prime = 2.0 * r[ch][old][j] - r[ch][oldest][j];
406 phi_prime = 2.0 * phi_sav[ch][old][j] - phi_sav[ch][oldest][j];
407
408 r[ch][new][j] = sqrt((FLOAT) energy[j]);
409 phi_sav[ch][new][j] = phi[j];
410
411 {
412 temp1 =
413 r[ch][new][j] * psycho_4_cos(mem, phi[j]) -
414 r_prime * psycho_4_cos(mem, phi_prime);
415 /* Remember your grade 11 trig? sin(theta) = cos(PI/2 - theta) */
416 temp2 =
417 r[ch][new][j] * psycho_4_cos(mem, PI2 - phi[j]) -
418 r_prime * psycho_4_cos(mem, PI2 - phi_prime);
419 }
420
421
422 temp3 = r[ch][new][j] + fabs((FLOAT) r_prime);
423 if (temp3 != 0)
424 c[j] = sqrt(temp1 * temp1 + temp2 * temp2) / temp3;
425 else
426 c[j] = 0;
427 #else
428 FLOAT temp1, temp2, temp3;
429 r_prime = 2.0 * r[ch][old][j] - r[ch][oldest][j];
430 phi_prime = 2.0 * phi_sav[ch][old][j] - phi_sav[ch][oldest][j];
431
432 r[ch][new][j] = sqrt((FLOAT) energy[j]);
433 phi_sav[ch][new][j] = phi[j];
434
435
436 temp1 = r[ch][new][j] * cos((FLOAT) phi[j]) - r_prime * cos((FLOAT) phi_prime);
437 temp2 = r[ch][new][j] * sin((FLOAT) phi[j]) - r_prime * sin((FLOAT) phi_prime);
438
439 temp3 = r[ch][new][j] + fabs((FLOAT) r_prime);
440 if (temp3 != 0)
441 c[j] = sqrt(temp1 * temp1 + temp2 * temp2) / temp3;
442 else
443 c[j] = 0;
444 #endif
445 }
446
447 /* For each partition, sum all the energy in that partition - grouped_e and calculated
448 the energy-weighted unpredictability measure - grouped_c ISO 11172 Section D.2.4.e */
449 for (j = 1; j < CBANDS; j++) {
450 grouped_e[j] = 0;
451 grouped_c[j] = 0;
452 }
453 grouped_e[0] = energy[0];
454 grouped_c[0] = energy[0] * c[0];
455 for (j = 1; j < HBLKSIZE; j++) {
456 grouped_e[partition[j]] += energy[j];
457 grouped_c[partition[j]] += energy[j] * c[j];
458 }
459
460 /* convolve the grouped energy-weighted unpredictability measure and the grouped energy
461 with the spreading function ISO 11172 D.2.4.f */
462 for (j = 0; j < CBANDS; j++) {
463 ecb[j] = 0;
464 cb[j] = 0;
465 for (k = 0; k < CBANDS; k++) {
466 if (s[j][k] != 0.0) {
467 ecb[j] += s[j][k] * grouped_e[k];
468 cb[j] += s[j][k] * grouped_c[k];
469 }
470 }
471 if (ecb[j] != 0)
472 cb[j] = cb[j] / ecb[j];
473 else
474 cb[j] = 0;
475 }
476
477 /* Convert cb to tb (the tonality index) ISO11172 SecD.2.4.g */
478 for (i = 0; i < CBANDS; i++) {
479 if (cb[i] < 0.05)
480 cb[i] = 0.05;
481 else if (cb[i] > 0.5)
482 cb[i] = 0.5;
483 tb[i] = -0.301029996 - 0.434294482 * log((FLOAT) cb[i]);
484 }
485
486
487 /* Calculate the required SNR for each of the frequency partitions ISO 11172 Sect
488 D.2.4.h */
489 for (j = 0; j < CBANDS; j++) {
490 FLOAT SNR, SNRtemp;
491 SNRtemp = tmn[j] * tb[j] + NMT * (1.0 - tb[j]);
492 SNR = MAX(SNRtemp, minval[(int) cbval[j]]);
493 bc[j] = exp((FLOAT) - SNR * LN_TO_LOG10);
494 }
495
496 /* Calculate the permissible noise energy level in each of the frequency partitions.
497 This section used to have pre-echo control but only for LayerI ISO 11172 Sec D.2.4.k
498 - Spread the threshold energy over FFT lines */
499 for (j = 0; j < CBANDS; j++) {
500 if (rnorm[j] && numlines[j])
501 nb[j] = ecb[j] * bc[j] / (rnorm[j] * numlines[j]);
502 else
503 nb[j] = 0;
504 }
505
506 /* ISO11172 Sec D.2.4.l - thr[] the final energy threshold of audibility */
507 for (j = 0; j < HBLKSIZE; j++)
508 thr[j] = MAX(nb[partition[j]], ath[j]);
509
510 /* Translate the 512 threshold values to the 32 filter bands of the coder Using ISO
511 11172 Table D.5 and Section D.2.4.n */
512 for (j = 0; j < 193; j += 16) {
513 /* WIDTH = 0 */
514 npart = 60802371420160.0;
515 epart = 0.0;
516 for (k = 0; k < 17; k++) {
517 if (thr[j + k] < npart)
518 npart = thr[j + k]; /* For WIDTH==0, find the minimum noise, and later
519 multiply by the number of indexes i.e. 17 */
520 epart += energy[j + k];
521 }
522 snrtmp[run][j / 16] = 4.342944819 * log((FLOAT) (epart / (npart * 17.0)));
523 }
524 for (j = 208; j < (HBLKSIZE - 1); j += 16) {
525 /* WIDTH = 1 */
526 npart = 0.0;
527 epart = 0.0;
528 for (k = 0; k < 17; k++) {
529 npart += thr[j + k]; /* For WIDTH==1, sum the noise */
530 epart += energy[j + k];
531 }
532 snrtmp[run][j / 16] = 4.342944819 * log((FLOAT) (epart / npart));
533 }
534 }
535
536 /* Pick the maximum value of the two runs ISO 11172 Sect D.2.1 */
537 for (i = 0; i < 32; i++)
538 smr[ch][i] = MAX(snrtmp[0][i], snrtmp[1][i]);
539
540 } // now do other channel
541
542 }
543
544
twolame_psycho_4_deinit(psycho_4_mem ** mem)545 void twolame_psycho_4_deinit(psycho_4_mem ** mem)
546 {
547
548 if (mem == NULL || *mem == NULL)
549 return;
550
551 TWOLAME_FREE((*mem)->tmn);
552 TWOLAME_FREE((*mem)->s);
553 TWOLAME_FREE((*mem)->lthr);
554 TWOLAME_FREE((*mem)->r);
555 TWOLAME_FREE((*mem)->phi_sav);
556
557 TWOLAME_FREE((*mem));
558 }
559
560
561
562 // vim:ts=4:sw=4:nowrap:
563