1 /*
2  *  TwoLAME: an optimized MPEG Audio Layer Two encoder
3  *
4  *  Copyright (C) 2001-2004 Michael Cheng
5  *  Copyright (C) 2004-2018 The TwoLAME Project
6  *
7  *  This library is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU Lesser General Public
9  *  License as published by the Free Software Foundation; either
10  *  version 2.1 of the License, or (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  *  Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public
18  *  License along with this library; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  */
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <math.h>
26 #include <string.h>
27 
28 #include "twolame.h"
29 #include "common.h"
30 #include "mem.h"
31 #include "fft.h"
32 #include "ath.h"
33 #include "psycho_4.h"
34 
35 /****************************************************************
36 PSYCHO_4 by MFC Feb 2003
37 
38 This is a cleaned up implementation of psy model 2.
39 This is basically because I was sick of the inconsistencies between
40 the notation in the ISO docs and in the sourcecode.
41 
42 I've nicked a bunch of stuff from LAME to make this a bit easier to grok
43 - ATH values (this also overcomes the lack of mpeg-2 tables
44   which meant that LSF never had proper values)
45 - ath_freq2bark() to convert frequencies directly to bark values.
46 - spreading_function() isolated the calculation of the spreading function.
47   Basically the same code as before, just isolated in its own function.
48   LAME seem to does some extra tweaks to the ISO1117s model.
49   Not really sure if they help or hinder, so I've commented them out (#ifdef LAME)
50 
51 NB: Because of some of the tweaks to bark value calculation etc, it is now possible
52 to have 64 CBANDS. There's no real limit on the actual number of paritions.
53 I wonder if it's worth experimenting with really higher numbers? Probably won't make
54 that much difference to the final SNR values, but it's something worth trying
55     Maybe CBANDS should be a dynamic value, calculated by the psycho_init function
56     CBANDS definition has been changed in encoder.h from 63 to 64
57 
58 ****************************************************************/
59 
60 
61 /* The static variables "r", "phi_sav", "new", "old" and "oldest" have
62  to be remembered for the unpredictability measure.     For "r" and
63  "phi_sav", the first index from the left is the channel select and
64  the second index is the "age" of the data.                                */
65 
66 
67 /* NMT is a constant 5.5dB. ISO11172 Sec D.2.4.h */
68 static const FLOAT NMT = 5.5;
69 
70 /* The index into this array is a bark value
71    This array gives the 'minval' values from ISO11172 Tables D.3.x */
72 static const FLOAT minval[27] = {
73     0.0,                        /* bark = 0 */
74     20.0,                       /* 1 */
75     20.0,                       /* 2 */
76     20.0,                       /* 3 */
77     20.0,                       /* 4 */
78     20.0,                       /* 5 */
79     17.0,                       /* 6 */
80     15.0,                       /* 7 */
81     10.0,                       /* 8 */
82     7.0,                        /* 9 */
83     4.4,                        /* 10 */
84     4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,  /* 11 - 25 */
85     3.5                         /* 26 */
86 };
87 
88 
89 /* Table covers angles from     0 to TRIGTABLESIZE/TRIGTABLESCALE (3.142) radians
90    In steps of 1/TRIGTABLESCALE (0.0005) radians.
91    Largest absolute error: 0.0005
92    Only create a table for cos, and then use trig to work out sin.
93    sin(theta) = cos(PI/2 - theta)
94    MFC March 2003 */
psycho_4_trigtable_init(psycho_4_mem * p4mem)95 static void psycho_4_trigtable_init(psycho_4_mem * p4mem)
96 {
97 
98     int i;
99     for (i = 0; i < TRIGTABLESIZE; i++) {
100         p4mem->cos_table[i] = cos((FLOAT) i / TRIGTABLESCALE);
101     }
102 }
103 
104 #ifdef NEWTAN
psycho_4_cos(psycho_4_mem * p4mem,FLOAT phi)105 static inline FLOAT psycho_4_cos(psycho_4_mem * p4mem, FLOAT phi)
106 {
107     int index;
108     int sign = 1;
109 
110     index = (int) (fabs(phi) * TRIGTABLESCALE);
111     while (index >= TRIGTABLESIZE) {
112         /* If we're larger than PI, then subtract PI until we aren't each time the sign will flip -
113            Year 11 trig again. MFC March 2003 */
114         index -= TRIGTABLESIZE;
115         sign *= -1;
116     }
117     return (sign * p4mem->cos_table[index]);
118 }
119 #endif
120 
121 /* The spreading function.    Values returned in units of energy
122    Argument 'bark' is the difference in bark values between the
123    centre of two partitions.
124    This has been taken from LAME. MFC Feb 2003 */
psycho_4_spreading_function(FLOAT bark)125 static FLOAT psycho_4_spreading_function(FLOAT bark)
126 {
127 
128     FLOAT tempx, x, tempy, temp;
129     tempx = bark;
130 #ifdef LAME
131     /* MP3 standard actually spreads these values a little more */
132     if (tempx >= 0)
133         tempx *= 3;
134     else
135         tempx *= 1.5;
136 #endif
137 
138     if (tempx >= 0.5 && tempx <= 2.5) {
139         temp = tempx - 0.5;
140         x = 8.0 * (temp * temp - 2.0 * temp);
141     } else
142         x = 0.0;
143     tempx += 0.474;
144     tempy = 15.811389 + 7.5 * tempx - 17.5 * sqrt(1.0 + tempx * tempx);
145 
146     if (tempy <= -60.0)
147         return 0.0;
148 
149     tempx = exp((x + tempy) * LN_TO_LOG10);
150 
151 #ifdef LAME
152     /* I'm not sure where the magic value of 0.6609193 comes from. twolame will just keep using the
153        rnorm to normalise the spreading function MFC Feb 2003 */
154     /* Normalization.  The spreading function should be normalized so that: +inf / | s3 [ bark ]
155        d(bark) = 1 / -inf */
156     tempx /= .6609193;
157 #endif
158     return tempx;
159 
160 }
161 
162 /********************************
163  * init psycho model 2
164  ********************************/
twolame_psycho_4_init(twolame_options * glopts,int sfreq)165 static psycho_4_mem *twolame_psycho_4_init(twolame_options * glopts, int sfreq)
166 {
167     psycho_4_mem *mem;
168     FLOAT *cbval, *rnorm;
169     FLOAT *window;
170     FLOAT bark[HBLKSIZE], *ath;
171     int *numlines;
172     int *partition;
173     FCB *s;
174     FLOAT *tmn;
175     int i, j;
176 
177     {
178         mem = (psycho_4_mem *) TWOLAME_MALLOC(sizeof(psycho_4_mem));
179 
180         mem->tmn = (FLOAT *) TWOLAME_MALLOC(sizeof(DCB));
181         mem->s = (FCB *) TWOLAME_MALLOC(sizeof(FCBCB));
182         mem->lthr = (FHBLK *) TWOLAME_MALLOC(sizeof(F2HBLK));
183         mem->r = (F2HBLK *) TWOLAME_MALLOC(sizeof(F22HBLK));
184         mem->phi_sav = (F2HBLK *) TWOLAME_MALLOC(sizeof(F22HBLK));
185 
186         mem->new = 0;
187         mem->old = 1;
188         mem->oldest = 0;
189     }
190 
191     {
192         cbval = mem->cbval;
193         rnorm = mem->rnorm;
194         window = mem->window;
195         // bark = mem->bark;
196         ath = mem->ath;
197         numlines = mem->numlines;
198         partition = mem->partition;
199         s = mem->s;
200         tmn = mem->tmn;
201     }
202 
203 
204     /* Set up the SIN/COS tables */
205     psycho_4_trigtable_init(mem);
206 
207     /* calculate HANN window coefficients */
208     for (i = 0; i < BLKSIZE; i++)
209         window[i] = 0.5 * (1 - cos(2.0 * PI * (i - 0.5) / BLKSIZE));
210 
211     /* For each FFT line from 0(DC) to 512(Nyquist) calculate - bark : the bark value of this fft
212        line - ath : the absolute threshold of hearing for this line [ATH]
213 
214        Since it is a 1024 point FFT, each line in the fft corresponds to 1/1024 of the total
215        frequency. Line 0 should correspond to DC - which doesn't really have a ATH afaik Line 1
216        should be 1/1024th of the Sampling Freq Line 512 should be the nyquist freq */
217     for (i = 0; i < HBLKSIZE; i++) {
218         FLOAT freq = i * (FLOAT) sfreq / (FLOAT) BLKSIZE;
219         bark[i] = twolame_ath_freq2bark(freq);
220         /* The ath tables in the dist10 code seem to be a little out of kilter. they seem to start
221            with index 0 corresponding to (sampling freq)/1024. When in doubt, i'm going to assume
222            that the dist10 code is wrong. MFC Feb2003 */
223         ath[i] = twolame_ath_energy(freq, glopts->athlevel);
224         // fprintf(stderr,"%.2f ",ath[i]);
225     }
226 
227 
228     /* Work out the partitions Starting from line 0, all lines within 0.33 of the starting bark are
229        added to the same partition. When a line is greater by 0.33 of a bark, start a new
230        partition. */
231     {
232         int partition_count = 0;    /* keep a count of the partitions */
233         int cbase = 0;          /* current base index for the bark range calculation */
234         for (i = 0; i < HBLKSIZE; i++) {
235             if ((bark[i] - bark[cbase]) > 0.33) {   /* 1/3 critical band? */
236                 /* this frequency line is too different from the starting line, (in terms of the
237                    bark distance) so close that previous partition, and make this line the first
238                    member of the next partition */
239                 cbase = i;      /* Start the new partition from this frequency */
240                 partition_count++;
241             }
242             /* partition[i] tells us which partition the i'th frequency line is in */
243             partition[i] = partition_count;
244             /* keep a count of how many frequency lines are in each partition */
245             numlines[partition_count]++;
246         }
247     }
248 
249     /* For each partition within the frequency space, calculate the average bark value - cbval
250        [central bark value] */
251     for (i = 0; i < HBLKSIZE; i++)
252         cbval[partition[i]] += bark[i]; /* sum up all the bark values */
253     for (i = 0; i < CBANDS; i++) {
254         if (numlines[i] != 0)
255             cbval[i] /= numlines[i];    /* divide by the number of values */
256         else {
257             cbval[i] = 0;       /* this isn't a partition */
258         }
259     }
260 
261 
262     /* Calculate the spreading function. ISO 11172 Section D.2.3 */
263     for (i = 0; i < CBANDS; i++) {
264         for (j = 0; j < CBANDS; j++) {
265             s[i][j] = psycho_4_spreading_function(1.05 * (cbval[i] - cbval[j]));
266             rnorm[i] += s[i][j];    /* sum the spreading function values for each partition so that
267                                        they can be normalised later on */
268         }
269     }
270 
271     /* Calculate Tone Masking Noise values. ISO 11172 Tables D.3.x */
272     for (j = 0; j < CBANDS; j++)
273         tmn[j] = MAX(15.5 + cbval[j], 24.5);
274 
275 
276     if (glopts->verbosity > 6) {
277         /* Dump All the Values to STDERR */
278         int wlow, whigh = 0;
279         int ntot = 0;
280         fprintf(stderr, "psy model 4 init\n");
281         fprintf(stderr, "index \tnlines \twlow \twhigh \tbval \tminval \ttmn\n");
282         for (i = 0; i < CBANDS; i++)
283             if (numlines[i] != 0) {
284                 wlow = whigh + 1;
285                 whigh = wlow + numlines[i] - 1;
286                 fprintf(stderr, "%i \t%i \t%i \t%i \t%5.2f \t%4.2f \t%4.2f\n", i + 1, numlines[i],
287                         wlow, whigh, cbval[i], minval[(int) cbval[i]], tmn[i]);
288                 ntot += numlines[i];
289             }
290         fprintf(stderr, "total lines %i\n", ntot);
291     }
292 
293     return (mem);
294 }
295 
296 
twolame_psycho_4(twolame_options * glopts,short int buffer[2][1152],short int savebuf[2][1056],FLOAT smr[2][32])297 void twolame_psycho_4(twolame_options * glopts,
298                       short int buffer[2][1152], short int savebuf[2][1056], FLOAT smr[2][32])
299 /* to match prototype : FLOAT args are always FLOAT */
300 {
301     psycho_4_mem *mem;
302     unsigned int run, i, j, k, ch;
303     FLOAT r_prime, phi_prime;
304     FLOAT npart, epart;
305     int new, old, oldest;
306     FLOAT *grouped_c, *grouped_e;
307     FLOAT *nb, *cb, *tb, *ecb, *bc;
308     FLOAT *cbval, *rnorm;
309     FLOAT *wsamp_r, *phi, *energy, *window;
310     FLOAT *ath, *thr, *c;
311 
312     FLOAT *snrtmp[2];
313     int *numlines;
314     int *partition;
315     FLOAT *tmn;
316     FCB *s;
317     F2HBLK *r, *phi_sav;
318 
319     int nch = glopts->num_channels_out;
320     int sfreq = glopts->samplerate_out;
321 
322     if (!glopts->p4mem) {
323         glopts->p4mem = twolame_psycho_4_init(glopts, sfreq);
324     }
325 
326     mem = glopts->p4mem;
327     {
328         grouped_c = mem->grouped_c;
329         grouped_e = mem->grouped_e;
330         nb = mem->nb;
331         cb = mem->cb;
332         tb = mem->tb;
333         ecb = mem->ecb;
334         bc = mem->bc;
335         rnorm = mem->rnorm;
336         cbval = mem->cbval;
337         wsamp_r = mem->wsamp_r;
338         phi = mem->phi;
339         energy = mem->energy;
340         window = mem->window;
341         ath = mem->ath;
342         thr = mem->thr;
343         c = mem->c;
344 
345         snrtmp[0] = mem->snrtmp[0];
346         snrtmp[1] = mem->snrtmp[1];
347 
348         numlines = mem->numlines;
349         partition = mem->partition;
350         tmn = mem->tmn;
351         s = mem->s;
352         r = mem->r;
353         phi_sav = mem->phi_sav;
354     }
355 
356     for (ch = 0; ch < nch; ch++) {
357         for (run = 0; run < 2; run++) {
358             /* Net offset is 480 samples (1056-576) for layer 2; this is because one must stagger
359                input data by 256 samples to synchronize psychoacoustic model with filter bank
360                outputs, then stagger so that center of 1024 FFT window lines up with center of 576
361                "new" audio samples.
362 
363                flush = 384*3.0/2.0; = 576 syncsize = 1056; sync_flush = syncsize - flush; 480
364                BLKSIZE = 1024 */
365             {
366                 short int *bufferp = buffer[ch];
367                 for (j = 0; j < 480; j++) {
368                     savebuf[ch][j] = savebuf[ch][j + 576];
369                     wsamp_r[j] = window[j] * ((FLOAT) savebuf[ch][j]);
370                 }
371                 for (; j < 1024; j++) {
372                     savebuf[ch][j] = *bufferp++;
373                     wsamp_r[j] = window[j] * ((FLOAT) savebuf[ch][j]);
374                 }
375                 for (; j < 1056; j++)
376                     savebuf[ch][j] = *bufferp++;
377             }
378 
379             /* Compute FFT */
380             twolame_psycho_2_fft(wsamp_r, energy, phi);
381 
382             /* calculate the unpredictability measure, given energy[f] and phi[f] (the age pointers
383                [new/old/oldest] are reset automatically on the second pass */
384             {
385                 if (mem->new == 0) {
386                     mem->new = 1;
387                     mem->oldest = 1;
388                 } else {
389                     mem->new = 0;
390                     mem->oldest = 0;
391                 }
392                 if (mem->old == 0)
393                     mem->old = 1;
394                 else
395                     mem->old = 0;
396             }
397             old = mem->old;
398             new = mem->new;
399             oldest = mem->oldest;
400 
401 
402             for (j = 0; j < HBLKSIZE; j++) {
403 #ifdef NEWATAN
404                 FLOAT temp1, temp2, temp3;
405                 r_prime = 2.0 * r[ch][old][j] - r[ch][oldest][j];
406                 phi_prime = 2.0 * phi_sav[ch][old][j] - phi_sav[ch][oldest][j];
407 
408                 r[ch][new][j] = sqrt((FLOAT) energy[j]);
409                 phi_sav[ch][new][j] = phi[j];
410 
411                 {
412                     temp1 =
413                         r[ch][new][j] * psycho_4_cos(mem, phi[j]) -
414                         r_prime * psycho_4_cos(mem, phi_prime);
415                     /* Remember your grade 11 trig? sin(theta) = cos(PI/2 - theta) */
416                     temp2 =
417                         r[ch][new][j] * psycho_4_cos(mem, PI2 - phi[j]) -
418                         r_prime * psycho_4_cos(mem, PI2 - phi_prime);
419                 }
420 
421 
422                 temp3 = r[ch][new][j] + fabs((FLOAT) r_prime);
423                 if (temp3 != 0)
424                     c[j] = sqrt(temp1 * temp1 + temp2 * temp2) / temp3;
425                 else
426                     c[j] = 0;
427 #else
428                 FLOAT temp1, temp2, temp3;
429                 r_prime = 2.0 * r[ch][old][j] - r[ch][oldest][j];
430                 phi_prime = 2.0 * phi_sav[ch][old][j] - phi_sav[ch][oldest][j];
431 
432                 r[ch][new][j] = sqrt((FLOAT) energy[j]);
433                 phi_sav[ch][new][j] = phi[j];
434 
435 
436                 temp1 = r[ch][new][j] * cos((FLOAT) phi[j]) - r_prime * cos((FLOAT) phi_prime);
437                 temp2 = r[ch][new][j] * sin((FLOAT) phi[j]) - r_prime * sin((FLOAT) phi_prime);
438 
439                 temp3 = r[ch][new][j] + fabs((FLOAT) r_prime);
440                 if (temp3 != 0)
441                     c[j] = sqrt(temp1 * temp1 + temp2 * temp2) / temp3;
442                 else
443                     c[j] = 0;
444 #endif
445             }
446 
447             /* For each partition, sum all the energy in that partition - grouped_e and calculated
448                the energy-weighted unpredictability measure - grouped_c ISO 11172 Section D.2.4.e */
449             for (j = 1; j < CBANDS; j++) {
450                 grouped_e[j] = 0;
451                 grouped_c[j] = 0;
452             }
453             grouped_e[0] = energy[0];
454             grouped_c[0] = energy[0] * c[0];
455             for (j = 1; j < HBLKSIZE; j++) {
456                 grouped_e[partition[j]] += energy[j];
457                 grouped_c[partition[j]] += energy[j] * c[j];
458             }
459 
460             /* convolve the grouped energy-weighted unpredictability measure and the grouped energy
461                with the spreading function ISO 11172 D.2.4.f */
462             for (j = 0; j < CBANDS; j++) {
463                 ecb[j] = 0;
464                 cb[j] = 0;
465                 for (k = 0; k < CBANDS; k++) {
466                     if (s[j][k] != 0.0) {
467                         ecb[j] += s[j][k] * grouped_e[k];
468                         cb[j] += s[j][k] * grouped_c[k];
469                     }
470                 }
471                 if (ecb[j] != 0)
472                     cb[j] = cb[j] / ecb[j];
473                 else
474                     cb[j] = 0;
475             }
476 
477             /* Convert cb to tb (the tonality index) ISO11172 SecD.2.4.g */
478             for (i = 0; i < CBANDS; i++) {
479                 if (cb[i] < 0.05)
480                     cb[i] = 0.05;
481                 else if (cb[i] > 0.5)
482                     cb[i] = 0.5;
483                 tb[i] = -0.301029996 - 0.434294482 * log((FLOAT) cb[i]);
484             }
485 
486 
487             /* Calculate the required SNR for each of the frequency partitions ISO 11172 Sect
488                D.2.4.h */
489             for (j = 0; j < CBANDS; j++) {
490                 FLOAT SNR, SNRtemp;
491                 SNRtemp = tmn[j] * tb[j] + NMT * (1.0 - tb[j]);
492                 SNR = MAX(SNRtemp, minval[(int) cbval[j]]);
493                 bc[j] = exp((FLOAT) - SNR * LN_TO_LOG10);
494             }
495 
496             /* Calculate the permissible noise energy level in each of the frequency partitions.
497                This section used to have pre-echo control but only for LayerI ISO 11172 Sec D.2.4.k
498                - Spread the threshold energy over FFT lines */
499             for (j = 0; j < CBANDS; j++) {
500                 if (rnorm[j] && numlines[j])
501                     nb[j] = ecb[j] * bc[j] / (rnorm[j] * numlines[j]);
502                 else
503                     nb[j] = 0;
504             }
505 
506             /* ISO11172 Sec D.2.4.l - thr[] the final energy threshold of audibility */
507             for (j = 0; j < HBLKSIZE; j++)
508                 thr[j] = MAX(nb[partition[j]], ath[j]);
509 
510             /* Translate the 512 threshold values to the 32 filter bands of the coder Using ISO
511                11172 Table D.5 and Section D.2.4.n */
512             for (j = 0; j < 193; j += 16) {
513                 /* WIDTH = 0 */
514                 npart = 60802371420160.0;
515                 epart = 0.0;
516                 for (k = 0; k < 17; k++) {
517                     if (thr[j + k] < npart)
518                         npart = thr[j + k]; /* For WIDTH==0, find the minimum noise, and later
519                                                multiply by the number of indexes i.e. 17 */
520                     epart += energy[j + k];
521                 }
522                 snrtmp[run][j / 16] = 4.342944819 * log((FLOAT) (epart / (npart * 17.0)));
523             }
524             for (j = 208; j < (HBLKSIZE - 1); j += 16) {
525                 /* WIDTH = 1 */
526                 npart = 0.0;
527                 epart = 0.0;
528                 for (k = 0; k < 17; k++) {
529                     npart += thr[j + k];    /* For WIDTH==1, sum the noise */
530                     epart += energy[j + k];
531                 }
532                 snrtmp[run][j / 16] = 4.342944819 * log((FLOAT) (epart / npart));
533             }
534         }
535 
536         /* Pick the maximum value of the two runs ISO 11172 Sect D.2.1 */
537         for (i = 0; i < 32; i++)
538             smr[ch][i] = MAX(snrtmp[0][i], snrtmp[1][i]);
539 
540     }                           // now do other channel
541 
542 }
543 
544 
twolame_psycho_4_deinit(psycho_4_mem ** mem)545 void twolame_psycho_4_deinit(psycho_4_mem ** mem)
546 {
547 
548     if (mem == NULL || *mem == NULL)
549         return;
550 
551     TWOLAME_FREE((*mem)->tmn);
552     TWOLAME_FREE((*mem)->s);
553     TWOLAME_FREE((*mem)->lthr);
554     TWOLAME_FREE((*mem)->r);
555     TWOLAME_FREE((*mem)->phi_sav);
556 
557     TWOLAME_FREE((*mem));
558 }
559 
560 
561 
562 // vim:ts=4:sw=4:nowrap:
563