1 #include "common.h"
2 #include "encoder.h"
3 #include "fft.h"
4 #include "psycho_2.h"
5
psycho_2(double * buffer,short int * savebuf,int chn,int lay,float * snr32,double sfreq)6 void psycho_2 (double *buffer, short int *savebuf, int chn, int lay, float *snr32, double sfreq /* to match prototype : float args are always double */
7 )
8 {
9 unsigned int i, j, k;
10 FLOAT r_prime, phi_prime;
11 FLOAT freq_mult, bval_lo, minthres, sum_energy;
12 double tb, temp1, temp2, temp3;
13
14 /* The static variables "r", "phi_sav", "new", "old" and "oldest" have */
15 /* to be remembered for the unpredictability measure. For "r" and */
16 /* "phi_sav", the first index from the left is the channel select and */
17 /* the second index is the "age" of the data. */
18
19 static int new = 0, old = 1, oldest = 0;
20 static int init = 0, flush, syncsize, sfreq_idx;
21
22 /* The following static variables are constants. */
23
24 static double nmt = 5.5;
25
26 static FLOAT crit_band[27] = { 0, 100, 200, 300, 400, 510, 630, 770,
27 920, 1080, 1270, 1480, 1720, 2000, 2320, 2700,
28 3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000,
29 15500, 25000, 30000
30 };
31
32 static FLOAT bmax[27] = { 20.0, 20.0, 20.0, 20.0, 20.0, 17.0, 15.0,
33 10.0, 7.0, 4.4, 4.5, 4.5, 4.5, 4.5,
34 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
35 4.5, 4.5, 4.5, 3.5, 3.5, 3.5
36 };
37
38 /* The following pointer variables point to large areas of memory */
39 /* dynamically allocated by the mem_alloc() function. Dynamic memory */
40 /* allocation is used in order to avoid stack frame or data area */
41 /* overflow errors that otherwise would have occurred at compile time */
42 /* on the Macintosh computer. */
43
44 FLOAT *grouped_c, *grouped_e, *nb, *cb, *ecb, *bc;
45 FLOAT *wsamp_r, *wsamp_i, *phi, *energy;
46 FLOAT *c, *fthr;
47 F32 *snrtmp;
48
49 static int *numlines;
50 static int *partition;
51 static FLOAT *cbval, *rnorm;
52 static FLOAT *window;
53 static FLOAT *absthr;
54 static double *tmn;
55 static FCB *s;
56 static FHBLK *lthr;
57 static F2HBLK *r, *phi_sav;
58
59 /* These dynamic memory allocations simulate "automatic" variables */
60 /* placed on the stack. For each mem_alloc() call here, there must be */
61 /* a corresponding mem_free() call at the end of this function. */
62
63 grouped_c = (FLOAT *) mem_alloc (sizeof (FCB), "grouped_c");
64 grouped_e = (FLOAT *) mem_alloc (sizeof (FCB), "grouped_e");
65 nb = (FLOAT *) mem_alloc (sizeof (FCB), "nb");
66 cb = (FLOAT *) mem_alloc (sizeof (FCB), "cb");
67 ecb = (FLOAT *) mem_alloc (sizeof (FCB), "ecb");
68 bc = (FLOAT *) mem_alloc (sizeof (FCB), "bc");
69 wsamp_r = (FLOAT *) mem_alloc (sizeof (FBLK), "wsamp_r");
70 wsamp_i = (FLOAT *) mem_alloc (sizeof (FBLK), "wsamp_i");
71 phi = (FLOAT *) mem_alloc (sizeof (FBLK), "phi");
72 energy = (FLOAT *) mem_alloc (sizeof (FBLK), "energy");
73 c = (FLOAT *) mem_alloc (sizeof (FHBLK), "c");
74 fthr = (FLOAT *) mem_alloc (sizeof (FHBLK), "fthr");
75 snrtmp = (F32 *) mem_alloc (sizeof (F2_32), "snrtmp");
76
77 if (init == 0) {
78
79 /* These dynamic memory allocations simulate "static" variables placed */
80 /* in the data space. Each mem_alloc() call here occurs only once at */
81 /* initialization time. The mem_free() function must not be called. */
82
83 numlines = (int *) mem_alloc (sizeof (ICB), "numlines");
84 partition = (int *) mem_alloc (sizeof (IHBLK), "partition");
85 cbval = (FLOAT *) mem_alloc (sizeof (FCB), "cbval");
86 rnorm = (FLOAT *) mem_alloc (sizeof (FCB), "rnorm");
87 window = (FLOAT *) mem_alloc (sizeof (FBLK), "window");
88 absthr = (FLOAT *) mem_alloc (sizeof (FHBLK), "absthr");
89 tmn = (double *) mem_alloc (sizeof (DCB), "tmn");
90 s = (FCB *) mem_alloc (sizeof (FCBCB), "s");
91 lthr = (FHBLK *) mem_alloc (sizeof (F2HBLK), "lthr");
92 r = (F2HBLK *) mem_alloc (sizeof (F22HBLK), "r");
93 phi_sav = (F2HBLK *) mem_alloc (sizeof (F22HBLK), "phi_sav");
94
95 i = sfreq + 0.5;
96 switch (i) {
97 case 32000:
98 sfreq_idx = 0;
99 break;
100 case 44100:
101 sfreq_idx = 1;
102 break;
103 case 48000:
104 sfreq_idx = 2;
105 break;
106 default:
107 printf ("error, invalid sampling frequency: %d Hz\n", i);
108 exit (-1);
109 }
110 if (verbosity >= 2)
111 printf ("absthr[][] sampling frequency index: %d\n", sfreq_idx);
112 read_absthr (absthr, sfreq_idx);
113 if (lay == 1) {
114 flush = 448;
115 syncsize = 1024;
116 } else {
117 flush = 384 * 3.0 / 2.0;
118 syncsize = 1056;
119 }
120 /* calculate HANN window coefficients */
121 for (i = 0; i < BLKSIZE; i++)
122 window[i] = 0.5 * (1 - cos (2.0 * PI * i / (BLKSIZE - 1.0)));
123 /* reset states used in unpredictability measure */
124 for (i = 0; i < HBLKSIZE; i++) {
125 r[0][0][i] = r[1][0][i] = r[0][1][i] = r[1][1][i] = 0;
126 phi_sav[0][0][i] = phi_sav[1][0][i] = 0;
127 phi_sav[0][1][i] = phi_sav[1][1][i] = 0;
128 lthr[0][i] = 60802371420160.0;
129 lthr[1][i] = 60802371420160.0;
130 }
131 /*****************************************************************************
132 * Initialization: Compute the following constants for use later *
133 * partition[HBLKSIZE] = the partition number associated with each *
134 * frequency line *
135 * cbval[CBANDS] = the center (average) bark value of each *
136 * partition *
137 * numlines[CBANDS] = the number of frequency lines in each partition *
138 * tmn[CBANDS] = tone masking noise *
139 *****************************************************************************/
140 /* compute fft frequency multiplicand */
141 freq_mult = sfreq / BLKSIZE;
142
143 /* calculate fft frequency, then bval of each line (use fthr[] as tmp storage)*/
144 for (i = 0; i < HBLKSIZE; i++) {
145 temp1 = i * freq_mult;
146 j = 1;
147 while (temp1 > crit_band[j])
148 j++;
149 fthr[i] =
150 j - 1 + (temp1 - crit_band[j - 1]) / (crit_band[j] - crit_band[j - 1]);
151 }
152 partition[0] = 0;
153 /* temp2 is the counter of the number of frequency lines in each partition */
154 temp2 = 1;
155 cbval[0] = fthr[0];
156 bval_lo = fthr[0];
157 for (i = 1; i < HBLKSIZE; i++) {
158 if ((fthr[i] - bval_lo) > 0.33) {
159 partition[i] = partition[i - 1] + 1;
160 cbval[partition[i - 1]] = cbval[partition[i - 1]] / temp2;
161 cbval[partition[i]] = fthr[i];
162 bval_lo = fthr[i];
163 numlines[partition[i - 1]] = temp2;
164 temp2 = 1;
165 } else {
166 partition[i] = partition[i - 1];
167 cbval[partition[i]] += fthr[i];
168 temp2++;
169 }
170 }
171 numlines[partition[i - 1]] = temp2;
172 cbval[partition[i - 1]] = cbval[partition[i - 1]] / temp2;
173
174 /************************************************************************
175 * Now compute the spreading function, s[j][i], the value of the spread-*
176 * ing function, centered at band j, for band i, store for later use *
177 ************************************************************************/
178 for (j = 0; j < CBANDS; j++) {
179 for (i = 0; i < CBANDS; i++) {
180 temp1 = (cbval[i] - cbval[j]) * 1.05;
181 if (temp1 >= 0.5 && temp1 <= 2.5) {
182 temp2 = temp1 - 0.5;
183 temp2 = 8.0 * (temp2 * temp2 - 2.0 * temp2);
184 } else
185 temp2 = 0;
186 temp1 += 0.474;
187 temp3 =
188 15.811389 + 7.5 * temp1 -
189 17.5 * sqrt ((double) (1.0 + temp1 * temp1));
190 if (temp3 <= -100)
191 s[i][j] = 0;
192 else {
193 temp3 = (temp2 + temp3) * LN_TO_LOG10;
194 s[i][j] = exp (temp3);
195 }
196 }
197 }
198
199 /* Calculate Tone Masking Noise values */
200 for (j = 0; j < CBANDS; j++) {
201 temp1 = 15.5 + cbval[j];
202 tmn[j] = (temp1 > 24.5) ? temp1 : 24.5;
203 /* Calculate normalization factors for the net spreading functions */
204 rnorm[j] = 0;
205 for (i = 0; i < CBANDS; i++) {
206 rnorm[j] += s[j][i];
207 }
208 }
209 init++;
210 }
211
212 /************************* End of Initialization *****************************/
213 switch (lay) {
214 case 1:
215 case 2:
216 for (i = 0; i < lay; i++) {
217 /*****************************************************************************
218 * Net offset is 480 samples (1056-576) for layer 2; this is because one must*
219 * stagger input data by 256 samples to synchronize psychoacoustic model with*
220 * filter bank outputs, then stagger so that center of 1024 FFT window lines *
221 * up with center of 576 "new" audio samples. *
222 * *
223 * For layer 1, the input data still needs to be staggered by 256 samples, *
224 * then it must be staggered again so that the 384 "new" samples are centered*
225 * in the 1024 FFT window. The net offset is then 576 and you need 448 "new"*
226 * samples for each iteration to keep the 384 samples of interest centered *
227 *****************************************************************************/
228 for (j = 0; j < syncsize; j++) {
229 if (j < (syncsize - flush))
230 savebuf[j] = savebuf[j + flush];
231 else
232 savebuf[j] = *buffer++;
233 /**window data with HANN window***********************************************/
234 if (j < BLKSIZE) {
235 wsamp_r[j] = window[j] * ((FLOAT) savebuf[j]);
236 wsamp_i[j] = 0;
237 }
238 }
239 /**Compute FFT****************************************************************/
240 fft (wsamp_r, wsamp_i, energy, phi);
241 /*****************************************************************************
242 * calculate the unpredictability measure, given energy[f] and phi[f] *
243 *****************************************************************************/
244 for (j = 0; j < HBLKSIZE; j++) {
245 r_prime = 2.0 * r[chn][old][j] - r[chn][oldest][j];
246 phi_prime = 2.0 * phi_sav[chn][old][j] - phi_sav[chn][oldest][j];
247 r[chn][new][j] = sqrt ((double) energy[j]);
248 phi_sav[chn][new][j] = phi[j];
249 temp1 =
250 r[chn][new][j] * cos ((double) phi[j]) -
251 r_prime * cos ((double) phi_prime);
252 temp2 =
253 r[chn][new][j] * sin ((double) phi[j]) -
254 r_prime * sin ((double) phi_prime);
255 temp3 = r[chn][new][j] + fabs ((double) r_prime);
256 if (temp3 != 0)
257 c[j] = sqrt (temp1 * temp1 + temp2 * temp2) / temp3;
258 else
259 c[j] = 0;
260 }
261 /*only update data "age" pointers after you are done with the second channel */
262 /*for layer 1 computations, for the layer 2 double computations, the pointers*/
263 /*are reset automatically on the second pass */
264 if (lay == 2 || chn == 1) {
265 if (new == 0) {
266 new = 1;
267 oldest = 1;
268 } else {
269 new = 0;
270 oldest = 0;
271 }
272 if (old == 0)
273 old = 1;
274 else
275 old = 0;
276 }
277 /*****************************************************************************
278 * Calculate the grouped, energy-weighted, unpredictability measure, *
279 * grouped_c[], and the grouped energy. grouped_e[] *
280 *****************************************************************************/
281 for (j = 1; j < CBANDS; j++) {
282 grouped_e[j] = 0;
283 grouped_c[j] = 0;
284 }
285 grouped_e[0] = energy[0];
286 grouped_c[0] = energy[0] * c[0];
287 for (j = 1; j < HBLKSIZE; j++) {
288 grouped_e[partition[j]] += energy[j];
289 grouped_c[partition[j]] += energy[j] * c[j];
290 }
291 /*****************************************************************************
292 * convolve the grouped energy-weighted unpredictability measure *
293 * and the grouped energy with the spreading function, s[j][k] *
294 *****************************************************************************/
295 for (j = 0; j < CBANDS; j++) {
296 ecb[j] = 0;
297 cb[j] = 0;
298 for (k = 0; k < CBANDS; k++) {
299 if (s[j][k] != 0.0) {
300 ecb[j] += s[j][k] * grouped_e[k];
301 cb[j] += s[j][k] * grouped_c[k];
302 }
303 }
304 if (ecb[j] != 0)
305 cb[j] = cb[j] / ecb[j];
306 else
307 cb[j] = 0;
308 }
309 /*****************************************************************************
310 * Calculate the required SNR for each of the frequency partitions *
311 * this whole section can be accomplished by a table lookup *
312 *****************************************************************************/
313 for (j = 0; j < CBANDS; j++) {
314 if (cb[j] < .05)
315 cb[j] = 0.05;
316 else if (cb[j] > .5)
317 cb[j] = 0.5;
318 tb = -0.434294482 * log ((double) cb[j]) - 0.301029996;
319 bc[j] = tmn[j] * tb + nmt * (1.0 - tb);
320 k = cbval[j] + 0.5;
321 bc[j] = (bc[j] > bmax[k]) ? bc[j] : bmax[k];
322 bc[j] = exp ((double) -bc[j] * LN_TO_LOG10);
323 }
324 /*****************************************************************************
325 * Calculate the permissible noise energy level in each of the frequency *
326 * partitions. Include absolute threshold and pre-echo controls *
327 * this whole section can be accomplished by a table lookup *
328 *****************************************************************************/
329 for (j = 0; j < CBANDS; j++)
330 if (rnorm[j] && numlines[j])
331 nb[j] = ecb[j] * bc[j] / (rnorm[j] * numlines[j]);
332 else
333 nb[j] = 0;
334 for (j = 0; j < HBLKSIZE; j++) {
335 /*temp1 is the preliminary threshold */
336 temp1 = nb[partition[j]];
337 temp1 = (temp1 > absthr[j]) ? temp1 : absthr[j];
338 /*do not use pre-echo control for layer 2 because it may do bad things to the*/
339 /* MUSICAM bit allocation algorithm */
340 if (lay == 1) {
341 fthr[j] = (temp1 < lthr[chn][j]) ? temp1 : lthr[chn][j];
342 temp2 = temp1 * 0.00316;
343 fthr[j] = (temp2 > fthr[j]) ? temp2 : fthr[j];
344 } else
345 fthr[j] = temp1;
346 lthr[chn][j] = LXMIN * temp1;
347 }
348 /*****************************************************************************
349 * Translate the 512 threshold values to the 32 filter bands of the coder *
350 *****************************************************************************/
351 for (j = 0; j < 193; j += 16) {
352 minthres = 60802371420160.0;
353 sum_energy = 0.0;
354 for (k = 0; k < 17; k++) {
355 if (minthres > fthr[j + k])
356 minthres = fthr[j + k];
357 sum_energy += energy[j + k];
358 }
359 snrtmp[i][j / 16] = sum_energy / (minthres * 17.0);
360 snrtmp[i][j / 16] = 4.342944819 * log ((double) snrtmp[i][j / 16]);
361 }
362 for (j = 208; j < (HBLKSIZE - 1); j += 16) {
363 minthres = 0.0;
364 sum_energy = 0.0;
365 for (k = 0; k < 17; k++) {
366 minthres += fthr[j + k];
367 sum_energy += energy[j + k];
368 }
369 snrtmp[i][j / 16] = sum_energy / minthres;
370 snrtmp[i][j / 16] = 4.342944819 * log ((double) snrtmp[i][j / 16]);
371 }
372 /*****************************************************************************
373 * End of Psychoacuostic calculation loop *
374 *****************************************************************************/
375 }
376 for (i = 0; i < 32; i++) {
377 if (lay == 2)
378 snr32[i] = (snrtmp[0][i] > snrtmp[1][i]) ? snrtmp[0][i] : snrtmp[1][i];
379 else
380 snr32[i] = snrtmp[0][i];
381 }
382 break;
383 case 3:
384 printf ("layer 3 is not currently supported\n");
385 break;
386 default:
387 printf ("error, invalid MPEG/audio coding layer: %d\n", lay);
388 }
389
390 /* These mem_free() calls must correspond with the mem_alloc() calls */
391 /* used at the beginning of this function to simulate "automatic" */
392 /* variables placed on the stack. */
393
394 mem_free ((void **) &grouped_c);
395 mem_free ((void **) &grouped_e);
396 mem_free ((void **) &nb);
397 mem_free ((void **) &cb);
398 mem_free ((void **) &ecb);
399 mem_free ((void **) &bc);
400 mem_free ((void **) &wsamp_r);
401 mem_free ((void **) &wsamp_i);
402 mem_free ((void **) &phi);
403 mem_free ((void **) &energy);
404 mem_free ((void **) &c);
405 mem_free ((void **) &fthr);
406 mem_free ((void **) &snrtmp);
407 }
408
409 /******************************************************************************
410 routine to read in absthr table from a file.
411 ******************************************************************************/
412
read_absthr(float * absthr,long int table)413 void read_absthr (float *absthr, long int table)
414 {
415 FILE *fp;
416 long j, index;
417 float a;
418 char t[80], *ta = "absthr_0";
419
420 switch (table) {
421 case 0:
422 ta[7] = '0';
423 break;
424 case 1:
425 ta[7] = '1';
426 break;
427 case 2:
428 ta[7] = '2';
429 break;
430 default:
431 printf ("absthr table: Not valid table number\n");
432 }
433 if (!(fp = OpenTableFile (ta))) {
434 printf ("Please check %s table\n", ta);
435 exit (0);
436 }
437 fgets (t, 150, fp);
438 sscanf (t, "table %ld", &index);
439 if (index != table) {
440 printf ("error in absthr table %s", ta);
441 exit (0);
442 }
443 for (j = 0; j < HBLKSIZE; j++) {
444 fgets (t, 80, fp);
445 sscanf (t, "%f", &a);
446 absthr[j] = a;
447 }
448 fclose (fp);
449 }
450