1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
7  *                                                                  *
8  * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009             *
9  * by the Xiph.Org Foundation https://xiph.org/                     *
10  *                                                                  *
11  ********************************************************************
12 
13  function: normalized modified discrete cosine transform
14            power of two length transform only [64 <= n ]
15 
16  Original algorithm adapted long ago from _The use of multirate filter
17  banks for coding of high quality digital audio_, by T. Sporer,
18  K. Brandenburg and B. Edler, collection of the European Signal
19  Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
20  211-214
21 
22  The below code implements an algorithm that no longer looks much like
23  that presented in the paper, but the basic structure remains if you
24  dig deep enough to see it.
25 
26  This module DOES NOT INCLUDE code to generate/apply the window
27  function.  Everybody has their own weird favorite including me... I
28  happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
29  vehemently disagree.
30 
31  ********************************************************************/
32 
33 /* this can also be run as an integer transform by uncommenting a
34    define in mdct.h; the integerization is a first pass and although
35    it's likely stable for Vorbis, the dynamic range is constrained and
36    roundoff isn't done (so it's noisy).  Consider it functional, but
37    only a starting point.  There's no point on a machine with an FPU */
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <math.h>
43 #include "../../codec.h"
44 #include "mdct.h"
45 #include "os.h"
46 #include "misc.h"
47 
48 /* build lookups for trig functions; also pre-figure scaling and
49    some window function algebra. */
50 
mdct_init(mdct_lookup * lookup,int n)51 void mdct_init(mdct_lookup *lookup,int n){
52   int   *bitrev=(int*) _ogg_malloc(sizeof(*bitrev)*(n/4));
53   DATA_TYPE *T=(DATA_TYPE*) _ogg_malloc(sizeof(*T)*(n+n/4));
54 
55   int i;
56   int n2=n>>1;
57   int log2n=lookup->log2n=rint(log((float)n)/log(2.f));
58   lookup->n=n;
59   lookup->trig=T;
60   lookup->bitrev=bitrev;
61 
62 /* trig lookups... */
63 
64   for(i=0;i<n/4;i++){
65     T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i)));
66     T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i)));
67     T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1)));
68     T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1)));
69   }
70   for(i=0;i<n/8;i++){
71     T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5);
72     T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5);
73   }
74 
75   /* bitreverse lookup... */
76 
77   {
78     int mask=(1<<(log2n-1))-1,i,j;
79     int msb=1<<(log2n-2);
80     for(i=0;i<n/8;i++){
81       int acc=0;
82       for(j=0;msb>>j;j++)
83         if((msb>>j)&i)acc|=1<<j;
84       bitrev[i*2]=((~acc)&mask)-1;
85       bitrev[i*2+1]=acc;
86 
87     }
88   }
89   lookup->scale=FLOAT_CONV(4.f/n);
90 }
91 
92 /* 8 point butterfly (in place, 4 register) */
mdct_butterfly_8(DATA_TYPE * x)93 STIN void mdct_butterfly_8(DATA_TYPE *x){
94   REG_TYPE r0   = x[6] + x[2];
95   REG_TYPE r1   = x[6] - x[2];
96   REG_TYPE r2   = x[4] + x[0];
97   REG_TYPE r3   = x[4] - x[0];
98 
99            x[6] = r0   + r2;
100            x[4] = r0   - r2;
101 
102            r0   = x[5] - x[1];
103            r2   = x[7] - x[3];
104            x[0] = r1   + r0;
105            x[2] = r1   - r0;
106 
107            r0   = x[5] + x[1];
108            r1   = x[7] + x[3];
109            x[3] = r2   + r3;
110            x[1] = r2   - r3;
111            x[7] = r1   + r0;
112            x[5] = r1   - r0;
113 
114 }
115 
116 /* 16 point butterfly (in place, 4 register) */
mdct_butterfly_16(DATA_TYPE * x)117 STIN void mdct_butterfly_16(DATA_TYPE *x){
118   REG_TYPE r0     = x[1]  - x[9];
119   REG_TYPE r1     = x[0]  - x[8];
120 
121            x[8]  += x[0];
122            x[9]  += x[1];
123            x[0]   = MULT_NORM((r0   + r1) * cPI2_8);
124            x[1]   = MULT_NORM((r0   - r1) * cPI2_8);
125 
126            r0     = x[3]  - x[11];
127            r1     = x[10] - x[2];
128            x[10] += x[2];
129            x[11] += x[3];
130            x[2]   = r0;
131            x[3]   = r1;
132 
133            r0     = x[12] - x[4];
134            r1     = x[13] - x[5];
135            x[12] += x[4];
136            x[13] += x[5];
137            x[4]   = MULT_NORM((r0   - r1) * cPI2_8);
138            x[5]   = MULT_NORM((r0   + r1) * cPI2_8);
139 
140            r0     = x[14] - x[6];
141            r1     = x[15] - x[7];
142            x[14] += x[6];
143            x[15] += x[7];
144            x[6]  = r0;
145            x[7]  = r1;
146 
147            mdct_butterfly_8(x);
148            mdct_butterfly_8(x+8);
149 }
150 
151 /* 32 point butterfly (in place, 4 register) */
mdct_butterfly_32(DATA_TYPE * x)152 STIN void mdct_butterfly_32(DATA_TYPE *x){
153   REG_TYPE r0     = x[30] - x[14];
154   REG_TYPE r1     = x[31] - x[15];
155 
156            x[30] +=         x[14];
157            x[31] +=         x[15];
158            x[14]  =         r0;
159            x[15]  =         r1;
160 
161            r0     = x[28] - x[12];
162            r1     = x[29] - x[13];
163            x[28] +=         x[12];
164            x[29] +=         x[13];
165            x[12]  = MULT_NORM( r0 * cPI1_8  -  r1 * cPI3_8 );
166            x[13]  = MULT_NORM( r0 * cPI3_8  +  r1 * cPI1_8 );
167 
168            r0     = x[26] - x[10];
169            r1     = x[27] - x[11];
170            x[26] +=         x[10];
171            x[27] +=         x[11];
172            x[10]  = MULT_NORM(( r0  - r1 ) * cPI2_8);
173            x[11]  = MULT_NORM(( r0  + r1 ) * cPI2_8);
174 
175            r0     = x[24] - x[8];
176            r1     = x[25] - x[9];
177            x[24] += x[8];
178            x[25] += x[9];
179            x[8]   = MULT_NORM( r0 * cPI3_8  -  r1 * cPI1_8 );
180            x[9]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
181 
182            r0     = x[22] - x[6];
183            r1     = x[7]  - x[23];
184            x[22] += x[6];
185            x[23] += x[7];
186            x[6]   = r1;
187            x[7]   = r0;
188 
189            r0     = x[4]  - x[20];
190            r1     = x[5]  - x[21];
191            x[20] += x[4];
192            x[21] += x[5];
193            x[4]   = MULT_NORM( r1 * cPI1_8  +  r0 * cPI3_8 );
194            x[5]   = MULT_NORM( r1 * cPI3_8  -  r0 * cPI1_8 );
195 
196            r0     = x[2]  - x[18];
197            r1     = x[3]  - x[19];
198            x[18] += x[2];
199            x[19] += x[3];
200            x[2]   = MULT_NORM(( r1  + r0 ) * cPI2_8);
201            x[3]   = MULT_NORM(( r1  - r0 ) * cPI2_8);
202 
203            r0     = x[0]  - x[16];
204            r1     = x[1]  - x[17];
205            x[16] += x[0];
206            x[17] += x[1];
207            x[0]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
208            x[1]   = MULT_NORM( r1 * cPI1_8  -  r0 * cPI3_8 );
209 
210            mdct_butterfly_16(x);
211            mdct_butterfly_16(x+16);
212 
213 }
214 
215 /* N point first stage butterfly (in place, 2 register) */
mdct_butterfly_first(DATA_TYPE * T,DATA_TYPE * x,int points)216 STIN void mdct_butterfly_first(DATA_TYPE *T,
217                                         DATA_TYPE *x,
218                                         int points){
219 
220   DATA_TYPE *x1        = x          + points      - 8;
221   DATA_TYPE *x2        = x          + (points>>1) - 8;
222   REG_TYPE   r0;
223   REG_TYPE   r1;
224 
225   do{
226 
227                r0      = x1[6]      -  x2[6];
228                r1      = x1[7]      -  x2[7];
229                x1[6]  += x2[6];
230                x1[7]  += x2[7];
231                x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
232                x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
233 
234                r0      = x1[4]      -  x2[4];
235                r1      = x1[5]      -  x2[5];
236                x1[4]  += x2[4];
237                x1[5]  += x2[5];
238                x2[4]   = MULT_NORM(r1 * T[5]  +  r0 * T[4]);
239                x2[5]   = MULT_NORM(r1 * T[4]  -  r0 * T[5]);
240 
241                r0      = x1[2]      -  x2[2];
242                r1      = x1[3]      -  x2[3];
243                x1[2]  += x2[2];
244                x1[3]  += x2[3];
245                x2[2]   = MULT_NORM(r1 * T[9]  +  r0 * T[8]);
246                x2[3]   = MULT_NORM(r1 * T[8]  -  r0 * T[9]);
247 
248                r0      = x1[0]      -  x2[0];
249                r1      = x1[1]      -  x2[1];
250                x1[0]  += x2[0];
251                x1[1]  += x2[1];
252                x2[0]   = MULT_NORM(r1 * T[13] +  r0 * T[12]);
253                x2[1]   = MULT_NORM(r1 * T[12] -  r0 * T[13]);
254 
255     x1-=8;
256     x2-=8;
257     T+=16;
258 
259   }while(x2>=x);
260 }
261 
262 /* N/stage point generic N stage butterfly (in place, 2 register) */
mdct_butterfly_generic(DATA_TYPE * T,DATA_TYPE * x,int points,int trigint)263 STIN void mdct_butterfly_generic(DATA_TYPE *T,
264                                           DATA_TYPE *x,
265                                           int points,
266                                           int trigint){
267 
268   DATA_TYPE *x1        = x          + points      - 8;
269   DATA_TYPE *x2        = x          + (points>>1) - 8;
270   REG_TYPE   r0;
271   REG_TYPE   r1;
272 
273   do{
274 
275                r0      = x1[6]      -  x2[6];
276                r1      = x1[7]      -  x2[7];
277                x1[6]  += x2[6];
278                x1[7]  += x2[7];
279                x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
280                x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
281 
282                T+=trigint;
283 
284                r0      = x1[4]      -  x2[4];
285                r1      = x1[5]      -  x2[5];
286                x1[4]  += x2[4];
287                x1[5]  += x2[5];
288                x2[4]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
289                x2[5]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
290 
291                T+=trigint;
292 
293                r0      = x1[2]      -  x2[2];
294                r1      = x1[3]      -  x2[3];
295                x1[2]  += x2[2];
296                x1[3]  += x2[3];
297                x2[2]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
298                x2[3]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
299 
300                T+=trigint;
301 
302                r0      = x1[0]      -  x2[0];
303                r1      = x1[1]      -  x2[1];
304                x1[0]  += x2[0];
305                x1[1]  += x2[1];
306                x2[0]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
307                x2[1]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
308 
309                T+=trigint;
310     x1-=8;
311     x2-=8;
312 
313   }while(x2>=x);
314 }
315 
mdct_butterflies(mdct_lookup * init,DATA_TYPE * x,int points)316 STIN void mdct_butterflies(mdct_lookup *init,
317                              DATA_TYPE *x,
318                              int points){
319 
320   DATA_TYPE *T=init->trig;
321   int stages=init->log2n-5;
322   int i,j;
323 
324   if(--stages>0){
325     mdct_butterfly_first(T,x,points);
326   }
327 
328   for(i=1;--stages>0;i++){
329     for(j=0;j<(1<<i);j++)
330       mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
331   }
332 
333   for(j=0;j<points;j+=32)
334     mdct_butterfly_32(x+j);
335 
336 }
337 
mdct_clear(mdct_lookup * l)338 void mdct_clear(mdct_lookup *l){
339   if(l){
340     if(l->trig)_ogg_free(l->trig);
341     if(l->bitrev)_ogg_free(l->bitrev);
342     memset(l,0,sizeof(*l));
343   }
344 }
345 
mdct_bitreverse(mdct_lookup * init,DATA_TYPE * x)346 STIN void mdct_bitreverse(mdct_lookup *init,
347                             DATA_TYPE *x){
348   int        n       = init->n;
349   int       *bit     = init->bitrev;
350   DATA_TYPE *w0      = x;
351   DATA_TYPE *w1      = x = w0+(n>>1);
352   DATA_TYPE *T       = init->trig+n;
353 
354   do{
355     DATA_TYPE *x0    = x+bit[0];
356     DATA_TYPE *x1    = x+bit[1];
357 
358     REG_TYPE  r0     = x0[1]  - x1[1];
359     REG_TYPE  r1     = x0[0]  + x1[0];
360     REG_TYPE  r2     = MULT_NORM(r1     * T[0]   + r0 * T[1]);
361     REG_TYPE  r3     = MULT_NORM(r1     * T[1]   - r0 * T[0]);
362 
363               w1    -= 4;
364 
365               r0     = HALVE(x0[1] + x1[1]);
366               r1     = HALVE(x0[0] - x1[0]);
367 
368               w0[0]  = r0     + r2;
369               w1[2]  = r0     - r2;
370               w0[1]  = r1     + r3;
371               w1[3]  = r3     - r1;
372 
373               x0     = x+bit[2];
374               x1     = x+bit[3];
375 
376               r0     = x0[1]  - x1[1];
377               r1     = x0[0]  + x1[0];
378               r2     = MULT_NORM(r1     * T[2]   + r0 * T[3]);
379               r3     = MULT_NORM(r1     * T[3]   - r0 * T[2]);
380 
381               r0     = HALVE(x0[1] + x1[1]);
382               r1     = HALVE(x0[0] - x1[0]);
383 
384               w0[2]  = r0     + r2;
385               w1[0]  = r0     - r2;
386               w0[3]  = r1     + r3;
387               w1[1]  = r3     - r1;
388 
389               T     += 4;
390               bit   += 4;
391               w0    += 4;
392 
393   }while(w0<w1);
394 }
395 
mdct_backward(mdct_lookup * init,DATA_TYPE * in,DATA_TYPE * out)396 void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
397   int n=init->n;
398   int n2=n>>1;
399   int n4=n>>2;
400 
401   /* rotate */
402 
403   DATA_TYPE *iX = in+n2-7;
404   DATA_TYPE *oX = out+n2+n4;
405   DATA_TYPE *T  = init->trig+n4;
406 
407   do{
408     oX         -= 4;
409     oX[0]       = MULT_NORM(-iX[2] * T[3] - iX[0]  * T[2]);
410     oX[1]       = MULT_NORM (iX[0] * T[3] - iX[2]  * T[2]);
411     oX[2]       = MULT_NORM(-iX[6] * T[1] - iX[4]  * T[0]);
412     oX[3]       = MULT_NORM (iX[4] * T[1] - iX[6]  * T[0]);
413     iX         -= 8;
414     T          += 4;
415   }while(iX>=in);
416 
417   iX            = in+n2-8;
418   oX            = out+n2+n4;
419   T             = init->trig+n4;
420 
421   do{
422     T          -= 4;
423     oX[0]       =  MULT_NORM (iX[4] * T[3] + iX[6] * T[2]);
424     oX[1]       =  MULT_NORM (iX[4] * T[2] - iX[6] * T[3]);
425     oX[2]       =  MULT_NORM (iX[0] * T[1] + iX[2] * T[0]);
426     oX[3]       =  MULT_NORM (iX[0] * T[0] - iX[2] * T[1]);
427     iX         -= 8;
428     oX         += 4;
429   }while(iX>=in);
430 
431   mdct_butterflies(init,out+n2,n2);
432   mdct_bitreverse(init,out);
433 
434   /* roatate + window */
435 
436   {
437     DATA_TYPE *oX1=out+n2+n4;
438     DATA_TYPE *oX2=out+n2+n4;
439     DATA_TYPE *iX =out;
440     T             =init->trig+n2;
441 
442     do{
443       oX1-=4;
444 
445       oX1[3]  =  MULT_NORM (iX[0] * T[1] - iX[1] * T[0]);
446       oX2[0]  = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]);
447 
448       oX1[2]  =  MULT_NORM (iX[2] * T[3] - iX[3] * T[2]);
449       oX2[1]  = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]);
450 
451       oX1[1]  =  MULT_NORM (iX[4] * T[5] - iX[5] * T[4]);
452       oX2[2]  = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]);
453 
454       oX1[0]  =  MULT_NORM (iX[6] * T[7] - iX[7] * T[6]);
455       oX2[3]  = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]);
456 
457       oX2+=4;
458       iX    +=   8;
459       T     +=   8;
460     }while(iX<oX1);
461 
462     iX=out+n2+n4;
463     oX1=out+n4;
464     oX2=oX1;
465 
466     do{
467       oX1-=4;
468       iX-=4;
469 
470       oX2[0] = -(oX1[3] = iX[3]);
471       oX2[1] = -(oX1[2] = iX[2]);
472       oX2[2] = -(oX1[1] = iX[1]);
473       oX2[3] = -(oX1[0] = iX[0]);
474 
475       oX2+=4;
476     }while(oX2<iX);
477 
478     iX=out+n2+n4;
479     oX1=out+n2+n4;
480     oX2=out+n2;
481     do{
482       oX1-=4;
483       oX1[0]= iX[3];
484       oX1[1]= iX[2];
485       oX1[2]= iX[1];
486       oX1[3]= iX[0];
487       iX+=4;
488     }while(oX1>oX2);
489   }
490 }
491 
mdct_forward(mdct_lookup * init,DATA_TYPE * in,DATA_TYPE * out)492 void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
493   int n=init->n;
494   int n2=n>>1;
495   int n4=n>>2;
496   int n8=n>>3;
497   DATA_TYPE *w=(DATA_TYPE*) alloca(n*sizeof(*w)); /* forward needs working space */
498   DATA_TYPE *w2=w+n2;
499 
500   /* rotate */
501 
502   /* window + rotate + step 1 */
503 
504   REG_TYPE r0;
505   REG_TYPE r1;
506   DATA_TYPE *x0=in+n2+n4;
507   DATA_TYPE *x1=x0+1;
508   DATA_TYPE *T=init->trig+n2;
509 
510   int i=0;
511 
512   for(i=0;i<n8;i+=2){
513     x0 -=4;
514     T-=2;
515     r0= x0[2] + x1[0];
516     r1= x0[0] + x1[2];
517     w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
518     w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
519     x1 +=4;
520   }
521 
522   x1=in+1;
523 
524   for(;i<n2-n8;i+=2){
525     T-=2;
526     x0 -=4;
527     r0= x0[2] - x1[0];
528     r1= x0[0] - x1[2];
529     w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
530     w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
531     x1 +=4;
532   }
533 
534   x0=in+n;
535 
536   for(;i<n2;i+=2){
537     T-=2;
538     x0 -=4;
539     r0= -x0[2] - x1[0];
540     r1= -x0[0] - x1[2];
541     w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
542     w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
543     x1 +=4;
544   }
545 
546 
547   mdct_butterflies(init,w+n2,n2);
548   mdct_bitreverse(init,w);
549 
550   /* roatate + window */
551 
552   T=init->trig+n2;
553   x0=out+n2;
554 
555   for(i=0;i<n4;i++){
556     x0--;
557     out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale);
558     x0[0]  =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale);
559     w+=2;
560     T+=2;
561   }
562 }
563