1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE OggVorbis 'TREMOR' CODEC SOURCE CODE.   *
4  *                                                                  *
5  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
6  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
7  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
8  *                                                                  *
9  * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002    *
10  * BY THE Xiph.Org FOUNDATION http://www.xiph.org/                  *
11  *                                                                  *
12  ********************************************************************
13 
14  function: normalized modified discrete cosine transform
15            power of two length transform only [64 <= n ]
16  last mod: $Id$
17 
18  Original algorithm adapted long ago from _The use of multirate filter
19  banks for coding of high quality digital audio_, by T. Sporer,
20  K. Brandenburg and B. Edler, collection of the European Signal
21  Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
22  211-214
23 
24  The below code implements an algorithm that no longer looks much like
25  that presented in the paper, but the basic structure remains if you
26  dig deep enough to see it.
27 
28  This module DOES NOT INCLUDE code to generate/apply the window
29  function.  Everybody has their own weird favorite including me... I
30  happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
31  vehemently disagree.
32 
33  ********************************************************************/
34 
35 #include "ivorbiscodec.h"
36 #include "codebook.h"
37 #include "misc.h"
38 #include "mdct.h"
39 #include "mdct_lookup.h"
40 
41 
42 /* 8 point butterfly (in place) */
mdct_butterfly_8(DATA_TYPE * x)43 STIN void mdct_butterfly_8(DATA_TYPE *x){
44 
45   REG_TYPE r0   = x[4] + x[0];
46   REG_TYPE r1   = x[4] - x[0];
47   REG_TYPE r2   = x[5] + x[1];
48   REG_TYPE r3   = x[5] - x[1];
49   REG_TYPE r4   = x[6] + x[2];
50   REG_TYPE r5   = x[6] - x[2];
51   REG_TYPE r6   = x[7] + x[3];
52   REG_TYPE r7   = x[7] - x[3];
53 
54 	   x[0] = r5   + r3;
55 	   x[1] = r7   - r1;
56 	   x[2] = r5   - r3;
57 	   x[3] = r7   + r1;
58            x[4] = r4   - r0;
59 	   x[5] = r6   - r2;
60            x[6] = r4   + r0;
61 	   x[7] = r6   + r2;
62 	   MB();
63 }
64 
65 /* 16 point butterfly (in place, 4 register) */
mdct_butterfly_16(DATA_TYPE * x)66 STIN void mdct_butterfly_16(DATA_TYPE *x){
67 
68   REG_TYPE r0, r1;
69 
70 	   r0 = x[ 0] - x[ 8]; x[ 8] += x[ 0];
71 	   r1 = x[ 1] - x[ 9]; x[ 9] += x[ 1];
72 	   x[ 0] = MULT31((r0 + r1) , cPI2_8);
73 	   x[ 1] = MULT31((r1 - r0) , cPI2_8);
74 	   MB();
75 
76 	   r0 = x[10] - x[ 2]; x[10] += x[ 2];
77 	   r1 = x[ 3] - x[11]; x[11] += x[ 3];
78 	   x[ 2] = r1; x[ 3] = r0;
79 	   MB();
80 
81 	   r0 = x[12] - x[ 4]; x[12] += x[ 4];
82 	   r1 = x[13] - x[ 5]; x[13] += x[ 5];
83 	   x[ 4] = MULT31((r0 - r1) , cPI2_8);
84 	   x[ 5] = MULT31((r0 + r1) , cPI2_8);
85 	   MB();
86 
87 	   r0 = x[14] - x[ 6]; x[14] += x[ 6];
88 	   r1 = x[15] - x[ 7]; x[15] += x[ 7];
89 	   x[ 6] = r0; x[ 7] = r1;
90 	   MB();
91 
92 	   mdct_butterfly_8(x);
93 	   mdct_butterfly_8(x+8);
94 }
95 
96 /* 32 point butterfly (in place, 4 register) */
mdct_butterfly_32(DATA_TYPE * x)97 STIN void mdct_butterfly_32(DATA_TYPE *x){
98 
99   REG_TYPE r0, r1;
100 
101 	   r0 = x[30] - x[14]; x[30] += x[14];
102 	   r1 = x[31] - x[15]; x[31] += x[15];
103 	   x[14] = r0; x[15] = r1;
104 	   MB();
105 
106 	   r0 = x[28] - x[12]; x[28] += x[12];
107 	   r1 = x[29] - x[13]; x[29] += x[13];
108 	   XNPROD31( r0, r1, cPI1_8, cPI3_8, &x[12], &x[13] );
109 	   MB();
110 
111 	   r0 = x[26] - x[10]; x[26] += x[10];
112 	   r1 = x[27] - x[11]; x[27] += x[11];
113 	   x[10] = MULT31((r0 - r1) , cPI2_8);
114 	   x[11] = MULT31((r0 + r1) , cPI2_8);
115 	   MB();
116 
117 	   r0 = x[24] - x[ 8]; x[24] += x[ 8];
118 	   r1 = x[25] - x[ 9]; x[25] += x[ 9];
119 	   XNPROD31( r0, r1, cPI3_8, cPI1_8, &x[ 8], &x[ 9] );
120 	   MB();
121 
122 	   r0 = x[22] - x[ 6]; x[22] += x[ 6];
123 	   r1 = x[ 7] - x[23]; x[23] += x[ 7];
124 	   x[ 6] = r1; x[ 7] = r0;
125 	   MB();
126 
127 	   r0 = x[ 4] - x[20]; x[20] += x[ 4];
128 	   r1 = x[ 5] - x[21]; x[21] += x[ 5];
129 	   XPROD31 ( r0, r1, cPI3_8, cPI1_8, &x[ 4], &x[ 5] );
130 	   MB();
131 
132 	   r0 = x[ 2] - x[18]; x[18] += x[ 2];
133 	   r1 = x[ 3] - x[19]; x[19] += x[ 3];
134 	   x[ 2] = MULT31((r1 + r0) , cPI2_8);
135 	   x[ 3] = MULT31((r1 - r0) , cPI2_8);
136 	   MB();
137 
138 	   r0 = x[ 0] - x[16]; x[16] += x[ 0];
139 	   r1 = x[ 1] - x[17]; x[17] += x[ 1];
140 	   XPROD31 ( r0, r1, cPI1_8, cPI3_8, &x[ 0], &x[ 1] );
141 	   MB();
142 
143 	   mdct_butterfly_16(x);
144 	   mdct_butterfly_16(x+16);
145 }
146 
147 /* N/stage point generic N stage butterfly (in place, 2 register) */
mdct_butterfly_generic(DATA_TYPE * x,int points,int step)148 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){
149 
150   LOOKUP_T *T   = sincos_lookup0;
151   DATA_TYPE *x1        = x + points      - 8;
152   DATA_TYPE *x2        = x + (points>>1) - 8;
153   REG_TYPE   r0;
154   REG_TYPE   r1;
155 
156   do{
157     r0 = x1[6] - x2[6]; x1[6] += x2[6];
158     r1 = x2[7] - x1[7]; x1[7] += x2[7];
159     XPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T+=step;
160 
161     r0 = x1[4] - x2[4]; x1[4] += x2[4];
162     r1 = x2[5] - x1[5]; x1[5] += x2[5];
163     XPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T+=step;
164 
165     r0 = x1[2] - x2[2]; x1[2] += x2[2];
166     r1 = x2[3] - x1[3]; x1[3] += x2[3];
167     XPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T+=step;
168 
169     r0 = x1[0] - x2[0]; x1[0] += x2[0];
170     r1 = x2[1] - x1[1]; x1[1] += x2[1];
171     XPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T+=step;
172 
173     x1-=8; x2-=8;
174   }while(T<sincos_lookup0+1024);
175   do{
176     r0 = x1[6] - x2[6]; x1[6] += x2[6];
177     r1 = x1[7] - x2[7]; x1[7] += x2[7];
178     XNPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T-=step;
179 
180     r0 = x1[4] - x2[4]; x1[4] += x2[4];
181     r1 = x1[5] - x2[5]; x1[5] += x2[5];
182     XNPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T-=step;
183 
184     r0 = x1[2] - x2[2]; x1[2] += x2[2];
185     r1 = x1[3] - x2[3]; x1[3] += x2[3];
186     XNPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T-=step;
187 
188     r0 = x1[0] - x2[0]; x1[0] += x2[0];
189     r1 = x1[1] - x2[1]; x1[1] += x2[1];
190     XNPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T-=step;
191 
192     x1-=8; x2-=8;
193   }while(T>sincos_lookup0);
194   do{
195     r0 = x2[6] - x1[6]; x1[6] += x2[6];
196     r1 = x2[7] - x1[7]; x1[7] += x2[7];
197     XPROD31( r0, r1, T[0], T[1], &x2[6], &x2[7] ); T+=step;
198 
199     r0 = x2[4] - x1[4]; x1[4] += x2[4];
200     r1 = x2[5] - x1[5]; x1[5] += x2[5];
201     XPROD31( r0, r1, T[0], T[1], &x2[4], &x2[5] ); T+=step;
202 
203     r0 = x2[2] - x1[2]; x1[2] += x2[2];
204     r1 = x2[3] - x1[3]; x1[3] += x2[3];
205     XPROD31( r0, r1, T[0], T[1], &x2[2], &x2[3] ); T+=step;
206 
207     r0 = x2[0] - x1[0]; x1[0] += x2[0];
208     r1 = x2[1] - x1[1]; x1[1] += x2[1];
209     XPROD31( r0, r1, T[0], T[1], &x2[0], &x2[1] ); T+=step;
210 
211     x1-=8; x2-=8;
212   }while(T<sincos_lookup0+1024);
213   do{
214     r0 = x1[6] - x2[6]; x1[6] += x2[6];
215     r1 = x2[7] - x1[7]; x1[7] += x2[7];
216     XNPROD31( r1, r0, T[0], T[1], &x2[6], &x2[7] ); T-=step;
217 
218     r0 = x1[4] - x2[4]; x1[4] += x2[4];
219     r1 = x2[5] - x1[5]; x1[5] += x2[5];
220     XNPROD31( r1, r0, T[0], T[1], &x2[4], &x2[5] ); T-=step;
221 
222     r0 = x1[2] - x2[2]; x1[2] += x2[2];
223     r1 = x2[3] - x1[3]; x1[3] += x2[3];
224     XNPROD31( r1, r0, T[0], T[1], &x2[2], &x2[3] ); T-=step;
225 
226     r0 = x1[0] - x2[0]; x1[0] += x2[0];
227     r1 = x2[1] - x1[1]; x1[1] += x2[1];
228     XNPROD31( r1, r0, T[0], T[1], &x2[0], &x2[1] ); T-=step;
229 
230     x1-=8; x2-=8;
231   }while(T>sincos_lookup0);
232 }
233 
mdct_butterflies(DATA_TYPE * x,int points,int shift)234 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){
235 
236   int stages=8-shift;
237   int i,j;
238 
239   for(i=0;--stages>0;i++){
240     for(j=0;j<(1<<i);j++)
241       mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift));
242   }
243 
244   for(j=0;j<points;j+=32)
245     mdct_butterfly_32(x+j);
246 
247 }
248 
249 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15};
250 
bitrev12(int x)251 STIN int bitrev12(int x){
252   return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8);
253 }
254 
mdct_bitreverse(DATA_TYPE * x,int n,int step,int shift)255 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){
256 
257   int          bit   = 0;
258   DATA_TYPE   *w0    = x;
259   DATA_TYPE   *w1    = x = w0+(n>>1);
260   LOOKUP_T    *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
261   LOOKUP_T    *Ttop  = T+1024;
262   DATA_TYPE    r2;
263 
264   do{
265     DATA_TYPE r3     = bitrev12(bit++);
266     DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
267     DATA_TYPE *x1    = x + (r3>>shift);
268 
269     REG_TYPE  r0     = x0[0]  + x1[0];
270     REG_TYPE  r1     = x1[1]  - x0[1];
271 
272 	      XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
273 
274 	      w1    -= 4;
275 
276 	      r0     = (x0[1] + x1[1])>>1;
277               r1     = (x0[0] - x1[0])>>1;
278 	      w0[0]  = r0     + r2;
279 	      w0[1]  = r1     + r3;
280 	      w1[2]  = r0     - r2;
281 	      w1[3]  = r3     - r1;
282 
283 	      r3     = bitrev12(bit++);
284               x0     = x + ((r3 ^ 0xfff)>>shift) -1;
285               x1     = x + (r3>>shift);
286 
287               r0     = x0[0]  + x1[0];
288               r1     = x1[1]  - x0[1];
289 
290 	      XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step;
291 
292               r0     = (x0[1] + x1[1])>>1;
293               r1     = (x0[0] - x1[0])>>1;
294 	      w0[2]  = r0     + r2;
295 	      w0[3]  = r1     + r3;
296 	      w1[0]  = r0     - r2;
297 	      w1[1]  = r3     - r1;
298 
299 	      w0    += 4;
300   }while(T<Ttop);
301   do{
302     DATA_TYPE r3     = bitrev12(bit++);
303     DATA_TYPE *x0    = x + ((r3 ^ 0xfff)>>shift) -1;
304     DATA_TYPE *x1    = x + (r3>>shift);
305 
306     REG_TYPE  r0     = x0[0]  + x1[0];
307     REG_TYPE  r1     = x1[1]  - x0[1];
308 
309 	      T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
310 
311 	      w1    -= 4;
312 
313 	      r0     = (x0[1] + x1[1])>>1;
314               r1     = (x0[0] - x1[0])>>1;
315 	      w0[0]  = r0     + r2;
316 	      w0[1]  = r1     + r3;
317 	      w1[2]  = r0     - r2;
318 	      w1[3]  = r3     - r1;
319 
320 	      r3     = bitrev12(bit++);
321               x0     = x + ((r3 ^ 0xfff)>>shift) -1;
322               x1     = x + (r3>>shift);
323 
324               r0     = x0[0]  + x1[0];
325               r1     = x1[1]  - x0[1];
326 
327 	      T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 );
328 
329               r0     = (x0[1] + x1[1])>>1;
330               r1     = (x0[0] - x1[0])>>1;
331 	      w0[2]  = r0     + r2;
332 	      w0[3]  = r1     + r3;
333 	      w1[0]  = r0     - r2;
334 	      w1[1]  = r3     - r1;
335 
336 	      w0    += 4;
337   }while(w0<w1);
338 }
339 
mdct_backward(int n,DATA_TYPE * in,DATA_TYPE * out)340 void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){
341   int n2=n>>1;
342   int n4=n>>2;
343   DATA_TYPE *iX;
344   DATA_TYPE *oX;
345   LOOKUP_T *T;
346   LOOKUP_T *V;
347   int shift;
348   int step;
349 
350   for (shift=6;!(n&(1<<shift));shift++);
351   shift=13-shift;
352   step=2<<shift;
353 
354   /* rotate */
355 
356   iX            = in+n2-7;
357   oX            = out+n2+n4;
358   T             = sincos_lookup0;
359 
360   do{
361     oX-=4;
362     XPROD31( iX[4], iX[6], T[0], T[1], &oX[2], &oX[3] ); T+=step;
363     XPROD31( iX[0], iX[2], T[0], T[1], &oX[0], &oX[1] ); T+=step;
364     iX-=8;
365   }while(iX>=in+n4);
366   do{
367     oX-=4;
368     XPROD31( iX[4], iX[6], T[1], T[0], &oX[2], &oX[3] ); T-=step;
369     XPROD31( iX[0], iX[2], T[1], T[0], &oX[0], &oX[1] ); T-=step;
370     iX-=8;
371   }while(iX>=in);
372 
373   iX            = in+n2-8;
374   oX            = out+n2+n4;
375   T             = sincos_lookup0;
376 
377   do{
378     T+=step; XNPROD31( iX[6], iX[4], T[0], T[1], &oX[0], &oX[1] );
379     T+=step; XNPROD31( iX[2], iX[0], T[0], T[1], &oX[2], &oX[3] );
380     iX-=8;
381     oX+=4;
382   }while(iX>=in+n4);
383   do{
384     T-=step; XNPROD31( iX[6], iX[4], T[1], T[0], &oX[0], &oX[1] );
385     T-=step; XNPROD31( iX[2], iX[0], T[1], T[0], &oX[2], &oX[3] );
386     iX-=8;
387     oX+=4;
388   }while(iX>=in);
389 
390   mdct_butterflies(out+n2,n2,shift);
391   mdct_bitreverse(out,n,step,shift);
392 
393   /* rotate + window */
394 
395   step>>=2;
396   {
397     DATA_TYPE *oX1=out+n2+n4;
398     DATA_TYPE *oX2=out+n2+n4;
399     DATA_TYPE *iX =out;
400 
401     switch(step) {
402       default: {
403         T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1;
404         do{
405           oX1-=4;
406 	  XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step;
407 	  XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step;
408 	  XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step;
409 	  XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step;
410 	  oX2+=4;
411 	  iX+=8;
412 	}while(iX<oX1);
413 	break;
414       }
415 
416       case 1: {
417         /* linear interpolation between table values: offset=0.5, step=1 */
418 	REG_TYPE  t0,t1,v0,v1;
419         T         = sincos_lookup0;
420         V         = sincos_lookup1;
421 	t0        = (*T++)>>1;
422 	t1        = (*T++)>>1;
423         do{
424           oX1-=4;
425 
426 	  t0 += (v0 = (*V++)>>1);
427 	  t1 += (v1 = (*V++)>>1);
428 	  XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
429 	  v0 += (t0 = (*T++)>>1);
430 	  v1 += (t1 = (*T++)>>1);
431 	  XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] );
432 	  t0 += (v0 = (*V++)>>1);
433 	  t1 += (v1 = (*V++)>>1);
434 	  XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] );
435 	  v0 += (t0 = (*T++)>>1);
436 	  v1 += (t1 = (*T++)>>1);
437 	  XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
438 
439 	  oX2+=4;
440 	  iX+=8;
441 	}while(iX<oX1);
442 	break;
443       }
444 
445       case 0: {
446         /* linear interpolation between table values: offset=0.25, step=0.5 */
447 	REG_TYPE  t0,t1,v0,v1,q0,q1;
448         T         = sincos_lookup0;
449         V         = sincos_lookup1;
450 	t0        = *T++;
451 	t1        = *T++;
452         do{
453           oX1-=4;
454 
455 	  v0  = *V++;
456 	  v1  = *V++;
457 	  t0 +=  (q0 = (v0-t0)>>2);
458 	  t1 +=  (q1 = (v1-t1)>>2);
459 	  XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] );
460 	  t0  = v0-q0;
461 	  t1  = v1-q1;
462 	  XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] );
463 
464 	  t0  = *T++;
465 	  t1  = *T++;
466 	  v0 += (q0 = (t0-v0)>>2);
467 	  v1 += (q1 = (t1-v1)>>2);
468 	  XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] );
469 	  v0  = t0-q0;
470 	  v1  = t1-q1;
471 	  XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] );
472 
473 	  oX2+=4;
474 	  iX+=8;
475 	}while(iX<oX1);
476 	break;
477       }
478     }
479 
480     iX=out+n2+n4;
481     oX1=out+n4;
482     oX2=oX1;
483 
484     do{
485       oX1-=4;
486       iX-=4;
487 
488       oX2[0] = -(oX1[3] = iX[3]);
489       oX2[1] = -(oX1[2] = iX[2]);
490       oX2[2] = -(oX1[1] = iX[1]);
491       oX2[3] = -(oX1[0] = iX[0]);
492 
493       oX2+=4;
494     }while(oX2<iX);
495 
496     iX=out+n2+n4;
497     oX1=out+n2+n4;
498     oX2=out+n2;
499 
500     do{
501       oX1-=4;
502       oX1[0]= iX[3];
503       oX1[1]= iX[2];
504       oX1[2]= iX[1];
505       oX1[3]= iX[0];
506       iX+=4;
507     }while(oX1>oX2);
508   }
509 }
510 
511