xref: /reactos/sdk/lib/3rdparty/libmpg123/dct64.c (revision bbabe248)
1 /*
2 	dct64.c: DCT64, the plain C version
3 
4 	copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5 	see COPYING and AUTHORS files in distribution or http://mpg123.org
6 	initially written by Michael Hipp
7 */
8 
9 /*
10  * Discrete Cosine Tansform (DCT) for subband synthesis
11  *
12  * -funroll-loops (for gcc) will remove the loops for better performance
13  * using loops in the source-code enhances readabillity
14  *
15  *
16  * TODO: write an optimized version for the down-sampling modes
17  *       (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero
18  */
19 
20 #include "mpg123lib_intern.h"
21 
22 void dct64(real *out0,real *out1,real *samples)
23 {
24   real bufs[64];
25 
26  {
27   register int i,j;
28   register real *b1,*b2,*bs,*costab;
29 
30   b1 = samples;
31   bs = bufs;
32   costab = pnts[0]+16;
33   b2 = b1 + 32;
34 
35   for(i=15;i>=0;i--)
36     *bs++ = (*b1++ + *--b2);
37   for(i=15;i>=0;i--)
38     *bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
39 
40   b1 = bufs;
41   costab = pnts[1]+8;
42   b2 = b1 + 16;
43 
44   {
45     for(i=7;i>=0;i--)
46       *bs++ = (*b1++ + *--b2);
47     for(i=7;i>=0;i--)
48       *bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
49     b2 += 32;
50     costab += 8;
51     for(i=7;i>=0;i--)
52       *bs++ = (*b1++ + *--b2);
53     for(i=7;i>=0;i--)
54       *bs++ = REAL_MUL((*b1++ - *--b2), *--costab);
55     b2 += 32;
56   }
57 
58   bs = bufs;
59   costab = pnts[2];
60   b2 = b1 + 8;
61 
62   for(j=2;j;j--)
63   {
64     for(i=3;i>=0;i--)
65       *bs++ = (*b1++ + *--b2);
66     for(i=3;i>=0;i--)
67       *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]);
68     b2 += 16;
69     for(i=3;i>=0;i--)
70       *bs++ = (*b1++ + *--b2);
71     for(i=3;i>=0;i--)
72       *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]);
73     b2 += 16;
74   }
75 
76   b1 = bufs;
77   costab = pnts[3];
78   b2 = b1 + 4;
79 
80   for(j=4;j;j--)
81   {
82     *bs++ = (*b1++ + *--b2);
83     *bs++ = (*b1++ + *--b2);
84     *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]);
85     *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]);
86     b2 += 8;
87     *bs++ = (*b1++ + *--b2);
88     *bs++ = (*b1++ + *--b2);
89     *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]);
90     *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]);
91     b2 += 8;
92   }
93   bs = bufs;
94   costab = pnts[4];
95 
96   for(j=8;j;j--)
97   {
98     real v0,v1;
99     v0=*b1++; v1 = *b1++;
100     *bs++ = (v0 + v1);
101     *bs++ = REAL_MUL((v0 - v1), (*costab));
102     v0=*b1++; v1 = *b1++;
103     *bs++ = (v0 + v1);
104     *bs++ = REAL_MUL((v1 - v0), (*costab));
105   }
106 
107  }
108 
109 
110  {
111   register real *b1;
112   register int i;
113 
114   for(b1=bufs,i=8;i;i--,b1+=4)
115     b1[2] += b1[3];
116 
117   for(b1=bufs,i=4;i;i--,b1+=8)
118   {
119     b1[4] += b1[6];
120     b1[6] += b1[5];
121     b1[5] += b1[7];
122   }
123 
124   for(b1=bufs,i=2;i;i--,b1+=16)
125   {
126     b1[8]  += b1[12];
127     b1[12] += b1[10];
128     b1[10] += b1[14];
129     b1[14] += b1[9];
130     b1[9]  += b1[13];
131     b1[13] += b1[11];
132     b1[11] += b1[15];
133   }
134  }
135 
136 
137   out0[0x10*16] = REAL_SCALE_DCT64(bufs[0]);
138   out0[0x10*15] = REAL_SCALE_DCT64(bufs[16+0]  + bufs[16+8]);
139   out0[0x10*14] = REAL_SCALE_DCT64(bufs[8]);
140   out0[0x10*13] = REAL_SCALE_DCT64(bufs[16+8]  + bufs[16+4]);
141   out0[0x10*12] = REAL_SCALE_DCT64(bufs[4]);
142   out0[0x10*11] = REAL_SCALE_DCT64(bufs[16+4]  + bufs[16+12]);
143   out0[0x10*10] = REAL_SCALE_DCT64(bufs[12]);
144   out0[0x10* 9] = REAL_SCALE_DCT64(bufs[16+12] + bufs[16+2]);
145   out0[0x10* 8] = REAL_SCALE_DCT64(bufs[2]);
146   out0[0x10* 7] = REAL_SCALE_DCT64(bufs[16+2]  + bufs[16+10]);
147   out0[0x10* 6] = REAL_SCALE_DCT64(bufs[10]);
148   out0[0x10* 5] = REAL_SCALE_DCT64(bufs[16+10] + bufs[16+6]);
149   out0[0x10* 4] = REAL_SCALE_DCT64(bufs[6]);
150   out0[0x10* 3] = REAL_SCALE_DCT64(bufs[16+6]  + bufs[16+14]);
151   out0[0x10* 2] = REAL_SCALE_DCT64(bufs[14]);
152   out0[0x10* 1] = REAL_SCALE_DCT64(bufs[16+14] + bufs[16+1]);
153   out0[0x10* 0] = REAL_SCALE_DCT64(bufs[1]);
154 
155   out1[0x10* 0] = REAL_SCALE_DCT64(bufs[1]);
156   out1[0x10* 1] = REAL_SCALE_DCT64(bufs[16+1]  + bufs[16+9]);
157   out1[0x10* 2] = REAL_SCALE_DCT64(bufs[9]);
158   out1[0x10* 3] = REAL_SCALE_DCT64(bufs[16+9]  + bufs[16+5]);
159   out1[0x10* 4] = REAL_SCALE_DCT64(bufs[5]);
160   out1[0x10* 5] = REAL_SCALE_DCT64(bufs[16+5]  + bufs[16+13]);
161   out1[0x10* 6] = REAL_SCALE_DCT64(bufs[13]);
162   out1[0x10* 7] = REAL_SCALE_DCT64(bufs[16+13] + bufs[16+3]);
163   out1[0x10* 8] = REAL_SCALE_DCT64(bufs[3]);
164   out1[0x10* 9] = REAL_SCALE_DCT64(bufs[16+3]  + bufs[16+11]);
165   out1[0x10*10] = REAL_SCALE_DCT64(bufs[11]);
166   out1[0x10*11] = REAL_SCALE_DCT64(bufs[16+11] + bufs[16+7]);
167   out1[0x10*12] = REAL_SCALE_DCT64(bufs[7]);
168   out1[0x10*13] = REAL_SCALE_DCT64(bufs[16+7]  + bufs[16+15]);
169   out1[0x10*14] = REAL_SCALE_DCT64(bufs[15]);
170   out1[0x10*15] = REAL_SCALE_DCT64(bufs[16+15]);
171 
172 }
173 
174 
175