1 /*
2 dct64.c: DCT64, the plain C version
3
4 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5 see COPYING and AUTHORS files in distribution or http://mpg123.org
6 initially written by Michael Hipp
7 */
8
9 /*
10 * Discrete Cosine Tansform (DCT) for subband synthesis
11 *
12 * -funroll-loops (for gcc) will remove the loops for better performance
13 * using loops in the source-code enhances readabillity
14 *
15 *
16 * TODO: write an optimized version for the down-sampling modes
17 * (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero
18 */
19
20 #include "mpg123lib_intern.h"
21
dct64(real * out0,real * out1,real * samples)22 void dct64(real *out0,real *out1,real *samples)
23 {
24 real bufs[64];
25
26 {
27 register int i,j;
28 register real *b1,*b2,*bs,*costab;
29
30 b1 = samples;
31 bs = bufs;
32 costab = pnts[0]+16;
33 b2 = b1 + 32;
34
35 for(i=15;i>=0;i--)
36 *bs++ = (*b1++ + *--b2);
37 for(i=15;i>=0;i--)
38 *bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
39
40 b1 = bufs;
41 costab = pnts[1]+8;
42 b2 = b1 + 16;
43
44 {
45 for(i=7;i>=0;i--)
46 *bs++ = (*b1++ + *--b2);
47 for(i=7;i>=0;i--)
48 *bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
49 b2 += 32;
50 costab += 8;
51 for(i=7;i>=0;i--)
52 *bs++ = (*b1++ + *--b2);
53 for(i=7;i>=0;i--)
54 *bs++ = REAL_MUL((*b1++ - *--b2), *--costab);
55 b2 += 32;
56 }
57
58 bs = bufs;
59 costab = pnts[2];
60 b2 = b1 + 8;
61
62 for(j=2;j;j--)
63 {
64 for(i=3;i>=0;i--)
65 *bs++ = (*b1++ + *--b2);
66 for(i=3;i>=0;i--)
67 *bs++ = REAL_MUL((*--b2 - *b1++), costab[i]);
68 b2 += 16;
69 for(i=3;i>=0;i--)
70 *bs++ = (*b1++ + *--b2);
71 for(i=3;i>=0;i--)
72 *bs++ = REAL_MUL((*b1++ - *--b2), costab[i]);
73 b2 += 16;
74 }
75
76 b1 = bufs;
77 costab = pnts[3];
78 b2 = b1 + 4;
79
80 for(j=4;j;j--)
81 {
82 *bs++ = (*b1++ + *--b2);
83 *bs++ = (*b1++ + *--b2);
84 *bs++ = REAL_MUL((*--b2 - *b1++), costab[1]);
85 *bs++ = REAL_MUL((*--b2 - *b1++), costab[0]);
86 b2 += 8;
87 *bs++ = (*b1++ + *--b2);
88 *bs++ = (*b1++ + *--b2);
89 *bs++ = REAL_MUL((*b1++ - *--b2), costab[1]);
90 *bs++ = REAL_MUL((*b1++ - *--b2), costab[0]);
91 b2 += 8;
92 }
93 bs = bufs;
94 costab = pnts[4];
95
96 for(j=8;j;j--)
97 {
98 real v0,v1;
99 v0=*b1++; v1 = *b1++;
100 *bs++ = (v0 + v1);
101 *bs++ = REAL_MUL((v0 - v1), (*costab));
102 v0=*b1++; v1 = *b1++;
103 *bs++ = (v0 + v1);
104 *bs++ = REAL_MUL((v1 - v0), (*costab));
105 }
106
107 }
108
109
110 {
111 register real *b1;
112 register int i;
113
114 for(b1=bufs,i=8;i;i--,b1+=4)
115 b1[2] += b1[3];
116
117 for(b1=bufs,i=4;i;i--,b1+=8)
118 {
119 b1[4] += b1[6];
120 b1[6] += b1[5];
121 b1[5] += b1[7];
122 }
123
124 for(b1=bufs,i=2;i;i--,b1+=16)
125 {
126 b1[8] += b1[12];
127 b1[12] += b1[10];
128 b1[10] += b1[14];
129 b1[14] += b1[9];
130 b1[9] += b1[13];
131 b1[13] += b1[11];
132 b1[11] += b1[15];
133 }
134 }
135
136
137 out0[0x10*16] = REAL_SCALE_DCT64(bufs[0]);
138 out0[0x10*15] = REAL_SCALE_DCT64(bufs[16+0] + bufs[16+8]);
139 out0[0x10*14] = REAL_SCALE_DCT64(bufs[8]);
140 out0[0x10*13] = REAL_SCALE_DCT64(bufs[16+8] + bufs[16+4]);
141 out0[0x10*12] = REAL_SCALE_DCT64(bufs[4]);
142 out0[0x10*11] = REAL_SCALE_DCT64(bufs[16+4] + bufs[16+12]);
143 out0[0x10*10] = REAL_SCALE_DCT64(bufs[12]);
144 out0[0x10* 9] = REAL_SCALE_DCT64(bufs[16+12] + bufs[16+2]);
145 out0[0x10* 8] = REAL_SCALE_DCT64(bufs[2]);
146 out0[0x10* 7] = REAL_SCALE_DCT64(bufs[16+2] + bufs[16+10]);
147 out0[0x10* 6] = REAL_SCALE_DCT64(bufs[10]);
148 out0[0x10* 5] = REAL_SCALE_DCT64(bufs[16+10] + bufs[16+6]);
149 out0[0x10* 4] = REAL_SCALE_DCT64(bufs[6]);
150 out0[0x10* 3] = REAL_SCALE_DCT64(bufs[16+6] + bufs[16+14]);
151 out0[0x10* 2] = REAL_SCALE_DCT64(bufs[14]);
152 out0[0x10* 1] = REAL_SCALE_DCT64(bufs[16+14] + bufs[16+1]);
153 out0[0x10* 0] = REAL_SCALE_DCT64(bufs[1]);
154
155 out1[0x10* 0] = REAL_SCALE_DCT64(bufs[1]);
156 out1[0x10* 1] = REAL_SCALE_DCT64(bufs[16+1] + bufs[16+9]);
157 out1[0x10* 2] = REAL_SCALE_DCT64(bufs[9]);
158 out1[0x10* 3] = REAL_SCALE_DCT64(bufs[16+9] + bufs[16+5]);
159 out1[0x10* 4] = REAL_SCALE_DCT64(bufs[5]);
160 out1[0x10* 5] = REAL_SCALE_DCT64(bufs[16+5] + bufs[16+13]);
161 out1[0x10* 6] = REAL_SCALE_DCT64(bufs[13]);
162 out1[0x10* 7] = REAL_SCALE_DCT64(bufs[16+13] + bufs[16+3]);
163 out1[0x10* 8] = REAL_SCALE_DCT64(bufs[3]);
164 out1[0x10* 9] = REAL_SCALE_DCT64(bufs[16+3] + bufs[16+11]);
165 out1[0x10*10] = REAL_SCALE_DCT64(bufs[11]);
166 out1[0x10*11] = REAL_SCALE_DCT64(bufs[16+11] + bufs[16+7]);
167 out1[0x10*12] = REAL_SCALE_DCT64(bufs[7]);
168 out1[0x10*13] = REAL_SCALE_DCT64(bufs[16+7] + bufs[16+15]);
169 out1[0x10*14] = REAL_SCALE_DCT64(bufs[15]);
170 out1[0x10*15] = REAL_SCALE_DCT64(bufs[16+15]);
171
172 }
173
174
175