1 /*
2 Copyright (c) 2003-2004, Mark Borgerding
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14
15
16 #include "_kiss_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18 fixed or floating point complex numbers. It also delares the kf_ internal functions.
19 */
20
kf_bfly2(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)21 static void kf_bfly2(
22 kiss_fft_cpx * Fout,
23 const size_t fstride,
24 const kiss_fft_cfg st,
25 int m
26 )
27 {
28 kiss_fft_cpx * Fout2;
29 kiss_fft_cpx * tw1 = st->twiddles;
30 kiss_fft_cpx t;
31 Fout2 = Fout + m;
32 do{
33 C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
34
35 C_MUL (t, *Fout2 , *tw1);
36 tw1 += fstride;
37 C_SUB( *Fout2 , *Fout , t );
38 C_ADDTO( *Fout , t );
39 ++Fout2;
40 ++Fout;
41 }while (--m);
42 }
43
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,const size_t m)44 static void kf_bfly4(
45 kiss_fft_cpx * Fout,
46 const size_t fstride,
47 const kiss_fft_cfg st,
48 const size_t m
49 )
50 {
51 kiss_fft_cpx *tw1,*tw2,*tw3;
52 kiss_fft_cpx scratch[6];
53 size_t k=m;
54 const size_t m2=2*m;
55 const size_t m3=3*m;
56
57 tw3 = tw2 = tw1 = st->twiddles;
58
59 do {
60 C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
61
62 C_MUL(scratch[0],Fout[m] , *tw1 );
63 C_MUL(scratch[1],Fout[m2] , *tw2 );
64 C_MUL(scratch[2],Fout[m3] , *tw3 );
65
66 C_SUB( scratch[5] , *Fout, scratch[1] );
67 C_ADDTO(*Fout, scratch[1]);
68 C_ADD( scratch[3] , scratch[0] , scratch[2] );
69 C_SUB( scratch[4] , scratch[0] , scratch[2] );
70 C_SUB( Fout[m2], *Fout, scratch[3] );
71 tw1 += fstride;
72 tw2 += fstride*2;
73 tw3 += fstride*3;
74 C_ADDTO( *Fout , scratch[3] );
75
76 if(st->inverse) {
77 Fout[m].r = scratch[5].r - scratch[4].i;
78 Fout[m].i = scratch[5].i + scratch[4].r;
79 Fout[m3].r = scratch[5].r + scratch[4].i;
80 Fout[m3].i = scratch[5].i - scratch[4].r;
81 }else{
82 Fout[m].r = scratch[5].r + scratch[4].i;
83 Fout[m].i = scratch[5].i - scratch[4].r;
84 Fout[m3].r = scratch[5].r - scratch[4].i;
85 Fout[m3].i = scratch[5].i + scratch[4].r;
86 }
87 ++Fout;
88 }while(--k);
89 }
90
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,size_t m)91 static void kf_bfly3(
92 kiss_fft_cpx * Fout,
93 const size_t fstride,
94 const kiss_fft_cfg st,
95 size_t m
96 )
97 {
98 size_t k=m;
99 const size_t m2 = 2*m;
100 kiss_fft_cpx *tw1,*tw2;
101 kiss_fft_cpx scratch[5];
102 kiss_fft_cpx epi3;
103 epi3 = st->twiddles[fstride*m];
104
105 tw1=tw2=st->twiddles;
106
107 do{
108 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
109
110 C_MUL(scratch[1],Fout[m] , *tw1);
111 C_MUL(scratch[2],Fout[m2] , *tw2);
112
113 C_ADD(scratch[3],scratch[1],scratch[2]);
114 C_SUB(scratch[0],scratch[1],scratch[2]);
115 tw1 += fstride;
116 tw2 += fstride*2;
117
118 Fout[m].r = Fout->r - scratch[3].r/2;
119 Fout[m].i = Fout->i - scratch[3].i/2;
120
121 C_MULBYSCALAR( scratch[0] , epi3.i );
122
123 C_ADDTO(*Fout,scratch[3]);
124
125 Fout[m2].r = Fout[m].r + scratch[0].i;
126 Fout[m2].i = Fout[m].i - scratch[0].r;
127
128 Fout[m].r -= scratch[0].i;
129 Fout[m].i += scratch[0].r;
130
131 ++Fout;
132 }while(--k);
133 }
134
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)135 static void kf_bfly5(
136 kiss_fft_cpx * Fout,
137 const size_t fstride,
138 const kiss_fft_cfg st,
139 int m
140 )
141 {
142 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
143 int u;
144 kiss_fft_cpx scratch[13];
145 kiss_fft_cpx * twiddles = st->twiddles;
146 kiss_fft_cpx *tw;
147 kiss_fft_cpx ya,yb;
148 ya = twiddles[fstride*m];
149 yb = twiddles[fstride*2*m];
150
151 Fout0=Fout;
152 Fout1=Fout0+m;
153 Fout2=Fout0+2*m;
154 Fout3=Fout0+3*m;
155 Fout4=Fout0+4*m;
156
157 tw=st->twiddles;
158 for ( u=0; u<m; ++u ) {
159 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
160 scratch[0] = *Fout0;
161
162 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
163 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
164 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
165 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
166
167 C_ADD( scratch[7],scratch[1],scratch[4]);
168 C_SUB( scratch[10],scratch[1],scratch[4]);
169 C_ADD( scratch[8],scratch[2],scratch[3]);
170 C_SUB( scratch[9],scratch[2],scratch[3]);
171
172 Fout0->r += scratch[7].r + scratch[8].r;
173 Fout0->i += scratch[7].i + scratch[8].i;
174
175 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
176 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
177
178 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
179 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
180
181 C_SUB(*Fout1,scratch[5],scratch[6]);
182 C_ADD(*Fout4,scratch[5],scratch[6]);
183
184 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
185 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
186 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
187 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
188
189 C_ADD(*Fout2,scratch[11],scratch[12]);
190 C_SUB(*Fout3,scratch[11],scratch[12]);
191
192 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
193 }
194 }
195
196 /* perform the butterfly for one stage of a mixed radix FFT */
kf_bfly_generic(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m,int p)197 static void kf_bfly_generic(
198 kiss_fft_cpx * Fout,
199 const size_t fstride,
200 const kiss_fft_cfg st,
201 int m,
202 int p
203 )
204 {
205 int u,k,q1,q;
206 kiss_fft_cpx * twiddles = st->twiddles;
207 kiss_fft_cpx t;
208 int Norig = st->nfft;
209
210 kiss_fft_cpx *scratchbuf=(kiss_fft_cpx *)malloc( sizeof(kiss_fft_cpx) * p );
211
212 for ( u=0; u<m; ++u ) {
213 k=u;
214 for ( q1=0 ; q1<p ; ++q1 ) {
215 scratchbuf[q1] = Fout[ k ];
216 C_FIXDIV(scratchbuf[q1],p);
217 k += m;
218 }
219
220 k=u;
221 for ( q1=0 ; q1<p ; ++q1 ) {
222 int twidx=0;
223 Fout[ k ] = scratchbuf[0];
224 for (q=1;q<p;++q ) {
225 twidx += fstride * k;
226 if (twidx>=Norig) twidx-=Norig;
227 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
228 C_ADDTO( Fout[ k ] ,t);
229 }
230 k += m;
231 }
232 }
233
234 free( scratchbuf );
235 }
236
237 static
kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_cfg st)238 void kf_work(
239 kiss_fft_cpx * Fout,
240 const kiss_fft_cpx * f,
241 const size_t fstride,
242 int in_stride,
243 int * factors,
244 const kiss_fft_cfg st
245 )
246 {
247 kiss_fft_cpx * Fout_beg=Fout;
248 const int p=*factors++; /* the radix */
249 const int m=*factors++; /* stage's fft length/p */
250 const kiss_fft_cpx * Fout_end = Fout + p*m;
251
252 if (m==1) {
253 do{
254 *Fout = *f;
255 f += fstride*in_stride;
256 }while(++Fout != Fout_end );
257 }else{
258 do{
259 kf_work( Fout , f, fstride*p, in_stride, factors,st);
260 f += fstride*in_stride;
261 }while( (Fout += m) != Fout_end );
262 }
263
264 Fout=Fout_beg;
265
266 switch (p) {
267 case 2: kf_bfly2(Fout,fstride,st,m); break;
268 case 3: kf_bfly3(Fout,fstride,st,m); break;
269 case 4: kf_bfly4(Fout,fstride,st,m); break;
270 case 5: kf_bfly5(Fout,fstride,st,m); break;
271 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
272 }
273 }
274
275 /* facbuf is populated by p1,m1,p2,m2, ...
276 where
277 p[i] * m[i] = m[i-1]
278 m0 = n */
279 static
kf_factor(int n,int * facbuf)280 void kf_factor(int n,int * facbuf)
281 {
282 int p=4;
283 double floor_sqrt;
284 floor_sqrt = floor( sqrt((double)n) );
285
286 /*factor out powers of 4, powers of 2, then any remaining primes */
287 do {
288 while (n % p) {
289 switch (p) {
290 case 4: p = 2; break;
291 case 2: p = 3; break;
292 default: p += 2; break;
293 }
294 if (p > floor_sqrt)
295 p = n; /* no more factors, skip to end */
296 }
297 n /= p;
298 *facbuf++ = p;
299 *facbuf++ = n;
300 } while (n > 1);
301 }
302
303 /*
304 *
305 * User-callable function to allocate all necessary storage space for the fft.
306 *
307 * The return value is a contiguous block of memory, allocated with malloc. As such,
308 * It can be freed with free(), rather than a kiss_fft-specific function.
309 * */
kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)310 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
311 {
312 kiss_fft_cfg st=NULL;
313 size_t memneeded = sizeof(struct kiss_fft_state)
314 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
315
316 if ( lenmem==NULL ) {
317 st = ( kiss_fft_cfg)malloc( memneeded );
318 }else{
319 if (*lenmem >= memneeded)
320 st = (kiss_fft_cfg)mem;
321 *lenmem = memneeded;
322 }
323 if (st) {
324 int i;
325 const double pi=3.14159265358979323846264338327;
326 const double phase0 = -2.0 * pi / (double)( nfft );
327
328 st->nfft=nfft;
329 st->inverse = inverse_fft;
330
331 for (i=0;i<nfft;++i) {
332 double phase = phase0 * i;
333 if (st->inverse)
334 phase *= -1;
335 kf_cexp(st->twiddles+i, phase );
336 }
337
338 kf_factor(nfft,st->factors);
339 }
340 return st;
341 }
342
343
344
345
kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout,int in_stride)346 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
347 {
348 if (fin == fout) {
349 kiss_fft_cpx *tmpbuf=(kiss_fft_cpx*)malloc( sizeof(kiss_fft_cpx) * st->nfft );
350 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
351 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
352 free( tmpbuf );
353 }else{
354 kf_work( fout, fin, 1,in_stride, st->factors,st );
355 }
356 }
357
kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)358 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
359 {
360 kiss_fft_stride(cfg,fin,fout,1);
361 }
362
363