1 /*
2 Copyright (c) 2003-2010, Mark Borgerding
3 
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14 
15 
16 #include "_kiss_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
19  */
20 
kf_bfly2(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)21 static void kf_bfly2(
22         kiss_fft_cpx * Fout,
23         const size_t fstride,
24         const kiss_fft_cfg st,
25         int m
26         )
27 {
28     kiss_fft_cpx * Fout2;
29     kiss_fft_cpx * tw1 = st->twiddles;
30     kiss_fft_cpx t;
31     Fout2 = Fout + m;
32     do{
33         C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
34 
35         C_MUL (t,  *Fout2 , *tw1);
36         tw1 += fstride;
37         C_SUB( *Fout2 ,  *Fout , t );
38         C_ADDTO( *Fout ,  t );
39         ++Fout2;
40         ++Fout;
41     }while (--m);
42 }
43 
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,const size_t m)44 static void kf_bfly4(
45         kiss_fft_cpx * Fout,
46         const size_t fstride,
47         const kiss_fft_cfg st,
48         const size_t m
49         )
50 {
51     kiss_fft_cpx *tw1,*tw2,*tw3;
52     kiss_fft_cpx scratch[6];
53     size_t k=m;
54     const size_t m2=2*m;
55     const size_t m3=3*m;
56 
57 
58     tw3 = tw2 = tw1 = st->twiddles;
59 
60     do {
61         C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
62 
63         C_MUL(scratch[0],Fout[m] , *tw1 );
64         C_MUL(scratch[1],Fout[m2] , *tw2 );
65         C_MUL(scratch[2],Fout[m3] , *tw3 );
66 
67         C_SUB( scratch[5] , *Fout, scratch[1] );
68         C_ADDTO(*Fout, scratch[1]);
69         C_ADD( scratch[3] , scratch[0] , scratch[2] );
70         C_SUB( scratch[4] , scratch[0] , scratch[2] );
71         C_SUB( Fout[m2], *Fout, scratch[3] );
72         tw1 += fstride;
73         tw2 += fstride*2;
74         tw3 += fstride*3;
75         C_ADDTO( *Fout , scratch[3] );
76 
77         if(st->inverse) {
78             Fout[m].r = scratch[5].r - scratch[4].i;
79             Fout[m].i = scratch[5].i + scratch[4].r;
80             Fout[m3].r = scratch[5].r + scratch[4].i;
81             Fout[m3].i = scratch[5].i - scratch[4].r;
82         }else{
83             Fout[m].r = scratch[5].r + scratch[4].i;
84             Fout[m].i = scratch[5].i - scratch[4].r;
85             Fout[m3].r = scratch[5].r - scratch[4].i;
86             Fout[m3].i = scratch[5].i + scratch[4].r;
87         }
88         ++Fout;
89     }while(--k);
90 }
91 
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,size_t m)92 static void kf_bfly3(
93          kiss_fft_cpx * Fout,
94          const size_t fstride,
95          const kiss_fft_cfg st,
96          size_t m
97          )
98 {
99      size_t k=m;
100      const size_t m2 = 2*m;
101      kiss_fft_cpx *tw1,*tw2;
102      kiss_fft_cpx scratch[5];
103      kiss_fft_cpx epi3;
104      epi3 = st->twiddles[fstride*m];
105 
106      tw1=tw2=st->twiddles;
107 
108      do{
109          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
110 
111          C_MUL(scratch[1],Fout[m] , *tw1);
112          C_MUL(scratch[2],Fout[m2] , *tw2);
113 
114          C_ADD(scratch[3],scratch[1],scratch[2]);
115          C_SUB(scratch[0],scratch[1],scratch[2]);
116          tw1 += fstride;
117          tw2 += fstride*2;
118 
119          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
120          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
121 
122          C_MULBYSCALAR( scratch[0] , epi3.i );
123 
124          C_ADDTO(*Fout,scratch[3]);
125 
126          Fout[m2].r = Fout[m].r + scratch[0].i;
127          Fout[m2].i = Fout[m].i - scratch[0].r;
128 
129          Fout[m].r -= scratch[0].i;
130          Fout[m].i += scratch[0].r;
131 
132          ++Fout;
133      }while(--k);
134 }
135 
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)136 static void kf_bfly5(
137         kiss_fft_cpx * Fout,
138         const size_t fstride,
139         const kiss_fft_cfg st,
140         int m
141         )
142 {
143     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
144     int u;
145     kiss_fft_cpx scratch[13];
146     kiss_fft_cpx * twiddles = st->twiddles;
147     kiss_fft_cpx *tw;
148     kiss_fft_cpx ya,yb;
149     ya = twiddles[fstride*m];
150     yb = twiddles[fstride*2*m];
151 
152     Fout0=Fout;
153     Fout1=Fout0+m;
154     Fout2=Fout0+2*m;
155     Fout3=Fout0+3*m;
156     Fout4=Fout0+4*m;
157 
158     tw=st->twiddles;
159     for ( u=0; u<m; ++u ) {
160         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
161         scratch[0] = *Fout0;
162 
163         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
164         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
165         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
166         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
167 
168         C_ADD( scratch[7],scratch[1],scratch[4]);
169         C_SUB( scratch[10],scratch[1],scratch[4]);
170         C_ADD( scratch[8],scratch[2],scratch[3]);
171         C_SUB( scratch[9],scratch[2],scratch[3]);
172 
173         Fout0->r += scratch[7].r + scratch[8].r;
174         Fout0->i += scratch[7].i + scratch[8].i;
175 
176         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
177         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
178 
179         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
180         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
181 
182         C_SUB(*Fout1,scratch[5],scratch[6]);
183         C_ADD(*Fout4,scratch[5],scratch[6]);
184 
185         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
186         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
187         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
188         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
189 
190         C_ADD(*Fout2,scratch[11],scratch[12]);
191         C_SUB(*Fout3,scratch[11],scratch[12]);
192 
193         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
194     }
195 }
196 
197 /* perform the butterfly for one stage of a mixed radix FFT */
kf_bfly_generic(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m,int p)198 static void kf_bfly_generic(
199         kiss_fft_cpx * Fout,
200         const size_t fstride,
201         const kiss_fft_cfg st,
202         int m,
203         int p
204         )
205 {
206     int u,k,q1,q;
207     kiss_fft_cpx * twiddles = st->twiddles;
208     kiss_fft_cpx t;
209     int Norig = st->nfft;
210 
211     kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
212 
213     for ( u=0; u<m; ++u ) {
214         k=u;
215         for ( q1=0 ; q1<p ; ++q1 ) {
216             scratch[q1] = Fout[ k  ];
217             C_FIXDIV(scratch[q1],p);
218             k += m;
219         }
220 
221         k=u;
222         for ( q1=0 ; q1<p ; ++q1 ) {
223             int twidx=0;
224             Fout[ k ] = scratch[0];
225             for (q=1;q<p;++q ) {
226                 twidx += fstride * k;
227                 if (twidx>=Norig) twidx-=Norig;
228                 C_MUL(t,scratch[q] , twiddles[twidx] );
229                 C_ADDTO( Fout[ k ] ,t);
230             }
231             k += m;
232         }
233     }
234     KISS_FFT_TMP_FREE(scratch);
235 }
236 
237 static
kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_cfg st)238 void kf_work(
239         kiss_fft_cpx * Fout,
240         const kiss_fft_cpx * f,
241         const size_t fstride,
242         int in_stride,
243         int * factors,
244         const kiss_fft_cfg st
245         )
246 {
247     kiss_fft_cpx * Fout_beg=Fout;
248     const int p=*factors++; /* the radix  */
249     const int m=*factors++; /* stage's fft length/p */
250     const kiss_fft_cpx * Fout_end = Fout + p*m;
251 
252 #ifdef _OPENMP
253 /*
254     // use openmp extensions at the
255     // top-level (not recursive)
256 */
257     if (fstride==1 && p<=5)
258     {
259         int k;
260 
261 /*
262         // execute the p different work units in different threads
263 */
264 #       pragma omp parallel for
265         for (k=0;k<p;++k)
266             kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
267 /*
268         // all threads have joined by this point
269 */
270 
271         switch (p) {
272             case 2: kf_bfly2(Fout,fstride,st,m); break;
273             case 3: kf_bfly3(Fout,fstride,st,m); break;
274             case 4: kf_bfly4(Fout,fstride,st,m); break;
275             case 5: kf_bfly5(Fout,fstride,st,m); break;
276             default: kf_bfly_generic(Fout,fstride,st,m,p); break;
277         }
278         return;
279     }
280 #endif
281 
282     if (m==1) {
283         do{
284             *Fout = *f;
285             f += fstride*in_stride;
286         }while(++Fout != Fout_end );
287     }else{
288         do{
289 /*
290             // recursive call:
291             // DFT of size m*p performed by doing
292             // p instances of smaller DFTs of size m,
293             // each one takes a decimated version of the input
294 */
295             kf_work( Fout , f, fstride*p, in_stride, factors,st);
296             f += fstride*in_stride;
297         }while( (Fout += m) != Fout_end );
298     }
299 
300     Fout=Fout_beg;
301 
302 /*
303     // recombine the p smaller DFTs
304 */
305     switch (p) {
306         case 2: kf_bfly2(Fout,fstride,st,m); break;
307         case 3: kf_bfly3(Fout,fstride,st,m); break;
308         case 4: kf_bfly4(Fout,fstride,st,m); break;
309         case 5: kf_bfly5(Fout,fstride,st,m); break;
310         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
311     }
312 }
313 
314 /*  facbuf is populated by p1,m1,p2,m2, ...
315     where
316     p[i] * m[i] = m[i-1]
317     m0 = n                  */
318 static
kf_factor(int n,int * facbuf)319 void kf_factor(int n,int * facbuf)
320 {
321     int p=4;
322     double floor_sqrt;
323     floor_sqrt = floor( sqrt((double)n) );
324 
325     /*factor out powers of 4, powers of 2, then any remaining primes */
326     do {
327         while (n % p) {
328             switch (p) {
329                 case 4: p = 2; break;
330                 case 2: p = 3; break;
331                 default: p += 2; break;
332             }
333             if (p > floor_sqrt)
334                 p = n;          /* no more factors, skip to end */
335         }
336         n /= p;
337         *facbuf++ = p;
338         *facbuf++ = n;
339     } while (n > 1);
340 }
341 
342 /*
343  *
344  * User-callable function to allocate all necessary storage space for the fft.
345  *
346  * The return value is a contiguous block of memory, allocated with malloc.  As such,
347  * It can be freed with free(), rather than a kiss_fft-specific function.
348  * */
kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)349 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
350 {
351     kiss_fft_cfg st=NULL;
352     size_t memneeded = sizeof(struct kiss_fft_state)
353         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
354 
355     if ( lenmem==NULL ) {
356         st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
357     }else{
358         if (mem != NULL && *lenmem >= memneeded)
359             st = (kiss_fft_cfg)mem;
360         *lenmem = memneeded;
361     }
362     if (st) {
363         int i;
364         st->nfft=nfft;
365         st->inverse = inverse_fft;
366 
367         for (i=0;i<nfft;++i) {
368             const double pi=3.141592653589793238462643383279502884197169399375105820974944;
369             double phase = -2*pi*i / nfft;
370             if (st->inverse)
371                 phase *= -1;
372             kf_cexp(st->twiddles+i, phase );
373         }
374 
375         kf_factor(nfft,st->factors);
376     }
377     return st;
378 }
379 
380 
kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout,int in_stride)381 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
382 {
383     if (fin == fout) {
384 /*
385         //NOTE: this is not really an in-place FFT algorithm.
386         //It just performs an out-of-place FFT into a temp buffer
387 */
388         kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
389         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
390         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
391         KISS_FFT_TMP_FREE(tmpbuf);
392     }else{
393         kf_work( fout, fin, 1,in_stride, st->factors,st );
394     }
395 }
396 
kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)397 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
398 {
399     kiss_fft_stride(cfg,fin,fout,1);
400 }
401 
402 
kiss_fft_cleanup(void)403 void kiss_fft_cleanup(void)
404 {
405 /*
406     // nothing needed any more
407 */
408 }
409 
kiss_fft_next_fast_size(int n)410 int kiss_fft_next_fast_size(int n)
411 {
412     while(1) {
413         int m=n;
414         while ( (m%2) == 0 ) m/=2;
415         while ( (m%3) == 0 ) m/=3;
416         while ( (m%5) == 0 ) m/=5;
417         if (m<=1)
418             break; /* n is completely factorable by twos, threes, and fives */
419         n++;
420     }
421     return n;
422 }
423