1 /*
2 Copyright (c) 2003-2004, Mark Borgerding
3 
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14 
15 
16 #include "_kiss_fft_guts.h"
17 /* The guts header contains all the multiplication and addition macros that are defined for
18  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
19  */
20 
kf_bfly2(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)21 static void kf_bfly2(
22         kiss_fft_cpx * Fout,
23         const size_t fstride,
24         const kiss_fft_cfg st,
25         int m
26         )
27 {
28     kiss_fft_cpx * Fout2;
29     kiss_fft_cpx * tw1 = st->twiddles;
30     kiss_fft_cpx t;
31     Fout2 = Fout + m;
32     do{
33         C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
34 
35         C_MUL (t,  *Fout2 , *tw1);
36         tw1 += fstride;
37         C_SUB( *Fout2 ,  *Fout , t );
38         C_ADDTO( *Fout ,  t );
39         ++Fout2;
40         ++Fout;
41     }while (--m);
42 }
43 
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,const size_t m)44 static void kf_bfly4(
45         kiss_fft_cpx * Fout,
46         const size_t fstride,
47         const kiss_fft_cfg st,
48         const size_t m
49         )
50 {
51     kiss_fft_cpx *tw1,*tw2,*tw3;
52     kiss_fft_cpx scratch[6];
53     size_t k=m;
54     const size_t m2=2*m;
55     const size_t m3=3*m;
56 
57     tw3 = tw2 = tw1 = st->twiddles;
58 
59     do {
60         C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
61 
62         C_MUL(scratch[0],Fout[m] , *tw1 );
63         C_MUL(scratch[1],Fout[m2] , *tw2 );
64         C_MUL(scratch[2],Fout[m3] , *tw3 );
65 
66         C_SUB( scratch[5] , *Fout, scratch[1] );
67         C_ADDTO(*Fout, scratch[1]);
68         C_ADD( scratch[3] , scratch[0] , scratch[2] );
69         C_SUB( scratch[4] , scratch[0] , scratch[2] );
70         C_SUB( Fout[m2], *Fout, scratch[3] );
71         tw1 += fstride;
72         tw2 += fstride*2;
73         tw3 += fstride*3;
74         C_ADDTO( *Fout , scratch[3] );
75 
76         if(st->inverse) {
77             Fout[m].r = scratch[5].r - scratch[4].i;
78             Fout[m].i = scratch[5].i + scratch[4].r;
79             Fout[m3].r = scratch[5].r + scratch[4].i;
80             Fout[m3].i = scratch[5].i - scratch[4].r;
81         }else{
82             Fout[m].r = scratch[5].r + scratch[4].i;
83             Fout[m].i = scratch[5].i - scratch[4].r;
84             Fout[m3].r = scratch[5].r - scratch[4].i;
85             Fout[m3].i = scratch[5].i + scratch[4].r;
86         }
87         ++Fout;
88     }while(--k);
89 }
90 
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,size_t m)91 static void kf_bfly3(
92          kiss_fft_cpx * Fout,
93          const size_t fstride,
94          const kiss_fft_cfg st,
95          size_t m
96          )
97 {
98      size_t k=m;
99      const size_t m2 = 2*m;
100      kiss_fft_cpx *tw1,*tw2;
101      kiss_fft_cpx scratch[5];
102      kiss_fft_cpx epi3;
103      epi3 = st->twiddles[fstride*m];
104 
105      tw1=tw2=st->twiddles;
106 
107      do{
108          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
109 
110          C_MUL(scratch[1],Fout[m] , *tw1);
111          C_MUL(scratch[2],Fout[m2] , *tw2);
112 
113          C_ADD(scratch[3],scratch[1],scratch[2]);
114          C_SUB(scratch[0],scratch[1],scratch[2]);
115          tw1 += fstride;
116          tw2 += fstride*2;
117 
118          Fout[m].r = Fout->r - scratch[3].r/2;
119          Fout[m].i = Fout->i - scratch[3].i/2;
120 
121          C_MULBYSCALAR( scratch[0] , epi3.i );
122 
123          C_ADDTO(*Fout,scratch[3]);
124 
125          Fout[m2].r = Fout[m].r + scratch[0].i;
126          Fout[m2].i = Fout[m].i - scratch[0].r;
127 
128          Fout[m].r -= scratch[0].i;
129          Fout[m].i += scratch[0].r;
130 
131          ++Fout;
132      }while(--k);
133 }
134 
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)135 static void kf_bfly5(
136         kiss_fft_cpx * Fout,
137         const size_t fstride,
138         const kiss_fft_cfg st,
139         int m
140         )
141 {
142     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
143     int u;
144     kiss_fft_cpx scratch[13];
145     kiss_fft_cpx * twiddles = st->twiddles;
146     kiss_fft_cpx *tw;
147     kiss_fft_cpx ya,yb;
148     ya = twiddles[fstride*m];
149     yb = twiddles[fstride*2*m];
150 
151     Fout0=Fout;
152     Fout1=Fout0+m;
153     Fout2=Fout0+2*m;
154     Fout3=Fout0+3*m;
155     Fout4=Fout0+4*m;
156 
157     tw=st->twiddles;
158     for ( u=0; u<m; ++u ) {
159         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
160         scratch[0] = *Fout0;
161 
162         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
163         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
164         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
165         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
166 
167         C_ADD( scratch[7],scratch[1],scratch[4]);
168         C_SUB( scratch[10],scratch[1],scratch[4]);
169         C_ADD( scratch[8],scratch[2],scratch[3]);
170         C_SUB( scratch[9],scratch[2],scratch[3]);
171 
172         Fout0->r += scratch[7].r + scratch[8].r;
173         Fout0->i += scratch[7].i + scratch[8].i;
174 
175         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
176         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
177 
178         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
179         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
180 
181         C_SUB(*Fout1,scratch[5],scratch[6]);
182         C_ADD(*Fout4,scratch[5],scratch[6]);
183 
184         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
185         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
186         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
187         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
188 
189         C_ADD(*Fout2,scratch[11],scratch[12]);
190         C_SUB(*Fout3,scratch[11],scratch[12]);
191 
192         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
193     }
194 }
195 
196 /* perform the butterfly for one stage of a mixed radix FFT */
kf_bfly_generic(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m,int p)197 static void kf_bfly_generic(
198         kiss_fft_cpx * Fout,
199         const size_t fstride,
200         const kiss_fft_cfg st,
201         int m,
202         int p
203         )
204 {
205     int u,k,q1,q;
206     kiss_fft_cpx * twiddles = st->twiddles;
207     kiss_fft_cpx t;
208     int Norig = st->nfft;
209 
210     kiss_fft_cpx *scratchbuf=(kiss_fft_cpx *)malloc( sizeof(kiss_fft_cpx) * p );
211 
212     for ( u=0; u<m; ++u ) {
213         k=u;
214         for ( q1=0 ; q1<p ; ++q1 ) {
215             scratchbuf[q1] = Fout[ k  ];
216             C_FIXDIV(scratchbuf[q1],p);
217             k += m;
218         }
219 
220         k=u;
221         for ( q1=0 ; q1<p ; ++q1 ) {
222             int twidx=0;
223             Fout[ k ] = scratchbuf[0];
224             for (q=1;q<p;++q ) {
225                 twidx += fstride * k;
226                 if (twidx>=Norig) twidx-=Norig;
227                 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
228                 C_ADDTO( Fout[ k ] ,t);
229             }
230             k += m;
231         }
232     }
233 
234     free( scratchbuf );
235 }
236 
237 static
kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_cfg st)238 void kf_work(
239         kiss_fft_cpx * Fout,
240         const kiss_fft_cpx * f,
241         const size_t fstride,
242         int in_stride,
243         int * factors,
244         const kiss_fft_cfg st
245         )
246 {
247     kiss_fft_cpx * Fout_beg=Fout;
248     const int p=*factors++; /* the radix  */
249     const int m=*factors++; /* stage's fft length/p */
250     const kiss_fft_cpx * Fout_end = Fout + p*m;
251 
252     if (m==1) {
253         do{
254             *Fout = *f;
255             f += fstride*in_stride;
256         }while(++Fout != Fout_end );
257     }else{
258         do{
259             kf_work( Fout , f, fstride*p, in_stride, factors,st);
260             f += fstride*in_stride;
261         }while( (Fout += m) != Fout_end );
262     }
263 
264     Fout=Fout_beg;
265 
266     switch (p) {
267         case 2: kf_bfly2(Fout,fstride,st,m); break;
268         case 3: kf_bfly3(Fout,fstride,st,m); break;
269         case 4: kf_bfly4(Fout,fstride,st,m); break;
270         case 5: kf_bfly5(Fout,fstride,st,m); break;
271         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
272     }
273 }
274 
275 /*  facbuf is populated by p1,m1,p2,m2, ...
276     where
277     p[i] * m[i] = m[i-1]
278     m0 = n                  */
279 static
kf_factor(int n,int * facbuf)280 void kf_factor(int n,int * facbuf)
281 {
282     int p=4;
283     double floor_sqrt;
284     floor_sqrt = floor( sqrt((double)n) );
285 
286     /*factor out powers of 4, powers of 2, then any remaining primes */
287     do {
288         while (n % p) {
289             switch (p) {
290                 case 4: p = 2; break;
291                 case 2: p = 3; break;
292                 default: p += 2; break;
293             }
294             if (p > floor_sqrt)
295                 p = n;          /* no more factors, skip to end */
296         }
297         n /= p;
298         *facbuf++ = p;
299         *facbuf++ = n;
300     } while (n > 1);
301 }
302 
303 /*
304  *
305  * User-callable function to allocate all necessary storage space for the fft.
306  *
307  * The return value is a contiguous block of memory, allocated with malloc.  As such,
308  * It can be freed with free(), rather than a kiss_fft-specific function.
309  * */
kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)310 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
311 {
312     kiss_fft_cfg st=NULL;
313     size_t memneeded = sizeof(struct kiss_fft_state)
314         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
315 
316     if ( lenmem==NULL ) {
317         st = ( kiss_fft_cfg)malloc( memneeded );
318     }else{
319         if (*lenmem >= memneeded)
320             st = (kiss_fft_cfg)mem;
321         *lenmem = memneeded;
322     }
323     if (st) {
324         int i;
325         const double pi=3.14159265358979323846264338327;
326         const double phase0 = -2.0 * pi / (double)( nfft );
327 
328         st->nfft=nfft;
329         st->inverse = inverse_fft;
330 
331         for (i=0;i<nfft;++i) {
332             double phase = phase0 * i;
333             if (st->inverse)
334                 phase *= -1;
335             kf_cexp(st->twiddles+i, phase );
336         }
337 
338         kf_factor(nfft,st->factors);
339     }
340     return st;
341 }
342 
343 
344 
345 
kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout,int in_stride)346 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
347 {
348     if (fin == fout) {
349         kiss_fft_cpx *tmpbuf=(kiss_fft_cpx*)malloc( sizeof(kiss_fft_cpx) * st->nfft );
350         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
351         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
352         free( tmpbuf );
353     }else{
354         kf_work( fout, fin, 1,in_stride, st->factors,st );
355     }
356 }
357 
kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)358 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
359 {
360     kiss_fft_stride(cfg,fin,fout,1);
361 }
362 
363