1 /*
2 Copyright (c) 2003-2004, Mark Borgerding
3 
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14 
15 
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19 
20 #ifdef _WIN32
21 #pragma warning(disable:4018)
22 #pragma warning(disable:4127)
23 #pragma warning(disable:4244)
24 #endif
25 
26 #include "_kiss_fft_guts.h"
27 #include "misc.h"
28 
29 /* The guts header contains all the multiplication and addition macros that are defined for
30  fixed or floating point complex numbers.  It also delares the kf_ internal functions.
31  */
32 
33 static kiss_fft_cpx *scratchbuf=NULL;
34 static size_t nscratchbuf=0;
35 static kiss_fft_cpx *tmpbuf=NULL;
36 static size_t ntmpbuf=0;
37 
38 #define CHECKBUF(buf,nbuf,n) \
39     do { \
40         if ( nbuf < (size_t)(n) ) {\
41             free(buf); \
42             buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
43             nbuf = (size_t)(n); \
44         } \
45    }while(0)
46 
kf_bfly2(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)47 static void kf_bfly2(
48         kiss_fft_cpx * Fout,
49         const size_t fstride,
50         const kiss_fft_cfg st,
51         int m
52         )
53 {
54     kiss_fft_cpx * Fout2;
55     kiss_fft_cpx * tw1 = st->twiddles;
56     kiss_fft_cpx t;
57     Fout2 = Fout + m;
58     if (!st->inverse) {
59        int i;
60        kiss_fft_cpx *x=Fout;
61        for (i=0;i<2*m;i++)
62        {
63           x[i].r = SHR(x[i].r,1);
64           x[i].i = SHR(x[i].i,1);
65        }
66     }
67 
68     do{
69         C_MUL (t,  *Fout2 , *tw1);
70         tw1 += fstride;
71         C_SUB( *Fout2 ,  *Fout , t );
72         C_ADDTO( *Fout ,  t );
73         ++Fout2;
74         ++Fout;
75     }while (--m);
76 }
77 
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,const size_t m)78 static void kf_bfly4(
79         kiss_fft_cpx * Fout,
80         const size_t fstride,
81         const kiss_fft_cfg st,
82         const size_t m
83         )
84 {
85     kiss_fft_cpx *tw1,*tw2,*tw3;
86     kiss_fft_cpx scratch[6];
87     size_t k=m;
88     const size_t m2=2*m;
89     const size_t m3=3*m;
90 
91     tw3 = tw2 = tw1 = st->twiddles;
92 
93     if (!st->inverse) {
94        int i;
95        kiss_fft_cpx *x=Fout;
96        for (i=0;i<4*m;i++)
97        {
98           //C_FIXDIV(x[i],4);
99           x[i].r = PSHR16(x[i].r,2);
100           x[i].i = PSHR16(x[i].i,2);
101        }
102     }
103     if (st->inverse)
104     {
105        do {
106           C_MUL(scratch[0],Fout[m] , *tw1 );
107           C_MUL(scratch[1],Fout[m2] , *tw2 );
108           C_MUL(scratch[2],Fout[m3] , *tw3 );
109 
110           C_SUB( scratch[5] , *Fout, scratch[1] );
111           C_ADDTO(*Fout, scratch[1]);
112           C_ADD( scratch[3] , scratch[0] , scratch[2] );
113           C_SUB( scratch[4] , scratch[0] , scratch[2] );
114           C_SUB( Fout[m2], *Fout, scratch[3] );
115           tw1 += fstride;
116           tw2 += fstride*2;
117           tw3 += fstride*3;
118           C_ADDTO( *Fout , scratch[3] );
119 
120           Fout[m].r = scratch[5].r - scratch[4].i;
121           Fout[m].i = scratch[5].i + scratch[4].r;
122           Fout[m3].r = scratch[5].r + scratch[4].i;
123           Fout[m3].i = scratch[5].i - scratch[4].r;
124           ++Fout;
125        } while(--k);
126     } else
127     {
128        do {
129           C_MUL(scratch[0],Fout[m] , *tw1 );
130           C_MUL(scratch[1],Fout[m2] , *tw2 );
131           C_MUL(scratch[2],Fout[m3] , *tw3 );
132 
133           C_SUB( scratch[5] , *Fout, scratch[1] );
134           C_ADDTO(*Fout, scratch[1]);
135           C_ADD( scratch[3] , scratch[0] , scratch[2] );
136           C_SUB( scratch[4] , scratch[0] , scratch[2] );
137           C_SUB( Fout[m2], *Fout, scratch[3] );
138           tw1 += fstride;
139           tw2 += fstride*2;
140           tw3 += fstride*3;
141           C_ADDTO( *Fout , scratch[3] );
142 
143           Fout[m].r = scratch[5].r + scratch[4].i;
144           Fout[m].i = scratch[5].i - scratch[4].r;
145           Fout[m3].r = scratch[5].r - scratch[4].i;
146           Fout[m3].i = scratch[5].i + scratch[4].r;
147           ++Fout;
148        }while(--k);
149     }
150 }
151 
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,size_t m)152 static void kf_bfly3(
153          kiss_fft_cpx * Fout,
154          const size_t fstride,
155          const kiss_fft_cfg st,
156          size_t m
157          )
158 {
159      size_t k=m;
160      const size_t m2 = 2*m;
161      kiss_fft_cpx *tw1,*tw2;
162      kiss_fft_cpx scratch[5];
163      kiss_fft_cpx epi3;
164      epi3 = st->twiddles[fstride*m];
165 
166      tw1=tw2=st->twiddles;
167 
168      do{
169         if (!st->inverse) {
170          C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
171 	}
172 
173          C_MUL(scratch[1],Fout[m] , *tw1);
174          C_MUL(scratch[2],Fout[m2] , *tw2);
175 
176          C_ADD(scratch[3],scratch[1],scratch[2]);
177          C_SUB(scratch[0],scratch[1],scratch[2]);
178          tw1 += fstride;
179          tw2 += fstride*2;
180 
181          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
182          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
183 
184          C_MULBYSCALAR( scratch[0] , epi3.i );
185 
186          C_ADDTO(*Fout,scratch[3]);
187 
188          Fout[m2].r = Fout[m].r + scratch[0].i;
189          Fout[m2].i = Fout[m].i - scratch[0].r;
190 
191          Fout[m].r -= scratch[0].i;
192          Fout[m].i += scratch[0].r;
193 
194          ++Fout;
195      }while(--k);
196 }
197 
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)198 static void kf_bfly5(
199         kiss_fft_cpx * Fout,
200         const size_t fstride,
201         const kiss_fft_cfg st,
202         int m
203         )
204 {
205     kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
206     int u;
207     kiss_fft_cpx scratch[13];
208     kiss_fft_cpx * twiddles = st->twiddles;
209     kiss_fft_cpx *tw;
210     kiss_fft_cpx ya,yb;
211     ya = twiddles[fstride*m];
212     yb = twiddles[fstride*2*m];
213 
214     Fout0=Fout;
215     Fout1=Fout0+m;
216     Fout2=Fout0+2*m;
217     Fout3=Fout0+3*m;
218     Fout4=Fout0+4*m;
219 
220     tw=st->twiddles;
221     for ( u=0; u<m; ++u ) {
222         if (!st->inverse) {
223         C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
224 	}
225         scratch[0] = *Fout0;
226 
227         C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
228         C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
229         C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
230         C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
231 
232         C_ADD( scratch[7],scratch[1],scratch[4]);
233         C_SUB( scratch[10],scratch[1],scratch[4]);
234         C_ADD( scratch[8],scratch[2],scratch[3]);
235         C_SUB( scratch[9],scratch[2],scratch[3]);
236 
237         Fout0->r += scratch[7].r + scratch[8].r;
238         Fout0->i += scratch[7].i + scratch[8].i;
239 
240         scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
241         scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
242 
243         scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
244         scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
245 
246         C_SUB(*Fout1,scratch[5],scratch[6]);
247         C_ADD(*Fout4,scratch[5],scratch[6]);
248 
249         scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
250         scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
251         scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
252         scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
253 
254         C_ADD(*Fout2,scratch[11],scratch[12]);
255         C_SUB(*Fout3,scratch[11],scratch[12]);
256 
257         ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
258     }
259 }
260 
261 /* perform the butterfly for one stage of a mixed radix FFT */
kf_bfly_generic(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m,int p)262 static void kf_bfly_generic(
263         kiss_fft_cpx * Fout,
264         const size_t fstride,
265         const kiss_fft_cfg st,
266         int m,
267         int p
268         )
269 {
270     int u,k,q1,q;
271     kiss_fft_cpx * twiddles = st->twiddles;
272     kiss_fft_cpx t;
273     int Norig = st->nfft;
274 
275     CHECKBUF(scratchbuf,nscratchbuf,p);
276 
277     for ( u=0; u<m; ++u ) {
278         k=u;
279         for ( q1=0 ; q1<p ; ++q1 ) {
280             scratchbuf[q1] = Fout[ k  ];
281         if (!st->inverse) {
282             C_FIXDIV(scratchbuf[q1],p);
283 	}
284             k += m;
285         }
286 
287         k=u;
288         for ( q1=0 ; q1<p ; ++q1 ) {
289             int twidx=0;
290             Fout[ k ] = scratchbuf[0];
291             for (q=1;q<p;++q ) {
292                 twidx += fstride * k;
293                 if (twidx>=Norig) twidx-=Norig;
294                 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
295                 C_ADDTO( Fout[ k ] ,t);
296             }
297             k += m;
298         }
299     }
300 }
301 
302 static
kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_cfg st)303 void kf_work(
304         kiss_fft_cpx * Fout,
305         const kiss_fft_cpx * f,
306         const size_t fstride,
307         int in_stride,
308         int * factors,
309         const kiss_fft_cfg st
310         )
311 {
312     kiss_fft_cpx * Fout_beg=Fout;
313     const int p=*factors++; /* the radix  */
314     const int m=*factors++; /* stage's fft length/p */
315     const kiss_fft_cpx * Fout_end = Fout + p*m;
316 
317     if (m==1) {
318         do{
319             *Fout = *f;
320             f += fstride*in_stride;
321         }while(++Fout != Fout_end );
322     }else{
323         do{
324             kf_work( Fout , f, fstride*p, in_stride, factors,st);
325             f += fstride*in_stride;
326         }while( (Fout += m) != Fout_end );
327     }
328 
329     Fout=Fout_beg;
330 
331     switch (p) {
332         case 2: kf_bfly2(Fout,fstride,st,m); break;
333         case 3: kf_bfly3(Fout,fstride,st,m); break;
334         case 4: kf_bfly4(Fout,fstride,st,m); break;
335         case 5: kf_bfly5(Fout,fstride,st,m); break;
336         default: kf_bfly_generic(Fout,fstride,st,m,p); break;
337     }
338 }
339 
340 /*  facbuf is populated by p1,m1,p2,m2, ...
341     where
342     p[i] * m[i] = m[i-1]
343     m0 = n                  */
344 static
kf_factor(int n,int * facbuf)345 void kf_factor(int n,int * facbuf)
346 {
347     int p=4;
348     double floor_sqrt;
349     floor_sqrt = floor( sqrt((double)n) );
350 
351     /*factor out powers of 4, powers of 2, then any remaining primes */
352     do {
353         while (n % p) {
354             switch (p) {
355                 case 4: p = 2; break;
356                 case 2: p = 3; break;
357                 default: p += 2; break;
358             }
359             if (p > floor_sqrt)
360                 p = n;          /* no more factors, skip to end */
361         }
362         n /= p;
363         *facbuf++ = p;
364         *facbuf++ = n;
365     } while (n > 1);
366 }
367 
368 /*
369  *
370  * User-callable function to allocate all necessary storage space for the fft.
371  *
372  * The return value is a contiguous block of memory, allocated with malloc.  As such,
373  * It can be freed with free(), rather than a kiss_fft-specific function.
374  * */
kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)375 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
376 {
377     kiss_fft_cfg st=NULL;
378     size_t memneeded = sizeof(struct kiss_fft_state)
379         + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
380 
381     if ( lenmem==NULL ) {
382         st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
383     }else{
384         if (mem != NULL && *lenmem >= memneeded)
385             st = (kiss_fft_cfg)mem;
386         *lenmem = memneeded;
387     }
388     if (st) {
389         int i;
390         st->nfft=nfft;
391         st->inverse = inverse_fft;
392 
393         for (i=0;i<nfft;++i) {
394             const double pi=3.14159265358979323846264338327;
395             double phase = ( -2*pi /nfft ) * i;
396             if (st->inverse)
397                 phase *= -1;
398             kf_cexp(st->twiddles+i, phase );
399         }
400 
401         kf_factor(nfft,st->factors);
402     }
403     return st;
404 }
405 
406 
407 
408 
kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout,int in_stride)409 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
410 {
411     if (fin == fout) {
412         CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
413         kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
414         memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
415     }else{
416         kf_work( fout, fin, 1,in_stride, st->factors,st );
417     }
418 }
419 
kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)420 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
421 {
422     kiss_fft_stride(cfg,fin,fout,1);
423 }
424 
425 
426 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
427    buffers from CHECKBUF
428  */
kiss_fft_cleanup(void)429 void kiss_fft_cleanup(void)
430 {
431     free(scratchbuf);
432     scratchbuf = NULL;
433     nscratchbuf=0;
434     free(tmpbuf);
435     tmpbuf=NULL;
436     ntmpbuf=0;
437 }
438