1 /*
2 Copyright (c) 2003-2004, Mark Borgerding
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
7
8 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11
12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 */
14
15
16 #ifdef HAVE_CONFIG_H
17 #include "config.h"
18 #endif
19
20 #ifdef _WIN32
21 #pragma warning(disable:4018)
22 #pragma warning(disable:4127)
23 #pragma warning(disable:4244)
24 #endif
25
26 #include "_kiss_fft_guts.h"
27 #include "misc.h"
28
29 /* The guts header contains all the multiplication and addition macros that are defined for
30 fixed or floating point complex numbers. It also delares the kf_ internal functions.
31 */
32
33 static kiss_fft_cpx *scratchbuf=NULL;
34 static size_t nscratchbuf=0;
35 static kiss_fft_cpx *tmpbuf=NULL;
36 static size_t ntmpbuf=0;
37
38 #define CHECKBUF(buf,nbuf,n) \
39 do { \
40 if ( nbuf < (size_t)(n) ) {\
41 free(buf); \
42 buf = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
43 nbuf = (size_t)(n); \
44 } \
45 }while(0)
46
kf_bfly2(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)47 static void kf_bfly2(
48 kiss_fft_cpx * Fout,
49 const size_t fstride,
50 const kiss_fft_cfg st,
51 int m
52 )
53 {
54 kiss_fft_cpx * Fout2;
55 kiss_fft_cpx * tw1 = st->twiddles;
56 kiss_fft_cpx t;
57 Fout2 = Fout + m;
58 if (!st->inverse) {
59 int i;
60 kiss_fft_cpx *x=Fout;
61 for (i=0;i<2*m;i++)
62 {
63 x[i].r = SHR(x[i].r,1);
64 x[i].i = SHR(x[i].i,1);
65 }
66 }
67
68 do{
69 C_MUL (t, *Fout2 , *tw1);
70 tw1 += fstride;
71 C_SUB( *Fout2 , *Fout , t );
72 C_ADDTO( *Fout , t );
73 ++Fout2;
74 ++Fout;
75 }while (--m);
76 }
77
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,const size_t m)78 static void kf_bfly4(
79 kiss_fft_cpx * Fout,
80 const size_t fstride,
81 const kiss_fft_cfg st,
82 const size_t m
83 )
84 {
85 kiss_fft_cpx *tw1,*tw2,*tw3;
86 kiss_fft_cpx scratch[6];
87 size_t k=m;
88 const size_t m2=2*m;
89 const size_t m3=3*m;
90
91 tw3 = tw2 = tw1 = st->twiddles;
92
93 if (!st->inverse) {
94 int i;
95 kiss_fft_cpx *x=Fout;
96 for (i=0;i<4*m;i++)
97 {
98 //C_FIXDIV(x[i],4);
99 x[i].r = PSHR16(x[i].r,2);
100 x[i].i = PSHR16(x[i].i,2);
101 }
102 }
103 if (st->inverse)
104 {
105 do {
106 C_MUL(scratch[0],Fout[m] , *tw1 );
107 C_MUL(scratch[1],Fout[m2] , *tw2 );
108 C_MUL(scratch[2],Fout[m3] , *tw3 );
109
110 C_SUB( scratch[5] , *Fout, scratch[1] );
111 C_ADDTO(*Fout, scratch[1]);
112 C_ADD( scratch[3] , scratch[0] , scratch[2] );
113 C_SUB( scratch[4] , scratch[0] , scratch[2] );
114 C_SUB( Fout[m2], *Fout, scratch[3] );
115 tw1 += fstride;
116 tw2 += fstride*2;
117 tw3 += fstride*3;
118 C_ADDTO( *Fout , scratch[3] );
119
120 Fout[m].r = scratch[5].r - scratch[4].i;
121 Fout[m].i = scratch[5].i + scratch[4].r;
122 Fout[m3].r = scratch[5].r + scratch[4].i;
123 Fout[m3].i = scratch[5].i - scratch[4].r;
124 ++Fout;
125 } while(--k);
126 } else
127 {
128 do {
129 C_MUL(scratch[0],Fout[m] , *tw1 );
130 C_MUL(scratch[1],Fout[m2] , *tw2 );
131 C_MUL(scratch[2],Fout[m3] , *tw3 );
132
133 C_SUB( scratch[5] , *Fout, scratch[1] );
134 C_ADDTO(*Fout, scratch[1]);
135 C_ADD( scratch[3] , scratch[0] , scratch[2] );
136 C_SUB( scratch[4] , scratch[0] , scratch[2] );
137 C_SUB( Fout[m2], *Fout, scratch[3] );
138 tw1 += fstride;
139 tw2 += fstride*2;
140 tw3 += fstride*3;
141 C_ADDTO( *Fout , scratch[3] );
142
143 Fout[m].r = scratch[5].r + scratch[4].i;
144 Fout[m].i = scratch[5].i - scratch[4].r;
145 Fout[m3].r = scratch[5].r - scratch[4].i;
146 Fout[m3].i = scratch[5].i + scratch[4].r;
147 ++Fout;
148 }while(--k);
149 }
150 }
151
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,size_t m)152 static void kf_bfly3(
153 kiss_fft_cpx * Fout,
154 const size_t fstride,
155 const kiss_fft_cfg st,
156 size_t m
157 )
158 {
159 size_t k=m;
160 const size_t m2 = 2*m;
161 kiss_fft_cpx *tw1,*tw2;
162 kiss_fft_cpx scratch[5];
163 kiss_fft_cpx epi3;
164 epi3 = st->twiddles[fstride*m];
165
166 tw1=tw2=st->twiddles;
167
168 do{
169 if (!st->inverse) {
170 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
171 }
172
173 C_MUL(scratch[1],Fout[m] , *tw1);
174 C_MUL(scratch[2],Fout[m2] , *tw2);
175
176 C_ADD(scratch[3],scratch[1],scratch[2]);
177 C_SUB(scratch[0],scratch[1],scratch[2]);
178 tw1 += fstride;
179 tw2 += fstride*2;
180
181 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
182 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
183
184 C_MULBYSCALAR( scratch[0] , epi3.i );
185
186 C_ADDTO(*Fout,scratch[3]);
187
188 Fout[m2].r = Fout[m].r + scratch[0].i;
189 Fout[m2].i = Fout[m].i - scratch[0].r;
190
191 Fout[m].r -= scratch[0].i;
192 Fout[m].i += scratch[0].r;
193
194 ++Fout;
195 }while(--k);
196 }
197
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m)198 static void kf_bfly5(
199 kiss_fft_cpx * Fout,
200 const size_t fstride,
201 const kiss_fft_cfg st,
202 int m
203 )
204 {
205 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
206 int u;
207 kiss_fft_cpx scratch[13];
208 kiss_fft_cpx * twiddles = st->twiddles;
209 kiss_fft_cpx *tw;
210 kiss_fft_cpx ya,yb;
211 ya = twiddles[fstride*m];
212 yb = twiddles[fstride*2*m];
213
214 Fout0=Fout;
215 Fout1=Fout0+m;
216 Fout2=Fout0+2*m;
217 Fout3=Fout0+3*m;
218 Fout4=Fout0+4*m;
219
220 tw=st->twiddles;
221 for ( u=0; u<m; ++u ) {
222 if (!st->inverse) {
223 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
224 }
225 scratch[0] = *Fout0;
226
227 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
228 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
229 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
230 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
231
232 C_ADD( scratch[7],scratch[1],scratch[4]);
233 C_SUB( scratch[10],scratch[1],scratch[4]);
234 C_ADD( scratch[8],scratch[2],scratch[3]);
235 C_SUB( scratch[9],scratch[2],scratch[3]);
236
237 Fout0->r += scratch[7].r + scratch[8].r;
238 Fout0->i += scratch[7].i + scratch[8].i;
239
240 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
241 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
242
243 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
244 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
245
246 C_SUB(*Fout1,scratch[5],scratch[6]);
247 C_ADD(*Fout4,scratch[5],scratch[6]);
248
249 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
250 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
251 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
252 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
253
254 C_ADD(*Fout2,scratch[11],scratch[12]);
255 C_SUB(*Fout3,scratch[11],scratch[12]);
256
257 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
258 }
259 }
260
261 /* perform the butterfly for one stage of a mixed radix FFT */
kf_bfly_generic(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_cfg st,int m,int p)262 static void kf_bfly_generic(
263 kiss_fft_cpx * Fout,
264 const size_t fstride,
265 const kiss_fft_cfg st,
266 int m,
267 int p
268 )
269 {
270 int u,k,q1,q;
271 kiss_fft_cpx * twiddles = st->twiddles;
272 kiss_fft_cpx t;
273 int Norig = st->nfft;
274
275 CHECKBUF(scratchbuf,nscratchbuf,p);
276
277 for ( u=0; u<m; ++u ) {
278 k=u;
279 for ( q1=0 ; q1<p ; ++q1 ) {
280 scratchbuf[q1] = Fout[ k ];
281 if (!st->inverse) {
282 C_FIXDIV(scratchbuf[q1],p);
283 }
284 k += m;
285 }
286
287 k=u;
288 for ( q1=0 ; q1<p ; ++q1 ) {
289 int twidx=0;
290 Fout[ k ] = scratchbuf[0];
291 for (q=1;q<p;++q ) {
292 twidx += fstride * k;
293 if (twidx>=Norig) twidx-=Norig;
294 C_MUL(t,scratchbuf[q] , twiddles[twidx] );
295 C_ADDTO( Fout[ k ] ,t);
296 }
297 k += m;
298 }
299 }
300 }
301
302 static
kf_work(kiss_fft_cpx * Fout,const kiss_fft_cpx * f,const size_t fstride,int in_stride,int * factors,const kiss_fft_cfg st)303 void kf_work(
304 kiss_fft_cpx * Fout,
305 const kiss_fft_cpx * f,
306 const size_t fstride,
307 int in_stride,
308 int * factors,
309 const kiss_fft_cfg st
310 )
311 {
312 kiss_fft_cpx * Fout_beg=Fout;
313 const int p=*factors++; /* the radix */
314 const int m=*factors++; /* stage's fft length/p */
315 const kiss_fft_cpx * Fout_end = Fout + p*m;
316
317 if (m==1) {
318 do{
319 *Fout = *f;
320 f += fstride*in_stride;
321 }while(++Fout != Fout_end );
322 }else{
323 do{
324 kf_work( Fout , f, fstride*p, in_stride, factors,st);
325 f += fstride*in_stride;
326 }while( (Fout += m) != Fout_end );
327 }
328
329 Fout=Fout_beg;
330
331 switch (p) {
332 case 2: kf_bfly2(Fout,fstride,st,m); break;
333 case 3: kf_bfly3(Fout,fstride,st,m); break;
334 case 4: kf_bfly4(Fout,fstride,st,m); break;
335 case 5: kf_bfly5(Fout,fstride,st,m); break;
336 default: kf_bfly_generic(Fout,fstride,st,m,p); break;
337 }
338 }
339
340 /* facbuf is populated by p1,m1,p2,m2, ...
341 where
342 p[i] * m[i] = m[i-1]
343 m0 = n */
344 static
kf_factor(int n,int * facbuf)345 void kf_factor(int n,int * facbuf)
346 {
347 int p=4;
348 double floor_sqrt;
349 floor_sqrt = floor( sqrt((double)n) );
350
351 /*factor out powers of 4, powers of 2, then any remaining primes */
352 do {
353 while (n % p) {
354 switch (p) {
355 case 4: p = 2; break;
356 case 2: p = 3; break;
357 default: p += 2; break;
358 }
359 if (p > floor_sqrt)
360 p = n; /* no more factors, skip to end */
361 }
362 n /= p;
363 *facbuf++ = p;
364 *facbuf++ = n;
365 } while (n > 1);
366 }
367
368 /*
369 *
370 * User-callable function to allocate all necessary storage space for the fft.
371 *
372 * The return value is a contiguous block of memory, allocated with malloc. As such,
373 * It can be freed with free(), rather than a kiss_fft-specific function.
374 * */
kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)375 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
376 {
377 kiss_fft_cfg st=NULL;
378 size_t memneeded = sizeof(struct kiss_fft_state)
379 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
380
381 if ( lenmem==NULL ) {
382 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
383 }else{
384 if (mem != NULL && *lenmem >= memneeded)
385 st = (kiss_fft_cfg)mem;
386 *lenmem = memneeded;
387 }
388 if (st) {
389 int i;
390 st->nfft=nfft;
391 st->inverse = inverse_fft;
392
393 for (i=0;i<nfft;++i) {
394 const double pi=3.14159265358979323846264338327;
395 double phase = ( -2*pi /nfft ) * i;
396 if (st->inverse)
397 phase *= -1;
398 kf_cexp(st->twiddles+i, phase );
399 }
400
401 kf_factor(nfft,st->factors);
402 }
403 return st;
404 }
405
406
407
408
kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout,int in_stride)409 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
410 {
411 if (fin == fout) {
412 CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
413 kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
414 memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
415 }else{
416 kf_work( fout, fin, 1,in_stride, st->factors,st );
417 }
418 }
419
kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)420 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
421 {
422 kiss_fft_stride(cfg,fin,fout,1);
423 }
424
425
426 /* not really necessary to call, but if someone is doing in-place ffts, they may want to free the
427 buffers from CHECKBUF
428 */
kiss_fft_cleanup(void)429 void kiss_fft_cleanup(void)
430 {
431 free(scratchbuf);
432 scratchbuf = NULL;
433 nscratchbuf=0;
434 free(tmpbuf);
435 tmpbuf=NULL;
436 ntmpbuf=0;
437 }
438