1 // Copyright (c) <2012> <Leif Asbrink>
2 //
3 // Permission is hereby granted, free of charge, to any person
4 // obtaining a copy of this software and associated documentation
5 // files (the "Software"), to deal in the Software without restriction,
6 // including without limitation the rights to use, copy, modify,
7 // merge, publish, distribute, sublicense, and/or sell copies of
8 // the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be
12 // included in all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
21 // OR OTHER DEALINGS IN THE SOFTWARE.
22
23
24 #include "globdef.h"
25 #include "uidef.h"
26 #include "fft1def.h"
27 #include "fft2def.h"
28
29 #define SQRTHALF (double)(0.707106781186547524400844362104)
30
fft_real_to_hermitian(float * z,int size,int n,COSIN_TABLE * tab)31 void fft_real_to_hermitian(float *z, int size, int n, COSIN_TABLE *tab)
32 {
33 // decimation-in-time, split-radix
34 int i, j;
35 int i1, i2, i3, i4, itab, itab3, inc, inc3;
36 float cc1, ss1, cc3, ss3;
37 register float t1, t2;
38 float t3, t4, t5, t6;
39 int mask, is, id,ik;
40 int n2, n4, n8, n5, n6;
41 int iz;
42 mask = size-1;
43 is = 0;
44 id = 4;
45 ik=8;
46 do
47 {
48 for (i=is; i<size; i+=ik)
49 {
50 t1 = z[i];
51 z[i ] += z[i+1];
52 z[i+1] = t1 - z[i+1];
53 t1 = z[i+id];
54 z[i+id ] += z[i+id+1];
55 z[i+id+1] = t1 - z[i+id+1];
56 }
57 is = (id<<1)-2;
58 id <<= 2;
59 ik<<=2;
60 } while (is<size);
61 is=0;
62 id=8;
63 do
64 {
65 for (i=is; i<size; i+=id)
66 {
67 t1=z[i+3]+z[i+2];
68 z[i+3]-=z[i+2];
69 z[i+2]=z[i]-t1;
70 z[i]+=t1;
71 }
72 is=(id<<1)-4;
73 id<<=2;
74 } while (is<size);
75 n2=4;
76 for(iz=1; iz<n; iz++)
77 {
78 n5=n2;
79 n4=n2>>1;
80 n8=n2>>2;
81 n2<<=1;
82 n6=n4+n5;
83 is=0;
84 id=n2<<1;
85 do
86 {
87 for (i=is; i<size; i+=id)
88 {
89 i2=i+n4;
90 i3=i2+n4;
91 i4=i3+n4;
92 i1=i+n8;
93 t1=z[i4]+z[i3];
94 z[i4]-=z[i3];
95 z[i3]=z[i]-t1;
96 z[i]+=t1;
97 i3+=n8;
98 i4+=n8;
99 i2+=n8;
100 t1=(z[i3]+z[i4])*SQRTHALF;
101 t2=(z[i3]-z[i4])*SQRTHALF;
102 z[i4]=z[i2]-t1;
103 z[i3]=-z[i2]-t1;
104 z[i2]=z[i1]-t2;
105 z[i1]+=t2;
106 }
107 is=(id<<1)-n2;
108 id<<=2;
109 } while(is<size);
110 inc = size/n2;
111 itab = inc;
112 inc3=inc+(inc<<1);
113 itab3=inc3;
114 for (j=1; j<n8; j++)
115 {
116 cc1=tab[itab].cos;
117 ss1=tab[itab].sin;
118 cc3=tab[itab3].cos;
119 ss3=tab[itab3].sin;
120 is = 0;
121 id = n2<<1;
122 itab = (itab+inc)&(mask);
123 itab3 = (itab3+inc3)&(mask);
124 do
125 {
126 for (i=is; i<size; i+=id)
127 {
128 t1=z[i+j+n5]*cc1+z[i-j+n6]*ss1;
129 t3=z[i+j+n6]*cc3+z[i-j+n2]*ss3;
130 t2=z[i-j+n6]*cc1-z[i+j+n5]*ss1;
131 t4=z[i-j+n2]*cc3-z[i+j+n6]*ss3;
132 t5=t1+t3;
133 t3=t1-t3;
134 t6=t2+t4;
135 t4=t2-t4;
136 t2=z[i-j+n5]+t6;
137 z[i+j+n5]=t6-z[i-j+n5];
138 z[i-j+n2]=t2;
139 t2=z[i+j+n4]-t3;
140 z[i-j+n6]=-z[i+j+n4]-t3;
141 z[i+j+n6]=t2;
142 t1=z[i+j]+t5;
143 z[i-j+n5]=z[i+j]-t5;
144 z[i+j]=t1;
145 t1=z[i-j+n4]+t4;
146 z[i-j+n4]-=t4;
147 z[i+j+n4]=t1;
148 }
149 is = (id<<1) - n2;
150 id <<= 2;
151 } while (is<size);
152 }
153 }
154 }
155
bulk_of_dif(int size,int n,float * x,COSIN_TABLE * sincos,int yieldflag)156 void bulk_of_dif(int size, int n, float *x, COSIN_TABLE *sincos, int yieldflag)
157 {
158 int inc, j2, m2, m1, ja;
159 int ia, ib,it;
160 float x1,x2,t1,t2,t3,t4;
161 inc=2;
162 j2=size/2;
163 m2=size;
164 for(m1=1; m1<n-1; m1++)
165 {
166 if(yieldflag)lir_sched_yield();
167 for(ja=0; ja<=2*size-m2; ja+=m2)
168 {
169 it=0;
170 for(ia=ja; ia<j2+ja; ia+=2)
171 {
172 ib=ia+j2;
173 t1=x[ia];
174 t2=x[ib];
175 x[ia ]=t1+t2;
176 t3=x[ia+1];
177 t4=x[ib+1];
178 x1=t1-t2;
179 x[ia+1]=t3+t4;
180 x2=t3-t4;
181 x[ib ]=sincos[it].cos*x1-sincos[it].sin*x2;
182 x[ib+1]=sincos[it].sin*x1+sincos[it].cos*x2;
183 it+=inc;
184 }
185 }
186 inc*=2;
187 m2=m2/2;
188 j2=j2/2;
189 }
190 }
191
bulk_of_dual_dif(int size,int n,float * x,COSIN_TABLE * sincos,int yieldflag)192 void bulk_of_dual_dif(int size, int n, float *x, COSIN_TABLE *sincos, int yieldflag)
193 {
194 int inc, j2, m2, m1, ja;
195 int ia, ib,it;
196 float x1,x2,t1,t2,t3,t4;
197 inc=2;
198 j2=size/2;
199 m2=size;
200 for(m1=1; m1<n-1; m1++)
201 {
202 if(yieldflag)lir_sched_yield();
203 for(ja=0; ja<=2*size-m2; ja+=m2)
204 {
205 it=0;
206 for(ia=ja; ia<j2+ja; ia+=2)
207 {
208 ib=ia+j2;
209 t1=x[2*ia];
210 t2=x[2*ib];
211 x[2*ia ]=t1+t2;
212 t3=x[2*ia+1];
213 t4=x[2*ib+1];
214 x1=t1-t2;
215 x[2*ia+1]=t3+t4;
216 x2=t3-t4;
217 x[2*ib ]=sincos[it].cos*x1-sincos[it].sin*x2;
218 x[2*ib+1]=sincos[it].sin*x1+sincos[it].cos*x2;
219 t1=x[2*ia+2];
220 t2=x[2*ib+2];
221 x[2*ia+2]=t1+t2;
222 t3=x[2*ia+3];
223 t4=x[2*ib+3];
224 x1=t1-t2;
225 x[2*ia+3]=t3+t4;
226 x2=t3-t4;
227 x[2*ib+2]=sincos[it].cos*x1-sincos[it].sin*x2;
228 x[2*ib+3]=sincos[it].sin*x1+sincos[it].cos*x2;
229 it+=inc;
230 }
231 }
232 inc*=2;
233 m2=m2/2;
234 j2=j2/2;
235 }
236 }
237
238
fft_iqshift(int size,float * x)239 void fft_iqshift(int size, float *x)
240 {
241 int i,k;
242 float t1;
243 for(i=0; i<size/2; i++)
244 {
245 k=i+size/2;
246 t1=x[2*i];
247 x[2*i]=x[2*k];
248 x[2*k]=t1;
249 t1=x[2*i+1];
250 x[2*i+1]=x[2*k+1];
251 x[2*k+1]=t1;
252 }
253 }
254
255
dual_fftback(int size,int n,float * x,COSIN_TABLE * sincos,unsigned short int * permute,int yieldflag)256 void dual_fftback(int size,
257 int n, float *x,
258 COSIN_TABLE *sincos,
259 unsigned short int *permute,
260 int yieldflag)
261 {
262 int inc, j2, m2, m1, ja;
263 int ia, ib,it;
264 float x1,x2,t1,t2,t3,t4;
265 inc=1;
266 j2=size;
267 m2=2*size;
268 for(m1=0; m1<n; m1++)
269 {
270 if(yieldflag)lir_sched_yield();
271 for(ja=0; ja<=2*size-m2; ja+=m2)
272 {
273 it=0;
274 for(ia=ja; ia<j2+ja; ia+=2)
275 {
276 ib=ia+j2;
277 t1=x[2*ia];
278 t2=x[2*ib];
279 x[2*ia ]=t1+t2;
280 t3=x[2*ia+1];
281 t4=x[2*ib+1];
282 x1=t1-t2;
283 x[2*ia+1]=t3+t4;
284 x2=t3-t4;
285 x[2*ib ]= sincos[it].cos*x1+sincos[it].sin*x2;
286 x[2*ib+1]=-sincos[it].sin*x1+sincos[it].cos*x2;
287 t1=x[2*ia+2];
288 t2=x[2*ib+2];
289 x[2*ia+2]=t1+t2;
290 t3=x[2*ia+3];
291 t4=x[2*ib+3];
292 x1=t1-t2;
293 x[2*ia+3]=t3+t4;
294 x2=t3-t4;
295 x[2*ib+2]= sincos[it].cos*x1+sincos[it].sin*x2;
296 x[2*ib+3]=-sincos[it].sin*x1+sincos[it].cos*x2;
297 it+=inc;
298 }
299 }
300 inc*=2;
301 m2=m2/2;
302 j2=j2/2;
303 }
304 if(yieldflag)lir_sched_yield();
305 for(ia=0; ia < size; ia++)
306 {
307 ib=permute[ia ];
308 if(ib<ia)
309 {
310 t1=x[4*ia];
311 x[4*ia]=x[4*ib];
312 x[4*ib]=t1;
313 t1=x[4*ia+1];
314 x[4*ia+1]=x[4*ib+1];
315 x[4*ib+1]=t1;
316 t1=x[4*ia+2];
317 x[4*ia+2]=x[4*ib+2];
318 x[4*ib+2]=t1;
319 t1=x[4*ia+3];
320 x[4*ia+3]=x[4*ib+3];
321 x[4*ib+3]=t1;
322 }
323 }
324 }
325
fftback(int size,int n,float * x,COSIN_TABLE * sincos,unsigned short int * permute,int yieldflag)326 void fftback(int size,
327 int n, float *x,
328 COSIN_TABLE *sincos,
329 unsigned short int *permute,
330 int yieldflag)
331 {
332 int inc, j2, m2, m1, ja;
333 int ia, ib,it;
334 float x1,x2,t1,t2,t3,t4;
335 inc=1;
336 j2=size;
337 m2=2*size;
338 for(m1=0; m1<n; m1++)
339 {
340 if(yieldflag)lir_sched_yield();
341 for(ja=0; ja<=2*size-m2; ja+=m2)
342 {
343 it=0;
344 for(ia=ja; ia<j2+ja; ia+=2)
345 {
346 ib=ia+j2;
347 t1=x[ia];
348 t2=x[ib];
349 x[ia ]=t1+t2;
350 t3=x[ia+1];
351 t4=x[ib+1];
352 x1=t1-t2;
353 x[ia+1]=t3+t4;
354 x2=t3-t4;
355 x[ib ]= sincos[it].cos*x1+sincos[it].sin*x2;
356 x[ib+1]=-sincos[it].sin*x1+sincos[it].cos*x2;
357 it+=inc;
358 }
359 }
360 inc*=2;
361 m2=m2/2;
362 j2=j2/2;
363 }
364 if(yieldflag)lir_sched_yield();
365 for(ia=0; ia < size; ia++)
366 {
367 ib=permute[ia ];
368 if(ib<ia)
369 {
370 t1=x[2*ia];
371 x[2*ia]=x[2*ib];
372 x[2*ib]=t1;
373 t1=x[2*ia+1];
374 x[2*ia+1]=x[2*ib+1];
375 x[2*ib+1]=t1;
376 }
377 }
378 }
379
d_fftback(int size,int n,double * x,D_COSIN_TABLE * sincos,unsigned short int * permute)380 void d_fftback(int size,
381 int n, double *x,
382 D_COSIN_TABLE *sincos,
383 unsigned short int *permute)
384 {
385 int inc, j2, m2, m1, ja;
386 int ia, ib,it;
387 double x1,x2,t1,t2,t3,t4;
388 inc=1;
389 j2=size;
390 m2=2*size;
391 for(m1=0; m1<n; m1++)
392 {
393 for(ja=0; ja<=2*size-m2; ja+=m2)
394 {
395 it=0;
396 for(ia=ja; ia<j2+ja; ia+=2)
397 {
398 ib=ia+j2;
399 t1=x[ia];
400 t2=x[ib];
401 x[ia ]=t1+t2;
402 t3=x[ia+1];
403 t4=x[ib+1];
404 x1=t1-t2;
405 x[ia+1]=t3+t4;
406 x2=t3-t4;
407 x[ib ]= sincos[it].cos*x1+sincos[it].sin*x2;
408 x[ib+1]=-sincos[it].sin*x1+sincos[it].cos*x2;
409 it+=inc;
410 }
411 }
412 inc*=2;
413 m2=m2/2;
414 j2=j2/2;
415 }
416 for(ia=0; ia < size; ia++)
417 {
418 ib=permute[ia ];
419 if(ib<ia)
420 {
421 t1=x[2*ia];
422 x[2*ia]=x[2*ib];
423 x[2*ib]=t1;
424 t1=x[2*ia+1];
425 x[2*ia+1]=x[2*ib+1];
426 x[2*ib+1]=t1;
427 }
428 }
429 }
430
431
fftforward(int size,int n,float * x,COSIN_TABLE * sincos,unsigned short int * permute,int yieldflag)432 void fftforward(int size,
433 int n, float *x,
434 COSIN_TABLE *sincos,
435 unsigned short int *permute,
436 int yieldflag)
437 {
438 int inc, j2, m2, m1, ja;
439 int ia, ib,it;
440 float x1,x2,t1,t2,t3,t4;
441 inc=1;
442 j2=size;
443 m2=2*size;
444 for(m1=0; m1<n; m1++)
445 {
446 if(yieldflag)lir_sched_yield();
447 for(ja=0; ja<=2*size-m2; ja+=m2)
448 {
449 it=0;
450 for(ia=ja; ia<j2+ja; ia+=2)
451 {
452 ib=ia+j2;
453 t1=x[ia];
454 t2=x[ib];
455 x[ia ]=t1+t2;
456 t3=x[ia+1];
457 t4=x[ib+1];
458 x1=t1-t2;
459 x[ia+1]=t3+t4;
460 x2=t3-t4;
461 x[ib ]= sincos[it].cos*x1-sincos[it].sin*x2;
462 x[ib+1]= sincos[it].sin*x1+sincos[it].cos*x2;
463 it+=inc;
464 }
465 }
466 inc*=2;
467 m2=m2/2;
468 j2=j2/2;
469 }
470 if(yieldflag)lir_sched_yield();
471 for(ia=0; ia < size; ia++)
472 {
473 ib=permute[ia ];
474 if(ib<ia)
475 {
476 t1=x[2*ia];
477 x[2*ia]=x[2*ib];
478 x[2*ib]=t1;
479 t1=x[2*ia+1];
480 x[2*ia+1]=x[2*ib+1];
481 x[2*ib+1]=t1;
482 }
483 }
484 }
485
d_fftforward(int size,int n,double * x,D_COSIN_TABLE * sincos,unsigned short int * permute)486 void d_fftforward(int size,
487 int n, double *x,
488 D_COSIN_TABLE *sincos,
489 unsigned short int *permute)
490 {
491 int inc, j2, m2, m1, ja;
492 int ia, ib,it;
493 double x1,x2,t1,t2,t3,t4;
494 inc=1;
495 j2=size;
496 m2=2*size;
497 for(m1=0; m1<n; m1++)
498 {
499 for(ja=0; ja<=2*size-m2; ja+=m2)
500 {
501 it=0;
502 for(ia=ja; ia<j2+ja; ia+=2)
503 {
504 ib=ia+j2;
505 t1=x[ia];
506 t2=x[ib];
507 x[ia ]=t1+t2;
508 t3=x[ia+1];
509 t4=x[ib+1];
510 x1=t1-t2;
511 x[ia+1]=t3+t4;
512 x2=t3-t4;
513 x[ib ]= sincos[it].cos*x1-sincos[it].sin*x2;
514 x[ib+1]= sincos[it].sin*x1+sincos[it].cos*x2;
515 it+=inc;
516 }
517 }
518 inc*=2;
519 m2=m2/2;
520 j2=j2/2;
521 }
522 for(ia=0; ia < size; ia++)
523 {
524 ib=permute[ia ];
525 if(ib<ia)
526 {
527 t1=x[2*ia];
528 x[2*ia]=x[2*ib];
529 x[2*ib]=t1;
530 t1=x[2*ia+1];
531 x[2*ia+1]=x[2*ib+1];
532 x[2*ib+1]=t1;
533 }
534 }
535 for(ia=0; ia < 2*size; ia++)
536 {
537 x[ia]/=size;
538 }
539 }
540
541
542
big_fftforward(int size,int n,float * x,COSIN_TABLE * sincos,unsigned int * permute,int yieldflag)543 void big_fftforward(int size,
544 int n, float *x,
545 COSIN_TABLE *sincos,
546 unsigned int *permute,
547 int yieldflag)
548 {
549 // This routine is only called from THREAD_FFT2 and that is the thread
550 // that we should give priority in a dual core machine.
551 // Allow it to run 100 % of the time if there is more than one CPU.
552 int inc, j2, m2, m1, ja;
553 int ia, ib,it;
554 float x1,x2,t1,t2,t3,t4;
555 inc=1;
556 j2=size;
557 m2=2*size;
558 for(m1=0; m1<n; m1++)
559 {
560 if(yieldflag)lir_sched_yield();
561 for(ja=0; ja<=2*size-m2; ja+=m2)
562 {
563 it=0;
564 for(ia=ja; ia<j2+ja; ia+=2)
565 {
566 ib=ia+j2;
567 t1=x[ia];
568 t2=x[ib];
569 x[ia ]=t1+t2;
570 t3=x[ia+1];
571 t4=x[ib+1];
572 x1=t1-t2;
573 x[ia+1]=t3+t4;
574 x2=t3-t4;
575 x[ib ]= sincos[it].cos*x1-sincos[it].sin*x2;
576 x[ib+1]= sincos[it].sin*x1+sincos[it].cos*x2;
577 it+=inc;
578 }
579 }
580 inc*=2;
581 m2=m2/2;
582 j2=j2/2;
583 }
584 if(yieldflag)lir_sched_yield();
585 for(ia=0; ia < size; ia++)
586 {
587 ib=(int)permute[ia ];
588 if(ib<ia)
589 {
590 t1=x[2*ia];
591 x[2*ia]=x[2*ib];
592 x[2*ib]=t1;
593 t1=x[2*ia+1];
594 x[2*ia+1]=x[2*ib+1];
595 x[2*ib+1]=t1;
596 }
597 }
598 }
599
600 extern int *baseband_handle;
601
make_window(int mo,int sz,int n,float * win)602 void make_window(int mo, int sz, int n, float *win)
603 {
604 double x,z,sumsq;
605 double e1, e2;
606 float t1;
607 int i, size;
608 if(mo == 5 || mo == 6)
609 {
610 e1=3.2;
611 e2=13.0/sz;
612 for (i=0; i <= sz/2; i++)
613 {
614 win[i]=0.5F*(float)erfc(e1);
615 e1-=e2;
616 }
617 if(mo == 5)return;
618 for (i=0; i < sz/2; i++)
619 {
620 win[i]=(float)pow(win[i]+.0005,-2.0);
621 win[sz-i-1]=win[i];
622 }
623 return;
624 }
625 if(n == 0)return;
626 size=sz;
627 z=n;
628 if(mo == 2)size=2*sz;
629 sumsq=0;
630 // Produce a sin power n function over size/2 points for n=1 to 7.
631 // In case n equals 9, use the error function erfc instead, start at -150 dB.
632 if(n==9)
633 {
634 e1=3.6;
635 e2=80.0/size;
636 if(size < 128)e2/=1.5;
637 if(size < 64)e2/=1.7;
638 for (i=0; i <= size/2; i++)
639 {
640 win[i]=0.5F*(float)erfc(e1);
641 sumsq+=win[i]*win[i];
642 e1-=e2;
643 }
644 }
645 else
646 {
647 // In case n equals 8, use a Gaussian.
648 if(n==8)
649 {
650 e1=0;
651 e2=8.4/size;
652 for (i=size/2; i >= 0; i--)
653 {
654 win[i]=(float)pow(2.7,-e1*e1);
655 sumsq+=win[i]*win[i];
656 e1+=e2;
657 }
658 }
659 else
660 {
661 x=0;
662 for (i=0; i <= size/2; i++)
663 {
664 win[i]=(float)pow(sin(x),z);
665 sumsq+=win[i]*win[i];
666 x+=PI_L/size;
667 }
668 }
669 }
670 if(mo == 5)return;
671 // Produce an inverted window over size/2 points
672 if(mo == 3)
673 {
674 win[0]=1;
675 for (i=1; i <= size/2; i++)
676 {
677 win[i]=1/win[i];
678 }
679 return;
680 }
681 z=1/sqrt(2*sumsq/size);
682 for (i=0; i <= size/2; i++)
683 {
684 win[i]*=(float)z;
685 }
686 if(mo == 4)
687 {
688 for(i=size/2+1; i<size; i++)
689 {
690 win[i]=win[size-i];
691 }
692 return;
693 }
694 // Store the data in the order we want to use it
695 // This will save time by better cash usage for large transforms.
696 if(mo == 1)
697 {
698 t1=win[1];
699 win[1]=win[size/2];
700 for (i=size/2-1; i >1; i--)
701 {
702 win[2*i]=win[i];
703 }
704 win[2]=t1;
705 for (i=0; i < size-3; i+=2)
706 {
707 win[i+3]=win[size-i-2];
708 }
709 }
710 }
711
make_mmxwindow(int mo,int size,int n,short int * win)712 void make_mmxwindow(int mo, int size, int n, short int *win)
713 {
714 double x,z,t1;
715 int i, k, iw;
716 double *tmpwin;
717 double e1, e2;
718 if(n <= 0)return;
719 // We only use MMX when the second fft is enabled.
720 tmpwin=(double*)timf2_shi;
721 // Produce a sin power n function over size points for n=1 to 7.
722 // In case n equals 9, use the error function erfc instead, start at -150 dB.
723 if(n==9)
724 {
725 e1=3.6;
726 e2=80.0/size;
727 if(size < 128)e2/=1.5;
728 if(size < 64)e2/=1.7;
729 for (i=0; i <= size/2; i++)
730 {
731 tmpwin[i]=0.5*erfc(e1);
732 e1-=e2;
733 }
734 }
735 else
736 {
737 // In case n equals 8, use a Gaussian.
738 if(n==8)
739 {
740 e1=0;
741 e2=8.4/size;
742 for (i=size/2; i >= 0; i--)
743 {
744 tmpwin[i]=pow(2.7,-e1*e1);
745 e1+=e2;
746 }
747 }
748 else
749 {
750 x=0;
751 z=n;
752 for (i=0; i <= size/2; i++)
753 {
754 tmpwin[i]=pow(sin(x),z);
755 x+=PI_L/size;
756 }
757 }
758 }
759 for(i=size-1; i>size/2; i--)
760 {
761 tmpwin[i]=tmpwin[size-i];
762 }
763 // ************************************************
764 // mo == 0
765 // Produce a sin power n function over size points
766 // Store each value once.
767 x=0;
768 z=n;
769 if(mo == 0)
770 {
771 for (i=0; i < size; i++)
772 {
773 t1=tmpwin[i];
774 iw=(int)((0.5+0x8000)*t1);
775 if(iw >= 0x8000)iw=0x7fff;
776 win[i]=(short int)iw;
777 x+=PI_L/size;
778 }
779 return;
780 }
781 // Produce a sin power n function over size/2 points
782 // or the inverted function over size/2+1 points
783 // Store the result 4 times for dual or quad transforms
784 k=size/2;
785 if(mo==3)k++;
786 for (i=0; i < k; i++)
787 {
788 t1=tmpwin[i];
789 x+=PI_L/size;
790 if(mo == 3)t1=0.5/t1;
791 iw=(int)floor(0x7f00*t1);
792 if(iw>0x7ffe)iw=0x7ffe;
793 win[4*i ]=(short int)iw;
794 win[4*i+1]=(short int)iw;
795 win[4*i+2]=(short int)iw;
796 win[4*i+3]=(short int)iw;
797 }
798 }
799
800
init_mmxfft(int size,MMX_COSIN_TABLE * tab)801 void init_mmxfft( int size, MMX_COSIN_TABLE *tab)
802 {
803 double x;
804 int i,is,ic;
805 x=0;
806 for (i=0; i<size/2; i++)
807 {
808 is=(int)(32767.5*sin(x));
809 ic=(int)(32767.5*cos(x));
810 if(is>0x7fff)is=0x7fff;
811 if(ic>0x7fff)ic=0x7fff;
812 if(is<-0x7fff)is=-0x7fff;
813 if(ic<-0x7fff)ic=-0x7fff;
814
815 x+=(double)(PI_L/(size/2));
816 tab[i].c1p=(short int)ic;
817 tab[i].s2p=(short int)is;
818 tab[i].c3p=(short int)-is;
819 tab[i].s4m=(short int)ic;
820 }
821 }
822
make_permute(int mo,int nz,int sz,unsigned short int * perm)823 void make_permute(int mo, int nz, int sz, unsigned short int *perm)
824 {
825 unsigned int i, i2, j, k, l, m;
826 unsigned short int *tmp;
827 unsigned int n, size;
828 if(mo == 2)
829 {
830 n=(unsigned int)(nz+1);
831 size=(unsigned int)(sz*2);
832 }
833 else
834 {
835 n=(unsigned int)nz;
836 size=(unsigned int)sz;
837 }
838 if(size > 65536)
839 {
840 lirerr(937);
841 return;
842 }
843 for(i=0; i<size; i++)perm[i]=0;
844 tmp=malloc(size*sizeof(short int));
845 if(tmp == NULL)
846 {
847 lirerr(1032);
848 return;
849 }
850 for(i=0; i<size; i++)tmp[i]=(short unsigned int)i;
851 for(i=1; i<size; i++)
852 {
853 m=i;
854 j=0;
855 for(k=0; k<n; k++)
856 {
857 j=2*j;
858 l=m/2;
859 if(2*l != m)j=j+1;
860 m=l;
861 }
862 if(j > i)
863 {
864 i2=tmp[i];
865 tmp[i]=tmp[j];
866 tmp[j]=(short unsigned int)i2;
867 }
868 }
869 if(mo == 1)
870 {
871 for(i=0; i<size/2; i++)
872 {
873 k=tmp[i];
874 tmp[i]=tmp[i+size/2];
875 tmp[i+size/2]=(short unsigned int)k;
876 }
877 }
878 for(i=0; i<size; i++)perm[tmp[i]]=(short unsigned int)i;
879 free(tmp);
880 }
881
make_bigpermute(int mo,int nz,int sz,unsigned int * perm)882 void make_bigpermute(int mo, int nz, int sz, unsigned int *perm)
883 {
884 unsigned int i, i2, j, k, l, m;
885 unsigned int *tmp;
886 unsigned int n, size;
887 if(mo == 2)
888 {
889 n=(unsigned int)nz+1;
890 size=(unsigned int)sz*2;
891 }
892 else
893 {
894 n=(unsigned int)nz;
895 size=(unsigned int)sz;
896 }
897 for(i=0; i<size; i++)perm[i]=0;
898 tmp=malloc(size*sizeof(int));
899 if(tmp == NULL)
900 {
901 lirerr(1032);
902 return;
903 }
904 for(i=0; i<size; i++)tmp[i]=i;
905 for(i=0; i<size; i++)
906 {
907 m=i;
908 j=0;
909 for(k=0; k<n; k++)
910 {
911 j=2*j;
912 l=m/2;
913 if(2*l != m)j=j+1;
914 m=l;
915 }
916 if(j > i)
917 {
918 i2=tmp[i];
919 tmp[i]=tmp[j];
920 tmp[j]=i2;
921 }
922 }
923 if(mo == 1)
924 {
925 for(i=0; i<size/2; i++)
926 {
927 k=tmp[i];
928 tmp[i]=tmp[i+size/2];
929 tmp[i+size/2]=k;
930 }
931 }
932 for(i=0; i<size; i++)perm[tmp[i]]=i;
933 free(tmp);
934 }
935
make_sincos(int mo,int sz,COSIN_TABLE * tab)936 void make_sincos(int mo, int sz, COSIN_TABLE *tab)
937 {
938 int i;
939 double x,step;
940 int size;
941 if(mo == 2)
942 {
943 size=sz*2;
944 }
945 else
946 {
947 size=sz;
948 }
949 x=0;
950 step=(double)(PI_L/(size/2));
951 for (i=0; i<size/2; i++)
952 {
953 tab[i].sin=(float)sin(x);
954 tab[i].cos=(float)cos(x);
955 x+=step;
956 }
957 }
958
make_d_sincos(int mo,int sz,D_COSIN_TABLE * tab)959 void make_d_sincos(int mo, int sz, D_COSIN_TABLE *tab)
960 {
961 int i;
962 double x,step;
963 int size;
964 if(mo == 2)
965 {
966 size=sz*2;
967 }
968 else
969 {
970 size=sz;
971 }
972 x=0;
973 step=(double)(PI_L/(size/2));
974 for (i=0; i<size/2; i++)
975 {
976 tab[i].sin=sin(x);
977 tab[i].cos=cos(x);
978 x+=step;
979 }
980 }
981
init_fft(int mo,int nz,int sz,COSIN_TABLE * tab,unsigned short int * perm)982 void init_fft(int mo, int nz, int sz, COSIN_TABLE *tab,
983 unsigned short int *perm)
984 {
985 make_permute(mo, nz, sz, perm);
986 make_sincos(mo, sz, tab);
987 }
988
init_d_fft(int mo,int nz,int sz,D_COSIN_TABLE * tab,unsigned short int * perm)989 void init_d_fft(int mo, int nz, int sz, D_COSIN_TABLE *tab,
990 unsigned short int *perm)
991 {
992 make_permute(mo, nz, sz, perm);
993 make_d_sincos(mo, sz, tab);
994 }
995
996
init_big_fft(int mo,int nz,int sz,COSIN_TABLE * tab,unsigned int * perm)997 void init_big_fft(int mo, int nz, int sz, COSIN_TABLE *tab,
998 unsigned int *perm)
999 {
1000 make_bigpermute(mo, nz, sz, perm);
1001 make_sincos(mo, sz, tab);
1002 }
1003
bulk_of_dual_dit(int size,int n,float * x,COSIN_TABLE * tab,int yieldflag)1004 void bulk_of_dual_dit(int size, int n,
1005 float *x, COSIN_TABLE *tab, int yieldflag)
1006 {
1007 int ia, ib, ic, id, itab;
1008 int inc,j,k,m,nn;
1009 int m1, m2;
1010 float t1,t2,t3,t4,t5,t6,t7,t8;
1011 float r1,r2,r3,r4,r5,r6,r7,r8;
1012 float x0,x1,x2,x3,x4,x5,x6,x7;
1013 float ya,yb,y2,y3,y4,y5,y6,y7;
1014 float u0,u1,u2,u3,u4,u5,u6,u7;
1015 float w0,w1,w2,w3,w4,w5,w6,w7;
1016 float s1,s2,c1,c2;
1017 nn=size;
1018 m1=4;
1019 m2=16;
1020 inc=size/16;
1021 m=n-2;
1022 while(m>2)
1023 {
1024 if(yieldflag)lir_sched_yield();
1025 m-=2;
1026 for(j=0; j<nn; j+=m2)
1027 {
1028 itab=0;
1029 k=j;
1030 lp1:;
1031 ia=4*k;
1032 s1= tab[2*itab].sin;
1033 c1= tab[2*itab].cos;
1034 s2= tab[itab].sin;
1035 c2= tab[itab].cos;
1036
1037 x0=x[ia ];
1038 ya=x[ia+1];
1039 u0=x[ia+2];
1040 w0=x[ia+3];
1041 ia+=m2;
1042
1043 x1=x[ia ];
1044 yb=x[ia+1];
1045 u1=x[ia+2];
1046 w1=x[ia+3];
1047 ia+=m2;
1048
1049 t1=c1*x1+s1*yb;
1050 t2=c1*yb-s1*x1;
1051 r1=c1*u1+s1*w1;
1052 r2=c1*w1-s1*u1;
1053
1054 x4=x0+t1;
1055 y4=ya+t2;
1056 u4=u0+r1;
1057 w4=w0+r2;
1058
1059 x6=x0-t1;
1060 y6=ya-t2;
1061 u6=u0-r1;
1062 w6=w0-r2;
1063
1064 x2=x[ia ];
1065 y2=x[ia+1];
1066 u2=x[ia+2];
1067 w2=x[ia+3];
1068 ia+=m2;
1069
1070 x3=x[ia ];
1071 y3=x[ia+1];
1072 u3=x[ia+2];
1073 w3=x[ia+3];
1074
1075 t3=c1*x3+s1*y3;
1076 t4=c1*y3-s1*x3;
1077 r3=c1*u3+s1*w3;
1078 r4=c1*w3-s1*u3;
1079
1080 x5=x2+t3;
1081 y5=y2+t4;
1082 u5=u2+r3;
1083 w5=w2+r4;
1084
1085 x7=x2-t3;
1086 y7=y2-t4;
1087 u7=u2-r3;
1088 w7=w2-r4;
1089
1090 t5=c2*x5+s2*y5;
1091 t6=c2*y5-s2*x5;
1092 r5=c2*u5+s2*w5;
1093 r6=c2*w5-s2*u5;
1094
1095 ia=k;
1096 ib=ia+m1;
1097 ic=ia+2*m1;
1098 id=ia+3*m1;
1099
1100 x[4*ia ]=x4+t5;
1101 x[4*ia+1]=y4+t6;
1102 x[4*ia+2]=u4+r5;
1103 x[4*ia+3]=w4+r6;
1104
1105 x[4*ic ]=x4-t5;
1106 x[4*ic+1]=y4-t6;
1107 x[4*ic+2]=u4-r5;
1108 x[4*ic+3]=w4-r6;
1109
1110 t8=c2*x7+s2*y7;
1111 t7=c2*y7-s2*x7;
1112 r8=c2*u7+s2*w7;
1113 r7=c2*w7-s2*u7;
1114
1115 x[4*ib ]=x6+t7;
1116 x[4*ib+1]=y6-t8;
1117 x[4*ib+2]=u6+r7;
1118 x[4*ib+3]=w6-r8;
1119
1120 x[4*id ]=x6-t7;
1121 x[4*id+1]=y6+t8;
1122 x[4*id+2]=u6-r7;
1123 x[4*id+3]=w6+r8;
1124
1125 itab+=inc;
1126 k++;
1127 if(itab<size/4)goto lp1;
1128 }
1129 inc/=4;
1130 m1*=4;
1131 m2*=4;
1132 }
1133 if(yieldflag)lir_sched_yield();
1134 if( (n&1) ==1 )
1135 {
1136 ib=nn/2;
1137 m=nn/2;
1138 for(ia=0; ia<m; ia++)
1139 {
1140 s1= tab[ia].sin;
1141 c1= tab[ia].cos;
1142 x0=x[4*ia ];
1143 ya=x[4*ia+1];
1144 u0=x[4*ia+2];
1145 w0=x[4*ia+3];
1146 x1=x[4*ib ];
1147 yb=x[4*ib+1];
1148 u1=x[4*ib+2];
1149 w1=x[4*ib+3];
1150 t1=c1*x1+s1*yb;
1151 t2=c1*yb-s1*x1;
1152 r1=c1*u1+s1*w1;
1153 r2=c1*w1-s1*u1;
1154 x[4*ia ]=x0-t1;
1155 x[4*ia+1]=-(ya-t2);
1156 x[4*ia+2]=u0-r1;
1157 x[4*ia+3]=-(w0-r2);
1158 x[4*ib ]=x0+t1;
1159 x[4*ib+1]=-(ya+t2);
1160 x[4*ib+2]=u0+r1;
1161 x[4*ib+3]=-(w0+r2);
1162 ib++;
1163 }
1164 }
1165 else
1166 {
1167 itab=0;
1168 lp2:;
1169 ia=4*itab;
1170 ib=ia+nn;
1171 ic=ia+2*nn;
1172 id=ia+3*nn;
1173
1174 s1= tab[2*itab].sin;
1175 c1= tab[2*itab].cos;
1176 s2= tab[itab].sin;
1177 c2= tab[itab].cos;
1178
1179 x0=x[ia ];
1180 ya=x[ia+1];
1181 u0=x[ia+2];
1182 w0=x[ia+3];
1183
1184 x1=x[ib ];
1185 yb=x[ib+1];
1186 u1=x[ib+2];
1187 w1=x[ib+3];
1188
1189 t1=c1*x1+s1*yb;
1190 t2=c1*yb-s1*x1;
1191 r1=c1*u1+s1*w1;
1192 r2=c1*w1-s1*u1;
1193
1194 x4=x0+t1;
1195 y4=ya+t2;
1196 u4=u0+r1;
1197 w4=w0+r2;
1198
1199 x6=x0-t1;
1200 y6=ya-t2;
1201 u6=u0-r1;
1202 w6=w0-r2;
1203
1204 x2=x[ic ];
1205 y2=x[ic+1];
1206 u2=x[ic+2];
1207 w2=x[ic+3];
1208
1209 x3=x[id ];
1210 y3=x[id+1];
1211 u3=x[id+2];
1212 w3=x[id+3];
1213
1214 t3=c1*x3+s1*y3;
1215 t4=c1*y3-s1*x3;
1216 r3=c1*u3+s1*w3;
1217 r4=c1*w3-s1*u3;
1218
1219 x5=x2+t3;
1220 y5=y2+t4;
1221 u5=u2+r3;
1222 w5=w2+r4;
1223
1224 x7=x2-t3;
1225 y7=y2-t4;
1226 u7=u2-r3;
1227 w7=w2-r4;
1228
1229 t5=c2*x5+s2*y5;
1230 t6=c2*y5-s2*x5;
1231 r5=c2*u5+s2*w5;
1232 r6=c2*w5-s2*u5;
1233
1234 x[ic ]=x4+t5;
1235 x[ic+1]=-(y4+t6);
1236 x[ic+2]=u4+r5;
1237 x[ic+3]=-(w4+r6);
1238
1239 x[ia ]=x4-t5;
1240 x[ia+1]=-(y4-t6);
1241 x[ia+2]=u4-r5;
1242 x[ia+3]=-(w4-r6);
1243
1244 t8=c2*x7+s2*y7;
1245 t7=c2*y7-s2*x7;
1246 r8=c2*u7+s2*w7;
1247 r7=c2*w7-s2*u7;
1248
1249 x[id ]=x6+t7;
1250 x[id+1]=-(y6-t8);
1251 x[id+2]=u6+r7;
1252 x[id+3]=-(w6-r8);
1253
1254 x[ib ]=x6-t7;
1255 x[ib+1]=-(y6+t8);
1256 x[ib+2]=u6-r7;
1257 x[ib+3]=-(w6+r8);
1258 itab++;
1259 if(itab<size/4)goto lp2;
1260 }
1261 }
1262
bulk_of_dit(int size,int n,float * x,COSIN_TABLE * tab,int yieldflag)1263 void bulk_of_dit(int size, int n, float *x, COSIN_TABLE *tab, int yieldflag)
1264 {
1265 int ia, ib, ic, id, itab;
1266 int inc,j,k,m,nn;
1267 int m1, m2;
1268 float t1,t2,t3,t4,t5,t6,t7,t8;
1269 float x0,x1,x2,x3,x4,x5,x6,x7;
1270 float ya,yb,y2,y3,y4,y5,y6,y7;
1271 float s1,s2,c1,c2;
1272 nn=size;
1273 m1=4;
1274 m2=16;
1275 inc=size/16;
1276 m=n-2;
1277 while(m>2)
1278 {
1279 if(yieldflag)lir_sched_yield();
1280 m-=2;
1281 for(j=0; j<nn; j+=m2)
1282 {
1283 itab=0;
1284 k=j;
1285 lp1:;
1286 ia=2*k;
1287 s1= tab[2*itab].sin;
1288 c1= tab[2*itab].cos;
1289 s2= tab[itab].sin;
1290 c2= tab[itab].cos;
1291
1292 x0=x[ia ];
1293 ya=x[ia+1];
1294 ia+=m2/2;
1295
1296 x1=x[ia ];
1297 yb=x[ia+1];
1298 ia+=m2/2;
1299
1300 t1=c1*x1+s1*yb;
1301 t2=c1*yb-s1*x1;
1302
1303 x4=x0+t1;
1304 y4=ya+t2;
1305
1306 x6=x0-t1;
1307 y6=ya-t2;
1308
1309 x2=x[ia ];
1310 y2=x[ia+1];
1311 ia+=m2/2;
1312
1313 x3=x[ia ];
1314 y3=x[ia+1];
1315
1316 t3=c1*x3+s1*y3;
1317 t4=c1*y3-s1*x3;
1318
1319 x5=x2+t3;
1320 y5=y2+t4;
1321
1322 x7=x2-t3;
1323 y7=y2-t4;
1324
1325 t5=c2*x5+s2*y5;
1326 t6=c2*y5-s2*x5;
1327
1328 ia=k;
1329 ib=ia+m1;
1330 ic=ia+2*m1;
1331 id=ia+3*m1;
1332
1333 x[2*ia ]=x4+t5;
1334 x[2*ia+1]=y4+t6;
1335
1336 x[2*ic ]=x4-t5;
1337 x[2*ic+1]=y4-t6;
1338
1339 t8=c2*x7+s2*y7;
1340 t7=c2*y7-s2*x7;
1341
1342 x[2*ib ]=x6+t7;
1343 x[2*ib+1]=y6-t8;
1344
1345 x[2*id ]=x6-t7;
1346 x[2*id+1]=y6+t8;
1347
1348 itab+=inc;
1349 k++;
1350 if(itab<size/4)goto lp1;
1351 }
1352 inc/=4;
1353 m1*=4;
1354 m2*=4;
1355 }
1356 if(yieldflag)lir_sched_yield();
1357 if( (n&1) ==1 )
1358 {
1359 ib=nn/2;
1360 m=nn/2;
1361 for(ia=0; ia<m; ia++)
1362 {
1363 s1= tab[ia].sin;
1364 c1= tab[ia].cos;
1365 x0=x[2*ia ];
1366 ya=x[2*ia+1];
1367 x1=x[2*ib ];
1368 yb=x[2*ib+1];
1369 t1=c1*x1+s1*yb;
1370 t2=c1*yb-s1*x1;
1371 x[2*ia ]=x0-t1;
1372 x[2*ia+1]=-(ya-t2);
1373 x[2*ib ]=x0+t1;
1374 x[2*ib+1]=-(ya+t2);
1375 ib++;
1376 }
1377 }
1378 else
1379 {
1380 itab=0;
1381 lp2:;
1382 ia=2*itab;
1383 ib=ia+m2/2;
1384 ic=ia+m2;
1385 id=ia+3*m2/2;
1386
1387 s1= tab[2*itab].sin;
1388 c1= tab[2*itab].cos;
1389 s2= tab[itab].sin;
1390 c2= tab[itab].cos;
1391
1392 x0=x[ia ];
1393 ya=x[ia+1];
1394
1395 x1=x[ib ];
1396 yb=x[ib+1];
1397
1398 t1=c1*x1+s1*yb;
1399 t2=c1*yb-s1*x1;
1400
1401 x4=x0+t1;
1402 y4=ya+t2;
1403
1404 x6=x0-t1;
1405 y6=ya-t2;
1406
1407 x2=x[ic ];
1408 y2=x[ic+1];
1409
1410 x3=x[id ];
1411 y3=x[id+1];
1412
1413 t3=c1*x3+s1*y3;
1414 t4=c1*y3-s1*x3;
1415
1416 x5=x2+t3;
1417 y5=y2+t4;
1418
1419 x7=x2-t3;
1420 y7=y2-t4;
1421
1422 t5=c2*x5+s2*y5;
1423 t6=c2*y5-s2*x5;
1424
1425 x[ic ]=x4+t5;
1426 x[ic+1]=-(y4+t6);
1427
1428 x[ia ]=x4-t5;
1429 x[ia+1]=-(y4-t6);
1430
1431 t8=c2*x7+s2*y7;
1432 t7=c2*y7-s2*x7;
1433
1434 x[id ]=x6+t7;
1435 x[id+1]=-(y6-t8);
1436
1437 x[ib ]=x6-t7;
1438 x[ib+1]=-(y6+t8);
1439 itab++;
1440 if(itab<size/4)goto lp2;
1441 }
1442 }
1443