1 /*Copyright (c) 2003-2004, Mark Borgerding
2 Lots of modifications by Jean-Marc Valin
3 Copyright (c) 2005-2007, Xiph.Org Foundation
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
5
6 All rights reserved.
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met:
10
11 * Redistributions of source code must retain the above copyright notice,
12 this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.*/
28
29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
30 heavily modified to better suit Opus */
31
32 #ifndef SKIP_CONFIG_H
33 # ifdef HAVE_CONFIG_H
34 # include "config.h"
35 # endif
36 #endif
37
38 #include "_kiss_fft_guts.h"
39 #define CUSTOM_MODES
40
41 /* The guts header contains all the multiplication and addition macros that are defined for
42 complex numbers. It also delares the kf_ internal functions.
43 */
44
kf_bfly2(kiss_fft_cpx * Fout,int m,int N)45 static void kf_bfly2(
46 kiss_fft_cpx * Fout,
47 int m,
48 int N
49 )
50 {
51 kiss_fft_cpx * Fout2;
52 int i;
53 (void)m;
54 #ifdef CUSTOM_MODES
55 if (m==1)
56 {
57 celt_assert(m==1);
58 for (i=0;i<N;i++)
59 {
60 kiss_fft_cpx t;
61 Fout2 = Fout + 1;
62 t = *Fout2;
63 C_SUB( *Fout2 , *Fout , t );
64 C_ADDTO( *Fout , t );
65 Fout += 2;
66 }
67 } else
68 #endif
69 {
70 opus_val16 tw;
71 tw = QCONST16(0.7071067812f, 15);
72 /* We know that m==4 here because the radix-2 is just after a radix-4 */
73 celt_assert(m==4);
74 for (i=0;i<N;i++)
75 {
76 kiss_fft_cpx t;
77 Fout2 = Fout + 4;
78 t = Fout2[0];
79 C_SUB( Fout2[0] , Fout[0] , t );
80 C_ADDTO( Fout[0] , t );
81
82 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
83 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
84 C_SUB( Fout2[1] , Fout[1] , t );
85 C_ADDTO( Fout[1] , t );
86
87 t.r = Fout2[2].i;
88 t.i = -Fout2[2].r;
89 C_SUB( Fout2[2] , Fout[2] , t );
90 C_ADDTO( Fout[2] , t );
91
92 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
93 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
94 C_SUB( Fout2[3] , Fout[3] , t );
95 C_ADDTO( Fout[3] , t );
96 Fout += 8;
97 }
98 }
99 }
100
kf_bfly4(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)101 static void kf_bfly4(
102 kiss_fft_cpx * Fout,
103 const size_t fstride,
104 const kiss_fft_state *st,
105 int m,
106 int N,
107 int mm
108 )
109 {
110 int i;
111
112 if (m==1)
113 {
114 /* Degenerate case where all the twiddles are 1. */
115 for (i=0;i<N;i++)
116 {
117 kiss_fft_cpx scratch0, scratch1;
118
119 C_SUB( scratch0 , *Fout, Fout[2] );
120 C_ADDTO(*Fout, Fout[2]);
121 C_ADD( scratch1 , Fout[1] , Fout[3] );
122 C_SUB( Fout[2], *Fout, scratch1 );
123 C_ADDTO( *Fout , scratch1 );
124 C_SUB( scratch1 , Fout[1] , Fout[3] );
125
126 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
127 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
128 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
129 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
130 Fout+=4;
131 }
132 } else {
133 int j;
134 kiss_fft_cpx scratch[6];
135 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
136 const int m2=2*m;
137 const int m3=3*m;
138 kiss_fft_cpx * Fout_beg = Fout;
139 for (i=0;i<N;i++)
140 {
141 Fout = Fout_beg + i*mm;
142 tw3 = tw2 = tw1 = st->twiddles;
143 /* m is guaranteed to be a multiple of 4. */
144 for (j=0;j<m;j++)
145 {
146 C_MUL(scratch[0],Fout[m] , *tw1 );
147 C_MUL(scratch[1],Fout[m2] , *tw2 );
148 C_MUL(scratch[2],Fout[m3] , *tw3 );
149
150 C_SUB( scratch[5] , *Fout, scratch[1] );
151 C_ADDTO(*Fout, scratch[1]);
152 C_ADD( scratch[3] , scratch[0] , scratch[2] );
153 C_SUB( scratch[4] , scratch[0] , scratch[2] );
154 C_SUB( Fout[m2], *Fout, scratch[3] );
155 tw1 += fstride;
156 tw2 += fstride*2;
157 tw3 += fstride*3;
158 C_ADDTO( *Fout , scratch[3] );
159
160 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
161 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
162 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
163 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
164 ++Fout;
165 }
166 }
167 }
168 }
169
170
171 #ifndef RADIX_TWO_ONLY
172
kf_bfly3(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)173 static void kf_bfly3(
174 kiss_fft_cpx * Fout,
175 const size_t fstride,
176 const kiss_fft_state *st,
177 int m,
178 int N,
179 int mm
180 )
181 {
182 int i;
183 size_t k;
184 const size_t m2 = 2*m;
185 const kiss_twiddle_cpx *tw1,*tw2;
186 kiss_fft_cpx scratch[5];
187 kiss_twiddle_cpx epi3;
188
189 kiss_fft_cpx * Fout_beg = Fout;
190 #ifdef FIXED_POINT
191 /*epi3.r = -16384;*/ /* Unused */
192 epi3.i = -28378;
193 #else
194 epi3 = st->twiddles[fstride*m];
195 #endif
196 for (i=0;i<N;i++)
197 {
198 Fout = Fout_beg + i*mm;
199 tw1=tw2=st->twiddles;
200 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
201 k=m;
202 do {
203
204 C_MUL(scratch[1],Fout[m] , *tw1);
205 C_MUL(scratch[2],Fout[m2] , *tw2);
206
207 C_ADD(scratch[3],scratch[1],scratch[2]);
208 C_SUB(scratch[0],scratch[1],scratch[2]);
209 tw1 += fstride;
210 tw2 += fstride*2;
211
212 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
213 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
214
215 C_MULBYSCALAR( scratch[0] , epi3.i );
216
217 C_ADDTO(*Fout,scratch[3]);
218
219 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
220 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
221
222 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
223 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
224
225 ++Fout;
226 } while(--k);
227 }
228 }
229
230
231 #ifndef OVERRIDE_kf_bfly5
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)232 static void kf_bfly5(
233 kiss_fft_cpx * Fout,
234 const size_t fstride,
235 const kiss_fft_state *st,
236 int m,
237 int N,
238 int mm
239 )
240 {
241 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
242 int i, u;
243 kiss_fft_cpx scratch[13];
244 const kiss_twiddle_cpx *tw;
245 kiss_twiddle_cpx ya,yb;
246 kiss_fft_cpx * Fout_beg = Fout;
247
248 #ifdef FIXED_POINT
249 ya.r = 10126;
250 ya.i = -31164;
251 yb.r = -26510;
252 yb.i = -19261;
253 #else
254 ya = st->twiddles[fstride*m];
255 yb = st->twiddles[fstride*2*m];
256 #endif
257 tw=st->twiddles;
258
259 for (i=0;i<N;i++)
260 {
261 Fout = Fout_beg + i*mm;
262 Fout0=Fout;
263 Fout1=Fout0+m;
264 Fout2=Fout0+2*m;
265 Fout3=Fout0+3*m;
266 Fout4=Fout0+4*m;
267
268 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
269 for ( u=0; u<m; ++u ) {
270 scratch[0] = *Fout0;
271
272 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
273 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
274 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
275 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
276
277 C_ADD( scratch[7],scratch[1],scratch[4]);
278 C_SUB( scratch[10],scratch[1],scratch[4]);
279 C_ADD( scratch[8],scratch[2],scratch[3]);
280 C_SUB( scratch[9],scratch[2],scratch[3]);
281
282 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
283 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
284
285 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
286 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
287
288 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
289 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
290
291 C_SUB(*Fout1,scratch[5],scratch[6]);
292 C_ADD(*Fout4,scratch[5],scratch[6]);
293
294 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
295 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
296 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
297 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
298
299 C_ADD(*Fout2,scratch[11],scratch[12]);
300 C_SUB(*Fout3,scratch[11],scratch[12]);
301
302 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
303 }
304 }
305 }
306 #endif /* OVERRIDE_kf_bfly5 */
307
308
309 #endif
310
311
312 #ifdef CUSTOM_MODES
313
314 static
compute_bitrev_table(int Fout,opus_int16 * f,const size_t fstride,int in_stride,opus_int16 * factors,const kiss_fft_state * st)315 void compute_bitrev_table(
316 int Fout,
317 opus_int16 *f,
318 const size_t fstride,
319 int in_stride,
320 opus_int16 * factors,
321 const kiss_fft_state *st
322 )
323 {
324 const int p=*factors++; /* the radix */
325 const int m=*factors++; /* stage's fft length/p */
326
327 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
328 if (m==1)
329 {
330 int j;
331 for (j=0;j<p;j++)
332 {
333 *f = Fout+j;
334 f += fstride*in_stride;
335 }
336 } else {
337 int j;
338 for (j=0;j<p;j++)
339 {
340 compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
341 f += fstride*in_stride;
342 Fout += m;
343 }
344 }
345 }
346
347 /* facbuf is populated by p1,m1,p2,m2, ...
348 where
349 p[i] * m[i] = m[i-1]
350 m0 = n */
351 static
kf_factor(int n,opus_int16 * facbuf)352 int kf_factor(int n,opus_int16 * facbuf)
353 {
354 int p=4;
355 int i;
356 int stages=0;
357 int nbak = n;
358
359 /*factor out powers of 4, powers of 2, then any remaining primes */
360 do {
361 while (n % p) {
362 switch (p) {
363 case 4: p = 2; break;
364 case 2: p = 3; break;
365 default: p += 2; break;
366 }
367 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
368 p = n; /* no more factors, skip to end */
369 }
370 n /= p;
371 #ifdef RADIX_TWO_ONLY
372 if (p!=2 && p != 4)
373 #else
374 if (p>5)
375 #endif
376 {
377 return 0;
378 }
379 facbuf[2*stages] = p;
380 if (p==2 && stages > 1)
381 {
382 facbuf[2*stages] = 4;
383 facbuf[2] = 2;
384 }
385 stages++;
386 } while (n > 1);
387 n = nbak;
388 /* Reverse the order to get the radix 4 at the end, so we can use the
389 fast degenerate case. It turns out that reversing the order also
390 improves the noise behaviour. */
391 for (i=0;i<stages/2;i++)
392 {
393 int tmp;
394 tmp = facbuf[2*i];
395 facbuf[2*i] = facbuf[2*(stages-i-1)];
396 facbuf[2*(stages-i-1)] = tmp;
397 }
398 for (i=0;i<stages;i++)
399 {
400 n /= facbuf[2*i];
401 facbuf[2*i+1] = n;
402 }
403 return 1;
404 }
405
compute_twiddles(kiss_twiddle_cpx * twiddles,int nfft)406 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
407 {
408 int i;
409 #ifdef FIXED_POINT
410 for (i=0;i<nfft;++i) {
411 opus_val32 phase = -i;
412 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
413 }
414 #else
415 for (i=0;i<nfft;++i) {
416 const double pi=3.14159265358979323846264338327;
417 double phase = ( -2*pi /nfft ) * i;
418 kf_cexp(twiddles+i, phase );
419 }
420 #endif
421 }
422
opus_fft_alloc_arch_c(kiss_fft_state * st)423 int opus_fft_alloc_arch_c(kiss_fft_state *st) {
424 (void)st;
425 return 0;
426 }
427
428 /*
429 *
430 * Allocates all necessary storage space for the fft and ifft.
431 * The return value is a contiguous block of memory. As such,
432 * It can be freed with free().
433 * */
opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,const kiss_fft_state * base,int arch)434 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
435 const kiss_fft_state *base, int arch)
436 {
437 kiss_fft_state *st=NULL;
438 size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
439
440 if ( lenmem==NULL ) {
441 st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
442 }else{
443 if (mem != NULL && *lenmem >= memneeded)
444 st = (kiss_fft_state*)mem;
445 *lenmem = memneeded;
446 }
447 if (st) {
448 opus_int16 *bitrev;
449 kiss_twiddle_cpx *twiddles;
450
451 st->nfft=nfft;
452 #ifdef FIXED_POINT
453 st->scale_shift = celt_ilog2(st->nfft);
454 if (st->nfft == 1<<st->scale_shift)
455 st->scale = Q15ONE;
456 else
457 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
458 #else
459 st->scale = 1.f/nfft;
460 #endif
461 if (base != NULL)
462 {
463 st->twiddles = base->twiddles;
464 st->shift = 0;
465 while (st->shift < 32 && nfft<<st->shift != base->nfft)
466 st->shift++;
467 if (st->shift>=32)
468 goto fail;
469 } else {
470 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
471 compute_twiddles(twiddles, nfft);
472 st->shift = -1;
473 }
474 if (!kf_factor(nfft,st->factors))
475 {
476 goto fail;
477 }
478
479 /* bitrev */
480 st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
481 if (st->bitrev==NULL)
482 goto fail;
483 compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
484
485 /* Initialize architecture specific fft parameters */
486 if (opus_fft_alloc_arch(st, arch))
487 goto fail;
488 }
489 return st;
490 fail:
491 opus_fft_free(st, arch);
492 return NULL;
493 }
494
opus_fft_alloc(int nfft,void * mem,size_t * lenmem,int arch)495 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
496 {
497 return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
498 }
499
opus_fft_free_arch_c(kiss_fft_state * st)500 void opus_fft_free_arch_c(kiss_fft_state *st) {
501 (void)st;
502 }
503
opus_fft_free(const kiss_fft_state * cfg,int arch)504 void opus_fft_free(const kiss_fft_state *cfg, int arch)
505 {
506 if (cfg)
507 {
508 opus_fft_free_arch((kiss_fft_state *)cfg, arch);
509 opus_free((opus_int16*)cfg->bitrev);
510 if (cfg->shift < 0)
511 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
512 opus_free((kiss_fft_state*)cfg);
513 }
514 }
515
516 #endif /* CUSTOM_MODES */
517
opus_fft_impl(const kiss_fft_state * st,kiss_fft_cpx * fout)518 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
519 {
520 int m2, m;
521 int p;
522 int L;
523 int fstride[MAXFACTORS];
524 int i;
525 int shift;
526
527 /* st->shift can be -1 */
528 shift = st->shift>0 ? st->shift : 0;
529
530 fstride[0] = 1;
531 L=0;
532 do {
533 p = st->factors[2*L];
534 m = st->factors[2*L+1];
535 fstride[L+1] = fstride[L]*p;
536 L++;
537 } while(m!=1);
538 m = st->factors[2*L-1];
539 for (i=L-1;i>=0;i--)
540 {
541 if (i!=0)
542 m2 = st->factors[2*i-1];
543 else
544 m2 = 1;
545 switch (st->factors[2*i])
546 {
547 case 2:
548 kf_bfly2(fout, m, fstride[i]);
549 break;
550 case 4:
551 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
552 break;
553 #ifndef RADIX_TWO_ONLY
554 case 3:
555 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
556 break;
557 case 5:
558 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
559 break;
560 #endif
561 }
562 m = m2;
563 }
564 }
565
opus_fft_c(const kiss_fft_state * st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)566 void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
567 {
568 int i;
569 opus_val16 scale;
570 #ifdef FIXED_POINT
571 /* Allows us to scale with MULT16_32_Q16(), which is faster than
572 MULT16_32_Q15() on ARM. */
573 int scale_shift = st->scale_shift-1;
574 #endif
575 scale = st->scale;
576
577 celt_assert2 (fin != fout, "In-place FFT not supported");
578 /* Bit-reverse the input */
579 for (i=0;i<st->nfft;i++)
580 {
581 kiss_fft_cpx x = fin[i];
582 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
583 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
584 }
585 opus_fft_impl(st, fout);
586 }
587
588
opus_ifft_c(const kiss_fft_state * st,const kiss_fft_cpx * fin,kiss_fft_cpx * fout)589 void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
590 {
591 int i;
592 celt_assert2 (fin != fout, "In-place FFT not supported");
593 /* Bit-reverse the input */
594 for (i=0;i<st->nfft;i++)
595 fout[st->bitrev[i]] = fin[i];
596 for (i=0;i<st->nfft;i++)
597 fout[i].i = -fout[i].i;
598 opus_fft_impl(st, fout);
599 for (i=0;i<st->nfft;i++)
600 fout[i].i = -fout[i].i;
601 }
602