1 #ifndef KISSFFT_I32_CLASS_HH 2 #define KISSFFT_I32_CLASS_HH 3 4 #include <complex> 5 #include <utility> 6 #include <vector> 7 8 // TODO1: substitute complex<type> (behaviour not defined for nonfloats), should be faster 9 // TODO2: use std:: namespace 10 // TODO3: make unittests for all ffts (c, cpp, i32) 11 12 template <typename DType> 13 struct complex_s 14 { 15 DType real; 16 DType imag; 17 }; 18 19 class kissfft_i32 20 { 21 private: 22 23 using scalar_type = int32_t; 24 using cpx_type = complex<int32_t>; 25 26 scalar_type _scale_factor; 27 std::size_t _nfft; 28 bool _inverse; 29 std::vector<cpx_type> _twiddles; 30 std::vector<std::size_t> _stageRadix; 31 std::vector<std::size_t> _stageRemainder; 32 33 public: 34 35 // scale_factor: upscale twiddle-factors otherwise they lie between 0..1 (out of range for integer) --> fixed point math kissfft_i32(const std::size_t nfft,const bool inverse,const double scale_factor=1024.0)36 kissfft_i32(const std::size_t nfft, const bool inverse, const double scale_factor = 1024.0) 37 : _scale_factor(scalar_type(scale_factor)), _nfft(nfft), _inverse(inverse) 38 { 39 // fill twiddle factors 40 _twiddles.resize(_nfft); 41 const double phinc = (_inverse ? 2 : -2) * acos(-1.0) / _nfft; 42 for (std::size_t i = 0; i < _nfft; ++i) 43 { 44 _twiddles[i] = scale_factor * exp(complex<double>(0, i * phinc)); 45 } 46 //factorize 47 //start factoring out 4's, then 2's, then 3,5,7,9,... 48 std::size_t n = _nfft; 49 std::size_t p = 4; 50 do 51 { 52 while (n % p) 53 { 54 switch (p) 55 { 56 case 4: 57 p = 2; 58 break; 59 case 2: 60 p = 3; 61 break; 62 default: 63 p += 2; 64 break; 65 } 66 if (p * p > n) p = n;// no more factors 67 } 68 n /= p; 69 _stageRadix.push_back(p); 70 _stageRemainder.push_back(n); 71 } while (n > 1); 72 } 73 74 /// Calculates the complex Discrete Fourier Transform. 75 /// 76 /// The size of the passed arrays must be passed in the constructor. 77 /// The sum of the squares of the absolute values in the @c dst 78 /// array will be @c N times the sum of the squares of the absolute 79 /// values in the @c src array, where @c N is the size of the array. 80 /// In other words, the l_2 norm of the resulting array will be 81 /// @c sqrt(N) times as big as the l_2 norm of the input array. 82 /// This is also the case when the inverse flag is set in the 83 /// constructor. Hence when applying the same transform twice, but with 84 /// the inverse flag changed the second time, then the result will 85 /// be equal to the original input times @c N. transform(const cpx_type * FSrc,cpx_type * FDst,const std::size_t stage=0,const std::size_t fstride=1,const std::size_t in_stride=1) const86 void transform(const cpx_type * FSrc, 87 cpx_type * FDst, 88 const std::size_t stage = 0, 89 const std::size_t fstride = 1, 90 const std::size_t in_stride = 1) const 91 { 92 const std::size_t p = _stageRadix[stage]; 93 const std::size_t m = _stageRemainder[stage]; 94 cpx_type *const Fout_beg = FDst; 95 cpx_type *const Fout_end = FDst + p * m; 96 97 if (m == 1) 98 { 99 do 100 { 101 *FDst = *FSrc; 102 FSrc += fstride * in_stride; 103 } while (++FDst != Fout_end); 104 } 105 else 106 { 107 do 108 { 109 // recursive call: 110 // DFT of size m*p performed by doing 111 // p instances of smaller DFTs of size m, 112 // each one takes a decimated version of the input 113 transform(FSrc, FDst, stage + 1, fstride * p, in_stride); 114 FSrc += fstride * in_stride; 115 } while ((FDst += m) != Fout_end); 116 } 117 118 FDst = Fout_beg; 119 120 // recombine the p smaller DFTs 121 switch (p) 122 { 123 case 2: 124 kf_bfly2(FDst, fstride, m); 125 break; 126 case 3: 127 kf_bfly3(FDst, fstride, m); 128 break; 129 case 4: 130 kf_bfly4(FDst, fstride, m); 131 break; 132 case 5: 133 kf_bfly5(FDst, fstride, m); 134 break; 135 default: 136 kf_bfly_generic(FDst, fstride, m, p); 137 break; 138 } 139 } 140 141 private: 142 kf_bfly2(cpx_type * const Fout,const size_t fstride,const std::size_t m) const143 void kf_bfly2(cpx_type *const Fout, const size_t fstride, const std::size_t m) const 144 { 145 for (std::size_t k = 0; k < m; ++k) 146 { 147 const cpx_type t = (Fout[m + k] * _twiddles[k * fstride]) / _scale_factor; 148 Fout[m + k] = Fout[k] - t; 149 Fout[k] += t; 150 } 151 } 152 kf_bfly3(cpx_type * Fout,const std::size_t fstride,const std::size_t m) const153 void kf_bfly3(cpx_type *Fout, const std::size_t fstride, const std::size_t m) const 154 { 155 std::size_t k = m; 156 const std::size_t m2 = 2 * m; 157 const cpx_type *tw1, *tw2; 158 cpx_type scratch[5]; 159 const cpx_type epi3 = _twiddles[fstride * m]; 160 161 tw1 = tw2 = &_twiddles[0]; 162 163 do 164 { 165 scratch[1] = (Fout[m] * *tw1) / _scale_factor; 166 scratch[2] = (Fout[m2] * *tw2) / _scale_factor; 167 168 scratch[3] = scratch[1] + scratch[2]; 169 scratch[0] = scratch[1] - scratch[2]; 170 tw1 += fstride; 171 tw2 += fstride * 2; 172 173 Fout[m] = Fout[0] - (scratch[3] / 2); 174 scratch[0] *= epi3.imag(); 175 scratch[0] /= _scale_factor; 176 177 Fout[0] += scratch[3]; 178 179 Fout[m2] = cpx_type(Fout[m].real() + scratch[0].imag(), Fout[m].imag() - scratch[0].real()); 180 181 Fout[m] += cpx_type(-scratch[0].imag(), scratch[0].real()); 182 ++Fout; 183 } while (--k); 184 } 185 kf_bfly4(cpx_type * const Fout,const std::size_t fstride,const std::size_t m) const186 void kf_bfly4(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const 187 { 188 cpx_type scratch[7]; 189 const scalar_type negative_if_inverse = _inverse ? -1 : +1; 190 191 for (std::size_t k = 0; k < m; ++k) 192 { 193 scratch[0] = (Fout[k + m] * _twiddles[k * fstride]) / _scale_factor; 194 scratch[1] = (Fout[k + 2 * m] * _twiddles[k * fstride * 2]) / _scale_factor; 195 scratch[2] = (Fout[k + 3 * m] * _twiddles[k * fstride * 3]) / _scale_factor; 196 scratch[5] = Fout[k] - scratch[1]; 197 198 Fout[k] += scratch[1]; 199 scratch[3] = scratch[0] + scratch[2]; 200 scratch[4] = scratch[0] - scratch[2]; 201 scratch[4] = cpx_type(scratch[4].imag() * negative_if_inverse, 202 -scratch[4].real() * negative_if_inverse); 203 204 Fout[k + 2 * m] = Fout[k] - scratch[3]; 205 Fout[k] += scratch[3]; 206 Fout[k + m] = scratch[5] + scratch[4]; 207 Fout[k + 3 * m] = scratch[5] - scratch[4]; 208 } 209 } 210 kf_bfly5(cpx_type * const Fout,const std::size_t fstride,const std::size_t m) const211 void kf_bfly5(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const 212 { 213 cpx_type *Fout0, *Fout1, *Fout2, *Fout3, *Fout4; 214 cpx_type scratch[13]; 215 const cpx_type ya = _twiddles[fstride * m]; 216 const cpx_type yb = _twiddles[fstride * 2 * m]; 217 218 Fout0 = Fout; 219 Fout1 = Fout0 + m; 220 Fout2 = Fout0 + 2 * m; 221 Fout3 = Fout0 + 3 * m; 222 Fout4 = Fout0 + 4 * m; 223 224 for (std::size_t u = 0; u < m; ++u) 225 { 226 scratch[0] = *Fout0; 227 228 scratch[1] = (*Fout1 * _twiddles[u * fstride]) / _scale_factor; 229 scratch[2] = (*Fout2 * _twiddles[2 * u * fstride]) / _scale_factor; 230 scratch[3] = (*Fout3 * _twiddles[3 * u * fstride]) / _scale_factor; 231 scratch[4] = (*Fout4 * _twiddles[4 * u * fstride]) / _scale_factor; 232 233 scratch[7] = scratch[1] + scratch[4]; 234 scratch[10] = scratch[1] - scratch[4]; 235 scratch[8] = scratch[2] + scratch[3]; 236 scratch[9] = scratch[2] - scratch[3]; 237 238 *Fout0 += scratch[7]; 239 *Fout0 += scratch[8]; 240 241 scratch[5] = scratch[0] + (cpx_type( 242 scratch[7].real() * ya.real() + scratch[8].real() * yb.real(), 243 scratch[7].imag() * ya.real() + scratch[8].imag() * yb.real() ) / _scale_factor); 244 245 scratch[6] = cpx_type( 246 scratch[10].imag() * ya.imag() + scratch[9].imag() * yb.imag(), 247 -scratch[10].real() * ya.imag() - scratch[9].real() * yb.imag() ) / _scale_factor; 248 249 *Fout1 = scratch[5] - scratch[6]; 250 *Fout4 = scratch[5] + scratch[6]; 251 252 scratch[11] = scratch[0] + (cpx_type( 253 scratch[7].real() * yb.real() + scratch[8].real() * ya.real(), 254 scratch[7].imag() * yb.real() + scratch[8].imag() * ya.real() ) / _scale_factor); 255 256 scratch[12] = cpx_type( 257 -scratch[10].imag() * yb.imag() + scratch[9].imag() * ya.imag(), 258 scratch[10].real() * yb.imag() - scratch[9].real() * ya.imag() ) / _scale_factor; 259 260 *Fout2 = scratch[11] + scratch[12]; 261 *Fout3 = scratch[11] - scratch[12]; 262 263 ++Fout0; 264 ++Fout1; 265 ++Fout2; 266 ++Fout3; 267 ++Fout4; 268 } 269 } 270 271 /* perform the butterfly for one stage of a mixed radix FFT */ kf_bfly_generic(cpx_type * const Fout,const size_t fstride,const std::size_t m,const std::size_t p) const272 void kf_bfly_generic(cpx_type * const Fout, const size_t fstride, const std::size_t m, const std::size_t p) const 273 { 274 const cpx_type *twiddles = &_twiddles[0]; 275 cpx_type scratchbuf[p]; 276 277 for (std::size_t u = 0; u < m; ++u) 278 { 279 std::size_t k = u; 280 for (std::size_t q1 = 0; q1 < p; ++q1) 281 { 282 scratchbuf[q1] = Fout[k]; 283 k += m; 284 } 285 286 k = u; 287 for (std::size_t q1 = 0; q1 < p; ++q1) 288 { 289 std::size_t twidx = 0; 290 Fout[k] = scratchbuf[0]; 291 for (std::size_t q = 1; q < p; ++q) 292 { 293 twidx += fstride * k; 294 if (twidx >= _nfft) 295 twidx -= _nfft; 296 Fout[k] += (scratchbuf[q] * twiddles[twidx]) / _scale_factor; 297 } 298 k += m; 299 } 300 } 301 } 302 }; 303 304 #endif 305