1 // ================================================================================== 2 // Copyright (c) 2016 HiFi-LoFi 3 // 4 // Permission is hereby granted, free of charge, to any person obtaining a copy 5 // of this software and associated documentation files (the "Software"), to deal 6 // in the Software without restriction, including without limitation the rights 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 // copies of the Software, and to permit persons to whom the Software is furnished 9 // to do so, subject to the following conditions: 10 // 11 // The above copyright notice and this permission notice shall be included in 12 // all copies or substantial portions of the Software. 13 // 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 16 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 17 // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 18 // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 19 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 20 // ================================================================================== 21 22 #include "AudioFFT.h" 23 24 #include <cassert> 25 #include <cmath> 26 #include <cstring> 27 28 29 #if defined(AUDIOFFT_APPLE_ACCELERATE) 30 #define AUDIOFFT_APPLE_ACCELERATE_USED 31 #include <Accelerate/Accelerate.h> 32 #include <vector> 33 #elif defined (AUDIOFFT_FFTW3) 34 #define AUDIOFFT_FFTW3_USED 35 #include <fftw3.h> 36 #else 37 #if !defined(AUDIOFFT_OOURA) 38 #define AUDIOFFT_OOURA 39 #endif 40 #define AUDIOFFT_OOURA_USED 41 #include <vector> 42 #endif 43 44 45 namespace audiofft 46 { 47 48 namespace details 49 { 50 51 static bool IsPowerOf2(size_t val) 52 { 53 return (val == 1 || (val & (val-1)) == 0); 54 } 55 56 57 template<typename TypeDest, typename TypeSrc> 58 void ConvertBuffer(TypeDest* dest, const TypeSrc* src, size_t len) 59 { 60 for (size_t i=0; i<len; ++i) 61 { 62 dest[i] = static_cast<TypeDest>(src[i]); 63 } 64 } 65 66 67 template<typename TypeDest, typename TypeSrc, typename TypeFactor> 68 void ScaleBuffer(TypeDest* dest, const TypeSrc* src, const TypeFactor factor, size_t len) 69 { 70 for (size_t i=0; i<len; ++i) 71 { 72 dest[i] = static_cast<TypeDest>(static_cast<TypeFactor>(src[i]) * factor); 73 } 74 } 75 76 77 // ================================================================ 78 79 80 #ifdef AUDIOFFT_OOURA_USED 81 82 /** 83 * @internal 84 * @class OouraFFT 85 * @brief FFT implementation based on the great radix-4 routines by Takuya Ooura 86 */ 87 class OouraFFT : public AudioFFTImpl 88 { 89 public: 90 OouraFFT() : 91 AudioFFTImpl(), 92 _size(0), 93 _ip(), 94 _w(), 95 _buffer() 96 { 97 } 98 99 virtual void init(size_t size) override 100 { 101 if (_size != size) 102 { 103 _ip.resize(2 + static_cast<int>(std::sqrt(static_cast<double>(size)))); 104 _w.resize(size / 2); 105 _buffer.resize(size); 106 _size = size; 107 108 const int size4 = static_cast<int>(_size) / 4; 109 makewt(size4, _ip.data(), _w.data()); 110 makect(size4, _ip.data(), _w.data() + size4); 111 } 112 } 113 114 virtual void fft(const float* data, float* re, float* im) override 115 { 116 // Convert into the format as required by the Ooura FFT 117 ConvertBuffer(&_buffer[0], data, _size); 118 119 rdft(static_cast<int>(_size), +1, _buffer.data(), _ip.data(), _w.data()); 120 121 // Convert back to split-complex 122 { 123 double* b = &_buffer[0]; 124 double* bEnd = b + _size; 125 float *r = re; 126 float *i = im; 127 while (b != bEnd) 128 { 129 *(r++) = static_cast<float>(*(b++)); 130 *(i++) = static_cast<float>(-(*(b++))); 131 } 132 } 133 const size_t size2 = _size / 2; 134 re[size2] = -im[0]; 135 im[0] = 0.0; 136 im[size2] = 0.0; 137 } 138 139 virtual void ifft(float* data, const float* re, const float* im) override 140 { 141 // Convert into the format as required by the Ooura FFT 142 { 143 double* b = &_buffer[0]; 144 double* bEnd = b + _size; 145 const float *r = re; 146 const float *i = im; 147 while (b != bEnd) 148 { 149 *(b++) = static_cast<double>(*(r++)); 150 *(b++) = -static_cast<double>(*(i++)); 151 } 152 _buffer[1] = re[_size / 2]; 153 } 154 155 rdft(static_cast<int>(_size), -1, _buffer.data(), _ip.data(), _w.data()); 156 157 // Convert back to split-complex 158 ScaleBuffer(data, &_buffer[0], 2.0 / static_cast<double>(_size), _size); 159 } 160 161 private: 162 size_t _size; 163 std::vector<int> _ip; 164 std::vector<double> _w; 165 std::vector<double> _buffer; 166 167 void rdft(int n, int isgn, double *a, int *ip, double *w) 168 { 169 int nw = ip[0]; 170 int nc = ip[1]; 171 172 if (isgn >= 0) 173 { 174 if (n > 4) 175 { 176 bitrv2(n, ip + 2, a); 177 cftfsub(n, a, w); 178 rftfsub(n, a, nc, w + nw); 179 } 180 else if (n == 4) 181 { 182 cftfsub(n, a, w); 183 } 184 double xi = a[0] - a[1]; 185 a[0] += a[1]; 186 a[1] = xi; 187 } 188 else 189 { 190 a[1] = 0.5 * (a[0] - a[1]); 191 a[0] -= a[1]; 192 if (n > 4) 193 { 194 rftbsub(n, a, nc, w + nw); 195 bitrv2(n, ip + 2, a); 196 cftbsub(n, a, w); 197 } 198 else if (n == 4) 199 { 200 cftfsub(n, a, w); 201 } 202 } 203 } 204 205 206 /* -------- initializing routines -------- */ 207 208 void makewt(int nw, int *ip, double *w) 209 { 210 int j, nwh; 211 double delta, x, y; 212 213 ip[0] = nw; 214 ip[1] = 1; 215 if (nw > 2) { 216 nwh = nw >> 1; 217 delta = atan(1.0) / nwh; 218 w[0] = 1; 219 w[1] = 0; 220 w[nwh] = cos(delta * nwh); 221 w[nwh + 1] = w[nwh]; 222 if (nwh > 2) { 223 for (j = 2; j < nwh; j += 2) { 224 x = cos(delta * j); 225 y = sin(delta * j); 226 w[j] = x; 227 w[j + 1] = y; 228 w[nw - j] = y; 229 w[nw - j + 1] = x; 230 } 231 bitrv2(nw, ip + 2, w); 232 } 233 } 234 } 235 236 237 void makect(int nc, int *ip, double *c) 238 { 239 int j, nch; 240 double delta; 241 242 ip[1] = nc; 243 if (nc > 1) { 244 nch = nc >> 1; 245 delta = atan(1.0) / nch; 246 c[0] = cos(delta * nch); 247 c[nch] = 0.5 * c[0]; 248 for (j = 1; j < nch; j++) { 249 c[j] = 0.5 * cos(delta * j); 250 c[nc - j] = 0.5 * sin(delta * j); 251 } 252 } 253 } 254 255 256 /* -------- child routines -------- */ 257 258 259 void bitrv2(int n, int *ip, double *a) 260 { 261 int j, j1, k, k1, l, m, m2; 262 double xr, xi, yr, yi; 263 264 ip[0] = 0; 265 l = n; 266 m = 1; 267 while ((m << 3) < l) { 268 l >>= 1; 269 for (j = 0; j < m; j++) { 270 ip[m + j] = ip[j] + l; 271 } 272 m <<= 1; 273 } 274 m2 = 2 * m; 275 if ((m << 3) == l) { 276 for (k = 0; k < m; k++) { 277 for (j = 0; j < k; j++) { 278 j1 = 2 * j + ip[k]; 279 k1 = 2 * k + ip[j]; 280 xr = a[j1]; 281 xi = a[j1 + 1]; 282 yr = a[k1]; 283 yi = a[k1 + 1]; 284 a[j1] = yr; 285 a[j1 + 1] = yi; 286 a[k1] = xr; 287 a[k1 + 1] = xi; 288 j1 += m2; 289 k1 += 2 * m2; 290 xr = a[j1]; 291 xi = a[j1 + 1]; 292 yr = a[k1]; 293 yi = a[k1 + 1]; 294 a[j1] = yr; 295 a[j1 + 1] = yi; 296 a[k1] = xr; 297 a[k1 + 1] = xi; 298 j1 += m2; 299 k1 -= m2; 300 xr = a[j1]; 301 xi = a[j1 + 1]; 302 yr = a[k1]; 303 yi = a[k1 + 1]; 304 a[j1] = yr; 305 a[j1 + 1] = yi; 306 a[k1] = xr; 307 a[k1 + 1] = xi; 308 j1 += m2; 309 k1 += 2 * m2; 310 xr = a[j1]; 311 xi = a[j1 + 1]; 312 yr = a[k1]; 313 yi = a[k1 + 1]; 314 a[j1] = yr; 315 a[j1 + 1] = yi; 316 a[k1] = xr; 317 a[k1 + 1] = xi; 318 } 319 j1 = 2 * k + m2 + ip[k]; 320 k1 = j1 + m2; 321 xr = a[j1]; 322 xi = a[j1 + 1]; 323 yr = a[k1]; 324 yi = a[k1 + 1]; 325 a[j1] = yr; 326 a[j1 + 1] = yi; 327 a[k1] = xr; 328 a[k1 + 1] = xi; 329 } 330 } else { 331 for (k = 1; k < m; k++) { 332 for (j = 0; j < k; j++) { 333 j1 = 2 * j + ip[k]; 334 k1 = 2 * k + ip[j]; 335 xr = a[j1]; 336 xi = a[j1 + 1]; 337 yr = a[k1]; 338 yi = a[k1 + 1]; 339 a[j1] = yr; 340 a[j1 + 1] = yi; 341 a[k1] = xr; 342 a[k1 + 1] = xi; 343 j1 += m2; 344 k1 += m2; 345 xr = a[j1]; 346 xi = a[j1 + 1]; 347 yr = a[k1]; 348 yi = a[k1 + 1]; 349 a[j1] = yr; 350 a[j1 + 1] = yi; 351 a[k1] = xr; 352 a[k1 + 1] = xi; 353 } 354 } 355 } 356 } 357 358 359 void cftfsub(int n, double *a, double *w) 360 { 361 int j, j1, j2, j3, l; 362 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 363 364 l = 2; 365 if (n > 8) { 366 cft1st(n, a, w); 367 l = 8; 368 while ((l << 2) < n) { 369 cftmdl(n, l, a, w); 370 l <<= 2; 371 } 372 } 373 if ((l << 2) == n) { 374 for (j = 0; j < l; j += 2) { 375 j1 = j + l; 376 j2 = j1 + l; 377 j3 = j2 + l; 378 x0r = a[j] + a[j1]; 379 x0i = a[j + 1] + a[j1 + 1]; 380 x1r = a[j] - a[j1]; 381 x1i = a[j + 1] - a[j1 + 1]; 382 x2r = a[j2] + a[j3]; 383 x2i = a[j2 + 1] + a[j3 + 1]; 384 x3r = a[j2] - a[j3]; 385 x3i = a[j2 + 1] - a[j3 + 1]; 386 a[j] = x0r + x2r; 387 a[j + 1] = x0i + x2i; 388 a[j2] = x0r - x2r; 389 a[j2 + 1] = x0i - x2i; 390 a[j1] = x1r - x3i; 391 a[j1 + 1] = x1i + x3r; 392 a[j3] = x1r + x3i; 393 a[j3 + 1] = x1i - x3r; 394 } 395 } else { 396 for (j = 0; j < l; j += 2) { 397 j1 = j + l; 398 x0r = a[j] - a[j1]; 399 x0i = a[j + 1] - a[j1 + 1]; 400 a[j] += a[j1]; 401 a[j + 1] += a[j1 + 1]; 402 a[j1] = x0r; 403 a[j1 + 1] = x0i; 404 } 405 } 406 } 407 408 409 void cftbsub(int n, double *a, double *w) 410 { 411 int j, j1, j2, j3, l; 412 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 413 414 l = 2; 415 if (n > 8) { 416 cft1st(n, a, w); 417 l = 8; 418 while ((l << 2) < n) { 419 cftmdl(n, l, a, w); 420 l <<= 2; 421 } 422 } 423 if ((l << 2) == n) { 424 for (j = 0; j < l; j += 2) { 425 j1 = j + l; 426 j2 = j1 + l; 427 j3 = j2 + l; 428 x0r = a[j] + a[j1]; 429 x0i = -a[j + 1] - a[j1 + 1]; 430 x1r = a[j] - a[j1]; 431 x1i = -a[j + 1] + a[j1 + 1]; 432 x2r = a[j2] + a[j3]; 433 x2i = a[j2 + 1] + a[j3 + 1]; 434 x3r = a[j2] - a[j3]; 435 x3i = a[j2 + 1] - a[j3 + 1]; 436 a[j] = x0r + x2r; 437 a[j + 1] = x0i - x2i; 438 a[j2] = x0r - x2r; 439 a[j2 + 1] = x0i + x2i; 440 a[j1] = x1r - x3i; 441 a[j1 + 1] = x1i - x3r; 442 a[j3] = x1r + x3i; 443 a[j3 + 1] = x1i + x3r; 444 } 445 } else { 446 for (j = 0; j < l; j += 2) { 447 j1 = j + l; 448 x0r = a[j] - a[j1]; 449 x0i = -a[j + 1] + a[j1 + 1]; 450 a[j] += a[j1]; 451 a[j + 1] = -a[j + 1] - a[j1 + 1]; 452 a[j1] = x0r; 453 a[j1 + 1] = x0i; 454 } 455 } 456 } 457 458 459 void cft1st(int n, double *a, double *w) 460 { 461 int j, k1, k2; 462 double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 463 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 464 465 x0r = a[0] + a[2]; 466 x0i = a[1] + a[3]; 467 x1r = a[0] - a[2]; 468 x1i = a[1] - a[3]; 469 x2r = a[4] + a[6]; 470 x2i = a[5] + a[7]; 471 x3r = a[4] - a[6]; 472 x3i = a[5] - a[7]; 473 a[0] = x0r + x2r; 474 a[1] = x0i + x2i; 475 a[4] = x0r - x2r; 476 a[5] = x0i - x2i; 477 a[2] = x1r - x3i; 478 a[3] = x1i + x3r; 479 a[6] = x1r + x3i; 480 a[7] = x1i - x3r; 481 wk1r = w[2]; 482 x0r = a[8] + a[10]; 483 x0i = a[9] + a[11]; 484 x1r = a[8] - a[10]; 485 x1i = a[9] - a[11]; 486 x2r = a[12] + a[14]; 487 x2i = a[13] + a[15]; 488 x3r = a[12] - a[14]; 489 x3i = a[13] - a[15]; 490 a[8] = x0r + x2r; 491 a[9] = x0i + x2i; 492 a[12] = x2i - x0i; 493 a[13] = x0r - x2r; 494 x0r = x1r - x3i; 495 x0i = x1i + x3r; 496 a[10] = wk1r * (x0r - x0i); 497 a[11] = wk1r * (x0r + x0i); 498 x0r = x3i + x1r; 499 x0i = x3r - x1i; 500 a[14] = wk1r * (x0i - x0r); 501 a[15] = wk1r * (x0i + x0r); 502 k1 = 0; 503 for (j = 16; j < n; j += 16) { 504 k1 += 2; 505 k2 = 2 * k1; 506 wk2r = w[k1]; 507 wk2i = w[k1 + 1]; 508 wk1r = w[k2]; 509 wk1i = w[k2 + 1]; 510 wk3r = wk1r - 2 * wk2i * wk1i; 511 wk3i = 2 * wk2i * wk1r - wk1i; 512 x0r = a[j] + a[j + 2]; 513 x0i = a[j + 1] + a[j + 3]; 514 x1r = a[j] - a[j + 2]; 515 x1i = a[j + 1] - a[j + 3]; 516 x2r = a[j + 4] + a[j + 6]; 517 x2i = a[j + 5] + a[j + 7]; 518 x3r = a[j + 4] - a[j + 6]; 519 x3i = a[j + 5] - a[j + 7]; 520 a[j] = x0r + x2r; 521 a[j + 1] = x0i + x2i; 522 x0r -= x2r; 523 x0i -= x2i; 524 a[j + 4] = wk2r * x0r - wk2i * x0i; 525 a[j + 5] = wk2r * x0i + wk2i * x0r; 526 x0r = x1r - x3i; 527 x0i = x1i + x3r; 528 a[j + 2] = wk1r * x0r - wk1i * x0i; 529 a[j + 3] = wk1r * x0i + wk1i * x0r; 530 x0r = x1r + x3i; 531 x0i = x1i - x3r; 532 a[j + 6] = wk3r * x0r - wk3i * x0i; 533 a[j + 7] = wk3r * x0i + wk3i * x0r; 534 wk1r = w[k2 + 2]; 535 wk1i = w[k2 + 3]; 536 wk3r = wk1r - 2 * wk2r * wk1i; 537 wk3i = 2 * wk2r * wk1r - wk1i; 538 x0r = a[j + 8] + a[j + 10]; 539 x0i = a[j + 9] + a[j + 11]; 540 x1r = a[j + 8] - a[j + 10]; 541 x1i = a[j + 9] - a[j + 11]; 542 x2r = a[j + 12] + a[j + 14]; 543 x2i = a[j + 13] + a[j + 15]; 544 x3r = a[j + 12] - a[j + 14]; 545 x3i = a[j + 13] - a[j + 15]; 546 a[j + 8] = x0r + x2r; 547 a[j + 9] = x0i + x2i; 548 x0r -= x2r; 549 x0i -= x2i; 550 a[j + 12] = -wk2i * x0r - wk2r * x0i; 551 a[j + 13] = -wk2i * x0i + wk2r * x0r; 552 x0r = x1r - x3i; 553 x0i = x1i + x3r; 554 a[j + 10] = wk1r * x0r - wk1i * x0i; 555 a[j + 11] = wk1r * x0i + wk1i * x0r; 556 x0r = x1r + x3i; 557 x0i = x1i - x3r; 558 a[j + 14] = wk3r * x0r - wk3i * x0i; 559 a[j + 15] = wk3r * x0i + wk3i * x0r; 560 } 561 } 562 563 564 void cftmdl(int n, int l, double *a, double *w) 565 { 566 int j, j1, j2, j3, k, k1, k2, m, m2; 567 double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 568 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 569 570 m = l << 2; 571 for (j = 0; j < l; j += 2) { 572 j1 = j + l; 573 j2 = j1 + l; 574 j3 = j2 + l; 575 x0r = a[j] + a[j1]; 576 x0i = a[j + 1] + a[j1 + 1]; 577 x1r = a[j] - a[j1]; 578 x1i = a[j + 1] - a[j1 + 1]; 579 x2r = a[j2] + a[j3]; 580 x2i = a[j2 + 1] + a[j3 + 1]; 581 x3r = a[j2] - a[j3]; 582 x3i = a[j2 + 1] - a[j3 + 1]; 583 a[j] = x0r + x2r; 584 a[j + 1] = x0i + x2i; 585 a[j2] = x0r - x2r; 586 a[j2 + 1] = x0i - x2i; 587 a[j1] = x1r - x3i; 588 a[j1 + 1] = x1i + x3r; 589 a[j3] = x1r + x3i; 590 a[j3 + 1] = x1i - x3r; 591 } 592 wk1r = w[2]; 593 for (j = m; j < l + m; j += 2) { 594 j1 = j + l; 595 j2 = j1 + l; 596 j3 = j2 + l; 597 x0r = a[j] + a[j1]; 598 x0i = a[j + 1] + a[j1 + 1]; 599 x1r = a[j] - a[j1]; 600 x1i = a[j + 1] - a[j1 + 1]; 601 x2r = a[j2] + a[j3]; 602 x2i = a[j2 + 1] + a[j3 + 1]; 603 x3r = a[j2] - a[j3]; 604 x3i = a[j2 + 1] - a[j3 + 1]; 605 a[j] = x0r + x2r; 606 a[j + 1] = x0i + x2i; 607 a[j2] = x2i - x0i; 608 a[j2 + 1] = x0r - x2r; 609 x0r = x1r - x3i; 610 x0i = x1i + x3r; 611 a[j1] = wk1r * (x0r - x0i); 612 a[j1 + 1] = wk1r * (x0r + x0i); 613 x0r = x3i + x1r; 614 x0i = x3r - x1i; 615 a[j3] = wk1r * (x0i - x0r); 616 a[j3 + 1] = wk1r * (x0i + x0r); 617 } 618 k1 = 0; 619 m2 = 2 * m; 620 for (k = m2; k < n; k += m2) { 621 k1 += 2; 622 k2 = 2 * k1; 623 wk2r = w[k1]; 624 wk2i = w[k1 + 1]; 625 wk1r = w[k2]; 626 wk1i = w[k2 + 1]; 627 wk3r = wk1r - 2 * wk2i * wk1i; 628 wk3i = 2 * wk2i * wk1r - wk1i; 629 for (j = k; j < l + k; j += 2) { 630 j1 = j + l; 631 j2 = j1 + l; 632 j3 = j2 + l; 633 x0r = a[j] + a[j1]; 634 x0i = a[j + 1] + a[j1 + 1]; 635 x1r = a[j] - a[j1]; 636 x1i = a[j + 1] - a[j1 + 1]; 637 x2r = a[j2] + a[j3]; 638 x2i = a[j2 + 1] + a[j3 + 1]; 639 x3r = a[j2] - a[j3]; 640 x3i = a[j2 + 1] - a[j3 + 1]; 641 a[j] = x0r + x2r; 642 a[j + 1] = x0i + x2i; 643 x0r -= x2r; 644 x0i -= x2i; 645 a[j2] = wk2r * x0r - wk2i * x0i; 646 a[j2 + 1] = wk2r * x0i + wk2i * x0r; 647 x0r = x1r - x3i; 648 x0i = x1i + x3r; 649 a[j1] = wk1r * x0r - wk1i * x0i; 650 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 651 x0r = x1r + x3i; 652 x0i = x1i - x3r; 653 a[j3] = wk3r * x0r - wk3i * x0i; 654 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 655 } 656 wk1r = w[k2 + 2]; 657 wk1i = w[k2 + 3]; 658 wk3r = wk1r - 2 * wk2r * wk1i; 659 wk3i = 2 * wk2r * wk1r - wk1i; 660 for (j = k + m; j < l + (k + m); j += 2) { 661 j1 = j + l; 662 j2 = j1 + l; 663 j3 = j2 + l; 664 x0r = a[j] + a[j1]; 665 x0i = a[j + 1] + a[j1 + 1]; 666 x1r = a[j] - a[j1]; 667 x1i = a[j + 1] - a[j1 + 1]; 668 x2r = a[j2] + a[j3]; 669 x2i = a[j2 + 1] + a[j3 + 1]; 670 x3r = a[j2] - a[j3]; 671 x3i = a[j2 + 1] - a[j3 + 1]; 672 a[j] = x0r + x2r; 673 a[j + 1] = x0i + x2i; 674 x0r -= x2r; 675 x0i -= x2i; 676 a[j2] = -wk2i * x0r - wk2r * x0i; 677 a[j2 + 1] = -wk2i * x0i + wk2r * x0r; 678 x0r = x1r - x3i; 679 x0i = x1i + x3r; 680 a[j1] = wk1r * x0r - wk1i * x0i; 681 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 682 x0r = x1r + x3i; 683 x0i = x1i - x3r; 684 a[j3] = wk3r * x0r - wk3i * x0i; 685 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 686 } 687 } 688 } 689 690 691 void rftfsub(int n, double *a, int nc, double *c) 692 { 693 int j, k, kk, ks, m; 694 double wkr, wki, xr, xi, yr, yi; 695 696 m = n >> 1; 697 ks = 2 * nc / m; 698 kk = 0; 699 for (j = 2; j < m; j += 2) { 700 k = n - j; 701 kk += ks; 702 wkr = 0.5 - c[nc - kk]; 703 wki = c[kk]; 704 xr = a[j] - a[k]; 705 xi = a[j + 1] + a[k + 1]; 706 yr = wkr * xr - wki * xi; 707 yi = wkr * xi + wki * xr; 708 a[j] -= yr; 709 a[j + 1] -= yi; 710 a[k] += yr; 711 a[k + 1] -= yi; 712 } 713 } 714 715 716 void rftbsub(int n, double *a, int nc, double *c) 717 { 718 int j, k, kk, ks, m; 719 double wkr, wki, xr, xi, yr, yi; 720 721 a[1] = -a[1]; 722 m = n >> 1; 723 ks = 2 * nc / m; 724 kk = 0; 725 for (j = 2; j < m; j += 2) { 726 k = n - j; 727 kk += ks; 728 wkr = 0.5 - c[nc - kk]; 729 wki = c[kk]; 730 xr = a[j] - a[k]; 731 xi = a[j + 1] + a[k + 1]; 732 yr = wkr * xr + wki * xi; 733 yi = wkr * xi - wki * xr; 734 a[j] -= yr; 735 a[j + 1] = yi - a[j + 1]; 736 a[k] += yr; 737 a[k + 1] = yi - a[k + 1]; 738 } 739 a[m + 1] = -a[m + 1]; 740 } 741 742 OouraFFT(const OouraFFT&) = delete; 743 OouraFFT& operator=(const OouraFFT&) = delete; 744 }; 745 746 std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl() 747 { 748 return std::unique_ptr<OouraFFT>(new OouraFFT()); 749 } 750 751 752 #endif // AUDIOFFT_OOURA_USED 753 754 755 // ================================================================ 756 757 758 #ifdef AUDIOFFT_APPLE_ACCELERATE_USED 759 760 761 /** 762 * @internal 763 * @class AppleAccelerateFFT 764 * @brief FFT implementation using the Apple Accelerate framework internally 765 */ 766 class AppleAccelerateFFT : public AudioFFTImpl 767 { 768 public: 769 AppleAccelerateFFT() : 770 AudioFFTImpl(), 771 _size(0), 772 _powerOf2(0), 773 _fftSetup(0), 774 _re(), 775 _im() 776 { 777 } 778 779 virtual ~AppleAccelerateFFT() 780 { 781 init(0); 782 } 783 784 virtual void init(size_t size) override 785 { 786 if (_fftSetup) 787 { 788 vDSP_destroy_fftsetup(_fftSetup); 789 _size = 0; 790 _powerOf2 = 0; 791 _fftSetup = 0; 792 _re.clear(); 793 _im.clear(); 794 } 795 796 if (size > 0) 797 { 798 _size = size; 799 _powerOf2 = 0; 800 while ((1 << _powerOf2) < _size) 801 { 802 ++_powerOf2; 803 } 804 _fftSetup = vDSP_create_fftsetup(_powerOf2, FFT_RADIX2); 805 _re.resize(_size / 2); 806 _im.resize(_size / 2); 807 } 808 } 809 810 virtual void fft(const float* data, float* re, float* im) override 811 { 812 const size_t size2 = _size / 2; 813 DSPSplitComplex splitComplex; 814 splitComplex.realp = re; 815 splitComplex.imagp = im; 816 vDSP_ctoz(reinterpret_cast<const COMPLEX*>(data), 2, &splitComplex, 1, size2); 817 vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_FORWARD); 818 const float factor = 0.5f; 819 vDSP_vsmul(re, 1, &factor, re, 1, size2); 820 vDSP_vsmul(im, 1, &factor, im, 1, size2); 821 re[size2] = im[0]; 822 im[0] = 0.0f; 823 im[size2] = 0.0f; 824 } 825 826 virtual void ifft(float* data, const float* re, const float* im) override 827 { 828 const size_t size2 = _size / 2; 829 ::memcpy(_re.data(), re, size2 * sizeof(float)); 830 ::memcpy(_im.data(), im, size2 * sizeof(float)); 831 _im[0] = re[size2]; 832 DSPSplitComplex splitComplex; 833 splitComplex.realp = _re.data(); 834 splitComplex.imagp = _im.data(); 835 vDSP_fft_zrip(_fftSetup, &splitComplex, 1, _powerOf2, FFT_INVERSE); 836 vDSP_ztoc(&splitComplex, 1, reinterpret_cast<COMPLEX*>(data), 2, size2); 837 const float factor = 1.0f / static_cast<float>(_size); 838 vDSP_vsmul(data, 1, &factor, data, 1, _size); 839 } 840 841 private: 842 size_t _size; 843 size_t _powerOf2; 844 FFTSetup _fftSetup; 845 std::vector<float> _re; 846 std::vector<float> _im; 847 848 AppleAccelerateFFT(const AppleAccelerateFFT&) = delete; 849 AppleAccelerateFFT& operator=(const AppleAccelerateFFT&) = delete; 850 }; 851 852 853 std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl() 854 { 855 return std::unique_ptr<AppleAccelerateFFT>(new AppleAccelerateFFT()); 856 } 857 858 859 #endif // AUDIOFFT_APPLE_ACCELERATE_USED 860 861 862 // ================================================================ 863 864 865 #ifdef AUDIOFFT_FFTW3_USED 866 867 868 /** 869 * @internal 870 * @class FFTW3FFT 871 * @brief FFT implementation using FFTW3 internally (see fftw.org) 872 */ 873 class FFTW3FFT : public AudioFFTImpl 874 { 875 public: 876 FFTW3FFT() : 877 AudioFFTImpl(), 878 _size(0), 879 _complexSize(0), 880 _planForward(0), 881 _planBackward(0), 882 _data(0), 883 _re(0), 884 _im(0) 885 { 886 } 887 888 virtual ~FFTW3FFT() 889 { 890 init(0); 891 } 892 893 virtual void init(size_t size) override 894 { 895 if (_size != size) 896 { 897 if (_size > 0) 898 { 899 fftwf_destroy_plan(_planForward); 900 fftwf_destroy_plan(_planBackward); 901 _planForward = 0; 902 _planBackward = 0; 903 _size = 0; 904 _complexSize = 0; 905 906 if (_data) 907 { 908 fftwf_free(_data); 909 _data = 0; 910 } 911 912 if (_re) 913 { 914 fftwf_free(_re); 915 _re = 0; 916 } 917 918 if (_im) 919 { 920 fftwf_free(_im); 921 _im = 0; 922 } 923 } 924 925 if (size > 0) 926 { 927 _size = size; 928 _complexSize = AudioFFT::ComplexSize(_size); 929 const size_t complexSize = AudioFFT::ComplexSize(_size); 930 _data = reinterpret_cast<float*>(fftwf_malloc(_size * sizeof(float))); 931 _re = reinterpret_cast<float*>(fftwf_malloc(complexSize * sizeof(float))); 932 _im = reinterpret_cast<float*>(fftwf_malloc(complexSize * sizeof(float))); 933 934 fftw_iodim dim; 935 dim.n = static_cast<int>(size); 936 dim.is = 1; 937 dim.os = 1; 938 _planForward = fftwf_plan_guru_split_dft_r2c(1, &dim, 0, 0, _data, _re, _im, FFTW_MEASURE); 939 _planBackward = fftwf_plan_guru_split_dft_c2r(1, &dim, 0, 0, _re, _im, _data, FFTW_MEASURE); 940 } 941 } 942 } 943 944 virtual void fft(const float* data, float* re, float* im) override 945 { 946 ::memcpy(_data, data, _size * sizeof(float)); 947 fftwf_execute_split_dft_r2c(_planForward, _data, _re, _im); 948 ::memcpy(re, _re, _complexSize * sizeof(float)); 949 ::memcpy(im, _im, _complexSize * sizeof(float)); 950 } 951 952 void ifft(float* data, const float* re, const float* im) 953 { 954 ::memcpy(_re, re, _complexSize * sizeof(float)); 955 ::memcpy(_im, im, _complexSize * sizeof(float)); 956 fftwf_execute_split_dft_c2r(_planBackward, _re, _im, _data); 957 ScaleBuffer(data, _data, 1.0f / static_cast<float>(_size), _size); 958 } 959 960 private: 961 size_t _size; 962 size_t _complexSize; 963 fftwf_plan _planForward; 964 fftwf_plan _planBackward; 965 float* _data; 966 float* _re; 967 float* _im; 968 969 FFTW3FFT(const FFTW3FFT&) = delete; 970 FFTW3FFT& operator=(const FFTW3FFT&) = delete; 971 }; 972 973 974 std::unique_ptr<AudioFFTImpl> MakeAudioFFTImpl() 975 { 976 return std::unique_ptr<FFTW3FFT>(new FFTW3FFT()); 977 } 978 979 980 #endif // AUDIOFFT_FFTW3_USED 981 982 } // End of namespace details 983 984 985 // ============================================================= 986 987 988 AudioFFT::AudioFFT() : 989 _impl(details::MakeAudioFFTImpl()) 990 { 991 } 992 993 994 void AudioFFT::init(size_t size) 995 { 996 assert(details::IsPowerOf2(size)); 997 _impl->init(size); 998 } 999 1000 1001 void AudioFFT::fft(const float* data, float* re, float* im) 1002 { 1003 _impl->fft(data, re, im); 1004 } 1005 1006 1007 void AudioFFT::ifft(float* data, const float* re, const float* im) 1008 { 1009 _impl->ifft(data, re, im); 1010 } 1011 1012 1013 size_t AudioFFT::ComplexSize(size_t size) 1014 { 1015 return (size / 2) + 1; 1016 } 1017 1018 } // End of namespace 1019