1 use num_integer::gcd; 2 use std::any::TypeId; 3 use std::collections::HashMap; 4 5 use std::sync::Arc; 6 7 use crate::{common::FftNum, fft_cache::FftCache, FftDirection}; 8 9 use crate::algorithm::*; 10 use crate::sse::sse_butterflies::*; 11 use crate::sse::sse_prime_butterflies::*; 12 use crate::sse::sse_radix4::*; 13 use crate::Fft; 14 15 use crate::math_utils::{PrimeFactor, PrimeFactors}; 16 17 const MIN_RADIX4_BITS: u32 = 6; // smallest size to consider radix 4 an option is 2^6 = 64 18 const MAX_RADER_PRIME_FACTOR: usize = 23; // don't use Raders if the inner fft length has prime factor larger than this 19 const MIN_BLUESTEIN_MIXED_RADIX_LEN: usize = 90; // only use mixed radix for the inner fft of Bluestein if length is larger than this 20 21 /// A Recipe is a structure that describes the design of a FFT, without actually creating it. 22 /// It is used as a middle step in the planning process. 23 #[derive(Debug, PartialEq, Clone)] 24 pub enum Recipe { 25 Dft(usize), 26 MixedRadix { 27 left_fft: Arc<Recipe>, 28 right_fft: Arc<Recipe>, 29 }, 30 #[allow(dead_code)] 31 GoodThomasAlgorithm { 32 left_fft: Arc<Recipe>, 33 right_fft: Arc<Recipe>, 34 }, 35 MixedRadixSmall { 36 left_fft: Arc<Recipe>, 37 right_fft: Arc<Recipe>, 38 }, 39 GoodThomasAlgorithmSmall { 40 left_fft: Arc<Recipe>, 41 right_fft: Arc<Recipe>, 42 }, 43 RadersAlgorithm { 44 inner_fft: Arc<Recipe>, 45 }, 46 BluesteinsAlgorithm { 47 len: usize, 48 inner_fft: Arc<Recipe>, 49 }, 50 Radix4(usize), 51 Butterfly1, 52 Butterfly2, 53 Butterfly3, 54 Butterfly4, 55 Butterfly5, 56 Butterfly6, 57 Butterfly7, 58 Butterfly8, 59 Butterfly9, 60 Butterfly10, 61 Butterfly11, 62 Butterfly12, 63 Butterfly13, 64 Butterfly15, 65 Butterfly16, 66 Butterfly17, 67 Butterfly19, 68 Butterfly23, 69 Butterfly29, 70 Butterfly31, 71 Butterfly32, 72 } 73 74 impl Recipe { len(&self) -> usize75 pub fn len(&self) -> usize { 76 match self { 77 Recipe::Dft(length) => *length, 78 Recipe::Radix4(length) => *length, 79 Recipe::Butterfly1 => 1, 80 Recipe::Butterfly2 => 2, 81 Recipe::Butterfly3 => 3, 82 Recipe::Butterfly4 => 4, 83 Recipe::Butterfly5 => 5, 84 Recipe::Butterfly6 => 6, 85 Recipe::Butterfly7 => 7, 86 Recipe::Butterfly8 => 8, 87 Recipe::Butterfly9 => 9, 88 Recipe::Butterfly10 => 10, 89 Recipe::Butterfly11 => 11, 90 Recipe::Butterfly12 => 12, 91 Recipe::Butterfly13 => 13, 92 Recipe::Butterfly15 => 15, 93 Recipe::Butterfly16 => 16, 94 Recipe::Butterfly17 => 17, 95 Recipe::Butterfly19 => 19, 96 Recipe::Butterfly23 => 23, 97 Recipe::Butterfly29 => 29, 98 Recipe::Butterfly31 => 31, 99 Recipe::Butterfly32 => 32, 100 Recipe::MixedRadix { 101 left_fft, 102 right_fft, 103 } => left_fft.len() * right_fft.len(), 104 Recipe::GoodThomasAlgorithm { 105 left_fft, 106 right_fft, 107 } => left_fft.len() * right_fft.len(), 108 Recipe::MixedRadixSmall { 109 left_fft, 110 right_fft, 111 } => left_fft.len() * right_fft.len(), 112 Recipe::GoodThomasAlgorithmSmall { 113 left_fft, 114 right_fft, 115 } => left_fft.len() * right_fft.len(), 116 Recipe::RadersAlgorithm { inner_fft } => inner_fft.len() + 1, 117 Recipe::BluesteinsAlgorithm { len, .. } => *len, 118 } 119 } 120 } 121 122 /// The SSE FFT planner creates new FFT algorithm instances using a mix of scalar and SSE accelerated algorithms. 123 /// It requires at least SSE4.1, which is available on all reasonably recent x86_64 cpus. 124 /// 125 /// RustFFT has several FFT algorithms available. For a given FFT size, the `FftPlannerSse` decides which of the 126 /// available FFT algorithms to use and then initializes them. 127 /// 128 /// ~~~ 129 /// // Perform a forward Fft of size 1234 130 /// use std::sync::Arc; 131 /// use rustfft::{FftPlannerSse, num_complex::Complex}; 132 /// 133 /// if let Ok(mut planner) = FftPlannerSse::new() { 134 /// let fft = planner.plan_fft_forward(1234); 135 /// 136 /// let mut buffer = vec![Complex{ re: 0.0f32, im: 0.0f32 }; 1234]; 137 /// fft.process(&mut buffer); 138 /// 139 /// // The FFT instance returned by the planner has the type `Arc<dyn Fft<T>>`, 140 /// // where T is the numeric type, ie f32 or f64, so it's cheap to clone 141 /// let fft_clone = Arc::clone(&fft); 142 /// } 143 /// ~~~ 144 /// 145 /// If you plan on creating multiple FFT instances, it is recommended to reuse the same planner for all of them. This 146 /// is because the planner re-uses internal data across FFT instances wherever possible, saving memory and reducing 147 /// setup time. (FFT instances created with one planner will never re-use data and buffers with FFT instances created 148 /// by a different planner) 149 /// 150 /// Each FFT instance owns [`Arc`s](std::sync::Arc) to its internal data, rather than borrowing it from the planner, so it's perfectly 151 /// safe to drop the planner after creating Fft instances. 152 pub struct FftPlannerSse<T: FftNum> { 153 algorithm_cache: FftCache<T>, 154 recipe_cache: HashMap<usize, Arc<Recipe>>, 155 } 156 157 impl<T: FftNum> FftPlannerSse<T> { 158 /// Creates a new `FftPlannerSse` instance. 159 /// 160 /// Returns `Ok(planner_instance)` if this machine has the required instruction sets. 161 /// Returns `Err(())` if some instruction sets are missing. new() -> Result<Self, ()>162 pub fn new() -> Result<Self, ()> { 163 if is_x86_feature_detected!("sse4.1") { 164 // Ideally, we would implement the planner with specialization. 165 // Specialization won't be on stable rust for a long time though, so in the meantime, we can hack around it. 166 // 167 // We use TypeID to determine if T is f32, f64, or neither. If neither, we don't want to do any SSE acceleration 168 // If it's f32 or f64, then construct and return a SSE planner instance. 169 // 170 // All SSE accelerated algorithms come in separate versions for f32 and f64. The type is checked when a new one is created, and if it does not 171 // match the type the FFT is meant for, it will panic. This will never be a problem if using a planner to construct the FFTs. 172 // 173 // An annoying snag with this setup is that we frequently have to transmute buffers from &mut [Complex<T>] to &mut [Complex<f32 or f64>] or vice versa. 174 // We know this is safe because we assert everywhere that Type(f32 or f64)==Type(T), so it's just a matter of "doing it right" every time. 175 // These transmutes are required because the FFT algorithm's input will come through the FFT trait, which may only be bounded by FftNum. 176 // So the buffers will have the type &mut [Complex<T>]. 177 let id_f32 = TypeId::of::<f32>(); 178 let id_f64 = TypeId::of::<f64>(); 179 let id_t = TypeId::of::<T>(); 180 181 if id_t == id_f32 || id_t == id_f64 { 182 return Ok(Self { 183 algorithm_cache: FftCache::new(), 184 recipe_cache: HashMap::new(), 185 }); 186 } 187 } 188 Err(()) 189 } 190 191 /// Returns a `Fft` instance which uses SSE4.1 instructions to compute FFTs of size `len`. 192 /// 193 /// If the provided `direction` is `FftDirection::Forward`, the returned instance will compute forward FFTs. If it's `FftDirection::Inverse`, it will compute inverse FFTs. 194 /// 195 /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time. plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>>196 pub fn plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>> { 197 // Step 1: Create a "recipe" for this FFT, which will tell us exactly which combination of algorithms to use 198 let recipe = self.design_fft_for_len(len); 199 200 // Step 2: Use our recipe to construct a Fft trait object 201 self.build_fft(&recipe, direction) 202 } 203 204 /// Returns a `Fft` instance which uses SSE4.1 instructions to compute forward FFTs of size `len` 205 /// 206 /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time. plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>>207 pub fn plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>> { 208 self.plan_fft(len, FftDirection::Forward) 209 } 210 211 /// Returns a `Fft` instance which uses SSE4.1 instructions to compute inverse FFTs of size `len. 212 /// 213 /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time. plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>>214 pub fn plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>> { 215 self.plan_fft(len, FftDirection::Inverse) 216 } 217 218 // Make a recipe for a length design_fft_for_len(&mut self, len: usize) -> Arc<Recipe>219 fn design_fft_for_len(&mut self, len: usize) -> Arc<Recipe> { 220 if len < 1 { 221 Arc::new(Recipe::Dft(len)) 222 } else if let Some(recipe) = self.recipe_cache.get(&len) { 223 Arc::clone(&recipe) 224 } else { 225 let factors = PrimeFactors::compute(len); 226 let recipe = self.design_fft_with_factors(len, factors); 227 self.recipe_cache.insert(len, Arc::clone(&recipe)); 228 recipe 229 } 230 } 231 232 // Create the fft from a recipe, take from cache if possible build_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>>233 fn build_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> { 234 let len = recipe.len(); 235 if let Some(instance) = self.algorithm_cache.get(len, direction) { 236 instance 237 } else { 238 let fft = self.build_new_fft(recipe, direction); 239 self.algorithm_cache.insert(&fft); 240 fft 241 } 242 } 243 244 // Create a new fft from a recipe build_new_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>>245 fn build_new_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> { 246 let id_f32 = TypeId::of::<f32>(); 247 let id_f64 = TypeId::of::<f64>(); 248 let id_t = TypeId::of::<T>(); 249 250 match recipe { 251 Recipe::Dft(len) => Arc::new(Dft::new(*len, direction)) as Arc<dyn Fft<T>>, 252 Recipe::Radix4(len) => { 253 if id_t == id_f32 { 254 Arc::new(Sse32Radix4::new(*len, direction)) as Arc<dyn Fft<T>> 255 } else if id_t == id_f64 { 256 Arc::new(Sse64Radix4::new(*len, direction)) as Arc<dyn Fft<T>> 257 } else { 258 panic!("Not f32 or f64"); 259 } 260 } 261 Recipe::Butterfly1 => { 262 if id_t == id_f32 { 263 Arc::new(SseF32Butterfly1::new(direction)) as Arc<dyn Fft<T>> 264 } else if id_t == id_f64 { 265 Arc::new(SseF64Butterfly1::new(direction)) as Arc<dyn Fft<T>> 266 } else { 267 panic!("Not f32 or f64"); 268 } 269 } 270 Recipe::Butterfly2 => { 271 if id_t == id_f32 { 272 Arc::new(SseF32Butterfly2::new(direction)) as Arc<dyn Fft<T>> 273 } else if id_t == id_f64 { 274 Arc::new(SseF64Butterfly2::new(direction)) as Arc<dyn Fft<T>> 275 } else { 276 panic!("Not f32 or f64"); 277 } 278 } 279 Recipe::Butterfly3 => { 280 if id_t == id_f32 { 281 Arc::new(SseF32Butterfly3::new(direction)) as Arc<dyn Fft<T>> 282 } else if id_t == id_f64 { 283 Arc::new(SseF64Butterfly3::new(direction)) as Arc<dyn Fft<T>> 284 } else { 285 panic!("Not f32 or f64"); 286 } 287 } 288 Recipe::Butterfly4 => { 289 if id_t == id_f32 { 290 Arc::new(SseF32Butterfly4::new(direction)) as Arc<dyn Fft<T>> 291 } else if id_t == id_f64 { 292 Arc::new(SseF64Butterfly4::new(direction)) as Arc<dyn Fft<T>> 293 } else { 294 panic!("Not f32 or f64"); 295 } 296 } 297 Recipe::Butterfly5 => { 298 if id_t == id_f32 { 299 Arc::new(SseF32Butterfly5::new(direction)) as Arc<dyn Fft<T>> 300 } else if id_t == id_f64 { 301 Arc::new(SseF64Butterfly5::new(direction)) as Arc<dyn Fft<T>> 302 } else { 303 panic!("Not f32 or f64"); 304 } 305 } 306 Recipe::Butterfly6 => { 307 if id_t == id_f32 { 308 Arc::new(SseF32Butterfly6::new(direction)) as Arc<dyn Fft<T>> 309 } else if id_t == id_f64 { 310 Arc::new(SseF64Butterfly6::new(direction)) as Arc<dyn Fft<T>> 311 } else { 312 panic!("Not f32 or f64"); 313 } 314 } 315 Recipe::Butterfly7 => { 316 if id_t == id_f32 { 317 Arc::new(SseF32Butterfly7::new(direction)) as Arc<dyn Fft<T>> 318 } else if id_t == id_f64 { 319 Arc::new(SseF64Butterfly7::new(direction)) as Arc<dyn Fft<T>> 320 } else { 321 panic!("Not f32 or f64"); 322 } 323 } 324 Recipe::Butterfly8 => { 325 if id_t == id_f32 { 326 Arc::new(SseF32Butterfly8::new(direction)) as Arc<dyn Fft<T>> 327 } else if id_t == id_f64 { 328 Arc::new(SseF64Butterfly8::new(direction)) as Arc<dyn Fft<T>> 329 } else { 330 panic!("Not f32 or f64"); 331 } 332 } 333 Recipe::Butterfly9 => { 334 if id_t == id_f32 { 335 Arc::new(SseF32Butterfly9::new(direction)) as Arc<dyn Fft<T>> 336 } else if id_t == id_f64 { 337 Arc::new(SseF64Butterfly9::new(direction)) as Arc<dyn Fft<T>> 338 } else { 339 panic!("Not f32 or f64"); 340 } 341 } 342 Recipe::Butterfly10 => { 343 if id_t == id_f32 { 344 Arc::new(SseF32Butterfly10::new(direction)) as Arc<dyn Fft<T>> 345 } else if id_t == id_f64 { 346 Arc::new(SseF64Butterfly10::new(direction)) as Arc<dyn Fft<T>> 347 } else { 348 panic!("Not f32 or f64"); 349 } 350 } 351 Recipe::Butterfly11 => { 352 if id_t == id_f32 { 353 Arc::new(SseF32Butterfly11::new(direction)) as Arc<dyn Fft<T>> 354 } else if id_t == id_f64 { 355 Arc::new(SseF64Butterfly11::new(direction)) as Arc<dyn Fft<T>> 356 } else { 357 panic!("Not f32 or f64"); 358 } 359 } 360 Recipe::Butterfly12 => { 361 if id_t == id_f32 { 362 Arc::new(SseF32Butterfly12::new(direction)) as Arc<dyn Fft<T>> 363 } else if id_t == id_f64 { 364 Arc::new(SseF64Butterfly12::new(direction)) as Arc<dyn Fft<T>> 365 } else { 366 panic!("Not f32 or f64"); 367 } 368 } 369 Recipe::Butterfly13 => { 370 if id_t == id_f32 { 371 Arc::new(SseF32Butterfly13::new(direction)) as Arc<dyn Fft<T>> 372 } else if id_t == id_f64 { 373 Arc::new(SseF64Butterfly13::new(direction)) as Arc<dyn Fft<T>> 374 } else { 375 panic!("Not f32 or f64"); 376 } 377 } 378 Recipe::Butterfly15 => { 379 if id_t == id_f32 { 380 Arc::new(SseF32Butterfly15::new(direction)) as Arc<dyn Fft<T>> 381 } else if id_t == id_f64 { 382 Arc::new(SseF64Butterfly15::new(direction)) as Arc<dyn Fft<T>> 383 } else { 384 panic!("Not f32 or f64"); 385 } 386 } 387 Recipe::Butterfly16 => { 388 if id_t == id_f32 { 389 Arc::new(SseF32Butterfly16::new(direction)) as Arc<dyn Fft<T>> 390 } else if id_t == id_f64 { 391 Arc::new(SseF64Butterfly16::new(direction)) as Arc<dyn Fft<T>> 392 } else { 393 panic!("Not f32 or f64"); 394 } 395 } 396 Recipe::Butterfly17 => { 397 if id_t == id_f32 { 398 Arc::new(SseF32Butterfly17::new(direction)) as Arc<dyn Fft<T>> 399 } else if id_t == id_f64 { 400 Arc::new(SseF64Butterfly17::new(direction)) as Arc<dyn Fft<T>> 401 } else { 402 panic!("Not f32 or f64"); 403 } 404 } 405 Recipe::Butterfly19 => { 406 if id_t == id_f32 { 407 Arc::new(SseF32Butterfly19::new(direction)) as Arc<dyn Fft<T>> 408 } else if id_t == id_f64 { 409 Arc::new(SseF64Butterfly19::new(direction)) as Arc<dyn Fft<T>> 410 } else { 411 panic!("Not f32 or f64"); 412 } 413 } 414 Recipe::Butterfly23 => { 415 if id_t == id_f32 { 416 Arc::new(SseF32Butterfly23::new(direction)) as Arc<dyn Fft<T>> 417 } else if id_t == id_f64 { 418 Arc::new(SseF64Butterfly23::new(direction)) as Arc<dyn Fft<T>> 419 } else { 420 panic!("Not f32 or f64"); 421 } 422 } 423 Recipe::Butterfly29 => { 424 if id_t == id_f32 { 425 Arc::new(SseF32Butterfly29::new(direction)) as Arc<dyn Fft<T>> 426 } else if id_t == id_f64 { 427 Arc::new(SseF64Butterfly29::new(direction)) as Arc<dyn Fft<T>> 428 } else { 429 panic!("Not f32 or f64"); 430 } 431 } 432 Recipe::Butterfly31 => { 433 if id_t == id_f32 { 434 Arc::new(SseF32Butterfly31::new(direction)) as Arc<dyn Fft<T>> 435 } else if id_t == id_f64 { 436 Arc::new(SseF64Butterfly31::new(direction)) as Arc<dyn Fft<T>> 437 } else { 438 panic!("Not f32 or f64"); 439 } 440 } 441 Recipe::Butterfly32 => { 442 if id_t == id_f32 { 443 Arc::new(SseF32Butterfly32::new(direction)) as Arc<dyn Fft<T>> 444 } else if id_t == id_f64 { 445 Arc::new(SseF64Butterfly32::new(direction)) as Arc<dyn Fft<T>> 446 } else { 447 panic!("Not f32 or f64"); 448 } 449 } 450 Recipe::MixedRadix { 451 left_fft, 452 right_fft, 453 } => { 454 let left_fft = self.build_fft(&left_fft, direction); 455 let right_fft = self.build_fft(&right_fft, direction); 456 Arc::new(MixedRadix::new(left_fft, right_fft)) as Arc<dyn Fft<T>> 457 } 458 Recipe::GoodThomasAlgorithm { 459 left_fft, 460 right_fft, 461 } => { 462 let left_fft = self.build_fft(&left_fft, direction); 463 let right_fft = self.build_fft(&right_fft, direction); 464 Arc::new(GoodThomasAlgorithm::new(left_fft, right_fft)) as Arc<dyn Fft<T>> 465 } 466 Recipe::MixedRadixSmall { 467 left_fft, 468 right_fft, 469 } => { 470 let left_fft = self.build_fft(&left_fft, direction); 471 let right_fft = self.build_fft(&right_fft, direction); 472 Arc::new(MixedRadixSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>> 473 } 474 Recipe::GoodThomasAlgorithmSmall { 475 left_fft, 476 right_fft, 477 } => { 478 let left_fft = self.build_fft(&left_fft, direction); 479 let right_fft = self.build_fft(&right_fft, direction); 480 Arc::new(GoodThomasAlgorithmSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>> 481 } 482 Recipe::RadersAlgorithm { inner_fft } => { 483 let inner_fft = self.build_fft(&inner_fft, direction); 484 Arc::new(RadersAlgorithm::new(inner_fft)) as Arc<dyn Fft<T>> 485 } 486 Recipe::BluesteinsAlgorithm { len, inner_fft } => { 487 let inner_fft = self.build_fft(&inner_fft, direction); 488 Arc::new(BluesteinsAlgorithm::new(*len, inner_fft)) as Arc<dyn Fft<T>> 489 } 490 } 491 } 492 design_fft_with_factors(&mut self, len: usize, factors: PrimeFactors) -> Arc<Recipe>493 fn design_fft_with_factors(&mut self, len: usize, factors: PrimeFactors) -> Arc<Recipe> { 494 if let Some(fft_instance) = self.design_butterfly_algorithm(len) { 495 fft_instance 496 } else if factors.is_prime() { 497 self.design_prime(len) 498 } else if len.trailing_zeros() >= MIN_RADIX4_BITS { 499 if len.is_power_of_two() { 500 Arc::new(Recipe::Radix4(len)) 501 } else { 502 let non_power_of_two = factors 503 .remove_factors(PrimeFactor { 504 value: 2, 505 count: len.trailing_zeros(), 506 }) 507 .unwrap(); 508 let power_of_two = PrimeFactors::compute(1 << len.trailing_zeros()); 509 self.design_mixed_radix(power_of_two, non_power_of_two) 510 } 511 } else { 512 // Can we do this as a mixed radix with just two butterflies? 513 // Loop through and find all combinations 514 // If more than one is found, keep the one where the factors are closer together. 515 // For example length 20 where 10x2 and 5x4 are possible, we use 5x4. 516 let butterflies: [usize; 20] = [ 517 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 23, 29, 31, 32, 518 ]; 519 let mut bf_left = 0; 520 let mut bf_right = 0; 521 // If the length is below 14, or over 1024 we don't need to try this. 522 if len > 13 && len <= 1024 { 523 for (n, bf_l) in butterflies.iter().enumerate() { 524 if len % bf_l == 0 { 525 let bf_r = len / bf_l; 526 if butterflies.iter().skip(n).any(|&m| m == bf_r) { 527 bf_left = *bf_l; 528 bf_right = bf_r; 529 } 530 } 531 } 532 if bf_left > 0 { 533 let fact_l = PrimeFactors::compute(bf_left); 534 let fact_r = PrimeFactors::compute(bf_right); 535 return self.design_mixed_radix(fact_l, fact_r); 536 } 537 } 538 // Not possible with just butterflies, go with the general solution. 539 let (left_factors, right_factors) = factors.partition_factors(); 540 self.design_mixed_radix(left_factors, right_factors) 541 } 542 } 543 design_mixed_radix( &mut self, left_factors: PrimeFactors, right_factors: PrimeFactors, ) -> Arc<Recipe>544 fn design_mixed_radix( 545 &mut self, 546 left_factors: PrimeFactors, 547 right_factors: PrimeFactors, 548 ) -> Arc<Recipe> { 549 let left_len = left_factors.get_product(); 550 let right_len = right_factors.get_product(); 551 552 //neither size is a butterfly, so go with the normal algorithm 553 let left_fft = self.design_fft_with_factors(left_len, left_factors); 554 let right_fft = self.design_fft_with_factors(right_len, right_factors); 555 556 //if both left_len and right_len are small, use algorithms optimized for small FFTs 557 if left_len < 33 && right_len < 33 { 558 // for small FFTs, if gcd is 1, good-thomas is faster 559 if gcd(left_len, right_len) == 1 { 560 Arc::new(Recipe::GoodThomasAlgorithmSmall { 561 left_fft, 562 right_fft, 563 }) 564 } else { 565 Arc::new(Recipe::MixedRadixSmall { 566 left_fft, 567 right_fft, 568 }) 569 } 570 } else { 571 Arc::new(Recipe::MixedRadix { 572 left_fft, 573 right_fft, 574 }) 575 } 576 } 577 578 // Returns Some(instance) if we have a butterfly available for this size. Returns None if there is no butterfly available for this size design_butterfly_algorithm(&mut self, len: usize) -> Option<Arc<Recipe>>579 fn design_butterfly_algorithm(&mut self, len: usize) -> Option<Arc<Recipe>> { 580 match len { 581 1 => Some(Arc::new(Recipe::Butterfly1)), 582 2 => Some(Arc::new(Recipe::Butterfly2)), 583 3 => Some(Arc::new(Recipe::Butterfly3)), 584 4 => Some(Arc::new(Recipe::Butterfly4)), 585 5 => Some(Arc::new(Recipe::Butterfly5)), 586 6 => Some(Arc::new(Recipe::Butterfly6)), 587 7 => Some(Arc::new(Recipe::Butterfly7)), 588 8 => Some(Arc::new(Recipe::Butterfly8)), 589 9 => Some(Arc::new(Recipe::Butterfly9)), 590 10 => Some(Arc::new(Recipe::Butterfly10)), 591 11 => Some(Arc::new(Recipe::Butterfly11)), 592 12 => Some(Arc::new(Recipe::Butterfly12)), 593 13 => Some(Arc::new(Recipe::Butterfly13)), 594 15 => Some(Arc::new(Recipe::Butterfly15)), 595 16 => Some(Arc::new(Recipe::Butterfly16)), 596 17 => Some(Arc::new(Recipe::Butterfly17)), 597 19 => Some(Arc::new(Recipe::Butterfly19)), 598 23 => Some(Arc::new(Recipe::Butterfly23)), 599 29 => Some(Arc::new(Recipe::Butterfly29)), 600 31 => Some(Arc::new(Recipe::Butterfly31)), 601 32 => Some(Arc::new(Recipe::Butterfly32)), 602 _ => None, 603 } 604 } 605 design_prime(&mut self, len: usize) -> Arc<Recipe>606 fn design_prime(&mut self, len: usize) -> Arc<Recipe> { 607 let inner_fft_len_rader = len - 1; 608 let raders_factors = PrimeFactors::compute(inner_fft_len_rader); 609 // If any of the prime factors is too large, Rader's gets slow and Bluestein's is the better choice 610 if raders_factors 611 .get_other_factors() 612 .iter() 613 .any(|val| val.value > MAX_RADER_PRIME_FACTOR) 614 { 615 let inner_fft_len_pow2 = (2 * len - 1).checked_next_power_of_two().unwrap(); 616 // for long ffts a mixed radix inner fft is faster than a longer radix4 617 let min_inner_len = 2 * len - 1; 618 let mixed_radix_len = 3 * inner_fft_len_pow2 / 4; 619 let inner_fft = 620 if mixed_radix_len >= min_inner_len && len >= MIN_BLUESTEIN_MIXED_RADIX_LEN { 621 let mixed_radix_factors = PrimeFactors::compute(mixed_radix_len); 622 self.design_fft_with_factors(mixed_radix_len, mixed_radix_factors) 623 } else { 624 Arc::new(Recipe::Radix4(inner_fft_len_pow2)) 625 }; 626 Arc::new(Recipe::BluesteinsAlgorithm { len, inner_fft }) 627 } else { 628 let inner_fft = self.design_fft_with_factors(inner_fft_len_rader, raders_factors); 629 Arc::new(Recipe::RadersAlgorithm { inner_fft }) 630 } 631 } 632 } 633 634 #[cfg(test)] 635 mod unit_tests { 636 use super::*; 637 is_mixedradix(plan: &Recipe) -> bool638 fn is_mixedradix(plan: &Recipe) -> bool { 639 match plan { 640 &Recipe::MixedRadix { .. } => true, 641 _ => false, 642 } 643 } 644 is_mixedradixsmall(plan: &Recipe) -> bool645 fn is_mixedradixsmall(plan: &Recipe) -> bool { 646 match plan { 647 &Recipe::MixedRadixSmall { .. } => true, 648 _ => false, 649 } 650 } 651 is_goodthomassmall(plan: &Recipe) -> bool652 fn is_goodthomassmall(plan: &Recipe) -> bool { 653 match plan { 654 &Recipe::GoodThomasAlgorithmSmall { .. } => true, 655 _ => false, 656 } 657 } 658 is_raders(plan: &Recipe) -> bool659 fn is_raders(plan: &Recipe) -> bool { 660 match plan { 661 &Recipe::RadersAlgorithm { .. } => true, 662 _ => false, 663 } 664 } 665 is_bluesteins(plan: &Recipe) -> bool666 fn is_bluesteins(plan: &Recipe) -> bool { 667 match plan { 668 &Recipe::BluesteinsAlgorithm { .. } => true, 669 _ => false, 670 } 671 } 672 673 #[test] test_plan_sse_trivial()674 fn test_plan_sse_trivial() { 675 // Length 0 and 1 should use Dft 676 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 677 for len in 0..1 { 678 let plan = planner.design_fft_for_len(len); 679 assert_eq!(*plan, Recipe::Dft(len)); 680 assert_eq!(plan.len(), len, "Recipe reports wrong length"); 681 } 682 } 683 684 #[test] test_plan_sse_largepoweroftwo()685 fn test_plan_sse_largepoweroftwo() { 686 // Powers of 2 above 6 should use Radix4 687 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 688 for pow in 6..32 { 689 let len = 1 << pow; 690 let plan = planner.design_fft_for_len(len); 691 assert_eq!(*plan, Recipe::Radix4(len)); 692 assert_eq!(plan.len(), len, "Recipe reports wrong length"); 693 } 694 } 695 696 #[test] test_plan_sse_butterflies()697 fn test_plan_sse_butterflies() { 698 // Check that all butterflies are used 699 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 700 assert_eq!(*planner.design_fft_for_len(2), Recipe::Butterfly2); 701 assert_eq!(*planner.design_fft_for_len(3), Recipe::Butterfly3); 702 assert_eq!(*planner.design_fft_for_len(4), Recipe::Butterfly4); 703 assert_eq!(*planner.design_fft_for_len(5), Recipe::Butterfly5); 704 assert_eq!(*planner.design_fft_for_len(6), Recipe::Butterfly6); 705 assert_eq!(*planner.design_fft_for_len(7), Recipe::Butterfly7); 706 assert_eq!(*planner.design_fft_for_len(8), Recipe::Butterfly8); 707 assert_eq!(*planner.design_fft_for_len(9), Recipe::Butterfly9); 708 assert_eq!(*planner.design_fft_for_len(10), Recipe::Butterfly10); 709 assert_eq!(*planner.design_fft_for_len(11), Recipe::Butterfly11); 710 assert_eq!(*planner.design_fft_for_len(12), Recipe::Butterfly12); 711 assert_eq!(*planner.design_fft_for_len(13), Recipe::Butterfly13); 712 assert_eq!(*planner.design_fft_for_len(15), Recipe::Butterfly15); 713 assert_eq!(*planner.design_fft_for_len(16), Recipe::Butterfly16); 714 assert_eq!(*planner.design_fft_for_len(17), Recipe::Butterfly17); 715 assert_eq!(*planner.design_fft_for_len(19), Recipe::Butterfly19); 716 assert_eq!(*planner.design_fft_for_len(23), Recipe::Butterfly23); 717 assert_eq!(*planner.design_fft_for_len(29), Recipe::Butterfly29); 718 assert_eq!(*planner.design_fft_for_len(31), Recipe::Butterfly31); 719 assert_eq!(*planner.design_fft_for_len(32), Recipe::Butterfly32); 720 } 721 722 #[test] test_plan_sse_mixedradix()723 fn test_plan_sse_mixedradix() { 724 // Products of several different primes should become MixedRadix 725 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 726 for pow2 in 2..5 { 727 for pow3 in 2..5 { 728 for pow5 in 2..5 { 729 for pow7 in 2..5 { 730 let len = 2usize.pow(pow2) 731 * 3usize.pow(pow3) 732 * 5usize.pow(pow5) 733 * 7usize.pow(pow7); 734 let plan = planner.design_fft_for_len(len); 735 assert!(is_mixedradix(&plan), "Expected MixedRadix, got {:?}", plan); 736 assert_eq!(plan.len(), len, "Recipe reports wrong length"); 737 } 738 } 739 } 740 } 741 } 742 743 #[test] test_plan_sse_mixedradixsmall()744 fn test_plan_sse_mixedradixsmall() { 745 // Products of two "small" lengths < 31 that have a common divisor >1, and isn't a power of 2 should be MixedRadixSmall 746 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 747 for len in [5 * 20, 5 * 25].iter() { 748 let plan = planner.design_fft_for_len(*len); 749 assert!( 750 is_mixedradixsmall(&plan), 751 "Expected MixedRadixSmall, got {:?}", 752 plan 753 ); 754 assert_eq!(plan.len(), *len, "Recipe reports wrong length"); 755 } 756 } 757 758 #[test] test_plan_sse_goodthomasbutterfly()759 fn test_plan_sse_goodthomasbutterfly() { 760 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 761 for len in [3 * 7, 5 * 7, 11 * 13, 2 * 29].iter() { 762 let plan = planner.design_fft_for_len(*len); 763 assert!( 764 is_goodthomassmall(&plan), 765 "Expected GoodThomasAlgorithmSmall, got {:?}", 766 plan 767 ); 768 assert_eq!(plan.len(), *len, "Recipe reports wrong length"); 769 } 770 } 771 772 #[test] test_plan_sse_bluestein_vs_rader()773 fn test_plan_sse_bluestein_vs_rader() { 774 let difficultprimes: [usize; 11] = [59, 83, 107, 149, 167, 173, 179, 359, 719, 1439, 2879]; 775 let easyprimes: [usize; 24] = [ 776 53, 61, 67, 71, 73, 79, 89, 97, 101, 103, 109, 113, 127, 131, 137, 139, 151, 157, 163, 777 181, 191, 193, 197, 199, 778 ]; 779 780 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 781 for len in difficultprimes.iter() { 782 let plan = planner.design_fft_for_len(*len); 783 assert!( 784 is_bluesteins(&plan), 785 "Expected BluesteinsAlgorithm, got {:?}", 786 plan 787 ); 788 assert_eq!(plan.len(), *len, "Recipe reports wrong length"); 789 } 790 for len in easyprimes.iter() { 791 let plan = planner.design_fft_for_len(*len); 792 assert!(is_raders(&plan), "Expected RadersAlgorithm, got {:?}", plan); 793 assert_eq!(plan.len(), *len, "Recipe reports wrong length"); 794 } 795 } 796 797 #[test] test_sse_fft_cache()798 fn test_sse_fft_cache() { 799 { 800 // Check that FFTs are reused if they're both forward 801 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 802 let fft_a = planner.plan_fft(1234, FftDirection::Forward); 803 let fft_b = planner.plan_fft(1234, FftDirection::Forward); 804 assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused"); 805 } 806 { 807 // Check that FFTs are reused if they're both inverse 808 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 809 let fft_a = planner.plan_fft(1234, FftDirection::Inverse); 810 let fft_b = planner.plan_fft(1234, FftDirection::Inverse); 811 assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused"); 812 } 813 { 814 // Check that FFTs are NOT resued if they don't both have the same direction 815 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 816 let fft_a = planner.plan_fft(1234, FftDirection::Forward); 817 let fft_b = planner.plan_fft(1234, FftDirection::Inverse); 818 assert!( 819 !Arc::ptr_eq(&fft_a, &fft_b), 820 "Existing fft was reused, even though directions don't match" 821 ); 822 } 823 } 824 825 #[test] test_sse_recipe_cache()826 fn test_sse_recipe_cache() { 827 // Check that all butterflies are used 828 let mut planner = FftPlannerSse::<f64>::new().unwrap(); 829 let fft_a = planner.design_fft_for_len(1234); 830 let fft_b = planner.design_fft_for_len(1234); 831 assert!( 832 Arc::ptr_eq(&fft_a, &fft_b), 833 "Existing recipe was not reused" 834 ); 835 } 836 } 837