1 use num_integer::gcd;
2 use std::any::TypeId;
3 use std::collections::HashMap;
4 
5 use std::sync::Arc;
6 
7 use crate::{common::FftNum, fft_cache::FftCache, FftDirection};
8 
9 use crate::algorithm::*;
10 use crate::sse::sse_butterflies::*;
11 use crate::sse::sse_prime_butterflies::*;
12 use crate::sse::sse_radix4::*;
13 use crate::Fft;
14 
15 use crate::math_utils::{PrimeFactor, PrimeFactors};
16 
17 const MIN_RADIX4_BITS: u32 = 6; // smallest size to consider radix 4 an option is 2^6 = 64
18 const MAX_RADER_PRIME_FACTOR: usize = 23; // don't use Raders if the inner fft length has prime factor larger than this
19 const MIN_BLUESTEIN_MIXED_RADIX_LEN: usize = 90; // only use mixed radix for the inner fft of Bluestein if length is larger than this
20 
21 /// A Recipe is a structure that describes the design of a FFT, without actually creating it.
22 /// It is used as a middle step in the planning process.
23 #[derive(Debug, PartialEq, Clone)]
24 pub enum Recipe {
25     Dft(usize),
26     MixedRadix {
27         left_fft: Arc<Recipe>,
28         right_fft: Arc<Recipe>,
29     },
30     #[allow(dead_code)]
31     GoodThomasAlgorithm {
32         left_fft: Arc<Recipe>,
33         right_fft: Arc<Recipe>,
34     },
35     MixedRadixSmall {
36         left_fft: Arc<Recipe>,
37         right_fft: Arc<Recipe>,
38     },
39     GoodThomasAlgorithmSmall {
40         left_fft: Arc<Recipe>,
41         right_fft: Arc<Recipe>,
42     },
43     RadersAlgorithm {
44         inner_fft: Arc<Recipe>,
45     },
46     BluesteinsAlgorithm {
47         len: usize,
48         inner_fft: Arc<Recipe>,
49     },
50     Radix4(usize),
51     Butterfly1,
52     Butterfly2,
53     Butterfly3,
54     Butterfly4,
55     Butterfly5,
56     Butterfly6,
57     Butterfly7,
58     Butterfly8,
59     Butterfly9,
60     Butterfly10,
61     Butterfly11,
62     Butterfly12,
63     Butterfly13,
64     Butterfly15,
65     Butterfly16,
66     Butterfly17,
67     Butterfly19,
68     Butterfly23,
69     Butterfly29,
70     Butterfly31,
71     Butterfly32,
72 }
73 
74 impl Recipe {
len(&self) -> usize75     pub fn len(&self) -> usize {
76         match self {
77             Recipe::Dft(length) => *length,
78             Recipe::Radix4(length) => *length,
79             Recipe::Butterfly1 => 1,
80             Recipe::Butterfly2 => 2,
81             Recipe::Butterfly3 => 3,
82             Recipe::Butterfly4 => 4,
83             Recipe::Butterfly5 => 5,
84             Recipe::Butterfly6 => 6,
85             Recipe::Butterfly7 => 7,
86             Recipe::Butterfly8 => 8,
87             Recipe::Butterfly9 => 9,
88             Recipe::Butterfly10 => 10,
89             Recipe::Butterfly11 => 11,
90             Recipe::Butterfly12 => 12,
91             Recipe::Butterfly13 => 13,
92             Recipe::Butterfly15 => 15,
93             Recipe::Butterfly16 => 16,
94             Recipe::Butterfly17 => 17,
95             Recipe::Butterfly19 => 19,
96             Recipe::Butterfly23 => 23,
97             Recipe::Butterfly29 => 29,
98             Recipe::Butterfly31 => 31,
99             Recipe::Butterfly32 => 32,
100             Recipe::MixedRadix {
101                 left_fft,
102                 right_fft,
103             } => left_fft.len() * right_fft.len(),
104             Recipe::GoodThomasAlgorithm {
105                 left_fft,
106                 right_fft,
107             } => left_fft.len() * right_fft.len(),
108             Recipe::MixedRadixSmall {
109                 left_fft,
110                 right_fft,
111             } => left_fft.len() * right_fft.len(),
112             Recipe::GoodThomasAlgorithmSmall {
113                 left_fft,
114                 right_fft,
115             } => left_fft.len() * right_fft.len(),
116             Recipe::RadersAlgorithm { inner_fft } => inner_fft.len() + 1,
117             Recipe::BluesteinsAlgorithm { len, .. } => *len,
118         }
119     }
120 }
121 
122 /// The SSE FFT planner creates new FFT algorithm instances using a mix of scalar and SSE accelerated algorithms.
123 /// It requires at least SSE4.1, which is available on all reasonably recent x86_64 cpus.
124 ///
125 /// RustFFT has several FFT algorithms available. For a given FFT size, the `FftPlannerSse` decides which of the
126 /// available FFT algorithms to use and then initializes them.
127 ///
128 /// ~~~
129 /// // Perform a forward Fft of size 1234
130 /// use std::sync::Arc;
131 /// use rustfft::{FftPlannerSse, num_complex::Complex};
132 ///
133 /// if let Ok(mut planner) = FftPlannerSse::new() {
134 ///   let fft = planner.plan_fft_forward(1234);
135 ///
136 ///   let mut buffer = vec![Complex{ re: 0.0f32, im: 0.0f32 }; 1234];
137 ///   fft.process(&mut buffer);
138 ///
139 ///   // The FFT instance returned by the planner has the type `Arc<dyn Fft<T>>`,
140 ///   // where T is the numeric type, ie f32 or f64, so it's cheap to clone
141 ///   let fft_clone = Arc::clone(&fft);
142 /// }
143 /// ~~~
144 ///
145 /// If you plan on creating multiple FFT instances, it is recommended to reuse the same planner for all of them. This
146 /// is because the planner re-uses internal data across FFT instances wherever possible, saving memory and reducing
147 /// setup time. (FFT instances created with one planner will never re-use data and buffers with FFT instances created
148 /// by a different planner)
149 ///
150 /// Each FFT instance owns [`Arc`s](std::sync::Arc) to its internal data, rather than borrowing it from the planner, so it's perfectly
151 /// safe to drop the planner after creating Fft instances.
152 pub struct FftPlannerSse<T: FftNum> {
153     algorithm_cache: FftCache<T>,
154     recipe_cache: HashMap<usize, Arc<Recipe>>,
155 }
156 
157 impl<T: FftNum> FftPlannerSse<T> {
158     /// Creates a new `FftPlannerSse` instance.
159     ///
160     /// Returns `Ok(planner_instance)` if this machine has the required instruction sets.
161     /// Returns `Err(())` if some instruction sets are missing.
new() -> Result<Self, ()>162     pub fn new() -> Result<Self, ()> {
163         if is_x86_feature_detected!("sse4.1") {
164             // Ideally, we would implement the planner with specialization.
165             // Specialization won't be on stable rust for a long time though, so in the meantime, we can hack around it.
166             //
167             // We use TypeID to determine if T is f32, f64, or neither. If neither, we don't want to do any SSE acceleration
168             // If it's f32 or f64, then construct and return a SSE planner instance.
169             //
170             // All SSE accelerated algorithms come in separate versions for f32 and f64. The type is checked when a new one is created, and if it does not
171             // match the type the FFT is meant for, it will panic. This will never be a problem if using a planner to construct the FFTs.
172             //
173             // An annoying snag with this setup is that we frequently have to transmute buffers from &mut [Complex<T>] to &mut [Complex<f32 or f64>] or vice versa.
174             // We know this is safe because we assert everywhere that Type(f32 or f64)==Type(T), so it's just a matter of "doing it right" every time.
175             // These transmutes are required because the FFT algorithm's input will come through the FFT trait, which may only be bounded by FftNum.
176             // So the buffers will have the type &mut [Complex<T>].
177             let id_f32 = TypeId::of::<f32>();
178             let id_f64 = TypeId::of::<f64>();
179             let id_t = TypeId::of::<T>();
180 
181             if id_t == id_f32 || id_t == id_f64 {
182                 return Ok(Self {
183                     algorithm_cache: FftCache::new(),
184                     recipe_cache: HashMap::new(),
185                 });
186             }
187         }
188         Err(())
189     }
190 
191     /// Returns a `Fft` instance which uses SSE4.1 instructions to compute FFTs of size `len`.
192     ///
193     /// If the provided `direction` is `FftDirection::Forward`, the returned instance will compute forward FFTs. If it's `FftDirection::Inverse`, it will compute inverse FFTs.
194     ///
195     /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>>196     pub fn plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>> {
197         // Step 1: Create a "recipe" for this FFT, which will tell us exactly which combination of algorithms to use
198         let recipe = self.design_fft_for_len(len);
199 
200         // Step 2: Use our recipe to construct a Fft trait object
201         self.build_fft(&recipe, direction)
202     }
203 
204     /// Returns a `Fft` instance which uses SSE4.1 instructions to compute forward FFTs of size `len`
205     ///
206     /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>>207     pub fn plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>> {
208         self.plan_fft(len, FftDirection::Forward)
209     }
210 
211     /// Returns a `Fft` instance which uses SSE4.1 instructions to compute inverse FFTs of size `len.
212     ///
213     /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>>214     pub fn plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>> {
215         self.plan_fft(len, FftDirection::Inverse)
216     }
217 
218     // Make a recipe for a length
design_fft_for_len(&mut self, len: usize) -> Arc<Recipe>219     fn design_fft_for_len(&mut self, len: usize) -> Arc<Recipe> {
220         if len < 1 {
221             Arc::new(Recipe::Dft(len))
222         } else if let Some(recipe) = self.recipe_cache.get(&len) {
223             Arc::clone(&recipe)
224         } else {
225             let factors = PrimeFactors::compute(len);
226             let recipe = self.design_fft_with_factors(len, factors);
227             self.recipe_cache.insert(len, Arc::clone(&recipe));
228             recipe
229         }
230     }
231 
232     // Create the fft from a recipe, take from cache if possible
build_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>>233     fn build_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> {
234         let len = recipe.len();
235         if let Some(instance) = self.algorithm_cache.get(len, direction) {
236             instance
237         } else {
238             let fft = self.build_new_fft(recipe, direction);
239             self.algorithm_cache.insert(&fft);
240             fft
241         }
242     }
243 
244     // Create a new fft from a recipe
build_new_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>>245     fn build_new_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> {
246         let id_f32 = TypeId::of::<f32>();
247         let id_f64 = TypeId::of::<f64>();
248         let id_t = TypeId::of::<T>();
249 
250         match recipe {
251             Recipe::Dft(len) => Arc::new(Dft::new(*len, direction)) as Arc<dyn Fft<T>>,
252             Recipe::Radix4(len) => {
253                 if id_t == id_f32 {
254                     Arc::new(Sse32Radix4::new(*len, direction)) as Arc<dyn Fft<T>>
255                 } else if id_t == id_f64 {
256                     Arc::new(Sse64Radix4::new(*len, direction)) as Arc<dyn Fft<T>>
257                 } else {
258                     panic!("Not f32 or f64");
259                 }
260             }
261             Recipe::Butterfly1 => {
262                 if id_t == id_f32 {
263                     Arc::new(SseF32Butterfly1::new(direction)) as Arc<dyn Fft<T>>
264                 } else if id_t == id_f64 {
265                     Arc::new(SseF64Butterfly1::new(direction)) as Arc<dyn Fft<T>>
266                 } else {
267                     panic!("Not f32 or f64");
268                 }
269             }
270             Recipe::Butterfly2 => {
271                 if id_t == id_f32 {
272                     Arc::new(SseF32Butterfly2::new(direction)) as Arc<dyn Fft<T>>
273                 } else if id_t == id_f64 {
274                     Arc::new(SseF64Butterfly2::new(direction)) as Arc<dyn Fft<T>>
275                 } else {
276                     panic!("Not f32 or f64");
277                 }
278             }
279             Recipe::Butterfly3 => {
280                 if id_t == id_f32 {
281                     Arc::new(SseF32Butterfly3::new(direction)) as Arc<dyn Fft<T>>
282                 } else if id_t == id_f64 {
283                     Arc::new(SseF64Butterfly3::new(direction)) as Arc<dyn Fft<T>>
284                 } else {
285                     panic!("Not f32 or f64");
286                 }
287             }
288             Recipe::Butterfly4 => {
289                 if id_t == id_f32 {
290                     Arc::new(SseF32Butterfly4::new(direction)) as Arc<dyn Fft<T>>
291                 } else if id_t == id_f64 {
292                     Arc::new(SseF64Butterfly4::new(direction)) as Arc<dyn Fft<T>>
293                 } else {
294                     panic!("Not f32 or f64");
295                 }
296             }
297             Recipe::Butterfly5 => {
298                 if id_t == id_f32 {
299                     Arc::new(SseF32Butterfly5::new(direction)) as Arc<dyn Fft<T>>
300                 } else if id_t == id_f64 {
301                     Arc::new(SseF64Butterfly5::new(direction)) as Arc<dyn Fft<T>>
302                 } else {
303                     panic!("Not f32 or f64");
304                 }
305             }
306             Recipe::Butterfly6 => {
307                 if id_t == id_f32 {
308                     Arc::new(SseF32Butterfly6::new(direction)) as Arc<dyn Fft<T>>
309                 } else if id_t == id_f64 {
310                     Arc::new(SseF64Butterfly6::new(direction)) as Arc<dyn Fft<T>>
311                 } else {
312                     panic!("Not f32 or f64");
313                 }
314             }
315             Recipe::Butterfly7 => {
316                 if id_t == id_f32 {
317                     Arc::new(SseF32Butterfly7::new(direction)) as Arc<dyn Fft<T>>
318                 } else if id_t == id_f64 {
319                     Arc::new(SseF64Butterfly7::new(direction)) as Arc<dyn Fft<T>>
320                 } else {
321                     panic!("Not f32 or f64");
322                 }
323             }
324             Recipe::Butterfly8 => {
325                 if id_t == id_f32 {
326                     Arc::new(SseF32Butterfly8::new(direction)) as Arc<dyn Fft<T>>
327                 } else if id_t == id_f64 {
328                     Arc::new(SseF64Butterfly8::new(direction)) as Arc<dyn Fft<T>>
329                 } else {
330                     panic!("Not f32 or f64");
331                 }
332             }
333             Recipe::Butterfly9 => {
334                 if id_t == id_f32 {
335                     Arc::new(SseF32Butterfly9::new(direction)) as Arc<dyn Fft<T>>
336                 } else if id_t == id_f64 {
337                     Arc::new(SseF64Butterfly9::new(direction)) as Arc<dyn Fft<T>>
338                 } else {
339                     panic!("Not f32 or f64");
340                 }
341             }
342             Recipe::Butterfly10 => {
343                 if id_t == id_f32 {
344                     Arc::new(SseF32Butterfly10::new(direction)) as Arc<dyn Fft<T>>
345                 } else if id_t == id_f64 {
346                     Arc::new(SseF64Butterfly10::new(direction)) as Arc<dyn Fft<T>>
347                 } else {
348                     panic!("Not f32 or f64");
349                 }
350             }
351             Recipe::Butterfly11 => {
352                 if id_t == id_f32 {
353                     Arc::new(SseF32Butterfly11::new(direction)) as Arc<dyn Fft<T>>
354                 } else if id_t == id_f64 {
355                     Arc::new(SseF64Butterfly11::new(direction)) as Arc<dyn Fft<T>>
356                 } else {
357                     panic!("Not f32 or f64");
358                 }
359             }
360             Recipe::Butterfly12 => {
361                 if id_t == id_f32 {
362                     Arc::new(SseF32Butterfly12::new(direction)) as Arc<dyn Fft<T>>
363                 } else if id_t == id_f64 {
364                     Arc::new(SseF64Butterfly12::new(direction)) as Arc<dyn Fft<T>>
365                 } else {
366                     panic!("Not f32 or f64");
367                 }
368             }
369             Recipe::Butterfly13 => {
370                 if id_t == id_f32 {
371                     Arc::new(SseF32Butterfly13::new(direction)) as Arc<dyn Fft<T>>
372                 } else if id_t == id_f64 {
373                     Arc::new(SseF64Butterfly13::new(direction)) as Arc<dyn Fft<T>>
374                 } else {
375                     panic!("Not f32 or f64");
376                 }
377             }
378             Recipe::Butterfly15 => {
379                 if id_t == id_f32 {
380                     Arc::new(SseF32Butterfly15::new(direction)) as Arc<dyn Fft<T>>
381                 } else if id_t == id_f64 {
382                     Arc::new(SseF64Butterfly15::new(direction)) as Arc<dyn Fft<T>>
383                 } else {
384                     panic!("Not f32 or f64");
385                 }
386             }
387             Recipe::Butterfly16 => {
388                 if id_t == id_f32 {
389                     Arc::new(SseF32Butterfly16::new(direction)) as Arc<dyn Fft<T>>
390                 } else if id_t == id_f64 {
391                     Arc::new(SseF64Butterfly16::new(direction)) as Arc<dyn Fft<T>>
392                 } else {
393                     panic!("Not f32 or f64");
394                 }
395             }
396             Recipe::Butterfly17 => {
397                 if id_t == id_f32 {
398                     Arc::new(SseF32Butterfly17::new(direction)) as Arc<dyn Fft<T>>
399                 } else if id_t == id_f64 {
400                     Arc::new(SseF64Butterfly17::new(direction)) as Arc<dyn Fft<T>>
401                 } else {
402                     panic!("Not f32 or f64");
403                 }
404             }
405             Recipe::Butterfly19 => {
406                 if id_t == id_f32 {
407                     Arc::new(SseF32Butterfly19::new(direction)) as Arc<dyn Fft<T>>
408                 } else if id_t == id_f64 {
409                     Arc::new(SseF64Butterfly19::new(direction)) as Arc<dyn Fft<T>>
410                 } else {
411                     panic!("Not f32 or f64");
412                 }
413             }
414             Recipe::Butterfly23 => {
415                 if id_t == id_f32 {
416                     Arc::new(SseF32Butterfly23::new(direction)) as Arc<dyn Fft<T>>
417                 } else if id_t == id_f64 {
418                     Arc::new(SseF64Butterfly23::new(direction)) as Arc<dyn Fft<T>>
419                 } else {
420                     panic!("Not f32 or f64");
421                 }
422             }
423             Recipe::Butterfly29 => {
424                 if id_t == id_f32 {
425                     Arc::new(SseF32Butterfly29::new(direction)) as Arc<dyn Fft<T>>
426                 } else if id_t == id_f64 {
427                     Arc::new(SseF64Butterfly29::new(direction)) as Arc<dyn Fft<T>>
428                 } else {
429                     panic!("Not f32 or f64");
430                 }
431             }
432             Recipe::Butterfly31 => {
433                 if id_t == id_f32 {
434                     Arc::new(SseF32Butterfly31::new(direction)) as Arc<dyn Fft<T>>
435                 } else if id_t == id_f64 {
436                     Arc::new(SseF64Butterfly31::new(direction)) as Arc<dyn Fft<T>>
437                 } else {
438                     panic!("Not f32 or f64");
439                 }
440             }
441             Recipe::Butterfly32 => {
442                 if id_t == id_f32 {
443                     Arc::new(SseF32Butterfly32::new(direction)) as Arc<dyn Fft<T>>
444                 } else if id_t == id_f64 {
445                     Arc::new(SseF64Butterfly32::new(direction)) as Arc<dyn Fft<T>>
446                 } else {
447                     panic!("Not f32 or f64");
448                 }
449             }
450             Recipe::MixedRadix {
451                 left_fft,
452                 right_fft,
453             } => {
454                 let left_fft = self.build_fft(&left_fft, direction);
455                 let right_fft = self.build_fft(&right_fft, direction);
456                 Arc::new(MixedRadix::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
457             }
458             Recipe::GoodThomasAlgorithm {
459                 left_fft,
460                 right_fft,
461             } => {
462                 let left_fft = self.build_fft(&left_fft, direction);
463                 let right_fft = self.build_fft(&right_fft, direction);
464                 Arc::new(GoodThomasAlgorithm::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
465             }
466             Recipe::MixedRadixSmall {
467                 left_fft,
468                 right_fft,
469             } => {
470                 let left_fft = self.build_fft(&left_fft, direction);
471                 let right_fft = self.build_fft(&right_fft, direction);
472                 Arc::new(MixedRadixSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
473             }
474             Recipe::GoodThomasAlgorithmSmall {
475                 left_fft,
476                 right_fft,
477             } => {
478                 let left_fft = self.build_fft(&left_fft, direction);
479                 let right_fft = self.build_fft(&right_fft, direction);
480                 Arc::new(GoodThomasAlgorithmSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
481             }
482             Recipe::RadersAlgorithm { inner_fft } => {
483                 let inner_fft = self.build_fft(&inner_fft, direction);
484                 Arc::new(RadersAlgorithm::new(inner_fft)) as Arc<dyn Fft<T>>
485             }
486             Recipe::BluesteinsAlgorithm { len, inner_fft } => {
487                 let inner_fft = self.build_fft(&inner_fft, direction);
488                 Arc::new(BluesteinsAlgorithm::new(*len, inner_fft)) as Arc<dyn Fft<T>>
489             }
490         }
491     }
492 
design_fft_with_factors(&mut self, len: usize, factors: PrimeFactors) -> Arc<Recipe>493     fn design_fft_with_factors(&mut self, len: usize, factors: PrimeFactors) -> Arc<Recipe> {
494         if let Some(fft_instance) = self.design_butterfly_algorithm(len) {
495             fft_instance
496         } else if factors.is_prime() {
497             self.design_prime(len)
498         } else if len.trailing_zeros() >= MIN_RADIX4_BITS {
499             if len.is_power_of_two() {
500                 Arc::new(Recipe::Radix4(len))
501             } else {
502                 let non_power_of_two = factors
503                     .remove_factors(PrimeFactor {
504                         value: 2,
505                         count: len.trailing_zeros(),
506                     })
507                     .unwrap();
508                 let power_of_two = PrimeFactors::compute(1 << len.trailing_zeros());
509                 self.design_mixed_radix(power_of_two, non_power_of_two)
510             }
511         } else {
512             // Can we do this as a mixed radix with just two butterflies?
513             // Loop through and find all combinations
514             // If more than one is found, keep the one where the factors are closer together.
515             // For example length 20 where 10x2 and 5x4 are possible, we use 5x4.
516             let butterflies: [usize; 20] = [
517                 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 23, 29, 31, 32,
518             ];
519             let mut bf_left = 0;
520             let mut bf_right = 0;
521             // If the length is below 14, or over 1024 we don't need to try this.
522             if len > 13 && len <= 1024 {
523                 for (n, bf_l) in butterflies.iter().enumerate() {
524                     if len % bf_l == 0 {
525                         let bf_r = len / bf_l;
526                         if butterflies.iter().skip(n).any(|&m| m == bf_r) {
527                             bf_left = *bf_l;
528                             bf_right = bf_r;
529                         }
530                     }
531                 }
532                 if bf_left > 0 {
533                     let fact_l = PrimeFactors::compute(bf_left);
534                     let fact_r = PrimeFactors::compute(bf_right);
535                     return self.design_mixed_radix(fact_l, fact_r);
536                 }
537             }
538             // Not possible with just butterflies, go with the general solution.
539             let (left_factors, right_factors) = factors.partition_factors();
540             self.design_mixed_radix(left_factors, right_factors)
541         }
542     }
543 
design_mixed_radix( &mut self, left_factors: PrimeFactors, right_factors: PrimeFactors, ) -> Arc<Recipe>544     fn design_mixed_radix(
545         &mut self,
546         left_factors: PrimeFactors,
547         right_factors: PrimeFactors,
548     ) -> Arc<Recipe> {
549         let left_len = left_factors.get_product();
550         let right_len = right_factors.get_product();
551 
552         //neither size is a butterfly, so go with the normal algorithm
553         let left_fft = self.design_fft_with_factors(left_len, left_factors);
554         let right_fft = self.design_fft_with_factors(right_len, right_factors);
555 
556         //if both left_len and right_len are small, use algorithms optimized for small FFTs
557         if left_len < 33 && right_len < 33 {
558             // for small FFTs, if gcd is 1, good-thomas is faster
559             if gcd(left_len, right_len) == 1 {
560                 Arc::new(Recipe::GoodThomasAlgorithmSmall {
561                     left_fft,
562                     right_fft,
563                 })
564             } else {
565                 Arc::new(Recipe::MixedRadixSmall {
566                     left_fft,
567                     right_fft,
568                 })
569             }
570         } else {
571             Arc::new(Recipe::MixedRadix {
572                 left_fft,
573                 right_fft,
574             })
575         }
576     }
577 
578     // Returns Some(instance) if we have a butterfly available for this size. Returns None if there is no butterfly available for this size
design_butterfly_algorithm(&mut self, len: usize) -> Option<Arc<Recipe>>579     fn design_butterfly_algorithm(&mut self, len: usize) -> Option<Arc<Recipe>> {
580         match len {
581             1 => Some(Arc::new(Recipe::Butterfly1)),
582             2 => Some(Arc::new(Recipe::Butterfly2)),
583             3 => Some(Arc::new(Recipe::Butterfly3)),
584             4 => Some(Arc::new(Recipe::Butterfly4)),
585             5 => Some(Arc::new(Recipe::Butterfly5)),
586             6 => Some(Arc::new(Recipe::Butterfly6)),
587             7 => Some(Arc::new(Recipe::Butterfly7)),
588             8 => Some(Arc::new(Recipe::Butterfly8)),
589             9 => Some(Arc::new(Recipe::Butterfly9)),
590             10 => Some(Arc::new(Recipe::Butterfly10)),
591             11 => Some(Arc::new(Recipe::Butterfly11)),
592             12 => Some(Arc::new(Recipe::Butterfly12)),
593             13 => Some(Arc::new(Recipe::Butterfly13)),
594             15 => Some(Arc::new(Recipe::Butterfly15)),
595             16 => Some(Arc::new(Recipe::Butterfly16)),
596             17 => Some(Arc::new(Recipe::Butterfly17)),
597             19 => Some(Arc::new(Recipe::Butterfly19)),
598             23 => Some(Arc::new(Recipe::Butterfly23)),
599             29 => Some(Arc::new(Recipe::Butterfly29)),
600             31 => Some(Arc::new(Recipe::Butterfly31)),
601             32 => Some(Arc::new(Recipe::Butterfly32)),
602             _ => None,
603         }
604     }
605 
design_prime(&mut self, len: usize) -> Arc<Recipe>606     fn design_prime(&mut self, len: usize) -> Arc<Recipe> {
607         let inner_fft_len_rader = len - 1;
608         let raders_factors = PrimeFactors::compute(inner_fft_len_rader);
609         // If any of the prime factors is too large, Rader's gets slow and Bluestein's is the better choice
610         if raders_factors
611             .get_other_factors()
612             .iter()
613             .any(|val| val.value > MAX_RADER_PRIME_FACTOR)
614         {
615             let inner_fft_len_pow2 = (2 * len - 1).checked_next_power_of_two().unwrap();
616             // for long ffts a mixed radix inner fft is faster than a longer radix4
617             let min_inner_len = 2 * len - 1;
618             let mixed_radix_len = 3 * inner_fft_len_pow2 / 4;
619             let inner_fft =
620                 if mixed_radix_len >= min_inner_len && len >= MIN_BLUESTEIN_MIXED_RADIX_LEN {
621                     let mixed_radix_factors = PrimeFactors::compute(mixed_radix_len);
622                     self.design_fft_with_factors(mixed_radix_len, mixed_radix_factors)
623                 } else {
624                     Arc::new(Recipe::Radix4(inner_fft_len_pow2))
625                 };
626             Arc::new(Recipe::BluesteinsAlgorithm { len, inner_fft })
627         } else {
628             let inner_fft = self.design_fft_with_factors(inner_fft_len_rader, raders_factors);
629             Arc::new(Recipe::RadersAlgorithm { inner_fft })
630         }
631     }
632 }
633 
634 #[cfg(test)]
635 mod unit_tests {
636     use super::*;
637 
is_mixedradix(plan: &Recipe) -> bool638     fn is_mixedradix(plan: &Recipe) -> bool {
639         match plan {
640             &Recipe::MixedRadix { .. } => true,
641             _ => false,
642         }
643     }
644 
is_mixedradixsmall(plan: &Recipe) -> bool645     fn is_mixedradixsmall(plan: &Recipe) -> bool {
646         match plan {
647             &Recipe::MixedRadixSmall { .. } => true,
648             _ => false,
649         }
650     }
651 
is_goodthomassmall(plan: &Recipe) -> bool652     fn is_goodthomassmall(plan: &Recipe) -> bool {
653         match plan {
654             &Recipe::GoodThomasAlgorithmSmall { .. } => true,
655             _ => false,
656         }
657     }
658 
is_raders(plan: &Recipe) -> bool659     fn is_raders(plan: &Recipe) -> bool {
660         match plan {
661             &Recipe::RadersAlgorithm { .. } => true,
662             _ => false,
663         }
664     }
665 
is_bluesteins(plan: &Recipe) -> bool666     fn is_bluesteins(plan: &Recipe) -> bool {
667         match plan {
668             &Recipe::BluesteinsAlgorithm { .. } => true,
669             _ => false,
670         }
671     }
672 
673     #[test]
test_plan_sse_trivial()674     fn test_plan_sse_trivial() {
675         // Length 0 and 1 should use Dft
676         let mut planner = FftPlannerSse::<f64>::new().unwrap();
677         for len in 0..1 {
678             let plan = planner.design_fft_for_len(len);
679             assert_eq!(*plan, Recipe::Dft(len));
680             assert_eq!(plan.len(), len, "Recipe reports wrong length");
681         }
682     }
683 
684     #[test]
test_plan_sse_largepoweroftwo()685     fn test_plan_sse_largepoweroftwo() {
686         // Powers of 2 above 6 should use Radix4
687         let mut planner = FftPlannerSse::<f64>::new().unwrap();
688         for pow in 6..32 {
689             let len = 1 << pow;
690             let plan = planner.design_fft_for_len(len);
691             assert_eq!(*plan, Recipe::Radix4(len));
692             assert_eq!(plan.len(), len, "Recipe reports wrong length");
693         }
694     }
695 
696     #[test]
test_plan_sse_butterflies()697     fn test_plan_sse_butterflies() {
698         // Check that all butterflies are used
699         let mut planner = FftPlannerSse::<f64>::new().unwrap();
700         assert_eq!(*planner.design_fft_for_len(2), Recipe::Butterfly2);
701         assert_eq!(*planner.design_fft_for_len(3), Recipe::Butterfly3);
702         assert_eq!(*planner.design_fft_for_len(4), Recipe::Butterfly4);
703         assert_eq!(*planner.design_fft_for_len(5), Recipe::Butterfly5);
704         assert_eq!(*planner.design_fft_for_len(6), Recipe::Butterfly6);
705         assert_eq!(*planner.design_fft_for_len(7), Recipe::Butterfly7);
706         assert_eq!(*planner.design_fft_for_len(8), Recipe::Butterfly8);
707         assert_eq!(*planner.design_fft_for_len(9), Recipe::Butterfly9);
708         assert_eq!(*planner.design_fft_for_len(10), Recipe::Butterfly10);
709         assert_eq!(*planner.design_fft_for_len(11), Recipe::Butterfly11);
710         assert_eq!(*planner.design_fft_for_len(12), Recipe::Butterfly12);
711         assert_eq!(*planner.design_fft_for_len(13), Recipe::Butterfly13);
712         assert_eq!(*planner.design_fft_for_len(15), Recipe::Butterfly15);
713         assert_eq!(*planner.design_fft_for_len(16), Recipe::Butterfly16);
714         assert_eq!(*planner.design_fft_for_len(17), Recipe::Butterfly17);
715         assert_eq!(*planner.design_fft_for_len(19), Recipe::Butterfly19);
716         assert_eq!(*planner.design_fft_for_len(23), Recipe::Butterfly23);
717         assert_eq!(*planner.design_fft_for_len(29), Recipe::Butterfly29);
718         assert_eq!(*planner.design_fft_for_len(31), Recipe::Butterfly31);
719         assert_eq!(*planner.design_fft_for_len(32), Recipe::Butterfly32);
720     }
721 
722     #[test]
test_plan_sse_mixedradix()723     fn test_plan_sse_mixedradix() {
724         // Products of several different primes should become MixedRadix
725         let mut planner = FftPlannerSse::<f64>::new().unwrap();
726         for pow2 in 2..5 {
727             for pow3 in 2..5 {
728                 for pow5 in 2..5 {
729                     for pow7 in 2..5 {
730                         let len = 2usize.pow(pow2)
731                             * 3usize.pow(pow3)
732                             * 5usize.pow(pow5)
733                             * 7usize.pow(pow7);
734                         let plan = planner.design_fft_for_len(len);
735                         assert!(is_mixedradix(&plan), "Expected MixedRadix, got {:?}", plan);
736                         assert_eq!(plan.len(), len, "Recipe reports wrong length");
737                     }
738                 }
739             }
740         }
741     }
742 
743     #[test]
test_plan_sse_mixedradixsmall()744     fn test_plan_sse_mixedradixsmall() {
745         // Products of two "small" lengths < 31 that have a common divisor >1, and isn't a power of 2 should be MixedRadixSmall
746         let mut planner = FftPlannerSse::<f64>::new().unwrap();
747         for len in [5 * 20, 5 * 25].iter() {
748             let plan = planner.design_fft_for_len(*len);
749             assert!(
750                 is_mixedradixsmall(&plan),
751                 "Expected MixedRadixSmall, got {:?}",
752                 plan
753             );
754             assert_eq!(plan.len(), *len, "Recipe reports wrong length");
755         }
756     }
757 
758     #[test]
test_plan_sse_goodthomasbutterfly()759     fn test_plan_sse_goodthomasbutterfly() {
760         let mut planner = FftPlannerSse::<f64>::new().unwrap();
761         for len in [3 * 7, 5 * 7, 11 * 13, 2 * 29].iter() {
762             let plan = planner.design_fft_for_len(*len);
763             assert!(
764                 is_goodthomassmall(&plan),
765                 "Expected GoodThomasAlgorithmSmall, got {:?}",
766                 plan
767             );
768             assert_eq!(plan.len(), *len, "Recipe reports wrong length");
769         }
770     }
771 
772     #[test]
test_plan_sse_bluestein_vs_rader()773     fn test_plan_sse_bluestein_vs_rader() {
774         let difficultprimes: [usize; 11] = [59, 83, 107, 149, 167, 173, 179, 359, 719, 1439, 2879];
775         let easyprimes: [usize; 24] = [
776             53, 61, 67, 71, 73, 79, 89, 97, 101, 103, 109, 113, 127, 131, 137, 139, 151, 157, 163,
777             181, 191, 193, 197, 199,
778         ];
779 
780         let mut planner = FftPlannerSse::<f64>::new().unwrap();
781         for len in difficultprimes.iter() {
782             let plan = planner.design_fft_for_len(*len);
783             assert!(
784                 is_bluesteins(&plan),
785                 "Expected BluesteinsAlgorithm, got {:?}",
786                 plan
787             );
788             assert_eq!(plan.len(), *len, "Recipe reports wrong length");
789         }
790         for len in easyprimes.iter() {
791             let plan = planner.design_fft_for_len(*len);
792             assert!(is_raders(&plan), "Expected RadersAlgorithm, got {:?}", plan);
793             assert_eq!(plan.len(), *len, "Recipe reports wrong length");
794         }
795     }
796 
797     #[test]
test_sse_fft_cache()798     fn test_sse_fft_cache() {
799         {
800             // Check that FFTs are reused if they're both forward
801             let mut planner = FftPlannerSse::<f64>::new().unwrap();
802             let fft_a = planner.plan_fft(1234, FftDirection::Forward);
803             let fft_b = planner.plan_fft(1234, FftDirection::Forward);
804             assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused");
805         }
806         {
807             // Check that FFTs are reused if they're both inverse
808             let mut planner = FftPlannerSse::<f64>::new().unwrap();
809             let fft_a = planner.plan_fft(1234, FftDirection::Inverse);
810             let fft_b = planner.plan_fft(1234, FftDirection::Inverse);
811             assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused");
812         }
813         {
814             // Check that FFTs are NOT resued if they don't both have the same direction
815             let mut planner = FftPlannerSse::<f64>::new().unwrap();
816             let fft_a = planner.plan_fft(1234, FftDirection::Forward);
817             let fft_b = planner.plan_fft(1234, FftDirection::Inverse);
818             assert!(
819                 !Arc::ptr_eq(&fft_a, &fft_b),
820                 "Existing fft was reused, even though directions don't match"
821             );
822         }
823     }
824 
825     #[test]
test_sse_recipe_cache()826     fn test_sse_recipe_cache() {
827         // Check that all butterflies are used
828         let mut planner = FftPlannerSse::<f64>::new().unwrap();
829         let fft_a = planner.design_fft_for_len(1234);
830         let fft_b = planner.design_fft_for_len(1234);
831         assert!(
832             Arc::ptr_eq(&fft_a, &fft_b),
833             "Existing recipe was not reused"
834         );
835     }
836 }
837