1 /***************************************************************************
2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and         *
3 * Martin Renou                                                             *
4 * Copyright (c) QuantStack                                                 *
5 *                                                                          *
6 * Distributed under the terms of the BSD 3-Clause License.                 *
7 *                                                                          *
8 * The full license is in the file LICENSE, distributed with this software. *
9 ****************************************************************************/
10 
11 #ifndef XSIMD_AVX_COMPLEX_HPP
12 #define XSIMD_AVX_COMPLEX_HPP
13 
14 #include <complex>
15 #include <tuple>
16 #include <utility>
17 
18 #ifdef XSIMD_ENABLE_XTL_COMPLEX
19 #include "xtl/xcomplex.hpp"
20 #endif
21 
22 #include "xsimd_avx_float.hpp"
23 #include "xsimd_avx_double.hpp"
24 #include "xsimd_complex_base.hpp"
25 
26 namespace xsimd
27 {
28 
29     /**************************************
30      * batch_bool<std::complex<float>, 8> *
31      **************************************/
32 
33     template <>
34     struct simd_batch_traits<batch_bool<std::complex<float>, 8>>
35         : complex_batch_bool_traits<std::complex<float>, float, 8, 32>
36     {
37     };
38 
39     template<>
40     class batch_bool<std::complex<float>, 8>
41         : public simd_complex_batch_bool<batch_bool<std::complex<float>, 8>>
42     {
43     public:
44 
45         using self_type = batch_bool<std::complex<float>, 8>;
46         using base_type = simd_complex_batch_bool<self_type>;
47         using real_batch = batch_bool<float, 8>;
48 
49         batch_bool() = default;
50         using base_type::base_type;
51 
52         // VS2015 has a bug with inheriting constructors involving SFINAE
batch_bool(bool b0,bool b1,bool b2,bool b3,bool b4,bool b5,bool b6,bool b7)53         batch_bool(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7)
54             : base_type(real_batch(b0, b1, b2, b3, b4, b5, b6, b7))
55         {
56         }
57     };
58 
59     /*********************************
60      * batch<std::complex<float>, 8> *
61      *********************************/
62 
63     template <>
64     struct simd_batch_traits<batch<std::complex<float>, 8>>
65         : complex_batch_traits<std::complex<float>, float, 8, 32>
66     {
67     };
68 
69     template <>
70     class batch<std::complex<float>, 8>
71         : public simd_complex_batch<batch<std::complex<float>, 8>>
72     {
73     public:
74 
75         using self_type = batch<std::complex<float>, 8>;
76         using base_type = simd_complex_batch<self_type>;
77         using value_type = std::complex<float>;
78         using real_batch = batch<float, 8>;
79 
80         batch() = default;
81         using base_type::base_type;
82 
83         // VS2015 has a bug with inheriting constructors involving SFINAE
batch(value_type c0,value_type c1,value_type c2,value_type c3,value_type c4,value_type c5,value_type c6,value_type c7)84         batch(value_type c0, value_type c1, value_type c2, value_type c3,
85             value_type c4, value_type c5, value_type c6, value_type c7)
86             : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real(), c4.real(), c5.real(), c6.real(), c7.real()),
87                         real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag(), c4.imag(), c5.imag(), c6.imag(), c7.imag()))
88         {
89         }
90 
91     private:
92 
93         batch& load_complex(const real_batch& hi, const real_batch& lo);
94         real_batch get_complex_high() const;
95         real_batch get_complex_low() const;
96 
97         friend class simd_complex_batch<batch<std::complex<float>, 8>>;
98     };
99 
100     /***************************************
101      * batch_bool<std::complex<double>, 4> *
102      ***************************************/
103 
104     template <>
105     struct simd_batch_traits<batch_bool<std::complex<double>, 4>>
106         : complex_batch_bool_traits<std::complex<double>, double, 4, 32>
107     {
108     };
109 
110     template<>
111     class batch_bool<std::complex<double>, 4>
112         : public simd_complex_batch_bool<batch_bool<std::complex<double>, 4>>
113     {
114     public:
115 
116         using self_type = batch_bool<std::complex<double>, 4>;
117         using base_type = simd_complex_batch_bool<self_type>;
118         using real_batch = batch_bool<double, 4>;
119 
120         batch_bool() = default;
121         using base_type::base_type;
122 
123         // VS2015 has a bug with inheriting constructors involving SFINAE
batch_bool(bool b0,bool b1,bool b2,bool b3)124         batch_bool(bool b0, bool b1, bool b2, bool b3)
125             : base_type(real_batch(b0, b1, b2, b3))
126         {
127         }
128     };
129 
130     /**********************************
131      * batch<std::complex<double>, 4> *
132      **********************************/
133 
134     template <>
135     struct simd_batch_traits<batch<std::complex<double>, 4>>
136         : complex_batch_traits<std::complex<double>, double, 4, 32>
137     {
138     };
139 
140     template <>
141     class batch<std::complex<double>, 4>
142         : public simd_complex_batch<batch<std::complex<double>, 4>>
143     {
144     public:
145 
146         using self_type = batch<std::complex<double>, 4>;
147         using base_type = simd_complex_batch<self_type>;
148         using value_type = std::complex<double>;
149         using real_batch = batch<double, 4>;
150 
151         batch() = default;
152         using base_type::base_type;
153 
154         // VS2015 has a bug with inheriting constructors involving SFINAE
batch(value_type c0,value_type c1,value_type c2,value_type c3)155         batch(value_type c0, value_type c1, value_type c2, value_type c3)
156             : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real()),
157                         real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag()))
158         {
159         }
160 
161     private:
162 
163         batch& load_complex(const real_batch& hi, const real_batch& lo);
164         real_batch get_complex_high() const;
165         real_batch get_complex_low() const;
166 
167         friend class simd_complex_batch<batch<std::complex<double>, 4>>;
168     };
169 
170     /**********************************************
171      * common functions to avoid code duplication *
172      **********************************************/
173 
174     namespace detail
175     {
176         template <class B>
load_complex_f(const B & hi,const B & lo)177         inline std::pair<B, B> load_complex_f(const B& hi, const B& lo)
178         {
179 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
180             B real = _mm256_castpd_ps(
181                          _mm256_permute4x64_pd(
182                              _mm256_castps_pd(_mm256_shuffle_ps(hi, lo, _MM_SHUFFLE(2, 0, 2, 0))),
183                              _MM_SHUFFLE(3, 1, 2, 0)));
184             B imag = _mm256_castpd_ps(
185                          _mm256_permute4x64_pd(
186                              _mm256_castps_pd(_mm256_shuffle_ps(hi, lo, _MM_SHUFFLE(3, 1, 3, 1))),
187                              _MM_SHUFFLE(3, 1, 2, 0)));
188 
189 #else
190             __m128 tmp0 = _mm256_extractf128_ps(hi, 0);
191             __m128 tmp1 = _mm256_extractf128_ps(hi, 1);
192             __m128 tmp_real = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(2, 0, 2, 0));
193             __m128 tmp_imag = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
194             B real, imag;
195             real = _mm256_insertf128_ps(real, tmp_real, 0);
196             imag = _mm256_insertf128_ps(imag, tmp_imag, 0);
197             tmp0 = _mm256_extractf128_ps(lo, 0);
198             tmp1 = _mm256_extractf128_ps(lo, 1);
199             tmp_real = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(2, 0, 2, 0));
200             tmp_imag = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(3, 1, 3, 1));
201             real = _mm256_insertf128_ps(real, tmp_real, 1);
202             imag = _mm256_insertf128_ps(imag, tmp_imag, 1);
203 #endif
204             return std::make_pair(real, imag);
205         }
206 
207         // On clang, _mm256_extractf128_ps is built upon build_shufflevector
208         // which require index parameter to be a constant
209         template <int index, class B>
get_half_complex_f(const B & real,const B & imag)210         inline B get_half_complex_f(const B& real, const B& imag)
211         {
212             __m128 tmp0 = _mm256_extractf128_ps(real, index);
213             __m128 tmp1 = _mm256_extractf128_ps(imag, index);
214             __m128 tmp2 = _mm_unpackhi_ps(tmp0, tmp1);
215             tmp0 = _mm_unpacklo_ps(tmp0, tmp1);
216             __m256 res = real;
217             res = _mm256_insertf128_ps(res, tmp0, 0);
218             res = _mm256_insertf128_ps(res, tmp2, 1);
219             return res;
220         }
221 
222         template <class B>
get_complex_high_f(const B & real,const B & imag)223         inline B get_complex_high_f(const B& real, const B& imag)
224         {
225             return get_half_complex_f<0>(real, imag);
226         }
227 
228         template <class B>
get_complex_low_f(const B & real,const B & imag)229         inline B get_complex_low_f(const B& real, const B& imag)
230         {
231             return get_half_complex_f<1>(real, imag);
232         }
233 
234         template <class B>
load_complex_d(const B & hi,const B & lo)235         inline std::pair<B, B> load_complex_d(const B& hi, const B& lo)
236         {
237 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
238             B real = _mm256_permute4x64_pd(_mm256_unpacklo_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0));
239             B imag = _mm256_permute4x64_pd(_mm256_unpackhi_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0));
240 #else
241             __m128d tmp0 = _mm256_extractf128_pd(hi, 0);
242             __m128d tmp1 = _mm256_extractf128_pd(hi, 1);
243             B real, imag;
244             __m256d re_tmp0 = _mm256_insertf128_pd(real, _mm_unpacklo_pd(tmp0, tmp1), 0);
245             __m256d im_tmp0 = _mm256_insertf128_pd(imag, _mm_unpackhi_pd(tmp0, tmp1), 0);
246             tmp0 = _mm256_extractf128_pd(lo, 0);
247             tmp1 = _mm256_extractf128_pd(lo, 1);
248             __m256d re_tmp1 = _mm256_insertf128_pd(real, _mm_unpacklo_pd(tmp0, tmp1), 1);
249             __m256d im_tmp1 = _mm256_insertf128_pd(imag, _mm_unpackhi_pd(tmp0, tmp1), 1);
250             real = _mm256_blend_pd(re_tmp0, re_tmp1, 12);
251             imag = _mm256_blend_pd(im_tmp0, im_tmp1, 12);
252 #endif
253             return std::make_pair(real, imag);
254         }
255 
256         // On clang, _mm256_extractf128_pd is built upon build_shufflevector
257         // which require index parameter to be a constant
258         template <int index, class B>
get_half_complex_d(const B & real,const B & imag)259         inline B get_half_complex_d(const B& real, const B& imag)
260         {
261             __m128d tmp0 = _mm256_extractf128_pd(real, index);
262             __m128d tmp1 = _mm256_extractf128_pd(imag, index);
263             __m128d tmp2 = _mm_unpackhi_pd(tmp0, tmp1);
264             tmp0 = _mm_unpacklo_pd(tmp0, tmp1);
265             __m256d res = real;
266             res = _mm256_insertf128_pd(res, tmp0, 0);
267             res = _mm256_insertf128_pd(res, tmp2, 1);
268             return res;
269         }
270 
271         template <class B>
get_complex_high_d(const B & real,const B & imag)272         inline B get_complex_high_d(const B& real, const B& imag)
273         {
274 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
275             __m256d tmp0 = _mm256_permute4x64_pd(real, _MM_SHUFFLE(3, 1, 1, 0));
276             __m256d tmp1 = _mm256_permute4x64_pd(imag, _MM_SHUFFLE(1, 2, 0, 0));
277             return _mm256_blend_pd(tmp0, tmp1, 10);
278 #else
279             return get_half_complex_d<0>(real, imag);
280 #endif
281         }
282 
283         template <class B>
get_complex_low_d(const B & real,const B & imag)284         inline B get_complex_low_d(const B& real, const B& imag)
285         {
286 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION
287             __m256d tmp0 = _mm256_permute4x64_pd(real, _MM_SHUFFLE(3, 3, 1, 2));
288             __m256d tmp1 = _mm256_permute4x64_pd(imag, _MM_SHUFFLE(3, 2, 2, 0));
289             return _mm256_blend_pd(tmp0, tmp1, 10);
290 #else
291             return get_half_complex_d<1>(real, imag);
292 #endif
293         }
294     }
295 
296     /********************************************
297      * batch<std::complex<T>, N> implementation *
298      ********************************************/
299 
300     inline batch<std::complex<float>, 8>&
load_complex(const real_batch & hi,const real_batch & lo)301     batch<std::complex<float>, 8>::load_complex(const real_batch& hi, const real_batch& lo)
302     {
303         std::tie(this->m_real, this->m_imag) = detail::load_complex_f(hi, lo);
304         return *this;
305     }
306 
get_complex_high() const307     inline auto batch<std::complex<float>, 8>::get_complex_high() const -> real_batch
308     {
309         return detail::get_complex_high_f(this->m_real, this->m_imag);
310     }
311 
get_complex_low() const312     inline auto batch<std::complex<float>, 8>::get_complex_low() const -> real_batch
313     {
314         return detail::get_complex_low_f(this->m_real, this->m_imag);
315     }
316 
317     inline batch<std::complex<double>, 4>&
load_complex(const real_batch & hi,const real_batch & lo)318     batch<std::complex<double>, 4>::load_complex(const real_batch& hi, const real_batch& lo)
319     {
320         std::tie(m_real, m_imag) = detail::load_complex_d(hi, lo);
321         return *this;
322     }
323 
get_complex_high() const324     inline auto batch<std::complex<double>, 4>::get_complex_high() const -> real_batch
325     {
326         return detail::get_complex_high_d(this->m_real, this->m_imag);
327     }
328 
get_complex_low() const329     inline auto batch<std::complex<double>, 4>::get_complex_low() const -> real_batch
330     {
331         return detail::get_complex_low_d(this->m_real, this->m_imag);
332     }
333 
334 #ifdef XSIMD_ENABLE_XTL_COMPLEX
335 
336     /****************************************************
337      * batch_bool<xtl::xcomplex<float, float, i3ec>, 8> *
338      ****************************************************/
339 
340     template <bool i3ec>
341     struct simd_batch_traits<batch_bool<xtl::xcomplex<float, float, i3ec>, 8>>
342         : complex_batch_bool_traits<xtl::xcomplex<float, float, i3ec>, float, 8, 32>
343     {
344     };
345 
346     template<bool i3ec>
347     class batch_bool<xtl::xcomplex<float, float, i3ec>, 8>
348         : public simd_complex_batch_bool<batch_bool<xtl::xcomplex<float, float, i3ec>, 8>>
349     {
350     public:
351 
352         using self_type = batch_bool<xtl::xcomplex<float, float, i3ec>, 8>;
353         using base_type = simd_complex_batch_bool<self_type>;
354         using real_batch = batch_bool<float, 8>;
355 
356         batch_bool() = default;
357         using base_type::base_type;
358 
359         // VS2015 has a bug with inheriting constructors involving SFINAE
batch_bool(bool b0,bool b1,bool b2,bool b3,bool b4,bool b5,bool b6,bool b7)360         batch_bool(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7)
361             : base_type(real_batch(b0, b1, b2, b3, b4, b5, b6, b7))
362         {
363         }
364     };
365 
366     /***********************************************
367      * batch<xtl::xcomplex<float, float, i3ec>, 8> *
368      ***********************************************/
369 
370     template <bool i3ec>
371     struct simd_batch_traits<batch<xtl::xcomplex<float, float, i3ec>, 8>>
372         : complex_batch_traits<xtl::xcomplex<float, float, i3ec>, float, 8, 32>
373     {
374     };
375 
376     template <bool i3ec>
377     class batch<xtl::xcomplex<float, float, i3ec>, 8>
378         : public simd_complex_batch<batch<xtl::xcomplex<float, float, i3ec>, 8>>
379     {
380     public:
381 
382         using self_type = batch<xtl::xcomplex<float, float, i3ec>, 8>;
383         using base_type = simd_complex_batch<self_type>;
384         using value_type = xtl::xcomplex<float, float, i3ec>;
385         using real_batch = batch<float, 8>;
386 
387         batch() = default;
388         using base_type::base_type;
389 
390 
391         // VS2015 has a bug with inheriting constructors involving SFINAE
batch(value_type c0,value_type c1,value_type c2,value_type c3,value_type c4,value_type c5,value_type c6,value_type c7)392         batch(value_type c0, value_type c1, value_type c2, value_type c3,
393               value_type c4, value_type c5, value_type c6, value_type c7)
394             : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real(), c4.real(), c5.real(), c6.real(), c7.real()),
395                         real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag(), c4.imag(), c5.imag(), c6.imag(), c7.imag()))
396         {
397         }
398 
399     private:
400 
401         batch& load_complex(const real_batch& hi, const real_batch& lo);
402         real_batch get_complex_high() const;
403         real_batch get_complex_low() const;
404 
405         friend class simd_complex_batch<batch<xtl::xcomplex<float, float, i3ec>, 8>>;
406     };
407 
408     /******************************************************
409      * batch_bool<xtl::xcomplex<double, double, i3ec>, 4> *
410      ******************************************************/
411 
412     template <bool i3ec>
413     struct simd_batch_traits<batch_bool<xtl::xcomplex<double, double, i3ec>, 4>>
414         : complex_batch_bool_traits<xtl::xcomplex<double, double, i3ec>, double, 4, 32>
415     {
416     };
417 
418     template<bool i3ec>
419     class batch_bool<xtl::xcomplex<double, double, i3ec>, 4>
420         : public simd_complex_batch_bool<batch_bool<xtl::xcomplex<double, double, i3ec>, 4>>
421     {
422     public:
423 
424         using self_type = batch_bool<xtl::xcomplex<double, double, i3ec>, 4>;
425         using base_type = simd_complex_batch_bool<self_type>;
426         using real_batch = batch_bool<double, 4>;
427 
428         batch_bool() = default;
429         using base_type::base_type;
430 
431         // VS2015 has a bug with inheriting constructors involving SFINAE
batch_bool(bool b0,bool b1,bool b2,bool b3)432         batch_bool(bool b0, bool b1, bool b2, bool b3)
433             : base_type(real_batch(b0, b1, b2, b3))
434         {
435         }
436     };
437 
438     /*************************************************
439      * batch<xtl::xcomplex<double, double, i3ec>, 4> *
440      *************************************************/
441 
442     template <bool i3ec>
443     struct simd_batch_traits<batch<xtl::xcomplex<double, double, i3ec>, 4>>
444         : complex_batch_traits<xtl::xcomplex<double, double, i3ec>, double, 4, 32>
445     {
446     };
447 
448     template <bool i3ec>
449     class batch<xtl::xcomplex<double, double, i3ec>, 4>
450         : public simd_complex_batch<batch<xtl::xcomplex<double, double, i3ec>, 4>>
451     {
452     public:
453 
454         using self_type = batch<xtl::xcomplex<double, double, i3ec>, 4>;
455         using base_type = simd_complex_batch<self_type>;
456         using value_type = xtl::xcomplex<double, double, i3ec>;
457         using real_batch = batch<double, 4>;
458 
459         batch() = default;
460         using base_type::base_type;
461 
462         // VS2015 has a bug with inheriting constructors involving SFINAE
batch(value_type c0,value_type c1,value_type c2,value_type c3)463         batch(value_type c0, value_type c1, value_type c2, value_type c3)
464             : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real()),
465                         real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag()))
466         {
467         }
468 
469     private:
470 
471         batch& load_complex(const real_batch& hi, const real_batch& lo);
472         real_batch get_complex_high() const;
473         real_batch get_complex_low() const;
474 
475         friend class simd_complex_batch<batch<xtl::xcomplex<double>, 4>>;
476     };
477 
/******************************************************
 * batch<xtl::xcomplex<T, T, i3ec>, N> implementation *
 ******************************************************/
481 
482     template <bool i3ec>
483     inline batch<xtl::xcomplex<float, float, i3ec>, 8>&
load_complex(const real_batch & hi,const real_batch & lo)484     batch<xtl::xcomplex<float, float, i3ec>, 8>::load_complex(const real_batch& hi, const real_batch& lo)
485     {
486         std::tie(this->m_real, this->m_imag) = detail::load_complex_f(hi, lo);
487         return *this;
488     }
489 
490     template <bool i3ec>
get_complex_high() const491     inline auto batch<xtl::xcomplex<float, float, i3ec>, 8>::get_complex_high() const -> real_batch
492     {
493         return detail::get_complex_high_f(this->m_real, this->m_imag);
494     }
495 
496     template <bool i3ec>
get_complex_low() const497     inline auto batch<xtl::xcomplex<float, float, i3ec>, 8>::get_complex_low() const -> real_batch
498     {
499         return detail::get_complex_low_f(this->m_real, this->m_imag);
500     }
501 
502     template <bool i3ec>
503     inline batch<xtl::xcomplex<double, double, i3ec>, 4>&
load_complex(const real_batch & hi,const real_batch & lo)504     batch<xtl::xcomplex<double, double, i3ec>, 4>::load_complex(const real_batch& hi, const real_batch& lo)
505     {
506         std::tie(this->m_real, this->m_imag) = detail::load_complex_d(hi, lo);
507         return *this;
508     }
509 
510     template <bool i3ec>
get_complex_high() const511     inline auto batch<xtl::xcomplex<double, double, i3ec>, 4>::get_complex_high() const -> real_batch
512     {
513         return detail::get_complex_high_d(this->m_real, this->m_imag);
514     }
515 
516     template <bool i3ec>
get_complex_low() const517     inline auto batch<xtl::xcomplex<double, double, i3ec>, 4>::get_complex_low() const -> real_batch
518     {
519         return detail::get_complex_low_d(this->m_real, this->m_imag);
520     }
521 
522 #endif
523 }
524 
525 #endif
526