1 /*************************************************************************** 2 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * 3 * Martin Renou * 4 * Copyright (c) QuantStack * 5 * * 6 * Distributed under the terms of the BSD 3-Clause License. * 7 * * 8 * The full license is in the file LICENSE, distributed with this software. * 9 ****************************************************************************/ 10 11 #ifndef XSIMD_AVX_COMPLEX_HPP 12 #define XSIMD_AVX_COMPLEX_HPP 13 14 #include <complex> 15 #include <tuple> 16 #include <utility> 17 18 #ifdef XSIMD_ENABLE_XTL_COMPLEX 19 #include "xtl/xcomplex.hpp" 20 #endif 21 22 #include "xsimd_avx_float.hpp" 23 #include "xsimd_avx_double.hpp" 24 #include "xsimd_complex_base.hpp" 25 26 namespace xsimd 27 { 28 29 /************************************** 30 * batch_bool<std::complex<float>, 8> * 31 **************************************/ 32 33 template <> 34 struct simd_batch_traits<batch_bool<std::complex<float>, 8>> 35 : complex_batch_bool_traits<std::complex<float>, float, 8, 32> 36 { 37 }; 38 39 template<> 40 class batch_bool<std::complex<float>, 8> 41 : public simd_complex_batch_bool<batch_bool<std::complex<float>, 8>> 42 { 43 public: 44 45 using self_type = batch_bool<std::complex<float>, 8>; 46 using base_type = simd_complex_batch_bool<self_type>; 47 using real_batch = batch_bool<float, 8>; 48 49 batch_bool() = default; 50 using base_type::base_type; 51 52 // VS2015 has a bug with inheriting constructors involving SFINAE batch_bool(bool b0,bool b1,bool b2,bool b3,bool b4,bool b5,bool b6,bool b7)53 batch_bool(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7) 54 : base_type(real_batch(b0, b1, b2, b3, b4, b5, b6, b7)) 55 { 56 } 57 }; 58 59 /********************************* 60 * batch<std::complex<float>, 8> * 61 *********************************/ 62 63 template <> 64 struct simd_batch_traits<batch<std::complex<float>, 8>> 65 : complex_batch_traits<std::complex<float>, float, 8, 32> 66 { 67 }; 68 69 template <> 70 class batch<std::complex<float>, 8> 71 : public simd_complex_batch<batch<std::complex<float>, 8>> 72 { 73 public: 74 75 using self_type = batch<std::complex<float>, 8>; 76 using base_type = simd_complex_batch<self_type>; 77 using value_type = std::complex<float>; 78 using real_batch = batch<float, 8>; 79 80 batch() = default; 81 using base_type::base_type; 82 83 // VS2015 has a bug with inheriting constructors involving SFINAE batch(value_type c0,value_type c1,value_type c2,value_type c3,value_type c4,value_type c5,value_type c6,value_type c7)84 batch(value_type c0, value_type c1, value_type c2, value_type c3, 85 value_type c4, value_type c5, value_type c6, value_type c7) 86 : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real(), c4.real(), c5.real(), c6.real(), c7.real()), 87 real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag(), c4.imag(), c5.imag(), c6.imag(), c7.imag())) 88 { 89 } 90 91 private: 92 93 batch& load_complex(const real_batch& hi, const real_batch& lo); 94 real_batch get_complex_high() const; 95 real_batch get_complex_low() const; 96 97 friend class simd_complex_batch<batch<std::complex<float>, 8>>; 98 }; 99 100 /*************************************** 101 * batch_bool<std::complex<double>, 4> * 102 ***************************************/ 103 104 template <> 105 struct simd_batch_traits<batch_bool<std::complex<double>, 4>> 106 : complex_batch_bool_traits<std::complex<double>, double, 4, 32> 107 { 108 }; 109 110 template<> 111 class batch_bool<std::complex<double>, 4> 112 : public simd_complex_batch_bool<batch_bool<std::complex<double>, 4>> 113 { 114 public: 115 116 using self_type = batch_bool<std::complex<double>, 4>; 117 using base_type = simd_complex_batch_bool<self_type>; 118 using real_batch = batch_bool<double, 4>; 119 120 batch_bool() = default; 121 using base_type::base_type; 122 123 // VS2015 has a bug with inheriting constructors involving SFINAE batch_bool(bool b0,bool b1,bool b2,bool b3)124 batch_bool(bool b0, bool b1, bool b2, bool b3) 125 : base_type(real_batch(b0, b1, b2, b3)) 126 { 127 } 128 }; 129 130 /********************************** 131 * batch<std::complex<double>, 4> * 132 **********************************/ 133 134 template <> 135 struct simd_batch_traits<batch<std::complex<double>, 4>> 136 : complex_batch_traits<std::complex<double>, double, 4, 32> 137 { 138 }; 139 140 template <> 141 class batch<std::complex<double>, 4> 142 : public simd_complex_batch<batch<std::complex<double>, 4>> 143 { 144 public: 145 146 using self_type = batch<std::complex<double>, 4>; 147 using base_type = simd_complex_batch<self_type>; 148 using value_type = std::complex<double>; 149 using real_batch = batch<double, 4>; 150 151 batch() = default; 152 using base_type::base_type; 153 154 // VS2015 has a bug with inheriting constructors involving SFINAE batch(value_type c0,value_type c1,value_type c2,value_type c3)155 batch(value_type c0, value_type c1, value_type c2, value_type c3) 156 : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real()), 157 real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag())) 158 { 159 } 160 161 private: 162 163 batch& load_complex(const real_batch& hi, const real_batch& lo); 164 real_batch get_complex_high() const; 165 real_batch get_complex_low() const; 166 167 friend class simd_complex_batch<batch<std::complex<double>, 4>>; 168 }; 169 170 /********************************************** 171 * common functions to avoid code duplication * 172 **********************************************/ 173 174 namespace detail 175 { 176 template <class B> load_complex_f(const B & hi,const B & lo)177 inline std::pair<B, B> load_complex_f(const B& hi, const B& lo) 178 { 179 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION 180 B real = _mm256_castpd_ps( 181 _mm256_permute4x64_pd( 182 _mm256_castps_pd(_mm256_shuffle_ps(hi, lo, _MM_SHUFFLE(2, 0, 2, 0))), 183 _MM_SHUFFLE(3, 1, 2, 0))); 184 B imag = _mm256_castpd_ps( 185 _mm256_permute4x64_pd( 186 _mm256_castps_pd(_mm256_shuffle_ps(hi, lo, _MM_SHUFFLE(3, 1, 3, 1))), 187 _MM_SHUFFLE(3, 1, 2, 0))); 188 189 #else 190 __m128 tmp0 = _mm256_extractf128_ps(hi, 0); 191 __m128 tmp1 = _mm256_extractf128_ps(hi, 1); 192 __m128 tmp_real = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(2, 0, 2, 0)); 193 __m128 tmp_imag = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(3, 1, 3, 1)); 194 B real, imag; 195 real = _mm256_insertf128_ps(real, tmp_real, 0); 196 imag = _mm256_insertf128_ps(imag, tmp_imag, 0); 197 tmp0 = _mm256_extractf128_ps(lo, 0); 198 tmp1 = _mm256_extractf128_ps(lo, 1); 199 tmp_real = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(2, 0, 2, 0)); 200 tmp_imag = _mm_shuffle_ps(tmp0, tmp1, _MM_SHUFFLE(3, 1, 3, 1)); 201 real = _mm256_insertf128_ps(real, tmp_real, 1); 202 imag = _mm256_insertf128_ps(imag, tmp_imag, 1); 203 #endif 204 return std::make_pair(real, imag); 205 } 206 207 // On clang, _mm256_extractf128_ps is built upon build_shufflevector 208 // which require index parameter to be a constant 209 template <int index, class B> get_half_complex_f(const B & real,const B & imag)210 inline B get_half_complex_f(const B& real, const B& imag) 211 { 212 __m128 tmp0 = _mm256_extractf128_ps(real, index); 213 __m128 tmp1 = _mm256_extractf128_ps(imag, index); 214 __m128 tmp2 = _mm_unpackhi_ps(tmp0, tmp1); 215 tmp0 = _mm_unpacklo_ps(tmp0, tmp1); 216 __m256 res = real; 217 res = _mm256_insertf128_ps(res, tmp0, 0); 218 res = _mm256_insertf128_ps(res, tmp2, 1); 219 return res; 220 } 221 222 template <class B> get_complex_high_f(const B & real,const B & imag)223 inline B get_complex_high_f(const B& real, const B& imag) 224 { 225 return get_half_complex_f<0>(real, imag); 226 } 227 228 template <class B> get_complex_low_f(const B & real,const B & imag)229 inline B get_complex_low_f(const B& real, const B& imag) 230 { 231 return get_half_complex_f<1>(real, imag); 232 } 233 234 template <class B> load_complex_d(const B & hi,const B & lo)235 inline std::pair<B, B> load_complex_d(const B& hi, const B& lo) 236 { 237 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION 238 B real = _mm256_permute4x64_pd(_mm256_unpacklo_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0)); 239 B imag = _mm256_permute4x64_pd(_mm256_unpackhi_pd(hi, lo), _MM_SHUFFLE(3, 1, 2, 0)); 240 #else 241 __m128d tmp0 = _mm256_extractf128_pd(hi, 0); 242 __m128d tmp1 = _mm256_extractf128_pd(hi, 1); 243 B real, imag; 244 __m256d re_tmp0 = _mm256_insertf128_pd(real, _mm_unpacklo_pd(tmp0, tmp1), 0); 245 __m256d im_tmp0 = _mm256_insertf128_pd(imag, _mm_unpackhi_pd(tmp0, tmp1), 0); 246 tmp0 = _mm256_extractf128_pd(lo, 0); 247 tmp1 = _mm256_extractf128_pd(lo, 1); 248 __m256d re_tmp1 = _mm256_insertf128_pd(real, _mm_unpacklo_pd(tmp0, tmp1), 1); 249 __m256d im_tmp1 = _mm256_insertf128_pd(imag, _mm_unpackhi_pd(tmp0, tmp1), 1); 250 real = _mm256_blend_pd(re_tmp0, re_tmp1, 12); 251 imag = _mm256_blend_pd(im_tmp0, im_tmp1, 12); 252 #endif 253 return std::make_pair(real, imag); 254 } 255 256 // On clang, _mm256_extractf128_pd is built upon build_shufflevector 257 // which require index parameter to be a constant 258 template <int index, class B> get_half_complex_d(const B & real,const B & imag)259 inline B get_half_complex_d(const B& real, const B& imag) 260 { 261 __m128d tmp0 = _mm256_extractf128_pd(real, index); 262 __m128d tmp1 = _mm256_extractf128_pd(imag, index); 263 __m128d tmp2 = _mm_unpackhi_pd(tmp0, tmp1); 264 tmp0 = _mm_unpacklo_pd(tmp0, tmp1); 265 __m256d res = real; 266 res = _mm256_insertf128_pd(res, tmp0, 0); 267 res = _mm256_insertf128_pd(res, tmp2, 1); 268 return res; 269 } 270 271 template <class B> get_complex_high_d(const B & real,const B & imag)272 inline B get_complex_high_d(const B& real, const B& imag) 273 { 274 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION 275 __m256d tmp0 = _mm256_permute4x64_pd(real, _MM_SHUFFLE(3, 1, 1, 0)); 276 __m256d tmp1 = _mm256_permute4x64_pd(imag, _MM_SHUFFLE(1, 2, 0, 0)); 277 return _mm256_blend_pd(tmp0, tmp1, 10); 278 #else 279 return get_half_complex_d<0>(real, imag); 280 #endif 281 } 282 283 template <class B> get_complex_low_d(const B & real,const B & imag)284 inline B get_complex_low_d(const B& real, const B& imag) 285 { 286 #if XSIMD_X86_INSTR_SET >= XSIMD_X86_AVX2_VERSION 287 __m256d tmp0 = _mm256_permute4x64_pd(real, _MM_SHUFFLE(3, 3, 1, 2)); 288 __m256d tmp1 = _mm256_permute4x64_pd(imag, _MM_SHUFFLE(3, 2, 2, 0)); 289 return _mm256_blend_pd(tmp0, tmp1, 10); 290 #else 291 return get_half_complex_d<1>(real, imag); 292 #endif 293 } 294 } 295 296 /******************************************** 297 * batch<std::complex<T>, N> implementation * 298 ********************************************/ 299 300 inline batch<std::complex<float>, 8>& load_complex(const real_batch & hi,const real_batch & lo)301 batch<std::complex<float>, 8>::load_complex(const real_batch& hi, const real_batch& lo) 302 { 303 std::tie(this->m_real, this->m_imag) = detail::load_complex_f(hi, lo); 304 return *this; 305 } 306 get_complex_high() const307 inline auto batch<std::complex<float>, 8>::get_complex_high() const -> real_batch 308 { 309 return detail::get_complex_high_f(this->m_real, this->m_imag); 310 } 311 get_complex_low() const312 inline auto batch<std::complex<float>, 8>::get_complex_low() const -> real_batch 313 { 314 return detail::get_complex_low_f(this->m_real, this->m_imag); 315 } 316 317 inline batch<std::complex<double>, 4>& load_complex(const real_batch & hi,const real_batch & lo)318 batch<std::complex<double>, 4>::load_complex(const real_batch& hi, const real_batch& lo) 319 { 320 std::tie(m_real, m_imag) = detail::load_complex_d(hi, lo); 321 return *this; 322 } 323 get_complex_high() const324 inline auto batch<std::complex<double>, 4>::get_complex_high() const -> real_batch 325 { 326 return detail::get_complex_high_d(this->m_real, this->m_imag); 327 } 328 get_complex_low() const329 inline auto batch<std::complex<double>, 4>::get_complex_low() const -> real_batch 330 { 331 return detail::get_complex_low_d(this->m_real, this->m_imag); 332 } 333 334 #ifdef XSIMD_ENABLE_XTL_COMPLEX 335 336 /**************************************************** 337 * batch_bool<xtl::xcomplex<float, float, i3ec>, 8> * 338 ****************************************************/ 339 340 template <bool i3ec> 341 struct simd_batch_traits<batch_bool<xtl::xcomplex<float, float, i3ec>, 8>> 342 : complex_batch_bool_traits<xtl::xcomplex<float, float, i3ec>, float, 8, 32> 343 { 344 }; 345 346 template<bool i3ec> 347 class batch_bool<xtl::xcomplex<float, float, i3ec>, 8> 348 : public simd_complex_batch_bool<batch_bool<xtl::xcomplex<float, float, i3ec>, 8>> 349 { 350 public: 351 352 using self_type = batch_bool<xtl::xcomplex<float, float, i3ec>, 8>; 353 using base_type = simd_complex_batch_bool<self_type>; 354 using real_batch = batch_bool<float, 8>; 355 356 batch_bool() = default; 357 using base_type::base_type; 358 359 // VS2015 has a bug with inheriting constructors involving SFINAE batch_bool(bool b0,bool b1,bool b2,bool b3,bool b4,bool b5,bool b6,bool b7)360 batch_bool(bool b0, bool b1, bool b2, bool b3, bool b4, bool b5, bool b6, bool b7) 361 : base_type(real_batch(b0, b1, b2, b3, b4, b5, b6, b7)) 362 { 363 } 364 }; 365 366 /*********************************************** 367 * batch<xtl::xcomplex<float, float, i3ec>, 8> * 368 ***********************************************/ 369 370 template <bool i3ec> 371 struct simd_batch_traits<batch<xtl::xcomplex<float, float, i3ec>, 8>> 372 : complex_batch_traits<xtl::xcomplex<float, float, i3ec>, float, 8, 32> 373 { 374 }; 375 376 template <bool i3ec> 377 class batch<xtl::xcomplex<float, float, i3ec>, 8> 378 : public simd_complex_batch<batch<xtl::xcomplex<float, float, i3ec>, 8>> 379 { 380 public: 381 382 using self_type = batch<xtl::xcomplex<float, float, i3ec>, 8>; 383 using base_type = simd_complex_batch<self_type>; 384 using value_type = xtl::xcomplex<float, float, i3ec>; 385 using real_batch = batch<float, 8>; 386 387 batch() = default; 388 using base_type::base_type; 389 390 391 // VS2015 has a bug with inheriting constructors involving SFINAE batch(value_type c0,value_type c1,value_type c2,value_type c3,value_type c4,value_type c5,value_type c6,value_type c7)392 batch(value_type c0, value_type c1, value_type c2, value_type c3, 393 value_type c4, value_type c5, value_type c6, value_type c7) 394 : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real(), c4.real(), c5.real(), c6.real(), c7.real()), 395 real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag(), c4.imag(), c5.imag(), c6.imag(), c7.imag())) 396 { 397 } 398 399 private: 400 401 batch& load_complex(const real_batch& hi, const real_batch& lo); 402 real_batch get_complex_high() const; 403 real_batch get_complex_low() const; 404 405 friend class simd_complex_batch<batch<xtl::xcomplex<float, float, i3ec>, 8>>; 406 }; 407 408 /****************************************************** 409 * batch_bool<xtl::xcomplex<double, double, i3ec>, 4> * 410 ******************************************************/ 411 412 template <bool i3ec> 413 struct simd_batch_traits<batch_bool<xtl::xcomplex<double, double, i3ec>, 4>> 414 : complex_batch_bool_traits<xtl::xcomplex<double, double, i3ec>, double, 4, 32> 415 { 416 }; 417 418 template<bool i3ec> 419 class batch_bool<xtl::xcomplex<double, double, i3ec>, 4> 420 : public simd_complex_batch_bool<batch_bool<xtl::xcomplex<double, double, i3ec>, 4>> 421 { 422 public: 423 424 using self_type = batch_bool<xtl::xcomplex<double, double, i3ec>, 4>; 425 using base_type = simd_complex_batch_bool<self_type>; 426 using real_batch = batch_bool<double, 4>; 427 428 batch_bool() = default; 429 using base_type::base_type; 430 431 // VS2015 has a bug with inheriting constructors involving SFINAE batch_bool(bool b0,bool b1,bool b2,bool b3)432 batch_bool(bool b0, bool b1, bool b2, bool b3) 433 : base_type(real_batch(b0, b1, b2, b3)) 434 { 435 } 436 }; 437 438 /************************************************* 439 * batch<xtl::xcomplex<double, double, i3ec>, 4> * 440 *************************************************/ 441 442 template <bool i3ec> 443 struct simd_batch_traits<batch<xtl::xcomplex<double, double, i3ec>, 4>> 444 : complex_batch_traits<xtl::xcomplex<double, double, i3ec>, double, 4, 32> 445 { 446 }; 447 448 template <bool i3ec> 449 class batch<xtl::xcomplex<double, double, i3ec>, 4> 450 : public simd_complex_batch<batch<xtl::xcomplex<double, double, i3ec>, 4>> 451 { 452 public: 453 454 using self_type = batch<xtl::xcomplex<double, double, i3ec>, 4>; 455 using base_type = simd_complex_batch<self_type>; 456 using value_type = xtl::xcomplex<double, double, i3ec>; 457 using real_batch = batch<double, 4>; 458 459 batch() = default; 460 using base_type::base_type; 461 462 // VS2015 has a bug with inheriting constructors involving SFINAE batch(value_type c0,value_type c1,value_type c2,value_type c3)463 batch(value_type c0, value_type c1, value_type c2, value_type c3) 464 : base_type(real_batch(c0.real(), c1.real(), c2.real(), c3.real()), 465 real_batch(c0.imag(), c1.imag(), c2.imag(), c3.imag())) 466 { 467 } 468 469 private: 470 471 batch& load_complex(const real_batch& hi, const real_batch& lo); 472 real_batch get_complex_high() const; 473 real_batch get_complex_low() const; 474 475 friend class simd_complex_batch<batch<xtl::xcomplex<double>, 4>>; 476 }; 477 478 /******************************************** 479 * batch<std::complex<T>, N> implementation * 480 ********************************************/ 481 482 template <bool i3ec> 483 inline batch<xtl::xcomplex<float, float, i3ec>, 8>& load_complex(const real_batch & hi,const real_batch & lo)484 batch<xtl::xcomplex<float, float, i3ec>, 8>::load_complex(const real_batch& hi, const real_batch& lo) 485 { 486 std::tie(this->m_real, this->m_imag) = detail::load_complex_f(hi, lo); 487 return *this; 488 } 489 490 template <bool i3ec> get_complex_high() const491 inline auto batch<xtl::xcomplex<float, float, i3ec>, 8>::get_complex_high() const -> real_batch 492 { 493 return detail::get_complex_high_f(this->m_real, this->m_imag); 494 } 495 496 template <bool i3ec> get_complex_low() const497 inline auto batch<xtl::xcomplex<float, float, i3ec>, 8>::get_complex_low() const -> real_batch 498 { 499 return detail::get_complex_low_f(this->m_real, this->m_imag); 500 } 501 502 template <bool i3ec> 503 inline batch<xtl::xcomplex<double, double, i3ec>, 4>& load_complex(const real_batch & hi,const real_batch & lo)504 batch<xtl::xcomplex<double, double, i3ec>, 4>::load_complex(const real_batch& hi, const real_batch& lo) 505 { 506 std::tie(this->m_real, this->m_imag) = detail::load_complex_d(hi, lo); 507 return *this; 508 } 509 510 template <bool i3ec> get_complex_high() const511 inline auto batch<xtl::xcomplex<double, double, i3ec>, 4>::get_complex_high() const -> real_batch 512 { 513 return detail::get_complex_high_d(this->m_real, this->m_imag); 514 } 515 516 template <bool i3ec> get_complex_low() const517 inline auto batch<xtl::xcomplex<double, double, i3ec>, 4>::get_complex_low() const -> real_batch 518 { 519 return detail::get_complex_low_d(this->m_real, this->m_imag); 520 } 521 522 #endif 523 } 524 525 #endif 526