1 /* 2 Copyright (C) 2010 X. Andrade 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 17 02110-1301, USA. 18 19 */ 20 21 #include <config.h> 22 23 #ifndef VECTORS_H 24 #define VECTORS_H 25 26 #ifdef HAVE_VEC 27 28 #ifdef HAVE_M512D 29 #include <immintrin.h> 30 #define VEC_SIZE 8 31 #define VEC_TYPE __m512d 32 #define VEC_LD(addr) _mm512_load_pd(addr) 33 #define VEC_LDU(addr) _mm512_loadu_pd(addr) 34 #define VEC_ST(addr, vec) _mm512_stream_pd(addr, vec) 35 #define VEC_STU(addr, vec) _mm512_storeu_pd(addr, vec) 36 #define VEC_FMA(aa, bb, cc) _mm512_fmadd_pd(aa, bb, cc) 37 #define VEC_SCAL(aa) _mm512_set1_pd(aa) 38 #define VEC_ZERO _mm512_setzero_pd() 39 #include <emmintrin.h> 40 #define FENCE _mm_mfence() 41 42 #define DEPTH 16 43 44 #elif defined(HAVE_M256D) 45 #include <immintrin.h> 46 #if defined(HAVE_FMA4) || defined(HAVE_FMA3) 47 #include <x86intrin.h> 48 #endif 49 #define VEC_SIZE 4 50 #define VEC_TYPE __m256d 51 #define VEC_LD(addr) _mm256_load_pd(addr) 52 #define VEC_LDU(addr) _mm256_loadu_pd(addr) 53 #define VEC_ST(addr, vec) _mm256_stream_pd(addr, vec) 54 #define VEC_STU(addr, vec) _mm256_storeu_pd(addr, vec) 55 #ifdef HAVE_FMA3 56 #define VEC_FMA(aa, bb, cc) _mm256_fmadd_pd(aa, bb, cc) 57 #elif defined(HAVE_FMA4) 58 #define VEC_FMA(aa, bb, cc) _mm256_macc_pd(aa, bb, cc) 59 #else 60 #define VEC_FMA(aa, bb, cc) _mm256_add_pd(cc, _mm256_mul_pd(aa, bb)) 61 #endif 62 #define VEC_SCAL(aa) _mm256_set1_pd(aa) 63 #define VEC_ZERO _mm256_setzero_pd() 64 #include <emmintrin.h> 65 #define FENCE _mm_mfence() 66 67 #define DEPTH 16 68 #endif 69 70 #if !defined(HAVE_M256D) && defined(HAVE_M128D) 71 #include <emmintrin.h> 72 #if defined(HAVE_FMA4) || defined(HAVE_FMA3) 73 #include <x86intrin.h> 74 #endif 75 #define VEC_SIZE 2 76 #define VEC_TYPE __m128d 77 #define VEC_LD(addr) _mm_load_pd(addr) 78 #define VEC_LDU(addr) _mm_loadu_pd(addr) 79 #define VEC_ST(addr, vec) _mm_stream_pd(addr, vec) 80 #define VEC_STU(addr, vec) _mm_storeu_pd(addr, vec) 81 #ifdef HAVE_FMA3 82 #define VEC_FMA(aa, bb, cc) _mm_fmadd_pd(aa, bb, cc) 83 #elif defined(HAVE_FMA4) 84 #define VEC_FMA(aa, bb, cc) _mm_macc_pd(aa, bb, cc) 85 #else 86 #define VEC_FMA(aa, bb, cc) _mm_add_pd(cc, _mm_mul_pd(aa, bb)) 87 #endif 88 #define VEC_SCAL(aa) _mm_set1_pd(aa) 89 #define VEC_ZERO _mm_setzero_pd() 90 #define FENCE _mm_mfence() 91 92 #define DEPTH 16 93 #endif 94 95 #ifdef HAVE_BLUE_GENE_Q 96 #define VEC_SIZE 4 97 #define VEC_TYPE vector4double 98 #define VEC_LD(addr) vec_ld(0, (double *) (addr)) 99 #define VEC_LDU(addr) ((vector4double) {(addr)[0], (addr)[1], (addr)[2], (addr)[3]}) 100 #define VEC_ST(addr, vec) vec_st(vec, 0, (double *) (addr)) 101 #define VEC_STU(addr, vec) (addr)[0] = vec_extract(vec, 0); (addr)[1] = vec_extract(vec, 1); (addr)[2] = vec_extract(vec, 2); (addr)[3] = vec_extract(vec, 3) 102 #define VEC_FMA(aa, bb, cc) vec_madd(aa, bb, cc) 103 #define VEC_SCAL(aa) ((vector4double) {aa, aa, aa, aa}) 104 #define VEC_SCAL_LD(addr) vec_lds(0, (double*) (addr)) 105 #define VEC_ZERO ((vector4double) {0.0, 0.0, 0.0, 0.0}) 106 107 #define DEPTH 16 108 #endif 109 110 #if defined(HAVE_BLUE_GENE) && !defined(HAVE_BLUE_GENE_Q) 111 #define VEC_SIZE 2 112 #define VEC_TYPE double _Complex 113 #define VEC_LD(addr) __lfpd(addr) 114 #define VEC_LDU(addr) __cmplx((addr)[0], (addr)[1]) 115 #define VEC_ST(addr, vec) __stfpd(addr, vec) 116 #define VEC_STU(addr, vec) (addr)[0] = __creal(vec); (addr)[1] = __cimag(vec) 117 #define VEC_FMA(aa, bb, cc) __fpmadd(cc, aa, bb) 118 #define VEC_SCAL(aa) __cmplx(aa, aa) 119 #define VEC_ZERO __cmplx(0.0, 0.0) 120 121 #define DEPTH 16 122 #endif 123 124 #endif 125 126 #ifndef VEC_SIZE 127 128 #define VEC_SIZE 1 129 #define VEC_TYPE double 130 #define VEC_LD(addr) (addr)[0] 131 #define VEC_LDU(addr) VEC_LD(addr) 132 #define VEC_ST(addr, vec) (addr)[0] = vec 133 #define VEC_STU(addr, vec) VEC_ST(addr, vec) 134 #define VEC_FMA(aa, bb, cc) aa*bb + cc 135 #define VEC_SCAL(aa) aa 136 #define VEC_ZERO 0.0 137 138 #define DEPTH 8 139 #endif 140 141 #define max1(x) (((x) > 0)?(x):1) 142 143 #endif 144