1 /*
2  Copyright (C) 2010 X. Andrade
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 2, or (at your option)
7  any later version.
8 
9  This program is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  GNU General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with this program; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  02110-1301, USA.
18 
19 */
20 
21 #include <config.h>
22 
23 #ifndef VECTORS_H
24 #define VECTORS_H
25 
26 #ifdef HAVE_VEC
27 
/*
 * x86 wide-vector back ends.  AVX-512 (eight doubles per vector) takes
 * precedence; AVX (four doubles per vector) is used when only HAVE_M256D
 * is available.
 *
 * Macro contract shared by both branches:
 *   VEC_LD / VEC_LDU   load from an aligned / unaligned address
 *   VEC_ST / VEC_STU   store to an aligned / unaligned address
 *   VEC_FMA(a, b, c)   a*b + c, element by element
 *   VEC_SCAL(s)        vector with every lane set to s
 *   VEC_ZERO           vector with every lane set to zero
 *   FENCE              full memory fence
 *
 * VEC_ST maps to the streaming (non-temporal) store intrinsic.
 * NOTE(review): FENCE is presumably what callers issue after a run of
 * VEC_ST stores; DEPTH is only defined here and consumed elsewhere --
 * confirm both against the code that uses them.
 */
#if defined(HAVE_M512D)

#include <immintrin.h>
#include <emmintrin.h>

#define VEC_TYPE __m512d
#define VEC_SIZE 8
#define DEPTH 16

#define VEC_ZERO _mm512_setzero_pd()
#define VEC_SCAL(value) _mm512_set1_pd(value)
#define VEC_FMA(fa, fb, fc) _mm512_fmadd_pd(fa, fb, fc)

#define VEC_LD(ptr) _mm512_load_pd(ptr)
#define VEC_LDU(ptr) _mm512_loadu_pd(ptr)
#define VEC_ST(ptr, val) _mm512_stream_pd(ptr, val)
#define VEC_STU(ptr, val) _mm512_storeu_pd(ptr, val)

#define FENCE _mm_mfence()

#elif defined(HAVE_M256D)

#include <immintrin.h>
#include <emmintrin.h>
#if defined(HAVE_FMA3) || defined(HAVE_FMA4)
#include <x86intrin.h>
#endif

#define VEC_TYPE __m256d
#define VEC_SIZE 4
#define DEPTH 16

#define VEC_ZERO _mm256_setzero_pd()
#define VEC_SCAL(value) _mm256_set1_pd(value)

/* Fused multiply-add: FMA3 is preferred, then FMA4; without either the
   product and the sum are issued as two separate instructions. */
#if defined(HAVE_FMA3)
#define VEC_FMA(fa, fb, fc) _mm256_fmadd_pd(fa, fb, fc)
#elif defined(HAVE_FMA4)
#define VEC_FMA(fa, fb, fc) _mm256_macc_pd(fa, fb, fc)
#else
#define VEC_FMA(fa, fb, fc) _mm256_add_pd(fc, _mm256_mul_pd(fa, fb))
#endif

#define VEC_LD(ptr) _mm256_load_pd(ptr)
#define VEC_LDU(ptr) _mm256_loadu_pd(ptr)
#define VEC_ST(ptr, val) _mm256_stream_pd(ptr, val)
#define VEC_STU(ptr, val) _mm256_storeu_pd(ptr, val)

#define FENCE _mm_mfence()

#endif
69 
/*
 * SSE2 back end: two doubles per vector.
 *
 * This is the narrowest x86 option, so it is selected only when neither
 * the AVX-512 nor the AVX back end has been chosen.  Excluding
 * HAVE_M512D as well as HAVE_M256D keeps a configuration that defines
 * HAVE_M512D and HAVE_M128D (but not HAVE_M256D) from redefining every
 * VEC_* macro with the 128-bit versions.
 *
 *   VEC_LD / VEC_LDU   load from an aligned / unaligned address
 *   VEC_ST / VEC_STU   streaming (non-temporal) store to an aligned
 *                      address / ordinary store to an unaligned address
 *   VEC_FMA(a, b, c)   a*b + c, element by element
 *   VEC_SCAL(s)        vector with both lanes set to s
 *   VEC_ZERO           vector with both lanes set to zero
 *   FENCE              full memory fence
 */
#if !defined(HAVE_M512D) && !defined(HAVE_M256D) && defined(HAVE_M128D)
#include <emmintrin.h>
#if defined(HAVE_FMA4) || defined(HAVE_FMA3)
#include <x86intrin.h>
#endif
#define VEC_SIZE 2
#define VEC_TYPE __m128d
#define VEC_LD(addr) _mm_load_pd(addr)
#define VEC_LDU(addr) _mm_loadu_pd(addr)
#define VEC_ST(addr, vec)  _mm_stream_pd(addr, vec)
#define VEC_STU(addr, vec)  _mm_storeu_pd(addr, vec)
/* Fused multiply-add when the hardware offers it (FMA3 preferred over
   FMA4); otherwise a separate multiply followed by an add. */
#ifdef HAVE_FMA3
#define VEC_FMA(aa, bb, cc) _mm_fmadd_pd(aa, bb, cc)
#elif defined(HAVE_FMA4)
#define VEC_FMA(aa, bb, cc) _mm_macc_pd(aa, bb, cc)
#else
#define VEC_FMA(aa, bb, cc) _mm_add_pd(cc, _mm_mul_pd(aa, bb))
#endif
#define VEC_SCAL(aa) _mm_set1_pd(aa)
#define VEC_ZERO _mm_setzero_pd()
#define FENCE _mm_mfence()

/* NOTE(review): DEPTH is consumed outside this header -- confirm its
   meaning against the code that uses it. */
#define DEPTH 16
#endif
94 
/*
 * Blue Gene/Q back end: four doubles per vector (vector4double).
 *
 *   VEC_LD / VEC_ST    vector load / store through vec_ld / vec_st
 *                      (NOTE(review): presumably require a suitably
 *                      aligned address -- confirm against the compiler
 *                      documentation)
 *   VEC_LDU / VEC_STU  element-by-element load / store, usable with any
 *                      address that is valid for a double
 *   VEC_FMA(a, b, c)   vec_madd(a, b, c)
 *   VEC_SCAL(s)        vector with all four lanes set to s
 *   VEC_SCAL_LD(addr)  vec_lds on addr (NOTE(review): presumably loads
 *                      one double and replicates it -- confirm)
 *   VEC_ZERO           vector with all four lanes set to zero
 *
 * VEC_STU is a single parenthesised comma expression so that it behaves
 * as one statement: `if (cond) VEC_STU(p, v);` guards all four stores,
 * not just the first.  Its arguments are evaluated more than once, so do
 * not pass expressions with side effects.
 */
#ifdef HAVE_BLUE_GENE_Q
#define VEC_SIZE 4
#define VEC_TYPE vector4double
#define VEC_LD(addr) vec_ld(0, (double *) (addr))
#define VEC_LDU(addr) ((vector4double) {(addr)[0], (addr)[1], (addr)[2], (addr)[3]})
#define VEC_ST(addr, vec)  vec_st(vec, 0, (double *) (addr))
#define VEC_STU(addr, vec)                \
  ((addr)[0] = vec_extract((vec), 0),     \
   (addr)[1] = vec_extract((vec), 1),     \
   (addr)[2] = vec_extract((vec), 2),     \
   (addr)[3] = vec_extract((vec), 3))
#define VEC_FMA(aa, bb, cc) vec_madd(aa, bb, cc)
#define VEC_SCAL(aa) ((vector4double) {(aa), (aa), (aa), (aa)})
#define VEC_SCAL_LD(addr) vec_lds(0, (double*) (addr))
#define VEC_ZERO ((vector4double) {0.0, 0.0, 0.0, 0.0})

/* NOTE(review): DEPTH is consumed outside this header -- confirm its
   meaning against the code that uses it. */
#define DEPTH 16
#endif
109 
/*
 * Blue Gene (pre-Q) back end: a pair of doubles carried in a
 * `double _Complex` value, real part first.
 *
 *   VEC_LD / VEC_ST    pair load / store through __lfpd / __stfpd
 *                      (NOTE(review): presumably require a suitably
 *                      aligned address -- confirm against the compiler
 *                      documentation)
 *   VEC_LDU / VEC_STU  element-by-element load / store
 *   VEC_FMA(a, b, c)   __fpmadd(c, a, b); note that the addend is the
 *                      first argument of the built-in
 *   VEC_SCAL(s)        pair with both elements set to s
 *   VEC_ZERO           pair with both elements set to zero
 *
 * VEC_STU is a single parenthesised comma expression so that it behaves
 * as one statement: `if (cond) VEC_STU(p, v);` guards both stores, not
 * just the first.  Its arguments are evaluated more than once, so do not
 * pass expressions with side effects.
 */
#if defined(HAVE_BLUE_GENE) && !defined(HAVE_BLUE_GENE_Q)
#define VEC_SIZE 2
#define VEC_TYPE double _Complex
#define VEC_LD(addr) __lfpd(addr)
#define VEC_LDU(addr) __cmplx((addr)[0], (addr)[1])
#define VEC_ST(addr, vec)  __stfpd(addr, vec)
#define VEC_STU(addr, vec) \
  ((addr)[0] = __creal(vec), (addr)[1] = __cimag(vec))
#define VEC_FMA(aa, bb, cc) __fpmadd(cc, aa, bb)
#define VEC_SCAL(aa) __cmplx(aa, aa)
#define VEC_ZERO __cmplx(0.0, 0.0)

/* NOTE(review): DEPTH is consumed outside this header -- confirm its
   meaning against the code that uses it. */
#define DEPTH 16
#endif
123 
124 #endif
125 
/*
 * Scalar fallback, used when no vector back end defined VEC_SIZE: a
 * "vector" is a single double.
 *
 *   VEC_LD / VEC_LDU   read the double at addr
 *   VEC_ST / VEC_STU   write vec to the double at addr
 *   VEC_FMA(a, b, c)   a*b + c
 *   VEC_SCAL(s)        s itself
 *   VEC_ZERO           0.0
 *
 * Every argument and every expansion is parenthesised so the macros can
 * be used inside larger expressions and with compound arguments, e.g.
 * VEC_FMA(x + y, z, w) is (x + y)*z + w and 2.0*VEC_FMA(a, b, c) scales
 * the whole result.
 */
#ifndef VEC_SIZE

#define VEC_SIZE 1
#define VEC_TYPE double
#define VEC_LD(addr) ((addr)[0])
#define VEC_LDU(addr) VEC_LD(addr)
#define VEC_ST(addr, vec) ((addr)[0] = (vec))
#define VEC_STU(addr, vec) VEC_ST(addr, vec)
#define VEC_FMA(aa, bb, cc) ((aa)*(bb) + (cc))
#define VEC_SCAL(aa) (aa)
#define VEC_ZERO 0.0

/* NOTE(review): DEPTH is consumed outside this header -- confirm its
   meaning against the code that uses it. */
#define DEPTH 8
#endif
140 
/* max1(v): v when v is strictly positive, otherwise 1.  The argument
   appears twice in the expansion, so avoid passing expressions with
   side effects. */
#define max1(v) ((v) > 0 ? (v) : 1)
142 
143 #endif
144