1 /*
2  *  Copyright (C) 2004-2021 Edward F. Valeev
3  *
4  *  This file is part of Libint.
5  *
6  *  Libint is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU Lesser General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  Libint is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public License
17  *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef _libint2_src_lib_libint_vectorppc_h_
22 #define _libint2_src_lib_libint_vectorppc_h_
23 
24 #include <libint2/util/type_traits.h>
25 
26 // clang on BG/Q defines __VECTOR4DOUBLE__ and intrinsics in this header file
27 #if defined(__clang__) && defined(__bgq__)
28 # include <qpxintrin.h>
29 #endif
30 
31 #ifdef __VECTOR4DOUBLE__
32 
33 namespace libint2 { namespace simd {
34 
35   /**
36    * SIMD vector of 4 double-precision floating-point real numbers, operations on which use QPX instructions
37    * available on some recent PowerPC hardware, e.g. Blue Gene/Q.
38    */
39   struct VectorQPXDouble {
40 
41       typedef double T;
42       vector4double d;
43 
44       /**
45        * creates a vector of default-initialized values.
46        */
VectorQPXDoubleVectorQPXDouble47       VectorQPXDouble() {}
48 
49       /** Initializes all elements to the same value
50        *  @param a the value to which all elements will be set
51        */
VectorQPXDoubleVectorQPXDouble52       VectorQPXDouble(T a) {
53         d = vec_splats(a);
54       }
55 
56       /**
57        * creates a vector of values initialized by an ordinary static-sized array
58        */
VectorQPXDoubleVectorQPXDouble59       VectorQPXDouble(T (&a)[4]) {
60         d = vec_ld(0, &a[0]);
61       }
62 
63       /**
64        * creates a vector of values initialized by an ordinary static-sized array
65        */
VectorQPXDoubleVectorQPXDouble66       VectorQPXDouble(T a0, T a1, T a2, T a3) {
67         T a[4]; a[0] = a0; a[1] = a1; a[2] = a2; a[3] = a3;
68         d = vec_ld(0, &a[0]);
69       }
70 
71       VectorQPXDouble& operator=(T a) {
72         d = vec_splats(a);
73         return *this;
74       }
75 
76       VectorQPXDouble& operator+=(VectorQPXDouble a) {
77         d = vec_add(d, a.d);
78         return *this;
79       }
80 
81       VectorQPXDouble& operator-=(VectorQPXDouble a) {
82         d = vec_sub(d, a.d);
83         return *this;
84       }
85 
86       operator double() const {
87         double d0 = vec_extract(d, 0);
88         return d0;
89       }
90 
convertVectorQPXDouble91       void convert(double(&a)[4]) const {
92         vec_st(d, 0, &a[0]);
93       }
94 
95   };
96 
97   //@{ arithmetic operators
98   inline VectorQPXDouble operator*(double a, VectorQPXDouble b) {
99     VectorQPXDouble c;
100     VectorQPXDouble _a(a);
101     c.d = vec_mul(_a.d, b.d);
102     return c;
103   }
104 
105   inline VectorQPXDouble operator*(VectorQPXDouble a, double b) {
106     VectorQPXDouble c;
107     VectorQPXDouble _b(b);
108     c.d = vec_mul(a.d, _b.d);
109     return c;
110   }
111 
112   inline VectorQPXDouble operator*(int a, VectorQPXDouble b) {
113     if (a == 1)
114       return b;
115     else {
116       VectorQPXDouble c;
117       VectorQPXDouble _a((double)a);
118       c.d = vec_mul(_a.d, b.d);
119       return c;
120     }
121   }
122 
123   inline VectorQPXDouble operator*(VectorQPXDouble a, int b) {
124     if (b == 1)
125       return a;
126     else {
127       VectorQPXDouble c;
128       VectorQPXDouble _b((double)b);
129       c.d = vec_mul(a.d, _b.d);
130       return c;
131     }
132   }
133 
134   inline VectorQPXDouble operator*(VectorQPXDouble a, VectorQPXDouble b) {
135     VectorQPXDouble c;
136     c.d = vec_mul(a.d, b.d);
137     return c;
138   }
139 
140   inline VectorQPXDouble operator+(VectorQPXDouble a, VectorQPXDouble b) {
141     VectorQPXDouble c;
142     c.d = vec_add(a.d, b.d);
143     return c;
144   }
145 
146   inline VectorQPXDouble operator-(VectorQPXDouble a, VectorQPXDouble b) {
147     VectorQPXDouble c;
148     c.d = vec_sub(a.d, b.d);
149     return c;
150   }
151 
152   inline VectorQPXDouble operator/(VectorQPXDouble a, VectorQPXDouble b) {
153     VectorQPXDouble c;
154     c.d = vec_swdiv(a.d, b.d);
155     return c;
156   }
157 
fma_plus(VectorQPXDouble a,VectorQPXDouble b,VectorQPXDouble c)158   inline VectorQPXDouble fma_plus(VectorQPXDouble a, VectorQPXDouble b, VectorQPXDouble c) {
159     VectorQPXDouble d;
160     d.d = vec_madd(a.d, b.d, c.d);
161     return d;
162   }
fma_minus(VectorQPXDouble a,VectorQPXDouble b,VectorQPXDouble c)163   inline VectorQPXDouble fma_minus(VectorQPXDouble a, VectorQPXDouble b, VectorQPXDouble c) {
164     VectorQPXDouble d;
165     d.d = vec_msub(a.d, b.d, c.d);
166     return d;
167   }
168 
169   //@}
170 
171 };}; // namespace libint2::simd
172 
173 namespace libint2 {
174 
175   //@{ vector traits of VectorQPXDouble
176 
177   template <>
178   struct is_vector<simd::VectorQPXDouble> {
179       static const bool value = true;
180   };
181 
182   template <>
183   struct vector_traits<simd::VectorQPXDouble> {
184       typedef double scalar_type;
185       static const size_t extent = 4;
186   };
187 
188   //@}
189 
190 } // namespace libint2
191 
192 #endif // QPX-only
193 
194 // only xlC on BG/L and BG/P supports FP2 (Double Hummer)instructions, not sure how to check if they are enabled
195 #if (defined(__xlC__) || defined(__clang__)) && (defined(__bgp__) || defined(__blrts__))
196 
197 #if defined(__xlC__)
198 # include <builtins.h>
199 #endif
200 #if defined(__clang__)
201 # include <fp2intrin.h>
202 #endif
203 
204 namespace libint2 { namespace simd {
205 
206   /**
207    * SIMD vector of 2 double-precision floating-point real numbers, operations on which use FP2 (Double Hummer) instructions
208    * available on some PowerPC hardware, e.g. Blue Gene/L and Blue Gene/P.
209    */
210   struct VectorFP2Double {
211 
212       typedef double T;
213       double _Complex d; //< represents 2 doubles
214 
215       /**
216        * creates a vector of default-initialized values.
217        */
218       VectorFP2Double() {}
219 
220       /** Initializes all elements to the same value
221        *  @param a the value to which all elements will be set
222        */
223       VectorFP2Double(T a) {
224         T a01[2]; a01[0] = a; a01[1] = a;
225         d = __lfpd(&a01[0]);
226       }
227 
228       /**
229        * creates a vector of values initialized by an ordinary static-sized array
230        */
231       VectorFP2Double(T (&a)[2]) {
232         d = __lfpd(&a[0]);
233       }
234 
235       /**
236        * creates a vector of values initialized by an ordinary static-sized array
237        */
238       VectorFP2Double(T a0, T a1) {
239         T a[2]; a[0] = a0; a[1] = a1;
240         d = __lfpd(&a[0]);
241       }
242 
243       VectorFP2Double& operator=(T a) {
244         T a01[2]; a01[0] = a; a01[1] = a;
245         d = __lfpd(&a01[0]);
246         return *this;
247       }
248 
249       VectorFP2Double& operator+=(VectorFP2Double a) {
250         d = __fpadd(d, a.d);
251         return *this;
252       }
253 
254       VectorFP2Double& operator-=(VectorFP2Double a) {
255         d = __fpsub(d, a.d);
256         return *this;
257       }
258 
259       operator double() const {
260         double d0 = __creal(d);
261         return d0;
262       }
263 
264       void convert(double(&a)[2]) const {
265         __stfpd(&a[0], d);
266       }
267   };
268 
269   //@{ arithmetic operators
270   inline VectorFP2Double operator*(double a, VectorFP2Double b) {
271     VectorFP2Double c;
272     c.d = __fxpmul(b.d, a);
273     return c;
274   }
275 
276   inline VectorFP2Double operator*(VectorFP2Double a, double b) {
277     VectorFP2Double c;
278     c.d = __fxpmul(a.d, b);
279     return c;
280   }
281 
282   inline VectorFP2Double operator*(int a, VectorFP2Double b) {
283     if (a == 1)
284       return b;
285     else {
286       VectorFP2Double c;
287       c.d = __fxpmul(b.d, (double)a);
288       return c;
289     }
290   }
291 
292   inline VectorFP2Double operator*(VectorFP2Double a, int b) {
293     if (b == 1)
294       return a;
295     else {
296       VectorFP2Double c;
297       c.d = __fxpmul(a.d, (double)b);
298       return c;
299     }
300   }
301 
302   inline VectorFP2Double operator*(VectorFP2Double a, VectorFP2Double b) {
303     VectorFP2Double c;
304     c.d = __fpmul(a.d, b.d);
305     return c;
306   }
307 
308   inline VectorFP2Double operator+(VectorFP2Double a, VectorFP2Double b) {
309     VectorFP2Double c;
310     c.d = __fpadd(a.d, b.d);
311     return c;
312   }
313 
314   inline VectorFP2Double operator-(VectorFP2Double a, VectorFP2Double b) {
315     VectorFP2Double c;
316     c.d = __fpsub(a.d, b.d);
317     return c;
318   }
319 
320   /* there's no division DH instruction that I can see
321   inline VectorFP2Double operator/(VectorFP2Double a, VectorFP2Double b) {
322     VectorFP2Double c;
323   }
324   */
325 
326   inline VectorFP2Double fma_plus(VectorFP2Double a, VectorFP2Double b, VectorFP2Double c) {
327     VectorFP2Double d;
328     d.d = __fpmadd(a.d, b.d, c.d);
329     return d;
330   }
331   inline VectorFP2Double fma_minus(VectorFP2Double a, VectorFP2Double b, VectorFP2Double c) {
332     VectorFP2Double d;
333     d.d = __fpmsub(a.d, b.d, c.d);
334     return d;
335   }
336 
337   //@}
338 
339 };}; // namespace libint2::simd
340 
341 namespace libint2 {
342 
343   //@{ vector traits of VectorFP2Double
344 
345   template <>
346   struct is_vector<simd::VectorFP2Double> {
347       static const bool value = true;
348   };
349 
350   template <>
351   struct vector_traits<simd::VectorFP2Double> {
352       typedef double scalar_type;
353       static const size_t extent = 2;
354   };
355 
356   //@}
357 
358 } // namespace libint2
359 
360 #endif // FP2-only
361 
362 #endif // header guard
363 
364