1 /* 2 * Copyright (C) 2004-2021 Edward F. Valeev 3 * 4 * This file is part of Libint. 5 * 6 * Libint is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU Lesser General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * Libint is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public License 17 * along with Libint. If not, see <http://www.gnu.org/licenses/>. 18 * 19 */ 20 21 #ifndef _libint2_src_lib_libint_vectorppc_h_ 22 #define _libint2_src_lib_libint_vectorppc_h_ 23 24 #include <libint2/util/type_traits.h> 25 26 // clang on BG/Q defines __VECTOR4DOUBLE__ and intrinsics in this header file 27 #if defined(__clang__) && defined(__bgq__) 28 # include <qpxintrin.h> 29 #endif 30 31 #ifdef __VECTOR4DOUBLE__ 32 33 namespace libint2 { namespace simd { 34 35 /** 36 * SIMD vector of 4 double-precision floating-point real numbers, operations on which use QPX instructions 37 * available on some recent PowerPC hardware, e.g. Blue Gene/Q. 38 */ 39 struct VectorQPXDouble { 40 41 typedef double T; 42 vector4double d; 43 44 /** 45 * creates a vector of default-initialized values. 46 */ VectorQPXDoubleVectorQPXDouble47 VectorQPXDouble() {} 48 49 /** Initializes all elements to the same value 50 * @param a the value to which all elements will be set 51 */ VectorQPXDoubleVectorQPXDouble52 VectorQPXDouble(T a) { 53 d = vec_splats(a); 54 } 55 56 /** 57 * creates a vector of values initialized by an ordinary static-sized array 58 */ VectorQPXDoubleVectorQPXDouble59 VectorQPXDouble(T (&a)[4]) { 60 d = vec_ld(0, &a[0]); 61 } 62 63 /** 64 * creates a vector of values initialized by an ordinary static-sized array 65 */ VectorQPXDoubleVectorQPXDouble66 VectorQPXDouble(T a0, T a1, T a2, T a3) { 67 T a[4]; a[0] = a0; a[1] = a1; a[2] = a2; a[3] = a3; 68 d = vec_ld(0, &a[0]); 69 } 70 71 VectorQPXDouble& operator=(T a) { 72 d = vec_splats(a); 73 return *this; 74 } 75 76 VectorQPXDouble& operator+=(VectorQPXDouble a) { 77 d = vec_add(d, a.d); 78 return *this; 79 } 80 81 VectorQPXDouble& operator-=(VectorQPXDouble a) { 82 d = vec_sub(d, a.d); 83 return *this; 84 } 85 86 operator double() const { 87 double d0 = vec_extract(d, 0); 88 return d0; 89 } 90 convertVectorQPXDouble91 void convert(double(&a)[4]) const { 92 vec_st(d, 0, &a[0]); 93 } 94 95 }; 96 97 //@{ arithmetic operators 98 inline VectorQPXDouble operator*(double a, VectorQPXDouble b) { 99 VectorQPXDouble c; 100 VectorQPXDouble _a(a); 101 c.d = vec_mul(_a.d, b.d); 102 return c; 103 } 104 105 inline VectorQPXDouble operator*(VectorQPXDouble a, double b) { 106 VectorQPXDouble c; 107 VectorQPXDouble _b(b); 108 c.d = vec_mul(a.d, _b.d); 109 return c; 110 } 111 112 inline VectorQPXDouble operator*(int a, VectorQPXDouble b) { 113 if (a == 1) 114 return b; 115 else { 116 VectorQPXDouble c; 117 VectorQPXDouble _a((double)a); 118 c.d = vec_mul(_a.d, b.d); 119 return c; 120 } 121 } 122 123 inline VectorQPXDouble operator*(VectorQPXDouble a, int b) { 124 if (b == 1) 125 return a; 126 else { 127 VectorQPXDouble c; 128 VectorQPXDouble _b((double)b); 129 c.d = vec_mul(a.d, _b.d); 130 return c; 131 } 132 } 133 134 inline VectorQPXDouble operator*(VectorQPXDouble a, VectorQPXDouble b) { 135 VectorQPXDouble c; 136 c.d = vec_mul(a.d, b.d); 137 return c; 138 } 139 140 inline VectorQPXDouble operator+(VectorQPXDouble a, VectorQPXDouble b) { 141 VectorQPXDouble c; 142 c.d = vec_add(a.d, b.d); 143 return c; 144 } 145 146 inline VectorQPXDouble operator-(VectorQPXDouble a, VectorQPXDouble b) { 147 VectorQPXDouble c; 148 c.d = vec_sub(a.d, b.d); 149 return c; 150 } 151 152 inline VectorQPXDouble operator/(VectorQPXDouble a, VectorQPXDouble b) { 153 VectorQPXDouble c; 154 c.d = vec_swdiv(a.d, b.d); 155 return c; 156 } 157 fma_plus(VectorQPXDouble a,VectorQPXDouble b,VectorQPXDouble c)158 inline VectorQPXDouble fma_plus(VectorQPXDouble a, VectorQPXDouble b, VectorQPXDouble c) { 159 VectorQPXDouble d; 160 d.d = vec_madd(a.d, b.d, c.d); 161 return d; 162 } fma_minus(VectorQPXDouble a,VectorQPXDouble b,VectorQPXDouble c)163 inline VectorQPXDouble fma_minus(VectorQPXDouble a, VectorQPXDouble b, VectorQPXDouble c) { 164 VectorQPXDouble d; 165 d.d = vec_msub(a.d, b.d, c.d); 166 return d; 167 } 168 169 //@} 170 171 };}; // namespace libint2::simd 172 173 namespace libint2 { 174 175 //@{ vector traits of VectorQPXDouble 176 177 template <> 178 struct is_vector<simd::VectorQPXDouble> { 179 static const bool value = true; 180 }; 181 182 template <> 183 struct vector_traits<simd::VectorQPXDouble> { 184 typedef double scalar_type; 185 static const size_t extent = 4; 186 }; 187 188 //@} 189 190 } // namespace libint2 191 192 #endif // QPX-only 193 194 // only xlC on BG/L and BG/P supports FP2 (Double Hummer)instructions, not sure how to check if they are enabled 195 #if (defined(__xlC__) || defined(__clang__)) && (defined(__bgp__) || defined(__blrts__)) 196 197 #if defined(__xlC__) 198 # include <builtins.h> 199 #endif 200 #if defined(__clang__) 201 # include <fp2intrin.h> 202 #endif 203 204 namespace libint2 { namespace simd { 205 206 /** 207 * SIMD vector of 2 double-precision floating-point real numbers, operations on which use FP2 (Double Hummer) instructions 208 * available on some PowerPC hardware, e.g. Blue Gene/L and Blue Gene/P. 209 */ 210 struct VectorFP2Double { 211 212 typedef double T; 213 double _Complex d; //< represents 2 doubles 214 215 /** 216 * creates a vector of default-initialized values. 217 */ 218 VectorFP2Double() {} 219 220 /** Initializes all elements to the same value 221 * @param a the value to which all elements will be set 222 */ 223 VectorFP2Double(T a) { 224 T a01[2]; a01[0] = a; a01[1] = a; 225 d = __lfpd(&a01[0]); 226 } 227 228 /** 229 * creates a vector of values initialized by an ordinary static-sized array 230 */ 231 VectorFP2Double(T (&a)[2]) { 232 d = __lfpd(&a[0]); 233 } 234 235 /** 236 * creates a vector of values initialized by an ordinary static-sized array 237 */ 238 VectorFP2Double(T a0, T a1) { 239 T a[2]; a[0] = a0; a[1] = a1; 240 d = __lfpd(&a[0]); 241 } 242 243 VectorFP2Double& operator=(T a) { 244 T a01[2]; a01[0] = a; a01[1] = a; 245 d = __lfpd(&a01[0]); 246 return *this; 247 } 248 249 VectorFP2Double& operator+=(VectorFP2Double a) { 250 d = __fpadd(d, a.d); 251 return *this; 252 } 253 254 VectorFP2Double& operator-=(VectorFP2Double a) { 255 d = __fpsub(d, a.d); 256 return *this; 257 } 258 259 operator double() const { 260 double d0 = __creal(d); 261 return d0; 262 } 263 264 void convert(double(&a)[2]) const { 265 __stfpd(&a[0], d); 266 } 267 }; 268 269 //@{ arithmetic operators 270 inline VectorFP2Double operator*(double a, VectorFP2Double b) { 271 VectorFP2Double c; 272 c.d = __fxpmul(b.d, a); 273 return c; 274 } 275 276 inline VectorFP2Double operator*(VectorFP2Double a, double b) { 277 VectorFP2Double c; 278 c.d = __fxpmul(a.d, b); 279 return c; 280 } 281 282 inline VectorFP2Double operator*(int a, VectorFP2Double b) { 283 if (a == 1) 284 return b; 285 else { 286 VectorFP2Double c; 287 c.d = __fxpmul(b.d, (double)a); 288 return c; 289 } 290 } 291 292 inline VectorFP2Double operator*(VectorFP2Double a, int b) { 293 if (b == 1) 294 return a; 295 else { 296 VectorFP2Double c; 297 c.d = __fxpmul(a.d, (double)b); 298 return c; 299 } 300 } 301 302 inline VectorFP2Double operator*(VectorFP2Double a, VectorFP2Double b) { 303 VectorFP2Double c; 304 c.d = __fpmul(a.d, b.d); 305 return c; 306 } 307 308 inline VectorFP2Double operator+(VectorFP2Double a, VectorFP2Double b) { 309 VectorFP2Double c; 310 c.d = __fpadd(a.d, b.d); 311 return c; 312 } 313 314 inline VectorFP2Double operator-(VectorFP2Double a, VectorFP2Double b) { 315 VectorFP2Double c; 316 c.d = __fpsub(a.d, b.d); 317 return c; 318 } 319 320 /* there's no division DH instruction that I can see 321 inline VectorFP2Double operator/(VectorFP2Double a, VectorFP2Double b) { 322 VectorFP2Double c; 323 } 324 */ 325 326 inline VectorFP2Double fma_plus(VectorFP2Double a, VectorFP2Double b, VectorFP2Double c) { 327 VectorFP2Double d; 328 d.d = __fpmadd(a.d, b.d, c.d); 329 return d; 330 } 331 inline VectorFP2Double fma_minus(VectorFP2Double a, VectorFP2Double b, VectorFP2Double c) { 332 VectorFP2Double d; 333 d.d = __fpmsub(a.d, b.d, c.d); 334 return d; 335 } 336 337 //@} 338 339 };}; // namespace libint2::simd 340 341 namespace libint2 { 342 343 //@{ vector traits of VectorFP2Double 344 345 template <> 346 struct is_vector<simd::VectorFP2Double> { 347 static const bool value = true; 348 }; 349 350 template <> 351 struct vector_traits<simd::VectorFP2Double> { 352 typedef double scalar_type; 353 static const size_t extent = 2; 354 }; 355 356 //@} 357 358 } // namespace libint2 359 360 #endif // FP2-only 361 362 #endif // header guard 363 364