1 #ifndef __CS_BLAS_H__ 2 #define __CS_BLAS_H__ 3 4 /*============================================================================ 5 * BLAS (Basic Linear Algebra Subroutine) functions 6 *============================================================================*/ 7 8 /* 9 This file is part of Code_Saturne, a general-purpose CFD tool. 10 11 Copyright (C) 1998-2021 EDF S.A. 12 13 This program is free software; you can redistribute it and/or modify it under 14 the terms of the GNU General Public License as published by the Free Software 15 Foundation; either version 2 of the License, or (at your option) any later 16 version. 17 18 This program is distributed in the hope that it will be useful, but WITHOUT 19 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 20 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 21 details. 22 23 You should have received a copy of the GNU General Public License along with 24 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin 25 Street, Fifth Floor, Boston, MA 02110-1301, USA. 26 */ 27 28 /*----------------------------------------------------------------------------*/ 29 30 #include "cs_defs.h" 31 32 /*---------------------------------------------------------------------------- 33 * External library headers 34 *----------------------------------------------------------------------------*/ 35 36 /*---------------------------------------------------------------------------- 37 * Local headers 38 *----------------------------------------------------------------------------*/ 39 40 #include "cs_base.h" 41 42 /*----------------------------------------------------------------------------*/ 43 44 BEGIN_C_DECLS 45 46 /*============================================================================ 47 * Macro definitions 48 *============================================================================*/ 49 50 /*============================================================================ 51 * Type definitions 52 *============================================================================*/ 53 54 /* BLAS reduction algorithm families */ 55 56 typedef enum { 57 58 CS_BLAS_REDUCE_SUPERBLOCK, 59 CS_BLAS_REDUCE_KAHAN 60 61 } cs_blas_reduce_t; 62 63 /*============================================================================ 64 * Public function prototypes 65 *============================================================================*/ 66 67 /*----------------------------------------------------------------------------*/ 68 /*! 69 * \brief Set the preferred BLAS reduction algorithm family. 70 * 71 * This may not be enforced for all algorithms, though it should at least 72 * be enforced for the most general functions such as \ref cs_dot. 73 * 74 * \param[in] mode BLAS mode to use 75 */ 76 /*----------------------------------------------------------------------------*/ 77 78 void 79 cs_blas_set_reduce_algorithm(cs_blas_reduce_t mode); 80 81 /*---------------------------------------------------------------------------- 82 * Constant times a vector plus a vector: y <-- ax + y 83 * 84 * parameters: 85 * n <-- size of arrays x and y 86 * a <-- multiplier for x 87 * x <-- array of floating-point values 88 * y <-- array of floating-point values 89 *----------------------------------------------------------------------------*/ 90 91 void 92 cs_axpy(cs_lnum_t n, 93 double a, 94 const cs_real_t *x, 95 cs_real_t *restrict y); 96 97 /*---------------------------------------------------------------------------- 98 * Return the sum of a vector. For better precision, a superblock algorithm 99 * is used. 100 * 101 * parameters: 102 * n <-- size of array x 103 * x <-- array of floating-point values 104 * 105 * returns: 106 * the resulting sum 107 *----------------------------------------------------------------------------*/ 108 109 double 110 cs_sum(cs_lnum_t n, 111 const cs_real_t *x); 112 113 /*---------------------------------------------------------------------------- 114 * Return the weighted sum of a vector. For better precision, a superblock 115 * algorithm is used. 116 * 117 * \param[in] n size of array x 118 * \param[in] w array of floating-point weights 119 * \param[in] x array of floating-point values 120 * 121 * \return the resulting weighted sum 122 *----------------------------------------------------------------------------*/ 123 124 double 125 cs_weighted_sum(cs_lnum_t n, 126 const cs_real_t *w, 127 const cs_real_t *x); 128 129 /*---------------------------------------------------------------------------- 130 * Return the dot product of 2 vectors: x.y 131 * 132 * parameters: 133 * n <-- size of arrays x and y 134 * x <-- array of floating-point values 135 * y <-- array of floating-point values 136 * 137 * returns: 138 * dot product 139 *----------------------------------------------------------------------------*/ 140 141 double 142 cs_dot(cs_lnum_t n, 143 const cs_real_t *x, 144 const cs_real_t *y); 145 146 /*---------------------------------------------------------------------------- 147 * Return dot products of a vector with itself: x.x 148 * 149 * parameters: 150 * n <-- size of arrays x 151 * x <-- array of floating-point values 152 * 153 * returns: 154 * dot product 155 *----------------------------------------------------------------------------*/ 156 157 double 158 cs_dot_xx(cs_lnum_t n, 159 const cs_real_t *x); 160 161 /*---------------------------------------------------------------------------- 162 * Return weighted dot products of a vector with itself: x.x 163 * 164 * For better precision, a superblock algorithm is used. 165 * 166 * parameters: 167 * n <-- size of arrays x 168 * w <-- array of weights 169 * x <-- array of floating-point values 170 * 171 * returns: 172 * dot product 173 *----------------------------------------------------------------------------*/ 174 175 double 176 cs_dot_wxx(cs_lnum_t n, 177 const cs_real_t *w, 178 const cs_real_t *x); 179 180 /*---------------------------------------------------------------------------- 181 * Return the double dot product of 2 vectors: x.x, and x.y 182 * 183 * The products could be computed separately, but computing them 184 * simultaneously adds more optimization opportunities and possibly better 185 * cache behavior. 186 * 187 * parameters: 188 * n <-- size of arrays x and y 189 * x <-- array of floating-point values 190 * y <-- array of floating-point values 191 * xx --> x.x dot product 192 * xy --> x.y dot product 193 *----------------------------------------------------------------------------*/ 194 195 void 196 cs_dot_xx_xy(cs_lnum_t n, 197 const cs_real_t *restrict x, 198 const cs_real_t *restrict y, 199 double *xx, 200 double *xy); 201 202 /*---------------------------------------------------------------------------- 203 * Return the double dot product of 3 vectors: x.y, and y.z 204 * 205 * The products could be computed separately, but computing them 206 * simultaneously adds more optimization opportunities and possibly better 207 * cache behavior. 208 * 209 * parameters: 210 * n <-- size of arrays x and y 211 * x <-- array of floating-point values 212 * y <-- array of floating-point values 213 * z <-- array of floating-point values 214 * xy --> x.y dot product 215 * yz --> x.z dot product 216 *----------------------------------------------------------------------------*/ 217 218 void 219 cs_dot_xy_yz(cs_lnum_t n, 220 const cs_real_t *restrict x, 221 const cs_real_t *restrict y, 222 const cs_real_t *restrict z, 223 double *xx, 224 double *xy); 225 226 /*---------------------------------------------------------------------------- 227 * Return 3 dot products of 3 vectors: x.x, x.y, and y.z 228 * 229 * The products could be computed separately, but computing them 230 * simultaneously adds more optimization opportunities and possibly better 231 * cache behavior. 232 * 233 * parameters: 234 * n <-- size of arrays x and y 235 * x <-- array of floating-point values 236 * y <-- array of floating-point values 237 * z <-- array of floating-point values 238 * xx --> x.y dot product 239 * xy --> x.y dot product 240 * yz --> y.z dot product 241 *----------------------------------------------------------------------------*/ 242 243 void 244 cs_dot_xx_xy_yz(cs_lnum_t n, 245 const cs_real_t *restrict x, 246 const cs_real_t *restrict y, 247 const cs_real_t *restrict z, 248 double *xx, 249 double *xy, 250 double *yz); 251 252 /*---------------------------------------------------------------------------- 253 * Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z 254 * 255 * The products could be computed separately, but computing them 256 * simultaneously adds more optimization opportunities and possibly better 257 * cache behavior. 258 * 259 * parameters: 260 * n <-- size of arrays x and y 261 * x <-- array of floating-point values 262 * y <-- array of floating-point values 263 * z <-- array of floating-point values 264 * xx --> x.y dot product 265 * yy --> y.y dot product 266 * xy --> x.y dot product 267 * xz --> x.z dot product 268 * yz --> y.z dot product 269 *----------------------------------------------------------------------------*/ 270 271 void 272 cs_dot_xx_yy_xy_xz_yz(cs_lnum_t n, 273 const cs_real_t *restrict x, 274 const cs_real_t *restrict y, 275 const cs_real_t *restrict z, 276 double *xx, 277 double *yy, 278 double *xy, 279 double *xz, 280 double *yz); 281 282 /*---------------------------------------------------------------------------- 283 * Return the global dot product of 2 vectors: x.y 284 * 285 * In parallel mode, the local results are summed on the default 286 * global communicator. 287 * 288 * parameters: 289 * n <-- size of arrays x and y 290 * x <-- array of floating-point values 291 * y <-- array of floating-point values 292 * 293 * returns: 294 * dot product 295 *----------------------------------------------------------------------------*/ 296 297 double 298 cs_gdot(cs_lnum_t n, 299 const cs_real_t *x, 300 const cs_real_t *y); 301 302 /*---------------------------------------------------------------------------- 303 * Return the global residual of 2 intensive vectors: 304 * 1/sum(vol) . sum(vol.x.y) 305 * 306 * parameters: 307 * n <-- size of arrays x and y 308 * vol <-- array of floating-point values 309 * x <-- array of floating-point values 310 * y <-- array of floating-point values 311 * 312 * returns: 313 * global residual 314 *----------------------------------------------------------------------------*/ 315 316 double 317 cs_gres(cs_lnum_t n, 318 const cs_real_t *vol, 319 const cs_real_t *x, 320 const cs_real_t *y); 321 322 /*---------------------------------------------------------------------------- 323 * Return the global volume weighted mean 324 * 1/sum(vol) . sum(vol.x) 325 * 326 * parameters: 327 * n <-- size of arrays x 328 * vol <-- array of floating-point values 329 * x <-- array of floating-point values 330 * 331 * returns: 332 * global residual 333 *----------------------------------------------------------------------------*/ 334 335 double 336 cs_gmean(cs_lnum_t n, 337 const cs_real_t *vol, 338 const cs_real_t *x); 339 340 /*----------------------------------------------------------------------------*/ 341 342 END_C_DECLS 343 344 #endif /* __CS_BLAS_H__ */ 345