1 #ifndef __CS_BLAS_H__
2 #define __CS_BLAS_H__
3 
4 /*============================================================================
5  * BLAS (Basic Linear Algebra Subroutine) functions
6  *============================================================================*/
7 
8 /*
9   This file is part of Code_Saturne, a general-purpose CFD tool.
10 
11   Copyright (C) 1998-2021 EDF S.A.
12 
13   This program is free software; you can redistribute it and/or modify it under
14   the terms of the GNU General Public License as published by the Free Software
15   Foundation; either version 2 of the License, or (at your option) any later
16   version.
17 
18   This program is distributed in the hope that it will be useful, but WITHOUT
19   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
21   details.
22 
23   You should have received a copy of the GNU General Public License along with
24   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
25   Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 */
27 
28 /*----------------------------------------------------------------------------*/
29 
30 #include "cs_defs.h"
31 
32 /*----------------------------------------------------------------------------
33  * External library headers
34  *----------------------------------------------------------------------------*/
35 
36 /*----------------------------------------------------------------------------
37  *  Local headers
38  *----------------------------------------------------------------------------*/
39 
40 #include "cs_base.h"
41 
42 /*----------------------------------------------------------------------------*/
43 
44 BEGIN_C_DECLS
45 
46 /*============================================================================
47  * Macro definitions
48  *============================================================================*/
49 
50 /*============================================================================
51  * Type definitions
52  *============================================================================*/
53 
54 /* BLAS reduction algorithm families */
55 
56 typedef enum {
57 
58   CS_BLAS_REDUCE_SUPERBLOCK,
59   CS_BLAS_REDUCE_KAHAN
60 
61 } cs_blas_reduce_t;
62 
63 /*============================================================================
64  *  Public function prototypes
65  *============================================================================*/
66 
67 /*----------------------------------------------------------------------------*/
68 /*!
69  * \brief Set the preferred BLAS reduction algorithm family.
70  *
71  * This may not be enforced for all algorithms, though it should at least
72  * be enforced for the most general functions such as \ref cs_dot.
73  *
74  * \param[in]  mode   BLAS mode to use
75  */
76 /*----------------------------------------------------------------------------*/
77 
78 void
79 cs_blas_set_reduce_algorithm(cs_blas_reduce_t  mode);
80 
81 /*----------------------------------------------------------------------------
82  * Constant times a vector plus a vector: y <-- ax + y
83  *
84  * parameters:
85  *   n <-- size of arrays x and y
86  *   a <-- multiplier for x
87  *   x <-- array of floating-point values
88  *   y <-- array of floating-point values
89  *----------------------------------------------------------------------------*/
90 
91 void
92 cs_axpy(cs_lnum_t         n,
93         double            a,
94         const cs_real_t  *x,
95         cs_real_t        *restrict y);
96 
97 /*----------------------------------------------------------------------------
98  * Return the sum of a vector. For better precision, a superblock algorithm
99  * is used.
100  *
101  * parameters:
102  *   n <-- size of array x
103  *   x <-- array of floating-point values
104  *
105  * returns:
106  *   the resulting sum
107  *----------------------------------------------------------------------------*/
108 
109 double
110 cs_sum(cs_lnum_t         n,
111        const cs_real_t  *x);
112 
113 /*----------------------------------------------------------------------------
114  * Return the weighted sum of a vector. For better precision, a superblock
115  * algorithm is used.
116  *
117  * \param[in]  n  size of array x
118  * \param[in]  w  array of floating-point weights
119  * \param[in]  x  array of floating-point values
120  *
121  * \return the resulting weighted sum
122  *----------------------------------------------------------------------------*/
123 
124 double
125 cs_weighted_sum(cs_lnum_t         n,
126                 const cs_real_t  *w,
127                 const cs_real_t  *x);
128 
129 /*----------------------------------------------------------------------------
130  * Return the dot product of 2 vectors: x.y
131  *
132  * parameters:
133  *   n <-- size of arrays x and y
134  *   x <-- array of floating-point values
135  *   y <-- array of floating-point values
136  *
137  * returns:
138  *   dot product
139  *----------------------------------------------------------------------------*/
140 
141 double
142 cs_dot(cs_lnum_t         n,
143        const cs_real_t  *x,
144        const cs_real_t  *y);
145 
146 /*----------------------------------------------------------------------------
147  * Return dot products of a vector with itself: x.x
148  *
149  * parameters:
150  *   n  <-- size of arrays x
151  *   x  <-- array of floating-point values
152  *
153  * returns:
154  *   dot product
155  *----------------------------------------------------------------------------*/
156 
157 double
158 cs_dot_xx(cs_lnum_t         n,
159           const cs_real_t  *x);
160 
161 /*----------------------------------------------------------------------------
162  * Return weighted dot products of a vector with itself: x.x
163  *
164  * For better precision, a superblock algorithm is used.
165  *
166  * parameters:
167  *   n  <-- size of arrays x
168  *   w  <-- array of weights
169  *   x  <-- array of floating-point values
170  *
171  * returns:
172  *   dot product
173  *----------------------------------------------------------------------------*/
174 
175 double
176 cs_dot_wxx(cs_lnum_t         n,
177            const cs_real_t  *w,
178            const cs_real_t  *x);
179 
180 /*----------------------------------------------------------------------------
181  * Return the double dot product of 2 vectors: x.x, and x.y
182  *
183  * The products could be computed separately, but computing them
184  * simultaneously adds more optimization opportunities and possibly better
185  * cache behavior.
186  *
187  * parameters:
188  *   n  <-- size of arrays x and y
189  *   x  <-- array of floating-point values
190  *   y  <-- array of floating-point values
191  *   xx --> x.x dot product
192  *   xy --> x.y dot product
193  *----------------------------------------------------------------------------*/
194 
195 void
196 cs_dot_xx_xy(cs_lnum_t                    n,
197              const cs_real_t  *restrict   x,
198              const cs_real_t  *restrict   y,
199              double                      *xx,
200              double                      *xy);
201 
202 /*----------------------------------------------------------------------------
203  * Return the double dot product of 3 vectors: x.y, and y.z
204  *
205  * The products could be computed separately, but computing them
206  * simultaneously adds more optimization opportunities and possibly better
207  * cache behavior.
208  *
209  * parameters:
210  *   n  <-- size of arrays x and y
211  *   x  <-- array of floating-point values
212  *   y  <-- array of floating-point values
213  *   z  <-- array of floating-point values
214  *   xy --> x.y dot product
215  *   yz --> x.z dot product
216  *----------------------------------------------------------------------------*/
217 
218 void
219 cs_dot_xy_yz(cs_lnum_t                    n,
220              const cs_real_t  *restrict   x,
221              const cs_real_t  *restrict   y,
222              const cs_real_t  *restrict   z,
223              double                      *xx,
224              double                      *xy);
225 
226 /*----------------------------------------------------------------------------
227  * Return 3 dot products of 3 vectors: x.x, x.y, and y.z
228  *
229  * The products could be computed separately, but computing them
230  * simultaneously adds more optimization opportunities and possibly better
231  * cache behavior.
232  *
233  * parameters:
234  *   n  <-- size of arrays x and y
235  *   x  <-- array of floating-point values
236  *   y  <-- array of floating-point values
237  *   z  <-- array of floating-point values
238  *   xx --> x.y dot product
239  *   xy --> x.y dot product
240  *   yz --> y.z dot product
241  *----------------------------------------------------------------------------*/
242 
243 void
244 cs_dot_xx_xy_yz(cs_lnum_t                    n,
245                 const cs_real_t  *restrict   x,
246                 const cs_real_t  *restrict   y,
247                 const cs_real_t  *restrict   z,
248                 double                      *xx,
249                 double                      *xy,
250                 double                      *yz);
251 
252 /*----------------------------------------------------------------------------
253  * Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z
254  *
255  * The products could be computed separately, but computing them
256  * simultaneously adds more optimization opportunities and possibly better
257  * cache behavior.
258  *
259  * parameters:
260  *   n  <-- size of arrays x and y
261  *   x  <-- array of floating-point values
262  *   y  <-- array of floating-point values
263  *   z  <-- array of floating-point values
264  *   xx --> x.y dot product
265  *   yy --> y.y dot product
266  *   xy --> x.y dot product
267  *   xz --> x.z dot product
268  *   yz --> y.z dot product
269  *----------------------------------------------------------------------------*/
270 
271 void
272 cs_dot_xx_yy_xy_xz_yz(cs_lnum_t                    n,
273                       const cs_real_t  *restrict   x,
274                       const cs_real_t  *restrict   y,
275                       const cs_real_t  *restrict   z,
276                       double                      *xx,
277                       double                      *yy,
278                       double                      *xy,
279                       double                      *xz,
280                       double                      *yz);
281 
282 /*----------------------------------------------------------------------------
283  * Return the global dot product of 2 vectors: x.y
284  *
285  * In parallel mode, the local results are summed on the default
286  * global communicator.
287  *
288  * parameters:
289  *   n <-- size of arrays x and y
290  *   x <-- array of floating-point values
291  *   y <-- array of floating-point values
292  *
293  * returns:
294  *   dot product
295  *----------------------------------------------------------------------------*/
296 
297 double
298 cs_gdot(cs_lnum_t         n,
299         const cs_real_t  *x,
300         const cs_real_t  *y);
301 
302 /*----------------------------------------------------------------------------
303  * Return the global residual of 2 intensive vectors:
304  *  1/sum(vol) . sum(vol.x.y)
305  *
306  * parameters:
307  *   n   <-- size of arrays x and y
308  *   vol <-- array of floating-point values
309  *   x   <-- array of floating-point values
310  *   y   <-- array of floating-point values
311  *
312  * returns:
313  *   global residual
314  *----------------------------------------------------------------------------*/
315 
316 double
317 cs_gres(cs_lnum_t         n,
318         const cs_real_t  *vol,
319         const cs_real_t  *x,
320         const cs_real_t  *y);
321 
322 /*----------------------------------------------------------------------------
323  * Return the global volume weighted mean
324  *  1/sum(vol) . sum(vol.x)
325  *
326  * parameters:
327  *   n   <-- size of arrays x
328  *   vol <-- array of floating-point values
329  *   x   <-- array of floating-point values
330  *
331  * returns:
332  *   global residual
333  *----------------------------------------------------------------------------*/
334 
335 double
336 cs_gmean(cs_lnum_t         n,
337          const cs_real_t  *vol,
338          const cs_real_t  *x);
339 
340 /*----------------------------------------------------------------------------*/
341 
342 END_C_DECLS
343 
344 #endif /* __CS_BLAS_H__ */
345