1 /*============================================================================
2  * Sparse Matrix Representation and Operations.
3  *============================================================================*/
4 
5 /*
6   This file is part of Code_Saturne, a general-purpose CFD tool.
7 
8   Copyright (C) 1998-2021 EDF S.A.
9 
10   This program is free software; you can redistribute it and/or modify it under
11   the terms of the GNU General Public License as published by the Free Software
12   Foundation; either version 2 of the License, or (at your option) any later
13   version.
14 
15   This program is distributed in the hope that it will be useful, but WITHOUT
16   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
18   details.
19 
20   You should have received a copy of the GNU General Public License along with
21   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22   Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24 
25 /*----------------------------------------------------------------------------*/
26 
27 /*
28  * Notes:
29  *
30  * The aim of these structures and associated functions is multiple:
31  *
32  * - Provide an "opaque" matrix object for linear solvers, allowing possible
33  *   choice of the matrix type based on run-time tuning at code initialization
34  *   (depending on matrix size, architecture, and compiler, the most efficient
35  *   structure for matrix.vector products may vary).
36  *
37  * - Provide at least a CSR matrix structure in addition to the "native"
38  *   matrix structure, as this may allow us to leverage existing libraries.
39  *
40  * - Provide a C interface, also so as to be able to interface more easily
41  *   with external libraries.
42  *
43  * The structures used here could easily be extended to block matrixes,
44  * using for example the same structure information with 3x3 blocks which
45  * could arise from coupled velocity components. This would imply that the
46  * corresponding vectors be interlaced (or an interlaced copy be used
47  * for recurring operations such as sparse linear system resolution),
48  * for better memory locality, and possible loop unrolling.
49  */
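
/*
 * Illustrative sketch (not part of the code): for the 3x3 blocks mentioned
 * above, an interlaced vector stores the components of each cell
 * contiguously, e.g.
 *
 *   x = {u_0, v_0, w_0, u_1, v_1, w_1, ...}
 *
 * so that a block-diagonal product y_i = A_i.x_i accesses memory linearly:
 *
 *   y[3*i + k] = sum_j a[9*i + 3*k + j] * x[3*i + j]    (k, j in 0..2)
 *
 * This is the layout assumed by the fixed-size 3x3 and 6x6 kernels below.
 */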
50 
51 #include "cs_defs.h"
52 
53 /*----------------------------------------------------------------------------
54  * Standard C library headers
55  *----------------------------------------------------------------------------*/
56 
57 #include <stdarg.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <assert.h>
62 #include <math.h>
63 
64 #if defined(HAVE_MPI)
65 #include <mpi.h>
66 #endif
67 
68 #if defined (HAVE_MKL)
69 #include <mkl_spblas.h>
70 #endif
71 
72 /*----------------------------------------------------------------------------
73  * Local headers
74  *----------------------------------------------------------------------------*/
75 
76 #include "bft_mem.h"
77 #include "bft_error.h"
78 #include "bft_printf.h"
79 
80 #include "cs_base.h"
81 #include "cs_blas.h"
82 #include "cs_halo.h"
83 #include "cs_halo_perio.h"
84 #include "cs_log.h"
85 #include "cs_numbering.h"
86 #include "cs_prototypes.h"
87 #include "cs_sort.h"
88 #include "cs_timer.h"
89 
90 /*----------------------------------------------------------------------------
91  *  Header for the current file
92  *----------------------------------------------------------------------------*/
93 
94 #include "cs_matrix.h"
95 #include "cs_matrix_priv.h"
96 
97 /*----------------------------------------------------------------------------*/
98 
99 BEGIN_C_DECLS
100 
101 /*----------------------------------------------------------------------------*/
102 /*! \file cs_matrix.c
103  *
104  * \brief Sparse Matrix Representation and Operations.
105  *
106  * Please refer to the
107  * <a href="../../theory.pdf#matrix"><b>matrix</b></a> section of the
108  * theory guide for more information.
109  */
110 /*----------------------------------------------------------------------------*/
111 
112 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
113 
114 /*=============================================================================
115  * Local Macro Definitions
116  *============================================================================*/
117 
118 /* Cache line multiple, in cs_real_t units */
119 
120 static const cs_lnum_t _cs_cl = (CS_CL_SIZE/8);
121 
122 /*=============================================================================
123  * Local Type Definitions
124  *============================================================================*/
125 
126 /* Note that most types are declared in cs_matrix_priv.h;
127    only those handled exclusively in this file are declared here. */
128 
129 /*============================================================================
130  *  Global variables
131  *============================================================================*/
132 
133 /* Short names for matrix types */
134 
135 static const char  *_matrix_type_name[] = {N_("native"),
136                                            N_("CSR"),
137                                            N_("MSR"),
138                                            N_("external")};
139 
140 /* Full names for matrix types */
141 
142 static const char
143 *_matrix_type_fullname[] = {N_("diagonal + faces"),
144                             N_("Compressed Sparse Row"),
145                             N_("Modified Compressed Sparse Row"),
146                             N_("External")};
147 
148 /* Fill type names for matrices */
149 
150 const char  *cs_matrix_fill_type_name[] = {"CS_MATRIX_SCALAR",
151                                            "CS_MATRIX_SCALAR_SYM",
152                                            "CS_MATRIX_BLOCK_D",
153                                            "CS_MATRIX_BLOCK_D_66",
154                                            "CS_MATRIX_BLOCK_D_SYM",
155                                            "CS_MATRIX_BLOCK"};
156 
157 #if defined (HAVE_MKL)
158 
159 static char _no_exclude_diag_error_str[]
160   = N_("Matrix product variant using function %s\n"
161        "does not handle case with excluded diagonal.");
162 
163 #endif
164 
165 /*============================================================================
166  * Private function definitions
167  *============================================================================*/
168 
169 /*----------------------------------------------------------------------------
170  * Set matrix fill metadata.
171  *
172  * Block sizes are defined by an optional array of 4 values:
173  *   0: useful block size, 1: vector block extents,
174  *   2: matrix line extents,  3: matrix line*column extents
175  *
176  * parameters:
177  *   matrix                <-> pointer to matrix structure
178  *   symmetric             <-- indicates if matrix coefficients are symmetric
179  *   diag_block_size       <-- block sizes for diagonal, or NULL
180  *   extra_diag_block_size <-- block sizes for extra diagonal, or NULL
181  *----------------------------------------------------------------------------*/
182 
183 static void
184 _set_fill_info(cs_matrix_t      *matrix,
185                bool              symmetric,
186                const cs_lnum_t   diag_block_size[4],
187                const cs_lnum_t   extra_diag_block_size[4])
188 {
189   matrix->symmetric = symmetric;
190 
191   if (diag_block_size == NULL) {
192     for (int i = 0; i < 4; i++)
193       matrix->db_size[i] = 1;
194   }
195   else {
196     for (int i = 0; i < 4; i++)
197       matrix->db_size[i] = diag_block_size[i];
198   }
199 
200   if (extra_diag_block_size == NULL) {
201     for (int i = 0; i < 4; i++)
202       matrix->eb_size[i] = 1;
203   }
204   else {
205     for (int i = 0; i < 4; i++)
206       matrix->eb_size[i] = extra_diag_block_size[i];
207   }
208 
209   /* Set fill type */
210 
211   matrix->fill_type = cs_matrix_get_fill_type(symmetric,
212                                               diag_block_size,
213                                               extra_diag_block_size);
214 }
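
/*
 * Example (illustrative only, hypothetical matrix pointer m): following
 * the block size convention documented above, a 3x3 block layout with no
 * padding is described by
 *
 *   const cs_lnum_t db_size[4] = {3, 3, 3, 9};
 *   _set_fill_info(m, false, db_size, NULL);
 *
 * while passing NULL for both block size arrays selects the scalar
 * (1x1) fill types.
 */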
215 
216 /*----------------------------------------------------------------------------
217  * Clear matrix fill metadata.
218  *
219  * parameters:
220  *   matrix <-> pointer to matrix structure
221  *----------------------------------------------------------------------------*/
222 
223 static void
224 _clear_fill_info(cs_matrix_t  *matrix)
225 {
226   matrix->symmetric = false;
227 
228   for (int i = 0; i < 4; i++) {
229     matrix->db_size[i] = 0;
230     matrix->eb_size[i] = 0;
231   }
232 
233   matrix->fill_type = CS_MATRIX_N_FILL_TYPES;
234 }
235 
236 /*----------------------------------------------------------------------------
237  * Compute matrix-vector product for one dense block: y[i] = a[i].x[i]
238  *
239  * Vectors and blocks may be larger than their useful size, to
240  * improve data alignment.
241  *
242  * parameters:
243  *   b_id   <-- block id
244  *   b_size <-- block size, including padding:
245  *              b_size[0]: useful block size
246  *              b_size[1]: vector block extents
247  *              b_size[2]: matrix line extents
248  *              b_size[3]: matrix line*column (block) extents
249  *   a      <-- pointer to block matrixes array (usually matrix diagonal)
250  *   x      <-- multiplying vector values
251  *   y      --> resulting vector
252  *----------------------------------------------------------------------------*/
253 
254 static inline void
255 _dense_b_ax(cs_lnum_t         b_id,
256             const cs_lnum_t   b_size[4],
257             const cs_real_t   a[restrict],
258             const cs_real_t   x[restrict],
259             cs_real_t         y[restrict])
260 {
261   cs_lnum_t   ii, jj;
262 
263   for (ii = 0; ii < b_size[0]; ii++) {
264     y[b_id*b_size[1] + ii] = 0.;
265     for (jj = 0; jj < b_size[0]; jj++)
266       y[b_id*b_size[1] + ii]
267         +=   a[b_id*b_size[3] + ii*b_size[2] + jj]
268            * x[b_id*b_size[1] + jj];
269   }
270 }
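
/*
 * Worked example (illustrative): with b_size = {3, 4, 4, 16}, i.e. a 3x3
 * useful block padded to 4 for alignment, the loops above compute
 *
 *   y[4*b_id + ii] += a[16*b_id + 4*ii + jj] * x[4*b_id + jj]
 *
 * for ii, jj in 0..2; the padding entries of a, x and y are never read.
 */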
271 
272 /*----------------------------------------------------------------------------
273  * Compute matrix-vector product for one dense block: y[i] = a[i].x[i]
274  *
275  * This variant uses a fixed 3x3 block, for better compiler optimization.
276  *
277  * parameters:
278  *   b_id   <-- block id
279  *   a      <-- pointer to block matrixes array (usually matrix diagonal)
280  *   x      <-- multiplying vector values
281  *   y      --> resulting vector
282  *----------------------------------------------------------------------------*/
283 
284 static inline void
285 _dense_3_3_ax(cs_lnum_t         b_id,
286               const cs_real_t   a[restrict],
287               const cs_real_t   x[restrict],
288               cs_real_t         y[restrict])
289 {
290   y[b_id*3]     =   a[b_id*9]         * x[b_id*3]
291                   + a[b_id*9 + 1]     * x[b_id*3 + 1]
292                   + a[b_id*9 + 2]     * x[b_id*3 + 2];
293 
294   y[b_id*3 + 1] =   a[b_id*9 + 3]     * x[b_id*3]
295                   + a[b_id*9 + 3 + 1] * x[b_id*3 + 1]
296                   + a[b_id*9 + 3 + 2] * x[b_id*3 + 2];
297 
298   y[b_id*3 + 2] =   a[b_id*9 + 6]     * x[b_id*3]
299                   + a[b_id*9 + 6 + 1] * x[b_id*3 + 1]
300                   + a[b_id*9 + 6 + 2] * x[b_id*3 + 2];
301 }
302 
303 /*----------------------------------------------------------------------------
304  * Compute matrix-vector product for one dense block: y[i] = a[i].x[i]
305  *
306  * This variant uses a fixed 6x6 block, for better compiler optimization.
307  *
308  * parameters:
309  *   b_id   <-- block id
310  *   a      <-- pointer to block matrixes array (usually matrix diagonal)
311  *   x      <-- multiplying vector values
312  *   y      --> resulting vector
313  *----------------------------------------------------------------------------*/
314 
315 static inline void
316 _dense_6_6_ax(cs_lnum_t        b_id,
317               const cs_real_t  a[restrict],
318               const cs_real_t  x[restrict],
319               cs_real_t        y[restrict])
320 {
321   const cs_lnum_t b_id_6 = b_id*6, b_id_36 = b_id*36;
322 
323   y[b_id_6]     =   a[b_id_36]         * x[b_id_6]
324                   + a[b_id_36 + 1]     * x[b_id_6 + 1]
325                   + a[b_id_36 + 2]     * x[b_id_6 + 2]
326                   + a[b_id_36 + 3]     * x[b_id_6 + 3]
327                   + a[b_id_36 + 4]     * x[b_id_6 + 4]
328                   + a[b_id_36 + 5]     * x[b_id_6 + 5];
329 
330   y[b_id_6 + 1] =   a[b_id_36 + 6]     * x[b_id_6]
331                   + a[b_id_36 + 6 + 1] * x[b_id_6 + 1]
332                   + a[b_id_36 + 6 + 2] * x[b_id_6 + 2]
333                   + a[b_id_36 + 6 + 3] * x[b_id_6 + 3]
334                   + a[b_id_36 + 6 + 4] * x[b_id_6 + 4]
335                   + a[b_id_36 + 6 + 5] * x[b_id_6 + 5];
336 
337   y[b_id_6 + 2] =   a[b_id_36 + 12]     * x[b_id_6]
338                   + a[b_id_36 + 12 + 1] * x[b_id_6 + 1]
339                   + a[b_id_36 + 12 + 2] * x[b_id_6 + 2]
340                   + a[b_id_36 + 12 + 3] * x[b_id_6 + 3]
341                   + a[b_id_36 + 12 + 4] * x[b_id_6 + 4]
342                   + a[b_id_36 + 12 + 5] * x[b_id_6 + 5];
343 
344   y[b_id_6 + 3] =   a[b_id_36 + 18]     * x[b_id_6]
345                   + a[b_id_36 + 18 + 1] * x[b_id_6 + 1]
346                   + a[b_id_36 + 18 + 2] * x[b_id_6 + 2]
347                   + a[b_id_36 + 18 + 3] * x[b_id_6 + 3]
348                   + a[b_id_36 + 18 + 4] * x[b_id_6 + 4]
349                   + a[b_id_36 + 18 + 5] * x[b_id_6 + 5];
350 
351   y[b_id_6 + 4] =   a[b_id_36 + 24]     * x[b_id_6]
352                   + a[b_id_36 + 24 + 1] * x[b_id_6 + 1]
353                   + a[b_id_36 + 24 + 2] * x[b_id_6 + 2]
354                   + a[b_id_36 + 24 + 3] * x[b_id_6 + 3]
355                   + a[b_id_36 + 24 + 4] * x[b_id_6 + 4]
356                   + a[b_id_36 + 24 + 5] * x[b_id_6 + 5];
357 
358   y[b_id_6 + 5] =   a[b_id_36 + 30]     * x[b_id_6]
359                   + a[b_id_36 + 30 + 1] * x[b_id_6 + 1]
360                   + a[b_id_36 + 30 + 2] * x[b_id_6 + 2]
361                   + a[b_id_36 + 30 + 3] * x[b_id_6 + 3]
362                   + a[b_id_36 + 30 + 4] * x[b_id_6 + 4]
363                   + a[b_id_36 + 30 + 5] * x[b_id_6 + 5];
364 
365 }
366 
367 /*----------------------------------------------------------------------------
368  * Compute matrix-vector product increment for one dense block:
369  * y[i] += a[ij].x[j]
370  *
371  * Vectors and blocks may be larger than their useful size, to
372  * improve data alignment.
373  *
374  * parameters:
375  *   b_i    <-- block id for i
376  *   b_j    <-- block id for j
377  *   b_ij   <-- block id for matrix ij position
378  *   b_size <-- block size, including padding:
379  *              b_size[0]: useful block size
380  *              b_size[1]: vector block extents
381  *              b_size[2]: matrix line extents
382  *              b_size[3]: matrix line*column (block) extents
383  *   a      <-- pointer to block matrixes array (usually matrix extra-diagonal)
384  *   x      <-- multiplying vector values
385  *   y      --> resulting vector
386  *----------------------------------------------------------------------------*/
387 
388 static inline void
389 _dense_eb_ax_add(cs_lnum_t        b_i,
390                  cs_lnum_t        b_j,
391                  cs_lnum_t        b_ij,
392                  const cs_lnum_t  b_size[4],
393                  const cs_real_t  a[restrict],
394                  const cs_real_t  x[restrict],
395                  cs_real_t        y[restrict])
396 {
397   cs_lnum_t   ii, jj;
398 
399   for (ii = 0; ii < b_size[0]; ii++) {
400     for (jj = 0; jj < b_size[0]; jj++)
401       y[b_i*b_size[1] + ii]
402         +=   a[b_ij*b_size[3] + ii*b_size[2] + jj]
403            * x[b_j*b_size[1] + jj];
404   }
405 }
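
/*
 * Illustrative use (hypothetical indices): for a graph edge e joining
 * rows i and j, the blocked native products further below accumulate the
 * extra-diagonal contributions as
 *
 *   _dense_eb_ax_add(i, j, e, eb_size, xa, x, y);   adds A_ij.x_j to y_i
 *   _dense_eb_ax_add(j, i, e, eb_size, xa, x, y);   adds A_ji.x_i to y_j
 *
 * (with 2*e and 2*e + 1 as block ids in the non-symmetric case).
 */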
406 
407 /*----------------------------------------------------------------------------
408  * y[i] = da[i].x[i], with da possibly NULL
409  *
410  * parameters:
411  *   da     <-- pointer to coefficients array (usually matrix diagonal)
412  *   x      <-- multiplying vector values
413  *   y      --> resulting vector
414  *   n_elts <-- array size
415  *----------------------------------------------------------------------------*/
416 
417 static inline void
418 _diag_vec_p_l(const cs_real_t  da[restrict],
419               const cs_real_t  x[restrict],
420               cs_real_t        y[restrict],
421               cs_lnum_t        n_elts)
422 {
423   cs_lnum_t  ii;
424 
425   if (da != NULL) {
426 #   pragma omp parallel for  if(n_elts > CS_THR_MIN)
427     for (ii = 0; ii < n_elts; ii++)
428       y[ii] = da[ii] * x[ii];
429   }
430   else {
431 #   pragma omp parallel for  if(n_elts > CS_THR_MIN)
432     for (ii = 0; ii < n_elts; ii++)
433       y[ii] = 0.0;
434   }
435 
436 }
437 
438 /*----------------------------------------------------------------------------
439  * Block version of y[i] = da[i].x[i], with da possibly NULL
440  *
441  * parameters:
442  *   da     <-- pointer to coefficients array (usually matrix diagonal)
443  *   x      <-- multiplying vector values
444  *   y      --> resulting vector
445  *   n_elts <-- array size
446  *   b_size <-- block size, including padding:
447  *              b_size[0]: useful block size
448  *              b_size[1]: vector block extents
449  *              b_size[2]: matrix line extents
450  *              b_size[3]: matrix line*column (block) extents
451  *----------------------------------------------------------------------------*/
452 
453 static inline void
454 _b_diag_vec_p_l(const cs_real_t  da[restrict],
455                 const cs_real_t  x[restrict],
456                 cs_real_t        y[restrict],
457                 cs_lnum_t        n_elts,
458                 const cs_lnum_t  b_size[4])
459 {
460   cs_lnum_t   ii;
461 
462   if (da != NULL) {
463 #   pragma omp parallel for  if(n_elts > CS_THR_MIN)
464     for (ii = 0; ii < n_elts; ii++)
465       _dense_b_ax(ii, b_size, da, x, y);
466   }
467   else {
468 #   pragma omp parallel for  if(n_elts*b_size[1] > CS_THR_MIN)
469     for (ii = 0; ii < n_elts*b_size[1]; ii++)
470       y[ii] = 0.0;
471   }
472 }
473 
474 /*----------------------------------------------------------------------------
475  * Block version of y[i] = da[i].x[i], with da possibly NULL
476  *
477  * This variant uses a fixed 3x3 block, for better compiler optimization.
478  *
479  * parameters:
480  *   da     <-- pointer to coefficients array (usually matrix diagonal)
481  *   x      <-- multiplying vector values
482  *   y      --> resulting vector
483  *   n_elts <-- array size
484  *----------------------------------------------------------------------------*/
485 
486 static inline void
487 _3_3_diag_vec_p_l(const cs_real_t  da[restrict],
488                   const cs_real_t  x[restrict],
489                   cs_real_t        y[restrict],
490                   cs_lnum_t        n_elts)
491 {
492   cs_lnum_t   ii;
493 
494   if (da != NULL) {
495 #   pragma omp parallel for  if(n_elts*3 > CS_THR_MIN)
496     for (ii = 0; ii < n_elts; ii++)
497       _dense_3_3_ax(ii, da, x, y);
498   }
499   else {
500 #   pragma omp parallel for  if(n_elts*3 > CS_THR_MIN)
501     for (ii = 0; ii < n_elts*3; ii++)
502       y[ii] = 0.0;
503   }
504 }
505 
506 /*----------------------------------------------------------------------------
507  * Block version of y[i] = da[i].x[i], with da possibly NULL
508  *
509  * This variant uses a fixed 6x6 block, for better compiler optimization.
510  *
511  * parameters:
512  *   da     <-- pointer to coefficients array (usually matrix diagonal)
513  *   x      <-- multiplying vector values
514  *   y      --> resulting vector
515  *   n_elts <-- array size
516  *----------------------------------------------------------------------------*/
517 
518 static inline void
519 _6_6_diag_vec_p_l(const cs_real_t  da[restrict],
520                   const cs_real_t  x[restrict],
521                   cs_real_t        y[restrict],
522                   cs_lnum_t        n_elts)
523 {
524   cs_lnum_t   ii;
525 
526   if (da != NULL) {
527 #   pragma omp parallel for  if(n_elts*6 > CS_THR_MIN)
528     for (ii = 0; ii < n_elts; ii++)
529       _dense_6_6_ax(ii, da, x, y);
530   }
531   else {
532 #   pragma omp parallel for  if(n_elts*6 > CS_THR_MIN)
533     for (ii = 0; ii < n_elts*6; ii++)
534       y[ii] = 0.0;
535   }
536 }
537 
538 /*----------------------------------------------------------------------------
539  * Set values from y[start_id] to y[end_id] to 0.
540  *
541  * parameters:
542  *   y        --> resulting vector
543  *   start_id <-- start id in array
544  *   end_id   <-- end id in array
545  *----------------------------------------------------------------------------*/
546 
547 static inline void
548 _zero_range(cs_real_t   y[restrict],
549             cs_lnum_t   start_id,
550             cs_lnum_t   end_id)
551 {
552   cs_lnum_t   ii;
553 
554 # pragma omp parallel for  if(end_id - start_id > CS_THR_MIN)
555   for (ii = start_id; ii < end_id; ii++)
556     y[ii] = 0.0;
557 }
558 
559 /*----------------------------------------------------------------------------
560  * Set values from y[start_id] to y[end_id] to 0, block version.
561  *
562  * parameters:
563  *   y        --> resulting vector
564  *   start_id <-- start id in array
565  *   end_id   <-- end id in array
566  *   b_size   <-- block size, including padding:
567  *                b_size[0]: useful block size
568  *                b_size[1]: vector block extents
569  *----------------------------------------------------------------------------*/
570 
571 static inline void
572 _b_zero_range(cs_real_t        y[restrict],
573               cs_lnum_t        start_id,
574               cs_lnum_t        end_id,
575               const cs_lnum_t  b_size[2])
576 {
577   cs_lnum_t  ii;
578 
579 # pragma omp parallel for  if((end_id-start_id)*b_size[1] > CS_THR_MIN)
580   for (ii = start_id*b_size[1]; ii < end_id*b_size[1]; ii++)
581     y[ii] = 0.0;
582 }
583 
584 /*----------------------------------------------------------------------------
585  * Set values from y[start_id] to y[end_id] to 0, block version.
586  *
587  * parameters:
588  *   y        --> resulting vector
589  *   start_id <-- start id in array
590  *   end_id   <-- end id in array
591  *----------------------------------------------------------------------------*/
592 
593 static inline void
594 _3_3_zero_range(cs_real_t  y[restrict],
595                 cs_lnum_t  start_id,
596                 cs_lnum_t  end_id)
597 {
598   cs_lnum_t  ii;
599 
600 # pragma omp parallel for  if((end_id-start_id)*3 > CS_THR_MIN)
601   for (ii = start_id*3; ii < end_id*3; ii++)
602     y[ii] = 0.0;
603 }
604 
605 /*----------------------------------------------------------------------------
606  * Set values from y[start_id] to y[end_id] to 0, block version.
607  *
608  * parameters:
609  *   y        --> resulting vector
610  *   start_id <-- start id in array
611  *   end_id   <-- end id in array
612  *----------------------------------------------------------------------------*/
613 
614 static inline void
615 _6_6_zero_range(cs_real_t  y[restrict],
616                 cs_lnum_t  start_id,
617                 cs_lnum_t  end_id)
618 {
619   cs_lnum_t  ii;
620 
621 # pragma omp parallel for  if((end_id-start_id)*6 > CS_THR_MIN)
622   for (ii = start_id*6; ii < end_id*6; ii++)
623     y[ii] = 0.0;
624 }
625 
626 /*----------------------------------------------------------------------------
627  * Start synchronization of ghost values prior to matrix.vector product
628  *
629  * parameters:
630  *   matrix        <-- pointer to matrix structure
631  *   x             <-> multiplying vector values (ghost values updated)
632  *
633  * returns:
634  *   halo state to use for synchronisation finalisation.
635  *----------------------------------------------------------------------------*/
636 
637 static cs_halo_state_t *
638 _pre_vector_multiply_sync_x_start(const cs_matrix_t   *matrix,
639                                   cs_real_t            x[restrict])
640 {
641   cs_halo_state_t *hs = NULL;
642 
643   if (matrix->halo != NULL) {
644 
645     hs = cs_halo_state_get_default();
646 
647     /* Non-blocked version */
648 
649     cs_halo_sync_pack(matrix->halo,
650                       CS_HALO_STANDARD,
651                       CS_REAL_TYPE,
652                       matrix->db_size[1],
653                       x,
654                       NULL,
655                       hs);
656 
657     cs_halo_sync_start(matrix->halo, x, hs);
658 
659   }
660 
661   return hs;
662 }
663 
664 /*----------------------------------------------------------------------------
665  * Finalize synchronization of ghost values prior to matrix.vector product
666  *
667  * parameters:
668  *   matrix        <-- pointer to matrix structure
 *   hs            <-> halo state from the matching sync start call
669  *   x             <-> multiplying vector values (ghost values updated)
670  *----------------------------------------------------------------------------*/
671 
672 static void
673 _pre_vector_multiply_sync_x_end(const cs_matrix_t   *matrix,
674                                 cs_halo_state_t     *hs,
675                                 cs_real_t            x[restrict])
676 {
677   if (hs != NULL) {
678 
679     assert(matrix->halo != NULL);
680 
681     cs_halo_sync_wait(matrix->halo, x, hs);
682 
683     /* Synchronize periodic values */
684 
685 #if !defined(_CS_UNIT_MATRIX_TEST) /* unit tests do not link with full library */
686 
687     if (matrix->halo->n_transforms > 0) {
688       if (matrix->db_size[0] == 3)
689         cs_halo_perio_sync_var_vect(matrix->halo,
690                                     CS_HALO_STANDARD,
691                                     x,
692                                     matrix->db_size[1]);
693       else if (matrix->db_size[0] == 6)
694         cs_halo_perio_sync_var_sym_tens(matrix->halo,
695                                         CS_HALO_STANDARD,
696                                         x);
697     }
698 
699 #endif
700   }
701 }
702 
703 /*----------------------------------------------------------------------------
704  * Synchronize ghost values prior to matrix.vector product
705  *
706  * parameters:
707  *   matrix        <-- pointer to matrix structure
708  *   x             <-> multiplying vector values (ghost values updated)
709  *----------------------------------------------------------------------------*/
710 
711 void
712 cs_matrix_pre_vector_multiply_sync(const cs_matrix_t   *matrix,
713                                    cs_real_t           *x)
714 {
715   if (matrix->halo != NULL) {
716     cs_halo_state_t *hs = _pre_vector_multiply_sync_x_start(matrix, x);
717     _pre_vector_multiply_sync_x_end(matrix, hs, x);
718   }
719 }
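
/*
 * Note: the matrix.vector product variants below overlap this exchange
 * with local work where possible: they call
 * _pre_vector_multiply_sync_x_start(), compute the diagonal contribution
 * (which only needs local values), and only then wait for ghost values
 * before adding the extra-diagonal terms.
 */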
720 
721 /*----------------------------------------------------------------------------
722  * Create native matrix structure.
723  *
724  * Note that the structure created maps to the given existing
725  * face -> cell connectivity array, so it must be destroyed before this
726  * array (usually the code's main face -> cell structure) is freed.
727  *
728  * parameters:
729  *   n_rows      <-- number of local rows
730  *   n_cols_ext  <-- number of local + ghost columns
731  *   n_edges     <-- local number of graph edges
732  *   edges       <-- edges (symmetric row <-> column) connectivity
733  *
734  * returns:
735  *   pointer to allocated native matrix structure.
736  *----------------------------------------------------------------------------*/
737 
738 static cs_matrix_struct_native_t *
739 _create_struct_native(cs_lnum_t        n_rows,
740                       cs_lnum_t        n_cols_ext,
741                       cs_lnum_t        n_edges,
742                       const cs_lnum_t  edges[][2])
743 {
744   cs_matrix_struct_native_t  *ms;
745 
746   /* Allocate */
747 
748   BFT_MALLOC(ms, 1, cs_matrix_struct_native_t);
749 
750   /* Set sizes and map edges */
751 
752   ms->n_rows = n_rows;
753   ms->n_cols_ext = n_cols_ext;
754   ms->n_edges = n_edges;
755 
756   ms->edges = edges;
757 
758   return ms;
759 }
760 
761 /*----------------------------------------------------------------------------
762  * Destroy native matrix structure.
763  *
764  * parameters:
765  *   ms  <->  pointer to native matrix structure pointer
766  *----------------------------------------------------------------------------*/
767 
768 static void
769 _destroy_struct_native(void  **ms)
770 {
771   if (ms != NULL && *ms != NULL) {
772     cs_matrix_struct_native_t  *_ms = *ms;
773 
774     BFT_FREE(_ms);
775 
776     *ms = NULL;
777   }
778 }
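
/*
 * Usage sketch (illustrative, hypothetical mesh arrays): since the edges
 * array is only mapped, it must outlive the structure, as noted above:
 *
 *   cs_matrix_struct_native_t  *ms
 *     = _create_struct_native(n_cells, n_cells_ext, n_i_faces,
 *                             (const cs_lnum_t (*)[2])i_face_cells);
 *   ...
 *   _destroy_struct_native((void **)&ms);  call before i_face_cells is freed
 */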
779 
780 /*----------------------------------------------------------------------------
781  * Create native matrix coefficients.
782  *
783  * returns:
784  *   pointer to allocated native coefficients structure.
785  *----------------------------------------------------------------------------*/
786 
787 static cs_matrix_coeff_native_t *
788 _create_coeff_native(void)
789 {
790   cs_matrix_coeff_native_t  *mc;
791 
792   /* Allocate */
793 
794   BFT_MALLOC(mc, 1, cs_matrix_coeff_native_t);
795 
796   /* Initialize */
797 
798   mc->symmetric = false;
799   mc->max_db_size = 0;
800   mc->max_eb_size = 0;
801 
802   mc->da = NULL;
803   mc->xa = NULL;
804 
805   mc->_da = NULL;
806   mc->_xa = NULL;
807 
808   return mc;
809 }
810 
811 /*----------------------------------------------------------------------------
812  * Destroy native matrix coefficients.
813  *
814  * parameters:
815  *   m  <->  pointer to matrix structure
816  *----------------------------------------------------------------------------*/
817 
818 static void
819 _destroy_coeff_native(cs_matrix_t  *m)
820 {
821   if (m->coeffs != NULL) {
822     cs_matrix_coeff_native_t  *mc = m->coeffs;
823 
824     BFT_FREE(mc->_xa);
825     BFT_FREE(mc->_da);
826 
827     BFT_FREE(m->coeffs);
828   }
829 }
830 
831 /*----------------------------------------------------------------------------
832  * Set Native matrix coefficients.
833  *
834  * Depending on current options and initialization, values will be copied
835  * or simply mapped.
836  *
837  * parameters:
838  *   matrix    <-- pointer to matrix structure
839  *   symmetric <-- indicates if extradiagonal values are symmetric
840  *   copy      <-- indicates if coefficients should be copied
841  *   n_edges   <-- local number of graph edges
842  *   edges     <-- edges (symmetric row <-> column) connectivity
843  *   da        <-- diagonal values
844  *   xa        <-- extradiagonal values
845  *----------------------------------------------------------------------------*/
846 
847 static void
848 _set_coeffs_native(cs_matrix_t        *matrix,
849                    bool                symmetric,
850                    bool                copy,
851                    cs_lnum_t           n_edges,
852                    const cs_lnum_t     edges[restrict][2],
853                    const cs_real_t     da[restrict],
854                    const cs_real_t     xa[restrict])
855 {
856   CS_UNUSED(n_edges);
857   CS_UNUSED(edges);
858 
859   cs_matrix_coeff_native_t  *mc = matrix->coeffs;
860   const cs_matrix_struct_native_t  *ms = matrix->structure;
861   mc->symmetric = symmetric;
862 
863   /* Map or copy values */
864 
865   if (da != NULL) {
866 
867     if (copy) {
868       if (mc->_da == NULL || mc->max_db_size < matrix->db_size[3]) {
869         BFT_REALLOC(mc->_da, matrix->db_size[3]*ms->n_rows, cs_real_t);
870         mc->max_db_size = matrix->db_size[3];
871       }
872       memcpy(mc->_da, da, matrix->db_size[3]*sizeof(cs_real_t) * ms->n_rows);
873       mc->da = mc->_da;
874     }
875     else
876       mc->da = da;
877 
878   }
879   else {
880     mc->da = NULL;
881   }
882 
883   if (xa != NULL) {
884 
885     size_t xa_n_vals = ms->n_edges;
886     if (! symmetric)
887       xa_n_vals *= 2;
888 
889     if (copy) {
890       if (mc->_xa == NULL || mc->max_eb_size < matrix->eb_size[3]) {
891         BFT_MALLOC(mc->_xa, matrix->eb_size[3]*xa_n_vals, cs_real_t);
892         mc->max_eb_size = matrix->eb_size[3];
893       }
894       memcpy(mc->_xa, xa, matrix->eb_size[3]*xa_n_vals*sizeof(cs_real_t));
895       mc->xa = mc->_xa;
896     }
897     else
898       mc->xa = xa;
899 
900   }
901 }
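
/*
 * Note on extra-diagonal storage: as reflected by xa_n_vals above, a
 * symmetric matrix stores one extra-diagonal value (or block) per edge,
 * while a non-symmetric matrix stores two per edge, interleaved, so that
 * xa[2*e] holds the (i,j) term and xa[2*e + 1] the (j,i) term of edge e.
 * The matrix.vector product variants below index xa accordingly.
 */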
902 
903 /*----------------------------------------------------------------------------
904  * Release shared native matrix coefficients.
905  *
906  * parameters:
907  *   matrix <-- pointer to matrix structure
908  *----------------------------------------------------------------------------*/
909 
910 static void
911 _release_coeffs_native(cs_matrix_t  *matrix)
912 {
913   cs_matrix_coeff_native_t  *mc = matrix->coeffs;
914   if (mc != NULL) {
915     mc->da = NULL;
916     mc->xa = NULL;
917   }
918 }
919 
920 /*----------------------------------------------------------------------------
921  * Copy diagonal of native or MSR matrix.
922  *
923  * parameters:
924  *   matrix <-- pointer to matrix structure
925  *   da     --> diagonal (pre-allocated, size: n_rows)
926  *----------------------------------------------------------------------------*/
927 
928 static void
929 _copy_diagonal_separate(const cs_matrix_t  *matrix,
930                         cs_real_t           da[restrict])
931 {
932   const cs_real_t *_da = NULL;
933   if (matrix->type == CS_MATRIX_NATIVE) {
934     const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
935     _da = mc->da;
936   }
937   else if (matrix->type == CS_MATRIX_MSR) {
938     const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
939     _da = mc->d_val;
940   }
941   const cs_lnum_t  n_rows = matrix->n_rows;
942 
943   /* Unblocked version */
944 
945   if (matrix->db_size[3] == 1) {
946 
947     if (_da != NULL) {
948 #     pragma omp parallel for  if(n_rows > CS_THR_MIN)
949       for (cs_lnum_t ii = 0; ii < n_rows; ii++)
950         da[ii] = _da[ii];
951     }
952     else {
953 #     pragma omp parallel for  if(n_rows > CS_THR_MIN)
954       for (cs_lnum_t ii = 0; ii < n_rows; ii++)
955         da[ii] = 0.0;
956     }
957 
958   }
959 
960   /* Blocked version */
961 
962   else {
963 
964     const cs_lnum_t *db_size = matrix->db_size;
965 
966     if (_da != NULL) {
967 #     pragma omp parallel for  if(n_rows*db_size[0] > CS_THR_MIN)
968       for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
969         for (cs_lnum_t jj = 0; jj < db_size[0]; jj++)
970           da[ii*db_size[1] + jj] = _da[ii*db_size[3] + jj*db_size[2] + jj];
971       }
972     }
973     else {
974 #     pragma omp parallel for  if(n_rows*db_size[1] > CS_THR_MIN)
975       for (cs_lnum_t ii = 0; ii < n_rows*db_size[1]; ii++)
976         da[ii] = 0.0;
977     }
978   }
979 }
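
/*
 * Worked example (illustrative): for 3x3 diagonal blocks with no padding
 * (db_size = {3, 3, 3, 9}), the blocked branch above extracts the three
 * diagonal entries of each block:
 *
 *   da[3*ii + jj] = _da[9*ii + 3*jj + jj],   jj in 0..2
 */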
980 
981 /*----------------------------------------------------------------------------*/
982 /*!
983  * \brief Get matrix diagonal values for native matrix.
984  *
985  * In case of matrixes with block diagonal coefficients, a pointer to
986  * the complete block diagonal is returned.
987  *
988  * \param[in]  matrix  pointer to matrix structure
989  *
990  * \return  pointer to matrix diagonal array
991  */
992 /*----------------------------------------------------------------------------*/
993 
994 static const cs_real_t *
995 _get_diagonal_native(const cs_matrix_t  *matrix)
996 {
997   cs_lnum_t ii;
998 
999   const cs_real_t  *diag = NULL;
1000 
1001   cs_matrix_coeff_native_t *mc = matrix->coeffs;
1002 
1003   if (mc->da == NULL) {
1004     cs_lnum_t n_rows = matrix->n_rows * matrix->db_size[3];
1005     if (mc->_da == NULL || mc->max_db_size < matrix->db_size[3]) {
1006       BFT_REALLOC(mc->_da, matrix->db_size[3]*matrix->n_rows, cs_real_t);
1007       mc->max_db_size = matrix->db_size[3];
1008     }
1009 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
1010     for (ii = 0; ii < n_rows; ii++)
1011       mc->_da[ii] = 0.0;
1012     mc->da = mc->_da;
1013   }
1014 
1015   diag = mc->da;
1016 
1017   return diag;
1018 }
1019 
1020 /*----------------------------------------------------------------------------
1021  * Local matrix.vector product y = A.x with native matrix.
1022  *
1023  * parameters:
1024  *   matrix       <-- pointer to matrix structure
1025  *   exclude_diag <-- exclude diagonal if true,
1026  *   sync         <-- synchronize ghost cells if true
1027  *   x            <-> multiplying vector values
1028  *   y            --> resulting vector
1029  *----------------------------------------------------------------------------*/
1030 
1031 static void
1032 _mat_vec_p_l_native(const cs_matrix_t  *matrix,
1033                     bool                exclude_diag,
1034                     bool                sync,
1035                     cs_real_t           x[restrict],
1036                     cs_real_t           y[restrict])
1037 {
1038   cs_lnum_t  ii, jj, face_id;
1039 
1040   const cs_matrix_struct_native_t  *ms = matrix->structure;
1041   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1042 
1043   const cs_real_t  *restrict xa = mc->xa;
1044 
1045   /* Initialize ghost cell communication */
1046 
1047   cs_halo_state_t *hs
1048     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1049 
1050   /* Diagonal part of matrix.vector product */
1051 
1052   if (! exclude_diag) {
1053     _diag_vec_p_l(mc->da, x, y, ms->n_rows);
1054     _zero_range(y, ms->n_rows, ms->n_cols_ext);
1055   }
1056   else
1057     _zero_range(y, 0, ms->n_cols_ext);
1058 
1059   /* Finalize ghost cell communication if overlap used */
1060 
1061   if (hs != NULL)
1062     cs_halo_sync_wait(matrix->halo, x, hs);
1063 
1064   /* non-diagonal terms */
1065 
1066   if (mc->xa != NULL) {
1067 
1068     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1069 
1070     if (mc->symmetric) {
1071 
1072       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1073         ii = face_cel_p[face_id][0];
1074         jj = face_cel_p[face_id][1];
1075         y[ii] += xa[face_id] * x[jj];
1076         y[jj] += xa[face_id] * x[ii];
1077       }
1078 
1079     }
1080     else {
1081 
1082       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1083         ii = face_cel_p[face_id][0];
1084         jj = face_cel_p[face_id][1];
1085         y[ii] += xa[2*face_id] * x[jj];
1086         y[jj] += xa[2*face_id + 1] * x[ii];
1087       }
1088 
1089     }
1090 
1091   }
1092 }
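
/*
 * Note (illustrative): in this edge-based ("native") form, each graph
 * edge contributes two scalar updates, e.g. in the non-symmetric case
 * for an edge e joining cells i and j:
 *
 *   y[i] += xa[2*e]     * x[j];
 *   y[j] += xa[2*e + 1] * x[i];
 *
 * Since different edges may update the same y entry, these loops are not
 * trivially parallel; the OpenMP variants further below rely on the face
 * numbering to avoid such conflicts.
 */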
1093 
1094 /*----------------------------------------------------------------------------
1095  * Local matrix.vector product y = A.x with native matrix.
1096  *
1097  * parameters:
1098  *   matrix       <-- pointer to matrix structure
1099  *   exclude_diag <-- exclude diagonal if true,
1100  *   sync         <-- synchronize ghost cells if true
1101  *   x            <-> multiplying vector values
1102  *   y            --> resulting vector
1103  *----------------------------------------------------------------------------*/
1104 
1105 static void
1106 _b_mat_vec_p_l_native(const cs_matrix_t  *matrix,
1107                       bool                exclude_diag,
1108                       bool                sync,
1109                       cs_real_t           x[restrict],
1110                       cs_real_t           y[restrict])
1111 {
1112   cs_lnum_t  ii, jj, kk, face_id;
1113 
1114   const cs_matrix_struct_native_t  *ms = matrix->structure;
1115   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1116 
1117   const cs_real_t  *restrict xa = mc->xa;
1118   const cs_lnum_t *db_size = matrix->db_size;
1119 
1120   /* Initialize ghost cell communication */
1121 
1122   cs_halo_state_t *hs
1123     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1124 
1125   /* Diagonal part of matrix.vector product */
1126 
1127   if (! exclude_diag) {
1128     _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1129     _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1130   }
1131   else
1132     _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1133 
1134   /* Finalize ghost cell communication if overlap used */
1135 
1136   if (hs != NULL)
1137     _pre_vector_multiply_sync_x_end(matrix, hs, x);
1138 
1139   /* non-diagonal terms */
1140 
1141   if (mc->xa != NULL) {
1142 
1143     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1144 
1145     if (mc->symmetric) {
1146 
1147       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1148         ii = face_cel_p[face_id][0];
1149         jj = face_cel_p[face_id][1];
1150         for (kk = 0; kk < db_size[0]; kk++) {
1151           y[ii*db_size[1] + kk] += xa[face_id] * x[jj*db_size[1] + kk];
1152           y[jj*db_size[1] + kk] += xa[face_id] * x[ii*db_size[1] + kk];
1153         }
1154       }
1155     }
1156     else {
1157 
1158       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1159         ii = face_cel_p[face_id][0];
1160         jj = face_cel_p[face_id][1];
1161         for (kk = 0; kk < db_size[0]; kk++) {
1162           y[ii*db_size[1] + kk] += xa[2*face_id]     * x[jj*db_size[1] + kk];
1163           y[jj*db_size[1] + kk] += xa[2*face_id + 1] * x[ii*db_size[1] + kk];
1164         }
1165       }
1166 
1167     }
1168 
1169   }
1170 
1171 }
1172 
1173 /*----------------------------------------------------------------------------
1174  * Local matrix.vector product y = A.x with native matrix.
1175  *
1176  * parameters:
1177  *   matrix       <-- pointer to matrix structure
1178  *   exclude_diag <-- exclude diagonal if true,
1179  *   sync         <-- synchronize ghost cells if true
1180  *   x            <-> multiplying vector values
1181  *   y            --> resulting vector
1182  *----------------------------------------------------------------------------*/
1183 
1184 static void
1185 _bb_mat_vec_p_l_native(const cs_matrix_t  *matrix,
1186                        bool                exclude_diag,
1187                        bool                sync,
1188                        cs_real_t           x[restrict],
1189                        cs_real_t           y[restrict])
1190 {
1191   cs_lnum_t  ii, jj, face_id;
1192 
1193   const cs_matrix_struct_native_t  *ms = matrix->structure;
1194   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1195 
1196   const cs_real_t  *restrict xa = mc->xa;
1197   const cs_lnum_t *db_size = matrix->db_size;
1198   const cs_lnum_t *eb_size = matrix->eb_size;
1199 
1200   /* Initialize ghost cell communication */
1201 
1202   cs_halo_state_t *hs
1203     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1204 
1205   /* Diagonal part of matrix.vector product */
1206 
1207   if (! exclude_diag) {
1208     _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1209     _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1210   }
1211   else
1212     _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1213 
1214   /* Finalize ghost cell communication if overlap used */
1215 
1216   if (hs != NULL)
1217     _pre_vector_multiply_sync_x_end(matrix, hs, x);
1218 
1219   /* non-diagonal terms */
1220 
1221   if (mc->xa != NULL) {
1222 
1223     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1224 
1225     if (mc->symmetric) {
1226 
1227       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1228         ii = face_cel_p[face_id][0];
1229         jj = face_cel_p[face_id][1];
1230         _dense_eb_ax_add(ii, jj, face_id, eb_size, xa, x, y);
1231         _dense_eb_ax_add(jj, ii, face_id, eb_size, xa, x, y);
1232       }
1233     }
1234     else {
1235 
1236       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1237         ii = face_cel_p[face_id][0];
1238         jj = face_cel_p[face_id][1];
1239         _dense_eb_ax_add(ii, jj, 2*face_id, eb_size, xa, x, y);
1240         _dense_eb_ax_add(jj, ii, 2*face_id + 1, eb_size, xa, x, y);
1241       }
1242 
1243     }
1244 
1245   }
1246 
1247 }
1248 
1249 /*----------------------------------------------------------------------------
1250  * Local matrix.vector product y = A.x with native matrix.
1251  *
1252  * This variant uses a fixed 3x3 block, for better compiler optimization.
1253  *
1254  * parameters:
1255  *   matrix       <-- pointer to matrix structure
1256  *   exclude_diag <-- exclude diagonal if true,
1257  *   sync         <-- synchronize ghost cells if true
1258  *   x            <-> multiplying vector values
1259  *   y            --> resulting vector
1260  *----------------------------------------------------------------------------*/
1261 
1262 static void
1263 _3_3_mat_vec_p_l_native(const cs_matrix_t  *matrix,
1264                         bool                exclude_diag,
1265                         bool                sync,
1266                         cs_real_t           x[restrict],
1267                         cs_real_t           y[restrict])
1268 {
1269   cs_lnum_t  ii, jj, kk, face_id;
1270 
1271   const cs_matrix_struct_native_t  *ms = matrix->structure;
1272   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1273 
1274   const cs_real_t  *restrict xa = mc->xa;
1275 
1276   assert(matrix->db_size[0] == 3 && matrix->db_size[3] == 9);
1277 
1278   /* Initialize ghost cell communication */
1279 
1280   cs_halo_state_t *hs
1281     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1282 
1283   /* Diagonal part of matrix.vector product */
1284 
1285   if (! exclude_diag) {
1286     _3_3_diag_vec_p_l(mc->da, x, y, ms->n_rows);
1287     _3_3_zero_range(y, ms->n_rows, ms->n_cols_ext);
1288   }
1289   else
1290     _3_3_zero_range(y, 0, ms->n_cols_ext);
1291 
1292   /* Finalize ghost cell communication */
1293 
1294   if (hs != NULL)
1295     _pre_vector_multiply_sync_x_end(matrix, hs, x);
1296 
1297   /* non-diagonal terms */
1298 
1299   if (mc->xa != NULL) {
1300 
1301     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1302 
1303     if (mc->symmetric) {
1304 
1305       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1306         ii = face_cel_p[face_id][0];
1307         jj = face_cel_p[face_id][1];
1308         for (kk = 0; kk < 3; kk++) {
1309           y[ii*3 + kk] += xa[face_id] * x[jj*3 + kk];
1310           y[jj*3 + kk] += xa[face_id] * x[ii*3 + kk];
1311         }
1312       }
1313     }
1314     else {
1315 
1316       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1317         ii = face_cel_p[face_id][0];
1318         jj = face_cel_p[face_id][1];
1319         for (kk = 0; kk < 3; kk++) {
1320           y[ii*3 + kk] += xa[2*face_id]     * x[jj*3 + kk];
1321           y[jj*3 + kk] += xa[2*face_id + 1] * x[ii*3 + kk];
1322         }
1323       }
1324 
1325     }
1326 
1327   }
1328 
1329 }
1330 
1331 /*----------------------------------------------------------------------------
1332  * Local matrix.vector product y = A.x with native matrix.
1333  *
1334  * This variant uses a fixed 6x6 block, for better compiler optimization.
1335  *
1336  * parameters:
1337  *   matrix       <-- pointer to matrix structure
1338  *   exclude_diag <-- exclude diagonal if true,
1339  *   sync         <-- synchronize ghost cells if true
1340  *   x            <-> multiplying vector values
1341  *   y            --> resulting vector
1342  *----------------------------------------------------------------------------*/
1343 
1344 static void
1345 _6_6_mat_vec_p_l_native(const cs_matrix_t  *matrix,
1346                         bool                exclude_diag,
1347                         bool                sync,
1348                         cs_real_t           x[restrict],
1349                         cs_real_t           y[restrict])
1350 {
1351   cs_lnum_t  ii, jj, kk, face_id;
1352 
1353   const cs_matrix_struct_native_t  *ms = matrix->structure;
1354   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1355 
1356   const cs_real_t  *restrict xa = mc->xa;
1357 
1358   assert(matrix->db_size[0] == 6 && matrix->db_size[3] == 36);
1359 
1360   /* Initialize ghost cell communication */
1361 
1362   cs_halo_state_t *hs
1363     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1364 
1365   /* Diagonal part of matrix.vector product */
1366 
1367   if (! exclude_diag) {
1368     _6_6_diag_vec_p_l(mc->da, x, y, ms->n_rows);
1369     _6_6_zero_range(y, ms->n_rows, ms->n_cols_ext);
1370   }
1371   else
1372     _6_6_zero_range(y, 0, ms->n_cols_ext);
1373 
1374   /* Finalize ghost cell communication if overlap used */
1375 
1376   if (hs != NULL)
1377     _pre_vector_multiply_sync_x_end(matrix, hs, x);
1378 
1379   /* non-diagonal terms */
1380 
1381   if (mc->xa != NULL) {
1382 
1383     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1384 
1385     if (mc->symmetric) {
1386 
1387       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1388         ii = face_cel_p[face_id][0];
1389         jj = face_cel_p[face_id][1];
1390         for (kk = 0; kk < 6; kk++) {
1391           y[ii*6 + kk] += xa[face_id] * x[jj*6 + kk];
1392           y[jj*6 + kk] += xa[face_id] * x[ii*6 + kk];
1393         }
1394       }
1395     }
1396     else {
1397 
1398       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1399         ii = face_cel_p[face_id][0];
1400         jj = face_cel_p[face_id][1];
1401         for (kk = 0; kk < 6; kk++) {
1402           y[ii*6 + kk] += xa[2*face_id]     * x[jj*6 + kk];
1403           y[jj*6 + kk] += xa[2*face_id + 1] * x[ii*6 + kk];
1404         }
1405       }
1406 
1407     }
1408 
1409   }
1410 
1411 }
1412 
1413 /*----------------------------------------------------------------------------
1414  * Local matrix.vector product y = A.x with native matrix, blocked version.
1415  *
1416  * This variant uses fixed block size variants for common cases.
1417  *
1418  * parameters:
1419  *   matrix       <-- pointer to matrix structure
1420  *   exclude_diag <-- exclude diagonal if true,
1421  *   sync         <-- synchronize ghost cells if true
1422  *   x            <-> multiplying vector values
1423  *   y            --> resulting vector
1424  *----------------------------------------------------------------------------*/
1425 
1426 static void
1427 _b_mat_vec_p_l_native_fixed(const cs_matrix_t  *matrix,
1428                             bool                exclude_diag,
1429                             bool                sync,
1430                             cs_real_t           x[restrict],
1431                             cs_real_t           y[restrict])
1432 {
1433   if (matrix->db_size[0] == 3 && matrix->db_size[3] == 9)
1434     _3_3_mat_vec_p_l_native(matrix, exclude_diag, sync, x, y);
1435 
1436   else if (matrix->db_size[0] == 6 && matrix->db_size[3] == 36)
1437     _6_6_mat_vec_p_l_native(matrix, exclude_diag, sync, x, y);
1438 
1439   else
1440     _b_mat_vec_p_l_native(matrix, exclude_diag, sync, x, y);
1441 }
1442 
1443 #if defined(HAVE_OPENMP) /* OpenMP variants */
1444 
1445 /*----------------------------------------------------------------------------
1446  * Local matrix.vector product y = A.x with native matrix.
1447  *
1448  * parameters:
1449  *   matrix       <-- Pointer to matrix structure
1450  *   exclude_diag <-- exclude diagonal if true
1451  *   sync         <-- synchronize ghost cells if true
1452  *   x            <-> Multiplying vector values
1453  *   y            --> Resulting vector
1454  *----------------------------------------------------------------------------*/
1455 
1456 static void
1457 _mat_vec_p_l_native_omp(const cs_matrix_t  *matrix,
1458                         bool                exclude_diag,
1459                         bool                sync,
1460                         cs_real_t           x[restrict],
1461                         cs_real_t           y[restrict])
1462 {
1463   const int n_threads = matrix->numbering->n_threads;
1464   const int n_groups = matrix->numbering->n_groups;
1465   const cs_lnum_t *group_index = matrix->numbering->group_index;
1466 
1467   const cs_matrix_struct_native_t  *ms = matrix->structure;
1468   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1469   const cs_real_t  *restrict xa = mc->xa;
1470 
1471   assert(matrix->numbering->type == CS_NUMBERING_THREADS);
1472 
1473   /* Initialize ghost cell communication */
1474 
1475   cs_halo_state_t *hs
1476     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1477 
1478   /* Diagonal part of matrix.vector product */
1479 
1480   if (! exclude_diag) {
1481     _diag_vec_p_l(mc->da, x, y, ms->n_rows);
1482     _zero_range(y, ms->n_rows, ms->n_cols_ext);
1483   }
1484   else
1485     _zero_range(y, 0, ms->n_cols_ext);
1486 
1487   /* Finalize ghost cell communication if overlap used */
1488 
1489   if (hs != NULL)
1490     cs_halo_sync_wait(matrix->halo, x, hs);
1491 
1492   /* non-diagonal terms */
1493 
1494   if (mc->xa != NULL) {
1495 
1496     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1497 
1498     if (mc->symmetric) {
1499 
1500       for (int g_id = 0; g_id < n_groups; g_id++) {
1501 
1502 #       pragma omp parallel for
1503         for (int t_id = 0; t_id < n_threads; t_id++) {
1504 
1505           for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
1506                face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
1507                face_id++) {
1508             cs_lnum_t ii = face_cel_p[face_id][0];
1509             cs_lnum_t jj = face_cel_p[face_id][1];
1510             y[ii] += xa[face_id] * x[jj];
1511             y[jj] += xa[face_id] * x[ii];
1512           }
1513         }
1514       }
1515     }
1516     else {
1517 
1518       for (int g_id = 0; g_id < n_groups; g_id++) {
1519 
1520 #       pragma omp parallel for
1521         for (int t_id = 0; t_id < n_threads; t_id++) {
1522 
1523           for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
1524                face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
1525                face_id++) {
1526             cs_lnum_t ii = face_cel_p[face_id][0];
1527             cs_lnum_t jj = face_cel_p[face_id][1];
1528             y[ii] += xa[2*face_id] * x[jj];
1529             y[jj] += xa[2*face_id + 1] * x[ii];
1530           }
1531         }
1532       }
1533     }
1534 
1535   }
1536 }
1537 
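/* Note on the threaded native scheme used above: with CS_NUMBERING_THREADS,
   faces are split into groups, and within a given group the face ranges
   assigned to different threads are expected not to share any cell, so the
   y[ii] / y[jj] updates need no atomics. The group_index[] layout assumed
   by the loops is:

     faces for (thread t, group g): from group_index[(t*n_groups + g)*2]
                                    to   group_index[(t*n_groups + g)*2 + 1]

   For illustration (hypothetical values), with n_threads = 2, n_groups = 2:

     group_index[] = {0,10, 20,30,   10,20, 30,40};
                      t0,g0 t0,g1    t1,g0  t1,g1

   so in the g = 0 pass, thread 0 handles faces [0,10) while thread 1 handles
   faces [10,20) concurrently, and similarly for g = 1. */
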
/*----------------------------------------------------------------------------
 * Local matrix.vector product y = A.x with native matrix, blocked version
 *
 * parameters:
 *   matrix       <-- pointer to matrix structure
 *   exclude_diag <-- exclude diagonal if true
 *   sync         <-- synchronize ghost cells if true
 *   x            <-> multiplying vector values
 *   y            --> resulting vector
 *----------------------------------------------------------------------------*/

static void
_b_mat_vec_p_l_native_omp(const cs_matrix_t  *matrix,
                          bool                exclude_diag,
                          bool                sync,
                          cs_real_t           x[restrict],
                          cs_real_t           y[restrict])
1555 {
1556   const cs_lnum_t *db_size = matrix->db_size;
1557 
1558   const int n_threads = matrix->numbering->n_threads;
1559   const int n_groups = matrix->numbering->n_groups;
1560   const cs_lnum_t *group_index = matrix->numbering->group_index;
1561 
1562   const cs_matrix_struct_native_t  *ms = matrix->structure;
1563   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1564   const cs_real_t  *restrict xa = mc->xa;
1565 
1566   assert(matrix->numbering->type == CS_NUMBERING_THREADS);
1567 
1568   /* Initialize ghost cell communication */
1569 
1570   cs_halo_state_t *hs
1571     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1572 
1573   /* Diagonal part of matrix.vector product */
1574 
1575   if (! exclude_diag) {
1576     _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1577     _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1578   }
1579   else
1580     _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1581 
  /* Finalize ghost cell communication if overlap used */
1583 
1584   if (hs != NULL)
1585     _pre_vector_multiply_sync_x_end(matrix, hs, x);
1586 
1587   /* non-diagonal terms */
1588 
1589   if (mc->xa != NULL) {
1590 
1591     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1592 
1593     if (mc->symmetric) {
1594 
1595       for (int g_id = 0; g_id < n_groups; g_id++) {
1596 
1597 #       pragma omp parallel for
1598         for (int t_id = 0; t_id < n_threads; t_id++) {
1599 
1600           for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
1601                face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
1602                face_id++) {
1603             cs_lnum_t ii = face_cel_p[face_id][0];
1604             cs_lnum_t jj = face_cel_p[face_id][1];
1605             for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
1606               y[ii*db_size[1] + kk] += xa[face_id] * x[jj*db_size[1] + kk];
1607               y[jj*db_size[1] + kk] += xa[face_id] * x[ii*db_size[1] + kk];
1608             }
1609           }
1610         }
1611       }
1612 
1613     }
1614     else {
1615 
1616       for (int g_id = 0; g_id < n_groups; g_id++) {
1617 
1618 #       pragma omp parallel for
1619         for (int t_id = 0; t_id < n_threads; t_id++) {
1620 
1621           for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
1622                face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
1623                face_id++) {
1624             cs_lnum_t ii = face_cel_p[face_id][0];
1625             cs_lnum_t jj = face_cel_p[face_id][1];
1626             for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
1627               y[ii*db_size[1] + kk] += xa[2*face_id]     * x[jj*db_size[1] + kk];
1628               y[jj*db_size[1] + kk] += xa[2*face_id + 1] * x[ii*db_size[1] + kk];
1629             }
1630           }
1631         }
1632       }
1633 
1634     }
1635 
1636   }
1637 }
1638 
/*----------------------------------------------------------------------------
 * Local matrix.vector product y = A.x with native matrix, using OpenMP
 * atomic updates.
 *
 * parameters:
 *   matrix       <-- pointer to matrix structure
 *   exclude_diag <-- exclude diagonal if true
 *   sync         <-- synchronize ghost cells if true
 *   x            <-> multiplying vector values
 *   y            --> resulting vector
 *----------------------------------------------------------------------------*/

static void
_mat_vec_p_l_native_omp_atomic(const cs_matrix_t  *matrix,
                               bool                exclude_diag,
                               bool                sync,
                               cs_real_t           x[restrict],
                               cs_real_t           y[restrict])
1656 {
1657   const cs_matrix_struct_native_t  *ms = matrix->structure;
1658   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1659   const cs_real_t  *restrict xa = mc->xa;
1660 
1661   /* Initialize ghost cell communication */
1662 
1663   cs_halo_state_t *hs
1664     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1665 
1666   /* Diagonal part of matrix.vector product */
1667 
1668   if (! exclude_diag) {
1669     _diag_vec_p_l(mc->da, x, y, ms->n_rows);
1670     _zero_range(y, ms->n_rows, ms->n_cols_ext);
1671   }
1672   else
1673     _zero_range(y, 0, ms->n_cols_ext);
1674 
  /* Finalize ghost cell communication if overlap used */
1676 
1677   if (hs != NULL)
1678     cs_halo_sync_wait(matrix->halo, x, hs);
1679 
1680   /* non-diagonal terms */
1681 
1682   if (mc->xa != NULL) {
1683 
1684     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1685 
1686     if (mc->symmetric) {
1687 
1688 #     pragma omp parallel for
1689       for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1690         cs_lnum_t ii = face_cel_p[face_id][0];
1691         cs_lnum_t jj = face_cel_p[face_id][1];
1692 #       pragma omp atomic
1693         y[ii] += xa[face_id] * x[jj];
1694 #       pragma omp atomic
1695         y[jj] += xa[face_id] * x[ii];
1696       }
1697     }
1698     else {
1699 
1700 #     pragma omp parallel for
1701       for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1702         cs_lnum_t ii = face_cel_p[face_id][0];
1703         cs_lnum_t jj = face_cel_p[face_id][1];
1704 #       pragma omp atomic
1705         y[ii] += xa[2*face_id] * x[jj];
1706 #       pragma omp atomic
1707         y[jj] += xa[2*face_id + 1] * x[ii];
1708       }
1709     }
1710 
1711   }
1712 }
1713 
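/* Note: compared to _mat_vec_p_l_native_omp above, the atomic variants do not
   require a CS_NUMBERING_THREADS face renumbering (no group_index); instead,
   each scatter update to y[] is protected by "#pragma omp atomic", which is
   expected to trade scheduling constraints for per-update synchronization
   overhead. */
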
/*----------------------------------------------------------------------------
 * Local matrix.vector product y = A.x with native matrix, blocked version,
 * using OpenMP atomic updates.
 *
 * parameters:
 *   matrix       <-- pointer to matrix structure
 *   exclude_diag <-- exclude diagonal if true
 *   sync         <-- synchronize ghost cells if true
 *   x            <-> multiplying vector values
 *   y            --> resulting vector
 *----------------------------------------------------------------------------*/

static void
_b_mat_vec_p_l_native_omp_atomic(const cs_matrix_t  *matrix,
                                 bool                exclude_diag,
                                 bool                sync,
                                 cs_real_t           x[restrict],
                                 cs_real_t           y[restrict])
1731 {
1732   const cs_lnum_t *db_size = matrix->db_size;
1733 
1734   const cs_matrix_struct_native_t  *ms = matrix->structure;
1735   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1736   const cs_real_t  *restrict xa = mc->xa;
1737 
1738   /* Initialize ghost cell communication */
1739 
1740   cs_halo_state_t *hs
1741     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1742 
1743   /* Diagonal part of matrix.vector product */
1744 
1745   if (! exclude_diag) {
1746     _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1747     _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1748   }
1749   else
1750     _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1751 
  /* Finalize ghost cell communication if overlap used */
1753 
1754   if (hs != NULL)
1755     _pre_vector_multiply_sync_x_end(matrix, hs, x);
1756 
1757   /* non-diagonal terms */
1758 
1759   if (mc->xa != NULL) {
1760 
1761     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1762 
1763     if (mc->symmetric) {
1764 
1765 #     pragma omp parallel for
1766       for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1767         cs_lnum_t ii = face_cel_p[face_id][0];
1768         cs_lnum_t jj = face_cel_p[face_id][1];
1769         for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
1770 #         pragma omp atomic
1771           y[ii*db_size[1] + kk] += xa[face_id] * x[jj*db_size[1] + kk];
1772 #         pragma omp atomic
1773           y[jj*db_size[1] + kk] += xa[face_id] * x[ii*db_size[1] + kk];
1774         }
1775       }
1776 
1777     }
1778     else {
1779 
1780 #     pragma omp parallel for
1781       for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1782         cs_lnum_t ii = face_cel_p[face_id][0];
1783         cs_lnum_t jj = face_cel_p[face_id][1];
1784         for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
1785 #         pragma omp atomic
1786           y[ii*db_size[1] + kk] += xa[2*face_id]   * x[jj*db_size[1] + kk];
1787 #         pragma omp atomic
1788           y[jj*db_size[1] + kk] += xa[2*face_id+1] * x[ii*db_size[1] + kk];
1789         }
1790       }
1791 
1792     }
1793 
1794   }
1795 }
1796 
1797 #endif /* defined(HAVE_OPENMP) */
1798 
/*----------------------------------------------------------------------------
 * Local matrix.vector product y = A.x with native matrix, vectorized
 * version.
 *
 * parameters:
 *   matrix       <-- pointer to matrix structure
 *   exclude_diag <-- exclude diagonal if true
 *   sync         <-- synchronize ghost cells if true
 *   x            <-> multiplying vector values
 *   y            --> resulting vector
 *----------------------------------------------------------------------------*/

static void
_mat_vec_p_l_native_vector(const cs_matrix_t  *matrix,
                           bool                exclude_diag,
                           bool                sync,
                           cs_real_t           x[restrict],
                           cs_real_t           y[restrict])
1816 {
1817   cs_lnum_t  ii, jj, face_id;
1818   const cs_matrix_struct_native_t  *ms = matrix->structure;
1819   const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
1820   const cs_real_t  *restrict xa = mc->xa;
1821 
1822   assert(matrix->numbering->type == CS_NUMBERING_VECTORIZE);
1823 
1824   /* Initialize ghost cell communication */
1825 
1826   cs_halo_state_t *hs
1827     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1828 
1829   /* Diagonal part of matrix.vector product */
1830 
1831   if (! exclude_diag) {
1832     _diag_vec_p_l(mc->da, x, y, ms->n_rows);
1833     _zero_range(y, ms->n_rows, ms->n_cols_ext);
1834   }
1835   else
1836     _zero_range(y, 0, ms->n_cols_ext);
1837 
  /* Finalize ghost cell communication if overlap used */
1839 
1840   if (hs != NULL)
1841     cs_halo_sync_wait(matrix->halo, x, hs);
1842 
1843   /* non-diagonal terms */
1844 
1845   if (mc->xa != NULL) {
1846 
1847     const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1848 
1849     if (mc->symmetric) {
1850 
1851 #     if defined(HAVE_OPENMP_SIMD)
1852 #       pragma omp simd safelen(CS_NUMBERING_SIMD_SIZE)
1853 #     else
1854 #       pragma dir nodep
1855 #       pragma GCC ivdep
1856 #       pragma _NEC ivdep
1857 #     endif
1858       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1859         ii = face_cel_p[face_id][0];
1860         jj = face_cel_p[face_id][1];
1861         y[ii] += xa[face_id] * x[jj];
1862         y[jj] += xa[face_id] * x[ii];
1863       }
1864 
1865     }
1866     else {
1867 
1868 #     if defined(HAVE_OPENMP_SIMD)
1869 #       pragma omp simd safelen(CS_NUMBERING_SIMD_SIZE)
1870 #     else
1871 #       pragma dir nodep
1872 #       pragma GCC ivdep
1873 #       pragma _NEC ivdep
1874 #     endif
1875       for (face_id = 0; face_id < ms->n_edges; face_id++) {
1876         ii = face_cel_p[face_id][0];
1877         jj = face_cel_p[face_id][1];
1878         y[ii] += xa[2*face_id] * x[jj];
1879         y[jj] += xa[2*face_id + 1] * x[ii];
1880       }
1881 
1882     }
1883 
1884   }
1885 }
1886 
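/* Note: the simd/ivdep pragmas above assume that the CS_NUMBERING_VECTORIZE
   face ordering guarantees that no two faces within a window of
   CS_NUMBERING_SIMD_SIZE consecutive faces share a cell, so the indirect
   y[ii] / y[jj] updates carry no loop dependency over that distance. */
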
1887 /*----------------------------------------------------------------------------
1888  * Destroy a CSR matrix structure.
1889  *
1890  * parameters:
1891  *   ms  <->  pointer to CSR matrix structure pointer
1892  *----------------------------------------------------------------------------*/
1893 
1894 static void
_destroy_struct_csr(void  **ms)
{
  if (ms != NULL && *ms != NULL) {
    cs_matrix_struct_csr_t  *_ms = *ms;

    BFT_FREE(_ms->_row_index);
    BFT_FREE(_ms->_col_id);
    BFT_FREE(_ms);

    *ms = NULL;
  }
}
1907 
1908 /*----------------------------------------------------------------------------
 * Create a CSR matrix structure from a native matrix structure.
1910  *
1911  * Note that the structure created maps global cell numbers to the given
1912  * existing face -> cell connectivity array, so it must be destroyed before
1913  * this array (usually the code's global cell numbering) is freed.
1914  *
1915  * parameters:
1916  *   have_diag   <-- indicates if the diagonal is nonzero
1917  *   n_rows      <-- number of local rows
1918  *   n_cols_ext  <-- number of local + ghost columns
1919  *   n_edges     <-- local number of graph edges
1920  *   edges       <-- edges (symmetric row <-> column) connectivity
1921  *
1922  * returns:
1923  *   pointer to allocated CSR matrix structure.
1924  *----------------------------------------------------------------------------*/
1925 
1926 static cs_matrix_struct_csr_t *
_create_struct_csr(bool                have_diag,
                   cs_lnum_t           n_rows,
                   cs_lnum_t           n_cols_ext,
                   cs_lnum_t           n_edges,
                   const cs_lnum_2_t  *edges)
1932 {
1933   cs_lnum_t ii, jj, face_id;
1934   const cs_lnum_t *restrict face_cel_p;
1935 
1936   cs_lnum_t  diag_elts = 1;
1937   cs_lnum_t  *ccount = NULL;
1938 
1939   cs_matrix_struct_csr_t  *ms;
1940 
1941   /* Allocate and map */
1942 
1943   BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
1944 
1945   ms->n_rows = n_rows;
1946   ms->n_cols_ext = n_cols_ext;
1947 
1948   ms->direct_assembly = true;
1949   ms->have_diag = have_diag;
1950 
1951   BFT_MALLOC(ms->_row_index, ms->n_rows + 1, cs_lnum_t);
1952   ms->row_index = NULL;
1953 
1954   /* Count number of nonzero elements per row */
1955 
1956   BFT_MALLOC(ccount, ms->n_rows, cs_lnum_t);
1957 
1958   if (have_diag == false)
1959     diag_elts = 0;
1960 
1961   for (ii = 0; ii < ms->n_rows; ii++)  /* count starting with diagonal terms */
1962     ccount[ii] = diag_elts;
1963 
1964   if (edges != NULL) {
1965 
1966     face_cel_p = (const cs_lnum_t *restrict)edges;
1967 
1968     for (face_id = 0; face_id < n_edges; face_id++) {
1969       ii = *face_cel_p++;
1970       jj = *face_cel_p++;
1971       if (ii < ms->n_rows)
1972         ccount[ii] += 1;
1973       if (jj < ms->n_rows)
1974         ccount[jj] += 1;
1975     }
1976 
1977   } /* if (edges != NULL) */
1978 
1979   ms->_row_index[0] = 0;
1980   for (ii = 0; ii < ms->n_rows; ii++) {
1981     ms->_row_index[ii+1] = ms->_row_index[ii] + ccount[ii];
1982     ccount[ii] = diag_elts; /* pre-count for diagonal terms */
1983   }
1984 
1985   /* Build structure */
1986 
1987   BFT_MALLOC(ms->_col_id, (ms->_row_index[ms->n_rows]), cs_lnum_t);
1988   ms->col_id = NULL;
1989 
1990   if (have_diag == true) {
1991     for (ii = 0; ii < ms->n_rows; ii++) {    /* diagonal terms */
1992       ms->_col_id[ms->_row_index[ii]] = ii;
1993     }
1994   }
1995 
1996   if (edges != NULL) {                   /* non-diagonal terms */
1997 
1998     face_cel_p = (const cs_lnum_t *restrict)edges;
1999 
2000     for (face_id = 0; face_id < n_edges; face_id++) {
2001       ii = *face_cel_p++;
2002       jj = *face_cel_p++;
2003       if (ii < ms->n_rows) {
2004         ms->_col_id[ms->_row_index[ii] + ccount[ii]] = jj;
2005         ccount[ii] += 1;
2006       }
2007       if (jj < ms->n_rows) {
2008         ms->_col_id[ms->_row_index[jj] + ccount[jj]] = ii;
2009         ccount[jj] += 1;
2010       }
2011     }
2012 
2013   } /* if (edges != NULL) */
2014 
2015   BFT_FREE(ccount);
2016 
2017   /* Sort line elements by column id (for better access patterns) */
2018 
2019   ms->direct_assembly = cs_sort_indexed(ms->n_rows,
2020                                         ms->_row_index,
2021                                         ms->_col_id);
2022 
2023   /* Compact elements if necessary */
2024 
2025   if (ms->direct_assembly == false) {
2026 
2027     cs_lnum_t *tmp_row_index = NULL;
2028     cs_lnum_t  kk = 0;
2029 
2030     BFT_MALLOC(tmp_row_index, ms->n_rows+1, cs_lnum_t);
2031     memcpy(tmp_row_index, ms->_row_index, (ms->n_rows+1)*sizeof(cs_lnum_t));
2032 
2033     kk = 0;
2034 
2035     for (ii = 0; ii < ms->n_rows; ii++) {
2036       cs_lnum_t *col_id = ms->_col_id + ms->_row_index[ii];
2037       cs_lnum_t n_cols = ms->_row_index[ii+1] - ms->_row_index[ii];
2038       cs_lnum_t col_id_prev = -1;
2039       ms->_row_index[ii] = kk;
2040       for (jj = 0; jj < n_cols; jj++) {
2041         if (col_id_prev != col_id[jj]) {
2042           ms->_col_id[kk++] = col_id[jj];
2043           col_id_prev = col_id[jj];
2044         }
2045       }
2046     }
2047     ms->_row_index[ms->n_rows] = kk;
2048 
2049     assert(ms->_row_index[ms->n_rows] < tmp_row_index[ms->n_rows]);
2050 
2051     BFT_FREE(tmp_row_index);
2052     BFT_REALLOC(ms->_col_id, (ms->_row_index[ms->n_rows]), cs_lnum_t);
2053 
2054   }
2055 
2056   ms->row_index = ms->_row_index;
2057   ms->col_id = ms->_col_id;
2058 
2059   return ms;
2060 }
2061 
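/* Illustration (hypothetical values): with have_diag = true, n_rows = 3 and
   edges = {{0, 1}, {0, 2}}, the counting pass above yields
   ccount = {3, 2, 2}, hence row_index = {0, 3, 5, 7}; the fill pass then
   gives col_id = {0, 1, 2,  1, 0,  2, 0}, which cs_sort_indexed() reorders
   to col_id = {0, 1, 2,  0, 1,  0, 2}. */
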
2062 /*----------------------------------------------------------------------------
2063  * Create a CSR matrix structure from an index and an array related
2064  * to column id
2065  *
2066  * parameters:
 *   have_diag  <-- indicates if the diagonal structure contains nonzero values
 *   transfer   <-- if true, ownership of row_index and col_id is transferred
 *                  to the structure; otherwise they are simply mapped
 *   ordered    <-- indicates if row entries are already ordered
 *   n_rows     <-- local number of rows
 *   n_cols_ext <-- local number of columns + ghosts
 *   row_index  <-- pointer to index on rows
 *   col_id     <-> pointer to array of column ids related to the row index
2075  *
2076  * returns:
2077  *    a pointer to a created CSR matrix structure
2078  *----------------------------------------------------------------------------*/
2079 
2080 static cs_matrix_struct_csr_t *
_create_struct_csr_from_csr(bool         have_diag,
                            bool         transfer,
                            bool         ordered,
                            cs_lnum_t    n_rows,
                            cs_lnum_t    n_cols_ext,
                            cs_lnum_t  **row_index,
                            cs_lnum_t  **col_id)
2088 {
2089   cs_matrix_struct_csr_t  *ms = NULL;
2090 
2091   cs_lnum_t  *_row_index = *row_index;
2092   cs_lnum_t  *_col_id = *col_id;
2093 
2094   /* Allocate and map */
2095 
2096   BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
2097 
2098   ms->n_rows = n_rows;
2099   ms->n_cols_ext = n_cols_ext;
2100 
2101   ms->direct_assembly = false; /* not relevant here */
2102   ms->have_diag = have_diag;
2103 
2104   ms->row_index = _row_index;
2105   ms->col_id = _col_id;
2106 
2107   ms->_row_index = NULL;
2108   ms->_col_id = NULL;
2109 
2110   if (transfer == true) {
2111 
2112     ms->_row_index = _row_index;
2113     ms->_col_id = _col_id;
2114 
2115     *row_index = NULL;
2116     *col_id = NULL;
2117 
2118     /* Sort line elements by column id (for better access patterns) */
2119 
2120     if (! ordered)
2121       cs_sort_indexed(ms->n_rows,
2122                       ms->_row_index,
2123                       ms->_col_id);
2124 
2125   }
2126 
2127   return ms;
2128 }
2129 
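/* Illustrative (hypothetical) use of the ownership transfer above:

     cs_lnum_t *row_index = ...;    allocated by the caller
     cs_lnum_t *col_id = ...;
     cs_matrix_struct_csr_t *ms
       = _create_struct_csr_from_csr(true,       have_diag
                                     true,       transfer ownership
                                     false,      entries not yet ordered
                                     n_rows,
                                     n_cols_ext,
                                     &row_index,
                                     &col_id);

   On return, row_index and col_id are NULL; the arrays are now owned by the
   structure (as _row_index / _col_id) and are freed with it by
   _destroy_struct_csr. */
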
2130 /*----------------------------------------------------------------------------
 * Create a CSR matrix structure from a shared index and a shared array
 * related to column id.
 *
 * parameters:
 *   have_diag       <-- indicates if diagonal structure contains nonzero values
 *   direct_assembly <-- true if each value corresponds to a unique face
 *   n_rows          <-- local number of rows
 *   n_cols_ext      <-- local number of columns + ghosts
 *   row_index       <-- index on rows
 *   col_id          <-- array of column ids related to the row index
2141  *
2142  * returns:
2143  *    a pointer to a created CSR matrix structure
2144  *----------------------------------------------------------------------------*/
2145 
2146 static cs_matrix_struct_csr_t *
_create_struct_csr_from_shared(bool              have_diag,
                               bool              direct_assembly,
                               cs_lnum_t         n_rows,
                               cs_lnum_t         n_cols_ext,
                               const cs_lnum_t  *row_index,
                               const cs_lnum_t  *col_id)
2153 {
2154   cs_matrix_struct_csr_t  *ms = NULL;
2155 
2156   /* Allocate and map */
2157 
2158   BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
2159 
2160   ms->n_rows = n_rows;
2161   ms->n_cols_ext = n_cols_ext;
2162 
2163   ms->direct_assembly = direct_assembly;
2164   ms->have_diag = have_diag;
2165 
2166   ms->row_index = row_index;
2167   ms->col_id = col_id;
2168 
2169   ms->_row_index = NULL;
2170   ms->_col_id = NULL;
2171 
2172   return ms;
2173 }
2174 
2175 /*----------------------------------------------------------------------------
2176  * Create a CSR matrix structure from the restriction to local rank of
2177  * another CSR matrix structure.
2178  *
2179  * parameters:
2180  *   src <-- base matrix structure
2181  *
2182  * returns:
2183  *    a pointer to a created CSR matrix structure
2184  *----------------------------------------------------------------------------*/
2185 
2186 static cs_matrix_struct_csr_t *
_create_struct_csr_from_restrict_local(const cs_matrix_struct_csr_t  *src)
2188 {
2189   cs_matrix_struct_csr_t  *ms = NULL;
2190 
2191   /* Allocate and map */
2192 
2193   BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
2194 
2195   const cs_lnum_t n_rows = src->n_rows;
2196 
2197   ms->n_rows = n_rows;
2198   ms->n_cols_ext = n_rows;
2199 
2200   ms->direct_assembly = src->direct_assembly;
2201   ms->have_diag = src->have_diag;
2202 
2203   BFT_MALLOC(ms->_row_index, ms->n_rows+1, cs_lnum_t);
2204   BFT_MALLOC(ms->_col_id, src->row_index[ms->n_rows], cs_lnum_t);
2205 
2206   ms->_row_index[0] = 0;
2207 
2208   cs_lnum_t k = 0;
2209 
2210   const cs_lnum_t *col_id_s = src->col_id;
2211   cs_lnum_t *col_id_d = ms->_col_id;
2212 
2213   for (cs_lnum_t i = 0; i < n_rows; i++) {
2214     const cs_lnum_t s_id = src->row_index[i];
2215     const cs_lnum_t e_id = src->row_index[i+1];
2216     for (cs_lnum_t j = s_id; j < e_id; j++) {
2217       cs_lnum_t c_id = col_id_s[j];
2218       if (c_id < n_rows) {
2219         col_id_d[k] = c_id;
2220         k += 1;
2221       }
2222     }
2223     ms->_row_index[i+1] = k;
2224   }
2225 
2226   BFT_REALLOC(ms->_col_id, ms->_row_index[n_rows], cs_lnum_t);
2227 
2228   ms->row_index = ms->_row_index;
2229   ms->col_id = ms->_col_id;
2230 
2231   return ms;
2232 }
2233 
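/* Illustration (hypothetical values): with src->n_rows = 4, a source row
   whose column ids are {1, 3, 5} is restricted to {1, 3}, since ids >= n_rows
   refer to ghost columns and are dropped. */
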
2234 /*----------------------------------------------------------------------------
2235  * Destroy CSR matrix coefficients.
2236  *
2237  * parameters:
2238  *   m  <->  pointer to matrix structure
2239  *----------------------------------------------------------------------------*/
2240 
2241 static void
_destroy_coeff_csr(cs_matrix_t  *m)
2243 {
2244   if (m->coeffs != NULL) {
2245     cs_matrix_coeff_csr_t  *mc = m->coeffs;
2246 
2247     BFT_FREE(mc->_val);
2248     BFT_FREE(mc->_d_val);
2249 
2250     BFT_FREE(m->coeffs);
2251   }
2252 }
2253 
2254 /*----------------------------------------------------------------------------
2255  * Create CSR matrix coefficients.
2256  *
2257  * returns:
2258  *   pointer to allocated CSR coefficients structure.
2259  *----------------------------------------------------------------------------*/
2260 
2261 static cs_matrix_coeff_csr_t *
_create_coeff_csr(void)
2263 {
2264   cs_matrix_coeff_csr_t  *mc = NULL;
2265 
2266   /* Allocate */
2267 
2268   BFT_MALLOC(mc, 1, cs_matrix_coeff_csr_t);
2269 
2270   /* Initialize */
2271 
2272   mc->val = NULL;
2273   mc->_val = NULL;
2274 
2275   mc->d_val = NULL;
2276   mc->_d_val = NULL;
2277 
2278   return mc;
2279 }
2280 
2281 /*----------------------------------------------------------------------------
2282  * Set CSR matrix coefficients to zero.
2283  *
2284  * The coefficients should already be allocated.
2285  *
 * Use of this function is preferable to a simple loop, as its
 * threading behavior should be consistent with SpMV in NUMA cases.
2288  *
2289  * parameters:
2290  *   matrix           <-> pointer to matrix structure
2291  *----------------------------------------------------------------------------*/
2292 
2293 static void
_zero_coeffs_csr(cs_matrix_t  *matrix)
2295 {
2296   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2297 
2298   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2299 
2300   const cs_lnum_t  n_rows = ms->n_rows;
2301   const cs_lnum_t *eb_size = matrix->eb_size;
2302 
2303   if (eb_size[0] == 1) {
2304 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
2305     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2306       const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2307       cs_real_t  *m_row = mc->_val + ms->row_index[ii];
2308       for (cs_lnum_t jj = 0; jj < n_cols; jj++)
2309         m_row[jj] = 0.0;
2310     }
2311   }
2312   else {
2313 #   pragma omp parallel for  if(n_rows*eb_size[0] > CS_THR_MIN)
2314     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2315       const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2316       cs_real_t  *m_row = mc->_val + ms->row_index[ii]*eb_size[3];
2317       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2318         for (cs_lnum_t kk = 0; kk < eb_size[3]; kk++)
2319           m_row[jj*eb_size[3] + kk] = 0.0;
2320       }
2321     }
2322   }
2323 }
2324 
2325 /*----------------------------------------------------------------------------
2326  * Set CSR extradiagonal matrix coefficients for the case where direct
2327  * assignment is possible (i.e. when there are no multiple contributions
2328  * to a given coefficient).
2329  *
2330  * parameters:
2331  *   matrix      <-- pointer to matrix structure
2332  *   symmetric   <-- indicates if extradiagonal values are symmetric
2333  *   n_edges     <-- local number of graph edges
2334  *   edges       <-- edges (symmetric row <-> column) connectivity
2335  *   xa          <-- extradiagonal values
2336  *----------------------------------------------------------------------------*/
2337 
2338 static void
_set_xa_coeffs_csr_direct(cs_matrix_t        *matrix,
                          bool                symmetric,
                          cs_lnum_t           n_edges,
                          const cs_lnum_2_t  *edges,
                          const cs_real_t     xa[restrict])
2344 {
2345   cs_lnum_t  ii, jj, face_id;
2346   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2347 
2348   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2349 
2350   /* Copy extra-diagonal values */
2351 
2352   assert(edges != NULL);
2353 
2354   const cs_lnum_t *restrict edges_p
2355     = (const cs_lnum_t *restrict)(edges);
2356 
2357   if (symmetric == false) {
2358 
2359     for (face_id = 0; face_id < n_edges; face_id++) {
2360       cs_lnum_t kk, ll;
2361       ii = *edges_p++;
2362       jj = *edges_p++;
2363       if (ii < ms->n_rows) {
2364         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2365         mc->_val[kk] = xa[2*face_id];
2366       }
2367       if (jj < ms->n_rows) {
2368         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2369         mc->_val[ll] = xa[2*face_id + 1];
2370       }
2371     }
2372 
2373   }
2374   else { /* if symmetric == true */
2375 
2376     for (face_id = 0; face_id < n_edges; face_id++) {
2377       cs_lnum_t kk, ll;
2378       ii = *edges_p++;
2379       jj = *edges_p++;
2380       if (ii < ms->n_rows) {
2381         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2382         mc->_val[kk] = xa[face_id];
2383       }
2384       if (jj < ms->n_rows) {
2385         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2386         mc->_val[ll] = xa[face_id];
2387       }
2388 
2389     }
2390 
2391   } /* end of condition on coefficients symmetry */
2392 
2393 }
2394 
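/* Note: the idiom "for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);"
   used above performs a linear search within row ii and assumes jj is always
   present in that row, which is expected to hold when the CSR structure was
   built from the same edge connectivity. */
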
2395 /*----------------------------------------------------------------------------
 * Set CSR extradiagonal matrix coefficients for the case where there are
 * multiple contributions to a given coefficient.
 *
 * The matrix coefficients should have been initialized (i.e. set to 0)
 * at some point before using this function.
2401  *
2402  * parameters:
2403  *   matrix      <-- pointer to matrix structure
2404  *   symmetric   <-- indicates if extradiagonal values are symmetric
2405  *   n_edges     <-- local number of graph edges
2406  *   edges       <-- edges (symmetric row <-> column) connectivity
2407  *   xa          <-- extradiagonal values
2408  *----------------------------------------------------------------------------*/
2409 
2410 static void
_set_xa_coeffs_csr_increment(cs_matrix_t        *matrix,
                             bool                symmetric,
                             cs_lnum_t           n_edges,
                             const cs_lnum_2_t   edges[restrict],
                             const cs_real_t     xa[restrict])
2416 {
2417   cs_lnum_t  ii, jj, face_id;
2418   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2419 
2420   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2421 
2422   /* Copy extra-diagonal values */
2423 
2424   assert(edges != NULL);
2425 
2426   const cs_lnum_t *restrict edges_p
2427     = (const cs_lnum_t *restrict)(edges);
2428 
2429   if (symmetric == false) {
2430 
2431     for (face_id = 0; face_id < n_edges; face_id++) {
2432       cs_lnum_t kk, ll;
2433       ii = *edges_p++;
2434       jj = *edges_p++;
2435       if (ii < ms->n_rows) {
2436         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2437         mc->_val[kk] += xa[2*face_id];
2438       }
2439       if (jj < ms->n_rows) {
2440         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2441         mc->_val[ll] += xa[2*face_id + 1];
2442       }
2443     }
2444 
2445   }
2446   else { /* if symmetric == true */
2447 
2448     for (face_id = 0; face_id < n_edges; face_id++) {
2449       cs_lnum_t kk, ll;
2450       ii = *edges_p++;
2451       jj = *edges_p++;
2452       if (ii < ms->n_rows) {
2453         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2454         mc->_val[kk] += xa[face_id];
2455       }
2456       if (jj < ms->n_rows) {
2457         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2458         mc->_val[ll] += xa[face_id];
2459       }
2460 
2461     }
2462 
2463   } /* end of condition on coefficients symmetry */
2464 
2465 }
2466 
2467 /*----------------------------------------------------------------------------
2468  * Set CSR matrix coefficients.
2469  *
2470  * parameters:
2471  *   matrix      <-> pointer to matrix structure
2472  *   symmetric   <-- indicates if extradiagonal values are symmetric
2473  *   copy        <-- indicates if coefficients should be copied
2474  *   n_edges     <-- local number of graph edges
2475  *   edges       <-- edges (symmetric row <-> column) connectivity
2476  *   da          <-- diagonal values (NULL if all zero)
2477  *   xa          <-- extradiagonal values (NULL if all zero)
2478  *----------------------------------------------------------------------------*/
2479 
2480 static void
_set_coeffs_csr(cs_matrix_t      *matrix,
                bool              symmetric,
                bool              copy,
                cs_lnum_t         n_edges,
                const cs_lnum_t   edges[restrict][2],
                const cs_real_t   da[restrict],
                const cs_real_t   xa[restrict])
2488 {
2489   CS_UNUSED(copy);
2490 
2491   cs_lnum_t  ii, jj;
2492   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2493 
2494   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2495 
2496   if (mc->_val == NULL)
2497     BFT_MALLOC(mc->_val, ms->row_index[ms->n_rows], cs_real_t);
2498   mc->val = mc->_val;
2499 
2500   /* Initialize coefficients to zero if assembly is incremental */
2501 
2502   if (ms->direct_assembly == false)
2503     _zero_coeffs_csr(matrix);
2504 
2505   /* Copy diagonal values */
2506 
2507   if (ms->have_diag == true) {
2508 
2509     if (da != NULL) {
2510       for (ii = 0; ii < ms->n_rows; ii++) {
2511         cs_lnum_t kk;
2512         for (kk = ms->row_index[ii]; ms->col_id[kk] != ii; kk++);
2513         mc->_val[kk] = da[ii];
2514       }
2515     }
2516     else {
2517       for (ii = 0; ii < ms->n_rows; ii++) {
2518         cs_lnum_t kk;
2519         for (kk = ms->row_index[ii]; ms->col_id[kk] != ii; kk++);
2520         mc->_val[kk] = 0.0;
2521       }
2522     }
2523 
2524   }
2525 
2526   /* Mark diagonal values as not queried (mc->_d_val not changed) */
2527 
2528   mc->d_val = NULL;
2529 
2530   /* Copy extra-diagonal values */
2531 
2532   if (edges != NULL) {
2533 
2534     if (xa != NULL) {
2535 
2536       if (ms->direct_assembly == true)
2537         _set_xa_coeffs_csr_direct(matrix, symmetric, n_edges, edges, xa);
2538       else
2539         _set_xa_coeffs_csr_increment(matrix, symmetric, n_edges, edges, xa);
2540 
2541     }
2542     else { /* if (xa == NULL) */
2543 
2544       for (ii = 0; ii < ms->n_rows; ii++) {
2545         const cs_lnum_t  *restrict col_id = ms->col_id + ms->row_index[ii];
2546         cs_real_t  *m_row = mc->_val + ms->row_index[ii];
2547         cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2548 
2549         for (jj = 0; jj < n_cols; jj++) {
2550           if (col_id[jj] != ii)
2551             m_row[jj] = 0.0;
2552         }
2553 
2554       }
2555 
2556     }
2557 
2558   } /* (matrix->edges != NULL) */
2559 
2560 }
2561 
2562 /*----------------------------------------------------------------------------
2563  * Set CSR matrix coefficients provided in MSR form.
2564  *
2565  * If da and xa are equal to NULL, then initialize val with zeros.
2566  *
2567  * parameters:
2568  *   matrix           <-> pointer to matrix structure
2569  *   row_index        <-- MSR row index (0 to n-1)
2570  *   col_id           <-- MSR column id (0 to n-1)
2571  *   d_vals           <-- diagonal values (NULL if all zero)
 *   d_vals_transfer  <-- diagonal values whose ownership is transferred
2573  *                        (NULL or d_vals in, NULL out)
2574  *   x_vals           <-- extradiagonal values (NULL if all zero)
2575  *   x_vals_transfer  <-- extradiagonal values whose ownership is transferred
2576  *                        (NULL or x_vals in, NULL out)
2577  *----------------------------------------------------------------------------*/
2578 
2579 static void
_set_coeffs_csr_from_msr(cs_matrix_t       *matrix,
                         const cs_lnum_t    row_index[],
                         const cs_lnum_t    col_id[],
                         const cs_real_t    d_vals[restrict],
                         cs_real_t        **d_vals_transfer,
                         const cs_real_t    x_vals[restrict],
                         cs_real_t        **x_vals_transfer)
2587 {
2588   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2589 
2590   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2591 
2592   const cs_lnum_t  n_rows = ms->n_rows;
2593 
2594   /* Sanity check */
2595 
2596   if (matrix->db_size[0] > 1 || matrix->eb_size[0] > 1)
2597     bft_error
2598       (__FILE__, __LINE__, 0,
2599        "%s:\n"
       "  case with diagonal block size %ld and extradiagonal block size %ld\n"
2601        "  not implemented.\n",
2602        __func__, (long)matrix->db_size[0], (long)matrix->eb_size[0]);
2603 
2604   /* Special configuration where ownership is transferred directly */
2605 
2606   /* TODO: we should use metadata or check that the row_index and
2607      column id values are consistent, which should be true as long
2608      as columns are ordered in an identical manner */
2609 
2610   if (x_vals_transfer != NULL) {
2611     if (d_vals == NULL && *x_vals_transfer != NULL) {
2612       mc->_val = *x_vals_transfer;
2613       mc->val = mc->_val;
2614       *x_vals_transfer = NULL;
2615       return;
2616     }
2617   }
2618 
2619   /* Allocate local array */
2620 
2621   if (mc->_val == NULL)
2622     BFT_MALLOC(mc->_val, ms->row_index[ms->n_rows], cs_real_t);
2623 
2624   mc->val = mc->_val;
2625 
2626   /* Mark diagonal values as not queried (mc->_d_val not changed) */
2627 
2628   mc->d_val = NULL;
2629 
2630   /* Case with diagonal and extradiagonal values */
2631 
2632   if (d_vals != NULL && x_vals != NULL) {
2633 
2634 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
2635     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2636 
2637       const cs_lnum_t  *restrict m_col_id = ms->col_id + ms->row_index[ii];
2638       cs_real_t  *restrict m_row = mc->_val + ms->row_index[ii];
2639       cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2640 
2641       const cs_lnum_t  *restrict s_col_id = col_id + row_index[ii];
      const cs_real_t  *restrict s_row = x_vals + row_index[ii];
2643       cs_lnum_t  n_s_cols = row_index[ii+1] - row_index[ii];
2644 
2645       cs_lnum_t c_id_0 = 0;
2646 
2647       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2648         if (m_col_id[jj] == ii)
2649           m_row[jj] = d_vals[ii];
2650         else {
2651           /* Optimize for ordered case */
2652           if (m_col_id[jj] == s_col_id[c_id_0]) {
2653             m_row[jj] = s_row[c_id_0];
2654             c_id_0++;
2655           }
2656           else {
2657             for (cs_lnum_t kk = c_id_0; kk < n_s_cols; kk++) {
2658               if (m_col_id[jj] == s_col_id[kk]) {
2659                 m_row[jj] = s_row[kk];
2660                 break;
2661               }
2662             }
2663           }
2664         }
2665       }
2666 
2667     }
2668   }
2669 
2670   /* Case with diagonal values only */
2671 
2672   else if (d_vals != NULL) {
2673 
2674 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
2675     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2676 
2677       const cs_lnum_t  *restrict m_col_id = ms->col_id + ms->row_index[ii];
2678       cs_real_t  *restrict m_row = mc->_val + ms->row_index[ii];
2679       cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2680 
2681       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2682         if (m_col_id[jj] == ii)
2683           m_row[jj] = d_vals[ii];
2684         else
2685           m_row[jj] = 0.;
2686       }
2687 
2688     }
2689   }
2690 
2691   /* Case with null-diagonal */
2692 
2693   else if (x_vals != NULL) {
2694 
2695 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
2696     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2697 
2698       const cs_lnum_t  *restrict m_col_id = ms->col_id + ms->row_index[ii];
2699       cs_real_t  *restrict m_row = mc->_val + ms->row_index[ii];
2700       cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2701 
2702       const cs_lnum_t  *restrict s_col_id = col_id + row_index[ii];
      const cs_real_t  *restrict s_row = x_vals + row_index[ii];
2704       cs_lnum_t  n_s_cols = row_index[ii+1] - row_index[ii];
2705 
2706       cs_lnum_t c_id_0 = 0;
2707 
2708       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2709         if (m_col_id[jj] == ii)
2710           m_row[jj] = 0.;
2711         else {
2712           /* Optimize for ordered case */
2713           if (m_col_id[jj] == s_col_id[c_id_0]) {
2714             m_row[jj] = s_row[c_id_0];
2715             c_id_0++;
2716           }
2717           else {
2718             for (cs_lnum_t kk = c_id_0; kk < n_s_cols; kk++) {
2719               if (m_col_id[jj] == s_col_id[kk]) {
2720                 m_row[jj] = s_row[kk];
2721                 break;
2722               }
2723             }
2724           }
2725         }
2726       }
2727 
2728     }
2729 
2730   }
2731 
2732   else
2733     _zero_coeffs_csr(matrix);
2734 
2735   /* Now free transferred arrays */
2736 
2737   if (d_vals_transfer != NULL)
2738     BFT_FREE(*d_vals_transfer);
2739   if (x_vals_transfer != NULL)
2740     BFT_FREE(*x_vals_transfer);
2741 }
2742 
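/* Note: the "optimize for ordered case" branch above relies on the MSR and
   CSR column ids of a given row appearing in the same relative order; the
   c_id_0 cursor then advances monotonically and each extradiagonal value is
   matched in O(1), with a forward scan from c_id_0 as the fallback
   otherwise. */
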
2743 /*----------------------------------------------------------------------------
2744  * Release shared CSR matrix coefficients.
2745  *
2746  * parameters:
2747  *   matrix <-- pointer to matrix structure
2748  *----------------------------------------------------------------------------*/
2749 
2750 static void
_release_coeffs_csr(cs_matrix_t  *matrix)
2752 {
2753   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2754   if (mc != NULL)
2755     mc->d_val = NULL;
2756   return;
2757 }
2758 
2759 /*----------------------------------------------------------------------------
2760  * Copy diagonal of CSR matrix.
2761  *
2762  * parameters:
2763  *   matrix <-- pointer to matrix structure
2764  *   da     --> diagonal (pre-allocated, size: n_rows)
2765  *----------------------------------------------------------------------------*/
2766 
2767 static void
_copy_diagonal_csr(const cs_matrix_t  *matrix,
                   cs_real_t          *restrict da)
2770 {
2771   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2772   const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2773   cs_lnum_t  n_rows = ms->n_rows;
2774 
2775 # pragma omp parallel for  if(n_rows > CS_THR_MIN)
2776   for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2777 
2778     const cs_lnum_t  *restrict col_id = ms->col_id + ms->row_index[ii];
2779     const cs_real_t  *restrict m_row = mc->val + ms->row_index[ii];
2780     cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2781 
2782     da[ii] = 0.0;
2783     for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2784       if (col_id[jj] == ii) {
2785         da[ii] = m_row[jj];
2786         break;
2787       }
2788     }
2789 
2790   }
2791 }
2792 
2793 /*----------------------------------------------------------------------------*/
2794 /*!
2795  * \brief Get matrix diagonal values for CSR matrix.
2796  *
2797  * In case of matrixes with block diagonal coefficients, a pointer to
2798  * the complete block diagonal is returned.
2799  *
2800  * \param[in]  matrix  pointer to matrix structure
2801  *
2802  * \return  pointer to matrix diagonal array
2803  */
2804 /*----------------------------------------------------------------------------*/
2805 
2806 static const cs_real_t *
_get_diagonal_csr(const cs_matrix_t  *matrix)
2808 {
2809   const cs_real_t  *diag = NULL;
2810 
2811   cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2812   assert(matrix->db_size[3] == 1);
2813   if (mc->_d_val == NULL)
2814     BFT_MALLOC(mc->_d_val, matrix->n_rows, cs_real_t);
2815   if (mc->d_val == NULL) {
2816     cs_matrix_copy_diagonal(matrix, mc->_d_val);
2817     mc->d_val = mc->_d_val;
2818   }
2819   diag = mc->d_val;
2820 
2821   return diag;
2822 }
2823 
2824 /*----------------------------------------------------------------------------*/
2825 /*!
2826  * \brief Function for initialization of CSR matrix coefficients using
2827  *        local row ids and column indexes.
2828  *
2829  * \warning  The matrix pointer must point to valid data when the selection
2830  *           function is called, so the life cycle of the data pointed to
2831  *           should be at least as long as that of the assembler values
2832  *           structure.
2833  *
2834  * \param[in, out]  matrix_p  untyped pointer to matrix description structure
2835  * \param[in]       db_size   optional diagonal block sizes
2836  * \param[in]       eb_size   optional extra-diagonal block sizes
2837  */
2838 /*----------------------------------------------------------------------------*/
2839 
2840 static void
_csr_assembler_values_init(void              *matrix_p,
                           const cs_lnum_t    db_size[4],
                           const cs_lnum_t    eb_size[4])
2844 {
2845   CS_UNUSED(db_size);
2846 
2847   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
2848 
2849   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2850 
2851   const cs_lnum_t n_rows = matrix->n_rows;
2852   cs_lnum_t e_stride = 1;
2853   if (eb_size != NULL)
2854     e_stride = eb_size[3];
2855 
2856   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2857 
  /* Initialize coefficient values */
2859 
2860   BFT_REALLOC(mc->_val, e_stride*ms->row_index[ms->n_rows], cs_real_t);
2861   mc->val = mc->_val;
2862 
# pragma omp parallel for  if(n_rows*e_stride > CS_THR_MIN)
2864   for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2865     cs_lnum_t n_s_cols = (ms->row_index[ii+1] - ms->row_index[ii])*e_stride;
2866     cs_lnum_t displ = ms->row_index[ii]*e_stride;
2867     for (cs_lnum_t jj = 0; jj < n_s_cols; jj++)
2868       mc->_val[displ + jj] = 0;
2869   }
2870 }
2871 
2872 /*----------------------------------------------------------------------------*/
2873 /*!
2874  * \brief Function for addition to CSR matrix coefficients using
2875  *        local row ids and column indexes.
2876  *
 * Values whose associated row index is negative should be ignored;
 * values whose column index is -1 are assumed to be assigned to a
 * separately stored diagonal. Other indexes should be valid.
2880  *
2881  * \warning  The matrix pointer must point to valid data when the selection
2882  *           function is called, so the life cycle of the data pointed to
2883  *           should be at least as long as that of the assembler values
2884  *           structure.
2885  *
 * \remark  Note that we pass column indexes (not ids) here, as the
2887  *          caller is already assumed to have identified the index
2888  *          matching a given column id.
2889  *
2890  * \param[in, out]  matrix_p  untyped pointer to matrix description structure
2891  * \param[in]       n         number of values to add
2892  * \param[in]       stride    associated data block size
2893  * \param[in]       row_id    associated local row ids
2894  * \param[in]       col_idx   associated local column indexes
2895  * \param[in]       vals      pointer to values (size: n*stride)
2896  */
2897 /*----------------------------------------------------------------------------*/
2898 
2899 static void
_csr_assembler_values_add(void             *matrix_p,
                          cs_lnum_t         n,
                          cs_lnum_t         stride,
                          const cs_lnum_t   row_id[],
                          const cs_lnum_t   col_idx[],
                          const cs_real_t   vals[])
2906 {
2907   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
2908 
2909   cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
2910 
2911   const cs_matrix_struct_csr_t  *ms = matrix->structure;
2912 
2913   if (stride == 1) {
2914 
2915     /* Copy instead of test for OpenMP to avoid outlining for small sets */
2916 
2917     if (n*stride <= CS_THR_MIN) {
2918       for (cs_lnum_t ii = 0; ii < n; ii++) {
2919         if (row_id[ii] < 0)
2920           continue;
2921         else {
2922           cs_lnum_t r_id = row_id[ii];
2923           mc->_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
2924         }
2925       }
2926     }
2927 
2928     else {
2929 #     pragma omp parallel for  if(n*stride > CS_THR_MIN)
2930       for (cs_lnum_t ii = 0; ii < n; ii++) {
2931         if (row_id[ii] < 0)
2932           continue;
2933         else {
2934           cs_lnum_t r_id = row_id[ii];
2935           mc->_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
2936         }
2937       }
2938     }
2939   }
2940 
2941   else { /* if (stride > 1) */
2942 
2943     /* Copy instead of test for OpenMP to avoid outlining for small sets */
2944 
2945     if (n*stride <= CS_THR_MIN) {
2946       for (cs_lnum_t ii = 0; ii < n; ii++) {
2947         if (row_id[ii] < 0)
2948           continue;
2949         else {
2950           cs_lnum_t r_id = row_id[ii];
2951           cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
2952           for (cs_lnum_t jj = 0; jj < stride; jj++)
2953             mc->_val[displ + jj] += vals[ii*stride + jj];
2954         }
2955       }
2956     }
2957 
2958     else {
2959 #     pragma omp parallel for  if(n*stride > CS_THR_MIN)
2960       for (cs_lnum_t ii = 0; ii < n; ii++) {
2961         if (row_id[ii] < 0)
2962           continue;
2963         else {
2964           cs_lnum_t r_id = row_id[ii];
2965           cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
2966           for (cs_lnum_t jj = 0; jj < stride; jj++)
2967             mc->_val[displ + jj] += vals[ii*stride + jj];
2968         }
2969       }
2970     }
2971   }
2972 
2973 }
2974 
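/* Illustration (hypothetical values): for a scalar matrix (stride = 1), a
   contribution with row_id[ii] = r and col_idx[ii] = 3 is accumulated at
   mc->_val[ms->row_index[r] + 3]; with a block stride s, the s values are
   accumulated starting at mc->_val[(ms->row_index[r] + 3)*s]. */
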
2975 /*----------------------------------------------------------------------------*/
2976 /*!
2977  * \brief Create and initialize a CSR matrix assembler values structure.
2978  *
2979  * The associated matrix's structure must have been created using
2980  * \ref cs_matrix_structure_create_from_assembler.
2981  *
2982  * Block sizes are defined by an optional array of 4 values:
2983  *   0: useful block size, 1: vector block extents,
2984  *   2: matrix line extents,  3: matrix line*column extents
2985  *
2986  * \param[in, out]  matrix                 pointer to matrix structure
2987  * \param[in]       diag_block_size        block sizes for diagonal, or NULL
2988  * \param[in]       extra_diag_block_size  block sizes for extra diagonal,
2989  *                                         or NULL
2990  *
2991  * \return  pointer to initialized matrix assembler values structure;
2992  */
2993 /*----------------------------------------------------------------------------*/
2994 
2995 static cs_matrix_assembler_values_t *
_assembler_values_create_csr(cs_matrix_t      *matrix,
                             const cs_lnum_t  *diag_block_size,
                             const cs_lnum_t  *extra_diag_block_size)
2999 {
3000   cs_matrix_assembler_values_t *mav
3001     = cs_matrix_assembler_values_create(matrix->assembler,
3002                                         false,
3003                                         diag_block_size,
3004                                         extra_diag_block_size,
3005                                         (void *)matrix,
3006                                         _csr_assembler_values_init,
3007                                         _csr_assembler_values_add,
3008                                         NULL,
3009                                         NULL,
3010                                         NULL);
3011 
3012   return mav;
3013 }
3014 
3015 /*----------------------------------------------------------------------------
3016  * Local matrix.vector product y = A.x with CSR matrix.
3017  *
3018  * parameters:
3019  *   matrix       <-- pointer to matrix structure
3020  *   exclude_diag <-- exclude diagonal if true,
3021  *   sync         <-- synchronize ghost cells if true
 *   x            <-> multiplying vector values
3023  *   y            --> resulting vector
3024  *----------------------------------------------------------------------------*/
3025 
3026 static void
_mat_vec_p_l_csr(const cs_matrix_t  *matrix,
                 bool                exclude_diag,
                 bool                sync,
                 cs_real_t          *restrict x,
                 cs_real_t          *restrict y)
3032 {
3033   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3034   const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
3035   cs_lnum_t  n_rows = ms->n_rows;
3036 
3037   /* Ghost cell communication */
3038 
3039   cs_halo_state_t *hs
3040     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
3041   if (hs != NULL)
3042     cs_halo_sync_wait(matrix->halo, x, hs);
3043 
3044   /* Standard case */
3045 
3046   if (!exclude_diag) {
3047 
3048 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
3049     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3050 
3051       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
3052       const cs_real_t *restrict m_row = mc->val + ms->row_index[ii];
3053       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3054       cs_real_t sii = 0.0;
3055 
3056       for (cs_lnum_t jj = 0; jj < n_cols; jj++)
3057         sii += (m_row[jj]*x[col_id[jj]]);
3058 
3059       y[ii] = sii;
3060 
3061     }
3062 
3063   }
3064 
3065   /* Exclude diagonal */
3066 
3067   else {
3068 
3069 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
3070     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3071 
3072       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
3073       const cs_real_t *restrict m_row = mc->val + ms->row_index[ii];
3074       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3075       cs_real_t sii = 0.0;
3076 
3077       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
3078         if (col_id[jj] != ii)
3079           sii += (m_row[jj]*x[col_id[jj]]);
3080       }
3081 
3082       y[ii] = sii;
3083 
3084     }
3085   }
3086 }
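
/*----------------------------------------------------------------------------
 * Illustrative sketch (not part of the library, kept inside "#if 0" so it
 * is never compiled): a stand-alone CSR matrix.vector product on a small
 * hard-coded matrix, using the same row_index / col_id / val layout as the
 * structures above. The function name "_csr_spmv_demo" and the example
 * values are hypothetical.
 *----------------------------------------------------------------------------*/

#if 0

static void
_csr_spmv_demo(void)
{
  /* 3x3 matrix [2 1 0; 0 3 0; 1 0 4] in CSR form */

  const cs_lnum_t row_index[4] = {0, 2, 3, 5};
  const cs_lnum_t col_id[5]    = {0, 1, 1, 0, 2};
  const cs_real_t val[5]       = {2., 1., 3., 1., 4.};

  const cs_real_t x[3] = {1., 1., 1.};
  cs_real_t y[3];

  for (cs_lnum_t ii = 0; ii < 3; ii++) {
    cs_real_t sii = 0.0;
    for (cs_lnum_t jj = row_index[ii]; jj < row_index[ii+1]; jj++)
      sii += val[jj]*x[col_id[jj]];
    y[ii] = sii;  /* expected result: {3, 3, 5} */
  }
}

#endif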
3087 
3088 #if defined (HAVE_MKL)
3089 
3090 static void
3091 _mat_vec_p_l_csr_mkl(const cs_matrix_t  *matrix,
3092                      bool                exclude_diag,
3093                      bool                sync,
3094                      cs_real_t          *restrict x,
3095                      cs_real_t          *restrict y)
3096 {
3097   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3098   const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
3099 
3100   /* Ghost cell communication */
3101 
3102   cs_halo_state_t *hs
3103     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
3104   if (hs != NULL)
3105     cs_halo_sync_wait(matrix->halo, x, hs);
3106 
3107   /* MKL call */
3108 
3109   int n_rows = ms->n_rows;
3110   char transa[] = "n";
3111 
3112   if (exclude_diag)
3113     bft_error(__FILE__, __LINE__, 0,
3114               _(_no_exclude_diag_error_str), __func__);
3115 
3116   mkl_cspblas_dcsrgemv(transa,
3117                        &n_rows,
3118                        mc->val,
3119                        ms->row_index,
3120                        ms->col_id,
3121                        (double *)x,
3122                        y);
3123 }
3124 
3125 #endif /* defined (HAVE_MKL) */
3126 
3127 /*----------------------------------------------------------------------------
3128  * Create MSR matrix coefficients.
3129  *
3130  * returns:
3131  *   pointer to allocated MSR coefficients structure.
3132  *----------------------------------------------------------------------------*/
3133 
3134 static cs_matrix_coeff_msr_t *
3135 _create_coeff_msr(void)
3136 {
3137   cs_matrix_coeff_msr_t  *mc;
3138 
3139   /* Allocate */
3140 
3141   BFT_MALLOC(mc, 1, cs_matrix_coeff_msr_t);
3142 
3143   /* Initialize */
3144 
3145   mc->max_db_size = 0;
3146   mc->max_eb_size = 0;
3147 
3148   mc->d_val = NULL;
3149   mc->x_val = NULL;
3150 
3151   mc->_d_val = NULL;
3152   mc->_x_val = NULL;
3153 
3154   return mc;
3155 }
3156 
3157 /*----------------------------------------------------------------------------
3158  * Destroy MSR matrix coefficients.
3159  *
3160  * parameters:
3161  *   m  <->  pointer to matrix structure
3162  *----------------------------------------------------------------------------*/
3163 
3164 static void
3165 _destroy_coeff_msr(cs_matrix_t  *m)
3166 {
3167   if (m->coeffs != NULL) {
3168     cs_matrix_coeff_msr_t  *mc = m->coeffs;
3169 
3170     BFT_FREE(mc->_x_val);
3171     BFT_FREE(mc->_d_val);
3172 
3173     BFT_FREE(m->coeffs);
3174   }
3175 }
3176 
3177 /*----------------------------------------------------------------------------
3178  * Set MSR matrix extradiagonal coefficients to zero.
3179  *
3180  * The coefficients should already be allocated.
3181  *
3182  * Use of this function is preferable to a simple loop, as its
3183  * threading behavior should be consistent with SpMV in NUMA cases.
3184  *
3185  * parameters:
3186  *   matrix           <-> pointer to matrix structure
3187  *----------------------------------------------------------------------------*/
3188 
3189 static void
3190 _zero_x_coeffs_msr(cs_matrix_t  *matrix)
3191 {
3192   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3193 
3194   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3195 
3196   const cs_lnum_t  n_rows = ms->n_rows;
3197   const cs_lnum_t *eb_size = matrix->eb_size;
3198 
3199   if (eb_size[0] == 1) {
3200 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
3201     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3202       const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3203       cs_real_t  *m_row = mc->_x_val + ms->row_index[ii];
3204       for (cs_lnum_t jj = 0; jj < n_cols; jj++)
3205         m_row[jj] = 0.0;
3206     }
3207   }
3208   else {
3209 #   pragma omp parallel for  if(n_rows*eb_size[0] > CS_THR_MIN)
3210     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3211       const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3212       cs_real_t  *m_row = mc->_x_val + ms->row_index[ii]*eb_size[3];
3213       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
3214         for (cs_lnum_t kk = 0; kk < eb_size[3]; kk++)
3215           m_row[jj*eb_size[3] + kk] = 0.0;
3216       }
3217     }
3218   }
3219 }
3220 
3221 /*----------------------------------------------------------------------------
3222  * Set MSR extradiagonal matrix coefficients for the case where direct
3223  * assignment is possible (i.e. when there are no multiple contributions
3224  * to a given coefficient).
3225  *
3226  * parameters:
3227  *   matrix      <-- pointer to matrix structure
3228  *   symmetric   <-- indicates if extradiagonal values are symmetric
3229  *   n_edges     <-- local number of graph edges
3230  *   edges       <-- edges (symmetric row <-> column) connectivity
3231  *   xa          <-- extradiagonal values
3232  *----------------------------------------------------------------------------*/
3233 
3234 static void
3235 _set_xa_coeffs_msr_direct(cs_matrix_t        *matrix,
3236                           bool                symmetric,
3237                           cs_lnum_t           n_edges,
3238                           const cs_lnum_2_t  *edges,
3239                           const cs_real_t    *restrict xa)
3240 {
3241   cs_lnum_t  ii, jj, face_id;
3242   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3243 
3244   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3245 
3246   /* Copy extra-diagonal values */
3247 
3248   assert(edges != NULL || n_edges == 0);
3249 
3250   if (symmetric == false) {
3251 
3252     const cs_lnum_t *restrict edges_p
3253       = (const cs_lnum_t *restrict)(edges);
3254 
3255     for (face_id = 0; face_id < n_edges; face_id++) {
3256       cs_lnum_t kk, ll;
3257       ii = *edges_p++;
3258       jj = *edges_p++;
3259       if (ii < ms->n_rows) {
3260         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3261         mc->_x_val[kk] = xa[2*face_id];
3262       }
3263       if (jj < ms->n_rows) {
3264         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3265         mc->_x_val[ll] = xa[2*face_id + 1];
3266       }
3267     }
3268 
3269   }
3270   else { /* if symmetric == true */
3271 
3272     const cs_lnum_t *restrict edges_p
3273       = (const cs_lnum_t *restrict)(edges);
3274 
3275     for (face_id = 0; face_id < n_edges; face_id++) {
3276       cs_lnum_t kk, ll;
3277       ii = *edges_p++;
3278       jj = *edges_p++;
3279       if (ii < ms->n_rows) {
3280         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3281         mc->_x_val[kk] = xa[face_id];
3282       }
3283       if (jj < ms->n_rows) {
3284         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3285         mc->_x_val[ll] = xa[face_id];
3286       }
3287 
3288     }
3289 
3290   } /* end of condition on coefficients symmetry */
3291 
3292 }
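
/*----------------------------------------------------------------------------
 * Illustrative sketch (not part of the library, kept inside "#if 0"): the
 * inner scan used above to locate the CSR position of the (ii, jj)
 * coefficient, and the per-edge layout of xa in the non-symmetric case
 * (xa[2*e] holds the (ii, jj) value, xa[2*e + 1] the (jj, ii) value;
 * a single xa[e] is used for both in the symmetric case). The helper name
 * "_csr_find_col_demo" is hypothetical.
 *----------------------------------------------------------------------------*/

#if 0

static cs_lnum_t
_csr_find_col_demo(const cs_lnum_t  row_index[],
                   const cs_lnum_t  col_id[],
                   cs_lnum_t        ii,
                   cs_lnum_t        jj)
{
  cs_lnum_t kk;

  /* Row ii spans positions row_index[ii] to row_index[ii+1] - 1;
     scan until the matching column id is found (assumed present). */

  for (kk = row_index[ii]; col_id[kk] != jj; kk++);

  return kk;
}

/* For an edge e joining local rows ii and jj of a non-symmetric matrix:

     _x_val[_csr_find_col_demo(row_index, col_id, ii, jj)] = xa[2*e];
     _x_val[_csr_find_col_demo(row_index, col_id, jj, ii)] = xa[2*e + 1];
*/

#endif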
3293 
3294 /*----------------------------------------------------------------------------
3295  * Set MSR extradiagonal block matrix coefficients for the case where direct
3296  * assignment is possible (i.e. when there are no multiple contributions
3297  * to a given coefficient).
3298  *
3299  * parameters:
3300  *   matrix      <-- pointer to matrix structure
3301  *   symmetric   <-- indicates if extradiagonal values are symmetric
3302  *   n_edges     <-- local number of graph edges
3303  *   edges       <-- edges (symmetric row <-> column) connectivity
3304  *   xa          <-- extradiagonal values
3305  *----------------------------------------------------------------------------*/
3306 
3307 static void
3308 _set_xa_coeffs_msr_direct_block(cs_matrix_t        *matrix,
3309                                 bool                symmetric,
3310                                 cs_lnum_t           n_edges,
3311                                 const cs_lnum_2_t  *edges,
3312                                 const cs_real_t    *restrict xa)
3313 {
3314   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3315 
3316   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3317 
3318   const cs_lnum_t  b_size = matrix->eb_size[3];
3319 
3320   /* Copy extra-diagonal values */
3321 
3322   assert(edges != NULL || n_edges == 0);
3323 
3324   if (symmetric == false) {
3325 
3326     const cs_lnum_t *restrict edges_p
3327       = (const cs_lnum_t *restrict)(edges);
3328 
3329     for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3330       cs_lnum_t ii = *edges_p++;
3331       cs_lnum_t jj = *edges_p++;
3332       cs_lnum_t kk, ll;
3333       if (ii < ms->n_rows) {
3334         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3335         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3336           mc->_x_val[kk*b_size + pp] = xa[2*face_id*b_size + pp];
3337       }
3338       if (jj < ms->n_rows) {
3339         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3340         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3341           mc->_x_val[ll*b_size + pp] = xa[(2*face_id+1)*b_size + pp];
3342       }
3343     }
3344 
3345   }
3346   else { /* if symmetric == true */
3347 
3348     const cs_lnum_t *restrict edges_p
3349       = (const cs_lnum_t *restrict)(edges);
3350 
3351     for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3352       cs_lnum_t ii = *edges_p++;
3353       cs_lnum_t jj = *edges_p++;
3354       cs_lnum_t kk, ll;
3355       if (ii < ms->n_rows) {
3356         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3357         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3358           mc->_x_val[kk*b_size + pp] = xa[face_id*b_size + pp];
3359       }
3360       if (jj < ms->n_rows) {
3361         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3362         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3363           mc->_x_val[ll*b_size + pp] = xa[face_id*b_size + pp];
3364       }
3365 
3366     }
3367 
3368   } /* end of condition on coefficients symmetry */
3369 
3370 }
3371 
3372 /*----------------------------------------------------------------------------
3373  * Set MSR extradiagonal matrix coefficients for the case where there are
3374  * multiple contributions to a given coefficient.
3375  *
3376  * The matrix coefficients should have been initialized (i.e. set to 0)
3377  * before using this function.
3378  *
3379  * parameters:
3380  *   matrix      <-- pointer to matrix structure
3381  *   symmetric   <-- indicates if extradiagonal values are symmetric
3382  *   n_edges     <-- local number of graph edges
3383  *   edges       <-- edges (symmetric row <-> column) connectivity
3384  *   xa          <-- extradiagonal values
3385  *----------------------------------------------------------------------------*/
3386 
3387 static void
3388 _set_xa_coeffs_msr_increment(cs_matrix_t        *matrix,
3389                              bool                symmetric,
3390                              cs_lnum_t           n_edges,
3391                              const cs_lnum_2_t  *edges,
3392                              const cs_real_t    *restrict xa)
3393 {
3394   cs_lnum_t  ii, jj, face_id;
3395   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3396 
3397   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3398 
3399   /* Copy extra-diagonal values */
3400 
3401   assert(edges != NULL);
3402 
3403   if (symmetric == false) {
3404 
3405     const cs_lnum_t *restrict edges_p
3406       = (const cs_lnum_t *restrict)(edges);
3407 
3408     for (face_id = 0; face_id < n_edges; face_id++) {
3409       cs_lnum_t kk, ll;
3410       ii = *edges_p++;
3411       jj = *edges_p++;
3412       if (ii < ms->n_rows) {
3413         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3414         mc->_x_val[kk] += xa[2*face_id];
3415       }
3416       if (jj < ms->n_rows) {
3417         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3418         mc->_x_val[ll] += xa[2*face_id + 1];
3419       }
3420     }
3421 
3422   }
3423   else { /* if symmetric == true */
3424 
3425     const cs_lnum_t *restrict edges_p
3426       = (const cs_lnum_t *restrict)(edges);
3427 
3428     for (face_id = 0; face_id < n_edges; face_id++) {
3429       cs_lnum_t kk, ll;
3430       ii = *edges_p++;
3431       jj = *edges_p++;
3432       if (ii < ms->n_rows) {
3433         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3434         mc->_x_val[kk] += xa[face_id];
3435       }
3436       if (jj < ms->n_rows) {
3437         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3438         mc->_x_val[ll] += xa[face_id];
3439       }
3440 
3441     }
3442 
3443   } /* end of condition on coefficients symmetry */
3444 
3445 }
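
/*----------------------------------------------------------------------------
 * Illustrative sketch (not part of the library, kept inside "#if 0"): with
 * incremental assembly, several edges may contribute to the same (ii, jj)
 * coefficient, so values are accumulated with "+=" after the coefficients
 * have been set to zero (see _zero_x_coeffs_msr above). The arrays and the
 * name "_incremental_assembly_demo" are assumed example data.
 *----------------------------------------------------------------------------*/

#if 0

static void
_incremental_assembly_demo(cs_real_t        x_val[],   /* zero-initialized */
                           cs_lnum_t        n_contrib,
                           const cs_lnum_t  pos[],     /* CSR positions */
                           const cs_real_t  xa[])      /* edge values */
{
  /* Duplicate positions are allowed: their values simply add up. */

  for (cs_lnum_t e = 0; e < n_contrib; e++)
    x_val[pos[e]] += xa[e];
}

#endif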
3446 
3447 /*----------------------------------------------------------------------------
3448  * Set MSR extradiagonal matrix coefficients for the case where there are
3449  * multiple contributions to a given coefficient.
3450  *
3451  * The matrix coefficients should have been initialized (i.e. set to 0)
3452  * before using this function.
3453  *
3454  * parameters:
3455  *   matrix      <-- pointer to matrix structure
3456  *   symmetric   <-- indicates if extradiagonal values are symmetric
3457  *   n_edges     <-- local number of graph edges
3458  *   edges       <-- edges (symmetric row <-> column) connectivity
3459  *   xa          <-- extradiagonal values
3460  *----------------------------------------------------------------------------*/
3461 
3462 static void
3463 _set_xa_coeffs_msr_increment_block(cs_matrix_t        *matrix,
3464                                    bool                symmetric,
3465                                    cs_lnum_t           n_edges,
3466                                    const cs_lnum_2_t  *edges,
3467                                    const cs_real_t    *restrict xa)
3468 {
3469   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3470 
3471   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3472 
3473   const cs_lnum_t  b_size = matrix->eb_size[3];
3474 
3475   /* Copy extra-diagonal values */
3476 
3477   assert(edges != NULL);
3478 
3479   if (symmetric == false) {
3480 
3481     const cs_lnum_t *restrict edges_p
3482       = (const cs_lnum_t *restrict)(edges);
3483 
3484     for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3485       cs_lnum_t ii = *edges_p++;
3486       cs_lnum_t jj = *edges_p++;
3487       cs_lnum_t kk, ll;
3488       if (ii < ms->n_rows) {
3489         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3490         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3491           mc->_x_val[kk*b_size + pp] += xa[2*face_id*b_size + pp];
3492       }
3493       if (jj < ms->n_rows) {
3494         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3495         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3496           mc->_x_val[ll*b_size + pp] += xa[(2*face_id+1)*b_size + pp];
3497       }
3498     }
3499 
3500   }
3501   else { /* if symmetric == true */
3502 
3503     const cs_lnum_t *restrict edges_p
3504       = (const cs_lnum_t *restrict)(edges);
3505 
3506     for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3507       cs_lnum_t ii = *edges_p++;
3508       cs_lnum_t jj = *edges_p++;
3509       cs_lnum_t kk, ll;
3510       if (ii < ms->n_rows) {
3511         for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3512         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3513           mc->_x_val[kk*b_size + pp] += xa[face_id*b_size + pp];
3514       }
3515       if (jj < ms->n_rows) {
3516         for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3517         for (cs_lnum_t pp = 0; pp < b_size; pp++)
3518           mc->_x_val[ll*b_size + pp] += xa[face_id*b_size + pp];
3519       }
3520 
3521     }
3522 
3523   } /* end of condition on coefficients symmetry */
3524 
3525 }
3526 
3527 /*----------------------------------------------------------------------------
3528  * Map or copy MSR matrix diagonal coefficients.
3529  *
3530  * parameters:
3531  *   matrix           <-> pointer to matrix structure
3532  *   copy             <-- indicates if coefficients should be copied
3533  *   da               <-- diagonal values (NULL if all zero)
3534  *----------------------------------------------------------------------------*/
3535 
3536 static void
3537 _map_or_copy_da_coeffs_msr(cs_matrix_t      *matrix,
3538                            bool              copy,
3539                            const cs_real_t  *restrict da)
3540 {
3541   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3542 
3543   const cs_lnum_t n_rows = matrix->n_rows;
3544   const cs_lnum_t *db_size = matrix->db_size;
3545 
3546   /* Map or copy diagonal values */
3547 
3548   if (da != NULL) {
3549 
3550     if (copy) {
3551       if (mc->_d_val == NULL || mc->max_db_size < db_size[3]) {
3552         BFT_REALLOC(mc->_d_val, db_size[3]*n_rows, cs_real_t);
3553         mc->max_db_size = db_size[3];
3554       }
3555 #     pragma omp parallel for  if(n_rows*db_size[0] > CS_THR_MIN)
3556       for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3557         for (cs_lnum_t jj = 0; jj < db_size[3]; jj++)
3558           mc->_d_val[ii*db_size[3] + jj] = da[ii*db_size[3] + jj];
3559       }
3560       mc->d_val = mc->_d_val;
3561     }
3562     else
3563       mc->d_val = da;
3564 
3565   }
3566   else
3567     mc->d_val = NULL;
3568 }
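
/*----------------------------------------------------------------------------
 * Illustrative sketch (not part of the library, kept inside "#if 0"): the
 * 4-entry block size arrays used here (db_size, eb_size) follow the
 * convention documented with the assembler creation functions below:
 * {useful block size, vector block extents, matrix line extents,
 * matrix line*column extents}. The 3x3 example values are an assumption
 * for illustration only.
 *----------------------------------------------------------------------------*/

#if 0

static const cs_lnum_t _db_size_3x3_demo[4] = {3, 3, 3, 9};

/* With this convention, the diagonal block of row ii starts at
   da[ii*_db_size_3x3_demo[3]], and the interleaved vector values of
   row ii start at x[ii*_db_size_3x3_demo[1]]. */

#endif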
3569 
3570 /*----------------------------------------------------------------------------
3571  * Map or copy MSR matrix extra diagonal coefficients.
3572  *
3573  * This assumes the xa values are already provided in MSR form.
3574  *
3575  * When called with xa = NULL and copy = true, this function also ensures
3576  * that extradiagonal coefficients are allocated and set to zero.
3577  *
3578  * parameters:
3579  *   matrix           <-> pointer to matrix structure
3580  *   copy             <-- indicates if coefficients should be copied
3581  *   xa               <-- extradiagonal values (NULL if all zero)
3582  *----------------------------------------------------------------------------*/
3583 
3584 static void
3585 _map_or_copy_xa_coeffs_msr(cs_matrix_t      *matrix,
3586                            bool              copy,
3587                            const cs_real_t  *restrict xa)
3588 {
3589   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3590 
3591   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3592   const cs_lnum_t n_rows = matrix->n_rows;
3593   const cs_lnum_t *eb_size = matrix->eb_size;
3594 
3595   if (xa == NULL || copy) {
3596 
3597     /* Ensure allocation */
3598     if (mc->_x_val == NULL || mc->max_eb_size < eb_size[3]) {
3599       BFT_REALLOC(mc->_x_val,
3600                   eb_size[3]*ms->row_index[ms->n_rows],
3601                   cs_real_t);
3602       mc->max_eb_size = eb_size[3];
3603     }
3604     mc->x_val = mc->_x_val;
3605 
3606     /* zero if required */
3607     if (xa == NULL)
3608       _zero_x_coeffs_msr(matrix);
3609 
3610   }
3611 
3612   /* Map or copy extradiagonal values (we could use memcpy, but prefer
3613      to have a similar threading behavior to SpMV for NUMA performance) */
3614 
3615   if (xa != NULL) {
3616 
3617     if (copy) {
3618       if (eb_size[0] == 1) {
3619 #       pragma omp parallel for  if(n_rows > CS_THR_MIN)
3620         for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3621           const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3622           const cs_real_t  *s_row = xa + ms->row_index[ii];
3623           cs_real_t  *m_row = mc->_x_val + ms->row_index[ii];
3624           for (cs_lnum_t jj = 0; jj < n_cols; jj++)
3625             m_row[jj] = s_row[jj];
3626         }
3627       }
3628       else {
3629 #       pragma omp parallel for  if(n_rows*eb_size[0] > CS_THR_MIN)
3630         for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3631           const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3632           const cs_real_t  *s_row = xa + ms->row_index[ii]*eb_size[3];
3633           cs_real_t  *m_row = mc->_x_val + ms->row_index[ii]*eb_size[3];
3634           for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
3635             for (cs_lnum_t kk = 0; kk < eb_size[3]; kk++)
3636               m_row[jj*eb_size[3] + kk] = s_row[jj*eb_size[3] + kk];
3637           }
3638         }
3639       }
3640     }
3641 
3642     else
3643       mc->x_val = xa;
3644 
3645   }
3646 }
3647 
3648 /*----------------------------------------------------------------------------
3649  * Set MSR matrix coefficients.
3650  *
3651  * parameters:
3652  *   matrix      <-> pointer to matrix structure
3653  *   symmetric   <-- indicates if extradiagonal values are symmetric
3654  *   copy        <-- indicates if coefficients should be copied
3655  *   n_edges     <-- local number of graph edges
3656  *   edges       <-- edges (symmetric row <-> column) connectivity
3657  *   da          <-- diagonal values (NULL if all zero)
3658  *   xa          <-- extradiagonal values (NULL if all zero)
3659  *----------------------------------------------------------------------------*/
3660 
3661 static void
3662 _set_coeffs_msr(cs_matrix_t         *matrix,
3663                 bool                 symmetric,
3664                 bool                 copy,
3665                 cs_lnum_t            n_edges,
3666                 const cs_lnum_2_t  *restrict edges,
3667                 const cs_real_t    *restrict da,
3668                 const cs_real_t    *restrict xa)
3669 {
3670   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3671 
3672   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3673   const cs_lnum_t *eb_size = matrix->eb_size;
3674 
3675   /* Map or copy diagonal values */
3676 
3677   _map_or_copy_da_coeffs_msr(matrix, copy, da);
3678 
3679   /* Extradiagonal values */
3680 
3681   if (mc->_x_val == NULL || mc->max_eb_size < eb_size[3]) {
3682     BFT_REALLOC(mc->_x_val,
3683                 eb_size[3]*ms->row_index[ms->n_rows],
3684                 cs_real_t);
3685     mc->max_eb_size = eb_size[3];
3686   }
3687   mc->x_val = mc->_x_val;
3688 
3689   /* Copy extra-diagonal values if assembly is direct */
3690 
3691   if (ms->direct_assembly) {
3692     if (xa == NULL)
3693       _zero_x_coeffs_msr(matrix);
3694     if (eb_size[0] == 1)
3695       _set_xa_coeffs_msr_direct(matrix, symmetric, n_edges, edges, xa);
3696     else
3697       _set_xa_coeffs_msr_direct_block(matrix, symmetric, n_edges, edges, xa);
3698   }
3699 
3700   /* Initialize coefficients to zero if assembly is incremental */
3701 
3702   else {
3703     _zero_x_coeffs_msr(matrix);
3704     if (eb_size[0] == 1)
3705       _set_xa_coeffs_msr_increment(matrix, symmetric, n_edges, edges, xa);
3706     else
3707       _set_xa_coeffs_msr_increment_block(matrix, symmetric, n_edges, edges, xa);
3708   }
3709 }
3710 
3711 /*----------------------------------------------------------------------------
3712  * Set MSR matrix coefficients provided in the same form.
3713  *
3714  * If da and xa are equal to NULL, then initialize val with zeros.
3715  *
3716  * parameters:
3717  *   matrix           <-> pointer to matrix structure
3718  *   copy             <-- indicates if coefficients should be copied
3719  *                        when not transferred
3720  *   row_index        <-- MSR row index (0 to n-1)
3721  *   col_id           <-- MSR column id (0 to n-1)
3722  *   d_vals           <-- diagonal values (NULL if all zero)
3723  *   d_vals_transfer  <-- diagonal values whose ownership is transferred
3724  *                        (NULL or d_vals in, NULL out)
3725  *   x_vals           <-- extradiagonal values (NULL if all zero)
3726  *   x_vals_transfer  <-- extradiagonal values whose ownership is transferred
3727  *                        (NULL or x_vals in, NULL out)
3728  *----------------------------------------------------------------------------*/
3729 
3730 static void
3731 _set_coeffs_msr_from_msr(cs_matrix_t       *matrix,
3732                          bool               copy,
3733                          const cs_lnum_t    row_index[],
3734                          const cs_lnum_t    col_id[],
3735                          const cs_real_t   *d_vals,
3736                          cs_real_t        **d_vals_transfer,
3737                          const cs_real_t   *x_vals,
3738                          cs_real_t        **x_vals_transfer)
3739 {
3740   CS_UNUSED(row_index);
3741   CS_UNUSED(col_id);
3742 
3743   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3744 
3745   bool d_transferred = false, x_transferred = false;
3746 
3747   /* TODO: we should use metadata or check that the row_index and
3748      column id values are consistent, which should be true as long
3749      as columns are ordered in an identical manner */
3750 
3751   if (d_vals_transfer != NULL) {
3752     if (*d_vals_transfer != NULL) {
3753       mc->max_db_size = matrix->db_size[0];
3754       if (mc->_d_val != *d_vals_transfer) {
3755         BFT_FREE(mc->_d_val);
3756         mc->_d_val = *d_vals_transfer;
3757       }
3758       mc->d_val = mc->_d_val;
3759       *d_vals_transfer = NULL;
3760       d_transferred = true;
3761     }
3762   }
3763 
3764   if (x_vals_transfer != NULL) {
3765     if (*x_vals_transfer != NULL) {
3766       mc->max_db_size = matrix->db_size[0];
3767       BFT_FREE(mc->_x_val);
3768       mc->_x_val = *x_vals_transfer;
3769       mc->x_val = mc->_x_val;
3770       *x_vals_transfer = NULL;
3771       x_transferred = true;
3772     }
3773   }
3774 
3775   if (d_transferred == false)
3776     _map_or_copy_da_coeffs_msr(matrix, copy, d_vals);
3777 
3778   if (x_transferred == false)
3779     _map_or_copy_xa_coeffs_msr(matrix, copy, x_vals);
3780 
3781   /* Now free transferred arrays */
3782 
3783   if (d_vals_transfer != NULL)
3784     BFT_FREE(*d_vals_transfer);
3785   if (x_vals_transfer != NULL)
3786     BFT_FREE(*x_vals_transfer);
3787 }
3788 
3789 /*----------------------------------------------------------------------------
3790  * Release shared MSR matrix coefficients.
3791  *
3792  * parameters:
3793  *   matrix <-- pointer to matrix structure
3794  *----------------------------------------------------------------------------*/
3795 
3796 static void
3797 _release_coeffs_msr(cs_matrix_t  *matrix)
3798 {
3799   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3800   if (mc != NULL) {
3801     /* Unmap shared values */
3802     mc->d_val = NULL;
3803     mc->x_val = NULL;
3804   }
3805 }
3806 
3807 /*----------------------------------------------------------------------------*/
3808 /*!
3809  * \brief Get matrix diagonal values for MSR matrix.
3810  *
3811  * In case of matrixes with block diagonal coefficients, a pointer to
3812  * the complete block diagonal is returned.
3813  *
3814  * \param[in]  matrix  pointer to matrix structure
3815  *
3816  * \return  pointer to matrix diagonal array
3817  */
3818 /*----------------------------------------------------------------------------*/
3819 
3820 static const cs_real_t *
3821 _get_diagonal_msr(const cs_matrix_t  *matrix)
3822 {
3823   const cs_real_t  *diag = NULL;
3824 
3825   cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3826   if (mc->d_val == NULL) {
3827     cs_lnum_t n_rows = matrix->n_rows * matrix->db_size[3];
3828     if (mc->_d_val == NULL || mc->max_db_size < matrix->db_size[3]) {
3829       BFT_REALLOC(mc->_d_val, matrix->db_size[3]*matrix->n_rows, cs_real_t);
3830       mc->max_db_size = matrix->db_size[3];
3831     }
3832 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
3833     for (cs_lnum_t ii = 0; ii < n_rows; ii++)
3834       mc->_d_val[ii] = 0.0;
3835     mc->d_val = mc->_d_val;
3836   }
3837   diag = mc->d_val;
3838 
3839   return diag;
3840 }
3841 
3842 /*----------------------------------------------------------------------------*/
3843 /*!
3844  * \brief Function for initialization of MSR matrix coefficients using
3845  *        local row ids and column indexes.
3846  *
3847  * \warning  The matrix pointer must point to valid data when the selection
3848  *           function is called, so the life cycle of the data pointed to
3849  *           should be at least as long as that of the assembler values
3850  *           structure.
3851  *
3852  * \param[in, out]  matrix_p  untyped pointer to matrix description structure
3853  * \param[in]       db_size   optional diagonal block sizes
3854  * \param[in]       eb_size   optional extra-diagonal block sizes
3855  */
3856 /*----------------------------------------------------------------------------*/
3857 
3858 static void
3859 _msr_assembler_values_init(void              *matrix_p,
3860                            const cs_lnum_t    db_size[4],
3861                            const cs_lnum_t    eb_size[4])
3862 {
3863   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
3864 
3865   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3866 
3867   const cs_lnum_t n_rows = matrix->n_rows;
3868 
3869   cs_lnum_t d_stride = 1;
3870   if (db_size != NULL)
3871     d_stride = db_size[3];
3872   cs_lnum_t e_stride = 1;
3873   if (eb_size != NULL)
3874     e_stride = eb_size[3];
3875 
3876   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3877 
3878   /* Initialize diagonal values */
3879 
3880   BFT_REALLOC(mc->_d_val, d_stride*n_rows, cs_real_t);
3881   mc->d_val = mc->_d_val;
3882   mc->max_db_size = d_stride;
3883 
3884   BFT_REALLOC(mc->_x_val, e_stride*ms->row_index[ms->n_rows], cs_real_t);
3885   mc->x_val = mc->_x_val;
3886   mc->max_eb_size = e_stride;
3887 
3888 # pragma omp parallel for  if(n_rows*d_stride > CS_THR_MIN)
3889   for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3890     for (cs_lnum_t jj = 0; jj < d_stride; jj++)
3891       mc->_d_val[ii*d_stride + jj] = 0;
3892     cs_lnum_t n_s_cols = (ms->row_index[ii+1] - ms->row_index[ii])*e_stride;
3893     cs_lnum_t displ = ms->row_index[ii]*e_stride;
3894     for (cs_lnum_t jj = 0; jj < n_s_cols; jj++)
3895       mc->_x_val[displ + jj] = 0;
3896   }
3897 }
3898 
3899 /*----------------------------------------------------------------------------*/
3900 /*!
3901  * \brief Function for addition to MSR matrix coefficients using
3902  *        local row ids and column indexes.
3903  *
3904  * Values whose associated row index is negative should be ignored;
3905  * values whose column index is -1 are assumed to be assigned to a
3906  * separately stored diagonal. Other indexes should be valid.
3907  *
3908  * \warning  The matrix pointer must point to valid data when the selection
3909  *           function is called, so the life cycle of the data pointed to
3910  *           should be at least as long as that of the assembler values
3911  *           structure.
3912  *
3913  * \remark  Note that we pass column indexes (not ids) here; as the
3914  *          caller is already assumed to have identified the index
3915  *          matching a given column id.
3916  *
3917  * \param[in, out]  matrix_p  untyped pointer to matrix description structure
3918  * \param[in]       n         number of values to add
3919  * \param[in]       stride    associated data block size
3920  * \param[in]       row_id    associated local row ids
3921  * \param[in]       col_idx   associated local column indexes
3922  * \param[in]       vals      pointer to values (size: n*stride)
3923  */
3924 /*----------------------------------------------------------------------------*/
3925 
3926 static void
3927 _msr_assembler_values_add(void             *matrix_p,
3928                           cs_lnum_t         n,
3929                           cs_lnum_t         stride,
3930                           const cs_lnum_t   row_id[],
3931                           const cs_lnum_t   col_idx[],
3932                           const cs_real_t   vals[])
3933 {
3934   cs_matrix_t  *matrix = (cs_matrix_t *)matrix_p;
3935 
3936   cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
3937 
3938   const cs_matrix_struct_csr_t  *ms = matrix->structure;
3939 
3940   if (stride == 1) {
3941 
3942     /* Duplicate loop rather than rely on an OpenMP "if" test, to avoid outlining for small sets */
3943 
3944     if (n*stride <= CS_THR_MIN) {
3945       for (cs_lnum_t ii = 0; ii < n; ii++) {
3946         cs_lnum_t r_id = row_id[ii];
3947         if (r_id < 0)
3948           continue;
3949         if (col_idx[ii] < 0) {
3950 #         pragma omp atomic
3951           mc->_d_val[r_id] += vals[ii];
3952         }
3953         else {
3954 #         pragma omp atomic
3955           mc->_x_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
3956         }
3957       }
3958     }
3959 
3960     else {
3961 #     pragma omp parallel for  if(n*stride > CS_THR_MIN)
3962       for (cs_lnum_t ii = 0; ii < n; ii++) {
3963         cs_lnum_t r_id = row_id[ii];
3964         if (r_id < 0)
3965           continue;
3966         if (col_idx[ii] < 0) {
3967 #         pragma omp atomic
3968           mc->_d_val[r_id] += vals[ii];
3969         }
3970         else {
3971 #         pragma omp atomic
3972           mc->_x_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
3973         }
3974       }
3975     }
3976   }
3977 
3978   else { /* if (stride > 1) */
3979 
3980     /* Duplicate loop rather than rely on an OpenMP "if" test, to avoid outlining for small sets */
3981 
3982     if (n*stride <= CS_THR_MIN) {
3983       for (cs_lnum_t ii = 0; ii < n; ii++) {
3984         cs_lnum_t r_id = row_id[ii];
3985         if (r_id < 0)
3986           continue;
3987         if (col_idx[ii] < 0) {
3988           for (cs_lnum_t jj = 0; jj < stride; jj++)
3989             mc->_d_val[r_id*stride + jj] += vals[ii*stride + jj];
3990         }
3991         else {
3992           cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
3993           for (cs_lnum_t jj = 0; jj < stride; jj++)
3994             mc->_x_val[displ + jj] += vals[ii*stride + jj];
3995         }
3996       }
3997     }
3998 
3999     else {
4000 #     pragma omp parallel for  if(n*stride > CS_THR_MIN)
4001       for (cs_lnum_t ii = 0; ii < n; ii++) {
4002         cs_lnum_t r_id = row_id[ii];
4003         if (r_id < 0)
4004           continue;
4005         if (col_idx[ii] < 0) {
4006           for (cs_lnum_t jj = 0; jj < stride; jj++)
4007             mc->_d_val[r_id*stride + jj] += vals[ii*stride + jj];
4008         }
4009         else {
4010           cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
4011           for (cs_lnum_t jj = 0; jj < stride; jj++)
4012             mc->_x_val[displ + jj] += vals[ii*stride + jj];
4013         }
4014       }
4015     }
4016   }
4017 
4018 }
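
/*----------------------------------------------------------------------------
 * Illustrative sketch (not part of the library, kept inside "#if 0"): how
 * the addition callback above dispatches between the separate diagonal and
 * the extradiagonal CSR part in the stride-1 case. The input arrays below
 * are assumed example data, not produced by a real assembler, and the name
 * "_msr_add_demo" is hypothetical.
 *----------------------------------------------------------------------------*/

#if 0

static void
_msr_add_demo(cs_real_t        d_val[],      /* diagonal values */
              cs_real_t        x_val[],      /* extradiagonal values */
              const cs_lnum_t  row_index[])  /* CSR row index */
{
  const cs_lnum_t row_id[3]  = { 0,  1, -1};   /* -1: entry ignored */
  const cs_lnum_t col_idx[3] = {-1,  0,  2};   /* -1: diagonal term */
  const cs_real_t vals[3]    = {4., 1., 7.};

  for (cs_lnum_t ii = 0; ii < 3; ii++) {
    if (row_id[ii] < 0)
      continue;
    if (col_idx[ii] < 0)
      d_val[row_id[ii]] += vals[ii];
    else
      x_val[row_index[row_id[ii]] + col_idx[ii]] += vals[ii];
  }
}

#endif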
4019 
4020 /*----------------------------------------------------------------------------*/
4021 /*!
4022  * \brief Create and initialize an MSR matrix assembler values structure.
4023  *
4024  * The associated matrix's structure must have been created using
4025  * \ref cs_matrix_structure_create_from_assembler.
4026  *
4027  * Block sizes are defined by an optional array of 4 values:
4028  *   0: useful block size, 1: vector block extents,
4029  *   2: matrix line extents,  3: matrix line*column extents
4030  *
4031  * \param[in, out]  matrix                 pointer to matrix structure
4032  * \param[in]       diag_block_size        block sizes for diagonal, or NULL
4033  * \param[in]       extra_diag_block_size  block sizes for extra diagonal,
4034  *                                         or NULL
4035  *
4036  * \return  pointer to initialized matrix assembler values structure;
4037  */
4038 /*----------------------------------------------------------------------------*/
4039 
4040 static cs_matrix_assembler_values_t *
4041 _assembler_values_create_msr(cs_matrix_t      *matrix,
4042                              const cs_lnum_t  *diag_block_size,
4043                              const cs_lnum_t  *extra_diag_block_size)
4044 {
4045   cs_matrix_assembler_values_t *mav
4046     = cs_matrix_assembler_values_create(matrix->assembler,
4047                                         true,
4048                                         diag_block_size,
4049                                         extra_diag_block_size,
4050                                         (void *)matrix,
4051                                         _msr_assembler_values_init,
4052                                         _msr_assembler_values_add,
4053                                         NULL,
4054                                         NULL,
4055                                         NULL);
4056 
4057   return mav;
4058 }
4059 
4060 /*----------------------------------------------------------------------------
4061  * Local matrix.vector product y = A.x with MSR matrix.
4062  *
4063  * parameters:
4064  *   matrix       <-- pointer to matrix structure
4065  *   exclude_diag <-- exclude diagonal if true,
4066  *   sync         <-- synchronize ghost cells if true
4067  *   x            <-> multiplying vector values
4068  *   y            --> resulting vector
4069  *----------------------------------------------------------------------------*/
4070 
4071 static void
4072 _mat_vec_p_l_msr(const cs_matrix_t  *matrix,
4073                  bool                exclude_diag,
4074                  bool                sync,
4075                  cs_real_t          *restrict x,
4076                  cs_real_t          *restrict y)
4077 {
4078   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4079   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4080   cs_lnum_t  n_rows = ms->n_rows;
4081 
4082   /* Ghost cell communication */
4083 
4084   cs_halo_state_t *hs
4085     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4086   if (hs != NULL)
4087     cs_halo_sync_wait(matrix->halo, x, hs);
4088 
4089   /* Standard case */
4090 
4091   if (!exclude_diag && mc->d_val != NULL) {
4092 
4093 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4094     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4095 
4096       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4097       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4098       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4099       cs_real_t sii = 0.0;
4100 
4101       for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4102         sii += (m_row[jj]*x[col_id[jj]]);
4103 
4104       y[ii] = sii + mc->d_val[ii]*x[ii];
4105 
4106     }
4107 
4108   }
4109 
4110   /* Exclude diagonal */
4111 
4112   else {
4113 
4114 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4115     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4116 
4117       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4118       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4119       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4120       cs_real_t sii = 0.0;
4121 
4122       for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4123         sii += (m_row[jj]*x[col_id[jj]]);
4124 
4125       y[ii] = sii;
4126 
4127     }
4128   }
4129 
4130 }
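
/*----------------------------------------------------------------------------
 * Illustrative sketch (not part of the library, kept inside "#if 0"): in
 * MSR storage the diagonal is kept in a separate array, so the product for
 * row ii is the CSR-style extradiagonal sum plus a single d_val[ii]*x[ii]
 * term, as in the function above; with exclude_diag set, that term is
 * simply dropped. The name "_msr_spmv_row_demo" is hypothetical.
 *----------------------------------------------------------------------------*/

#if 0

static void
_msr_spmv_row_demo(const cs_lnum_t  row_index[],
                   const cs_lnum_t  col_id[],
                   const cs_real_t  d_val[],
                   const cs_real_t  x_val[],
                   const cs_real_t  x[],
                   cs_real_t        y[],
                   cs_lnum_t        ii)
{
  cs_real_t sii = 0.0;

  for (cs_lnum_t jj = row_index[ii]; jj < row_index[ii+1]; jj++)
    sii += x_val[jj]*x[col_id[jj]];   /* extradiagonal (CSR) part */

  y[ii] = sii + d_val[ii]*x[ii];      /* separate diagonal term */
}

#endif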
4131 
4132 /*----------------------------------------------------------------------------
4133  * Local matrix.vector product y = A.x with MSR matrix.
4134  *
4135  * parameters:
4136  *   matrix       <-- pointer to matrix structure
4137  *   exclude_diag <-- exclude diagonal if true,
4138  *   sync         <-- synchronize ghost cells if true
4139  *   x            <-> multiplying vector values
4140  *   y            --> resulting vector
4141  *----------------------------------------------------------------------------*/
4142 
4143 static void
4144 _mat_vec_p_l_msr_omp_sched(const cs_matrix_t  *matrix,
4145                            bool                exclude_diag,
4146                            bool                sync,
4147                            cs_real_t          *restrict x,
4148                            cs_real_t          *restrict y)
4149 {
4150   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4151   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4152   cs_lnum_t  n_rows = ms->n_rows;
4153 
4154   /* Ghost cell communication */
4155 
4156   cs_halo_state_t *hs
4157     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4158   if (hs != NULL)
4159     cs_halo_sync_wait(matrix->halo, x, hs);
4160 
4161   /* Standard case */
4162 
4163   if (!exclude_diag && mc->d_val != NULL) {
4164 
4165 #   pragma omp parallel if(n_rows > CS_THR_MIN)
4166     {
4167 
4168       cs_lnum_t n_s_rows = cs_align(n_rows * 0.9, _cs_cl);
4169       if (n_s_rows > n_rows)
4170         n_s_rows = n_rows;
4171 
4172 #     pragma omp for nowait
4173       for (cs_lnum_t ii = 0; ii < n_s_rows; ii++) {
4174 
4175         const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4176         const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4177         cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4178         cs_real_t sii = 0.0;
4179 
4180         for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4181           sii += (m_row[jj]*x[col_id[jj]]);
4182 
4183         y[ii] = sii + mc->d_val[ii]*x[ii];
4184 
4185       }
4186 
4187 #     pragma omp for schedule(dynamic, _cs_cl)
4188       for (cs_lnum_t ii = n_s_rows; ii < n_rows; ii++) {
4189 
4190         const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4191         const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4192         cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4193         cs_real_t sii = 0.0;
4194 
4195         for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4196           sii += (m_row[jj]*x[col_id[jj]]);
4197 
4198         y[ii] = sii + mc->d_val[ii]*x[ii];
4199 
4200       }
4201 
4202     }
4203 
4204   }
4205 
4206   /* Exclude diagonal */
4207 
4208   else {
4209 
4210 #   pragma omp parallel if(n_rows > CS_THR_MIN)
4211     {
4212 
4213       cs_lnum_t n_s_rows = cs_align(n_rows * 0.9, _cs_cl);
4214       if (n_s_rows > n_rows)
4215         n_s_rows = n_rows;
4216 
4217 #     pragma omp for
4218       for (cs_lnum_t ii = 0; ii < n_s_rows; ii++) {
4219 
4220         const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4221         const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4222         cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4223         cs_real_t sii = 0.0;
4224 
4225         for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4226           sii += (m_row[jj]*x[col_id[jj]]);
4227 
4228         y[ii] = sii;
4229 
4230       }
4231 
4232 #     pragma omp for schedule(dynamic, _cs_cl)
4233       for (cs_lnum_t ii = n_s_rows; ii < n_rows; ii++) {
4234 
4235         const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4236         const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4237         cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4238         cs_real_t sii = 0.0;
4239 
4240         for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4241           sii += (m_row[jj]*x[col_id[jj]]);
4242 
4243         y[ii] = sii;
4244 
4245       }
4246 
4247     }
4248 
4249   }
4250 
4251 }
4252 
4253 /*----------------------------------------------------------------------------
4254  * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4255  *
4256  * parameters:
4257  *   matrix       <-- pointer to matrix structure
4258  *   exclude_diag <-- exclude diagonal if true,
4259  *   sync         <-- synchronize ghost cells if true
4260  *   x            <-> multiplying vector values
4261  *   y            --> resulting vector
4262  *----------------------------------------------------------------------------*/
4263 
4264 static void
4265 _b_mat_vec_p_l_msr_generic(const cs_matrix_t  *matrix,
4266                            bool                exclude_diag,
4267                            bool                sync,
4268                            cs_real_t           x[restrict],
4269                            cs_real_t           y[restrict])
4270 {
4271   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4272   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4273   const cs_lnum_t  n_rows = ms->n_rows;
4274   const cs_lnum_t *db_size = matrix->db_size;
4275 
4276   /* Ghost cell communication */
4277 
4278   cs_halo_state_t *hs
4279     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4280   if (hs != NULL)
4281     _pre_vector_multiply_sync_x_end(matrix, hs, x);
4282 
4283   /* Standard case */
4284 
4285   if (!exclude_diag && mc->d_val != NULL) {
4286 
4287 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4288     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4289 
4290       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4291       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4292       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4293 
4294       _dense_b_ax(ii, db_size, mc->d_val, x, y);
4295 
4296       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4297         for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4298           y[ii*db_size[1] + kk]
4299             += (m_row[jj]*x[col_id[jj]*db_size[1] + kk]);
4300         }
4301       }
4302 
4303     }
4304 
4305   }
4306 
4307   /* Exclude diagonal */
4308 
4309   else {
4310 
4311 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4312     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4313 
4314       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4315       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4316       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4317 
4318       for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4319         y[ii*db_size[1] + kk] = 0.;
4320 
4321       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4322         for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4323           y[ii*db_size[1] + kk]
4324             += (m_row[jj]*x[col_id[jj]*db_size[1] + kk]);
4325         }
4326       }
4327 
4328     }
4329   }
4330 
4331 }
4332 
4333 /*----------------------------------------------------------------------------
4334  * Local matrix.vector product y = A.x with MSR matrix, 3x3 blocked version.
4335  *
4336  * parameters:
4337  *   matrix       <-- pointer to matrix structure
4338  *   exclude_diag <-- exclude diagonal if true,
4339  *   sync         <-- synchronize ghost cells if true
4340  *   x            <-> multiplying vector values
4341  *   y            --> resulting vector
4342  *----------------------------------------------------------------------------*/
4343 
4344 static void
4345 _3_3_mat_vec_p_l_msr(const cs_matrix_t  *matrix,
4346                      bool                exclude_diag,
4347                      bool                sync,
4348                      cs_real_t          *restrict x,
4349                      cs_real_t          *restrict y)
4350 {
4351   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4352   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4353   const cs_lnum_t  n_rows = ms->n_rows;
4354 
4355   assert(matrix->db_size[0] == 3 && matrix->db_size[3] == 9);
4356 
4357   /* Ghost cell communication */
4358 
4359   cs_halo_state_t *hs
4360     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4361   if (hs != NULL)
4362     _pre_vector_multiply_sync_x_end(matrix, hs, x);
4363 
4364   /* Standard case */
4365 
4366   if (!exclude_diag && mc->d_val != NULL) {
4367 
4368 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4369     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4370 
4371       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4372       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4373       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4374 
4375       _dense_3_3_ax(ii, mc->d_val, x, y);
4376 
4377       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4378         for (cs_lnum_t kk = 0; kk < 3; kk++)
4379           y[ii*3 + kk] += (m_row[jj]*x[col_id[jj]*3 + kk]);
4380       }
4381 
4382     }
4383 
4384   }
4385 
4386   /* Exclude diagonal */
4387 
4388   else {
4389 
4390 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4391     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4392 
4393       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4394       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4395       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4396 
4397       for (cs_lnum_t kk = 0; kk < 3; kk++)
4398         y[ii*3 + kk] = 0.;
4399 
4400       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4401         for (cs_lnum_t kk = 0; kk < 3; kk++)
4402           y[ii*3 + kk] += (m_row[jj]*x[col_id[jj]*3 + kk]);
4403       }
4404 
4405     }
4406   }
4407 
4408 }
4409 
4410 /*----------------------------------------------------------------------------
4411  * Local matrix.vector product y = A.x with MSR matrix, 6x6 blocked version.
4412  *
4413  * parameters:
4414  *   matrix       <-- pointer to matrix structure
4415  *   exclude_diag <-- exclude diagonal if true,
4416  *   sync         <-- synchronize ghost cells if true
4417  *   x            <-> multiplying vector values
4418  *   y            --> resulting vector
4419  *----------------------------------------------------------------------------*/
4420 
4421 static void
4422 _6_6_mat_vec_p_l_msr(const cs_matrix_t  *matrix,
4423                      bool                exclude_diag,
4424                      bool                sync,
4425                      cs_real_t           x[restrict],
4426                      cs_real_t           y[restrict])
4427 {
4428   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4429   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4430   const cs_lnum_t  n_rows = ms->n_rows;
4431 
4432   assert(matrix->db_size[0] == 6 && matrix->db_size[3] == 36);
4433 
4434   /* Ghost cell communication */
4435 
4436   cs_halo_state_t *hs
4437     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4438   if (hs != NULL)
4439     _pre_vector_multiply_sync_x_end(matrix, hs, x);
4440 
4441   /* Standard case */
4442 
4443   if (!exclude_diag && mc->d_val != NULL) {
4444 
4445 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4446     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4447 
4448       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4449       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4450       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4451 
4452       _dense_6_6_ax(ii, mc->d_val, x, y);
4453 
4454       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4455         for (cs_lnum_t kk = 0; kk < 6; kk++)
4456           y[ii*6 + kk] += (m_row[jj]*x[col_id[jj]*6 + kk]);
4457       }
4458 
4459     }
4460 
4461   }
4462 
4463   /* Exclude diagonal */
4464 
4465   else {
4466 
4467 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4468     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4469 
4470       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4471       const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4472       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4473 
4474       for (cs_lnum_t kk = 0; kk < 6; kk++)
4475         y[ii*6 + kk] = 0.;
4476 
4477       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4478         for (cs_lnum_t kk = 0; kk < 6; kk++)
4479           y[ii*6 + kk] += (m_row[jj]*x[col_id[jj]*6 + kk]);
4480       }
4481 
4482     }
4483   }
4484 
4485 }
4486 
4487 /*----------------------------------------------------------------------------
4488  * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4489  *
4490  * This variant uses fixed block size variants for common cases.
4491  *
4492  * parameters:
4493  *   matrix       <-- pointer to matrix structure
4494  *   exclude_diag <-- exclude diagonal if true,
4495  *   sync         <-- synchronize ghost cells if true
4496  *   x            <-> multiplying vector values
4497  *   y            --> resulting vector
4498  *----------------------------------------------------------------------------*/
4499 
4500 static void
4501 _b_mat_vec_p_l_msr(const cs_matrix_t  *matrix,
4502                    bool                exclude_diag,
4503                    bool                sync,
4504                    cs_real_t           x[restrict],
4505                    cs_real_t           y[restrict])
4506 {
4507   if (matrix->db_size[0] == 3 && matrix->db_size[3] == 9)
4508     _3_3_mat_vec_p_l_msr(matrix, exclude_diag, sync, x, y);
4509 
4510   else if (matrix->db_size[0] == 6 && matrix->db_size[3] == 36)
4511     _6_6_mat_vec_p_l_msr(matrix, exclude_diag, sync, x, y);
4512 
4513   else
4514     _b_mat_vec_p_l_msr_generic(matrix, exclude_diag, sync, x, y);
4515 }
4516 
4517 /*----------------------------------------------------------------------------
4518  * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4519  *
4520  * parameters:
4521  *   matrix       <-- pointer to matrix structure
4522  *   exclude_diag <-- exclude diagonal if true,
4523  *   sync         <-- synchronize ghost cells if true
4524  *   x            <-> multiplying vector values
4525  *   y            --> resulting vector
4526  *----------------------------------------------------------------------------*/
4527 
4528 static void
4529 _bb_mat_vec_p_l_msr_3(const cs_matrix_t  *matrix,
4530                       bool                exclude_diag,
4531                       bool                sync,
4532                       cs_real_t           x[restrict],
4533                       cs_real_t           y[restrict])
4534 {
4535   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4536   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4537   const cs_lnum_t  n_rows = ms->n_rows;
4538   const cs_lnum_t *db_size = matrix->db_size;
4539 
4540   /* Ghost cell communication */
4541 
4542   cs_halo_state_t *hs
4543     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4544   if (hs != NULL)
4545     _pre_vector_multiply_sync_x_end(matrix, hs, x);
4546 
4547   /* Standard case */
4548 
4549   if (!exclude_diag && mc->d_val != NULL) {
4550 
4551 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4552     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4553 
4554       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4555       const cs_real_t *restrict m_row =  mc->x_val + (ms->row_index[ii]*9);
4556       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4557 
4558       _dense_b_ax(ii, db_size, mc->d_val, x, y);
4559 
4560       cs_real_t * _y = y + ii*3;
4561 
4562       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4563         _y[0] += (  m_row[jj*9]         * x[col_id[jj]*3]
4564                   + m_row[jj*9 + 1]     * x[col_id[jj]*3 + 1]
4565                   + m_row[jj*9 + 2]     * x[col_id[jj]*3 + 2]);
4566         _y[1] += (  m_row[jj*9 + 3]     * x[col_id[jj]*3]
4567                   + m_row[jj*9 + 3 + 1] * x[col_id[jj]*3 + 1]
4568                   + m_row[jj*9 + 3 + 2] * x[col_id[jj]*3 + 2]);
4569         _y[2] += (  m_row[jj*9 + 6]     * x[col_id[jj]*3]
4570                   + m_row[jj*9 + 6 + 1] * x[col_id[jj]*3 + 1]
4571                   + m_row[jj*9 + 6 + 2] * x[col_id[jj]*3 + 2]);
4572       }
4573 
4574     }
4575 
4576   }
4577 
4578   /* Exclude diagonal */
4579 
4580   else {
4581 
4582 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4583     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4584 
4585       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4586       const cs_real_t *restrict m_row =  mc->x_val + (ms->row_index[ii]*9);
4587       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4588 
4589       cs_real_t * _y = y + (ii*db_size[1]);
4590 
4591       for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4592         _y[kk] = 0.;
4593 
4594       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4595         _y[0] += (  m_row[jj*9]         * x[col_id[jj]*3]
4596                   + m_row[jj*9 + 1]     * x[col_id[jj]*3 + 1]
4597                   + m_row[jj*9 + 2]     * x[col_id[jj]*3 + 2]);
4598         _y[1] += (  m_row[jj*9 + 3]     * x[col_id[jj]*3]
4599                   + m_row[jj*9 + 3 + 1] * x[col_id[jj]*3 + 1]
4600                   + m_row[jj*9 + 3 + 2] * x[col_id[jj]*3 + 2]);
4601         _y[2] += (  m_row[jj*9 + 6]     * x[col_id[jj]*3]
4602                   + m_row[jj*9 + 6 + 1] * x[col_id[jj]*3 + 1]
4603                   + m_row[jj*9 + 6 + 2] * x[col_id[jj]*3 + 2]);
4604       }
4605 
4606     }
4607   }
4608 
4609 }
4610 
4611 /*----------------------------------------------------------------------------
4612  * Local matrix.vector product y = A.x with MSR matrix, generic blocked version.
4613  *
4614  * parameters:
4615  *   matrix       <-- pointer to matrix structure
4616  *   exclude_diag <-- exclude diagonal if true
4617  *   sync         <-- synchronize ghost cells if true
4618  *   x            <-> multiplying vector values
4619  *   y            --> resulting vector
4620  *----------------------------------------------------------------------------*/
4621 
4622 static void
4623 _bb_mat_vec_p_l_msr_generic(const cs_matrix_t  *matrix,
4624                             bool                exclude_diag,
4625                             bool                sync,
4626                             cs_real_t           x[restrict],
4627                             cs_real_t           y[restrict])
4628 {
4629   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4630   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4631   const cs_lnum_t  n_rows = ms->n_rows;
4632   const cs_lnum_t *db_size = matrix->db_size;
4633   const cs_lnum_t *eb_size = matrix->eb_size;
4634 
4635   /* Ghost cell communication */
4636 
4637   cs_halo_state_t *hs
4638     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4639   if (hs != NULL)
4640     _pre_vector_multiply_sync_x_end(matrix, hs, x);
4641 
4642   /* Standard case */
4643 
4644   if (!exclude_diag && mc->d_val != NULL) {
4645 
4646 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4647     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4648 
4649       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4650       const cs_real_t *restrict m_row =   mc->x_val
4651                                         + (ms->row_index[ii]*eb_size[3]);
4652       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4653 
4654       _dense_b_ax(ii, db_size, mc->d_val, x, y);
4655 
4656       cs_real_t * _y = y + (ii*db_size[1]);
4657 
4658       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4659         for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4660           for (cs_lnum_t ll = 0; ll < db_size[0]; ll++) {
4661             _y[kk] += (  m_row[jj*eb_size[3] + kk*eb_size[2] + ll]
4662                        * x[col_id[jj]*db_size[1] + ll]);
4663           }
4664         }
4665       }
4666 
4667     }
4668 
4669   }
4670 
4671   /* Exclude diagonal */
4672 
4673   else {
4674 
4675 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4676     for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4677 
4678       const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4679       const cs_real_t *restrict m_row =   mc->x_val
4680                                         + (ms->row_index[ii]*eb_size[3]);
4681       cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4682 
4683       cs_real_t * _y = y + (ii*db_size[1]);
4684 
4685       for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4686         _y[kk] = 0.;
4687 
4688       for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4689         for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4690           for (cs_lnum_t ll = 0; ll < db_size[0]; ll++) {
4691             _y[kk] += (  m_row[jj*eb_size[3] + kk*eb_size[2] + ll]
4692                        * x[col_id[jj]*db_size[1] + ll]);
4693           }
4694         }
4695       }
4696 
4697     }
4698   }
4699 
4700 }
4701 
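/*
 * Example (illustrative): with 3x3 blocks and no padding, the block size
 * arrays are typically {3, 3, 3, 9} (useful block size, vector extent,
 * matrix line extent, line*column extent), so the generic addressing used
 * above reduces to
 *
 *   y[ii*3 + kk] += m_row[jj*9 + kk*3 + ll] * x[col_id[jj]*3 + ll];
 *
 * which matches the indexing hard-coded in _bb_mat_vec_p_l_msr_3.
 */
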
4702 /*----------------------------------------------------------------------------
4703  * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4704  *
4705  * parameters:
4706  *   matrix       <-- pointer to matrix structure
4707  *   exclude_diag <-- exclude diagonal if true
4708  *   sync         <-- synchronize ghost cells if true
4709  *   x            <-> multiplying vector values
4710  *   y            --> resulting vector
4711  *----------------------------------------------------------------------------*/
4712 
4713 static void
4714 _bb_mat_vec_p_l_msr(const cs_matrix_t  *matrix,
4715                     bool                exclude_diag,
4716                     bool                sync,
4717                     cs_real_t           x[restrict],
4718                     cs_real_t           y[restrict])
4719 {
4720   if (matrix->eb_size[0] == 3 && matrix->eb_size[3] == 9)
4721     _bb_mat_vec_p_l_msr_3(matrix, exclude_diag, sync, x, y);
4722 
4723   else
4724     _bb_mat_vec_p_l_msr_generic(matrix, exclude_diag, sync, x, y);
4725 }
4726 
4727 /*----------------------------------------------------------------------------
4728  * Local matrix.vector product y = A.x with MSR matrix, using MKL
4729  *
4730  * parameters:
4731  *   matrix       <-- pointer to matrix structure
4732  *   exclude_diag <-- exclude diagonal if true
4733  *   sync         <-- synchronize ghost cells if true (the halo exchange
4734  *                    is started and then waited on locally before the
4735  *                    MKL kernel is called)
4736  *   x            <-> multiplying vector values
4737  *   y            --> resulting vector
4738  *----------------------------------------------------------------------------*/
4739 
4740 #if defined (HAVE_MKL)
4741 
4742 static void
4743 _mat_vec_p_l_msr_mkl(const cs_matrix_t  *matrix,
4744                      bool                exclude_diag,
4745                      bool                sync,
4746                      cs_real_t           x[restrict],
4747                      cs_real_t           y[restrict])
4748 {
4749   const cs_matrix_struct_csr_t  *ms = matrix->structure;
4750   const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
4751 
4752   /* Ghost cell communication */
4753 
4754   cs_halo_state_t *hs
4755     = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4756   if (hs != NULL)
4757     cs_halo_sync_wait(matrix->halo, x, hs);
4758 
4759   /* Call MKL function */
4760 
4761   int n_rows = ms->n_rows;
4762   char transa[] = "n";
4763 
4764   mkl_cspblas_dcsrgemv(transa,
4765                        &n_rows,
4766                        mc->x_val,
4767                        ms->row_index,
4768                        ms->col_id,
4769                        (double *)x,
4770                        y);
4771 
4772   /* Add diagonal contribution */
4773 
4774   if (!exclude_diag && mc->d_val != NULL) {
4775     cs_lnum_t ii;
4776     const double *restrict da = mc->d_val;
4777 #   pragma omp parallel for  if(n_rows > CS_THR_MIN)
4778     for (ii = 0; ii < n_rows; ii++)
4779       y[ii] += da[ii] * x[ii];
4780   }
4781 }
4782 
4783 #endif /* defined (HAVE_MKL) */
4784 
4785 /*----------------------------------------------------------------------------
4786  * Add variant
4787  *
4788  * parameters:
4789  *   type                 <-- matrix type
4790  *   mft                  <-- matrix fill type tuned for
4791  *   ed_flag              <-- 0: with diagonal only, 1: exclude only, 2: both
4792  *   vector_multiply      <-- function pointer for A.x
4793  *   n_variants           <-> number of variants
4794  *   n_variants_max       <-> current maximum number of variants
4795  *   m_variant            <-> array of matrix variants
4796  *----------------------------------------------------------------------------*/
4797 
4798 static void
4799 _variant_add(const char                        *name,
4800              cs_matrix_type_t                   type,
4801              cs_matrix_fill_type_t              mft,
4802              int                                ed_flag,
4803              cs_matrix_vector_product_t        *vector_multiply,
4804              int                               *n_variants,
4805              int                               *n_variants_max,
4806              cs_matrix_variant_t              **m_variant)
4807 {
4808   cs_matrix_variant_t  *v;
4809   int i = *n_variants;
4810 
4811   if (vector_multiply == NULL)
4812     return;
4813 
4814   if (*n_variants_max == *n_variants) {
4815     if (*n_variants_max == 0)
4816       *n_variants_max = 8;
4817     else
4818       *n_variants_max *= 2;
4819     BFT_REALLOC(*m_variant, *n_variants_max, cs_matrix_variant_t);
4820   }
4821 
4822   v = (*m_variant) + i;
4823 
4824   for (int j = 0; j < 2; j++) {
4825     v->vector_multiply[j] = NULL;
4826     strncpy(v->name[j], name, 31);
4827     v->name[j][31] = '\0';
4828   }
4829 
4830   v->type = type;
4831   v->fill_type = mft;
4832 
4833   if (ed_flag != 1)
4834     v->vector_multiply[0] = vector_multiply;
4835   if (ed_flag != 0)
4836     v->vector_multiply[1] = vector_multiply;
4837 
4838   *n_variants += 1;
4839 }
4840 
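/*
 * Usage sketch (illustrative only; the local variable names below are not
 * taken from the rest of this file): register the standard and, when
 * available, the MKL MSR kernels as candidate variants for scalar fill
 * types.
 *
 *   int n_variants = 0, n_variants_max = 0;
 *   cs_matrix_variant_t *m_variant = NULL;
 *
 *   _variant_add("standard", CS_MATRIX_MSR, CS_MATRIX_SCALAR, 2,
 *                _mat_vec_p_l_msr,
 *                &n_variants, &n_variants_max, &m_variant);
 *
 * #if defined(HAVE_MKL)
 *   _variant_add("mkl", CS_MATRIX_MSR, CS_MATRIX_SCALAR, 2,
 *                _mat_vec_p_l_msr_mkl,
 *                &n_variants, &n_variants_max, &m_variant);
 * #endif
 */
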
4841 /*----------------------------------------------------------------------------
4842  * Select the sparse matrix-vector product function to be used by a
4843  * matrix or variant for a given fill type.
4844  *
4845  * Currently, possible variant functions are:
4846  *
4847  *   CS_MATRIX_NATIVE  (all fill types)
4848  *     default
4849  *     standard
4850  *     fixed           (for CS_MATRIX_33_BLOCK_D or CS_MATRIX_33_BLOCK_D_SYM)
4851  *     omp             (for OpenMP with compatible numbering)
4852  *     vector          (for vector machines with compatible numbering)
4853  *
4854  *   CS_MATRIX_CSR     (for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
4855  *     default
4856  *     standard
4857  *     mkl             (with MKL)
4858  *
4859  *   CS_MATRIX_MSR
4860  *     default
4861  *     standard
4862  *     omp_sched       (improved OpenMP scheduling, for CS_MATRIX_SCALAR*)
4863  *     mkl             (with MKL, for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
4864  *
4865  * parameters:
4866  *   m_type          <-- Matrix type
4867  *   numbering       <-- mesh numbering type, or NULL
4868  *   fill_type       <-- matrix fill type
4869  *   ed_flag         <-- 0: with diagonal only, 1: exclude only, 2: both
4870  *   func_name       <-- function type name, or NULL for default
4871  *   vector_multiply <-> multiplication function array
4872  *
4873  * returns:
4874  *   0 for success, 1 for incompatible function, 2 for compatible
4875  *   function not available in current build
4876  *----------------------------------------------------------------------------*/
4877 
4878 static int
4879 _set_spmv_func(cs_matrix_type_t             m_type,
4880                const cs_numbering_t        *numbering,
4881                cs_matrix_fill_type_t        fill_type,
4882                int                          ed_flag,
4883                const char                  *func_name,
4884                cs_matrix_vector_product_t  *vector_multiply[2])
4885 {
4886   int retcode = 1;
4887   int standard = 0;
4888 
4889   cs_matrix_vector_product_t *spmv[2] = {NULL, NULL};
4890 
4891   if (func_name == NULL)
4892     standard = 2;
4893   else if (!strcmp(func_name, "default"))
4894     standard = 2;
4895   else if (!strcmp(func_name, "standard"))
4896     standard = 1;
4897 
4898   switch(m_type) {
4899 
4900   case CS_MATRIX_NATIVE:
4901 
4902     if (standard > 0) { /* standard or default */
4903 
4904       switch(fill_type) {
4905       case CS_MATRIX_SCALAR:
4906       case CS_MATRIX_SCALAR_SYM:
4907         spmv[0] = _mat_vec_p_l_native;
4908         spmv[1] = _mat_vec_p_l_native;
4909         break;
4910       case CS_MATRIX_BLOCK_D:
4911       case CS_MATRIX_BLOCK_D_66:
4912       case CS_MATRIX_BLOCK_D_SYM:
4913         spmv[0] = _b_mat_vec_p_l_native_fixed;
4914         spmv[1] = _b_mat_vec_p_l_native_fixed;
4915         break;
4916       case CS_MATRIX_BLOCK:
4917         spmv[0] = _bb_mat_vec_p_l_native;
4918         spmv[1] = _bb_mat_vec_p_l_native;
4919         break;
4920       default:
4921         break;
4922       }
4923 
4924       if (standard > 1) { /* default optimized variants */
4925         switch(fill_type) {
4926         case CS_MATRIX_SCALAR:
4927         case CS_MATRIX_SCALAR_SYM:
4928           if (numbering != NULL) {
4929 #if defined(HAVE_OPENMP)
4930             if (numbering->type == CS_NUMBERING_THREADS) {
4931               spmv[0] = _mat_vec_p_l_native_omp;
4932               spmv[1] = _mat_vec_p_l_native_omp;
4933             }
4934 #endif
4935             if (numbering->type == CS_NUMBERING_VECTORIZE) {
4936               spmv[0] = _mat_vec_p_l_native_vector;
4937               spmv[1] = _mat_vec_p_l_native_vector;
4938             }
4939           }
4940           break;
4941         case CS_MATRIX_BLOCK_D:
4942         case CS_MATRIX_BLOCK_D_66:
4943         case CS_MATRIX_BLOCK_D_SYM:
4944           if (numbering != NULL) {
4945 #if defined(HAVE_OPENMP)
4946             if (numbering->type == CS_NUMBERING_THREADS) {
4947               spmv[0] = _b_mat_vec_p_l_native_omp;
4948               spmv[1] = _b_mat_vec_p_l_native_omp;
4949             }
4950 #endif
4951           }
4952           break;
4953         default:
4954           break;
4955         }
4956       }
4957 
4958     }
4959 
4960     else if (!strcmp(func_name, "omp")) {
4961 #if defined(HAVE_OPENMP)
4962       if (numbering != NULL) {
4963         if (numbering->type == CS_NUMBERING_THREADS) {
4964           switch(fill_type) {
4965           case CS_MATRIX_SCALAR:
4966           case CS_MATRIX_SCALAR_SYM:
4967             spmv[0] = _mat_vec_p_l_native_omp;
4968             spmv[1] = _mat_vec_p_l_native_omp;
4969             break;
4970           case CS_MATRIX_BLOCK_D:
4971           case CS_MATRIX_BLOCK_D_66:
4972           case CS_MATRIX_BLOCK_D_SYM:
4973             spmv[0] = _b_mat_vec_p_l_native_omp;
4974             spmv[1] = _b_mat_vec_p_l_native_omp;
4975             break;
4976           default:
4977             break;
4978           }
4979         }
4980       }
4981 #else
4982       retcode = 2;
4983 #endif
4984     }
4985 
4986     else if (!strcmp(func_name, "omp_atomic")) {
4987 #if defined(HAVE_OPENMP)
4988       switch(fill_type) {
4989       case CS_MATRIX_SCALAR:
4990       case CS_MATRIX_SCALAR_SYM:
4991         spmv[0] = _mat_vec_p_l_native_omp_atomic;
4992         spmv[1] = _mat_vec_p_l_native_omp_atomic;
4993         break;
4994       case CS_MATRIX_BLOCK_D:
4995       case CS_MATRIX_BLOCK_D_66:
4996       case CS_MATRIX_BLOCK_D_SYM:
4997         spmv[0] = _b_mat_vec_p_l_native_omp_atomic;
4998         spmv[1] = _b_mat_vec_p_l_native_omp_atomic;
4999         break;
5000       default:
5001         break;
5002       }
5003 #else
5004       retcode = 2;
5005 #endif
5006     }
5007 
5008     else if (!strcmp(func_name, "vector")) {
5009       switch(fill_type) {
5010       case CS_MATRIX_SCALAR:
5011       case CS_MATRIX_SCALAR_SYM:
5012         spmv[0] = _mat_vec_p_l_native_vector;
5013         spmv[1] = _mat_vec_p_l_native_vector;
5014         break;
5015       default:
5016         break;
5017       }
5018     }
5019 
5020     break;
5021 
5022   case CS_MATRIX_CSR:
5023 
5024     switch(fill_type) {
5025     case CS_MATRIX_SCALAR:
5026     case CS_MATRIX_SCALAR_SYM:
5027       if (standard > 0) {
5028         spmv[0] = _mat_vec_p_l_csr;
5029         spmv[1] = _mat_vec_p_l_csr;
5030       }
5031       else if (!strcmp(func_name, "mkl")) {
5032 #if defined(HAVE_MKL)
5033         spmv[0] = _mat_vec_p_l_csr_mkl;
5034         spmv[1] = _mat_vec_p_l_csr_mkl;
5035 #else
5036         retcode = 2;
5037 #endif
5038       }
5039       break;
5040     default:
5041       break;
5042     }
5043 
5044     break;
5045 
5046   case CS_MATRIX_MSR:
5047 
5048     if (standard > 0) {
5049       switch(fill_type) {
5050       case CS_MATRIX_SCALAR:
5051       case CS_MATRIX_SCALAR_SYM:
5052         spmv[0] = _mat_vec_p_l_msr;
5053         spmv[1] = _mat_vec_p_l_msr;
5054         break;
5055       case CS_MATRIX_BLOCK_D:
5056       case CS_MATRIX_BLOCK_D_66:
5057       case CS_MATRIX_BLOCK_D_SYM:
5058         spmv[0] = _b_mat_vec_p_l_msr;
5059         spmv[1] = _b_mat_vec_p_l_msr;
5060         break;
5061       case CS_MATRIX_BLOCK:
5062         spmv[0] = _bb_mat_vec_p_l_msr;
5063         spmv[1] = _bb_mat_vec_p_l_msr;
5064         break;
5065       default:
5066         break;
5067       }
5068     }
5069 
5070     else if (!strcmp(func_name, "mkl")) {
5071 #if defined(HAVE_MKL)
5072       switch(fill_type) {
5073       case CS_MATRIX_SCALAR:
5074       case CS_MATRIX_SCALAR_SYM:
5075         spmv[0] = _mat_vec_p_l_msr_mkl;
5076         spmv[1] = _mat_vec_p_l_msr_mkl;
5077         break;
5078       default:
5079         break;
5080       }
5081 #else
5082       retcode = 2;
5083 #endif
5084     }
5085 
5086     else if (!strcmp(func_name, "omp_sched")) {
5087       switch(fill_type) {
5088       case CS_MATRIX_SCALAR:
5089       case CS_MATRIX_SCALAR_SYM:
5090         spmv[0] = _mat_vec_p_l_msr_omp_sched;
5091         spmv[1] = _mat_vec_p_l_msr_omp_sched;
5092         break;
5093       default:
5094         break;
5095       }
5096     }
5097 
5098     break;
5099 
5100   default:
5101     break;
5102   }
5103 
5104   if (ed_flag != 1 && spmv[0] != NULL) {
5105     vector_multiply[0] = spmv[0];
5106     retcode = 0;
5107   }
5108   if (ed_flag != 0 && spmv[1] != NULL) {
5109     vector_multiply[1] = spmv[1];
5110     retcode = 0;
5111   }
5112 
5113   return retcode;
5114 }
5115 
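/*
 * Usage sketch (illustrative): request the MKL kernel for an MSR matrix
 * with scalar fill, falling back on the default selection if the build
 * does not provide MKL (return code 2).
 *
 *   cs_matrix_vector_product_t  *spmv[2] = {NULL, NULL};
 *
 *   int ierr = _set_spmv_func(CS_MATRIX_MSR,
 *                             NULL,              (no numbering constraint)
 *                             CS_MATRIX_SCALAR,
 *                             2,                 (ed_flag: both variants)
 *                             "mkl",
 *                             spmv);
 *
 *   if (ierr != 0)
 *     _set_spmv_func(CS_MATRIX_MSR, NULL, CS_MATRIX_SCALAR, 2, NULL, spmv);
 */
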
5116 /*----------------------------------------------------------------------------*/
5117 /*!
5118  * \brief Create matrix structure internals using a matrix assembler.
5119  *
5120  * Only CSR and MSR formats are handled.
5121  *
5122  * \param[in]  type  type of matrix considered
5123  * \param[in]  ma    pointer to matrix assembler structure
5124  *
5125  * \return  a pointer to created matrix structure internals
5126  */
5127 /*----------------------------------------------------------------------------*/
5128 
5129 static void *
5130 _structure_from_assembler(cs_matrix_type_t        type,
5131                           cs_lnum_t               n_rows,
5132                           cs_lnum_t               n_cols_ext,
5133                           cs_matrix_assembler_t  *ma)
5134 {
5135   void *structure = NULL;
5136 
5137   /* Get info on assembler structure */
5138 
5139   bool             ma_sep_diag = cs_matrix_assembler_get_separate_diag(ma);
5140   const cs_lnum_t *row_index = cs_matrix_assembler_get_row_index(ma);
5141   const cs_lnum_t *col_id = cs_matrix_assembler_get_col_ids(ma);
5142 
5143   /* Define structure */
5144 
5145   switch(type) {
5146 
5147   case CS_MATRIX_CSR:
5148     /* Assume diagonal is present (should not be important
5149        for assembly using matrix assembler) */
5150     if (ma_sep_diag == false)
5151       structure = _create_struct_csr_from_shared(true, /* have_diag */
5152                                                  false, /* for safety */
5153                                                  n_rows,
5154                                                  n_cols_ext,
5155                                                  row_index,
5156                                                  col_id);
5157     else {
5158       cs_lnum_t *_row_index, *_col_id;
5159       BFT_MALLOC(_row_index, n_rows + 1, cs_lnum_t);
5160       BFT_MALLOC(_col_id, row_index[n_rows] + n_rows, cs_lnum_t);
5161       _row_index[0] = 0;
5162       for (cs_lnum_t i = 0; i < n_rows; i++) {
5163         cs_lnum_t n_cols = row_index[i+1] - row_index[i];
5164         cs_lnum_t j = 0, k = 0;
5165         const cs_lnum_t *s_c_id = col_id + row_index[i];
5166         cs_lnum_t *d_c_id = _col_id + row_index[i] + i;
5167         while (j < n_cols && s_c_id[j] < i)
5168           d_c_id[k++] = s_c_id[j++];
5169         d_c_id[k++] = i;
5170         while (j < n_cols)
5171           d_c_id[k++] = s_c_id[j++];
5172         _row_index[i+1] = row_index[i+1] + i + 1;
5173       }
5174       structure = _create_struct_csr_from_csr(true, /* have_diag */
5175                                               true,
5176                                               true,
5177                                               n_rows,
5178                                               n_cols_ext,
5179                                               &_row_index,
5180                                               &_col_id);
5181     }
5182     break;
5183 
5184   case CS_MATRIX_MSR:
5185     if (ma_sep_diag == true)
5186       structure = _create_struct_csr_from_shared(false,
5187                                                  false, /* for safety */
5188                                                  n_rows,
5189                                                  n_cols_ext,
5190                                                  row_index,
5191                                                  col_id);
5192     else {
5193       cs_lnum_t *_row_index, *_col_id;
5194       BFT_MALLOC(_row_index, n_rows + 1, cs_lnum_t);
5195       BFT_MALLOC(_col_id, row_index[n_rows], cs_lnum_t);
5196       _row_index[0] = 0;
5197       cs_lnum_t k = 0;
5198       for (cs_lnum_t i = 0; i < n_rows; i++) {
5199         cs_lnum_t n_cols = row_index[i+1] - row_index[i];
5200         const cs_lnum_t *s_c_id = col_id + row_index[i];
5201         for (cs_lnum_t j = 0; j < n_cols; j++) {
5202           if (s_c_id[j] != i)
5203             _col_id[k++] = s_c_id[j];
5204         }
5205         _row_index[i+1] = k;
5206       }
5207       BFT_REALLOC(_col_id, _row_index[n_rows], cs_lnum_t);
5208       structure = _create_struct_csr_from_csr(false,
5209                                               true,
5210                                               true,
5211                                               n_rows,
5212                                               n_cols_ext,
5213                                               &_row_index,
5214                                               &_col_id);
5215     }
5216     break;
5217   default:
5218     if (type >= 0 && type < CS_MATRIX_N_BUILTIN_TYPES)
5219       bft_error(__FILE__, __LINE__, 0,
5220                 _("%s: handling of matrices in %s format\n"
5221                   "is not operational yet."),
5222                 __func__,
5223                 _(_matrix_type_name[type]));
5224     else
5225       bft_error(__FILE__, __LINE__, 0,
5226                 _("%s: handling of matrices in external format type %d\n"
5227                   "is not handled by this function."),
5228                 __func__, (int)type);
5229     break;
5230   }
5231 
5232   return structure;
5233 }
5234 
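/*
 * Worked example (illustrative): for a 3-row assembler storing the
 * diagonal separately, with row_index = {0, 2, 3, 5} and
 * col_id = {1, 2,  0,  0, 1}, the CSR branch above re-inserts each row's
 * diagonal in sorted position, giving row_index = {0, 3, 5, 8} and
 * col_id = {0, 1, 2,  0, 1,  0, 1, 2}, while the MSR branch can share the
 * assembler's arrays directly (its diagonal is stored separately).
 */
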
5235 /*----------------------------------------------------------------------------*/
5236 /*!
5237  * \brief Destroy matrix structure internals.
5238  *
5239  * \param[in]       type       matrix structure type
5240  * \param[in, out]  structure  pointer to matrix structure pointer
5241  */
5242 /*----------------------------------------------------------------------------*/
5243 
5244 static void
5245 _structure_destroy(cs_matrix_type_t   type,
5246                    void             **structure)
5247 {
5248   switch(type) {
5249   case CS_MATRIX_NATIVE:
5250     _destroy_struct_native(structure);
5251     break;
5252   case CS_MATRIX_CSR:
5253     _destroy_struct_csr(structure);
5254     break;
5255   case CS_MATRIX_MSR:
5256     _destroy_struct_csr(structure);
5257     break;
5258   default:
5259     assert(0);
5260     break;
5261   }
5262 }
5263 
5264 /*----------------------------------------------------------------------------*/
5265 /*!
5266  * \brief Create a matrix container using a given type.
5267  *
5268  * \param[in]  type  chosen matrix type
5269  *
5270  * \return  pointer to created matrix structure;
5271  */
5272 /*----------------------------------------------------------------------------*/
5273 
5274 static cs_matrix_t *
5275 _matrix_create(cs_matrix_type_t  type)
5276 {
5277   int i;
5278   cs_matrix_fill_type_t mft;
5279   cs_matrix_t *m;
5280 
5281   BFT_MALLOC(m, 1, cs_matrix_t);
5282 
5283   m->type = type;
5284 
5285   if (m->type >= 0 && m->type < CS_MATRIX_N_BUILTIN_TYPES) {
5286     m->type_name = _matrix_type_name[m->type];
5287     m->type_fname = _matrix_type_fullname[m->type];
5288   }
5289   else {
5290     m->type_name = _matrix_type_name[CS_MATRIX_N_BUILTIN_TYPES];
5291     m->type_fname = _matrix_type_fullname[CS_MATRIX_N_BUILTIN_TYPES];
5292   }
5293 
5294   /* Map shared structure */
5295 
5296   m->n_rows = 0;
5297   m->n_cols_ext = 0;
5298 
5299   m->symmetric = false;
5300 
5301   for (i = 0; i < 4; i++) {
5302     m->db_size[i] = 0;
5303     m->eb_size[i] = 0;
5304   }
5305   m->fill_type = CS_MATRIX_N_FILL_TYPES;
5306 
5307   m->structure = NULL;
5308   m->_structure = NULL;
5309 
5310   m->halo = NULL;
5311   m->numbering = NULL;
5312   m->assembler = NULL;
5313 
5314   for (mft = 0; mft < CS_MATRIX_N_FILL_TYPES; mft++) {
5315     for (i = 0; i < 2; i++)
5316       m->vector_multiply[mft][i] = NULL;
5317   }
5318 
5319   /* Define coefficients */
5320 
5321   switch(m->type) {
5322   case CS_MATRIX_NATIVE:
5323     m->coeffs = _create_coeff_native();
5324     break;
5325   case CS_MATRIX_CSR:
5326     m->coeffs = _create_coeff_csr();
5327     break;
5328   case CS_MATRIX_MSR:
5329     m->coeffs = _create_coeff_msr();
5330     break;
5331   default:
5332     bft_error(__FILE__, __LINE__, 0,
5333               _("Handling of matrices in format type %d\n"
5334                 "is not operational yet."),
5335               m->type);
5336     break;
5337   }
5338 
5339   m->xa = NULL;
5340 
5341   /* Set function pointers here */
5342 
5343   m->set_coefficients = NULL;
5344 
5345   for (mft = 0; mft < CS_MATRIX_N_FILL_TYPES; mft++)
5346     _set_spmv_func(m->type,
5347                    m->numbering,
5348                    mft,
5349                    2,    /* ed_flag */
5350                    NULL, /* func_name */
5351                    m->vector_multiply[mft]);
5352 
5353   switch(m->type) {
5354 
5355   case CS_MATRIX_NATIVE:
5356 
5357     m->set_coefficients = _set_coeffs_native;
5358     m->release_coefficients = _release_coeffs_native;
5359     m->copy_diagonal = _copy_diagonal_separate;
5360     m->get_diagonal = _get_diagonal_native;
5361     m->destroy_structure = _destroy_struct_native;
5362     m->destroy_coefficients = _destroy_coeff_native;
5363     m->assembler_values_create = NULL;
5364     break;
5365 
5366   case CS_MATRIX_CSR:
5367     m->set_coefficients = _set_coeffs_csr;
5368     m->release_coefficients = _release_coeffs_csr;
5369     m->copy_diagonal = _copy_diagonal_csr;
5370     m->get_diagonal = _get_diagonal_csr;
5371     m->destroy_structure = _destroy_struct_csr;
5372     m->destroy_coefficients = _destroy_coeff_csr;
5373     m->assembler_values_create = _assembler_values_create_csr;
5374     break;
5375 
5376   case CS_MATRIX_MSR:
5377     m->set_coefficients = _set_coeffs_msr;
5378     m->release_coefficients = _release_coeffs_msr;
5379     m->copy_diagonal = _copy_diagonal_separate;
5380     m->get_diagonal = _get_diagonal_msr;
5381     m->destroy_structure = _destroy_struct_csr;
5382     m->destroy_coefficients = _destroy_coeff_msr;
5383     m->assembler_values_create = _assembler_values_create_msr;
5384     break;
5385 
5386   default:
5387     assert(0);
5388     break;
5389 
5390   }
5391 
5392   for (i = 0; i < CS_MATRIX_N_FILL_TYPES; i++) {
5393     if (m->vector_multiply[i][1] == NULL)
5394       m->vector_multiply[i][1] = m->vector_multiply[i][0];
5395   }
5396 
5397   return m;
5398 }
5399 
5400 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
5401 
5402 /*============================================================================
5403  * Public function definitions
5404  *============================================================================*/
5405 
5406 /*----------------------------------------------------------------------------*/
5407 /*!
5408  * \brief Create a matrix structure.
5409  *
5410  * Note that the structure created usually maps to the given existing
5411  * cell global number, face -> cell connectivity arrays, and cell halo
5412  * structure, so it must be destroyed before they are freed
5413  * (usually along with the code's main face -> cell structure).
5414  *
5415  * Note that the resulting matrix structure will contain either a full or
5416  * an empty main diagonal, and that the extra-diagonal structure is always
5417  * symmetric (though the coefficients may not be, and we may choose a
5418  * matrix format that does not exploit this symmetry). If the edges
5419  * connectivity argument is NULL, the matrix will be purely diagonal.
5420  *
5421  * \param[in]  type        type of matrix considered
5422  * \param[in]  have_diag   indicates if the diagonal structure
5423  *                         contains nonzeroes
5424  * \param[in]  n_rows      local number of rows
5425  * \param[in]  n_cols_ext  number of local + ghost columns
5426  * \param[in]  n_edges     local number of (undirected) graph edges
5427  * \param[in]  edges       edges (symmetric row <-> column) connectivity
5428  * \param[in]  halo        halo structure associated with cells, or NULL
5429  * \param[in]  numbering   vectorization or thread-related numbering info,
5430  *                         or NULL
5431  *
5432  * \return  pointer to created matrix structure;
5433  */
5434 /*----------------------------------------------------------------------------*/
5435 
5436 cs_matrix_structure_t *
5437 cs_matrix_structure_create(cs_matrix_type_t       type,
5438                            bool                   have_diag,
5439                            cs_lnum_t              n_rows,
5440                            cs_lnum_t              n_cols_ext,
5441                            cs_lnum_t              n_edges,
5442                            const cs_lnum_2_t     *edges,
5443                            const cs_halo_t       *halo,
5444                            const cs_numbering_t  *numbering)
5445 {
5446   cs_matrix_structure_t *ms;
5447 
5448   BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5449 
5450   ms->type = type;
5451 
5452   ms->n_rows = n_rows;
5453   ms->n_cols_ext = n_cols_ext;
5454 
5455   /* Define Structure */
5456 
5457   switch(ms->type) {
5458   case CS_MATRIX_NATIVE:
5459     ms->structure = _create_struct_native(n_rows,
5460                                           n_cols_ext,
5461                                           n_edges,
5462                                           edges);
5463     break;
5464   case CS_MATRIX_CSR:
5465     ms->structure = _create_struct_csr(have_diag,
5466                                        n_rows,
5467                                        n_cols_ext,
5468                                        n_edges,
5469                                        edges);
5470     break;
5471   case CS_MATRIX_MSR:
5472     ms->structure = _create_struct_csr(false,
5473                                        n_rows,
5474                                        n_cols_ext,
5475                                        n_edges,
5476                                        edges);
5477     break;
5478   default:
5479     bft_error(__FILE__, __LINE__, 0,
5480               _("Handling of matrices in format type %d\n"
5481                 "is not operational yet."),
5482               type);
5483     break;
5484   }
5485 
5486   /* Set pointers to structures shared from mesh here */
5487 
5488   ms->halo = halo;
5489   ms->numbering = numbering;
5490   ms->assembler = NULL;
5491 
5492   return ms;
5493 }
5494 
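/*
 * Usage sketch (illustrative; n_rows, n_cols_ext, n_edges, edges, halo and
 * numbering stand for values normally obtained from the mesh):
 *
 *   cs_matrix_structure_t *ms
 *     = cs_matrix_structure_create(CS_MATRIX_MSR,
 *                                  true,        (have_diag)
 *                                  n_rows,
 *                                  n_cols_ext,
 *                                  n_edges,
 *                                  edges,
 *                                  halo,
 *                                  numbering);
 *
 *   (... create and use matrices mapped to ms ...)
 *
 *   cs_matrix_structure_destroy(&ms);
 */
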
5495 /*----------------------------------------------------------------------------*/
5496 /*!
5497  * \brief Create a matrix structure based on a MSR connectivity definition.
5498  *
5499  * Only CSR and MSR formats are handled.
5500  *
5501  * col_id is sorted row by row during the creation of this structure.
5502  *
5503  * If ownership of the row index and col_id arrays is transferred to the
5504  * structure, the array pointers passed as arguments are set to NULL,
5505  * to help ensure the caller does not use the original arrays directly after
5506  * this call.
5507  *
5508  * \param[in]       type        type of matrix considered
5509  * \param[in]       transfer    transfer property of row_index and col_id
5510  * \param[in]       transfer    transfer ownership of row_index and col_id
5511  *                              if true; otherwise, map them
5512  *                              diagonal (should be the same for all rows)
5513  * \param[in]       n_rows      local number of rows
5514  * \param[in]       n_cols_ext  local number of columns + ghosts
5515  * \param[in]       row_index   pointer to index on rows
5516  * \param[in, out]  col_id      pointer to array of colum ids related to
5517  *                              the row index
5518  * \param[in]       halo        halo structure for synchronization, or NULL
5519  * \param[in]       numbering   vectorization or thread-related numbering info,
5520  *                              or NULL
5521  *
5522  * \return  a pointer to a created matrix structure
5523  */
5524 /*----------------------------------------------------------------------------*/
5525 
5526 cs_matrix_structure_t *
5527 cs_matrix_structure_create_msr(cs_matrix_type_t        type,
5528                                bool                    transfer,
5529                                bool                    have_diag,
5530                                cs_lnum_t               n_rows,
5531                                cs_lnum_t               n_cols_ext,
5532                                cs_lnum_t             **row_index,
5533                                cs_lnum_t             **col_id,
5534                                const cs_halo_t        *halo,
5535                                const cs_numbering_t   *numbering)
5536 {
5537   cs_matrix_structure_t *ms = NULL;
5538 
5539   BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5540 
5541   ms->type = type;
5542 
5543   ms->n_rows = n_rows;
5544   ms->n_cols_ext = n_cols_ext;
5545 
5546   /* Define Structure */
5547 
5548   switch(ms->type) {
5549   case CS_MATRIX_CSR:
5550     ms->structure = _create_struct_csr_from_csr(have_diag,
5551                                                 transfer,
5552                                                 false,
5553                                                 n_rows,
5554                                                 n_cols_ext,
5555                                                 row_index,
5556                                                 col_id);
5557     break;
5558   case CS_MATRIX_MSR:
5559     ms->structure = _create_struct_csr_from_csr(false,
5560                                                 transfer,
5561                                                 false,
5562                                                 n_rows,
5563                                                 n_cols_ext,
5564                                                 row_index,
5565                                                 col_id);
5566     break;
5567   default:
5568     if (type >= 0 && type < CS_MATRIX_N_BUILTIN_TYPES)
5569       bft_error(__FILE__, __LINE__, 0,
5570                 _("%s: handling of matrices in %s format\n"
5571                   "is not operational yet."),
5572                 __func__,
5573                 _(_matrix_type_name[type]));
5574     else
5575       bft_error(__FILE__, __LINE__, 0,
5576                 _("%s: handling of matrices in external format type %d\n"
5577                   "is not handled by this function."),
5578                 __func__, (int)type);
5579     break;
5580   }
5581 
5582   /* Set pointers to structures shared from mesh here */
5583 
5584   ms->halo = halo;
5585   ms->numbering = numbering;
5586   ms->assembler = NULL;
5587 
5588   return ms;
5589 }
5590 
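/*
 * Usage sketch (illustrative): transfer ownership of locally built
 * row_index and col_id arrays to the structure; as noted above, both
 * pointers are then set to NULL and must not be freed or reused by the
 * caller.
 *
 *   cs_lnum_t *row_index = NULL, *col_id = NULL;
 *   (... allocate and fill row_index (n_rows + 1) and col_id ...)
 *
 *   cs_matrix_structure_t *ms
 *     = cs_matrix_structure_create_msr(CS_MATRIX_MSR,
 *                                      true,       (transfer)
 *                                      false,      (have_diag; the MSR
 *                                                   diagonal is separate)
 *                                      n_rows,
 *                                      n_cols_ext,
 *                                      &row_index,
 *                                      &col_id,
 *                                      halo,
 *                                      NULL);      (numbering)
 *
 *   assert(row_index == NULL && col_id == NULL);
 */
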
5591 /*----------------------------------------------------------------------------*/
5592 /*!
5593  * \brief Create an MSR matrix structure sharing an existing connectivity
5594  * definition.
5595  *
5596  * Note that as the structure created maps to the given existing
5597  * cell global number, face -> cell connectivity arrays, and cell halo
5598  * structure, it must be destroyed before they are freed
5599  * (usually along with the code's main face -> cell structure).
5600  *
5601  * \param[in]  have_diag        indicates if the structure includes the
5602  *                              diagonal (should be the same for all rows)
5603  * \param[in]  direct_assembly  true if each value corresponds to
5604  *                              a unique face
5605  * \param[in]  n_rows           local number of rows
5606  * \param[in]  n_cols_ext       local number of columns + ghosts
5607  * \param[in]  row_index        index on rows
5608  * \param[in]  col_id           array of colum ids related to the row index
5609  * \param[in]  halo             halo structure for synchronization, or NULL
5610  * \param[in]  numbering        vectorization or thread-related numbering
5611  *                              info, or NULL
5612  *
5613  * \returns  a pointer to a created matrix structure
5614  */
5615 /*----------------------------------------------------------------------------*/
5616 
5617 cs_matrix_structure_t *
5618 cs_matrix_structure_create_msr_shared(bool                    have_diag,
5619                                       bool                    direct_assembly,
5620                                       cs_lnum_t               n_rows,
5621                                       cs_lnum_t               n_cols_ext,
5622                                       const cs_lnum_t        *row_index,
5623                                       const cs_lnum_t        *col_id,
5624                                       const cs_halo_t        *halo,
5625                                       const cs_numbering_t   *numbering)
5626 {
5627   cs_matrix_structure_t *ms = NULL;
5628 
5629   BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5630 
5631   ms->type = CS_MATRIX_MSR;
5632 
5633   ms->n_rows = n_rows;
5634   ms->n_cols_ext = n_cols_ext;
5635 
5636   /* Define Structure */
5637 
5638   ms->structure = _create_struct_csr_from_shared(have_diag,
5639                                                  direct_assembly,
5640                                                  n_rows,
5641                                                  n_cols_ext,
5642                                                  row_index,
5643                                                  col_id);
5644 
5645   /* Set pointers to structures shared from mesh here */
5646 
5647   ms->halo = halo;
5648   ms->numbering = numbering;
5649   ms->assembler = NULL;
5650 
5651   return ms;
5652 }
5653 
5654 /*----------------------------------------------------------------------------*/
5655 /*!
5656  * \brief Create a matrix structure using a matrix assembler.
5657  *
5658  * Only CSR and MSR formats are handled.
5659  *
5660  * \param[in]  type  type of matrix considered
5661  * \param[in]  ma    pointer to matrix assembler structure
5662  *
5663  * \return  a pointer to a created matrix structure
5664  */
5665 /*----------------------------------------------------------------------------*/
5666 
5667 cs_matrix_structure_t *
5668 cs_matrix_structure_create_from_assembler(cs_matrix_type_t        type,
5669                                           cs_matrix_assembler_t  *ma)
5670 {
5671   cs_matrix_structure_t *ms = NULL;
5672 
5673   BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5674 
5675   ms->type = type;
5676 
5677   ms->n_rows = cs_matrix_assembler_get_n_rows(ma);
5678   ms->n_cols_ext = cs_matrix_assembler_get_n_columns(ma);
5679 
5680   /* Define internal structure */
5681 
5682   ms->structure = _structure_from_assembler(ms->type,
5683                                             ms->n_rows,
5684                                             ms->n_cols_ext,
5685                                             ma);
5686 
5687   /* Set pointers to structures shared from mesh here */
5688 
5689   ms->halo = cs_matrix_assembler_get_halo(ma);
5690 
5691   ms->numbering = NULL;
5692 
5693   ms->assembler = ma;
5694 
5695   return ms;
5696 }
5697 
5698 /*----------------------------------------------------------------------------*/
5699 /*!
5700  * \brief Destroy a matrix structure.
5701  *
5702  * \param[in, out]  ms  pointer to matrix structure pointer
5703  */
5704 /*----------------------------------------------------------------------------*/
5705 
5706 void
5707 cs_matrix_structure_destroy(cs_matrix_structure_t  **ms)
5708 {
5709   if (ms != NULL && *ms != NULL) {
5710 
5711     cs_matrix_structure_t *_ms = *ms;
5712 
5713     _structure_destroy(_ms->type, &(_ms->structure));
5714 
5715     /* Now free main structure */
5716 
5717     BFT_FREE(*ms);
5718   }
5719 }
5720 
5721 /*----------------------------------------------------------------------------*/
5722 /*!
5723  * \brief Create a matrix container using a given structure.
5724  *
5725  * Note that the matrix container maps to the assigned structure,
5726  * so it must be destroyed before that structure.
5727  *
5728  * \param[in]  ms  associated matrix structure
5729  *
5730  * \return  pointer to created matrix structure;
5731  */
5732 /*----------------------------------------------------------------------------*/
5733 
5734 cs_matrix_t *
5735 cs_matrix_create(const cs_matrix_structure_t  *ms)
5736 {
5737   assert(ms != NULL); /* Sanity check */
5738 
5739   cs_matrix_t *m = _matrix_create(ms->type);
5740 
5741   /* Map shared structure */
5742 
5743   m->n_rows = ms->n_rows;
5744   m->n_cols_ext = ms->n_cols_ext;
5745 
5746   m->structure = ms->structure;
5747 
5748   m->halo = ms->halo;
5749   m->numbering = ms->numbering;
5750   m->assembler = ms->assembler;
5751 
5752   return m;
5753 }
5754 
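/*
 * Lifecycle sketch (illustrative): a matrix container maps to the shared
 * structure it was created with, so destruction must happen in reverse
 * order of creation.
 *
 *   cs_matrix_structure_t *ms = cs_matrix_structure_create(...);
 *   cs_matrix_t           *m  = cs_matrix_create(ms);
 *
 *   (... set coefficients, compute matrix.vector products, solve ...)
 *
 *   cs_matrix_destroy(&m);              (matrix first)
 *   cs_matrix_structure_destroy(&ms);   (then the shared structure)
 */
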
5755 /*----------------------------------------------------------------------------*/
5756 /*!
5757  * \brief Create a matrix directly from assembler.
5758  *
5759  * Only CSR and MSR formats are handled.
5760  *
5761  * \param[in]  type  type of matrix considered
5762  * \param[in]  ma    pointer to matrix assembler structure
5763  *
5764  * \return  a pointer to a created matrix structure
5765  */
5766 /*----------------------------------------------------------------------------*/
5767 
5768 cs_matrix_t *
5769 cs_matrix_create_from_assembler(cs_matrix_type_t        type,
5770                                 cs_matrix_assembler_t  *ma)
5771 {
5772   cs_matrix_t *m = _matrix_create(type);
5773 
5774   m->assembler = ma;
5775 
5776   m->type = type;
5777 
5778   m->n_rows = cs_matrix_assembler_get_n_rows(ma);
5779   m->n_cols_ext = cs_matrix_assembler_get_n_columns(ma);
5780 
5781   /* Define internal structure */
5782 
5783   m->_structure = _structure_from_assembler(m->type,
5784                                             m->n_rows,
5785                                             m->n_cols_ext,
5786                                             ma);
5787   m->structure = m->_structure;
5788 
5789   /* Set pointers to structures shared from mesh here */
5790 
5791   m->halo = cs_matrix_assembler_get_halo(ma);
5792 
5793   m->numbering = NULL;
5794 
5795   m->assembler = ma;
5796 
5797   return m;
5798 }
5799 
5800 /*----------------------------------------------------------------------------*/
5801 /*!
5802  * \brief Create a matrix container by copying another
5803  *
5804  * Note that the matrix containers share the same assigned structure,
5805  * so they must be both destroyed before that structure.
5806  *
5807  * If assigned, coefficients are not copied.
5808  *
5809  * \param[in]  src  reference matrix structure
5810  *
5811  * \return  pointer to created matrix structure;
5812  */
5813 /*----------------------------------------------------------------------------*/
5814 
5815 cs_matrix_t *
5816 cs_matrix_create_by_copy(cs_matrix_t   *src)
5817 {
5818   cs_matrix_t *m;
5819 
5820   BFT_MALLOC(m, 1, cs_matrix_t);
5821 
5822   memcpy(m, src, sizeof(cs_matrix_t));
5823 
5824   /* Define coefficients */
5825 
5826   switch(m->type) {
5827   case CS_MATRIX_NATIVE:
5828     m->coeffs = _create_coeff_native();
5829     break;
5830   case CS_MATRIX_CSR:
5831     m->coeffs = _create_coeff_csr();
5832     break;
5833   case CS_MATRIX_MSR:
5834     m->coeffs = _create_coeff_msr();
5835     break;
5836   default:
5837     bft_error(__FILE__, __LINE__, 0,
5838               _("Handling of matrices in format type %d\n"
5839                 "is not operational yet."),
5840               m->type);
5841     break;
5842   }
5843 
5844   cs_matrix_release_coefficients(m);
5845 
5846   return m;
5847 }
5848 
5849 /*----------------------------------------------------------------------------*/
5850 /*!
5851  * \brief Create a matrix based on the local restriction of a base matrix.
5852  *
5853  * Coefficients are copied. Some coefficients may be shared with the
5854  * parent matrix, so the base matrix must not be destroyed before the
5855  * restriction matrix.
5856  *
5857  * \param[in]  src  reference matrix structure
5858  *
5859  * \return  pointer to created matrix structure;
5860  */
5861 /*----------------------------------------------------------------------------*/
5862 
5863 cs_matrix_t *
5864 cs_matrix_create_by_local_restrict(const cs_matrix_t  *src)
5865 {
5866   cs_matrix_t *m = NULL;
5867 
5868   const cs_lnum_t n_rows = src->n_rows;
5869   const cs_lnum_t *eb_size = src->eb_size;
5870 
5871   BFT_MALLOC(m, 1, cs_matrix_t);
5872   memcpy(m, src, sizeof(cs_matrix_t));
5873   m->n_cols_ext = m->n_rows;
5874 
5875   m->structure = NULL;
5876   m->_structure = NULL;
5877 
5878   m->halo = NULL;
5879   m->numbering = NULL;
5880   m->assembler = NULL;
5881   m->xa = NULL;
5882   m->coeffs = NULL;
5883 
5884   /* Define coefficients */
5885 
5886   switch(m->type) {
5887   case CS_MATRIX_MSR:
5888     {
5889       m->_structure = _create_struct_csr_from_restrict_local(src->structure);
5890       m->structure = m->_structure;
5891       m->coeffs = _create_coeff_msr();
5892       cs_matrix_coeff_msr_t  *mc = m->coeffs;
5893       cs_matrix_coeff_msr_t  *mc_src = src->coeffs;
5894       const cs_matrix_struct_csr_t *ms = m->structure;
5895       const cs_matrix_struct_csr_t *ms_src = src->structure;
5896       mc->d_val = mc_src->d_val;
5897       BFT_MALLOC(mc->_x_val, src->eb_size[3]*ms->row_index[n_rows], cs_real_t);
5898       mc->x_val = mc->_x_val;
5899       for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
5900         const cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];
5901         const cs_real_t  *s_row =   mc_src->x_val
5902                                   + ms_src->row_index[ii]*eb_size[3];
5903         cs_real_t  *m_row = mc->_x_val + ms->row_index[ii]*eb_size[3];
5904         memcpy(m_row, s_row, sizeof(cs_real_t)*eb_size[3]*n_cols);
5905       }
5906       mc->max_db_size = m->db_size[3];
5907       mc->max_eb_size = m->eb_size[3];
5908     }
5909     break;
5910   case CS_MATRIX_NATIVE:
5911   case CS_MATRIX_CSR:
5912   default:
5913     bft_error(__FILE__, __LINE__, 0,
5914               _("Handling of matrices in %s format\n"
5915                 "is not operational yet."),
5916               _(m->type_name));
5917     break;
5918   }
5919 
5920   return m;
5921 }
5922 
5923 /*----------------------------------------------------------------------------
5924  * Destroy a matrix structure.
5925  *
5926  * In the case of a compound matrix, sub-matrices are not destroyed.
5927  *
5928  * parameters:
5929  *   matrix <-> pointer to matrix structure pointer
5930  *----------------------------------------------------------------------------*/
5931 
5932 void
5933 cs_matrix_destroy(cs_matrix_t **matrix)
5934 {
5935   if (matrix != NULL && *matrix != NULL) {
5936 
5937     cs_matrix_t *m = *matrix;
5938 
5939     m->destroy_coefficients(m);
5940 
5941     if (m->_structure != NULL) {
5942       m->destroy_structure(&(m->_structure));
5943       m->structure = NULL;
5944     }
5945 
5946     /* Now free main structure */
5947 
5948     BFT_FREE(*matrix);
5949   }
5950 }
5951 
5952 /*----------------------------------------------------------------------------*/
5953 /*!
5954  * \brief Return matrix type.
5955  *
5956  * \param[in]  matrix  pointer to matrix structure
5957  */
5958 /*----------------------------------------------------------------------------*/
5959 
5960 cs_matrix_type_t
5961 cs_matrix_get_type(const cs_matrix_t  *matrix)
5962 {
5963   if (matrix == NULL)
5964     bft_error(__FILE__, __LINE__, 0,
5965               _("The matrix is not defined."));
5966   return matrix->type;
5967 }
5968 
5969 /*----------------------------------------------------------------------------
5970  * Return matrix type name.
5971  *
5972  * parameters:
5973  *   matrix --> pointer to matrix structure
5974  *----------------------------------------------------------------------------*/
5975 
5976 const char *
5977 cs_matrix_get_type_name(const cs_matrix_t  *matrix)
5978 {
5979   if (matrix == NULL)
5980     bft_error(__FILE__, __LINE__, 0,
5981               _("%s: matrix not defined."), __func__);
5982 
5983   return matrix->type_name;
5984 }
5985 
5986 /*----------------------------------------------------------------------------
5987  * Return matrix type full name.
5988  *
5989  * parameters:
5990  *   matrix --> pointer to matrix structure
5991  *----------------------------------------------------------------------------*/
5992 
5993 const char *
5994 cs_matrix_get_type_fullname(const cs_matrix_t  *matrix)
5995 {
5996   if (matrix == NULL)
5997     bft_error(__FILE__, __LINE__, 0,
5998               _("%s: matrix not defined."), __func__);
5999 
6000   return matrix->type_fname;
6001 }
6002 
6003 /*----------------------------------------------------------------------------*/
6004 /*!
6005  * \brief Return number of columns in a matrix.
6006  *
6007  * \param[in]  matrix  pointer to matrix structure
6008  */
6009 /*----------------------------------------------------------------------------*/
6010 
6011 cs_lnum_t
6012 cs_matrix_get_n_columns(const cs_matrix_t  *matrix)
6013 {
6014   if (matrix == NULL)
6015     bft_error(__FILE__, __LINE__, 0,
6016               _("The matrix is not defined."));
6017   return matrix->n_cols_ext;
6018 }
6019 
6020 /*----------------------------------------------------------------------------*/
6021 /*!
6022  * \brief Return number of rows in matrix.
6023  *
6024  * \param[in]  matrix  pointer to matrix structure
6025  */
6026 /*----------------------------------------------------------------------------*/
6027 
6028 cs_lnum_t
6029 cs_matrix_get_n_rows(const cs_matrix_t  *matrix)
6030 {
6031   if (matrix == NULL)
6032     bft_error(__FILE__, __LINE__, 0,
6033               _("The matrix is not defined."));
6034   return matrix->n_rows;
6035 }
6036 
6037 /*----------------------------------------------------------------------------*/
6038 /*!
6039  * \brief Return number of entries in matrix.
6040  *
6041  * When the block size is > 1, the number reported is the number of
6042  * entry blocks, not individual entries.
6043  *
6044  * \param[in]  matrix  pointer to matrix structure
6045  */
6046 /*----------------------------------------------------------------------------*/
6047 
6048 cs_lnum_t
6049 cs_matrix_get_n_entries(const cs_matrix_t  *matrix)
6050 {
6051   cs_lnum_t retval = 0;
6052 
6053   if (matrix == NULL)
6054     bft_error(__FILE__, __LINE__, 0,
6055               _("The matrix is not defined."));
6056 
6057   switch(matrix->type) {
6058   case CS_MATRIX_NATIVE:
6059     {
6060       const cs_matrix_struct_native_t  *ms = matrix->structure;
6061       retval = ms->n_edges*2 + ms->n_rows;
6062     }
6063     break;
6064   case CS_MATRIX_CSR:
6065     {
6066       const cs_matrix_struct_csr_t  *ms = matrix->structure;
6067       retval = ms->row_index[ms->n_rows];
6068     }
6069     break;
6070   case CS_MATRIX_MSR:
6071     {
6072       const cs_matrix_struct_csr_t  *ms = matrix->structure;
6073       retval = ms->row_index[ms->n_rows] + ms->n_rows;
6074     }
6075     break;
6076   default:
6077     break;
6078   }
6079 
6080   return retval;
6081 }
6082 
6083 /*----------------------------------------------------------------------------*/
6084 /*!
6085  * \brief Return matrix diagonal block sizes.
6086  *
6087  * Block sizes are defined by a array of 4 values:
6088  *   0: useful block size, 1: vector block extents,
6089  *   2: matrix line extents,  3: matrix line*column extents
6090  *
6091  * \param[in]  matrix  pointer to matrix structure
6092  *
6093  * \return  pointer to block sizes
6094  */
6095 /*----------------------------------------------------------------------------*/
6096 
6097 const cs_lnum_t *
cs_matrix_get_diag_block_size(const cs_matrix_t * matrix)6098 cs_matrix_get_diag_block_size(const cs_matrix_t  *matrix)
6099 {
6100   if (matrix == NULL)
6101     bft_error(__FILE__, __LINE__, 0,
6102               _("The matrix is not defined."));
6103 
6104   return matrix->db_size;
6105 }
6106 
6107 /*----------------------------------------------------------------------------*/
6108 /*!
6109  * \brief Return matrix extra-diagonal block sizes.
6110  *
6111  * Block sizes are defined by a array of 4 values:
6112  *   0: useful block size, 1: vector block extents,
6113  *   2: matrix line extents,  3: matrix line*column extents
6114  *
6115  * \param[in]  matrix  pointer to matrix structure
6116  *
6117  * \return  pointer to block sizes
6118  */
6119 /*----------------------------------------------------------------------------*/
6120 
6121 const cs_lnum_t *
cs_matrix_get_extra_diag_block_size(const cs_matrix_t * matrix)6122 cs_matrix_get_extra_diag_block_size(const cs_matrix_t  *matrix)
6123 {
6124   if (matrix == NULL)
6125     bft_error(__FILE__, __LINE__, 0,
6126               _("The matrix is not defined."));
6127 
6128   return matrix->eb_size;
6129 }
6130 
6131 /*----------------------------------------------------------------------------*/
6132 /*!
6133  * \brief Return pointer to matrix halo structure.
6134  *
6135  * \param[in]  matrix  pointer to matrix structure
6136  *
6137  * \return  pointer to halo strucuture
6138  */
6139 /*----------------------------------------------------------------------------*/
6140 
6141 const cs_halo_t *
cs_matrix_get_halo(const cs_matrix_t * matrix)6142 cs_matrix_get_halo(const cs_matrix_t  *matrix)
6143 {
6144   if (matrix == NULL)
6145     bft_error(__FILE__, __LINE__, 0,
6146               _("The matrix is not defined."));
6147 
6148   return matrix->halo;
6149 }
6150 
6151 /*----------------------------------------------------------------------------*/
6152 /*!
6153  * \brief Get matrix fill type, depending on block sizes.
6154  *
6155  * Block sizes are defined by an optional array of 4 values:
6156  *   0: useful block size, 1: vector block extents,
6157  *   2: matrix line extents,  3: matrix line*column extents
6158  *
6159  * \param[in]  symmetric              indicates if matrix coefficients
6160  *                                    are symmetric
6161  * \param[in]  diag_block_size        block sizes for diagonal, or NULL
6162  * \param[in]  extra_diag_block_size  block sizes for extra diagonal, or NULL
6163  *
6164  * \return  matrix fill type
6165  */
6166 /*----------------------------------------------------------------------------*/
6167 
6168 cs_matrix_fill_type_t
cs_matrix_get_fill_type(bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size)6169 cs_matrix_get_fill_type(bool              symmetric,
6170                         const cs_lnum_t  *diag_block_size,
6171                         const cs_lnum_t  *extra_diag_block_size)
6172 {
6173   cs_matrix_fill_type_t fill_type = CS_MATRIX_N_FILL_TYPES;
6174 
6175   cs_lnum_t _db_size = 1, _eb_size = 1;
6176   if (diag_block_size != NULL)
6177     _db_size = diag_block_size[0];
6178 
6179   if (extra_diag_block_size != NULL)
6180     _eb_size = extra_diag_block_size[0];
6181 
6182   /* Set fill type */
6183 
6184   cs_base_check_bool(&symmetric);
6185 
6186   if (_db_size == 1) {
6187     if (symmetric)
6188       fill_type = CS_MATRIX_SCALAR_SYM;
6189     else
6190       fill_type = CS_MATRIX_SCALAR;
6191   }
6192   else if (_eb_size == 1) {
6193     if (symmetric)
6194       fill_type = CS_MATRIX_BLOCK_D_SYM;
6195     else if (_db_size == 6)
6196       fill_type = CS_MATRIX_BLOCK_D_66;
6197     else
6198       fill_type = CS_MATRIX_BLOCK_D;
6199   }
6200   else
6201     fill_type = CS_MATRIX_BLOCK;
6202 
6203   return fill_type;
6204 }
6205 
6206 /*----------------------------------------------------------------------------*/
6207 /*!
6208  * \brief Set matrix coefficients defined relative to a "native" edge graph,
6209  * sharing arrays with the caller when possible.
6210  *
6211  * With shared arrays, the matrix becomes unusable if the arrays passed as
6212  * arguments are not be modified (its coefficients should be unset first
6213  * to mark this).
6214  *
6215  * Depending on current options and initialization, values will be copied
6216  * or simply mapped.
6217  *
6218  * Block sizes are defined by an optional array of 4 values:
6219  *   0: useful block size, 1: vector block extents,
6220  *   2: matrix line extents,  3: matrix line*column extents
6221  *
6222  * \param[in, out]  matrix                 pointer to matrix structure
6223  * \param[in]       symmetric              indicates if matrix coefficients
6224  *                                         are symmetric
6225  * \param[in]       diag_block_size        block sizes for diagonal, or NULL
6226  * \param[in]       extra_diag_block_size  block sizes for extra diagonal,
6227  *                                         or NULL
6228  * \param[in]       n_edges                local number of graph edges
6229  * \param[in]       edges                  edges (row <-> column) connectivity
6230  * \param[in]       da                     diagonal values (NULL if zero)
6231  * \param[in]       xa                     extradiagonal values (NULL if zero)
6232  *                                         casts as:
6233  *                                           xa[n_edges]    if symmetric,
6234  *                                           xa[n_edges][2] if non symmetric
6235  */
6236 /*----------------------------------------------------------------------------*/
6237 
6238 void
cs_matrix_set_coefficients(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size,const cs_lnum_t n_edges,const cs_lnum_2_t edges[],const cs_real_t * da,const cs_real_t * xa)6239 cs_matrix_set_coefficients(cs_matrix_t        *matrix,
6240                            bool                symmetric,
6241                            const cs_lnum_t    *diag_block_size,
6242                            const cs_lnum_t    *extra_diag_block_size,
6243                            const cs_lnum_t     n_edges,
6244                            const cs_lnum_2_t   edges[],
6245                            const cs_real_t    *da,
6246                            const cs_real_t    *xa)
6247 {
6248   if (matrix == NULL)
6249     bft_error(__FILE__, __LINE__, 0,
6250               _("The matrix is not defined."));
6251 
6252   cs_base_check_bool(&symmetric);
6253 
6254   /* Set fill type */
6255   _set_fill_info(matrix,
6256                  symmetric,
6257                  diag_block_size,
6258                  extra_diag_block_size);
6259 
6260   /* Set coefficients */
6261 
6262   if (matrix->set_coefficients != NULL) {
6263     matrix->xa = xa;
6264     matrix->set_coefficients(matrix, symmetric, false, n_edges, edges, da, xa);
6265   }
6266   else
6267     bft_error
6268       (__FILE__, __LINE__, 0,
6269        "Matrix format %s with fill type %s does not handle\n"
6270        "coefficient assignment from native (graph-edge) coefficients.",
6271        matrix->type_name,
6272        cs_matrix_fill_type_name[matrix->fill_type]);
6273 }
6274 
6275 /*----------------------------------------------------------------------------*/
6276 /*!
6277  * \brief Set matrix coefficients, copying values to private arrays.
6278  *
6279  * With private arrays, the matrix becomes independant from the
6280  * arrays passed as arguments.
6281  *
6282  * Block sizes are defined by an optional array of 4 values:
6283  *   0: useful block size, 1: vector block extents,
6284  *   2: matrix line extents,  3: matrix line*column extents
6285  *
6286  * \param[in, out]  matrix                 pointer to matrix structure
6287  * \param[in]       symmetric              indicates if matrix coefficients
6288  *                                         are symmetric
6289  * \param[in]       diag_block_size        block sizes for diagonal, or NULL
6290  * \param[in]       extra_diag_block_size  block sizes for extra diagonal,
6291  *                                         or NULL
6292  * \param[in]       n_edges                local number of graph edges
6293  * \param[in]       edges                  edges (row <-> column) connectivity
6294  * \param[in]       da                     diagonal values (NULL if zero)
6295  * \param[in]       xa                     extradiagonal values (NULL if zero)
6296  *                                         casts as:
6297  *                                           xa[n_edges]    if symmetric,
6298  *                                           xa[n_edges][2] if non symmetric
6299  */
6300 /*----------------------------------------------------------------------------*/
6301 
6302 void
cs_matrix_copy_coefficients(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size,const cs_lnum_t n_edges,const cs_lnum_2_t edges[],const cs_real_t * da,const cs_real_t * xa)6303 cs_matrix_copy_coefficients(cs_matrix_t        *matrix,
6304                             bool                symmetric,
6305                             const cs_lnum_t    *diag_block_size,
6306                             const cs_lnum_t    *extra_diag_block_size,
6307                             const cs_lnum_t     n_edges,
6308                             const cs_lnum_2_t   edges[],
6309                             const cs_real_t    *da,
6310                             const cs_real_t    *xa)
6311 {
6312   if (matrix == NULL)
6313     bft_error(__FILE__, __LINE__, 0,
6314               _("The matrix is not defined."));
6315 
6316   cs_base_check_bool(&symmetric);
6317 
6318   _set_fill_info(matrix,
6319                  symmetric,
6320                  diag_block_size,
6321                  extra_diag_block_size);
6322 
6323   if (matrix->set_coefficients != NULL)
6324     matrix->set_coefficients(matrix, symmetric, true, n_edges, edges, da, xa);
6325   else
6326     bft_error
6327       (__FILE__, __LINE__, 0,
6328        "Matrix format %s with fill type %s does not handle\n"
6329        "coefficient assignment from native (graph-edge) coefficients.",
6330        matrix->type_name,
6331        cs_matrix_fill_type_name[matrix->fill_type]);
6332 }
6333 
6334 /*----------------------------------------------------------------------------*/
6335 /*!
6336  * \brief Set matrix coefficients in an MSR format, transfering the
6337  * property of those arrays to the matrix.
6338  *
6339  * If the matrix is also in MSR format, this avoids an extra copy.
6340  * If it is in a different format, values are copied to the structure,
6341  * and the original arrays freed. In any case, the arrays pointers passed as
6342  * arguments are set to NULL, to help ensure the caller does not use the
6343  * original arrays directly after this call.
6344  *
6345  * Block sizes are defined by an optional array of 4 values:
6346  *   0: useful block size, 1: vector block extents,
6347  *   2: matrix line extents,  3: matrix line*column extents
6348  *
6349  * \param[in, out]  matrix                 pointer to matrix structure
6350  * \param[in]       symmetric              indicates if matrix coefficients
6351  *                                         are symmetric
6352  * \param[in]       diag_block_size        block sizes for diagonal, or NULL
6353  * \param[in]       extra_diag_block_size  block sizes for extra diagonal,
6354  *                                         or NULL
6355  * \param[in]       row_index              MSR row index (0 to n-1)
6356  * \param[in]       col_id                 MSR column id (0 to n-1)
6357  * \param[in, out]  d_val                  diagonal values (NULL if zero)
6358  * \param[in, out]  x_val                  extradiagonal values (NULL if zero)
6359  */
6360 /*----------------------------------------------------------------------------*/
6361 
6362 void
cs_matrix_transfer_coefficients_msr(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size,const cs_lnum_t row_index[],const cs_lnum_t col_id[],cs_real_t ** d_val,cs_real_t ** x_val)6363 cs_matrix_transfer_coefficients_msr(cs_matrix_t         *matrix,
6364                                     bool                 symmetric,
6365                                     const cs_lnum_t     *diag_block_size,
6366                                     const cs_lnum_t     *extra_diag_block_size,
6367                                     const cs_lnum_t      row_index[],
6368                                     const cs_lnum_t      col_id[],
6369                                     cs_real_t          **d_val,
6370                                     cs_real_t          **x_val)
6371 {
6372   const cs_real_t  *d_val_p = (d_val != NULL) ? *d_val : NULL;
6373   const cs_real_t  *x_val_p = (x_val != NULL) ? *x_val : NULL;
6374 
6375   if (matrix == NULL)
6376     bft_error(__FILE__, __LINE__, 0,
6377               _("The matrix is not defined."));
6378 
6379   cs_base_check_bool(&symmetric);
6380 
6381   _set_fill_info(matrix,
6382                  symmetric,
6383                  diag_block_size,
6384                  extra_diag_block_size);
6385 
6386   switch(matrix->type) {
6387 
6388   case CS_MATRIX_CSR:
6389     _set_coeffs_csr_from_msr(matrix,
6390                              row_index,
6391                              col_id,
6392                              d_val_p,
6393                              d_val,
6394                              x_val_p,
6395                              x_val);
6396     break;
6397 
6398   case CS_MATRIX_MSR:
6399     _set_coeffs_msr_from_msr(matrix,
6400                              false, /* ignored in case of transfer */
6401                              row_index,
6402                              col_id,
6403                              d_val_p,
6404                              d_val,
6405                              x_val_p,
6406                              x_val);
6407     break;
6408 
6409   default:
6410     bft_error
6411       (__FILE__, __LINE__, 0,
6412        "Matrix format %s with fill type %s does not handle\n"
6413        "coefficient assignment from native (graph-edge) coefficients.",
6414        matrix->type_name,
6415        cs_matrix_fill_type_name[matrix->fill_type]);
6416   }
6417 }
6418 
6419 /*----------------------------------------------------------------------------*/
6420 /*!
6421  * \brief Release shared matrix coefficients.
6422  *
6423  * Pointers to mapped coefficients are set to NULL, while
6424  * coefficient copies owned by the matrix are not modified.
6425  *
6426  * This simply ensures the matrix does not maintain pointers
6427  * to nonexistant data.
6428  *
6429  * \param[in, out]  matrix  pointer to matrix structure
6430  */
6431 /*----------------------------------------------------------------------------*/
6432 
6433 void
cs_matrix_release_coefficients(cs_matrix_t * matrix)6434 cs_matrix_release_coefficients(cs_matrix_t  *matrix)
6435 {
6436   /* Check API state */
6437 
6438   if (matrix == NULL)
6439     bft_error(__FILE__, __LINE__, 0,
6440               _("The matrix is not defined."));
6441 
6442   if (matrix->release_coefficients != NULL) {
6443     matrix->xa = NULL;
6444     matrix->release_coefficients(matrix);
6445   }
6446   else {
6447     bft_error
6448       (__FILE__, __LINE__, 0,
6449        "Matrix format %s is missing a release_coefficients function.",
6450        matrix->type_name);
6451   }
6452 
6453   /* Set fill type to impossible value */
6454 
6455   _clear_fill_info(matrix);
6456 }
6457 
6458 /*----------------------------------------------------------------------------*/
6459 /*!
6460  * \brief Create and initialize a CSR matrix assembler values structure.
6461  *
6462  * The associated matrix's structure must have been created using
6463  * \ref cs_matrix_structure_create_from_assembler.
6464  *
6465  * Block sizes are defined by an optional array of 4 values:
6466  *   0: useful block size, 1: vector block extents,
6467  *   2: matrix line extents,  3: matrix line*column extents
6468  *
6469  * \param[in, out]  matrix                 pointer to matrix structure
6470  * \param[in]       diag_block_size        block sizes for diagonal, or NULL
6471  * \param[in]       extra_diag_block_size  block sizes for extra diagonal,
6472  *                                         or NULL
6473  *
6474  * \return  pointer to initialized matrix assembler values structure;
6475  */
6476 /*----------------------------------------------------------------------------*/
6477 
6478 cs_matrix_assembler_values_t *
cs_matrix_assembler_values_init(cs_matrix_t * matrix,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size)6479 cs_matrix_assembler_values_init(cs_matrix_t      *matrix,
6480                                 const cs_lnum_t  *diag_block_size,
6481                                 const cs_lnum_t  *extra_diag_block_size)
6482 {
6483   cs_matrix_assembler_values_t *mav = NULL;
6484 
6485   /* Set fill type */
6486 
6487   _set_fill_info(matrix,
6488                  false, /* symmetric */
6489                  diag_block_size,
6490                  extra_diag_block_size);
6491 
6492   /* Create values assembler */
6493 
6494   if (matrix->assembler_values_create != NULL)
6495     mav = matrix->assembler_values_create(matrix,
6496                                         diag_block_size,
6497                                         extra_diag_block_size);
6498 
6499   else
6500     bft_error(__FILE__, __LINE__, 0,
6501               _("%s: direct assembly handling of matrices of type %s\n"
6502                 "is not available."),
6503               __func__, _(matrix->type_name));
6504 
6505   return mav;
6506 }
6507 
6508 /*----------------------------------------------------------------------------*/
6509 /*!
6510  * \brief Copy matrix diagonal values.
6511  *
6512  * In case of matrixes with block diagonal coefficients, only the true
6513  * diagonal values are copied.
6514  *
6515  * \param[in]   matrix  pointer to matrix structure
6516  * \param[out]  da      diagonal (pre-allocated, size: n_rows*block_size)
6517  */
6518 /*----------------------------------------------------------------------------*/
6519 
6520 void
cs_matrix_copy_diagonal(const cs_matrix_t * matrix,cs_real_t * restrict da)6521 cs_matrix_copy_diagonal(const cs_matrix_t  *matrix,
6522                         cs_real_t          *restrict da)
6523 {
6524   /* Check API state */
6525 
6526   if (matrix == NULL)
6527     bft_error(__FILE__, __LINE__, 0,
6528               _("The matrix is not defined."));
6529 
6530   if (matrix->copy_diagonal != NULL)
6531     matrix->copy_diagonal(matrix, da);
6532 }
6533 
6534 /*----------------------------------------------------------------------------*/
6535 /*!
6536  * \brief Query matrix coefficients symmetry
6537  *
6538  * \param[in]  matrix  pointer to matrix structure
6539  *
6540  * \return  true if coefficients are symmetric, false otherwise
6541  */
6542 /*----------------------------------------------------------------------------*/
6543 
6544 bool
cs_matrix_is_symmetric(const cs_matrix_t * matrix)6545 cs_matrix_is_symmetric(const cs_matrix_t  *matrix)
6546 {
6547   return matrix->symmetric;
6548 }
6549 
6550 /*----------------------------------------------------------------------------*/
6551 /*!
6552  * \brief Indicate whether coefficients were mapped from native face-based
6553  *        arrays.
6554  *
6555  * It is used in the current multgrid code, but should be removed as soon
6556  * as the dependency to the native format is removed.
6557  *
6558  * \param[in]  matrix  pointer to matrix structure
6559  *
6560  * \return  true if coefficients were mapped from native face-based arrays,
6561  *          false otherwise
6562  */
6563 /*----------------------------------------------------------------------------*/
6564 
6565 bool
cs_matrix_is_mapped_from_native(const cs_matrix_t * matrix)6566 cs_matrix_is_mapped_from_native(const cs_matrix_t  *matrix)
6567 {
6568   bool retval = false;
6569 
6570   if (matrix->xa != NULL)
6571     retval = true;
6572 
6573   return retval;
6574 }
6575 
6576 /*----------------------------------------------------------------------------*/
6577 /*!
6578  * \brief Get matrix diagonal values.
6579  *
6580  * In case of matrixes with block diagonal coefficients, a pointer to
6581  * the complete block diagonal is returned.
6582  *
6583  * \param[in]  matrix  pointer to matrix structure
6584  *
6585  * \return  pointer to matrix diagonal array
6586  */
6587 /*----------------------------------------------------------------------------*/
6588 
6589 const cs_real_t *
cs_matrix_get_diagonal(const cs_matrix_t * matrix)6590 cs_matrix_get_diagonal(const cs_matrix_t  *matrix)
6591 {
6592   const cs_real_t  *diag = NULL;
6593 
6594   if (matrix->get_diagonal != NULL)
6595     diag = matrix->get_diagonal(matrix);
6596 
6597   else
6598     bft_error(__FILE__, __LINE__, 0,
6599               _("%s: not available for matrix type: %s."),
6600               __func__, cs_matrix_get_type_name(matrix));
6601 
6602   return diag;
6603 }
6604 
6605 /*----------------------------------------------------------------------------*/
6606 /*!
6607  * \brief Get pointer to matrix extra-diagonal values in "native" format
6608  *
6609  * \deprecated
6610  *
6611  * This function only functions if the coefficients were mapped from native
6612  * coefficients using cs_matrix_set_coefficients(), in which case the pointer
6613  * returned is the same as the one passed to that function.
6614  *
6615  * It is used in the current multgrid code, but should be removed as soon
6616  * as the dependency to the native format is removed.
6617  *
6618  * \param[in]  matrix  pointer to matrix structure
6619  *
6620  * \return  pointer to matrix diagonal array
6621  */
6622 /*----------------------------------------------------------------------------*/
6623 
6624 const cs_real_t *
cs_matrix_get_extra_diagonal(const cs_matrix_t * matrix)6625 cs_matrix_get_extra_diagonal(const cs_matrix_t  *matrix)
6626 {
6627   const cs_real_t  *exdiag = NULL;
6628 
6629   if (matrix->xa == NULL)
6630     bft_error
6631       (__FILE__, __LINE__, 0,
6632        _("Matrix coefficients were not mapped from native face-based arrays,\n"
6633          "so the extra-diagonal coefficients are not available in that form."));
6634   else
6635     exdiag = matrix->xa;
6636 
6637   return exdiag;
6638 }
6639 
6640 /*----------------------------------------------------------------------------*/
6641 /*!
6642  * \brief Initialize row info for a given matrix.
6643  *
6644  * \param[out]  r   row info structure
6645  */
6646 /*----------------------------------------------------------------------------*/
6647 
6648 void
cs_matrix_row_init(cs_matrix_row_info_t * r)6649 cs_matrix_row_init(cs_matrix_row_info_t  *r)
6650 {
6651   r->row_size = 0;
6652   r->buffer_size = 0;
6653   r->col_id = NULL;
6654   r->_col_id = NULL;
6655   r->vals = NULL;
6656   r->_vals = NULL;
6657 }
6658 
6659 /*----------------------------------------------------------------------------*/
6660 /*!
6661  * \brief Finalize row info for a given matrix.
6662  *
6663  * \param[in, out]  r   row info structure
6664  */
6665 /*----------------------------------------------------------------------------*/
6666 
6667 void
cs_matrix_row_finalize(cs_matrix_row_info_t * r)6668 cs_matrix_row_finalize(cs_matrix_row_info_t  *r)
6669 {
6670   r->row_size = 0;
6671   r->buffer_size = 0;
6672   r->col_id = NULL;
6673   BFT_FREE(r->_col_id);
6674   r->vals = NULL;
6675   BFT_FREE(r->_vals);
6676 }
6677 
6678 /*----------------------------------------------------------------------------*/
6679 /*!
6680  * \brief Get row values for a given matrix.
6681  *
6682  * This function may not work for all matrix types.
6683  *
6684  * In the case of blocked matrixes, the true (non-blocked)
6685  * values are returned.
6686  *
6687  * The row information structure must have been previously initialized
6688  * using \ref cs_matrix_row_init, and should be finalized using
6689  * using \ref cs_matrix_row_finalize, so as to free buffers it may have
6690  * built for certain matrix formats.
6691  *
6692  * \param[in]       matrix     pointer to matrix structure
6693  * \param[in]       row_id     id of row to query
6694  * \param[in, out]  r          row info structure
6695  */
6696 /*----------------------------------------------------------------------------*/
6697 
6698 void
cs_matrix_get_row(const cs_matrix_t * matrix,const cs_lnum_t row_id,cs_matrix_row_info_t * r)6699 cs_matrix_get_row(const cs_matrix_t     *matrix,
6700                   const cs_lnum_t        row_id,
6701                   cs_matrix_row_info_t  *r)
6702 {
6703   cs_lnum_t b_size = matrix->db_size[0];
6704 
6705   switch (matrix->type) {
6706 
6707   case CS_MATRIX_CSR:
6708     {
6709       const cs_matrix_struct_csr_t  *ms = matrix->structure;
6710       const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
6711       r->row_size = (ms->row_index[row_id+1] - ms->row_index[row_id])*b_size;
6712       r->col_id = ms->col_id + ms->row_index[row_id]*b_size;
6713       if (mc->val != NULL)
6714         r->vals = mc->val + ms->row_index[row_id]*b_size;
6715       else
6716         r->vals = NULL;
6717     }
6718     break;
6719 
6720   case CS_MATRIX_MSR:
6721     {
6722       const cs_lnum_t _row_id = row_id / b_size;
6723       const cs_matrix_struct_csr_t  *ms = matrix->structure;
6724       const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
6725       const cs_lnum_t n_ed_cols =   ms->row_index[_row_id+1]
6726                                   - ms->row_index[_row_id];
6727       if (b_size == 1)
6728         r->row_size = n_ed_cols + 1;
6729       else if (matrix->eb_size[0] == 1)
6730         r->row_size = n_ed_cols*b_size;
6731       else
6732         r->row_size = (n_ed_cols+1)*b_size;
6733       if (r->buffer_size < r->row_size) {
6734         r->buffer_size = r->row_size*2;
6735         BFT_REALLOC(r->_col_id, r->buffer_size, cs_lnum_t);
6736         r->col_id = r->_col_id;
6737         BFT_REALLOC(r->_vals, r->buffer_size, cs_real_t);
6738         r->vals = r->_vals;
6739       }
6740       cs_lnum_t ii = 0, jj = 0;
6741       const cs_lnum_t *restrict c_id = ms->col_id + ms->row_index[_row_id];
6742       if (b_size == 1) {
6743         const cs_real_t *m_row = mc->val + ms->row_index[_row_id];
6744         for (jj = 0; jj < n_ed_cols && c_id[jj] < _row_id; jj++) {
6745           r->_col_id[ii] = c_id[jj];
6746           r->_vals[ii++] = m_row[jj];
6747         }
6748         r->_col_id[ii] = _row_id;
6749         r->_vals[ii++] = mc->d_val[_row_id];
6750         for (; jj < n_ed_cols; jj++) {
6751           r->_col_id[ii] = c_id[jj];
6752           r->_vals[ii++] = m_row[jj];
6753         }
6754       }
6755       else if (matrix->eb_size[0] == 1) {
6756         const cs_lnum_t _sub_id = row_id % b_size;
6757         const cs_lnum_t *db_size = matrix->db_size;
6758         const cs_real_t *m_row = mc->val + ms->row_index[_row_id];
6759         for (jj = 0; jj < n_ed_cols && c_id[jj] < _row_id; jj++) {
6760           r->_col_id[ii] = c_id[jj]*b_size + _sub_id;
6761           r->_vals[ii++] = m_row[jj];
6762         }
6763         for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6764           r->_col_id[ii] = _row_id*b_size + kk;
6765           r->_vals[ii++] = mc->d_val[  _row_id*db_size[3]
6766                                      + _sub_id*db_size[2] + kk];
6767         }
6768         for (; jj < n_ed_cols; jj++) {
6769           r->_col_id[ii] = c_id[jj]*b_size + _sub_id;
6770           r->_vals[ii++] = m_row[jj];
6771         }
6772       }
6773       else {
6774         const cs_lnum_t _sub_id = row_id % b_size;
6775         const cs_lnum_t *db_size = matrix->db_size;
6776         const cs_lnum_t *eb_size = matrix->db_size;
6777         const cs_real_t *m_row = mc->val + ms->row_index[_row_id]*eb_size[3];
6778         for (jj = 0; jj < n_ed_cols && c_id[jj] < _row_id; jj++) {
6779           for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6780             r->_col_id[ii] = c_id[jj]*b_size + kk;
6781             r->_vals[ii++] = m_row[_sub_id*eb_size[2] + kk];
6782           }
6783         }
6784         for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6785           r->_col_id[ii] = _row_id*b_size + kk;
6786           r->_vals[ii++] = mc->d_val[  _row_id*db_size[3]
6787                                      + _sub_id*db_size[2] + kk];
6788         }
6789         for (; jj < n_ed_cols; jj++) {
6790           for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6791             r->_col_id[ii] = c_id[jj]*b_size + kk;
6792             r->_vals[ii++] = m_row[_sub_id*eb_size[2] + kk];
6793           }
6794         }
6795       }
6796     }
6797     break;
6798 
6799   default:
6800     bft_error
6801       (__FILE__, __LINE__, 0,
6802        _("Matrix format %s with fill type %s does not handle %s operation."),
6803        matrix->type_name,
6804        cs_matrix_fill_type_name[matrix->fill_type],
6805        __func__);
6806   }
6807 }
6808 
6809 /*----------------------------------------------------------------------------*/
6810 /*!
6811  * \brief Get arrays describing a matrix in native format.
6812  *
6813  * This function works for matrix in native format.
6814  *
6815  * Matrix block sizes can be obtained by cs_matrix_get_diag_block_size()
6816  * and cs_matrix_get_extra_diag_block_size().
6817  *
6818  * \param[in]   matrix     pointer to matrix structure
6819  * \param[out]  symmetric  true if symmetric
6820  * \param[out]  n_edges    number of associated faces
6821  * \param[out]  edges      edges (symmetric row <-> column) connectivity
6822  * \param[out]  d_val      diagonal values
6823  * \param[out]  x_val      extra-diagonal values
6824  */
6825 /*----------------------------------------------------------------------------*/
6826 
6827 void
cs_matrix_get_native_arrays(const cs_matrix_t * matrix,bool * symmetric,cs_lnum_t * n_edges,const cs_lnum_2_t ** edges,const cs_real_t ** d_val,const cs_real_t ** x_val)6828 cs_matrix_get_native_arrays(const cs_matrix_t   *matrix,
6829                             bool                *symmetric,
6830                             cs_lnum_t           *n_edges,
6831                             const cs_lnum_2_t  **edges,
6832                             const cs_real_t    **d_val,
6833                             const cs_real_t    **x_val)
6834 {
6835   if (symmetric != NULL)
6836     *symmetric = false;
6837   if (n_edges != NULL)
6838     *n_edges = 0;
6839   if (edges != NULL)
6840     *edges = NULL;
6841   if (d_val != NULL)
6842     *d_val = NULL;
6843   if (x_val != NULL)
6844     *x_val = NULL;
6845 
6846   if (matrix->type == CS_MATRIX_NATIVE) {
6847     const cs_matrix_struct_native_t  *ms = matrix->structure;
6848     const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
6849     if (n_edges != NULL)
6850       *n_edges = ms->n_edges;
6851     if (edges != NULL)
6852       *edges = ms->edges;
6853     if (mc != NULL) {
6854       if (symmetric != NULL)
6855         *symmetric = mc->symmetric;
6856       if (d_val != NULL)
6857         *d_val = mc->da;
6858       if (x_val != NULL)
6859         *x_val = mc->xa;
6860     }
6861   }
6862 }
6863 
6864 /*----------------------------------------------------------------------------*/
6865 /*!
6866  * \brief Get arrays describing a matrix in CSR format.
6867  *
6868  * This function only works for an CSR matrix (i.e. there is
6869  * no automatic conversion from another matrix type).
6870  *
6871  * Matrix block sizes can be obtained by cs_matrix_get_diag_block_size()
6872  * and cs_matrix_get_extra_diag_block_size().
6873  *
6874  * \param[in]   matrix     pointer to matrix structure
6875  * \param[out]  row_index  CSR row index
6876  * \param[out]  col_id     CSR column id
6877  * \param[out]  val        values
6878  */
6879 /*----------------------------------------------------------------------------*/
6880 
6881 void
cs_matrix_get_csr_arrays(const cs_matrix_t * matrix,const cs_lnum_t ** row_index,const cs_lnum_t ** col_id,const cs_real_t ** val)6882 cs_matrix_get_csr_arrays(const cs_matrix_t   *matrix,
6883                          const cs_lnum_t    **row_index,
6884                          const cs_lnum_t    **col_id,
6885                          const cs_real_t    **val)
6886 {
6887   if (row_index != NULL)
6888     *row_index = NULL;
6889   if (col_id != NULL)
6890     *col_id = NULL;
6891   if (val != NULL)
6892     *val = NULL;
6893 
6894   if (matrix->type == CS_MATRIX_CSR) {
6895     const cs_matrix_struct_csr_t  *ms = matrix->structure;
6896     const cs_matrix_coeff_csr_t  *mc = matrix->coeffs;
6897     if (row_index != NULL)
6898       *row_index = ms->row_index;
6899     if (col_id != NULL)
6900       *col_id = ms->col_id;
6901     if (val != NULL && mc != NULL) {
6902       *val = mc->val;
6903     }
6904   }
6905 }
6906 
6907 /*----------------------------------------------------------------------------*/
6908 /*!
6909  * \brief Get arrays describing a matrix in MSR format.
6910  *
6911  * This function only works for an MSR matrix (i.e. there is
6912  * no automatic conversion from another matrix type).
6913  *
6914  * Matrix block sizes can be obtained by cs_matrix_get_diag_block_size()
6915  * and cs_matrix_get_extra_diag_block_size().
6916  *
6917  * \param[in]   matrix     pointer to matrix structure
6918  * \param[out]  row_index  MSR row index
6919  * \param[out]  col_id     MSR column id
6920  * \param[out]  d_val      diagonal values
6921  * \param[out]  x_val      extra-diagonal values
6922  */
6923 /*----------------------------------------------------------------------------*/
6924 
6925 void
cs_matrix_get_msr_arrays(const cs_matrix_t * matrix,const cs_lnum_t ** row_index,const cs_lnum_t ** col_id,const cs_real_t ** d_val,const cs_real_t ** x_val)6926 cs_matrix_get_msr_arrays(const cs_matrix_t   *matrix,
6927                          const cs_lnum_t    **row_index,
6928                          const cs_lnum_t    **col_id,
6929                          const cs_real_t    **d_val,
6930                          const cs_real_t    **x_val)
6931 {
6932   if (row_index != NULL)
6933     *row_index = NULL;
6934   if (col_id != NULL)
6935     *col_id = NULL;
6936   if (d_val != NULL)
6937     *d_val = NULL;
6938   if (x_val != NULL)
6939     *x_val = NULL;
6940 
6941   if (matrix->type == CS_MATRIX_MSR) {
6942     const cs_matrix_struct_csr_t  *ms = matrix->structure;
6943     const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
6944     if (row_index != NULL)
6945       *row_index = ms->row_index;
6946     if (col_id != NULL)
6947       *col_id = ms->col_id;
6948     if (mc != NULL) {
6949       if (d_val != NULL)
6950         *d_val = mc->d_val;
6951       if (x_val != NULL)
6952         *x_val = mc->x_val;
6953     }
6954   }
6955   else
6956     bft_error
6957       (__FILE__, __LINE__, 0,
6958        _("%s is not available for matrix using %s storage."),
6959        __func__,
6960        matrix->type_name);
6961 }
6962 
6963 /*----------------------------------------------------------------------------*/
6964 /*!
6965  * \brief Matrix.vector product y = A.x
6966  *
6967  * This function includes a halo update of x prior to multiplication by A.
6968  *
6969  * \param[in]       matrix         pointer to matrix structure
6970  * \param[in, out]  x              multipliying vector values
6971  *                                 (ghost values updated)
6972  * \param[out]      y              resulting vector
6973  */
6974 /*----------------------------------------------------------------------------*/
6975 
6976 void
cs_matrix_vector_multiply(const cs_matrix_t * matrix,cs_real_t * restrict x,cs_real_t * restrict y)6977 cs_matrix_vector_multiply(const cs_matrix_t   *matrix,
6978                           cs_real_t           *restrict x,
6979                           cs_real_t           *restrict y)
6980 {
6981   assert(matrix != NULL);
6982 
6983   if (matrix->vector_multiply[matrix->fill_type][0] != NULL)
6984     matrix->vector_multiply[matrix->fill_type][0](matrix, false, true, x, y);
6985 
6986   else
6987     bft_error(__FILE__, __LINE__, 0,
6988               _("%s: Matrix of type: %s is missing a vector multiply\n"
6989                 "function for fill type %s."),
6990               __func__, cs_matrix_get_type_name(matrix),
6991               cs_matrix_fill_type_name[matrix->fill_type]);
6992 }
6993 
6994 /*----------------------------------------------------------------------------*/
6995 /*!
6996  * \brief Matrix.vector product y = A.x with no prior halo update of x.
6997  *
6998  * This function does not include a halo update of x prior to multiplication
6999  * by A, so it should be called only when the halo of x is known to already
7000  * be up to date (in which case we avoid the performance penalty of a
7001  * redundant update by using this variant of the matrix.vector product).
7002  *
7003  * \param[in]   matrix         pointer to matrix structure
7004  * \param[in]   x              multipliying vector values
7005  * \param[out]  y              resulting vector
7006  */
7007 /*----------------------------------------------------------------------------*/
7008 
7009 void
cs_matrix_vector_multiply_nosync(const cs_matrix_t * matrix,cs_real_t * restrict x,cs_real_t * restrict y)7010 cs_matrix_vector_multiply_nosync(const cs_matrix_t  *matrix,
7011                                  cs_real_t          *restrict x,
7012                                  cs_real_t          *restrict y)
7013 {
7014   assert(matrix != NULL);
7015 
7016   if (matrix->vector_multiply[matrix->fill_type][0] != NULL)
7017     matrix->vector_multiply[matrix->fill_type][0](matrix, false, false, x, y);
7018 
7019   else
7020     bft_error(__FILE__, __LINE__, 0,
7021               _("%s: Matrix of type: %s is missing a vector multiply\n"
7022                 "function for fill type %s."),
7023               __func__, cs_matrix_get_type_name(matrix),
7024               cs_matrix_fill_type_name[matrix->fill_type]);
7025 }
7026 
7027 /*----------------------------------------------------------------------------*/
7028 /*!
7029  * \brief Matrix.vector product y = (A-D).x
7030  *
7031  * This function includes a halo update of x prior to multiplication by A.
7032  *
7033  * \param[in]       matrix         pointer to matrix structure
7034  * \param[in, out]  x              multipliying vector values
7035  *                                 (ghost values updated)
7036  * \param[out]      y              resulting vector
7037  */
7038 /*----------------------------------------------------------------------------*/
7039 
7040 void
cs_matrix_exdiag_vector_multiply(const cs_matrix_t * matrix,cs_real_t * restrict x,cs_real_t * restrict y)7041 cs_matrix_exdiag_vector_multiply(const cs_matrix_t   *matrix,
7042                                  cs_real_t           *restrict x,
7043                                  cs_real_t           *restrict y)
7044 {
7045   assert(matrix != NULL);
7046 
7047   if (matrix->vector_multiply[matrix->fill_type][1] != NULL)
7048     matrix->vector_multiply[matrix->fill_type][1](matrix, true, true, x, y);
7049 
7050   else
7051     bft_error(__FILE__, __LINE__, 0,
7052               _("%s: Matrix of type: %s is missing a vector multiply\n"
7053                 "vy extra-diagonal function for fill type %s."),
7054               __func__, cs_matrix_get_type_name(matrix),
7055               cs_matrix_fill_type_name[matrix->fill_type]);
7056 }
7057 
7058 /*----------------------------------------------------------------------------*/
7059 /*!
7060  * \brief Build matrix variant
7061  *
7062  * The variant will initially use default matrix-vector functions,
7063  * which can be later modified using cs_matrix_variant_set_func().
7064  *
7065  * \param[in]  m   pointer to matrix
7066  */
7067 /*----------------------------------------------------------------------------*/
7068 
7069 cs_matrix_variant_t *
cs_matrix_variant_create(cs_matrix_t * m)7070 cs_matrix_variant_create(cs_matrix_t  *m)
7071 {
7072   cs_matrix_variant_t  *mv;
7073 
7074   BFT_MALLOC(mv, 1, cs_matrix_variant_t);
7075 
7076   mv->type = m->type;
7077   mv->fill_type = m->fill_type;
7078 
7079   for (int j = 0; j < 2; j++) {
7080     mv->vector_multiply[j] = NULL;
7081     strncpy(mv->name[j], "default", 31);
7082     mv->name[j][31] = '\0';
7083   }
7084 
7085   (void) _set_spmv_func(m->type,
7086                         m->numbering,
7087                         m->fill_type,
7088                         2,
7089                         NULL, /* func_name */
7090                         mv->vector_multiply);
7091 
7092   return mv;
7093 }
7094 
7095 /*----------------------------------------------------------------------------*/
7096 /*!
7097  * \brief Build list of variants for tuning or testing.
7098  *
7099  * The matrix coefficients should be assigned, so the fill type can
7100  * be determined.
7101  *
7102  * \param[in]   m             associated matrix
7103  * \param[out]  n_variants    number of variants
7104  * \param[out]  m_variant     array of matrix variants
7105  */
7106 /*----------------------------------------------------------------------------*/
7107 
7108 void
cs_matrix_variant_build_list(const cs_matrix_t * m,int * n_variants,cs_matrix_variant_t ** m_variant)7109 cs_matrix_variant_build_list(const cs_matrix_t       *m,
7110                              int                     *n_variants,
7111                              cs_matrix_variant_t    **m_variant)
7112 {
7113   int  n_variants_max = 0;
7114 
7115   *n_variants = 0;
7116   *m_variant = NULL;
7117 
7118   cs_matrix_vector_product_t  *vector_multiply = NULL;
7119 
7120   if (m->type == CS_MATRIX_NATIVE) {
7121 
7122     switch(m->fill_type) {
7123     case CS_MATRIX_SCALAR:
7124     case CS_MATRIX_SCALAR_SYM:
7125       vector_multiply = _mat_vec_p_l_native;
7126       break;
7127     case CS_MATRIX_BLOCK_D:
7128     case CS_MATRIX_BLOCK_D_66:
7129     case CS_MATRIX_BLOCK_D_SYM:
7130       vector_multiply = _b_mat_vec_p_l_native_fixed;
7131       break;
7132     case CS_MATRIX_BLOCK:
7133       vector_multiply = _bb_mat_vec_p_l_native;
7134       break;
7135     default:
7136       vector_multiply = NULL;
7137     }
7138 
7139     _variant_add(_("native, baseline"),
7140                  m->type,
7141                  m->fill_type,
7142                  2, /* ed_flag */
7143                  vector_multiply,
7144                  n_variants,
7145                  &n_variants_max,
7146                  m_variant);
7147 
7148     if (m->numbering != NULL) {
7149 
7150 #if defined(HAVE_OPENMP)
7151 
7152       if (m->numbering->type == CS_NUMBERING_THREADS) {
7153 
7154         switch(m->fill_type) {
7155         case CS_MATRIX_SCALAR:
7156         case CS_MATRIX_SCALAR_SYM:
7157           vector_multiply = _mat_vec_p_l_native_omp;
7158           break;
7159         case CS_MATRIX_BLOCK_D:
7160         case CS_MATRIX_BLOCK_D_66:
7161         case CS_MATRIX_BLOCK_D_SYM:
7162           vector_multiply = _b_mat_vec_p_l_native_omp;
7163           break;
7164         default:
7165           vector_multiply = NULL;
7166         }
7167 
7168         _variant_add(_("native, OpenMP"),
7169                      m->type,
7170                      m->fill_type,
7171                      2, /* ed_flag */
7172                      vector_multiply,
7173                      n_variants,
7174                      &n_variants_max,
7175                      m_variant);
7176       }
7177 
7178       switch(m->fill_type) {
7179       case CS_MATRIX_SCALAR:
7180       case CS_MATRIX_SCALAR_SYM:
7181         vector_multiply = _mat_vec_p_l_native_omp_atomic;
7182         break;
7183       case CS_MATRIX_BLOCK_D:
7184       case CS_MATRIX_BLOCK_D_66:
7185       case CS_MATRIX_BLOCK_D_SYM:
7186         vector_multiply = _b_mat_vec_p_l_native_omp_atomic;
7187         break;
7188       default:
7189         vector_multiply = NULL;
7190       }
7191 
7192       _variant_add(_("native, OpenMP atomic"),
7193                    m->type,
7194                    m->fill_type,
7195                    2, /* ed_flag */
7196                    vector_multiply,
7197                    n_variants,
7198                    &n_variants_max,
7199                    m_variant);
7200 
7201 #endif
7202 
7203       if (m->numbering->type == CS_NUMBERING_VECTORIZE) {
7204 
7205         switch(m->fill_type) {
7206         case CS_MATRIX_SCALAR:
7207         case CS_MATRIX_SCALAR_SYM:
7208           vector_multiply = _mat_vec_p_l_native_vector;
7209           break;
7210         default:
7211           vector_multiply = NULL;
7212         }
7213 
7214         _variant_add(_("native, vectorized"),
7215                      m->type,
7216                      m->fill_type,
7217                      2, /* ed_flag */
7218                      vector_multiply,
7219                      n_variants,
7220                      &n_variants_max,
7221                      m_variant);
7222 
7223       }
7224 
7225     }
7226 
7227   }
7228 
7229   if (m->type == CS_MATRIX_CSR) {
7230 
7231     switch(m->fill_type) {
7232     case CS_MATRIX_SCALAR:
7233     case CS_MATRIX_SCALAR_SYM:
7234       vector_multiply = _mat_vec_p_l_csr;
7235       break;
7236     default:
7237       vector_multiply = NULL;
7238     }
7239 
7240     _variant_add(_("CSR"),
7241                  m->type,
7242                  m->fill_type,
7243                  2, /* ed_flag */
7244                  vector_multiply,
7245                  n_variants,
7246                  &n_variants_max,
7247                  m_variant);
7248 
7249 #if defined(HAVE_MKL)
7250 
7251     switch(m->fill_type) {
7252     case CS_MATRIX_SCALAR:
7253     case CS_MATRIX_SCALAR_SYM:
7254       vector_multiply = _mat_vec_p_l_csr_mkl;
7255       break;
7256     default:
7257       vector_multiply = NULL;
7258     }
7259 
7260     _variant_add(_("CSR, with MKL"),
7261                  m->type,
7262                  m->fill_type,
7263                  0, /* ed_flag */
7264                  vector_multiply,
7265                  n_variants,
7266                  &n_variants_max,
7267                  m_variant);
7268 
7269 #endif /* defined(HAVE_MKL) */
7270 
7271   }
7272 
7273   if (m->type == CS_MATRIX_MSR) {
7274 
7275       switch(m->fill_type) {
7276       case CS_MATRIX_SCALAR:
7277       case CS_MATRIX_SCALAR_SYM:
7278         vector_multiply = _mat_vec_p_l_msr;
7279         break;
7280       case CS_MATRIX_BLOCK_D:
7281       case CS_MATRIX_BLOCK_D_66:
7282       case CS_MATRIX_BLOCK_D_SYM:
7283         vector_multiply = _b_mat_vec_p_l_msr;
7284         break;
7285       case CS_MATRIX_BLOCK:
7286         vector_multiply = _bb_mat_vec_p_l_msr;
7287         break;
7288       default:
7289         vector_multiply = NULL;
7290       }
7291 
7292     _variant_add(_("MSR"),
7293                  m->type,
7294                  m->fill_type,
7295                  2, /* ed_flag */
7296                  vector_multiply,
7297                  n_variants,
7298                  &n_variants_max,
7299                  m_variant);
7300 
7301 #if defined(HAVE_MKL)
7302 
7303     switch(m->fill_type) {
7304     case CS_MATRIX_SCALAR:
7305     case CS_MATRIX_SCALAR_SYM:
7306       vector_multiply = _mat_vec_p_l_msr_mkl;
7307       break;
7308     default:
7309       vector_multiply = NULL;
7310     }
7311 
7312     _variant_add(_("MSR, with MKL"),
7313                  m->type,
7314                  m->fill_type,
7315                  2, /* ed_flag */
7316                  vector_multiply,
7317                  n_variants,
7318                  &n_variants_max,
7319                  m_variant);
7320 
7321 #endif /* defined(HAVE_MKL) */
7322 
7323 #if defined(HAVE_OPENMP)
7324 
7325     if (omp_get_num_threads() > 1) {
7326 
7327       switch(m->fill_type) {
7328       case CS_MATRIX_SCALAR:
7329       case CS_MATRIX_SCALAR_SYM:
7330         vector_multiply = _mat_vec_p_l_msr_omp_sched;
7331         break;
7332       default:
7333         vector_multiply = NULL;
7334       }
7335 
7336       _variant_add(_("MSR, OpenMP scheduling"),
7337                    m->type,
7338                    m->fill_type,
7339                    2, /* ed_flag */
7340                    vector_multiply,
7341                    n_variants,
7342                    &n_variants_max,
7343                    m_variant);
7344 
7345     }
7346 
7347 #endif /* defined(HAVE_OPENMP) */
7348 
7349   }
7350 
7351   n_variants_max = *n_variants;
7352   BFT_REALLOC(*m_variant, *n_variants, cs_matrix_variant_t);
7353 }
7354 
7355 /*----------------------------------------------------------------------------*/
7356 /*!
7357  * \brief Destroy a matrix variant structure.
7358  *
7359  * \param[in, out]  mv  pointer to matrix variant pointer
7360  */
7361 /*----------------------------------------------------------------------------*/
7362 
7363 void
cs_matrix_variant_destroy(cs_matrix_variant_t ** mv)7364 cs_matrix_variant_destroy(cs_matrix_variant_t  **mv)
7365 {
7366   if (mv != NULL)
7367     BFT_FREE(*mv);
7368 }
7369 
7370 /*----------------------------------------------------------------------------*/
7371 /*!
7372  * \brief Apply a variant to a given matrix
7373  *
7374  * \param[in, out]  m   pointer to matrix
7375  * \param[in]       mv  pointer to matrix variant pointer
7376  */
7377 /*----------------------------------------------------------------------------*/
7378 
7379 void
cs_matrix_variant_apply(cs_matrix_t * m,cs_matrix_variant_t * mv)7380 cs_matrix_variant_apply(cs_matrix_t          *m,
7381                         cs_matrix_variant_t  *mv)
7382 {
7383   if (m == NULL || mv == NULL)
7384     return;
7385 
7386   if (   m->type < 0 || m->type > CS_MATRIX_N_BUILTIN_TYPES
7387       || m->fill_type < 0 || m->fill_type > CS_MATRIX_N_FILL_TYPES)
7388     return;
7389 
7390   for (int i = 0; i < 2; i++)
7391     m->vector_multiply[m->fill_type][i] = mv->vector_multiply[i];
7392 }
7393 
7394 /*----------------------------------------------------------------------------*/
7395 /*!
7396  * \brief Select the sparse matrix-vector product function to be used by a
7397  * matrix variant for a given fill type.
7398  *
7399  * Currently, possible variant functions are:
7400  *
7401  *   CS_MATRIX_NATIVE  (all fill types)
7402  *     default
7403  *     standard
7404  *     omp             (for OpenMP with compatible numbering)
7405  *     omp_atomic      (for OpenMP with atomics)
7406  *     vector          (For vector machine with compatible numbering)
7407  *
7408  *   CS_MATRIX_CSR     (for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
7409  *     default
7410  *     standard
7411  *     mkl             (with MKL)
7412  *
7413  *   CS_MATRIX_MSR     (all fill types except CS_MATRIX_33_BLOCK)
7414  *     default
7415  *     standard
7416  *     mkl             (with MKL, for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
7417  *     omp_sched       (For OpenMP with scheduling)
7418  *
7419  * parameters:
7420  *   mv        <-> Pointer to matrix variant
7421  *   numbering <-- mesh numbering info, or NULL
7422  *   fill type <-- matrix fill type to merge from
7423  *   ed_flag   <-- 0: with diagonal only, 1 exclude only; 2; both
7424  *   func_name <-- function type name
7425  */
7426 /*----------------------------------------------------------------------------*/
7427 
7428 void
cs_matrix_variant_set_func(cs_matrix_variant_t * mv,const cs_numbering_t * numbering,cs_matrix_fill_type_t fill_type,int ed_flag,const char * func_name)7429 cs_matrix_variant_set_func(cs_matrix_variant_t     *mv,
7430                            const cs_numbering_t    *numbering,
7431                            cs_matrix_fill_type_t    fill_type,
7432                            int                      ed_flag,
7433                            const char              *func_name)
7434 {
7435   int s_id = (ed_flag != 1) ? 0 : 1;
7436   int e_id = (ed_flag != 0) ? 2 : 1;
7437 
7438   for (int j = s_id; j < e_id; j++) {
7439 
7440     int retcode = _set_spmv_func(mv->type,
7441                                  numbering,
7442                                  fill_type,
7443                                  j,
7444                                  func_name,
7445                                  mv->vector_multiply);
7446 
7447   if (retcode == 1)
7448     bft_error
7449       (__FILE__, __LINE__, 0,
7450        _("Assignment of matrix.vector product \"%s\" to matrix variant \"%s\"\n"
7451          "of type \"%s\" for fill \"%s\" not allowed."),
7452        func_name, mv->name[j], _matrix_type_name[mv->type],
7453        cs_matrix_fill_type_name[fill_type]);
7454   else if (retcode == 2)
7455     bft_error
7456       (__FILE__, __LINE__, 0,
7457        _("Matrix.vector product function type \"%s\"\n"
7458          "is not available in this build."),
7459        func_name);
7460   }
7461 }
7462 
7463 /*----------------------------------------------------------------------------*/
7464 /*!
7465  * \brief Get the type associated with a matrix variant.
7466  *
7467  * \param[in]  mv  pointer to matrix variant structure
7468  */
7469 /*----------------------------------------------------------------------------*/
7470 
7471 cs_matrix_type_t
cs_matrix_variant_type(const cs_matrix_variant_t * mv)7472 cs_matrix_variant_type(const cs_matrix_variant_t  *mv)
7473 {
7474   return mv->type;
7475 }
7476 
7477 /*----------------------------------------------------------------------------*/
7478 
7479 END_C_DECLS
7480