1 /*============================================================================
2 * Sparse Matrix Representation and Operations.
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 /*
28 * Notes:
29 *
30 * The aim of these structures and associated functions is multiple:
31 *
32 * - Provide an "opaque" matrix object for linear solvers, allowing possible
33 * choice of the matrix type based on run-time tuning at code initialization
34 * (depending on matrix size, architecture, and compiler, the most efficient
35 * structure for matrix.vector products may vary).
36 *
37 * - Provide at least a CSR matrix structure in addition to the "native"
 * matrix structure, as this may allow us to leverage existing libraries.
39 *
40 * - Provide a C interface, also so as to be able to interface more easily
41 * with external libraries.
42 *
43 * The structures used here could easily be extended to block matrixes,
44 * using for example the same structure information with 3x3 blocks which
45 * could arise from coupled velocity components. This would imply that the
46 * corresponding vectors be interlaced (or an interlaced copy be used
47 * for recurring operations such as sparse linear system resolution),
48 * for better memory locality, and possible loop unrolling.
49 */
50
51 #include "cs_defs.h"
52
53 /*----------------------------------------------------------------------------
54 * Standard C library headers
55 *----------------------------------------------------------------------------*/
56
57 #include <stdarg.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <assert.h>
62 #include <math.h>
63
64 #if defined(HAVE_MPI)
65 #include <mpi.h>
66 #endif
67
68 #if defined (HAVE_MKL)
69 #include <mkl_spblas.h>
70 #endif
71
72 /*----------------------------------------------------------------------------
73 * Local headers
74 *----------------------------------------------------------------------------*/
75
76 #include "bft_mem.h"
77 #include "bft_error.h"
78 #include "bft_printf.h"
79
80 #include "cs_base.h"
81 #include "cs_blas.h"
82 #include "cs_halo.h"
83 #include "cs_halo_perio.h"
84 #include "cs_log.h"
85 #include "cs_numbering.h"
86 #include "cs_prototypes.h"
87 #include "cs_sort.h"
88 #include "cs_timer.h"
89
90 /*----------------------------------------------------------------------------
91 * Header for the current file
92 *----------------------------------------------------------------------------*/
93
94 #include "cs_matrix.h"
95 #include "cs_matrix_priv.h"
96
97 /*----------------------------------------------------------------------------*/
98
99 BEGIN_C_DECLS
100
101 /*----------------------------------------------------------------------------*/
102 /*! \file cs_matrix.c
103 *
104 * \brief Sparse Matrix Representation and Operations.
105 *
106 * Please refer to the
107 * <a href="../../theory.pdf#matrix"><b>matrix</b></a> section of the
 * theory guide for more information.
109 */
110 /*----------------------------------------------------------------------------*/
111
112 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
113
114 /*=============================================================================
115 * Local Macro Definitions
116 *============================================================================*/
117
118 /* Cache line multiple, in cs_real_t units */
119
static const cs_lnum_t _cs_cl = (CS_CL_SIZE/8);  /* assumes 8-byte cs_real_t;
                                                    CS_CL_SIZE presumably in
                                                    bytes — confirm cs_defs.h */
121
122 /*=============================================================================
123 * Local Type Definitions
124 *============================================================================*/
125
126 /* Note that most types are declared in cs_matrix_priv.h.
127 only those only handled here are declared here. */
128
129 /*============================================================================
130 * Global variables
131 *============================================================================*/
132
133 /* Short names for matrix types */
134
/* NOTE(review): entry order is assumed to match the matrix type enum
   (native, CSR, MSR, external) — confirm against cs_matrix.h. */
static const char *_matrix_type_name[] = {N_("native"),
                                          N_("CSR"),
                                          N_("MSR"),
                                          N_("external")};
139
140 /* Full names for matrix types */
141
/* Long, human-readable names; must stay in the same order as
   _matrix_type_name above. */
static const char
*_matrix_type_fullname[] = {N_("diagonal + faces"),
                            N_("Compressed Sparse Row"),
                            N_("Modified Compressed Sparse Row"),
                            N_("External")};
147
148 /* Fill type names for matrices */
149
/* Printable names of fill types (public symbol, non-static);
   NOTE(review): order is assumed to match the fill type enum used
   by cs_matrix_get_fill_type — confirm against cs_matrix.h. */
const char *cs_matrix_fill_type_name[] = {"CS_MATRIX_SCALAR",
                                          "CS_MATRIX_SCALAR_SYM",
                                          "CS_MATRIX_BLOCK_D",
                                          "CS_MATRIX_BLOCK_D_66",
                                          "CS_MATRIX_BLOCK_D_SYM",
                                          "CS_MATRIX_BLOCK"};
156
157 #if defined (HAVE_MKL)
158
/* Error message for MKL-based product variants that are asked for a
   diagonal-excluded product they do not support; "%s" receives the
   variant's function name. */
static char _no_exclude_diag_error_str[]
  = N_("Matrix product variant using function %s\n"
       "does not handle case with excluded diagonal.");
162
163 #endif
164
165 /*============================================================================
166 * Private function definitions
167 - *============================================================================*/
168
169 /*----------------------------------------------------------------------------
170 * Set matrix fill metadata.
171 *
172 * Block sizes are defined by an optional array of 4 values:
173 * 0: useful block size, 1: vector block extents,
174 * 2: matrix line extents, 3: matrix line*column extents
175 *
176 * parameters:
177 * matrix <-> pointer to matrix structure
178 * symmetric <-- indicates if matrix coefficients are symmetric
179 * diag_block_size <-- block sizes for diagonal, or NULL
180 * extra_diag_block_size <-- block sizes for extra diagonal, or NULL
181 *----------------------------------------------------------------------------*/
182
183 static void
_set_fill_info(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t diag_block_size[4],const cs_lnum_t extra_diag_block_size[4])184 _set_fill_info(cs_matrix_t *matrix,
185 bool symmetric,
186 const cs_lnum_t diag_block_size[4],
187 const cs_lnum_t extra_diag_block_size[4])
188 {
189 matrix->symmetric = symmetric;
190
191 if (diag_block_size == NULL) {
192 for (int i = 0; i < 4; i++)
193 matrix->db_size[i] = 1;
194 }
195 else {
196 for (int i = 0; i < 4; i++)
197 matrix->db_size[i] = diag_block_size[i];
198 }
199
200 if (extra_diag_block_size == NULL) {
201 for (int i = 0; i < 4; i++)
202 matrix->eb_size[i] = 1;
203 }
204 else {
205 for (int i = 0; i < 4; i++)
206 matrix->eb_size[i] = extra_diag_block_size[i];
207 }
208
209 /* Set fill type */
210
211 matrix->fill_type = cs_matrix_get_fill_type(symmetric,
212 diag_block_size,
213 extra_diag_block_size);
214 }
215
216 /*----------------------------------------------------------------------------
217 * Clear matrix fill metadata.
218 *
219 * parameters:
220 * matrix <-> pointer to matrix structure
221 *----------------------------------------------------------------------------*/
222
223 static void
_clear_fill_info(cs_matrix_t * matrix)224 _clear_fill_info(cs_matrix_t *matrix)
225 {
226 matrix->symmetric = false;
227
228 for (int i = 0; i < 4; i++) {
229 matrix->db_size[i] = 0;
230 matrix->eb_size[i] = 0;
231 }
232
233 matrix->fill_type = CS_MATRIX_N_FILL_TYPES;
234 }
235
236 /*----------------------------------------------------------------------------
237 * Compute matrix-vector product for one dense block: y[i] = a[i].x[i]
238 *
239 * Vectors and blocks may be larger than their useful size, to
240 * improve data alignment.
241 *
242 * parameters:
243 * b_id <-- block id
244 * b_size <-- block size, including padding:
245 * b_size[0]: useful block size
246 * b_size[1]: vector block extents
247 * b_size[2]: matrix line extents
248 * b_size[3]: matrix line*column (block) extents
249 * a <-- pointer to block matrixes array (usually matrix diagonal)
 * x <-- multiplying vector values
251 * y --> resulting vector
252 *----------------------------------------------------------------------------*/
253
254 static inline void
_dense_b_ax(cs_lnum_t b_id,const cs_lnum_t b_size[4],const cs_real_t a[restrict],const cs_real_t x[restrict],cs_real_t y[restrict])255 _dense_b_ax(cs_lnum_t b_id,
256 const cs_lnum_t b_size[4],
257 const cs_real_t a[restrict],
258 const cs_real_t x[restrict],
259 cs_real_t y[restrict])
260 {
261 cs_lnum_t ii, jj;
262
263 for (ii = 0; ii < b_size[0]; ii++) {
264 y[b_id*b_size[1] + ii] = 0.;
265 for (jj = 0; jj < b_size[0]; jj++)
266 y[b_id*b_size[1] + ii]
267 += a[b_id*b_size[3] + ii*b_size[2] + jj]
268 * x[b_id*b_size[1] + jj];
269 }
270 }
271
272 /*----------------------------------------------------------------------------
273 * Compute matrix-vector product for one dense block: y[i] = a[i].x[i]
274 *
275 * This variant uses a fixed 3x3 block, for better compiler optimization.
276 *
277 * parameters:
278 * b_id <-- block id
279 * a <-- pointer to block matrixes array (usually matrix diagonal)
 * x <-- multiplying vector values
281 * y --> resulting vector
282 *----------------------------------------------------------------------------*/
283
284 static inline void
_dense_3_3_ax(cs_lnum_t b_id,const cs_real_t a[restrict],const cs_real_t x[restrict],cs_real_t y[restrict])285 _dense_3_3_ax(cs_lnum_t b_id,
286 const cs_real_t a[restrict],
287 const cs_real_t x[restrict],
288 cs_real_t y[restrict])
289 {
290 y[b_id*3] = a[b_id*9] * x[b_id*3]
291 + a[b_id*9 + 1] * x[b_id*3 + 1]
292 + a[b_id*9 + 2] * x[b_id*3 + 2];
293
294 y[b_id*3 + 1] = a[b_id*9 + 3] * x[b_id*3]
295 + a[b_id*9 + 3 + 1] * x[b_id*3 + 1]
296 + a[b_id*9 + 3 + 2] * x[b_id*3 + 2];
297
298 y[b_id*3 + 2] = a[b_id*9 + 6] * x[b_id*3]
299 + a[b_id*9 + 6 + 1] * x[b_id*3 + 1]
300 + a[b_id*9 + 6 + 2] * x[b_id*3 + 2];
301 }
302
303 /*----------------------------------------------------------------------------
304 * Compute matrix-vector product for one dense block: y[i] = a[i].x[i]
305 *
306 * This variant uses a fixed 6x6 block, for better compiler optimization.
307 *
308 * parameters:
309 * b_id <-- block id
310 * a <-- pointer to block matrixes array (usually matrix diagonal)
 * x <-- multiplying vector values
312 * y --> resulting vector
313 *----------------------------------------------------------------------------*/
314
315 static inline void
_dense_6_6_ax(cs_lnum_t b_id,const cs_real_t a[restrict],const cs_real_t x[restrict],cs_real_t y[restrict])316 _dense_6_6_ax(cs_lnum_t b_id,
317 const cs_real_t a[restrict],
318 const cs_real_t x[restrict],
319 cs_real_t y[restrict])
320 {
321 const cs_lnum_t b_id_6 = b_id*6, b_id_36 = b_id*36;
322
323 y[b_id_6] = a[b_id_36] * x[b_id_6]
324 + a[b_id_36 + 1] * x[b_id_6 + 1]
325 + a[b_id_36 + 2] * x[b_id_6 + 2]
326 + a[b_id_36 + 3] * x[b_id_6 + 3]
327 + a[b_id_36 + 4] * x[b_id_6 + 4]
328 + a[b_id_36 + 5] * x[b_id_6 + 5];
329
330 y[b_id_6 + 1] = a[b_id_36 + 6] * x[b_id_6]
331 + a[b_id_36 + 6 + 1] * x[b_id_6 + 1]
332 + a[b_id_36 + 6 + 2] * x[b_id_6 + 2]
333 + a[b_id_36 + 6 + 3] * x[b_id_6 + 3]
334 + a[b_id_36 + 6 + 4] * x[b_id_6 + 4]
335 + a[b_id_36 + 6 + 5] * x[b_id_6 + 5];
336
337 y[b_id_6 + 2] = a[b_id_36 + 12] * x[b_id_6]
338 + a[b_id_36 + 12 + 1] * x[b_id_6 + 1]
339 + a[b_id_36 + 12 + 2] * x[b_id_6 + 2]
340 + a[b_id_36 + 12 + 3] * x[b_id_6 + 3]
341 + a[b_id_36 + 12 + 4] * x[b_id_6 + 4]
342 + a[b_id_36 + 12 + 5] * x[b_id_6 + 5];
343
344 y[b_id_6 + 3] = a[b_id_36 + 18] * x[b_id_6]
345 + a[b_id_36 + 18 + 1] * x[b_id_6 + 1]
346 + a[b_id_36 + 18 + 2] * x[b_id_6 + 2]
347 + a[b_id_36 + 18 + 3] * x[b_id_6 + 3]
348 + a[b_id_36 + 18 + 4] * x[b_id_6 + 4]
349 + a[b_id_36 + 18 + 5] * x[b_id_6 + 5];
350
351 y[b_id_6 + 4] = a[b_id_36 + 24] * x[b_id_6]
352 + a[b_id_36 + 24 + 1] * x[b_id_6 + 1]
353 + a[b_id_36 + 24 + 2] * x[b_id_6 + 2]
354 + a[b_id_36 + 24 + 3] * x[b_id_6 + 3]
355 + a[b_id_36 + 24 + 4] * x[b_id_6 + 4]
356 + a[b_id_36 + 24 + 5] * x[b_id_6 + 5];
357
358 y[b_id_6 + 5] = a[b_id_36 + 30] * x[b_id_6]
359 + a[b_id_36 + 30 + 1] * x[b_id_6 + 1]
360 + a[b_id_36 + 30 + 2] * x[b_id_6 + 2]
361 + a[b_id_36 + 30 + 3] * x[b_id_6 + 3]
362 + a[b_id_36 + 30 + 4] * x[b_id_6 + 4]
363 + a[b_id_36 + 30 + 5] * x[b_id_6 + 5];
364
365 }
366
367 /*----------------------------------------------------------------------------
368 * Compute matrix-vector product increment for one dense block:
369 * y[i] += a[ij].x[j]
370 *
371 * Vectors and blocks may be larger than their useful size, to
372 * improve data alignment.
373 *
374 * parameters:
375 * b_i <-- block id for i
376 * b_j <-- block id for j
377 * b_ij <-- block id for matrix ij position
378 * b_size <-- block size, including padding:
379 * b_size[0]: useful block size
380 * b_size[1]: vector block extents
381 * b_size[2]: matrix line extents
382 * b_size[3]: matrix line*column (block) extents
383 * a <-- pointer to block matrixes array (usually matrix extra-diagonal)
 * x <-- multiplying vector values
385 * y --> resulting vector
386 *----------------------------------------------------------------------------*/
387
388 static inline void
_dense_eb_ax_add(cs_lnum_t b_i,cs_lnum_t b_j,cs_lnum_t b_ij,const cs_lnum_t b_size[4],const cs_real_t a[restrict],const cs_real_t x[restrict],cs_real_t y[restrict])389 _dense_eb_ax_add(cs_lnum_t b_i,
390 cs_lnum_t b_j,
391 cs_lnum_t b_ij,
392 const cs_lnum_t b_size[4],
393 const cs_real_t a[restrict],
394 const cs_real_t x[restrict],
395 cs_real_t y[restrict])
396 {
397 cs_lnum_t ii, jj;
398
399 for (ii = 0; ii < b_size[0]; ii++) {
400 for (jj = 0; jj < b_size[0]; jj++)
401 y[b_i*b_size[1] + ii]
402 += a[b_ij*b_size[3] + ii*b_size[2] + jj]
403 * x[b_j*b_size[1] + jj];
404 }
405 }
406
407 /*----------------------------------------------------------------------------
408 * y[i] = da[i].x[i], with da possibly NULL
409 *
410 * parameters:
411 * da <-- pointer to coefficients array (usually matrix diagonal)
 * x <-- multiplying vector values
413 * y --> resulting vector
414 * n_elts <-- array size
415 *----------------------------------------------------------------------------*/
416
417 static inline void
_diag_vec_p_l(const cs_real_t da[restrict],const cs_real_t x[restrict],cs_real_t y[restrict],cs_lnum_t n_elts)418 _diag_vec_p_l(const cs_real_t da[restrict],
419 const cs_real_t x[restrict],
420 cs_real_t y[restrict],
421 cs_lnum_t n_elts)
422 {
423 cs_lnum_t ii;
424
425 if (da != NULL) {
426 # pragma omp parallel for if(n_elts > CS_THR_MIN)
427 for (ii = 0; ii < n_elts; ii++)
428 y[ii] = da[ii] * x[ii];
429 }
430 else {
431 # pragma omp parallel for if(n_elts > CS_THR_MIN)
432 for (ii = 0; ii < n_elts; ii++)
433 y[ii] = 0.0;
434 }
435
436 }
437
438 /*----------------------------------------------------------------------------
439 * Block version of y[i] = da[i].x[i], with da possibly NULL
440 *
441 * parameters:
442 * da <-- pointer to coefficients array (usually matrix diagonal)
 * x <-- multiplying vector values
444 * y --> resulting vector
445 * n_elts <-- array size
446 * b_size <-- block size, including padding:
447 * b_size[0]: useful block size
448 * b_size[1]: vector block extents
449 * b_size[2]: matrix line extents
450 * b_size[3]: matrix line*column (block) extents
451 *----------------------------------------------------------------------------*/
452
453 static inline void
_b_diag_vec_p_l(const cs_real_t da[restrict],const cs_real_t x[restrict],cs_real_t y[restrict],cs_lnum_t n_elts,const cs_lnum_t b_size[4])454 _b_diag_vec_p_l(const cs_real_t da[restrict],
455 const cs_real_t x[restrict],
456 cs_real_t y[restrict],
457 cs_lnum_t n_elts,
458 const cs_lnum_t b_size[4])
459 {
460 cs_lnum_t ii;
461
462 if (da != NULL) {
463 # pragma omp parallel for if(n_elts > CS_THR_MIN)
464 for (ii = 0; ii < n_elts; ii++)
465 _dense_b_ax(ii, b_size, da, x, y);
466 }
467 else {
468 # pragma omp parallel for if(n_elts*b_size[1] > CS_THR_MIN)
469 for (ii = 0; ii < n_elts*b_size[1]; ii++)
470 y[ii] = 0.0;
471 }
472 }
473
474 /*----------------------------------------------------------------------------
475 * Block version of y[i] = da[i].x[i], with da possibly NULL
476 *
477 * This variant uses a fixed 3x3 block, for better compiler optimization.
478 *
479 * parameters:
480 * da <-- pointer to coefficients array (usually matrix diagonal)
 * x <-- multiplying vector values
482 * y --> resulting vector
483 * n_elts <-- array size
484 *----------------------------------------------------------------------------*/
485
486 static inline void
_3_3_diag_vec_p_l(const cs_real_t da[restrict],const cs_real_t x[restrict],cs_real_t y[restrict],cs_lnum_t n_elts)487 _3_3_diag_vec_p_l(const cs_real_t da[restrict],
488 const cs_real_t x[restrict],
489 cs_real_t y[restrict],
490 cs_lnum_t n_elts)
491 {
492 cs_lnum_t ii;
493
494 if (da != NULL) {
495 # pragma omp parallel for if(n_elts*3 > CS_THR_MIN)
496 for (ii = 0; ii < n_elts; ii++)
497 _dense_3_3_ax(ii, da, x, y);
498 }
499 else {
500 # pragma omp parallel for if(n_elts*3 > CS_THR_MIN)
501 for (ii = 0; ii < n_elts*3; ii++)
502 y[ii] = 0.0;
503 }
504 }
505
506 /*----------------------------------------------------------------------------
507 * Block version of y[i] = da[i].x[i], with da possibly NULL
508 *
509 * This variant uses a fixed 6x6 block, for better compiler optimization.
510 *
511 * parameters:
512 * da <-- pointer to coefficients array (usually matrix diagonal)
513 * x <-- multipliying vector values
514 * y --> resulting vector
515 * n_elts <-- array size
516 *----------------------------------------------------------------------------*/
517
518 static inline void
_6_6_diag_vec_p_l(const cs_real_t da[restrict],const cs_real_t x[restrict],cs_real_t y[restrict],cs_lnum_t n_elts)519 _6_6_diag_vec_p_l(const cs_real_t da[restrict],
520 const cs_real_t x[restrict],
521 cs_real_t y[restrict],
522 cs_lnum_t n_elts)
523 {
524 cs_lnum_t ii;
525
526 if (da != NULL) {
527 # pragma omp parallel for if(n_elts*6 > CS_THR_MIN)
528 for (ii = 0; ii < n_elts; ii++)
529 _dense_6_6_ax(ii, da, x, y);
530 }
531 else {
532 # pragma omp parallel for if(n_elts*6 > CS_THR_MIN)
533 for (ii = 0; ii < n_elts*6; ii++)
534 y[ii] = 0.0;
535 }
536 }
537
538 /*----------------------------------------------------------------------------
539 * Set values from y[start_id] to y[end_id] to 0.
540 *
541 * parameters:
542 * y --> resulting vector
543 * start_id <-- start id in array
544 * end_id <-- end id in array
545 *----------------------------------------------------------------------------*/
546
547 static inline void
_zero_range(cs_real_t y[restrict],cs_lnum_t start_id,cs_lnum_t end_id)548 _zero_range(cs_real_t y[restrict],
549 cs_lnum_t start_id,
550 cs_lnum_t end_id)
551 {
552 cs_lnum_t ii;
553
554 # pragma omp parallel for if(end_id - start_id > CS_THR_MIN)
555 for (ii = start_id; ii < end_id; ii++)
556 y[ii] = 0.0;
557 }
558
559 /*----------------------------------------------------------------------------
560 * Set values from y[start_id] to y[end_id] to 0, block version.
561 *
562 * parameters:
563 * y --> resulting vector
564 * start_id <-- start id in array
565 * end_id <-- end id in array
566 * b_size <-- block size, including padding:
567 * b_size[0]: useful block size
568 * b_size[1]: vector block extents
569 *----------------------------------------------------------------------------*/
570
571 static inline void
_b_zero_range(cs_real_t y[restrict],cs_lnum_t start_id,cs_lnum_t end_id,const cs_lnum_t b_size[2])572 _b_zero_range(cs_real_t y[restrict],
573 cs_lnum_t start_id,
574 cs_lnum_t end_id,
575 const cs_lnum_t b_size[2])
576 {
577 cs_lnum_t ii;
578
579 # pragma omp parallel for if((end_id-start_id)*b_size[1] > CS_THR_MIN)
580 for (ii = start_id*b_size[1]; ii < end_id*b_size[1]; ii++)
581 y[ii] = 0.0;
582 }
583
584 /*----------------------------------------------------------------------------
585 * Set values from y[start_id] to y[end_id] to 0, block version.
586 *
587 * parameters:
588 * y --> resulting vector
589 * start_id <-- start id in array
590 * end_id <-- end id in array
591 *----------------------------------------------------------------------------*/
592
593 static inline void
_3_3_zero_range(cs_real_t y[restrict],cs_lnum_t start_id,cs_lnum_t end_id)594 _3_3_zero_range(cs_real_t y[restrict],
595 cs_lnum_t start_id,
596 cs_lnum_t end_id)
597 {
598 cs_lnum_t ii;
599
600 # pragma omp parallel for if((end_id-start_id)*3 > CS_THR_MIN)
601 for (ii = start_id*3; ii < end_id*3; ii++)
602 y[ii] = 0.0;
603 }
604
605 /*----------------------------------------------------------------------------
606 * Set values from y[start_id] to y[end_id] to 0, block version.
607 *
608 * parameters:
609 * y --> resulting vector
610 * start_id <-- start id in array
611 * end_id <-- end id in array
612 *----------------------------------------------------------------------------*/
613
614 static inline void
_6_6_zero_range(cs_real_t y[restrict],cs_lnum_t start_id,cs_lnum_t end_id)615 _6_6_zero_range(cs_real_t y[restrict],
616 cs_lnum_t start_id,
617 cs_lnum_t end_id)
618 {
619 cs_lnum_t ii;
620
621 # pragma omp parallel for if((end_id-start_id)*6 > CS_THR_MIN)
622 for (ii = start_id*6; ii < end_id*6; ii++)
623 y[ii] = 0.0;
624 }
625
626 /*----------------------------------------------------------------------------
627 * Start synchronization of ghost values prior to matrix.vector product
628 *
629 * parameters:
630 * matrix <-- pointer to matrix structure
 * x <-> multiplying vector values (ghost values updated)
632 *
633 * returns:
634 * halo state to use for synchronisation finalisation.
635 *----------------------------------------------------------------------------*/
636
637 static cs_halo_state_t *
_pre_vector_multiply_sync_x_start(const cs_matrix_t * matrix,cs_real_t x[restrict])638 _pre_vector_multiply_sync_x_start(const cs_matrix_t *matrix,
639 cs_real_t x[restrict])
640 {
641 cs_halo_state_t *hs = NULL;
642
643 if (matrix->halo != NULL) {
644
645 hs = cs_halo_state_get_default();
646
647 /* Non-blocked version */
648
649 cs_halo_sync_pack(matrix->halo,
650 CS_HALO_STANDARD,
651 CS_REAL_TYPE,
652 matrix->db_size[1],
653 x,
654 NULL,
655 hs);
656
657 cs_halo_sync_start(matrix->halo, x, hs);
658
659 }
660
661 return hs;
662 }
663
664 /*----------------------------------------------------------------------------
665 * Synchronize ghost values prior to matrix.vector product
666 *
667 * parameters:
668 * matrix <-- pointer to matrix structure
 * x <-> multiplying vector values (ghost values updated)
670 *----------------------------------------------------------------------------*/
671
672 static void
_pre_vector_multiply_sync_x_end(const cs_matrix_t * matrix,cs_halo_state_t * hs,cs_real_t x[restrict])673 _pre_vector_multiply_sync_x_end(const cs_matrix_t *matrix,
674 cs_halo_state_t *hs,
675 cs_real_t x[restrict])
676 {
677 if (hs != NULL) {
678
679 assert(matrix->halo != NULL);
680
681 cs_halo_sync_wait(matrix->halo, x, hs);
682
683 /* Synchronize periodic values */
684
685 #if !defined(_CS_UNIT_MATRIX_TEST) /* unit tests do not link with full library */
686
687 if (matrix->halo->n_transforms > 0) {
688 if (matrix->db_size[0] == 3)
689 cs_halo_perio_sync_var_vect(matrix->halo,
690 CS_HALO_STANDARD,
691 x,
692 matrix->db_size[1]);
693 else if (matrix->db_size[0] == 6)
694 cs_halo_perio_sync_var_sym_tens(matrix->halo,
695 CS_HALO_STANDARD,
696 x);
697 }
698
699 #endif
700 }
701 }
702
703 /*----------------------------------------------------------------------------
704 * Synchronize ghost values prior to matrix.vector product
705 *
706 * parameters:
707 * matrix <-- pointer to matrix structure
 * x <-> multiplying vector values (ghost values updated)
709 *----------------------------------------------------------------------------*/
710
711 void
cs_matrix_pre_vector_multiply_sync(const cs_matrix_t * matrix,cs_real_t * x)712 cs_matrix_pre_vector_multiply_sync(const cs_matrix_t *matrix,
713 cs_real_t *x)
714 {
715 if (matrix->halo != NULL) {
716 cs_halo_state_t *hs = _pre_vector_multiply_sync_x_start(matrix, x);
717 _pre_vector_multiply_sync_x_end(matrix, hs, x);
718 }
719 }
720
721 /*----------------------------------------------------------------------------
722 * Create native matrix structure.
723 *
724 * Note that the structure created maps to the given existing
725 * face -> cell connectivity array, so it must be destroyed before this
726 * array (usually the code's main face -> cell structure) is freed.
727 *
728 * parameters:
729 * n_rows <-- number of local rows
730 * n_cols_ext <-- number of local + ghost columns
731 * n_edges <-- local number of graph edges
732 * edges <-- edges (symmetric row <-> column) connectivity
733 *
734 * returns:
735 * pointer to allocated native matrix structure.
736 *----------------------------------------------------------------------------*/
737
738 static cs_matrix_struct_native_t *
_create_struct_native(cs_lnum_t n_rows,cs_lnum_t n_cols_ext,cs_lnum_t n_edges,const cs_lnum_t edges[][2])739 _create_struct_native(cs_lnum_t n_rows,
740 cs_lnum_t n_cols_ext,
741 cs_lnum_t n_edges,
742 const cs_lnum_t edges[][2])
743 {
744 cs_matrix_struct_native_t *ms;
745
746 /* Allocate and map */
747
748 BFT_MALLOC(ms, 1, cs_matrix_struct_native_t);
749
750 /* Allocate and map */
751
752 ms->n_rows = n_rows;
753 ms->n_cols_ext = n_cols_ext;
754 ms->n_edges = n_edges;
755
756 ms->edges = edges;
757
758 return ms;
759 }
760
761 /*----------------------------------------------------------------------------
762 * Destroy native matrix structure.
763 *
764 * parameters:
765 * ms <-> pointer to native matrix structure pointer
766 *----------------------------------------------------------------------------*/
767
768 static void
_destroy_struct_native(void ** ms)769 _destroy_struct_native(void **ms)
770 {
771 if (ms != NULL && *ms !=NULL) {
772 cs_matrix_struct_native_t *_ms = *ms;
773
774 BFT_FREE(_ms);
775
776 *ms= NULL;
777 }
778 }
779
780 /*----------------------------------------------------------------------------
781 * Create native matrix coefficients.
782 *
783 * returns:
784 * pointer to allocated native coefficients structure.
785 *----------------------------------------------------------------------------*/
786
787 static cs_matrix_coeff_native_t *
_create_coeff_native(void)788 _create_coeff_native(void)
789 {
790 cs_matrix_coeff_native_t *mc;
791
792 /* Allocate */
793
794 BFT_MALLOC(mc, 1, cs_matrix_coeff_native_t);
795
796 /* Initialize */
797
798 mc->symmetric = false;
799 mc->max_db_size = 0;
800 mc->max_eb_size = 0;
801
802 mc->da = NULL;
803 mc->xa = NULL;
804
805 mc->_da = NULL;
806 mc->_xa = NULL;
807
808 return mc;
809 }
810
811 /*----------------------------------------------------------------------------
812 * Destroy native matrix coefficients.
813 *
814 * parameters:
815 * m <-> pointer to matrix structure
816 *----------------------------------------------------------------------------*/
817
818 static void
_destroy_coeff_native(cs_matrix_t * m)819 _destroy_coeff_native(cs_matrix_t *m)
820 {
821 if (m->coeffs != NULL) {
822 cs_matrix_coeff_native_t *mc = m->coeffs;
823
824 BFT_FREE(mc->_xa);
825 BFT_FREE(mc->_da);
826
827 BFT_FREE(m->coeffs);
828 }
829 }
830
831 /*----------------------------------------------------------------------------
832 * Set Native matrix coefficients.
833 *
834 * Depending on current options and initialization, values will be copied
835 * or simply mapped.
836 *
837 * parameters:
838 * matrix <-- pointer to matrix structure
839 * symmetric <-- indicates if extradiagonal values are symmetric
840 * copy <-- indicates if coefficients should be copied
841 * n_edges <-- local number of graph edges
842 * edges <-- edges (symmetric row <-> column) connectivity
843 * da <-- diagonal values
844 * xa <-- extradiagonal values
845 *----------------------------------------------------------------------------*/
846
847 static void
_set_coeffs_native(cs_matrix_t * matrix,bool symmetric,bool copy,cs_lnum_t n_edges,const cs_lnum_t edges[restrict][2],const cs_real_t da[restrict],const cs_real_t xa[restrict])848 _set_coeffs_native(cs_matrix_t *matrix,
849 bool symmetric,
850 bool copy,
851 cs_lnum_t n_edges,
852 const cs_lnum_t edges[restrict][2],
853 const cs_real_t da[restrict],
854 const cs_real_t xa[restrict])
855 {
856 CS_UNUSED(n_edges);
857 CS_UNUSED(edges);
858
859 cs_matrix_coeff_native_t *mc = matrix->coeffs;
860 const cs_matrix_struct_native_t *ms = matrix->structure;
861 mc->symmetric = symmetric;
862
863 /* Map or copy values */
864
865 if (da != NULL) {
866
867 if (copy) {
868 if (mc->_da == NULL || mc->max_db_size < matrix->db_size[3]) {
869 BFT_REALLOC(mc->_da, matrix->db_size[3]*ms->n_rows, cs_real_t);
870 mc->max_db_size = matrix->db_size[3];
871 }
872 memcpy(mc->_da, da, matrix->db_size[3]*sizeof(cs_real_t) * ms->n_rows);
873 mc->da = mc->_da;
874 }
875 else
876 mc->da = da;
877
878 }
879 else {
880 mc->da = NULL;
881 }
882
883 if (xa != NULL) {
884
885 size_t xa_n_vals = ms->n_edges;
886 if (! symmetric)
887 xa_n_vals *= 2;
888
889 if (copy) {
890 if (mc->_xa == NULL || mc->max_eb_size < matrix->eb_size[3]) {
891 BFT_MALLOC(mc->_xa, matrix->eb_size[3]*xa_n_vals, cs_real_t);
892 mc->max_eb_size = matrix->eb_size[3];
893 }
894 memcpy(mc->_xa, xa, matrix->eb_size[3]*xa_n_vals*sizeof(cs_real_t));
895 mc->xa = mc->_xa;
896 }
897 else
898 mc->xa = xa;
899
900 }
901 }
902
903 /*----------------------------------------------------------------------------
904 * Release shared native matrix coefficients.
905 *
906 * parameters:
907 * matrix <-- pointer to matrix structure
908 *----------------------------------------------------------------------------*/
909
910 static void
_release_coeffs_native(cs_matrix_t * matrix)911 _release_coeffs_native(cs_matrix_t *matrix)
912 {
913 cs_matrix_coeff_native_t *mc = matrix->coeffs;
914 if (mc != NULL) {
915 mc->da = NULL;
916 mc->xa = NULL;
917 }
918 }
919
920 /*----------------------------------------------------------------------------
921 * Copy diagonal of native or MSR matrix.
922 *
923 * parameters:
924 * matrix <-- pointer to matrix structure
925 * da --> diagonal (pre-allocated, size: n_rows)
926 *----------------------------------------------------------------------------*/
927
static void
_copy_diagonal_separate(const cs_matrix_t  *matrix,
                        cs_real_t           da[restrict])
{
  /* Locate the separately-stored diagonal, depending on matrix format;
     remains NULL for other formats or when no diagonal was assigned. */

  const cs_real_t *_da = NULL;
  if (matrix->type == CS_MATRIX_NATIVE) {
    const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
    _da = mc->da;
  }
  else if (matrix->type == CS_MATRIX_MSR) {
    const cs_matrix_coeff_msr_t  *mc = matrix->coeffs;
    _da = mc->d_val;
  }
  const cs_lnum_t  n_rows = matrix->n_rows;

  /* Unblocked version */

  if (matrix->db_size[3] == 1) {

    if (_da != NULL) {
#     pragma omp parallel for  if(n_rows > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < n_rows; ii++)
        da[ii] = _da[ii];
    }
    else {
      /* No diagonal set: return zeros rather than leaving da undefined. */
#     pragma omp parallel for  if(n_rows > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < n_rows; ii++)
        da[ii] = 0.0;
    }

  }

  /* Blocked version */

  else {

    /* db_size layout (as used here): [0] block size, [1] output vector
       block stride, [2] intra-block row stride, [3] full block size
       (stride between row blocks in _da) — presumably b, b, b, b*b;
       TODO confirm against the db_size definition. */

    const cs_lnum_t *db_size = matrix->db_size;

    if (_da != NULL) {
      /* Extract only the diagonal entry of each dense diagonal block. */
#     pragma omp parallel for  if(n_rows*db_size[0] > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
        for (cs_lnum_t jj = 0; jj < db_size[0]; jj++)
          da[ii*db_size[1] + jj] = _da[ii*db_size[3] + jj*db_size[2] + jj];
      }
    }
    else {
#     pragma omp parallel for  if(n_rows*db_size[1] > CS_THR_MIN)
      for (cs_lnum_t ii = 0; ii < n_rows*db_size[1]; ii++)
        da[ii] = 0.0;
    }
  }
}
980
981 /*----------------------------------------------------------------------------*/
982 /*!
983 * \brief Get matrix diagonal values for native matrix.
984 *
985 * In case of matrixes with block diagonal coefficients, a pointer to
986 * the complete block diagonal is returned.
987 *
988 * \param[in] matrix pointer to matrix structure
989 *
990 * \return pointer to matrix diagonal array
991 */
992 /*----------------------------------------------------------------------------*/
993
994 static const cs_real_t *
_get_diagonal_native(const cs_matrix_t * matrix)995 _get_diagonal_native(const cs_matrix_t *matrix)
996 {
997 cs_lnum_t ii;
998
999 const cs_real_t *diag = NULL;
1000
1001 cs_matrix_coeff_native_t *mc = matrix->coeffs;
1002
1003 if (mc->da == NULL) {
1004 cs_lnum_t n_rows = matrix->n_rows * matrix->db_size[3];
1005 if (mc->_da == NULL || mc->max_db_size < matrix->db_size[3]) {
1006 BFT_REALLOC(mc->_da, matrix->db_size[3]*matrix->n_rows, cs_real_t);
1007 mc->max_db_size = matrix->db_size[3];
1008 }
1009 # pragma omp parallel for if(n_rows > CS_THR_MIN)
1010 for (ii = 0; ii < n_rows; ii++)
1011 mc->_da[ii] = 0.0;
1012 mc->da = mc->_da;
1013 }
1014
1015 diag = mc->da;
1016
1017 return diag;
1018 }
1019
1020 /*----------------------------------------------------------------------------
1021 * Local matrix.vector product y = A.x with native matrix.
1022 *
1023 * parameters:
1024 * matrix <-- pointer to matrix structure
1025 * exclude_diag <-- exclude diagonal if true,
1026 * sync <-- synchronize ghost cells if true
1027 * x <-> multipliying vector values
1028 * y --> resulting vector
1029 *----------------------------------------------------------------------------*/
1030
1031 static void
_mat_vec_p_l_native(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1032 _mat_vec_p_l_native(const cs_matrix_t *matrix,
1033 bool exclude_diag,
1034 bool sync,
1035 cs_real_t x[restrict],
1036 cs_real_t y[restrict])
1037 {
1038 cs_lnum_t ii, jj, face_id;
1039
1040 const cs_matrix_struct_native_t *ms = matrix->structure;
1041 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1042
1043 const cs_real_t *restrict xa = mc->xa;
1044
1045 /* Initialize ghost cell communication */
1046
1047 cs_halo_state_t *hs
1048 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1049
1050 /* Diagonal part of matrix.vector product */
1051
1052 if (! exclude_diag) {
1053 _diag_vec_p_l(mc->da, x, y, ms->n_rows);
1054 _zero_range(y, ms->n_rows, ms->n_cols_ext);
1055 }
1056 else
1057 _zero_range(y, 0, ms->n_cols_ext);
1058
1059 /* Finalize ghost cell comunication if overlap used */
1060
1061 if (hs != NULL)
1062 cs_halo_sync_wait(matrix->halo, x, hs);
1063
1064 /* non-diagonal terms */
1065
1066 if (mc->xa != NULL) {
1067
1068 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1069
1070 if (mc->symmetric) {
1071
1072 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1073 ii = face_cel_p[face_id][0];
1074 jj = face_cel_p[face_id][1];
1075 y[ii] += xa[face_id] * x[jj];
1076 y[jj] += xa[face_id] * x[ii];
1077 }
1078
1079 }
1080 else {
1081
1082 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1083 ii = face_cel_p[face_id][0];
1084 jj = face_cel_p[face_id][1];
1085 y[ii] += xa[2*face_id] * x[jj];
1086 y[jj] += xa[2*face_id + 1] * x[ii];
1087 }
1088
1089 }
1090
1091 }
1092 }
1093
1094 /*----------------------------------------------------------------------------
1095 * Local matrix.vector product y = A.x with native matrix.
1096 *
1097 * parameters:
1098 * matrix <-- pointer to matrix structure
1099 * exclude_diag <-- exclude diagonal if true,
1100 * sync <-- synchronize ghost cells if true
1101 * x <-> multipliying vector values
1102 * y --> resulting vector
1103 *----------------------------------------------------------------------------*/
1104
1105 static void
_b_mat_vec_p_l_native(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1106 _b_mat_vec_p_l_native(const cs_matrix_t *matrix,
1107 bool exclude_diag,
1108 bool sync,
1109 cs_real_t x[restrict],
1110 cs_real_t y[restrict])
1111 {
1112 cs_lnum_t ii, jj, kk, face_id;
1113
1114 const cs_matrix_struct_native_t *ms = matrix->structure;
1115 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1116
1117 const cs_real_t *restrict xa = mc->xa;
1118 const cs_lnum_t *db_size = matrix->db_size;
1119
1120 /* Initialize ghost cell communication */
1121
1122 cs_halo_state_t *hs
1123 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1124
1125 /* Diagonal part of matrix.vector product */
1126
1127 if (! exclude_diag) {
1128 _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1129 _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1130 }
1131 else
1132 _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1133
1134 /* Finalize ghost cell comunication if overlap used */
1135
1136 if (hs != NULL)
1137 _pre_vector_multiply_sync_x_end(matrix, hs, x);
1138
1139 /* non-diagonal terms */
1140
1141 if (mc->xa != NULL) {
1142
1143 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1144
1145 if (mc->symmetric) {
1146
1147 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1148 ii = face_cel_p[face_id][0];
1149 jj = face_cel_p[face_id][1];
1150 for (kk = 0; kk < db_size[0]; kk++) {
1151 y[ii*db_size[1] + kk] += xa[face_id] * x[jj*db_size[1] + kk];
1152 y[jj*db_size[1] + kk] += xa[face_id] * x[ii*db_size[1] + kk];
1153 }
1154 }
1155 }
1156 else {
1157
1158 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1159 ii = face_cel_p[face_id][0];
1160 jj = face_cel_p[face_id][1];
1161 for (kk = 0; kk < db_size[0]; kk++) {
1162 y[ii*db_size[1] + kk] += xa[2*face_id] * x[jj*db_size[1] + kk];
1163 y[jj*db_size[1] + kk] += xa[2*face_id + 1] * x[ii*db_size[1] + kk];
1164 }
1165 }
1166
1167 }
1168
1169 }
1170
1171 }
1172
1173 /*----------------------------------------------------------------------------
1174 * Local matrix.vector product y = A.x with native matrix.
1175 *
1176 * parameters:
1177 * matrix <-- pointer to matrix structure
1178 * exclude_diag <-- exclude diagonal if true,
1179 * sync <-- synchronize ghost cells if true
1180 * x <-> multipliying vector values
1181 * y --> resulting vector
1182 *----------------------------------------------------------------------------*/
1183
1184 static void
_bb_mat_vec_p_l_native(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1185 _bb_mat_vec_p_l_native(const cs_matrix_t *matrix,
1186 bool exclude_diag,
1187 bool sync,
1188 cs_real_t x[restrict],
1189 cs_real_t y[restrict])
1190 {
1191 cs_lnum_t ii, jj, face_id;
1192
1193 const cs_matrix_struct_native_t *ms = matrix->structure;
1194 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1195
1196 const cs_real_t *restrict xa = mc->xa;
1197 const cs_lnum_t *db_size = matrix->db_size;
1198 const cs_lnum_t *eb_size = matrix->eb_size;
1199
1200 /* Initialize ghost cell communication */
1201
1202 cs_halo_state_t *hs
1203 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1204
1205 /* Diagonal part of matrix.vector product */
1206
1207 if (! exclude_diag) {
1208 _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1209 _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1210 }
1211 else
1212 _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1213
1214 /* Finalize ghost cell comunication if overlap used */
1215
1216 if (hs != NULL)
1217 _pre_vector_multiply_sync_x_end(matrix, hs, x);
1218
1219 /* non-diagonal terms */
1220
1221 if (mc->xa != NULL) {
1222
1223 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1224
1225 if (mc->symmetric) {
1226
1227 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1228 ii = face_cel_p[face_id][0];
1229 jj = face_cel_p[face_id][1];
1230 _dense_eb_ax_add(ii, jj, face_id, eb_size, xa, x, y);
1231 _dense_eb_ax_add(jj, ii, face_id, eb_size, xa, x, y);
1232 }
1233 }
1234 else {
1235
1236 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1237 ii = face_cel_p[face_id][0];
1238 jj = face_cel_p[face_id][1];
1239 _dense_eb_ax_add(ii, jj, 2*face_id, eb_size, xa, x, y);
1240 _dense_eb_ax_add(jj, ii, 2*face_id + 1, eb_size, xa, x, y);
1241 }
1242
1243 }
1244
1245 }
1246
1247 }
1248
1249 /*----------------------------------------------------------------------------
1250 * Local matrix.vector product y = A.x with native matrix.
1251 *
1252 * This variant uses a fixed 3x3 block, for better compiler optimization.
1253 *
1254 * parameters:
1255 * matrix <-- pointer to matrix structure
1256 * exclude_diag <-- exclude diagonal if true,
1257 * sync <-- synchronize ghost cells if true
1258 * x <-> multipliying vector values
1259 * y --> resulting vector
1260 *----------------------------------------------------------------------------*/
1261
1262 static void
_3_3_mat_vec_p_l_native(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1263 _3_3_mat_vec_p_l_native(const cs_matrix_t *matrix,
1264 bool exclude_diag,
1265 bool sync,
1266 cs_real_t x[restrict],
1267 cs_real_t y[restrict])
1268 {
1269 cs_lnum_t ii, jj, kk, face_id;
1270
1271 const cs_matrix_struct_native_t *ms = matrix->structure;
1272 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1273
1274 const cs_real_t *restrict xa = mc->xa;
1275
1276 assert(matrix->db_size[0] == 3 && matrix->db_size[3] == 9);
1277
1278 /* Initialize ghost cell communication */
1279
1280 cs_halo_state_t *hs
1281 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1282
1283 /* Diagonal part of matrix.vector product */
1284
1285 if (! exclude_diag) {
1286 _3_3_diag_vec_p_l(mc->da, x, y, ms->n_rows);
1287 _3_3_zero_range(y, ms->n_rows, ms->n_cols_ext);
1288 }
1289 else
1290 _3_3_zero_range(y, 0, ms->n_cols_ext);
1291
1292 /* Finalize ghost cell comunication */
1293
1294 if (hs != NULL)
1295 _pre_vector_multiply_sync_x_end(matrix, hs, x);
1296
1297 /* non-diagonal terms */
1298
1299 if (mc->xa != NULL) {
1300
1301 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1302
1303 if (mc->symmetric) {
1304
1305 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1306 ii = face_cel_p[face_id][0];
1307 jj = face_cel_p[face_id][1];
1308 for (kk = 0; kk < 3; kk++) {
1309 y[ii*3 + kk] += xa[face_id] * x[jj*3 + kk];
1310 y[jj*3 + kk] += xa[face_id] * x[ii*3 + kk];
1311 }
1312 }
1313 }
1314 else {
1315
1316 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1317 ii = face_cel_p[face_id][0];
1318 jj = face_cel_p[face_id][1];
1319 for (kk = 0; kk < 3; kk++) {
1320 y[ii*3 + kk] += xa[2*face_id] * x[jj*3 + kk];
1321 y[jj*3 + kk] += xa[2*face_id + 1] * x[ii*3 + kk];
1322 }
1323 }
1324
1325 }
1326
1327 }
1328
1329 }
1330
1331 /*----------------------------------------------------------------------------
1332 * Local matrix.vector product y = A.x with native matrix.
1333 *
1334 * This variant uses a fixed 6x6 block, for better compiler optimization.
1335 *
1336 * parameters:
1337 * matrix <-- pointer to matrix structure
1338 * exclude_diag <-- exclude diagonal if true,
1339 * sync <-- synchronize ghost cells if true
1340 * x <-> multipliying vector values
1341 * y --> resulting vector
1342 *----------------------------------------------------------------------------*/
1343
1344 static void
_6_6_mat_vec_p_l_native(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1345 _6_6_mat_vec_p_l_native(const cs_matrix_t *matrix,
1346 bool exclude_diag,
1347 bool sync,
1348 cs_real_t x[restrict],
1349 cs_real_t y[restrict])
1350 {
1351 cs_lnum_t ii, jj, kk, face_id;
1352
1353 const cs_matrix_struct_native_t *ms = matrix->structure;
1354 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1355
1356 const cs_real_t *restrict xa = mc->xa;
1357
1358 assert(matrix->db_size[0] == 6 && matrix->db_size[3] == 36);
1359
1360 /* Initialize ghost cell communication */
1361
1362 cs_halo_state_t *hs
1363 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1364
1365 /* Diagonal part of matrix.vector product */
1366
1367 if (! exclude_diag) {
1368 _6_6_diag_vec_p_l(mc->da, x, y, ms->n_rows);
1369 _6_6_zero_range(y, ms->n_rows, ms->n_cols_ext);
1370 }
1371 else
1372 _6_6_zero_range(y, 0, ms->n_cols_ext);
1373
1374 /* Finalize ghost cell comunication if overlap used */
1375
1376 if (hs != NULL)
1377 _pre_vector_multiply_sync_x_end(matrix, hs, x);
1378
1379 /* non-diagonal terms */
1380
1381 if (mc->xa != NULL) {
1382
1383 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1384
1385 if (mc->symmetric) {
1386
1387 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1388 ii = face_cel_p[face_id][0];
1389 jj = face_cel_p[face_id][1];
1390 for (kk = 0; kk < 6; kk++) {
1391 y[ii*6 + kk] += xa[face_id] * x[jj*6 + kk];
1392 y[jj*6 + kk] += xa[face_id] * x[ii*6 + kk];
1393 }
1394 }
1395 }
1396 else {
1397
1398 for (face_id = 0; face_id < ms->n_edges; face_id++) {
1399 ii = face_cel_p[face_id][0];
1400 jj = face_cel_p[face_id][1];
1401 for (kk = 0; kk < 6; kk++) {
1402 y[ii*6 + kk] += xa[2*face_id] * x[jj*6 + kk];
1403 y[jj*6 + kk] += xa[2*face_id + 1] * x[ii*6 + kk];
1404 }
1405 }
1406
1407 }
1408
1409 }
1410
1411 }
1412
1413 /*----------------------------------------------------------------------------
1414 * Local matrix.vector product y = A.x with native matrix, blocked version.
1415 *
1416 * This variant uses fixed block size variants for common cases.
1417 *
1418 * parameters:
1419 * matrix <-- pointer to matrix structure
1420 * exclude_diag <-- exclude diagonal if true,
1421 * sync <-- synchronize ghost cells if true
1422 * x <-> multipliying vector values
1423 * y --> resulting vector
1424 *----------------------------------------------------------------------------*/
1425
1426 static void
_b_mat_vec_p_l_native_fixed(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1427 _b_mat_vec_p_l_native_fixed(const cs_matrix_t *matrix,
1428 bool exclude_diag,
1429 bool sync,
1430 cs_real_t x[restrict],
1431 cs_real_t y[restrict])
1432 {
1433 if (matrix->db_size[0] == 3 && matrix->db_size[3] == 9)
1434 _3_3_mat_vec_p_l_native(matrix, exclude_diag, sync, x, y);
1435
1436 else if (matrix->db_size[0] == 6 && matrix->db_size[3] == 36)
1437 _6_6_mat_vec_p_l_native(matrix, exclude_diag, sync, x, y);
1438
1439 else
1440 _b_mat_vec_p_l_native(matrix, exclude_diag, sync, x, y);
1441 }
1442
1443 #if defined(HAVE_OPENMP) /* OpenMP variants */
1444
1445 /*----------------------------------------------------------------------------
1446 * Local matrix.vector product y = A.x with native matrix.
1447 *
1448 * parameters:
1449 * matrix <-- Pointer to matrix structure
1450 * exclude_diag <-- exclude diagonal if true
1451 * sync <-- synchronize ghost cells if true
1452 * x <-> Multipliying vector values
1453 * y --> Resulting vector
1454 *----------------------------------------------------------------------------*/
1455
static void
_mat_vec_p_l_native_omp(const cs_matrix_t  *matrix,
                        bool                exclude_diag,
                        bool                sync,
                        cs_real_t           x[restrict],
                        cs_real_t           y[restrict])
{
  /* Threaded numbering: edges are partitioned into n_groups x n_threads
     ranges; groups are processed sequentially, threads within a group in
     parallel.  The unguarded += below is only safe if ranges assigned to
     different threads within one group touch disjoint rows — presumably
     guaranteed by the CS_NUMBERING_THREADS renumbering (see assert);
     TODO confirm against cs_numbering documentation. */

  const int n_threads = matrix->numbering->n_threads;
  const int n_groups = matrix->numbering->n_groups;
  const cs_lnum_t *group_index = matrix->numbering->group_index;

  const cs_matrix_struct_native_t  *ms = matrix->structure;
  const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
  const cs_real_t  *restrict xa = mc->xa;

  assert(matrix->numbering->type == CS_NUMBERING_THREADS);

  /* Initialize ghost cell communication */

  cs_halo_state_t *hs
    = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;

  /* Diagonal part of matrix.vector product */

  if (! exclude_diag) {
    _diag_vec_p_l(mc->da, x, y, ms->n_rows);
    _zero_range(y, ms->n_rows, ms->n_cols_ext);
  }
  else
    _zero_range(y, 0, ms->n_cols_ext);

  /* Finalize ghost cell comunication if overlap used */

  if (hs != NULL)
    cs_halo_sync_wait(matrix->halo, x, hs);

  /* non-diagonal terms */

  if (mc->xa != NULL) {

    const cs_lnum_2_t *restrict face_cel_p = ms->edges;

    if (mc->symmetric) {

      for (int g_id = 0; g_id < n_groups; g_id++) {

#       pragma omp parallel for
        for (int t_id = 0; t_id < n_threads; t_id++) {

          /* group_index stores [start, end) pairs, indexed by
             (t_id*n_groups + g_id) */
          for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
               face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
               face_id++) {
            cs_lnum_t ii = face_cel_p[face_id][0];
            cs_lnum_t jj = face_cel_p[face_id][1];
            y[ii] += xa[face_id] * x[jj];
            y[jj] += xa[face_id] * x[ii];
          }
        }
      }
    }
    else {

      for (int g_id = 0; g_id < n_groups; g_id++) {

#       pragma omp parallel for
        for (int t_id = 0; t_id < n_threads; t_id++) {

          /* Non-symmetric case: two coefficients (lower/upper) per edge */
          for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
               face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
               face_id++) {
            cs_lnum_t ii = face_cel_p[face_id][0];
            cs_lnum_t jj = face_cel_p[face_id][1];
            y[ii] += xa[2*face_id] * x[jj];
            y[jj] += xa[2*face_id + 1] * x[ii];
          }
        }
      }
    }

  }
}
1537
1538 /*----------------------------------------------------------------------------
1539 * Local matrix.vector product y = A.x with native matrix, blocked version
1540 *
1541 * parameters:
1542 * matrix <-- pointer to matrix structure
1543 * exclude_diag <-- exclude diagonal if true,
1544 * sync <-- synchronize ghost cells if true
1545 * x <-> multipliying vector values
1546 * y --> resulting vector
1547 *----------------------------------------------------------------------------*/
1548
static void
_b_mat_vec_p_l_native_omp(const cs_matrix_t  *matrix,
                          bool                exclude_diag,
                          bool                sync,
                          cs_real_t           x[restrict],
                          cs_real_t           y[restrict])
{
  const cs_lnum_t *db_size = matrix->db_size;

  /* Threaded numbering: edges are partitioned into n_groups x n_threads
     ranges; groups run sequentially, threads within a group in parallel.
     The unguarded += below is only safe if per-thread ranges within one
     group touch disjoint rows — presumably guaranteed by the
     CS_NUMBERING_THREADS renumbering (see assert); TODO confirm. */

  const int n_threads = matrix->numbering->n_threads;
  const int n_groups = matrix->numbering->n_groups;
  const cs_lnum_t *group_index = matrix->numbering->group_index;

  const cs_matrix_struct_native_t  *ms = matrix->structure;
  const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
  const cs_real_t  *restrict xa = mc->xa;

  assert(matrix->numbering->type == CS_NUMBERING_THREADS);

  /* Initialize ghost cell communication */

  cs_halo_state_t *hs
    = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;

  /* Diagonal part of matrix.vector product */

  if (! exclude_diag) {
    _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
    _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
  }
  else
    _b_zero_range(y, 0, ms->n_cols_ext, db_size);

  /* Finalize ghost cell comunication if overlap used */

  if (hs != NULL)
    _pre_vector_multiply_sync_x_end(matrix, hs, x);

  /* non-diagonal terms */

  if (mc->xa != NULL) {

    const cs_lnum_2_t *restrict face_cel_p = ms->edges;

    if (mc->symmetric) {

      for (int g_id = 0; g_id < n_groups; g_id++) {

#       pragma omp parallel for
        for (int t_id = 0; t_id < n_threads; t_id++) {

          /* group_index stores [start, end) pairs, indexed by
             (t_id*n_groups + g_id) */
          for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
               face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
               face_id++) {
            cs_lnum_t ii = face_cel_p[face_id][0];
            cs_lnum_t jj = face_cel_p[face_id][1];
            /* Scalar edge coefficient applied to each block component;
               db_size[1] is the block stride in x and y */
            for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
              y[ii*db_size[1] + kk] += xa[face_id] * x[jj*db_size[1] + kk];
              y[jj*db_size[1] + kk] += xa[face_id] * x[ii*db_size[1] + kk];
            }
          }
        }
      }

    }
    else {

      for (int g_id = 0; g_id < n_groups; g_id++) {

#       pragma omp parallel for
        for (int t_id = 0; t_id < n_threads; t_id++) {

          /* Non-symmetric case: two coefficients (lower/upper) per edge */
          for (cs_lnum_t face_id = group_index[(t_id*n_groups + g_id)*2];
               face_id < group_index[(t_id*n_groups + g_id)*2 + 1];
               face_id++) {
            cs_lnum_t ii = face_cel_p[face_id][0];
            cs_lnum_t jj = face_cel_p[face_id][1];
            for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
              y[ii*db_size[1] + kk] += xa[2*face_id] * x[jj*db_size[1] + kk];
              y[jj*db_size[1] + kk] += xa[2*face_id + 1] * x[ii*db_size[1] + kk];
            }
          }
        }
      }

    }

  }
}
1638
1639 /*----------------------------------------------------------------------------
1640 * Local matrix.vector product y = A.x with native matrix.
1641 *
1642 * parameters:
1643 * matrix <-- pointer to matrix structure
1644 * exclude_diag <-- exclude diagonal if true,
1645 * sync <-- synchronize ghost cells if true
1646 * x <-> multipliying vector values
1647 * y --> resulting vector
1648 *----------------------------------------------------------------------------*/
1649
1650 static void
_mat_vec_p_l_native_omp_atomic(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1651 _mat_vec_p_l_native_omp_atomic(const cs_matrix_t *matrix,
1652 bool exclude_diag,
1653 bool sync,
1654 cs_real_t x[restrict],
1655 cs_real_t y[restrict])
1656 {
1657 const cs_matrix_struct_native_t *ms = matrix->structure;
1658 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1659 const cs_real_t *restrict xa = mc->xa;
1660
1661 /* Initialize ghost cell communication */
1662
1663 cs_halo_state_t *hs
1664 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1665
1666 /* Diagonal part of matrix.vector product */
1667
1668 if (! exclude_diag) {
1669 _diag_vec_p_l(mc->da, x, y, ms->n_rows);
1670 _zero_range(y, ms->n_rows, ms->n_cols_ext);
1671 }
1672 else
1673 _zero_range(y, 0, ms->n_cols_ext);
1674
1675 /* Finalize ghost cell comunication if overlap used */
1676
1677 if (hs != NULL)
1678 cs_halo_sync_wait(matrix->halo, x, hs);
1679
1680 /* non-diagonal terms */
1681
1682 if (mc->xa != NULL) {
1683
1684 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1685
1686 if (mc->symmetric) {
1687
1688 # pragma omp parallel for
1689 for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1690 cs_lnum_t ii = face_cel_p[face_id][0];
1691 cs_lnum_t jj = face_cel_p[face_id][1];
1692 # pragma omp atomic
1693 y[ii] += xa[face_id] * x[jj];
1694 # pragma omp atomic
1695 y[jj] += xa[face_id] * x[ii];
1696 }
1697 }
1698 else {
1699
1700 # pragma omp parallel for
1701 for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1702 cs_lnum_t ii = face_cel_p[face_id][0];
1703 cs_lnum_t jj = face_cel_p[face_id][1];
1704 # pragma omp atomic
1705 y[ii] += xa[2*face_id] * x[jj];
1706 # pragma omp atomic
1707 y[jj] += xa[2*face_id + 1] * x[ii];
1708 }
1709 }
1710
1711 }
1712 }
1713
1714 /*----------------------------------------------------------------------------
1715 * Local matrix.vector product y = A.x with native matrix, blocked version
1716 *
1717 * parameters:
1718 * matrix <-- pointer to matrix structure
1719 * exclude_diag <-- exclude diagonal if true,
1720 * sync <-- synchronize ghost cells if true
1721 * x <-> multipliying vector values
1722 * y --> resulting vector
1723 *----------------------------------------------------------------------------*/
1724
1725 static void
_b_mat_vec_p_l_native_omp_atomic(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])1726 _b_mat_vec_p_l_native_omp_atomic(const cs_matrix_t *matrix,
1727 bool exclude_diag,
1728 bool sync,
1729 cs_real_t x[restrict],
1730 cs_real_t y[restrict])
1731 {
1732 const cs_lnum_t *db_size = matrix->db_size;
1733
1734 const cs_matrix_struct_native_t *ms = matrix->structure;
1735 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
1736 const cs_real_t *restrict xa = mc->xa;
1737
1738 /* Initialize ghost cell communication */
1739
1740 cs_halo_state_t *hs
1741 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
1742
1743 /* Diagonal part of matrix.vector product */
1744
1745 if (! exclude_diag) {
1746 _b_diag_vec_p_l(mc->da, x, y, ms->n_rows, db_size);
1747 _b_zero_range(y, ms->n_rows, ms->n_cols_ext, db_size);
1748 }
1749 else
1750 _b_zero_range(y, 0, ms->n_cols_ext, db_size);
1751
1752 /* Finalize ghost cell comunication if overlap used */
1753
1754 if (hs != NULL)
1755 _pre_vector_multiply_sync_x_end(matrix, hs, x);
1756
1757 /* non-diagonal terms */
1758
1759 if (mc->xa != NULL) {
1760
1761 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
1762
1763 if (mc->symmetric) {
1764
1765 # pragma omp parallel for
1766 for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1767 cs_lnum_t ii = face_cel_p[face_id][0];
1768 cs_lnum_t jj = face_cel_p[face_id][1];
1769 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
1770 # pragma omp atomic
1771 y[ii*db_size[1] + kk] += xa[face_id] * x[jj*db_size[1] + kk];
1772 # pragma omp atomic
1773 y[jj*db_size[1] + kk] += xa[face_id] * x[ii*db_size[1] + kk];
1774 }
1775 }
1776
1777 }
1778 else {
1779
1780 # pragma omp parallel for
1781 for (cs_lnum_t face_id = 0; face_id < ms->n_edges; face_id++) {
1782 cs_lnum_t ii = face_cel_p[face_id][0];
1783 cs_lnum_t jj = face_cel_p[face_id][1];
1784 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
1785 # pragma omp atomic
1786 y[ii*db_size[1] + kk] += xa[2*face_id] * x[jj*db_size[1] + kk];
1787 # pragma omp atomic
1788 y[jj*db_size[1] + kk] += xa[2*face_id+1] * x[ii*db_size[1] + kk];
1789 }
1790 }
1791
1792 }
1793
1794 }
1795 }
1796
1797 #endif /* defined(HAVE_OPENMP) */
1798
1799 /*----------------------------------------------------------------------------
1800 * Local matrix.vector product y = A.x with native matrix.
1801 *
1802 * parameters:
1803 * matrix <-- pointer to matrix structure
1804 * exclude_diag <-- exclude diagonal if true,
1805 * sync <-- synchronize ghost cells if true
1806 * x <-> multipliying vector values
1807 * y --> resulting vector
1808 *----------------------------------------------------------------------------*/
1809
static void
_mat_vec_p_l_native_vector(const cs_matrix_t  *matrix,
                           bool                exclude_diag,
                           bool                sync,
                           cs_real_t           x[restrict],
                           cs_real_t           y[restrict])
{
  cs_lnum_t  ii, jj, face_id;
  const cs_matrix_struct_native_t  *ms = matrix->structure;
  const cs_matrix_coeff_native_t  *mc = matrix->coeffs;
  const cs_real_t  *restrict xa = mc->xa;

  /* The ivdep/simd pragmas below assert that successive iterations do
     not update the same y entries — presumably guaranteed by the
     CS_NUMBERING_VECTORIZE edge renumbering (see assert); TODO confirm
     against cs_numbering documentation. */

  assert(matrix->numbering->type == CS_NUMBERING_VECTORIZE);

  /* Initialize ghost cell communication */

  cs_halo_state_t *hs
    = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;

  /* Diagonal part of matrix.vector product */

  if (! exclude_diag) {
    _diag_vec_p_l(mc->da, x, y, ms->n_rows);
    _zero_range(y, ms->n_rows, ms->n_cols_ext);
  }
  else
    _zero_range(y, 0, ms->n_cols_ext);

  /* Finalize ghost cell comunication if overlap used */

  if (hs != NULL)
    cs_halo_sync_wait(matrix->halo, x, hs);

  /* non-diagonal terms */

  if (mc->xa != NULL) {

    const cs_lnum_2_t *restrict face_cel_p = ms->edges;

    if (mc->symmetric) {

      /* One coefficient per edge */
#     if defined(HAVE_OPENMP_SIMD)
#       pragma omp simd safelen(CS_NUMBERING_SIMD_SIZE)
#     else
#       pragma dir nodep
#       pragma GCC ivdep
#       pragma _NEC ivdep
#     endif
      for (face_id = 0; face_id < ms->n_edges; face_id++) {
        ii = face_cel_p[face_id][0];
        jj = face_cel_p[face_id][1];
        y[ii] += xa[face_id] * x[jj];
        y[jj] += xa[face_id] * x[ii];
      }

    }
    else {

      /* Two coefficients (lower/upper) per edge */
#     if defined(HAVE_OPENMP_SIMD)
#       pragma omp simd safelen(CS_NUMBERING_SIMD_SIZE)
#     else
#       pragma dir nodep
#       pragma GCC ivdep
#       pragma _NEC ivdep
#     endif
      for (face_id = 0; face_id < ms->n_edges; face_id++) {
        ii = face_cel_p[face_id][0];
        jj = face_cel_p[face_id][1];
        y[ii] += xa[2*face_id] * x[jj];
        y[jj] += xa[2*face_id + 1] * x[ii];
      }

    }

  }
}
1886
1887 /*----------------------------------------------------------------------------
1888 * Destroy a CSR matrix structure.
1889 *
1890 * parameters:
1891 * ms <-> pointer to CSR matrix structure pointer
1892 *----------------------------------------------------------------------------*/
1893
1894 static void
_destroy_struct_csr(void ** ms)1895 _destroy_struct_csr(void **ms)
1896 {
1897 if (ms != NULL && *ms !=NULL) {
1898 cs_matrix_struct_csr_t *_ms = *ms;
1899
1900 BFT_FREE(_ms->_row_index);
1901 BFT_FREE(_ms->_col_id);
1902 BFT_FREE(_ms);
1903
1904 *ms= NULL;
1905 }
1906 }
1907
1908 /*----------------------------------------------------------------------------
1909 * Create a CSR matrix structure from a native matrix stucture.
1910 *
1911 * Note that the structure created maps global cell numbers to the given
1912 * existing face -> cell connectivity array, so it must be destroyed before
1913 * this array (usually the code's global cell numbering) is freed.
1914 *
1915 * parameters:
1916 * have_diag <-- indicates if the diagonal is nonzero
1917 * n_rows <-- number of local rows
1918 * n_cols_ext <-- number of local + ghost columns
1919 * n_edges <-- local number of graph edges
1920 * edges <-- edges (symmetric row <-> column) connectivity
1921 *
1922 * returns:
1923 * pointer to allocated CSR matrix structure.
1924 *----------------------------------------------------------------------------*/
1925
static cs_matrix_struct_csr_t *
_create_struct_csr(bool                have_diag,
                   cs_lnum_t           n_rows,
                   cs_lnum_t           n_cols_ext,
                   cs_lnum_t           n_edges,
                   const cs_lnum_2_t  *edges)
{
  cs_lnum_t ii, jj, face_id;
  const cs_lnum_t *restrict face_cel_p;

  cs_lnum_t  diag_elts = 1;
  cs_lnum_t  *ccount = NULL;

  cs_matrix_struct_csr_t  *ms;

  /* Allocate and map */

  BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);

  ms->n_rows = n_rows;
  ms->n_cols_ext = n_cols_ext;

  /* Assume direct assembly possible until duplicate entries are found. */
  ms->direct_assembly = true;
  ms->have_diag = have_diag;

  BFT_MALLOC(ms->_row_index, ms->n_rows + 1, cs_lnum_t);
  ms->row_index = NULL;

  /* Pass 1: count number of nonzero elements per row */

  BFT_MALLOC(ccount, ms->n_rows, cs_lnum_t);

  if (have_diag == false)
    diag_elts = 0;

  for (ii = 0; ii < ms->n_rows; ii++)  /* count starting with diagonal terms */
    ccount[ii] = diag_elts;

  if (edges != NULL) {

    /* Each (symmetric) edge contributes one entry to each of its two
       rows, when that row is local (< n_rows). */

    face_cel_p = (const cs_lnum_t *restrict)edges;

    for (face_id = 0; face_id < n_edges; face_id++) {
      ii = *face_cel_p++;
      jj = *face_cel_p++;
      if (ii < ms->n_rows)
        ccount[ii] += 1;
      if (jj < ms->n_rows)
        ccount[jj] += 1;
    }

  } /* if (edges != NULL) */

  /* Prefix sum to build the row index; ccount is reset to serve as a
     per-row insertion cursor in pass 2 (diagonal slot pre-reserved). */

  ms->_row_index[0] = 0;
  for (ii = 0; ii < ms->n_rows; ii++) {
    ms->_row_index[ii+1] = ms->_row_index[ii] + ccount[ii];
    ccount[ii] = diag_elts; /* pre-count for diagonal terms */
  }

  /* Pass 2: build structure (fill column ids) */

  BFT_MALLOC(ms->_col_id, (ms->_row_index[ms->n_rows]), cs_lnum_t);
  ms->col_id = NULL;

  if (have_diag == true) {
    for (ii = 0; ii < ms->n_rows; ii++) {    /* diagonal terms */
      ms->_col_id[ms->_row_index[ii]] = ii;
    }
  }

  if (edges != NULL) {                   /* non-diagonal terms */

    face_cel_p = (const cs_lnum_t *restrict)edges;

    for (face_id = 0; face_id < n_edges; face_id++) {
      ii = *face_cel_p++;
      jj = *face_cel_p++;
      if (ii < ms->n_rows) {
        ms->_col_id[ms->_row_index[ii] + ccount[ii]] = jj;
        ccount[ii] += 1;
      }
      if (jj < ms->n_rows) {
        ms->_col_id[ms->_row_index[jj] + ccount[jj]] = ii;
        ccount[jj] += 1;
      }
    }

  } /* if (edges != NULL) */

  BFT_FREE(ccount);

  /* Sort line elements by column id (for better access patterns) */

  /* cs_sort_indexed returns whether all entries were already unique;
     duplicates mean assembly must sum contributions (not direct). */

  ms->direct_assembly = cs_sort_indexed(ms->n_rows,
                                        ms->_row_index,
                                        ms->_col_id);

  /* Compact elements if necessary (remove duplicate column ids,
     rebuilding the row index in place) */

  if (ms->direct_assembly == false) {

    cs_lnum_t *tmp_row_index = NULL;
    cs_lnum_t  kk = 0;

    BFT_MALLOC(tmp_row_index, ms->n_rows+1, cs_lnum_t);
    memcpy(tmp_row_index, ms->_row_index, (ms->n_rows+1)*sizeof(cs_lnum_t));

    kk = 0;

    for (ii = 0; ii < ms->n_rows; ii++) {
      cs_lnum_t *col_id = ms->_col_id + ms->_row_index[ii];
      cs_lnum_t n_cols = ms->_row_index[ii+1] - ms->_row_index[ii];
      cs_lnum_t col_id_prev = -1;
      ms->_row_index[ii] = kk;
      /* Columns are sorted, so equal ids are adjacent: keep only the
         first of each run. */
      for (jj = 0; jj < n_cols; jj++) {
        if (col_id_prev != col_id[jj]) {
          ms->_col_id[kk++] = col_id[jj];
          col_id_prev = col_id[jj];
        }
      }
    }
    ms->_row_index[ms->n_rows] = kk;

    /* Compaction only happens when duplicates existed */
    assert(ms->_row_index[ms->n_rows] < tmp_row_index[ms->n_rows]);

    BFT_FREE(tmp_row_index);
    BFT_REALLOC(ms->_col_id, (ms->_row_index[ms->n_rows]), cs_lnum_t);

  }

  /* Map public (read-only) pointers to the private arrays */

  ms->row_index = ms->_row_index;
  ms->col_id = ms->_col_id;

  return ms;
}
2061
2062 /*----------------------------------------------------------------------------
2063 * Create a CSR matrix structure from an index and an array related
2064 * to column id
2065 *
2066 * parameters:
2067 * have_diag <-- indicates if the diagonal structure contains nonzeroes
2068 * transfer <-- transfer property of row_index and col_id
2069 * if true, map them otherwise
2070 * ordered <-- indicates if row entries are already ordered
2071 * n_rows <-- local number of rows
2072 * n_cols_ext <-- local number of columns + ghosts
2073 * row_index <-- pointer to index on rows
 *   col_id     <-> pointer to array of column ids related to the row index
2075 *
2076 * returns:
2077 * a pointer to a created CSR matrix structure
2078 *----------------------------------------------------------------------------*/
2079
2080 static cs_matrix_struct_csr_t *
_create_struct_csr_from_csr(bool have_diag,bool transfer,bool ordered,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,cs_lnum_t ** row_index,cs_lnum_t ** col_id)2081 _create_struct_csr_from_csr(bool have_diag,
2082 bool transfer,
2083 bool ordered,
2084 cs_lnum_t n_rows,
2085 cs_lnum_t n_cols_ext,
2086 cs_lnum_t **row_index,
2087 cs_lnum_t **col_id)
2088 {
2089 cs_matrix_struct_csr_t *ms = NULL;
2090
2091 cs_lnum_t *_row_index = *row_index;
2092 cs_lnum_t *_col_id = *col_id;
2093
2094 /* Allocate and map */
2095
2096 BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
2097
2098 ms->n_rows = n_rows;
2099 ms->n_cols_ext = n_cols_ext;
2100
2101 ms->direct_assembly = false; /* not relevant here */
2102 ms->have_diag = have_diag;
2103
2104 ms->row_index = _row_index;
2105 ms->col_id = _col_id;
2106
2107 ms->_row_index = NULL;
2108 ms->_col_id = NULL;
2109
2110 if (transfer == true) {
2111
2112 ms->_row_index = _row_index;
2113 ms->_col_id = _col_id;
2114
2115 *row_index = NULL;
2116 *col_id = NULL;
2117
2118 /* Sort line elements by column id (for better access patterns) */
2119
2120 if (! ordered)
2121 cs_sort_indexed(ms->n_rows,
2122 ms->_row_index,
2123 ms->_col_id);
2124
2125 }
2126
2127 return ms;
2128 }
2129
2130 /*----------------------------------------------------------------------------
2131 * Create a CSR matrix structure from an index and an array related
2132 * to column id
2133 *
2134 * parameters:
2135 * have_diag <-- indicates if diagonal structure contains nonzeroes
2136 * direct_assembly <-- true if each value corresponds to a unique face
 *   n_rows          <-- local number of rows
2138 * n_cols_ext <-- local number of columns + ghosts
2139 * row_index <-- index on rows
 *   col_id          <-> array of column ids related to the row index
2141 *
2142 * returns:
2143 * a pointer to a created CSR matrix structure
2144 *----------------------------------------------------------------------------*/
2145
2146 static cs_matrix_struct_csr_t *
_create_struct_csr_from_shared(bool have_diag,bool direct_assembly,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,const cs_lnum_t * row_index,const cs_lnum_t * col_id)2147 _create_struct_csr_from_shared(bool have_diag,
2148 bool direct_assembly,
2149 cs_lnum_t n_rows,
2150 cs_lnum_t n_cols_ext,
2151 const cs_lnum_t *row_index,
2152 const cs_lnum_t *col_id)
2153 {
2154 cs_matrix_struct_csr_t *ms = NULL;
2155
2156 /* Allocate and map */
2157
2158 BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
2159
2160 ms->n_rows = n_rows;
2161 ms->n_cols_ext = n_cols_ext;
2162
2163 ms->direct_assembly = direct_assembly;
2164 ms->have_diag = have_diag;
2165
2166 ms->row_index = row_index;
2167 ms->col_id = col_id;
2168
2169 ms->_row_index = NULL;
2170 ms->_col_id = NULL;
2171
2172 return ms;
2173 }
2174
2175 /*----------------------------------------------------------------------------
2176 * Create a CSR matrix structure from the restriction to local rank of
2177 * another CSR matrix structure.
2178 *
2179 * parameters:
2180 * src <-- base matrix structure
2181 *
2182 * returns:
2183 * a pointer to a created CSR matrix structure
2184 *----------------------------------------------------------------------------*/
2185
2186 static cs_matrix_struct_csr_t *
_create_struct_csr_from_restrict_local(const cs_matrix_struct_csr_t * src)2187 _create_struct_csr_from_restrict_local(const cs_matrix_struct_csr_t *src)
2188 {
2189 cs_matrix_struct_csr_t *ms = NULL;
2190
2191 /* Allocate and map */
2192
2193 BFT_MALLOC(ms, 1, cs_matrix_struct_csr_t);
2194
2195 const cs_lnum_t n_rows = src->n_rows;
2196
2197 ms->n_rows = n_rows;
2198 ms->n_cols_ext = n_rows;
2199
2200 ms->direct_assembly = src->direct_assembly;
2201 ms->have_diag = src->have_diag;
2202
2203 BFT_MALLOC(ms->_row_index, ms->n_rows+1, cs_lnum_t);
2204 BFT_MALLOC(ms->_col_id, src->row_index[ms->n_rows], cs_lnum_t);
2205
2206 ms->_row_index[0] = 0;
2207
2208 cs_lnum_t k = 0;
2209
2210 const cs_lnum_t *col_id_s = src->col_id;
2211 cs_lnum_t *col_id_d = ms->_col_id;
2212
2213 for (cs_lnum_t i = 0; i < n_rows; i++) {
2214 const cs_lnum_t s_id = src->row_index[i];
2215 const cs_lnum_t e_id = src->row_index[i+1];
2216 for (cs_lnum_t j = s_id; j < e_id; j++) {
2217 cs_lnum_t c_id = col_id_s[j];
2218 if (c_id < n_rows) {
2219 col_id_d[k] = c_id;
2220 k += 1;
2221 }
2222 }
2223 ms->_row_index[i+1] = k;
2224 }
2225
2226 BFT_REALLOC(ms->_col_id, ms->_row_index[n_rows], cs_lnum_t);
2227
2228 ms->row_index = ms->_row_index;
2229 ms->col_id = ms->_col_id;
2230
2231 return ms;
2232 }
2233
2234 /*----------------------------------------------------------------------------
2235 * Destroy CSR matrix coefficients.
2236 *
2237 * parameters:
2238 * m <-> pointer to matrix structure
2239 *----------------------------------------------------------------------------*/
2240
2241 static void
_destroy_coeff_csr(cs_matrix_t * m)2242 _destroy_coeff_csr(cs_matrix_t *m)
2243 {
2244 if (m->coeffs != NULL) {
2245 cs_matrix_coeff_csr_t *mc = m->coeffs;
2246
2247 BFT_FREE(mc->_val);
2248 BFT_FREE(mc->_d_val);
2249
2250 BFT_FREE(m->coeffs);
2251 }
2252 }
2253
2254 /*----------------------------------------------------------------------------
2255 * Create CSR matrix coefficients.
2256 *
2257 * returns:
2258 * pointer to allocated CSR coefficients structure.
2259 *----------------------------------------------------------------------------*/
2260
2261 static cs_matrix_coeff_csr_t *
_create_coeff_csr(void)2262 _create_coeff_csr(void)
2263 {
2264 cs_matrix_coeff_csr_t *mc = NULL;
2265
2266 /* Allocate */
2267
2268 BFT_MALLOC(mc, 1, cs_matrix_coeff_csr_t);
2269
2270 /* Initialize */
2271
2272 mc->val = NULL;
2273 mc->_val = NULL;
2274
2275 mc->d_val = NULL;
2276 mc->_d_val = NULL;
2277
2278 return mc;
2279 }
2280
2281 /*----------------------------------------------------------------------------
2282 * Set CSR matrix coefficients to zero.
2283 *
2284 * The coefficients should already be allocated.
2285 *
 * Use of this function is preferable to a simple loop, as its
 * threading behavior should be consistent with SpMV in NUMA cases.
2288 *
2289 * parameters:
2290 * matrix <-> pointer to matrix structure
2291 *----------------------------------------------------------------------------*/
2292
2293 static void
_zero_coeffs_csr(cs_matrix_t * matrix)2294 _zero_coeffs_csr(cs_matrix_t *matrix)
2295 {
2296 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2297
2298 const cs_matrix_struct_csr_t *ms = matrix->structure;
2299
2300 const cs_lnum_t n_rows = ms->n_rows;
2301 const cs_lnum_t *eb_size = matrix->eb_size;
2302
2303 if (eb_size[0] == 1) {
2304 # pragma omp parallel for if(n_rows > CS_THR_MIN)
2305 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2306 const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2307 cs_real_t *m_row = mc->_val + ms->row_index[ii];
2308 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
2309 m_row[jj] = 0.0;
2310 }
2311 }
2312 else {
2313 # pragma omp parallel for if(n_rows*eb_size[0] > CS_THR_MIN)
2314 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2315 const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2316 cs_real_t *m_row = mc->_val + ms->row_index[ii]*eb_size[3];
2317 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2318 for (cs_lnum_t kk = 0; kk < eb_size[3]; kk++)
2319 m_row[jj*eb_size[3] + kk] = 0.0;
2320 }
2321 }
2322 }
2323 }
2324
2325 /*----------------------------------------------------------------------------
2326 * Set CSR extradiagonal matrix coefficients for the case where direct
2327 * assignment is possible (i.e. when there are no multiple contributions
2328 * to a given coefficient).
2329 *
2330 * parameters:
2331 * matrix <-- pointer to matrix structure
2332 * symmetric <-- indicates if extradiagonal values are symmetric
2333 * n_edges <-- local number of graph edges
2334 * edges <-- edges (symmetric row <-> column) connectivity
2335 * xa <-- extradiagonal values
2336 *----------------------------------------------------------------------------*/
2337
2338 static void
_set_xa_coeffs_csr_direct(cs_matrix_t * matrix,bool symmetric,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_real_t xa[restrict])2339 _set_xa_coeffs_csr_direct(cs_matrix_t *matrix,
2340 bool symmetric,
2341 cs_lnum_t n_edges,
2342 const cs_lnum_2_t *edges,
2343 const cs_real_t xa[restrict])
2344 {
2345 cs_lnum_t ii, jj, face_id;
2346 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2347
2348 const cs_matrix_struct_csr_t *ms = matrix->structure;
2349
2350 /* Copy extra-diagonal values */
2351
2352 assert(edges != NULL);
2353
2354 const cs_lnum_t *restrict edges_p
2355 = (const cs_lnum_t *restrict)(edges);
2356
2357 if (symmetric == false) {
2358
2359 for (face_id = 0; face_id < n_edges; face_id++) {
2360 cs_lnum_t kk, ll;
2361 ii = *edges_p++;
2362 jj = *edges_p++;
2363 if (ii < ms->n_rows) {
2364 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2365 mc->_val[kk] = xa[2*face_id];
2366 }
2367 if (jj < ms->n_rows) {
2368 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2369 mc->_val[ll] = xa[2*face_id + 1];
2370 }
2371 }
2372
2373 }
2374 else { /* if symmetric == true */
2375
2376 for (face_id = 0; face_id < n_edges; face_id++) {
2377 cs_lnum_t kk, ll;
2378 ii = *edges_p++;
2379 jj = *edges_p++;
2380 if (ii < ms->n_rows) {
2381 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2382 mc->_val[kk] = xa[face_id];
2383 }
2384 if (jj < ms->n_rows) {
2385 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2386 mc->_val[ll] = xa[face_id];
2387 }
2388
2389 }
2390
2391 } /* end of condition on coefficients symmetry */
2392
2393 }
2394
2395 /*----------------------------------------------------------------------------
2396 * Set CSR extradiagonal matrix coefficients for the case where there are
 * multiple contributions to a given coefficient.
2398 *
2399 * The matrix coefficients should have been initialized (i.e. set to 0)
 * some time before using this function.
2401 *
2402 * parameters:
2403 * matrix <-- pointer to matrix structure
2404 * symmetric <-- indicates if extradiagonal values are symmetric
2405 * n_edges <-- local number of graph edges
2406 * edges <-- edges (symmetric row <-> column) connectivity
2407 * xa <-- extradiagonal values
2408 *----------------------------------------------------------------------------*/
2409
2410 static void
_set_xa_coeffs_csr_increment(cs_matrix_t * matrix,bool symmetric,cs_lnum_t n_edges,const cs_lnum_2_t edges[restrict],const cs_real_t xa[restrict])2411 _set_xa_coeffs_csr_increment(cs_matrix_t *matrix,
2412 bool symmetric,
2413 cs_lnum_t n_edges,
2414 const cs_lnum_2_t edges[restrict],
2415 const cs_real_t xa[restrict])
2416 {
2417 cs_lnum_t ii, jj, face_id;
2418 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2419
2420 const cs_matrix_struct_csr_t *ms = matrix->structure;
2421
2422 /* Copy extra-diagonal values */
2423
2424 assert(edges != NULL);
2425
2426 const cs_lnum_t *restrict edges_p
2427 = (const cs_lnum_t *restrict)(edges);
2428
2429 if (symmetric == false) {
2430
2431 for (face_id = 0; face_id < n_edges; face_id++) {
2432 cs_lnum_t kk, ll;
2433 ii = *edges_p++;
2434 jj = *edges_p++;
2435 if (ii < ms->n_rows) {
2436 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2437 mc->_val[kk] += xa[2*face_id];
2438 }
2439 if (jj < ms->n_rows) {
2440 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2441 mc->_val[ll] += xa[2*face_id + 1];
2442 }
2443 }
2444
2445 }
2446 else { /* if symmetric == true */
2447
2448 for (face_id = 0; face_id < n_edges; face_id++) {
2449 cs_lnum_t kk, ll;
2450 ii = *edges_p++;
2451 jj = *edges_p++;
2452 if (ii < ms->n_rows) {
2453 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
2454 mc->_val[kk] += xa[face_id];
2455 }
2456 if (jj < ms->n_rows) {
2457 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
2458 mc->_val[ll] += xa[face_id];
2459 }
2460
2461 }
2462
2463 } /* end of condition on coefficients symmetry */
2464
2465 }
2466
2467 /*----------------------------------------------------------------------------
2468 * Set CSR matrix coefficients.
2469 *
2470 * parameters:
2471 * matrix <-> pointer to matrix structure
2472 * symmetric <-- indicates if extradiagonal values are symmetric
2473 * copy <-- indicates if coefficients should be copied
2474 * n_edges <-- local number of graph edges
2475 * edges <-- edges (symmetric row <-> column) connectivity
2476 * da <-- diagonal values (NULL if all zero)
2477 * xa <-- extradiagonal values (NULL if all zero)
2478 *----------------------------------------------------------------------------*/
2479
static void
_set_coeffs_csr(cs_matrix_t      *matrix,
                bool              symmetric,
                bool              copy,
                cs_lnum_t         n_edges,
                const cs_lnum_t   edges[restrict][2],
                const cs_real_t   da[restrict],
                const cs_real_t   xa[restrict])
{
  CS_UNUSED(copy);

  cs_lnum_t  ii, jj;
  cs_matrix_coeff_csr_t  *mc = matrix->coeffs;

  const cs_matrix_struct_csr_t  *ms = matrix->structure;

  /* Lazily allocate coefficient values on first call */

  if (mc->_val == NULL)
    BFT_MALLOC(mc->_val, ms->row_index[ms->n_rows], cs_real_t);
  mc->val = mc->_val;

  /* Initialize coefficients to zero if assembly is incremental
     (i.e. several edges may contribute to a same coefficient, so
     values are accumulated rather than assigned) */

  if (ms->direct_assembly == false)
    _zero_coeffs_csr(matrix);

  /* Copy diagonal values (zero them if da is NULL); the diagonal
     entry of each row is located by linear search in the sorted row */

  if (ms->have_diag == true) {

    if (da != NULL) {
      for (ii = 0; ii < ms->n_rows; ii++) {
        cs_lnum_t kk;
        for (kk = ms->row_index[ii]; ms->col_id[kk] != ii; kk++);
        mc->_val[kk] = da[ii];
      }
    }
    else {
      for (ii = 0; ii < ms->n_rows; ii++) {
        cs_lnum_t kk;
        for (kk = ms->row_index[ii]; ms->col_id[kk] != ii; kk++);
        mc->_val[kk] = 0.0;
      }
    }

  }

  /* Mark diagonal values as not queried (mc->_d_val not changed) */

  mc->d_val = NULL;

  /* Copy extra-diagonal values */

  if (edges != NULL) {

    if (xa != NULL) {

      /* Direct assignment when each coefficient has a unique edge,
         accumulation (on the zeroed values) otherwise */

      if (ms->direct_assembly == true)
        _set_xa_coeffs_csr_direct(matrix, symmetric, n_edges, edges, xa);
      else
        _set_xa_coeffs_csr_increment(matrix, symmetric, n_edges, edges, xa);

    }
    else { /* if (xa == NULL) */

      /* Explicitly zero off-diagonal entries (needed at least in the
         direct assembly case, where no prior zeroing was done) */

      for (ii = 0; ii < ms->n_rows; ii++) {
        const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
        cs_real_t  *m_row = mc->_val + ms->row_index[ii];
        cs_lnum_t  n_cols = ms->row_index[ii+1] - ms->row_index[ii];

        for (jj = 0; jj < n_cols; jj++) {
          if (col_id[jj] != ii)
            m_row[jj] = 0.0;
        }

      }

    }

  } /* (matrix->edges != NULL) */

}
2561
2562 /*----------------------------------------------------------------------------
2563 * Set CSR matrix coefficients provided in MSR form.
2564 *
2565 * If da and xa are equal to NULL, then initialize val with zeros.
2566 *
2567 * parameters:
2568 * matrix <-> pointer to matrix structure
2569 * row_index <-- MSR row index (0 to n-1)
2570 * col_id <-- MSR column id (0 to n-1)
2571 * d_vals <-- diagonal values (NULL if all zero)
 *   d_vals_transfer <-- diagonal values whose ownership is transferred
2573 * (NULL or d_vals in, NULL out)
2574 * x_vals <-- extradiagonal values (NULL if all zero)
2575 * x_vals_transfer <-- extradiagonal values whose ownership is transferred
2576 * (NULL or x_vals in, NULL out)
2577 *----------------------------------------------------------------------------*/
2578
2579 static void
_set_coeffs_csr_from_msr(cs_matrix_t * matrix,const cs_lnum_t row_index[],const cs_lnum_t col_id[],const cs_real_t d_vals[restrict],cs_real_t ** d_vals_transfer,const cs_real_t x_vals[restrict],cs_real_t ** x_vals_transfer)2580 _set_coeffs_csr_from_msr(cs_matrix_t *matrix,
2581 const cs_lnum_t row_index[],
2582 const cs_lnum_t col_id[],
2583 const cs_real_t d_vals[restrict],
2584 cs_real_t **d_vals_transfer,
2585 const cs_real_t x_vals[restrict],
2586 cs_real_t **x_vals_transfer)
2587 {
2588 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2589
2590 const cs_matrix_struct_csr_t *ms = matrix->structure;
2591
2592 const cs_lnum_t n_rows = ms->n_rows;
2593
2594 /* Sanity check */
2595
2596 if (matrix->db_size[0] > 1 || matrix->eb_size[0] > 1)
2597 bft_error
2598 (__FILE__, __LINE__, 0,
2599 "%s:\n"
2600 " case with diagonal block size %ld en extradiagonal block size %ld\n"
2601 " not implemented.\n",
2602 __func__, (long)matrix->db_size[0], (long)matrix->eb_size[0]);
2603
2604 /* Special configuration where ownership is transferred directly */
2605
2606 /* TODO: we should use metadata or check that the row_index and
2607 column id values are consistent, which should be true as long
2608 as columns are ordered in an identical manner */
2609
2610 if (x_vals_transfer != NULL) {
2611 if (d_vals == NULL && *x_vals_transfer != NULL) {
2612 mc->_val = *x_vals_transfer;
2613 mc->val = mc->_val;
2614 *x_vals_transfer = NULL;
2615 return;
2616 }
2617 }
2618
2619 /* Allocate local array */
2620
2621 if (mc->_val == NULL)
2622 BFT_MALLOC(mc->_val, ms->row_index[ms->n_rows], cs_real_t);
2623
2624 mc->val = mc->_val;
2625
2626 /* Mark diagonal values as not queried (mc->_d_val not changed) */
2627
2628 mc->d_val = NULL;
2629
2630 /* Case with diagonal and extradiagonal values */
2631
2632 if (d_vals != NULL && x_vals != NULL) {
2633
2634 # pragma omp parallel for if(n_rows > CS_THR_MIN)
2635 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2636
2637 const cs_lnum_t *restrict m_col_id = ms->col_id + ms->row_index[ii];
2638 cs_real_t *restrict m_row = mc->_val + ms->row_index[ii];
2639 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2640
2641 const cs_lnum_t *restrict s_col_id = col_id + row_index[ii];
2642 const cs_real_t *restrict s_row = x_vals + ms->row_index[ii];
2643 cs_lnum_t n_s_cols = row_index[ii+1] - row_index[ii];
2644
2645 cs_lnum_t c_id_0 = 0;
2646
2647 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2648 if (m_col_id[jj] == ii)
2649 m_row[jj] = d_vals[ii];
2650 else {
2651 /* Optimize for ordered case */
2652 if (m_col_id[jj] == s_col_id[c_id_0]) {
2653 m_row[jj] = s_row[c_id_0];
2654 c_id_0++;
2655 }
2656 else {
2657 for (cs_lnum_t kk = c_id_0; kk < n_s_cols; kk++) {
2658 if (m_col_id[jj] == s_col_id[kk]) {
2659 m_row[jj] = s_row[kk];
2660 break;
2661 }
2662 }
2663 }
2664 }
2665 }
2666
2667 }
2668 }
2669
2670 /* Case with diagonal values only */
2671
2672 else if (d_vals != NULL) {
2673
2674 # pragma omp parallel for if(n_rows > CS_THR_MIN)
2675 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2676
2677 const cs_lnum_t *restrict m_col_id = ms->col_id + ms->row_index[ii];
2678 cs_real_t *restrict m_row = mc->_val + ms->row_index[ii];
2679 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2680
2681 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2682 if (m_col_id[jj] == ii)
2683 m_row[jj] = d_vals[ii];
2684 else
2685 m_row[jj] = 0.;
2686 }
2687
2688 }
2689 }
2690
2691 /* Case with null-diagonal */
2692
2693 else if (x_vals != NULL) {
2694
2695 # pragma omp parallel for if(n_rows > CS_THR_MIN)
2696 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2697
2698 const cs_lnum_t *restrict m_col_id = ms->col_id + ms->row_index[ii];
2699 cs_real_t *restrict m_row = mc->_val + ms->row_index[ii];
2700 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2701
2702 const cs_lnum_t *restrict s_col_id = col_id + row_index[ii];
2703 const cs_real_t *restrict s_row = x_vals + ms->row_index[ii];
2704 cs_lnum_t n_s_cols = row_index[ii+1] - row_index[ii];
2705
2706 cs_lnum_t c_id_0 = 0;
2707
2708 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2709 if (m_col_id[jj] == ii)
2710 m_row[jj] = 0.;
2711 else {
2712 /* Optimize for ordered case */
2713 if (m_col_id[jj] == s_col_id[c_id_0]) {
2714 m_row[jj] = s_row[c_id_0];
2715 c_id_0++;
2716 }
2717 else {
2718 for (cs_lnum_t kk = c_id_0; kk < n_s_cols; kk++) {
2719 if (m_col_id[jj] == s_col_id[kk]) {
2720 m_row[jj] = s_row[kk];
2721 break;
2722 }
2723 }
2724 }
2725 }
2726 }
2727
2728 }
2729
2730 }
2731
2732 else
2733 _zero_coeffs_csr(matrix);
2734
2735 /* Now free transferred arrays */
2736
2737 if (d_vals_transfer != NULL)
2738 BFT_FREE(*d_vals_transfer);
2739 if (x_vals_transfer != NULL)
2740 BFT_FREE(*x_vals_transfer);
2741 }
2742
2743 /*----------------------------------------------------------------------------
2744 * Release shared CSR matrix coefficients.
2745 *
2746 * parameters:
2747 * matrix <-- pointer to matrix structure
2748 *----------------------------------------------------------------------------*/
2749
2750 static void
_release_coeffs_csr(cs_matrix_t * matrix)2751 _release_coeffs_csr(cs_matrix_t *matrix)
2752 {
2753 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2754 if (mc != NULL)
2755 mc->d_val = NULL;
2756 return;
2757 }
2758
2759 /*----------------------------------------------------------------------------
2760 * Copy diagonal of CSR matrix.
2761 *
2762 * parameters:
2763 * matrix <-- pointer to matrix structure
2764 * da --> diagonal (pre-allocated, size: n_rows)
2765 *----------------------------------------------------------------------------*/
2766
2767 static void
_copy_diagonal_csr(const cs_matrix_t * matrix,cs_real_t * restrict da)2768 _copy_diagonal_csr(const cs_matrix_t *matrix,
2769 cs_real_t *restrict da)
2770 {
2771 const cs_matrix_struct_csr_t *ms = matrix->structure;
2772 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2773 cs_lnum_t n_rows = ms->n_rows;
2774
2775 # pragma omp parallel for if(n_rows > CS_THR_MIN)
2776 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2777
2778 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
2779 const cs_real_t *restrict m_row = mc->val + ms->row_index[ii];
2780 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
2781
2782 da[ii] = 0.0;
2783 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
2784 if (col_id[jj] == ii) {
2785 da[ii] = m_row[jj];
2786 break;
2787 }
2788 }
2789
2790 }
2791 }
2792
2793 /*----------------------------------------------------------------------------*/
2794 /*!
2795 * \brief Get matrix diagonal values for CSR matrix.
2796 *
2797 * In case of matrixes with block diagonal coefficients, a pointer to
2798 * the complete block diagonal is returned.
2799 *
2800 * \param[in] matrix pointer to matrix structure
2801 *
2802 * \return pointer to matrix diagonal array
2803 */
2804 /*----------------------------------------------------------------------------*/
2805
2806 static const cs_real_t *
_get_diagonal_csr(const cs_matrix_t * matrix)2807 _get_diagonal_csr(const cs_matrix_t *matrix)
2808 {
2809 const cs_real_t *diag = NULL;
2810
2811 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2812 assert(matrix->db_size[3] == 1);
2813 if (mc->_d_val == NULL)
2814 BFT_MALLOC(mc->_d_val, matrix->n_rows, cs_real_t);
2815 if (mc->d_val == NULL) {
2816 cs_matrix_copy_diagonal(matrix, mc->_d_val);
2817 mc->d_val = mc->_d_val;
2818 }
2819 diag = mc->d_val;
2820
2821 return diag;
2822 }
2823
2824 /*----------------------------------------------------------------------------*/
2825 /*!
2826 * \brief Function for initialization of CSR matrix coefficients using
2827 * local row ids and column indexes.
2828 *
2829 * \warning The matrix pointer must point to valid data when the selection
2830 * function is called, so the life cycle of the data pointed to
2831 * should be at least as long as that of the assembler values
2832 * structure.
2833 *
2834 * \param[in, out] matrix_p untyped pointer to matrix description structure
2835 * \param[in] db_size optional diagonal block sizes
2836 * \param[in] eb_size optional extra-diagonal block sizes
2837 */
2838 /*----------------------------------------------------------------------------*/
2839
2840 static void
_csr_assembler_values_init(void * matrix_p,const cs_lnum_t db_size[4],const cs_lnum_t eb_size[4])2841 _csr_assembler_values_init(void *matrix_p,
2842 const cs_lnum_t db_size[4],
2843 const cs_lnum_t eb_size[4])
2844 {
2845 CS_UNUSED(db_size);
2846
2847 cs_matrix_t *matrix = (cs_matrix_t *)matrix_p;
2848
2849 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2850
2851 const cs_lnum_t n_rows = matrix->n_rows;
2852 cs_lnum_t e_stride = 1;
2853 if (eb_size != NULL)
2854 e_stride = eb_size[3];
2855
2856 const cs_matrix_struct_csr_t *ms = matrix->structure;
2857
2858 /* Initialize diagonal values */
2859
2860 BFT_REALLOC(mc->_val, e_stride*ms->row_index[ms->n_rows], cs_real_t);
2861 mc->val = mc->_val;
2862
2863 # pragma omp parallel for if(n_rows*db_size[0] > CS_THR_MIN)
2864 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
2865 cs_lnum_t n_s_cols = (ms->row_index[ii+1] - ms->row_index[ii])*e_stride;
2866 cs_lnum_t displ = ms->row_index[ii]*e_stride;
2867 for (cs_lnum_t jj = 0; jj < n_s_cols; jj++)
2868 mc->_val[displ + jj] = 0;
2869 }
2870 }
2871
2872 /*----------------------------------------------------------------------------*/
2873 /*!
2874 * \brief Function for addition to CSR matrix coefficients using
2875 * local row ids and column indexes.
2876 *
2877 * Values whose associated row index is negative should be ignored;
2878 * Values whose column index is -1 are assumed to be assigned to a
 * separately stored diagonal. Other indexes should be valid.
2880 *
2881 * \warning The matrix pointer must point to valid data when the selection
2882 * function is called, so the life cycle of the data pointed to
2883 * should be at least as long as that of the assembler values
2884 * structure.
2885 *
2886 * \remark Note that we pass column indexes (not ids) here; as the
2887 * caller is already assumed to have identified the index
2888 * matching a given column id.
2889 *
2890 * \param[in, out] matrix_p untyped pointer to matrix description structure
2891 * \param[in] n number of values to add
2892 * \param[in] stride associated data block size
2893 * \param[in] row_id associated local row ids
2894 * \param[in] col_idx associated local column indexes
2895 * \param[in] vals pointer to values (size: n*stride)
2896 */
2897 /*----------------------------------------------------------------------------*/
2898
2899 static void
_csr_assembler_values_add(void * matrix_p,cs_lnum_t n,cs_lnum_t stride,const cs_lnum_t row_id[],const cs_lnum_t col_idx[],const cs_real_t vals[])2900 _csr_assembler_values_add(void *matrix_p,
2901 cs_lnum_t n,
2902 cs_lnum_t stride,
2903 const cs_lnum_t row_id[],
2904 const cs_lnum_t col_idx[],
2905 const cs_real_t vals[])
2906 {
2907 cs_matrix_t *matrix = (cs_matrix_t *)matrix_p;
2908
2909 cs_matrix_coeff_csr_t *mc = matrix->coeffs;
2910
2911 const cs_matrix_struct_csr_t *ms = matrix->structure;
2912
2913 if (stride == 1) {
2914
2915 /* Copy instead of test for OpenMP to avoid outlining for small sets */
2916
2917 if (n*stride <= CS_THR_MIN) {
2918 for (cs_lnum_t ii = 0; ii < n; ii++) {
2919 if (row_id[ii] < 0)
2920 continue;
2921 else {
2922 cs_lnum_t r_id = row_id[ii];
2923 mc->_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
2924 }
2925 }
2926 }
2927
2928 else {
2929 # pragma omp parallel for if(n*stride > CS_THR_MIN)
2930 for (cs_lnum_t ii = 0; ii < n; ii++) {
2931 if (row_id[ii] < 0)
2932 continue;
2933 else {
2934 cs_lnum_t r_id = row_id[ii];
2935 mc->_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
2936 }
2937 }
2938 }
2939 }
2940
2941 else { /* if (stride > 1) */
2942
2943 /* Copy instead of test for OpenMP to avoid outlining for small sets */
2944
2945 if (n*stride <= CS_THR_MIN) {
2946 for (cs_lnum_t ii = 0; ii < n; ii++) {
2947 if (row_id[ii] < 0)
2948 continue;
2949 else {
2950 cs_lnum_t r_id = row_id[ii];
2951 cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
2952 for (cs_lnum_t jj = 0; jj < stride; jj++)
2953 mc->_val[displ + jj] += vals[ii*stride + jj];
2954 }
2955 }
2956 }
2957
2958 else {
2959 # pragma omp parallel for if(n*stride > CS_THR_MIN)
2960 for (cs_lnum_t ii = 0; ii < n; ii++) {
2961 if (row_id[ii] < 0)
2962 continue;
2963 else {
2964 cs_lnum_t r_id = row_id[ii];
2965 cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
2966 for (cs_lnum_t jj = 0; jj < stride; jj++)
2967 mc->_val[displ + jj] += vals[ii*stride + jj];
2968 }
2969 }
2970 }
2971 }
2972
2973 }
2974
2975 /*----------------------------------------------------------------------------*/
2976 /*!
2977 * \brief Create and initialize a CSR matrix assembler values structure.
2978 *
2979 * The associated matrix's structure must have been created using
2980 * \ref cs_matrix_structure_create_from_assembler.
2981 *
2982 * Block sizes are defined by an optional array of 4 values:
2983 * 0: useful block size, 1: vector block extents,
2984 * 2: matrix line extents, 3: matrix line*column extents
2985 *
2986 * \param[in, out] matrix pointer to matrix structure
2987 * \param[in] diag_block_size block sizes for diagonal, or NULL
2988 * \param[in] extra_diag_block_size block sizes for extra diagonal,
2989 * or NULL
2990 *
2991 * \return pointer to initialized matrix assembler values structure;
2992 */
2993 /*----------------------------------------------------------------------------*/
2994
2995 static cs_matrix_assembler_values_t *
_assembler_values_create_csr(cs_matrix_t * matrix,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size)2996 _assembler_values_create_csr(cs_matrix_t *matrix,
2997 const cs_lnum_t *diag_block_size,
2998 const cs_lnum_t *extra_diag_block_size)
2999 {
3000 cs_matrix_assembler_values_t *mav
3001 = cs_matrix_assembler_values_create(matrix->assembler,
3002 false,
3003 diag_block_size,
3004 extra_diag_block_size,
3005 (void *)matrix,
3006 _csr_assembler_values_init,
3007 _csr_assembler_values_add,
3008 NULL,
3009 NULL,
3010 NULL);
3011
3012 return mav;
3013 }
3014
3015 /*----------------------------------------------------------------------------
3016 * Local matrix.vector product y = A.x with CSR matrix.
3017 *
3018 * parameters:
3019 * matrix <-- pointer to matrix structure
3020 * exclude_diag <-- exclude diagonal if true,
3021 * sync <-- synchronize ghost cells if true
3022 * x <-> multipliying vector values
3023 * y --> resulting vector
3024 *----------------------------------------------------------------------------*/
3025
3026 static void
_mat_vec_p_l_csr(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t * restrict x,cs_real_t * restrict y)3027 _mat_vec_p_l_csr(const cs_matrix_t *matrix,
3028 bool exclude_diag,
3029 bool sync,
3030 cs_real_t *restrict x,
3031 cs_real_t *restrict y)
3032 {
3033 const cs_matrix_struct_csr_t *ms = matrix->structure;
3034 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
3035 cs_lnum_t n_rows = ms->n_rows;
3036
3037 /* Ghost cell communication */
3038
3039 cs_halo_state_t *hs
3040 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
3041 if (hs != NULL)
3042 cs_halo_sync_wait(matrix->halo, x, hs);
3043
3044 /* Standard case */
3045
3046 if (!exclude_diag) {
3047
3048 # pragma omp parallel for if(n_rows > CS_THR_MIN)
3049 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3050
3051 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
3052 const cs_real_t *restrict m_row = mc->val + ms->row_index[ii];
3053 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3054 cs_real_t sii = 0.0;
3055
3056 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
3057 sii += (m_row[jj]*x[col_id[jj]]);
3058
3059 y[ii] = sii;
3060
3061 }
3062
3063 }
3064
3065 /* Exclude diagonal */
3066
3067 else {
3068
3069 # pragma omp parallel for if(n_rows > CS_THR_MIN)
3070 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3071
3072 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
3073 const cs_real_t *restrict m_row = mc->val + ms->row_index[ii];
3074 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3075 cs_real_t sii = 0.0;
3076
3077 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
3078 if (col_id[jj] != ii)
3079 sii += (m_row[jj]*x[col_id[jj]]);
3080 }
3081
3082 y[ii] = sii;
3083
3084 }
3085 }
3086 }
3087
3088 #if defined (HAVE_MKL)
3089
3090 static void
_mat_vec_p_l_csr_mkl(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t * restrict x,cs_real_t * restrict y)3091 _mat_vec_p_l_csr_mkl(const cs_matrix_t *matrix,
3092 bool exclude_diag,
3093 bool sync,
3094 cs_real_t *restrict x,
3095 cs_real_t *restrict y)
3096 {
3097 const cs_matrix_struct_csr_t *ms = matrix->structure;
3098 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
3099
3100 /* Ghost cell communication */
3101
3102 cs_halo_state_t *hs
3103 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
3104 if (hs != NULL)
3105 cs_halo_sync_wait(matrix->halo, x, hs);
3106
3107 /* MKL call */
3108
3109 int n_rows = ms->n_rows;
3110 char transa[] = "n";
3111
3112 if (exclude_diag)
3113 bft_error(__FILE__, __LINE__, 0,
3114 _(_no_exclude_diag_error_str), __func__);
3115
3116 mkl_cspblas_dcsrgemv(transa,
3117 &n_rows,
3118 mc->val,
3119 ms->row_index,
3120 ms->col_id,
3121 (double *)x,
3122 y);
3123 }
3124
3125 #endif /* defined (HAVE_MKL) */
3126
3127 /*----------------------------------------------------------------------------
3128 * Create MSR matrix coefficients.
3129 *
3130 * returns:
3131 * pointer to allocated MSR coefficients structure.
3132 *----------------------------------------------------------------------------*/
3133
3134 static cs_matrix_coeff_msr_t *
_create_coeff_msr(void)3135 _create_coeff_msr(void)
3136 {
3137 cs_matrix_coeff_msr_t *mc;
3138
3139 /* Allocate */
3140
3141 BFT_MALLOC(mc, 1, cs_matrix_coeff_msr_t);
3142
3143 /* Initialize */
3144
3145 mc->max_db_size = 0;
3146 mc->max_eb_size = 0;
3147
3148 mc->d_val = NULL;
3149 mc->x_val = NULL;
3150
3151 mc->_d_val = NULL;
3152 mc->_x_val = NULL;
3153
3154 return mc;
3155 }
3156
3157 /*----------------------------------------------------------------------------
3158 * Destroy MSR matrix coefficients.
3159 *
3160 * parameters:
3161 * m <-> pointer to matrix structure
3162 *----------------------------------------------------------------------------*/
3163
3164 static void
_destroy_coeff_msr(cs_matrix_t * m)3165 _destroy_coeff_msr(cs_matrix_t *m)
3166 {
3167 if (m->coeffs != NULL) {
3168 cs_matrix_coeff_msr_t *mc = m->coeffs;
3169
3170 BFT_FREE(mc->_x_val);
3171 BFT_FREE(mc->_d_val);
3172
3173 BFT_FREE(m->coeffs);
3174 }
3175 }
3176
3177 /*----------------------------------------------------------------------------
3178 * Set MSR matrix extradiagonal coefficients to zero.
3179 *
3180 * The coefficients should already be allocated.
3181 *
3182 * Use of this function is preferrable to a simple loop, as its
3183 * threading behavior should be consistent with SpMW in NUMA cases.
3184 *
3185 * parameters:
3186 * matrix <-> pointer to matrix structure
3187 *----------------------------------------------------------------------------*/
3188
3189 static void
_zero_x_coeffs_msr(cs_matrix_t * matrix)3190 _zero_x_coeffs_msr(cs_matrix_t *matrix)
3191 {
3192 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3193
3194 const cs_matrix_struct_csr_t *ms = matrix->structure;
3195
3196 const cs_lnum_t n_rows = ms->n_rows;
3197 const cs_lnum_t *eb_size = matrix->eb_size;
3198
3199 if (eb_size[0] == 1) {
3200 # pragma omp parallel for if(n_rows > CS_THR_MIN)
3201 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3202 const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3203 cs_real_t *m_row = mc->_x_val + ms->row_index[ii];
3204 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
3205 m_row[jj] = 0.0;
3206 }
3207 }
3208 else {
3209 # pragma omp parallel for if(n_rows*eb_size[0] > CS_THR_MIN)
3210 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3211 const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3212 cs_real_t *m_row = mc->_x_val + ms->row_index[ii]*eb_size[3];
3213 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
3214 for (cs_lnum_t kk = 0; kk < eb_size[3]; kk++)
3215 m_row[jj*eb_size[3] + kk] = 0.0;
3216 }
3217 }
3218 }
3219 }
3220
3221 /*----------------------------------------------------------------------------
3222 * Set MSR extradiagonal matrix coefficients for the case where direct
3223 * assignment is possible (i.e. when there are no multiple contributions
3224 * to a given coefficient).
3225 *
3226 * parameters:
3227 * matrix <-- pointer to matrix structure
3228 * symmetric <-- indicates if extradiagonal values are symmetric
3229 * n_edges <-- local number of graph edges
3230 * edges <-- edges (symmetric row <-> column) connectivity
3231 * xa <-- extradiagonal values
3232 *----------------------------------------------------------------------------*/
3233
3234 static void
_set_xa_coeffs_msr_direct(cs_matrix_t * matrix,bool symmetric,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_real_t * restrict xa)3235 _set_xa_coeffs_msr_direct(cs_matrix_t *matrix,
3236 bool symmetric,
3237 cs_lnum_t n_edges,
3238 const cs_lnum_2_t *edges,
3239 const cs_real_t *restrict xa)
3240 {
3241 cs_lnum_t ii, jj, face_id;
3242 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3243
3244 const cs_matrix_struct_csr_t *ms = matrix->structure;
3245
3246 /* Copy extra-diagonal values */
3247
3248 assert(edges != NULL || n_edges == 0);
3249
3250 if (symmetric == false) {
3251
3252 const cs_lnum_t *restrict edges_p
3253 = (const cs_lnum_t *restrict)(edges);
3254
3255 for (face_id = 0; face_id < n_edges; face_id++) {
3256 cs_lnum_t kk, ll;
3257 ii = *edges_p++;
3258 jj = *edges_p++;
3259 if (ii < ms->n_rows) {
3260 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3261 mc->_x_val[kk] = xa[2*face_id];
3262 }
3263 if (jj < ms->n_rows) {
3264 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3265 mc->_x_val[ll] = xa[2*face_id + 1];
3266 }
3267 }
3268
3269 }
3270 else { /* if symmetric == true */
3271
3272 const cs_lnum_t *restrict edges_p
3273 = (const cs_lnum_t *restrict)(edges);
3274
3275 for (face_id = 0; face_id < n_edges; face_id++) {
3276 cs_lnum_t kk, ll;
3277 ii = *edges_p++;
3278 jj = *edges_p++;
3279 if (ii < ms->n_rows) {
3280 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3281 mc->_x_val[kk] = xa[face_id];
3282 }
3283 if (jj < ms->n_rows) {
3284 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3285 mc->_x_val[ll] = xa[face_id];
3286 }
3287
3288 }
3289
3290 } /* end of condition on coefficients symmetry */
3291
3292 }
3293
3294 /*----------------------------------------------------------------------------
3295 * Set MSR extradiagonal block matrix coefficients for the case where direct
3296 * assignment is possible (i.e. when there are no multiple contributions
3297 * to a given coefficient).
3298 *
3299 * parameters:
3300 * matrix <-- pointer to matrix structure
3301 * symmetric <-- indicates if extradiagonal values are symmetric
3302 * n_edges <-- local number of graph edges
3303 * edges <-- edges (symmetric row <-> column) connectivity
3304 * xa <-- extradiagonal values
3305 *----------------------------------------------------------------------------*/
3306
3307 static void
_set_xa_coeffs_msr_direct_block(cs_matrix_t * matrix,bool symmetric,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_real_t * restrict xa)3308 _set_xa_coeffs_msr_direct_block(cs_matrix_t *matrix,
3309 bool symmetric,
3310 cs_lnum_t n_edges,
3311 const cs_lnum_2_t *edges,
3312 const cs_real_t *restrict xa)
3313 {
3314 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3315
3316 const cs_matrix_struct_csr_t *ms = matrix->structure;
3317
3318 const cs_lnum_t b_size = matrix->eb_size[3];
3319
3320 /* Copy extra-diagonal values */
3321
3322 assert(edges != NULL || n_edges == 0);
3323
3324 if (symmetric == false) {
3325
3326 const cs_lnum_t *restrict edges_p
3327 = (const cs_lnum_t *restrict)(edges);
3328
3329 for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3330 cs_lnum_t ii = *edges_p++;
3331 cs_lnum_t jj = *edges_p++;
3332 cs_lnum_t kk, ll;
3333 if (ii < ms->n_rows) {
3334 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3335 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3336 mc->_x_val[kk*b_size + pp] = xa[2*face_id*b_size + pp];
3337 }
3338 if (jj < ms->n_rows) {
3339 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3340 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3341 mc->_x_val[ll*b_size + pp] = xa[(2*face_id+1)*b_size + pp];
3342 }
3343 }
3344
3345 }
3346 else { /* if symmetric == true */
3347
3348 const cs_lnum_t *restrict edges_p
3349 = (const cs_lnum_t *restrict)(edges);
3350
3351 for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3352 cs_lnum_t ii = *edges_p++;
3353 cs_lnum_t jj = *edges_p++;
3354 cs_lnum_t kk, ll;
3355 if (ii < ms->n_rows) {
3356 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3357 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3358 mc->_x_val[kk*b_size + pp] = xa[face_id*b_size + pp];
3359 }
3360 if (jj < ms->n_rows) {
3361 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3362 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3363 mc->_x_val[ll*b_size + pp] = xa[face_id*b_size + pp];
3364 }
3365
3366 }
3367
3368 } /* end of condition on coefficients symmetry */
3369
3370 }
3371
3372 /*----------------------------------------------------------------------------
3373 * Set MSR extradiagonal matrix coefficients for the case where there are
3374 * multiple contributions to a given coefficient.
3375 *
3376 * The matrix coefficients should have been initialized (i.e. set to 0)
3377 * some before using this function.
3378 *
3379 * parameters:
3380 * matrix <-- pointer to matrix structure
3381 * symmetric <-- indicates if extradiagonal values are symmetric
3382 * n_edges <-- local number of graph edges
3383 * edges <-- edges (symmetric row <-> column) connectivity
3384 * xa <-- extradiagonal values
3385 *----------------------------------------------------------------------------*/
3386
3387 static void
_set_xa_coeffs_msr_increment(cs_matrix_t * matrix,bool symmetric,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_real_t * restrict xa)3388 _set_xa_coeffs_msr_increment(cs_matrix_t *matrix,
3389 bool symmetric,
3390 cs_lnum_t n_edges,
3391 const cs_lnum_2_t *edges,
3392 const cs_real_t *restrict xa)
3393 {
3394 cs_lnum_t ii, jj, face_id;
3395 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3396
3397 const cs_matrix_struct_csr_t *ms = matrix->structure;
3398
3399 /* Copy extra-diagonal values */
3400
3401 assert(edges != NULL);
3402
3403 if (symmetric == false) {
3404
3405 const cs_lnum_t *restrict edges_p
3406 = (const cs_lnum_t *restrict)(edges);
3407
3408 for (face_id = 0; face_id < n_edges; face_id++) {
3409 cs_lnum_t kk, ll;
3410 ii = *edges_p++;
3411 jj = *edges_p++;
3412 if (ii < ms->n_rows) {
3413 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3414 mc->_x_val[kk] += xa[2*face_id];
3415 }
3416 if (jj < ms->n_rows) {
3417 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3418 mc->_x_val[ll] += xa[2*face_id + 1];
3419 }
3420 }
3421
3422 }
3423 else { /* if symmetric == true */
3424
3425 const cs_lnum_t *restrict edges_p
3426 = (const cs_lnum_t *restrict)(edges);
3427
3428 for (face_id = 0; face_id < n_edges; face_id++) {
3429 cs_lnum_t kk, ll;
3430 ii = *edges_p++;
3431 jj = *edges_p++;
3432 if (ii < ms->n_rows) {
3433 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3434 mc->_x_val[kk] += xa[face_id];
3435 }
3436 if (jj < ms->n_rows) {
3437 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3438 mc->_x_val[ll] += xa[face_id];
3439 }
3440
3441 }
3442
3443 } /* end of condition on coefficients symmetry */
3444
3445 }
3446
3447 /*----------------------------------------------------------------------------
3448 * Set MSR extradiagonal matrix coefficients for the case where there are
3449 * multiple contributions to a given coefficient.
3450 *
3451 * The matrix coefficients should have been initialized (i.e. set to 0)
3452 * some before using this function.
3453 *
3454 * parameters:
3455 * matrix <-- pointer to matrix structure
3456 * symmetric <-- indicates if extradiagonal values are symmetric
3457 * n_edges <-- local number of graph edges
3458 * edges <-- edges (symmetric row <-> column) connectivity
3459 * xa <-- extradiagonal values
3460 *----------------------------------------------------------------------------*/
3461
3462 static void
_set_xa_coeffs_msr_increment_block(cs_matrix_t * matrix,bool symmetric,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_real_t * restrict xa)3463 _set_xa_coeffs_msr_increment_block(cs_matrix_t *matrix,
3464 bool symmetric,
3465 cs_lnum_t n_edges,
3466 const cs_lnum_2_t *edges,
3467 const cs_real_t *restrict xa)
3468 {
3469 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3470
3471 const cs_matrix_struct_csr_t *ms = matrix->structure;
3472
3473 const cs_lnum_t b_size = matrix->eb_size[3];
3474
3475 /* Copy extra-diagonal values */
3476
3477 assert(edges != NULL);
3478
3479 if (symmetric == false) {
3480
3481 const cs_lnum_t *restrict edges_p
3482 = (const cs_lnum_t *restrict)(edges);
3483
3484 for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3485 cs_lnum_t ii = *edges_p++;
3486 cs_lnum_t jj = *edges_p++;
3487 cs_lnum_t kk, ll;
3488 if (ii < ms->n_rows) {
3489 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3490 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3491 mc->_x_val[kk*b_size + pp] += xa[2*face_id*b_size + pp];
3492 }
3493 if (jj < ms->n_rows) {
3494 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3495 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3496 mc->_x_val[ll*b_size + pp] += xa[(2*face_id+1)*b_size + pp];
3497 }
3498 }
3499
3500 }
3501 else { /* if symmetric == true */
3502
3503 const cs_lnum_t *restrict edges_p
3504 = (const cs_lnum_t *restrict)(edges);
3505
3506 for (cs_lnum_t face_id = 0; face_id < n_edges; face_id++) {
3507 cs_lnum_t ii = *edges_p++;
3508 cs_lnum_t jj = *edges_p++;
3509 cs_lnum_t kk, ll;
3510 if (ii < ms->n_rows) {
3511 for (kk = ms->row_index[ii]; ms->col_id[kk] != jj; kk++);
3512 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3513 mc->_x_val[kk*b_size + pp] += xa[face_id*b_size + pp];
3514 }
3515 if (jj < ms->n_rows) {
3516 for (ll = ms->row_index[jj]; ms->col_id[ll] != ii; ll++);
3517 for (cs_lnum_t pp = 0; pp < b_size; pp++)
3518 mc->_x_val[kk*b_size + pp] += xa[face_id*b_size + pp];
3519 }
3520
3521 }
3522
3523 } /* end of condition on coefficients symmetry */
3524
3525 }
3526
3527 /*----------------------------------------------------------------------------
3528 * Map or copy MSR matrix diagonal coefficients.
3529 *
3530 * parameters:
3531 * matrix <-> pointer to matrix structure
3532 * copy <-- indicates if coefficients should be copied
3533 * da <-- diagonal values (NULL if all zero)
3534 *----------------------------------------------------------------------------*/
3535
3536 static void
_map_or_copy_da_coeffs_msr(cs_matrix_t * matrix,bool copy,const cs_real_t * restrict da)3537 _map_or_copy_da_coeffs_msr(cs_matrix_t *matrix,
3538 bool copy,
3539 const cs_real_t *restrict da)
3540 {
3541 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3542
3543 const cs_lnum_t n_rows = matrix->n_rows;
3544 const cs_lnum_t *db_size = matrix->db_size;
3545
3546 /* Map or copy diagonal values */
3547
3548 if (da != NULL) {
3549
3550 if (copy) {
3551 if (mc->_d_val == NULL || mc->max_db_size < db_size[3]) {
3552 BFT_REALLOC(mc->_d_val, db_size[3]*n_rows, cs_real_t);
3553 mc->max_db_size = db_size[3];
3554 }
3555 # pragma omp parallel for if(n_rows*db_size[0] > CS_THR_MIN)
3556 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3557 for (cs_lnum_t jj = 0; jj < db_size[3]; jj++)
3558 mc->_d_val[ii*db_size[3] + jj] = da[ii*db_size[3] + jj];
3559 }
3560 mc->d_val = mc->_d_val;
3561 }
3562 else
3563 mc->d_val = da;
3564
3565 }
3566 else
3567 mc->d_val = NULL;
3568 }
3569
3570 /*----------------------------------------------------------------------------
3571 * Map or copy MSR matrix extra diagonal coefficients.
3572 *
3573 * This assumes the xa values are already provided in MSR form.
3574 *
3575 * Setting xa = NULL and copy = true, this function also ensures allocation
3576 * of and zeroes extradiagonal coefficients.
3577 *
3578 * parameters:
3579 * matrix <-> pointer to matrix structure
3580 * copy <-- indicates if coefficients should be copied
3581 * xa <-- extradiagonal values (NULL if all zero)
3582 *----------------------------------------------------------------------------*/
3583
3584 static void
_map_or_copy_xa_coeffs_msr(cs_matrix_t * matrix,bool copy,const cs_real_t * restrict xa)3585 _map_or_copy_xa_coeffs_msr(cs_matrix_t *matrix,
3586 bool copy,
3587 const cs_real_t *restrict xa)
3588 {
3589 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3590
3591 const cs_matrix_struct_csr_t *ms = matrix->structure;
3592 const cs_lnum_t n_rows = matrix->n_rows;
3593 const cs_lnum_t *eb_size = matrix->eb_size;
3594
3595 if (xa == NULL || copy) {
3596
3597 /* Ensure allocation */
3598 if (mc->_x_val == NULL || mc->max_eb_size < eb_size[3]) {
3599 BFT_REALLOC(mc->_x_val,
3600 eb_size[3]*ms->row_index[ms->n_rows],
3601 cs_real_t);
3602 mc->max_eb_size = eb_size[3];
3603 }
3604 mc->x_val = mc->_x_val;
3605
3606 /* zero if required */
3607 if (xa == NULL)
3608 _zero_x_coeffs_msr(matrix);
3609
3610 }
3611
3612 /* Map or copy extradiagonal values (we could use memcpy, but prefer
3613 to have a similar threading behavior to SpMV for NUMA performance) */
3614
3615 if (xa != NULL) {
3616
3617 if (copy) {
3618 if (eb_size[0] == 1) {
3619 # pragma omp parallel for if(n_rows > CS_THR_MIN)
3620 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3621 const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3622 const cs_real_t *s_row = xa + ms->row_index[ii];
3623 cs_real_t *m_row = mc->_x_val + ms->row_index[ii];
3624 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
3625 m_row[jj] = s_row[jj];
3626 }
3627 }
3628 else {
3629 # pragma omp parallel for if(n_rows*eb_size[0] > CS_THR_MIN)
3630 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3631 const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
3632 const cs_real_t *s_row = xa + ms->row_index[ii]*eb_size[3];
3633 cs_real_t *m_row = mc->_x_val + ms->row_index[ii]*eb_size[3];
3634 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
3635 for (cs_lnum_t kk = 0; kk < eb_size[3]; kk++)
3636 m_row[jj*eb_size[3] + kk] = s_row[jj*eb_size[3] + kk];
3637 }
3638 }
3639 }
3640 }
3641
3642 else
3643 mc->x_val = xa;
3644
3645 }
3646 }
3647
3648 /*----------------------------------------------------------------------------
3649 * Set MSR matrix coefficients.
3650 *
3651 * parameters:
3652 * matrix <-> pointer to matrix structure
3653 * symmetric <-- indicates if extradiagonal values are symmetric
3654 * copy <-- indicates if coefficients should be copied
3655 * n_edges <-- local number of graph edges
3656 * edges <-- edges (symmetric row <-> column) connectivity
3657 * da <-- diagonal values (NULL if all zero)
3658 * xa <-- extradiagonal values (NULL if all zero)
3659 *----------------------------------------------------------------------------*/
3660
3661 static void
_set_coeffs_msr(cs_matrix_t * matrix,bool symmetric,bool copy,cs_lnum_t n_edges,const cs_lnum_2_t * restrict edges,const cs_real_t * restrict da,const cs_real_t * restrict xa)3662 _set_coeffs_msr(cs_matrix_t *matrix,
3663 bool symmetric,
3664 bool copy,
3665 cs_lnum_t n_edges,
3666 const cs_lnum_2_t *restrict edges,
3667 const cs_real_t *restrict da,
3668 const cs_real_t *restrict xa)
3669 {
3670 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3671
3672 const cs_matrix_struct_csr_t *ms = matrix->structure;
3673 const cs_lnum_t *eb_size = matrix->eb_size;
3674
3675 /* Map or copy diagonal values */
3676
3677 _map_or_copy_da_coeffs_msr(matrix, copy, da);
3678
3679 /* Extradiagonal values */
3680
3681 if (mc->_x_val == NULL || mc->max_eb_size < eb_size[3]) {
3682 BFT_REALLOC(mc->_x_val,
3683 eb_size[3]*ms->row_index[ms->n_rows],
3684 cs_real_t);
3685 mc->max_eb_size = eb_size[3];
3686 }
3687 mc->x_val = mc->_x_val;
3688
3689 /* Copy extra-diagonal values if assembly is direct */
3690
3691 if (ms->direct_assembly) {
3692 if (xa == NULL)
3693 _zero_x_coeffs_msr(matrix);
3694 if (eb_size[0] == 1)
3695 _set_xa_coeffs_msr_direct(matrix, symmetric, n_edges, edges, xa);
3696 else
3697 _set_xa_coeffs_msr_direct_block(matrix, symmetric, n_edges, edges, xa);
3698 }
3699
3700 /* Initialize coefficients to zero if assembly is incremental */
3701
3702 else {
3703 _zero_x_coeffs_msr(matrix);
3704 if (eb_size[0] == 1)
3705 _set_xa_coeffs_msr_increment(matrix, symmetric, n_edges, edges, xa);
3706 else
3707 _set_xa_coeffs_msr_increment_block(matrix, symmetric, n_edges, edges, xa);
3708 }
3709 }
3710
3711 /*----------------------------------------------------------------------------
3712 * Set MSR matrix coefficients provided in the same form.
3713 *
3714 * If da and xa are equal to NULL, then initialize val with zeros.
3715 *
3716 * parameters:
3717 * matrix <-> pointer to matrix structure
3718 * copy <-- indicates if coefficients should be copied
3719 * when not transferred
3720 * row_index <-- MSR row index (0 to n-1)
3721 * col_id <-- MSR column id (0 to n-1)
3722 * d_vals <-- diagonal values (NULL if all zero)
3723 * d_vals_transfer <-- diagonal values whose ownership is transferred
3724 * (NULL or d_vals in, NULL out)
3725 * x_vals <-- extradiagonal values (NULL if all zero)
3726 * x_vals_transfer <-- extradiagonal values whose ownership is transferred
3727 * (NULL or x_vals in, NULL out)
3728 *----------------------------------------------------------------------------*/
3729
3730 static void
_set_coeffs_msr_from_msr(cs_matrix_t * matrix,bool copy,const cs_lnum_t row_index[],const cs_lnum_t col_id[],const cs_real_t * d_vals,cs_real_t ** d_vals_transfer,const cs_real_t * x_vals,cs_real_t ** x_vals_transfer)3731 _set_coeffs_msr_from_msr(cs_matrix_t *matrix,
3732 bool copy,
3733 const cs_lnum_t row_index[],
3734 const cs_lnum_t col_id[],
3735 const cs_real_t *d_vals,
3736 cs_real_t **d_vals_transfer,
3737 const cs_real_t *x_vals,
3738 cs_real_t **x_vals_transfer)
3739 {
3740 CS_UNUSED(row_index);
3741 CS_UNUSED(col_id);
3742
3743 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3744
3745 bool d_transferred = false, x_transferred = false;
3746
3747 /* TODO: we should use metadata or check that the row_index and
3748 column id values are consistent, which should be true as long
3749 as columns are ordered in an identical manner */
3750
3751 if (d_vals_transfer != NULL) {
3752 if (*d_vals_transfer != NULL) {
3753 mc->max_db_size = matrix->db_size[0];
3754 if (mc->_d_val != *d_vals_transfer) {
3755 BFT_FREE(mc->_d_val);
3756 mc->_d_val = *d_vals_transfer;
3757 }
3758 mc->d_val = mc->_d_val;
3759 *d_vals_transfer = NULL;
3760 d_transferred = true;
3761 }
3762 }
3763
3764 if (x_vals_transfer != NULL) {
3765 if (*x_vals_transfer != NULL) {
3766 mc->max_db_size = matrix->db_size[0];
3767 BFT_FREE(mc->_x_val);
3768 mc->_x_val = *x_vals_transfer;
3769 mc->x_val = mc->_x_val;
3770 *x_vals_transfer = NULL;
3771 x_transferred = true;
3772 }
3773 }
3774
3775 if (d_transferred == false)
3776 _map_or_copy_da_coeffs_msr(matrix, copy, d_vals);
3777
3778 if (x_transferred == false)
3779 _map_or_copy_xa_coeffs_msr(matrix, copy, x_vals);
3780
3781 /* Now free transferred arrays */
3782
3783 if (d_vals_transfer != NULL)
3784 BFT_FREE(*d_vals_transfer);
3785 if (x_vals_transfer != NULL)
3786 BFT_FREE(*x_vals_transfer);
3787 }
3788
3789 /*----------------------------------------------------------------------------
3790 * Release shared MSR matrix coefficients.
3791 *
3792 * parameters:
3793 * matrix <-- pointer to matrix structure
3794 *----------------------------------------------------------------------------*/
3795
3796 static void
_release_coeffs_msr(cs_matrix_t * matrix)3797 _release_coeffs_msr(cs_matrix_t *matrix)
3798 {
3799 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3800 if (mc !=NULL) {
3801 /* Unmap shared values */
3802 mc->d_val = NULL;
3803 mc->x_val = NULL;
3804 }
3805 }
3806
3807 /*----------------------------------------------------------------------------*/
3808 /*!
3809 * \brief Get matrix diagonal values for MSR matrix.
3810 *
3811 * In case of matrixes with block diagonal coefficients, a pointer to
3812 * the complete block diagonal is returned.
3813 *
3814 * \param[in] matrix pointer to matrix structure
3815 *
3816 * \return pointer to matrix diagonal array
3817 */
3818 /*----------------------------------------------------------------------------*/
3819
3820 static const cs_real_t *
_get_diagonal_msr(const cs_matrix_t * matrix)3821 _get_diagonal_msr(const cs_matrix_t *matrix)
3822 {
3823 const cs_real_t *diag = NULL;
3824
3825 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3826 if (mc->d_val == NULL) {
3827 cs_lnum_t n_rows = matrix->n_rows * matrix->db_size[3];
3828 if (mc->_d_val == NULL || mc->max_db_size < matrix->db_size[3]) {
3829 BFT_REALLOC(mc->_d_val, matrix->db_size[3]*matrix->n_rows, cs_real_t);
3830 mc->max_db_size = matrix->db_size[3];
3831 }
3832 # pragma omp parallel for if(n_rows > CS_THR_MIN)
3833 for (cs_lnum_t ii = 0; ii < n_rows; ii++)
3834 mc->_d_val[ii] = 0.0;
3835 mc->d_val = mc->_d_val;
3836 }
3837 diag = mc->d_val;
3838
3839 return diag;
3840 }
3841
3842 /*----------------------------------------------------------------------------*/
3843 /*!
3844 * \brief Function for initialization of MSR matrix coefficients using
3845 * local row ids and column indexes.
3846 *
3847 * \warning The matrix pointer must point to valid data when the selection
3848 * function is called, so the life cycle of the data pointed to
3849 * should be at least as long as that of the assembler values
3850 * structure.
3851 *
3852 * \param[in, out] matrix_p untyped pointer to matrix description structure
3853 * \param[in] db_size optional diagonal block sizes
3854 * \param[in] eb_size optional extra-diagonal block sizes
3855 */
3856 /*----------------------------------------------------------------------------*/
3857
3858 static void
_msr_assembler_values_init(void * matrix_p,const cs_lnum_t db_size[4],const cs_lnum_t eb_size[4])3859 _msr_assembler_values_init(void *matrix_p,
3860 const cs_lnum_t db_size[4],
3861 const cs_lnum_t eb_size[4])
3862 {
3863 cs_matrix_t *matrix = (cs_matrix_t *)matrix_p;
3864
3865 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3866
3867 const cs_lnum_t n_rows = matrix->n_rows;
3868
3869 cs_lnum_t d_stride = 1;
3870 if (db_size != NULL)
3871 d_stride = db_size[3];
3872 cs_lnum_t e_stride = 1;
3873 if (eb_size != NULL)
3874 e_stride = eb_size[3];
3875
3876 const cs_matrix_struct_csr_t *ms = matrix->structure;
3877
3878 /* Initialize diagonal values */
3879
3880 BFT_REALLOC(mc->_d_val, d_stride*n_rows, cs_real_t);
3881 mc->d_val = mc->_d_val;
3882 mc->max_db_size = d_stride;
3883
3884 BFT_REALLOC(mc->_x_val, e_stride*ms->row_index[ms->n_rows], cs_real_t);
3885 mc->x_val = mc->_x_val;
3886 mc->max_eb_size = e_stride;
3887
3888 # pragma omp parallel for if(n_rows*db_size[0] > CS_THR_MIN)
3889 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
3890 for (cs_lnum_t jj = 0; jj < d_stride; jj++)
3891 mc->_d_val[ii*d_stride + jj] = 0;
3892 cs_lnum_t n_s_cols = (ms->row_index[ii+1] - ms->row_index[ii])*e_stride;
3893 cs_lnum_t displ = ms->row_index[ii]*e_stride;
3894 for (cs_lnum_t jj = 0; jj < n_s_cols; jj++)
3895 mc->_x_val[displ + jj] = 0;
3896 }
3897 }
3898
3899 /*----------------------------------------------------------------------------*/
3900 /*!
3901 * \brief Function for addition to MSR matrix coefficients using
3902 * local row ids and column indexes.
3903 *
3904 * Values whose associated row index is negative should be ignored;
3905 * Values whose column index is -1 are assumed to be assigned to a
3906 * separately stored diagonal. Other indexes shoudl be valid.
3907 *
3908 * \warning The matrix pointer must point to valid data when the selection
3909 * function is called, so the life cycle of the data pointed to
3910 * should be at least as long as that of the assembler values
3911 * structure.
3912 *
3913 * \remark Note that we pass column indexes (not ids) here; as the
3914 * caller is already assumed to have identified the index
3915 * matching a given column id.
3916 *
3917 * \param[in, out] matrix_p untyped pointer to matrix description structure
3918 * \param[in] n number of values to add
3919 * \param[in] stride associated data block size
3920 * \param[in] row_id associated local row ids
3921 * \param[in] col_idx associated local column indexes
3922 * \param[in] vals pointer to values (size: n*stride)
3923 */
3924 /*----------------------------------------------------------------------------*/
3925
3926 static void
_msr_assembler_values_add(void * matrix_p,cs_lnum_t n,cs_lnum_t stride,const cs_lnum_t row_id[],const cs_lnum_t col_idx[],const cs_real_t vals[])3927 _msr_assembler_values_add(void *matrix_p,
3928 cs_lnum_t n,
3929 cs_lnum_t stride,
3930 const cs_lnum_t row_id[],
3931 const cs_lnum_t col_idx[],
3932 const cs_real_t vals[])
3933 {
3934 cs_matrix_t *matrix = (cs_matrix_t *)matrix_p;
3935
3936 cs_matrix_coeff_msr_t *mc = matrix->coeffs;
3937
3938 const cs_matrix_struct_csr_t *ms = matrix->structure;
3939
3940 if (stride == 1) {
3941
3942 /* Copy instead of test for OpenMP to avoid outlining for small sets */
3943
3944 if (n*stride <= CS_THR_MIN) {
3945 for (cs_lnum_t ii = 0; ii < n; ii++) {
3946 cs_lnum_t r_id = row_id[ii];
3947 if (r_id < 0)
3948 continue;
3949 if (col_idx[ii] < 0) {
3950 # pragma omp atomic
3951 mc->_d_val[r_id] += vals[ii];
3952 }
3953 else {
3954 # pragma omp atomic
3955 mc->_x_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
3956 }
3957 }
3958 }
3959
3960 else {
3961 # pragma omp parallel for if(n*stride > CS_THR_MIN)
3962 for (cs_lnum_t ii = 0; ii < n; ii++) {
3963 cs_lnum_t r_id = row_id[ii];
3964 if (r_id < 0)
3965 continue;
3966 if (col_idx[ii] < 0) {
3967 # pragma omp atomic
3968 mc->_d_val[r_id] += vals[ii];
3969 }
3970 else {
3971 # pragma omp atomic
3972 mc->_x_val[ms->row_index[r_id] + col_idx[ii]] += vals[ii];
3973 }
3974 }
3975 }
3976 }
3977
3978 else { /* if (stride > 1) */
3979
3980 /* Copy instead of test for OpenMP to avoid outlining for small sets */
3981
3982 if (n*stride <= CS_THR_MIN) {
3983 for (cs_lnum_t ii = 0; ii < n; ii++) {
3984 cs_lnum_t r_id = row_id[ii];
3985 if (r_id < 0)
3986 continue;
3987 if (col_idx[ii] < 0) {
3988 for (cs_lnum_t jj = 0; jj < stride; jj++)
3989 mc->_d_val[r_id*stride + jj] += vals[ii*stride + jj];
3990 }
3991 else {
3992 cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
3993 for (cs_lnum_t jj = 0; jj < stride; jj++)
3994 mc->_x_val[displ + jj] += vals[ii*stride + jj];
3995 }
3996 }
3997 }
3998
3999 else {
4000 # pragma omp parallel for if(n*stride > CS_THR_MIN)
4001 for (cs_lnum_t ii = 0; ii < n; ii++) {
4002 cs_lnum_t r_id = row_id[ii];
4003 if (r_id < 0)
4004 continue;
4005 if (col_idx[ii] < 0) {
4006 for (cs_lnum_t jj = 0; jj < stride; jj++)
4007 mc->_d_val[r_id*stride + jj] += vals[ii*stride + jj];
4008 }
4009 else {
4010 cs_lnum_t displ = (ms->row_index[r_id] + col_idx[ii])*stride;
4011 for (cs_lnum_t jj = 0; jj < stride; jj++)
4012 mc->_x_val[displ + jj] += vals[ii*stride + jj];
4013 }
4014 }
4015 }
4016 }
4017
4018 }
4019
4020 /*----------------------------------------------------------------------------*/
4021 /*!
4022 * \brief Create and initialize an MSR matrix assembler values structure.
4023 *
4024 * The associated matrix's structure must have been created using
4025 * \ref cs_matrix_structure_create_from_assembler.
4026 *
4027 * Block sizes are defined by an optional array of 4 values:
4028 * 0: useful block size, 1: vector block extents,
4029 * 2: matrix line extents, 3: matrix line*column extents
4030 *
4031 * \param[in, out] matrix pointer to matrix structure
4032 * \param[in] diag_block_size block sizes for diagonal, or NULL
4033 * \param[in] extra_diag_block_size block sizes for extra diagonal,
4034 * or NULL
4035 *
4036 * \return pointer to initialized matrix assembler values structure;
4037 */
4038 /*----------------------------------------------------------------------------*/
4039
4040 static cs_matrix_assembler_values_t *
_assembler_values_create_msr(cs_matrix_t * matrix,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size)4041 _assembler_values_create_msr(cs_matrix_t *matrix,
4042 const cs_lnum_t *diag_block_size,
4043 const cs_lnum_t *extra_diag_block_size)
4044 {
4045 cs_matrix_assembler_values_t *mav
4046 = cs_matrix_assembler_values_create(matrix->assembler,
4047 true,
4048 diag_block_size,
4049 extra_diag_block_size,
4050 (void *)matrix,
4051 _msr_assembler_values_init,
4052 _msr_assembler_values_add,
4053 NULL,
4054 NULL,
4055 NULL);
4056
4057 return mav;
4058 }
4059
4060 /*----------------------------------------------------------------------------
4061 * Local matrix.vector product y = A.x with MSR matrix.
4062 *
4063 * parameters:
4064 * matrix <-- pointer to matrix structure
4065 * exclude_diag <-- exclude diagonal if true,
4066 * sync <-- synchronize ghost cells if true
4067 * x <-> multipliying vector values
4068 * y --> resulting vector
4069 *----------------------------------------------------------------------------*/
4070
4071 static void
_mat_vec_p_l_msr(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t * restrict x,cs_real_t * restrict y)4072 _mat_vec_p_l_msr(const cs_matrix_t *matrix,
4073 bool exclude_diag,
4074 bool sync,
4075 cs_real_t *restrict x,
4076 cs_real_t *restrict y)
4077 {
4078 const cs_matrix_struct_csr_t *ms = matrix->structure;
4079 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4080 cs_lnum_t n_rows = ms->n_rows;
4081
4082 /* Ghost cell communication */
4083
4084 cs_halo_state_t *hs
4085 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4086 if (hs != NULL)
4087 cs_halo_sync_wait(matrix->halo, x, hs);
4088
4089 /* Standard case */
4090
4091 if (!exclude_diag && mc->d_val != NULL) {
4092
4093 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4094 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4095
4096 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4097 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4098 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4099 cs_real_t sii = 0.0;
4100
4101 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4102 sii += (m_row[jj]*x[col_id[jj]]);
4103
4104 y[ii] = sii + mc->d_val[ii]*x[ii];
4105
4106 }
4107
4108 }
4109
4110 /* Exclude diagonal */
4111
4112 else {
4113
4114 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4115 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4116
4117 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4118 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4119 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4120 cs_real_t sii = 0.0;
4121
4122 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4123 sii += (m_row[jj]*x[col_id[jj]]);
4124
4125 y[ii] = sii;
4126
4127 }
4128 }
4129
4130 }
4131
4132 /*----------------------------------------------------------------------------
4133 * Local matrix.vector product y = A.x with MSR matrix.
4134 *
4135 * parameters:
4136 * matrix <-- pointer to matrix structure
4137 * exclude_diag <-- exclude diagonal if true,
4138 * sync <-- synchronize ghost cells if true
4139 * x <-> multipliying vector values
4140 * y --> resulting vector
4141 *----------------------------------------------------------------------------*/
4142
4143 static void
_mat_vec_p_l_msr_omp_sched(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t * restrict x,cs_real_t * restrict y)4144 _mat_vec_p_l_msr_omp_sched(const cs_matrix_t *matrix,
4145 bool exclude_diag,
4146 bool sync,
4147 cs_real_t *restrict x,
4148 cs_real_t *restrict y)
4149 {
4150 const cs_matrix_struct_csr_t *ms = matrix->structure;
4151 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4152 cs_lnum_t n_rows = ms->n_rows;
4153
4154 /* Ghost cell communication */
4155
4156 cs_halo_state_t *hs
4157 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4158 if (hs != NULL)
4159 cs_halo_sync_wait(matrix->halo, x, hs);
4160
4161 /* Standard case */
4162
4163 if (!exclude_diag && mc->d_val != NULL) {
4164
4165 # pragma omp parallel if(n_rows > CS_THR_MIN)
4166 {
4167
4168 cs_lnum_t n_s_rows = cs_align(n_rows * 0.9, _cs_cl);
4169 if (n_s_rows > n_rows)
4170 n_s_rows = n_rows;
4171
4172 # pragma omp for nowait
4173 for (cs_lnum_t ii = 0; ii < n_s_rows; ii++) {
4174
4175 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4176 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4177 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4178 cs_real_t sii = 0.0;
4179
4180 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4181 sii += (m_row[jj]*x[col_id[jj]]);
4182
4183 y[ii] = sii + mc->d_val[ii]*x[ii];
4184
4185 }
4186
4187 # pragma omp for schedule(dynamic, _cs_cl)
4188 for (cs_lnum_t ii = n_s_rows; ii < n_rows; ii++) {
4189
4190 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4191 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4192 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4193 cs_real_t sii = 0.0;
4194
4195 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4196 sii += (m_row[jj]*x[col_id[jj]]);
4197
4198 y[ii] = sii + mc->d_val[ii]*x[ii];
4199
4200 }
4201
4202 }
4203
4204 }
4205
4206 /* Exclude diagonal */
4207
4208 else {
4209
4210 # pragma omp parallel if(n_rows > CS_THR_MIN)
4211 {
4212
4213 cs_lnum_t n_s_rows = cs_align(n_rows * 0.9, _cs_cl);
4214 if (n_s_rows > n_rows)
4215 n_s_rows = n_rows;
4216
4217 # pragma omp for
4218 for (cs_lnum_t ii = 0; ii < n_s_rows; ii++) {
4219
4220 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4221 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4222 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4223 cs_real_t sii = 0.0;
4224
4225 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4226 sii += (m_row[jj]*x[col_id[jj]]);
4227
4228 y[ii] = sii;
4229
4230 }
4231
4232 # pragma omp for schedule(dynamic, _cs_cl)
4233 for (cs_lnum_t ii = n_s_rows; ii < n_rows; ii++) {
4234
4235 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4236 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4237 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4238 cs_real_t sii = 0.0;
4239
4240 for (cs_lnum_t jj = 0; jj < n_cols; jj++)
4241 sii += (m_row[jj]*x[col_id[jj]]);
4242
4243 y[ii] = sii;
4244
4245 }
4246
4247 }
4248
4249 }
4250
4251 }
4252
4253 /*----------------------------------------------------------------------------
4254 * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4255 *
4256 * parameters:
4257 * matrix <-- pointer to matrix structure
4258 * exclude_diag <-- exclude diagonal if true,
4259 * sync <-- synchronize ghost cells if true
4260 * x <-> multipliying vector values
4261 * y --> resulting vector
4262 *----------------------------------------------------------------------------*/
4263
4264 static void
_b_mat_vec_p_l_msr_generic(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4265 _b_mat_vec_p_l_msr_generic(const cs_matrix_t *matrix,
4266 bool exclude_diag,
4267 bool sync,
4268 cs_real_t x[restrict],
4269 cs_real_t y[restrict])
4270 {
4271 const cs_matrix_struct_csr_t *ms = matrix->structure;
4272 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4273 const cs_lnum_t n_rows = ms->n_rows;
4274 const cs_lnum_t *db_size = matrix->db_size;
4275
4276 /* Ghost cell communication */
4277
4278 cs_halo_state_t *hs
4279 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4280 if (hs != NULL)
4281 _pre_vector_multiply_sync_x_end(matrix, hs, x);
4282
4283 /* Standard case */
4284
4285 if (!exclude_diag && mc->d_val != NULL) {
4286
4287 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4288 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4289
4290 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4291 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4292 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4293
4294 _dense_b_ax(ii, db_size, mc->d_val, x, y);
4295
4296 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4297 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4298 y[ii*db_size[1] + kk]
4299 += (m_row[jj]*x[col_id[jj]*db_size[1] + kk]);
4300 }
4301 }
4302
4303 }
4304
4305 }
4306
4307 /* Exclude diagonal */
4308
4309 else {
4310
4311 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4312 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4313
4314 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4315 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4316 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4317
4318 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4319 y[ii*db_size[1] + kk] = 0.;
4320
4321 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4322 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4323 y[ii*db_size[1] + kk]
4324 += (m_row[jj]*x[col_id[jj]*db_size[1] + kk]);
4325 }
4326 }
4327
4328 }
4329 }
4330
4331 }
4332
4333 /*----------------------------------------------------------------------------
4334 * Local matrix.vector product y = A.x with MSR matrix, 3x3 blocked version.
4335 *
4336 * parameters:
4337 * matrix <-- pointer to matrix structure
4338 * exclude_diag <-- exclude diagonal if true,
4339 * sync <-- synchronize ghost cells if true
4340 * x <-> multipliying vector values
4341 * y --> resulting vector
4342 *----------------------------------------------------------------------------*/
4343
4344 static void
_3_3_mat_vec_p_l_msr(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t * restrict x,cs_real_t * restrict y)4345 _3_3_mat_vec_p_l_msr(const cs_matrix_t *matrix,
4346 bool exclude_diag,
4347 bool sync,
4348 cs_real_t *restrict x,
4349 cs_real_t *restrict y)
4350 {
4351 const cs_matrix_struct_csr_t *ms = matrix->structure;
4352 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4353 const cs_lnum_t n_rows = ms->n_rows;
4354
4355 assert(matrix->db_size[0] == 3 && matrix->db_size[3] == 9);
4356
4357 /* Ghost cell communication */
4358
4359 cs_halo_state_t *hs
4360 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4361 if (hs != NULL)
4362 _pre_vector_multiply_sync_x_end(matrix, hs, x);
4363
4364 /* Standard case */
4365
4366 if (!exclude_diag && mc->d_val != NULL) {
4367
4368 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4369 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4370
4371 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4372 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4373 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4374
4375 _dense_3_3_ax(ii, mc->d_val, x, y);
4376
4377 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4378 for (cs_lnum_t kk = 0; kk < 3; kk++)
4379 y[ii*3 + kk] += (m_row[jj]*x[col_id[jj]*3 + kk]);
4380 }
4381
4382 }
4383
4384 }
4385
4386 /* Exclude diagonal */
4387
4388 else {
4389
4390 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4391 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4392
4393 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4394 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4395 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4396
4397 for (cs_lnum_t kk = 0; kk < 3; kk++)
4398 y[ii*3 + kk] = 0.;
4399
4400 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4401 for (cs_lnum_t kk = 0; kk < 3; kk++)
4402 y[ii*3 + kk] += (m_row[jj]*x[col_id[jj]*3 + kk]);
4403 }
4404
4405 }
4406 }
4407
4408 }
4409
4410 /*----------------------------------------------------------------------------
4411 * Local matrix.vector product y = A.x with MSR matrix, 6x6 blocked version.
4412 *
4413 * parameters:
4414 * matrix <-- pointer to matrix structure
4415 * exclude_diag <-- exclude diagonal if true,
4416 * sync <-- synchronize ghost cells if true
4417 * x <-> multipliying vector values
4418 * y --> resulting vector
4419 *----------------------------------------------------------------------------*/
4420
4421 static void
_6_6_mat_vec_p_l_msr(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4422 _6_6_mat_vec_p_l_msr(const cs_matrix_t *matrix,
4423 bool exclude_diag,
4424 bool sync,
4425 cs_real_t x[restrict],
4426 cs_real_t y[restrict])
4427 {
4428 const cs_matrix_struct_csr_t *ms = matrix->structure;
4429 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4430 const cs_lnum_t n_rows = ms->n_rows;
4431
4432 assert(matrix->db_size[0] == 6 && matrix->db_size[3] == 36);
4433
4434 /* Ghost cell communication */
4435
4436 cs_halo_state_t *hs
4437 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4438 if (hs != NULL)
4439 _pre_vector_multiply_sync_x_end(matrix, hs, x);
4440
4441 /* Standard case */
4442
4443 if (!exclude_diag && mc->d_val != NULL) {
4444
4445 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4446 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4447
4448 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4449 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4450 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4451
4452 _dense_6_6_ax(ii, mc->d_val, x, y);
4453
4454 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4455 for (cs_lnum_t kk = 0; kk < 6; kk++)
4456 y[ii*6 + kk] += (m_row[jj]*x[col_id[jj]*6 + kk]);
4457 }
4458
4459 }
4460
4461 }
4462
4463 /* Exclude diagonal */
4464
4465 else {
4466
4467 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4468 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4469
4470 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4471 const cs_real_t *restrict m_row = mc->x_val + ms->row_index[ii];
4472 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4473
4474 for (cs_lnum_t kk = 0; kk < 6; kk++)
4475 y[ii*6 + kk] = 0.;
4476
4477 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4478 for (cs_lnum_t kk = 0; kk < 6; kk++)
4479 y[ii*6 + kk] += (m_row[jj]*x[col_id[jj]*6 + kk]);
4480 }
4481
4482 }
4483 }
4484
4485 }
4486
4487 /*----------------------------------------------------------------------------
4488 * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4489 *
4490 * This variant uses fixed block size variants for common cases.
4491 *
4492 * parameters:
4493 * matrix <-- pointer to matrix structure
4494 * exclude_diag <-- exclude diagonal if true,
4495 * sync <-- synchronize ghost cells if true
4496 * x <-> multipliying vector values
4497 * y --> resulting vector
4498 *----------------------------------------------------------------------------*/
4499
4500 static void
_b_mat_vec_p_l_msr(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4501 _b_mat_vec_p_l_msr(const cs_matrix_t *matrix,
4502 bool exclude_diag,
4503 bool sync,
4504 cs_real_t x[restrict],
4505 cs_real_t y[restrict])
4506 {
4507 if (matrix->db_size[0] == 3 && matrix->db_size[3] == 9)
4508 _3_3_mat_vec_p_l_msr(matrix, exclude_diag, sync, x, y);
4509
4510 else if (matrix->db_size[0] == 6 && matrix->db_size[3] == 36)
4511 _6_6_mat_vec_p_l_msr(matrix, exclude_diag, sync, x, y);
4512
4513 else
4514 _b_mat_vec_p_l_msr_generic(matrix, exclude_diag, sync, x, y);
4515 }
4516
4517 /*----------------------------------------------------------------------------
4518 * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4519 *
4520 * parameters:
4521 * matrix <-- pointer to matrix structure
4522 * exclude_diag <-- exclude diagonal if true,
4523 * sync <-- synchronize ghost cells if true
4524 * x <-> multipliying vector values
4525 * y --> resulting vector
4526 *----------------------------------------------------------------------------*/
4527
4528 static void
_bb_mat_vec_p_l_msr_3(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4529 _bb_mat_vec_p_l_msr_3(const cs_matrix_t *matrix,
4530 bool exclude_diag,
4531 bool sync,
4532 cs_real_t x[restrict],
4533 cs_real_t y[restrict])
4534 {
4535 const cs_matrix_struct_csr_t *ms = matrix->structure;
4536 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4537 const cs_lnum_t n_rows = ms->n_rows;
4538 const cs_lnum_t *db_size = matrix->db_size;
4539
4540 /* Ghost cell communication */
4541
4542 cs_halo_state_t *hs
4543 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4544 if (hs != NULL)
4545 _pre_vector_multiply_sync_x_end(matrix, hs, x);
4546
4547 /* Standard case */
4548
4549 if (!exclude_diag && mc->d_val != NULL) {
4550
4551 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4552 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4553
4554 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4555 const cs_real_t *restrict m_row = mc->x_val + (ms->row_index[ii]*9);
4556 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4557
4558 _dense_b_ax(ii, db_size, mc->d_val, x, y);
4559
4560 cs_real_t * _y = y + ii*3;
4561
4562 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4563 _y[0] += ( m_row[jj*9] * x[col_id[jj]*3]
4564 + m_row[jj*9 + 1] * x[col_id[jj]*3 + 1]
4565 + m_row[jj*9 + 2] * x[col_id[jj]*3 + 2]);
4566 _y[1] += ( m_row[jj*9 + 3] * x[col_id[jj]*3]
4567 + m_row[jj*9 + 3 + 1] * x[col_id[jj]*3 + 1]
4568 + m_row[jj*9 + 3 + 2] * x[col_id[jj]*3 + 2]);
4569 _y[2] += ( m_row[jj*9 + 6] * x[col_id[jj]*3]
4570 + m_row[jj*9 + 6 + 1] * x[col_id[jj]*3 + 1]
4571 + m_row[jj*9 + 6 + 2] * x[col_id[jj]*3 + 2]);
4572 }
4573
4574 }
4575
4576 }
4577
4578 /* Exclude diagonal */
4579
4580 else {
4581
4582 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4583 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4584
4585 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4586 const cs_real_t *restrict m_row = mc->x_val + (ms->row_index[ii]*9);
4587 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4588
4589 cs_real_t * _y = y + (ii*db_size[1]);
4590
4591 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4592 _y[kk] = 0.;
4593
4594 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4595 _y[0] += ( m_row[jj*9] * x[col_id[jj]*3]
4596 + m_row[jj*9 + 1] * x[col_id[jj]*3 + 1]
4597 + m_row[jj*9 + 2] * x[col_id[jj]*3 + 2]);
4598 _y[1] += ( m_row[jj*9 + 3] * x[col_id[jj]*3]
4599 + m_row[jj*9 + 3 + 1] * x[col_id[jj]*3 + 1]
4600 + m_row[jj*9 + 3 + 2] * x[col_id[jj]*3 + 2]);
4601 _y[2] += ( m_row[jj*9 + 6] * x[col_id[jj]*3]
4602 + m_row[jj*9 + 6 + 1] * x[col_id[jj]*3 + 1]
4603 + m_row[jj*9 + 6 + 2] * x[col_id[jj]*3 + 2]);
4604 }
4605
4606 }
4607 }
4608
4609 }
4610
4611 /*----------------------------------------------------------------------------
4612 * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4613 *
4614 * parameters:
4615 * matrix <-- pointer to matrix structure
4616 * exclude_diag <-- exclude diagonal if true,
4617 * sync <-- synchronize ghost cells if true
4618 * x <-> multipliying vector values
4619 * y --> resulting vector
4620 *----------------------------------------------------------------------------*/
4621
4622 static void
_bb_mat_vec_p_l_msr_generic(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4623 _bb_mat_vec_p_l_msr_generic(const cs_matrix_t *matrix,
4624 bool exclude_diag,
4625 bool sync,
4626 cs_real_t x[restrict],
4627 cs_real_t y[restrict])
4628 {
4629 const cs_matrix_struct_csr_t *ms = matrix->structure;
4630 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4631 const cs_lnum_t n_rows = ms->n_rows;
4632 const cs_lnum_t *db_size = matrix->db_size;
4633 const cs_lnum_t *eb_size = matrix->eb_size;
4634
4635 /* Ghost cell communication */
4636
4637 cs_halo_state_t *hs
4638 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4639 if (hs != NULL)
4640 _pre_vector_multiply_sync_x_end(matrix, hs, x);
4641
4642 /* Standard case */
4643
4644 if (!exclude_diag && mc->d_val != NULL) {
4645
4646 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4647 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4648
4649 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4650 const cs_real_t *restrict m_row = mc->x_val
4651 + (ms->row_index[ii]*eb_size[3]);
4652 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4653
4654 _dense_b_ax(ii, db_size, mc->d_val, x, y);
4655
4656 cs_real_t * _y = y + (ii*db_size[1]);
4657
4658 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4659 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4660 for (cs_lnum_t ll = 0; ll < db_size[0]; ll++) {
4661 _y[kk] += ( m_row[jj*eb_size[3] + kk*eb_size[2] + ll]
4662 * x[col_id[jj]*db_size[1] + ll]);
4663 }
4664 }
4665 }
4666
4667 }
4668
4669 }
4670
4671 /* Exclude diagonal */
4672
4673 else {
4674
4675 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4676 for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
4677
4678 const cs_lnum_t *restrict col_id = ms->col_id + ms->row_index[ii];
4679 const cs_real_t *restrict m_row = mc->x_val
4680 + (ms->row_index[ii]*eb_size[3]);
4681 cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
4682
4683 cs_real_t * _y = y + (ii*db_size[1]);
4684
4685 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4686 _y[kk] = 0.;
4687
4688 for (cs_lnum_t jj = 0; jj < n_cols; jj++) {
4689 for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4690 for (cs_lnum_t ll = 0; ll < db_size[0]; ll++) {
4691 _y[kk] += ( m_row[jj*eb_size[3] + kk*eb_size[2] + ll]
4692 * x[col_id[jj]*db_size[1] + ll]);
4693 }
4694 }
4695 }
4696
4697 }
4698 }
4699
4700 }
4701
4702 /*----------------------------------------------------------------------------
4703 * Local matrix.vector product y = A.x with MSR matrix, blocked version.
4704 *
4705 * parameters:
4706 * matrix <-- pointer to matrix structure
4707 * exclude_diag <-- exclude diagonal if true,
4708 * sync <-- synchronize ghost cells if true
4709 * x <-> multipliying vector values
4710 * y --> resulting vector
4711 *----------------------------------------------------------------------------*/
4712
4713 static void
_bb_mat_vec_p_l_msr(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4714 _bb_mat_vec_p_l_msr(const cs_matrix_t *matrix,
4715 bool exclude_diag,
4716 bool sync,
4717 cs_real_t x[restrict],
4718 cs_real_t y[restrict])
4719 {
4720 if (matrix->eb_size[0] == 3 && matrix->eb_size[3] == 9)
4721 _bb_mat_vec_p_l_msr_3(matrix, exclude_diag, sync, x, y);
4722
4723 else
4724 _bb_mat_vec_p_l_msr_generic(matrix, exclude_diag, sync, x, y);
4725 }
4726
4727 /*----------------------------------------------------------------------------
4728 * Local matrix.vector product y = A.x with MSR matrix, using MKL
4729 *
4730 * parameters:
4731 * exclude_diag <-- exclude diagonal if true
4732 * matrix <-- pointer to matrix structure
4733 * hs <-- halo state: if non-NULL, call cs_halo_sync_wait
4734 * locally (possibly allowing computation/communication
4735 * overlap)
4736 * x <-> multipliying vector values
4737 * y --> resulting vector
4738 *----------------------------------------------------------------------------*/
4739
4740 #if defined (HAVE_MKL)
4741
4742 static void
_mat_vec_p_l_msr_mkl(const cs_matrix_t * matrix,bool exclude_diag,bool sync,cs_real_t x[restrict],cs_real_t y[restrict])4743 _mat_vec_p_l_msr_mkl(const cs_matrix_t *matrix,
4744 bool exclude_diag,
4745 bool sync,
4746 cs_real_t x[restrict],
4747 cs_real_t y[restrict])
4748 {
4749 const cs_matrix_struct_csr_t *ms = matrix->structure;
4750 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
4751
4752 /* Ghost cell communication */
4753
4754 cs_halo_state_t *hs
4755 = (sync) ? _pre_vector_multiply_sync_x_start(matrix, x) : NULL;
4756 if (hs != NULL)
4757 cs_halo_sync_wait(matrix->halo, x, hs);
4758
4759 /* Call MKL function */
4760
4761 int n_rows = ms->n_rows;
4762 char transa[] = "n";
4763
4764 mkl_cspblas_dcsrgemv(transa,
4765 &n_rows,
4766 mc->x_val,
4767 ms->row_index,
4768 ms->col_id,
4769 (double *)x,
4770 y);
4771
4772 /* Add diagonal contribution */
4773
4774 if (!exclude_diag && mc->d_val != NULL) {
4775 cs_lnum_t ii;
4776 const double *restrict da = mc->d_val;
4777 # pragma omp parallel for if(n_rows > CS_THR_MIN)
4778 for (ii = 0; ii < n_rows; ii++)
4779 y[ii] += da[ii] * x[ii];
4780 }
4781 }
4782
4783 #endif /* defined (HAVE_MKL) */
4784
4785 /*----------------------------------------------------------------------------
4786 * Add variant
4787 *
4788 * parameters:
4789 * type <-- matrix type
4790 * mft <-- fill type tuned for
 *   ed_flag         <-- 0: with diagonal only; 1: exclude diagonal only;
 *                       2: both
4792 * vector_multiply <-- function pointer for A.x
4793 * n_variants <-> number of variants
4794 * n_variants_max <-> current maximum number of variants
4795 * m_variant <-> array of matrix variants
4796 *----------------------------------------------------------------------------*/
4797
4798 static void
_variant_add(const char * name,cs_matrix_type_t type,cs_matrix_fill_type_t mft,int ed_flag,cs_matrix_vector_product_t * vector_multiply,int * n_variants,int * n_variants_max,cs_matrix_variant_t ** m_variant)4799 _variant_add(const char *name,
4800 cs_matrix_type_t type,
4801 cs_matrix_fill_type_t mft,
4802 int ed_flag,
4803 cs_matrix_vector_product_t *vector_multiply,
4804 int *n_variants,
4805 int *n_variants_max,
4806 cs_matrix_variant_t **m_variant)
4807 {
4808 cs_matrix_variant_t *v;
4809 int i = *n_variants;
4810
4811 if (vector_multiply == NULL)
4812 return;
4813
4814 if (*n_variants_max == *n_variants) {
4815 if (*n_variants_max == 0)
4816 *n_variants_max = 8;
4817 else
4818 *n_variants_max *= 2;
4819 BFT_REALLOC(*m_variant, *n_variants_max, cs_matrix_variant_t);
4820 }
4821
4822 v = (*m_variant) + i;
4823
4824 for (int j = 0; j < 2; j++) {
4825 v->vector_multiply[j] = NULL;
4826 strncpy(v->name[j], name, 31);
4827 v->name[j][31] = '\0';
4828 }
4829
4830 v->type = type;
4831 v->fill_type = mft;
4832
4833 if (ed_flag != 1)
4834 v->vector_multiply[0] = vector_multiply;
4835 if (ed_flag != 0)
4836 v->vector_multiply[1] = vector_multiply;
4837
4838 *n_variants += 1;
4839 }
4840
4841 /*----------------------------------------------------------------------------
4842 * Select the sparse matrix-vector product function to be used by a
4843 * matrix or variant for a given fill type.
4844 *
4845 * Currently, possible variant functions are:
4846 *
4847 * CS_MATRIX_NATIVE (all fill types)
4848 * default
4849 * standard
4850 * fixed (for CS_MATRIX_33_BLOCK_D or CS_MATRIX_33_BLOCK_D_SYM)
4851 * omp (for OpenMP with compatible numbering)
4852 * vector (For vector machine with compatible numbering)
4853 *
4854 * CS_MATRIX_CSR (for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
4855 * default
4856 * standard
4857 * mkl (with MKL)
4858 *
4859 * CS_MATRIX_MSR
4860 * default
4861 * standard
 *     omp_sched (improved OpenMP scheduling, for CS_MATRIX_SCALAR*)
4863 * mkl (with MKL, for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
4864 *
4865 * parameters:
4866 * m_type <-- Matrix type
4867 * numbering <-- mesh numbering type, or NULL
4868 * fill type <-- matrix fill type to merge from
4869 * ed_flag <-- 0: with diagonal only, 1 exclude only; 2; both
4870 * func_name <-- function type name, or NULL for default
4871 * vector_multiply <-> multiplication function array
4872 *
4873 * returns:
4874 * 0 for success, 1 for incompatible function, 2 for compatible
4875 * function not available in current build
4876 *----------------------------------------------------------------------------*/
4877
4878 static int
_set_spmv_func(cs_matrix_type_t m_type,const cs_numbering_t * numbering,cs_matrix_fill_type_t fill_type,int ed_flag,const char * func_name,cs_matrix_vector_product_t * vector_multiply[2])4879 _set_spmv_func(cs_matrix_type_t m_type,
4880 const cs_numbering_t *numbering,
4881 cs_matrix_fill_type_t fill_type,
4882 int ed_flag,
4883 const char *func_name,
4884 cs_matrix_vector_product_t *vector_multiply[2])
4885 {
4886 int retcode = 1;
4887 int standard = 0;
4888
4889 cs_matrix_vector_product_t *spmv[2] = {NULL, NULL};
4890
4891 if (func_name == NULL)
4892 standard = 2;
4893 else if (!strcmp(func_name, "default"))
4894 standard = 2;
4895 else if (!strcmp(func_name, "standard"))
4896 standard = 1;
4897
4898 switch(m_type) {
4899
4900 case CS_MATRIX_NATIVE:
4901
4902 if (standard > 0) { /* standard or default */
4903
4904 switch(fill_type) {
4905 case CS_MATRIX_SCALAR:
4906 case CS_MATRIX_SCALAR_SYM:
4907 spmv[0] = _mat_vec_p_l_native;
4908 spmv[1] = _mat_vec_p_l_native;
4909 break;
4910 case CS_MATRIX_BLOCK_D:
4911 case CS_MATRIX_BLOCK_D_66:
4912 case CS_MATRIX_BLOCK_D_SYM:
4913 spmv[0] = _b_mat_vec_p_l_native_fixed;
4914 spmv[1] = _b_mat_vec_p_l_native_fixed;
4915 break;
4916 case CS_MATRIX_BLOCK:
4917 spmv[0] = _bb_mat_vec_p_l_native;
4918 spmv[1] = _bb_mat_vec_p_l_native;
4919 break;
4920 default:
4921 break;
4922 }
4923
4924 if (standard > 1) { /* default optimized variants */
4925 switch(fill_type) {
4926 case CS_MATRIX_SCALAR:
4927 case CS_MATRIX_SCALAR_SYM:
4928 if (numbering != NULL) {
4929 #if defined(HAVE_OPENMP)
4930 if (numbering->type == CS_NUMBERING_THREADS) {
4931 spmv[0] = _mat_vec_p_l_native_omp;
4932 spmv[1] = _mat_vec_p_l_native_omp;
4933 }
4934 #endif
4935 if (numbering->type == CS_NUMBERING_VECTORIZE) {
4936 spmv[0] = _mat_vec_p_l_native_vector;
4937 spmv[1] = _mat_vec_p_l_native_vector;
4938 }
4939 }
4940 break;
4941 case CS_MATRIX_BLOCK_D:
4942 case CS_MATRIX_BLOCK_D_66:
4943 case CS_MATRIX_BLOCK_D_SYM:
4944 if (numbering != NULL) {
4945 #if defined(HAVE_OPENMP)
4946 if (numbering->type == CS_NUMBERING_THREADS) {
4947 spmv[0] = _b_mat_vec_p_l_native_omp;
4948 spmv[1] = _b_mat_vec_p_l_native_omp;
4949 }
4950 #endif
4951 }
4952 break;
4953 default:
4954 break;
4955 }
4956 }
4957
4958 }
4959
4960 else if (!strcmp(func_name, "omp")) {
4961 #if defined(HAVE_OPENMP)
4962 if (numbering != NULL) {
4963 if (numbering->type == CS_NUMBERING_THREADS) {
4964 switch(fill_type) {
4965 case CS_MATRIX_SCALAR:
4966 case CS_MATRIX_SCALAR_SYM:
4967 spmv[0] = _mat_vec_p_l_native_omp;
4968 spmv[1] = _mat_vec_p_l_native_omp;
4969 break;
4970 case CS_MATRIX_BLOCK_D:
4971 case CS_MATRIX_BLOCK_D_66:
4972 case CS_MATRIX_BLOCK_D_SYM:
4973 spmv[0] = _b_mat_vec_p_l_native_omp;
4974 spmv[1] = _b_mat_vec_p_l_native_omp;
4975 break;
4976 default:
4977 break;
4978 }
4979 }
4980 }
4981 #else
4982 retcode = 2;
4983 #endif
4984 }
4985
4986 else if (!strcmp(func_name, "omp_atomic")) {
4987 #if defined(HAVE_OPENMP)
4988 switch(fill_type) {
4989 case CS_MATRIX_SCALAR:
4990 case CS_MATRIX_SCALAR_SYM:
4991 spmv[0] = _mat_vec_p_l_native_omp_atomic;
4992 spmv[1] = _mat_vec_p_l_native_omp_atomic;
4993 break;
4994 case CS_MATRIX_BLOCK_D:
4995 case CS_MATRIX_BLOCK_D_66:
4996 case CS_MATRIX_BLOCK_D_SYM:
4997 spmv[0] = _b_mat_vec_p_l_native_omp_atomic;
4998 spmv[1] = _b_mat_vec_p_l_native_omp_atomic;
4999 break;
5000 default:
5001 break;
5002 }
5003 #else
5004 retcode = 2;
5005 #endif
5006 }
5007
5008 else if (!strcmp(func_name, "vector")) {
5009 switch(fill_type) {
5010 case CS_MATRIX_SCALAR:
5011 case CS_MATRIX_SCALAR_SYM:
5012 spmv[0] = _mat_vec_p_l_native_vector;
5013 spmv[1] = _mat_vec_p_l_native_vector;
5014 break;
5015 default:
5016 break;
5017 }
5018 }
5019
5020 break;
5021
5022 case CS_MATRIX_CSR:
5023
5024 switch(fill_type) {
5025 case CS_MATRIX_SCALAR:
5026 case CS_MATRIX_SCALAR_SYM:
5027 if (standard > 0) {
5028 spmv[0] = _mat_vec_p_l_csr;
5029 spmv[1] = _mat_vec_p_l_csr;
5030 }
5031 else if (!strcmp(func_name, "mkl")) {
5032 #if defined(HAVE_MKL)
5033 spmv[0] = _mat_vec_p_l_csr_mkl;
5034 spmv[1] = _mat_vec_p_l_csr_mkl;
5035 #else
5036 retcode = 2;
5037 #endif
5038 }
5039 break;
5040 default:
5041 break;
5042 }
5043
5044 break;
5045
5046 case CS_MATRIX_MSR:
5047
5048 if (standard > 0) {
5049 switch(fill_type) {
5050 case CS_MATRIX_SCALAR:
5051 case CS_MATRIX_SCALAR_SYM:
5052 spmv[0] = _mat_vec_p_l_msr;
5053 spmv[1] = _mat_vec_p_l_msr;
5054 break;
5055 case CS_MATRIX_BLOCK_D:
5056 case CS_MATRIX_BLOCK_D_66:
5057 case CS_MATRIX_BLOCK_D_SYM:
5058 spmv[0] = _b_mat_vec_p_l_msr;
5059 spmv[1] = _b_mat_vec_p_l_msr;
5060 break;
5061 case CS_MATRIX_BLOCK:
5062 spmv[0] = _bb_mat_vec_p_l_msr;
5063 spmv[1] = _bb_mat_vec_p_l_msr;
5064 break;
5065 default:
5066 break;
5067 }
5068 }
5069
5070 else if (!strcmp(func_name, "mkl")) {
5071 #if defined(HAVE_MKL)
5072 switch(fill_type) {
5073 case CS_MATRIX_SCALAR:
5074 case CS_MATRIX_SCALAR_SYM:
5075 spmv[0] = _mat_vec_p_l_msr_mkl;
5076 spmv[1] = _mat_vec_p_l_msr_mkl;
5077 break;
5078 default:
5079 break;
5080 }
5081 #else
5082 retcode = 2;
5083 #endif
5084 }
5085
5086 else if (!strcmp(func_name, "omp_sched")) {
5087 switch(fill_type) {
5088 case CS_MATRIX_SCALAR:
5089 case CS_MATRIX_SCALAR_SYM:
5090 spmv[0] = _mat_vec_p_l_msr_omp_sched;
5091 spmv[1] = _mat_vec_p_l_msr_omp_sched;
5092 break;
5093 default:
5094 break;
5095 }
5096 }
5097
5098 break;
5099
5100 default:
5101 break;
5102 }
5103
5104 if (ed_flag != 1 && spmv[0] != NULL) {
5105 vector_multiply[0] = spmv[0];
5106 retcode = 0;
5107 }
5108 if (ed_flag != 0 && spmv[0] != NULL) {
5109 vector_multiply[1] = spmv[1];
5110 retcode = 0;
5111 }
5112
5113 return retcode;
5114 }
5115
5116 /*----------------------------------------------------------------------------*/
5117 /*!
5118 * \brief Create matrix structure internals using a matrix assembler.
5119 *
5120 * Only CSR and MSR formats are handled.
5121 *
5122 * \param[in] type type of matrix considered
5123 * \param[in] ma pointer to matrix assembler structure
5124 *
5125 * \return a pointer to created matrix structure internals
5126 */
5127 /*----------------------------------------------------------------------------*/
5128
static void *
_structure_from_assembler(cs_matrix_type_t        type,
                          cs_lnum_t               n_rows,
                          cs_lnum_t               n_cols_ext,
                          cs_matrix_assembler_t  *ma)
{
  void *structure = NULL;

  /* Get info on assembler structure:
     - whether the assembler keeps the diagonal apart from the graph,
     - shared row index and (sorted) column ids of the assembled graph. */

  bool ma_sep_diag = cs_matrix_assembler_get_separate_diag(ma);
  const cs_lnum_t *row_index = cs_matrix_assembler_get_row_index(ma);
  const cs_lnum_t *col_id = cs_matrix_assembler_get_col_ids(ma);

  /* Define structure */

  switch(type) {

  case CS_MATRIX_CSR:
    /* Assume diagonal is present (should not be important
       for assembly using matrix assembler) */
    if (ma_sep_diag == false)
      structure = _create_struct_csr_from_shared(true, /* have_diag */
                                                 false, /* for safety */
                                                 n_rows,
                                                 n_cols_ext,
                                                 row_index,
                                                 col_id);
    else {
      /* CSR stores the diagonal inline, but this assembler keeps it
         separate: rebuild row index and column ids with one diagonal
         entry inserted per row, preserving sorted column order
         (assumes the assembler's column ids are sorted per row). */
      cs_lnum_t *_row_index, *_col_id;
      BFT_MALLOC(_row_index, n_rows + 1, cs_lnum_t);
      BFT_MALLOC(_col_id, row_index[n_rows] + n_rows, cs_lnum_t); /* +1 per row */
      _row_index[0] = 0;
      for (cs_lnum_t i = 0; i < n_rows; i++) {
        cs_lnum_t n_cols = row_index[i+1] - row_index[i];
        cs_lnum_t j = 0, k = 0;
        const cs_lnum_t *s_c_id = col_id + row_index[i];
        cs_lnum_t *d_c_id = _col_id + row_index[i] + i; /* i diagonals already inserted */
        while (j < n_cols && s_c_id[j] < i) /* columns before the diagonal */
          d_c_id[k++] = s_c_id[j++];
        d_c_id[k++] = i;                    /* insert diagonal entry */
        while (j < n_cols)                  /* remaining columns */
          d_c_id[k++] = s_c_id[j++];
        _row_index[i+1] = row_index[i+1] + i + 1;
      }
      structure = _create_struct_csr_from_csr(true, /* have_diag */
                                              true,
                                              true,
                                              n_rows,
                                              n_cols_ext,
                                              &_row_index,
                                              &_col_id);
    }
    break;

  case CS_MATRIX_MSR:
    if (ma_sep_diag == true)
      structure = _create_struct_csr_from_shared(false,
                                                 false, /* for safety */
                                                 n_rows,
                                                 n_cols_ext,
                                                 row_index,
                                                 col_id);
    else {
      /* MSR keeps the diagonal separate, but this assembler includes
         it in the graph: rebuild row index and column ids with all
         diagonal entries filtered out. */
      cs_lnum_t *_row_index, *_col_id;
      BFT_MALLOC(_row_index, n_rows + 1, cs_lnum_t);
      BFT_MALLOC(_col_id, row_index[n_rows], cs_lnum_t); /* upper bound */
      _row_index[0] = 0;
      cs_lnum_t k = 0;
      for (cs_lnum_t i = 0; i < n_rows; i++) {
        cs_lnum_t n_cols = row_index[i+1] - row_index[i];
        const cs_lnum_t *s_c_id = col_id + row_index[i];
        for (cs_lnum_t j = 0; j < n_cols; j++) {
          if (s_c_id[j] != i) /* skip diagonal entries */
            _col_id[k++] = s_c_id[j];
        }
        _row_index[i+1] = k;
      }
      BFT_REALLOC(_col_id, _row_index[n_rows], cs_lnum_t); /* trim to actual size */
      structure = _create_struct_csr_from_csr(false,
                                              true,
                                              true,
                                              n_rows,
                                              n_cols_ext,
                                              &_row_index,
                                              &_col_id);
    }
    break;
  default:
    if (type >= 0 && type < CS_MATRIX_N_BUILTIN_TYPES)
      bft_error(__FILE__, __LINE__, 0,
                _("%s: handling of matrices in %s format\n"
                  "is not operational yet."),
                __func__,
                _(_matrix_type_name[type]));
    else
      bft_error(__FILE__, __LINE__, 0,
                _("%s: handling of matrices in external format type %d\n"
                  "is not handled by this function."),
                __func__, (int)type);
    break;
  }

  return structure;
}
5234
5235 /*----------------------------------------------------------------------------*/
5236 /*!
5237 * \brief Destroy matrix structure internals.
5238 *
5239 * \param[in] type matrix structure type
5240 * \param[in, out] structure pointer to matrix structure pointer
5241 */
5242 /*----------------------------------------------------------------------------*/
5243
5244 static void
_structure_destroy(cs_matrix_type_t type,void ** structure)5245 _structure_destroy(cs_matrix_type_t type,
5246 void **structure)
5247 {
5248 switch(type) {
5249 case CS_MATRIX_NATIVE:
5250 _destroy_struct_native(structure);
5251 break;
5252 case CS_MATRIX_CSR:
5253 _destroy_struct_csr(structure);
5254 break;
5255 case CS_MATRIX_MSR:
5256 _destroy_struct_csr(structure);
5257 break;
5258 default:
5259 assert(0);
5260 break;
5261 }
5262 }
5263
5264 /*----------------------------------------------------------------------------*/
5265 /*!
5266 * \brief Create a matrix container using a given type.
5267 *
5268 * \param[in] type chosen matrix type
5269 *
5270 * \return pointer to created matrix structure;
5271 */
5272 /*----------------------------------------------------------------------------*/
5273
static cs_matrix_t *
_matrix_create(cs_matrix_type_t  type)
{
  int i;
  cs_matrix_fill_type_t mft;
  cs_matrix_t *m;

  BFT_MALLOC(m, 1, cs_matrix_t);

  m->type = type;

  /* Built-in types have a registered name; anything else gets the
     generic entry at index CS_MATRIX_N_BUILTIN_TYPES. */

  if (m->type >= 0 && m->type < CS_MATRIX_N_BUILTIN_TYPES) {
    m->type_name = _matrix_type_name[m->type];
    m->type_fname = _matrix_type_fullname[m->type];
  }
  else {
    m->type_name = _matrix_type_name[CS_MATRIX_N_BUILTIN_TYPES];
    m->type_fname = _matrix_type_fullname[CS_MATRIX_N_BUILTIN_TYPES];
  }

  /* Map shared structure */

  m->n_rows = 0;
  m->n_cols_ext = 0;

  m->symmetric = false;

  /* Block size descriptors (4 extents each; see
     cs_matrix_get_diag_block_size for their meaning). */

  for (i = 0; i < 4; i++) {
    m->db_size[i] = 0;
    m->eb_size[i] = 0;
  }
  m->fill_type = CS_MATRIX_N_FILL_TYPES;

  m->structure = NULL;
  m->_structure = NULL;

  m->halo = NULL;
  m->numbering = NULL;
  m->assembler = NULL;

  /* Clear SpMV function slots for all fill types before assignment. */

  for (mft = 0; mft < CS_MATRIX_N_FILL_TYPES; mft++) {
    for (i = 0; i < 2; i++)
      m->vector_multiply[mft][i] = NULL;
  }

  /* Define coefficients */

  switch(m->type) {
  case CS_MATRIX_NATIVE:
    m->coeffs = _create_coeff_native();
    break;
  case CS_MATRIX_CSR:
    m->coeffs = _create_coeff_csr();
    break;
  case CS_MATRIX_MSR:
    m->coeffs = _create_coeff_msr();
    break;
  default:
    bft_error(__FILE__, __LINE__, 0,
              _("Handling of matrixes in format type %d\n"
                "is not operational yet."),
              m->type);
    break;
  }

  m->xa = NULL;

  /* Set function pointers here */

  m->set_coefficients = NULL;

  /* Select default SpMV variants for each fill type
     (no numbering info is available yet at this point). */

  for (mft = 0; mft < CS_MATRIX_N_FILL_TYPES; mft++)
    _set_spmv_func(m->type,
                   m->numbering,
                   mft,
                   2, /* ed_flag */
                   NULL, /* func_name */
                   m->vector_multiply[mft]);

  switch(m->type) {

  case CS_MATRIX_NATIVE:

    m->set_coefficients = _set_coeffs_native;
    m->release_coefficients = _release_coeffs_native;
    m->copy_diagonal = _copy_diagonal_separate;
    m->get_diagonal = _get_diagonal_native;
    m->destroy_structure = _destroy_struct_native;
    m->destroy_coefficients = _destroy_coeff_native;
    m->assembler_values_create = NULL; /* assembler fill not supported for native */
    break;

  case CS_MATRIX_CSR:
    m->set_coefficients = _set_coeffs_csr;
    m->release_coefficients = _release_coeffs_csr;
    m->copy_diagonal = _copy_diagonal_csr;
    m->get_diagonal = _get_diagonal_csr;
    m->destroy_structure = _destroy_struct_csr;
    m->destroy_coefficients = _destroy_coeff_csr;
    m->assembler_values_create = _assembler_values_create_csr;
    break;

  case CS_MATRIX_MSR:
    m->set_coefficients = _set_coeffs_msr;
    m->release_coefficients = _release_coeffs_msr;
    m->copy_diagonal = _copy_diagonal_separate;
    m->get_diagonal = _get_diagonal_msr;
    m->destroy_structure = _destroy_struct_csr;
    m->destroy_coefficients = _destroy_coeff_msr;
    m->assembler_values_create = _assembler_values_create_msr;
    break;

  default:
    assert(0);
    break;

  }

  /* If no variant was set in slot 1 (presumably the exclude-diagonal
     SpMV variant — see ed_flag handling in _set_spmv_func; TODO confirm),
     fall back to the slot 0 variant. */

  for (i = 0; i < CS_MATRIX_N_FILL_TYPES; i++) {
    if (m->vector_multiply[i][1] == NULL)
      m->vector_multiply[i][1] = m->vector_multiply[i][0];
  }

  return m;
}
5399
5400 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
5401
5402 /*============================================================================
5403 * Public function definitions
5404 *============================================================================*/
5405
5406 /*----------------------------------------------------------------------------*/
5407 /*!
5408 * \brief Create a matrix structure.
5409 *
5410 * Note that the structure created usually maps to the given existing
5411 * cell global number, face -> cell connectivity arrays, and cell halo
5412 * structure, so it must be destroyed before they are freed
5413 * (usually along with the code's main face -> cell structure).
5414 *
5415 * Note that the resulting matrix structure will contain either a full or
5416 * an empty main diagonal, and that the extra-diagonal structure is always
 * symmetric (though the coefficients may not be, and we may choose a
5418 * matrix format that does not exploit this symmetry). If the edges
5419 * connectivity argument is NULL, the matrix will be purely diagonal.
5420 *
5421 * \param[in] type type of matrix considered
5422 * \param[in] have_diag indicates if the diagonal structure
5423 * contains nonzeroes
5424 * \param[in] n_rows local number of rows
5425 * \param[in] n_cols_ext number of local + ghost columns
5426 * \param[in] n_edges local number of (undirected) graph edges
5427 * \param[in] edges edges (symmetric row <-> column) connectivity
5428 * \param[in] halo halo structure associated with cells, or NULL
5429 * \param[in] numbering vectorization or thread-related numbering info,
5430 * or NULL
5431 *
5432 * \return pointer to created matrix structure;
5433 */
5434 /*----------------------------------------------------------------------------*/
5435
5436 cs_matrix_structure_t *
cs_matrix_structure_create(cs_matrix_type_t type,bool have_diag,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_halo_t * halo,const cs_numbering_t * numbering)5437 cs_matrix_structure_create(cs_matrix_type_t type,
5438 bool have_diag,
5439 cs_lnum_t n_rows,
5440 cs_lnum_t n_cols_ext,
5441 cs_lnum_t n_edges,
5442 const cs_lnum_2_t *edges,
5443 const cs_halo_t *halo,
5444 const cs_numbering_t *numbering)
5445 {
5446 cs_matrix_structure_t *ms;
5447
5448 BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5449
5450 ms->type = type;
5451
5452 ms->n_rows = n_rows;
5453 ms->n_cols_ext = n_cols_ext;
5454
5455 /* Define Structure */
5456
5457 switch(ms->type) {
5458 case CS_MATRIX_NATIVE:
5459 ms->structure = _create_struct_native(n_rows,
5460 n_cols_ext,
5461 n_edges,
5462 edges);
5463 break;
5464 case CS_MATRIX_CSR:
5465 ms->structure = _create_struct_csr(have_diag,
5466 n_rows,
5467 n_cols_ext,
5468 n_edges,
5469 edges);
5470 break;
5471 case CS_MATRIX_MSR:
5472 ms->structure = _create_struct_csr(false,
5473 n_rows,
5474 n_cols_ext,
5475 n_edges,
5476 edges);
5477 break;
5478 default:
5479 bft_error(__FILE__, __LINE__, 0,
5480 _("Handling of matrixes in format type %d\n"
5481 "is not operational yet."),
5482 type);
5483 break;
5484 }
5485
5486 /* Set pointers to structures shared from mesh here */
5487
5488 ms->halo = halo;
5489 ms->numbering = numbering;
5490 ms->assembler = NULL;
5491
5492 return ms;
5493 }
5494
5495 /*----------------------------------------------------------------------------*/
5496 /*!
5497 * \brief Create a matrix structure based on a MSR connectivity definition.
5498 *
5499 * Only CSR and MSR formats are handled.
5500 *
5501 * col_id is sorted row by row during the creation of this structure.
5502 *
5503 * In case the property of the row index and col_id arrays are transferred
5504 * to the structure, the arrays pointers passed as arguments are set to NULL,
5505 * to help ensure the caller does not use the original arrays directly after
5506 * this call.
5507 *
5508 * \param[in] type type of matrix considered
5509 * \param[in] transfer transfer property of row_index and col_id
5510 * if true, map them otherwise
5511 * \param[in] have_diag indicates if the structure includes the
5512 * diagonal (should be the same for all rows)
5513 * \param[in] n_rows local number of rows
5514 * \param[in] n_cols_ext local number of columns + ghosts
5515 * \param[in] row_index pointer to index on rows
5516 * \param[in, out] col_id pointer to array of colum ids related to
5517 * the row index
5518 * \param[in] halo halo structure for synchronization, or NULL
5519 * \param[in] numbering vectorization or thread-related numbering info,
5520 * or NULL
5521 *
5522 * \return a pointer to a created matrix structure
5523 */
5524 /*----------------------------------------------------------------------------*/
5525
5526 cs_matrix_structure_t *
cs_matrix_structure_create_msr(cs_matrix_type_t type,bool transfer,bool have_diag,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,cs_lnum_t ** row_index,cs_lnum_t ** col_id,const cs_halo_t * halo,const cs_numbering_t * numbering)5527 cs_matrix_structure_create_msr(cs_matrix_type_t type,
5528 bool transfer,
5529 bool have_diag,
5530 cs_lnum_t n_rows,
5531 cs_lnum_t n_cols_ext,
5532 cs_lnum_t **row_index,
5533 cs_lnum_t **col_id,
5534 const cs_halo_t *halo,
5535 const cs_numbering_t *numbering)
5536 {
5537 cs_matrix_structure_t *ms = NULL;
5538
5539 BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5540
5541 ms->type = type;
5542
5543 ms->n_rows = n_rows;
5544 ms->n_cols_ext = n_cols_ext;
5545
5546 /* Define Structure */
5547
5548 switch(ms->type) {
5549 case CS_MATRIX_CSR:
5550 ms->structure = _create_struct_csr_from_csr(have_diag,
5551 transfer,
5552 false,
5553 n_rows,
5554 n_cols_ext,
5555 row_index,
5556 col_id);
5557 break;
5558 case CS_MATRIX_MSR:
5559 ms->structure = _create_struct_csr_from_csr(false,
5560 transfer,
5561 false,
5562 n_rows,
5563 n_cols_ext,
5564 row_index,
5565 col_id);
5566 break;
5567 default:
5568 if (type >= 0 && type < CS_MATRIX_N_BUILTIN_TYPES)
5569 bft_error(__FILE__, __LINE__, 0,
5570 _("%s: handling of matrices in %s format\n"
5571 "is not operational yet."),
5572 __func__,
5573 _(_matrix_type_name[type]));
5574 else
5575 bft_error(__FILE__, __LINE__, 0,
5576 _("%s: handling of matrices in external format type %d\n"
5577 "is not handled by this function."),
5578 __func__, (int)type);
5579 break;
5580 }
5581
5582 /* Set pointers to structures shared from mesh here */
5583
5584 ms->halo = halo;
5585 ms->numbering = numbering;
5586 ms->assembler = NULL;
5587
5588 return ms;
5589 }
5590
5591 /*----------------------------------------------------------------------------*/
5592 /*!
5593 * \brief Create an MSR matrix structure sharing an existing connectivity
5594 * definition.
5595 *
5596 * Note that as the structure created maps to the given existing
5597 * cell global number, face -> cell connectivity arrays, and cell halo
5598 * structure, it must be destroyed before they are freed
5599 * (usually along with the code's main face -> cell structure).
5600 *
5601 * \param[in] have_diag indicates if the structure includes the
5602 * diagonal (should be the same for all rows)
 * \param[in]  direct_assembly  true if each value corresponds to
 *                              a unique face
5605 * \param[in] n_rows local number of rows
5606 * \param[in] n_cols_ext local number of columns + ghosts
5607 * \param[in] row_index index on rows
5608 * \param[in] col_id array of colum ids related to the row index
5609 * \param[in] halo halo structure for synchronization, or NULL
5610 * \param[in] numbering vectorization or thread-related numbering
5611 * info, or NULL
5612 *
5613 * \returns a pointer to a created matrix structure
5614 */
5615 /*----------------------------------------------------------------------------*/
5616
5617 cs_matrix_structure_t *
cs_matrix_structure_create_msr_shared(bool have_diag,bool direct_assembly,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,const cs_lnum_t * row_index,const cs_lnum_t * col_id,const cs_halo_t * halo,const cs_numbering_t * numbering)5618 cs_matrix_structure_create_msr_shared(bool have_diag,
5619 bool direct_assembly,
5620 cs_lnum_t n_rows,
5621 cs_lnum_t n_cols_ext,
5622 const cs_lnum_t *row_index,
5623 const cs_lnum_t *col_id,
5624 const cs_halo_t *halo,
5625 const cs_numbering_t *numbering)
5626 {
5627 cs_matrix_structure_t *ms = NULL;
5628
5629 BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5630
5631 ms->type = CS_MATRIX_MSR;
5632
5633 ms->n_rows = n_rows;
5634 ms->n_cols_ext = n_cols_ext;
5635
5636 /* Define Structure */
5637
5638 ms->structure = _create_struct_csr_from_shared(have_diag,
5639 direct_assembly,
5640 n_rows,
5641 n_cols_ext,
5642 row_index,
5643 col_id);
5644
5645 /* Set pointers to structures shared from mesh here */
5646
5647 ms->halo = halo;
5648 ms->numbering = numbering;
5649 ms->assembler = NULL;
5650
5651 return ms;
5652 }
5653
5654 /*----------------------------------------------------------------------------*/
5655 /*!
5656 * \brief Create a matrix structure using a matrix assembler.
5657 *
5658 * Only CSR and MSR formats are handled.
5659 *
5660 * \param[in] type type of matrix considered
5661 * \param[in] ma pointer to matrix assembler structure
5662 *
5663 * \return a pointer to a created matrix structure
5664 */
5665 /*----------------------------------------------------------------------------*/
5666
5667 cs_matrix_structure_t *
cs_matrix_structure_create_from_assembler(cs_matrix_type_t type,cs_matrix_assembler_t * ma)5668 cs_matrix_structure_create_from_assembler(cs_matrix_type_t type,
5669 cs_matrix_assembler_t *ma)
5670 {
5671 cs_matrix_structure_t *ms = NULL;
5672
5673 BFT_MALLOC(ms, 1, cs_matrix_structure_t);
5674
5675 ms->type = type;
5676
5677 ms->n_rows = cs_matrix_assembler_get_n_rows(ma);
5678 ms->n_cols_ext = cs_matrix_assembler_get_n_columns(ma);;
5679
5680 /* Define internal structure */
5681
5682 ms->structure = _structure_from_assembler(ms->type,
5683 ms->n_rows,
5684 ms->n_cols_ext,
5685 ma);
5686
5687 /* Set pointers to structures shared from mesh here */
5688
5689 ms->halo = cs_matrix_assembler_get_halo(ma);
5690
5691 ms->numbering = NULL;
5692
5693 ms->assembler = ma;
5694
5695 return ms;
5696 }
5697
5698 /*----------------------------------------------------------------------------*/
5699 /*!
5700 * \brief Destroy a matrix structure.
5701 *
5702 * \param[in, out] ms pointer to matrix structure pointer
5703 */
5704 /*----------------------------------------------------------------------------*/
5705
5706 void
cs_matrix_structure_destroy(cs_matrix_structure_t ** ms)5707 cs_matrix_structure_destroy(cs_matrix_structure_t **ms)
5708 {
5709 if (ms != NULL && *ms != NULL) {
5710
5711 cs_matrix_structure_t *_ms = *ms;
5712
5713 _structure_destroy(_ms->type, &(_ms->structure));
5714
5715 /* Now free main structure */
5716
5717 BFT_FREE(*ms);
5718 }
5719 }
5720
5721 /*----------------------------------------------------------------------------*/
5722 /*!
5723 * \brief Create a matrix container using a given structure.
5724 *
5725 * Note that the matrix container maps to the assigned structure,
5726 * so it must be destroyed before that structure.
5727 *
5728 * \param[in] ms associated matrix structure
5729 *
5730 * \return pointer to created matrix structure;
5731 */
5732 /*----------------------------------------------------------------------------*/
5733
5734 cs_matrix_t *
cs_matrix_create(const cs_matrix_structure_t * ms)5735 cs_matrix_create(const cs_matrix_structure_t *ms)
5736 {
5737 assert(ms != NULL); /* Sanity check */
5738
5739 cs_matrix_t *m = _matrix_create(ms->type);
5740
5741 /* Map shared structure */
5742
5743 m->n_rows = ms->n_rows;
5744 m->n_cols_ext = ms->n_cols_ext;
5745
5746 m->structure = ms->structure;
5747
5748 m->halo = ms->halo;
5749 m->numbering = ms->numbering;
5750 m->assembler = ms->assembler;
5751
5752 return m;
5753 }
5754
5755 /*----------------------------------------------------------------------------*/
5756 /*!
5757 * \brief Create a matrix directly from assembler.
5758 *
5759 * Only CSR and MSR formats are handled.
5760 *
5761 * \param[in] type type of matrix considered
5762 * \param[in] ma pointer to matrix assembler structure
5763 *
5764 * \return a pointer to a created matrix structure
5765 */
5766 /*----------------------------------------------------------------------------*/
5767
5768 cs_matrix_t *
cs_matrix_create_from_assembler(cs_matrix_type_t type,cs_matrix_assembler_t * ma)5769 cs_matrix_create_from_assembler(cs_matrix_type_t type,
5770 cs_matrix_assembler_t *ma)
5771 {
5772 cs_matrix_t *m = _matrix_create(type);
5773
5774 m->assembler = ma;
5775
5776 m->type = type;
5777
5778 m->n_rows = cs_matrix_assembler_get_n_rows(ma);
5779 m->n_cols_ext = cs_matrix_assembler_get_n_columns(ma);;
5780
5781 /* Define internal structure */
5782
5783 m->_structure = _structure_from_assembler(m->type,
5784 m->n_rows,
5785 m->n_cols_ext,
5786 ma);
5787 m->structure = m->_structure;
5788
5789 /* Set pointers to structures shared from mesh here */
5790
5791 m->halo = cs_matrix_assembler_get_halo(ma);
5792
5793 m->numbering = NULL;
5794
5795 m->assembler = ma;
5796
5797 return m;
5798 }
5799
5800 /*----------------------------------------------------------------------------*/
5801 /*!
5802 * \brief Create a matrix container by copying another
5803 *
5804 * Note that the matrix containers share the same assigned structure,
5805 * so they must be both destroyed before that structure.
5806 *
5807 * If assigned, coefficients are not copied.
5808 *
5809 * \param[in] src reference matrix structure
5810 *
5811 * \return pointer to created matrix structure;
5812 */
5813 /*----------------------------------------------------------------------------*/
5814
5815 cs_matrix_t *
cs_matrix_create_by_copy(cs_matrix_t * src)5816 cs_matrix_create_by_copy(cs_matrix_t *src)
5817 {
5818 cs_matrix_t *m;
5819
5820 BFT_MALLOC(m, 1, cs_matrix_t);
5821
5822 memcpy(m, src, sizeof(cs_matrix_t));
5823
5824 /* Define coefficients */
5825
5826 switch(m->type) {
5827 case CS_MATRIX_NATIVE:
5828 m->coeffs = _create_coeff_native();
5829 break;
5830 case CS_MATRIX_CSR:
5831 m->coeffs = _create_coeff_csr();
5832 break;
5833 case CS_MATRIX_MSR:
5834 m->coeffs = _create_coeff_msr();
5835 break;
5836 default:
5837 bft_error(__FILE__, __LINE__, 0,
5838 _("Handling of matrixes in format type %d\n"
5839 "is not operational yet."),
5840 m->type);
5841 break;
5842 }
5843
5844 cs_matrix_release_coefficients(m);
5845
5846 return m;
5847 }
5848
5849 /*----------------------------------------------------------------------------*/
5850 /*!
5851 * \brief Create a matrix based on the local restriction of a base matrix.
5852 *
5853 * Coefficients are copied. Some coefficients may be shared with the
5854 * parent matrix, so the base matrix must not be destroyed before the
5855 * restriction matrix.
5856 *
5857 * \param[in] src reference matrix structure
5858 *
5859 * \return pointer to created matrix structure;
5860 */
5861 /*----------------------------------------------------------------------------*/
5862
cs_matrix_t *
cs_matrix_create_by_local_restrict(const cs_matrix_t  *src)
{
  cs_matrix_t *m = NULL;

  const cs_lnum_t n_rows = src->n_rows;
  const cs_lnum_t *eb_size = src->eb_size;

  /* Shallow copy, then restrict the column range to local rows only. */

  BFT_MALLOC(m, 1, cs_matrix_t);
  memcpy(m, src, sizeof(cs_matrix_t));
  m->n_cols_ext = m->n_rows;

  m->structure = NULL;
  m->_structure = NULL;

  /* Parallel/mesh metadata does not apply to the restricted matrix. */

  m->halo = NULL;
  m->numbering = NULL;
  m->assembler = NULL;
  m->xa = NULL;
  m->coeffs = NULL;

  /* Define coefficients */

  switch(m->type) {
  case CS_MATRIX_MSR:
    {
      m->_structure = _create_struct_csr_from_restrict_local(src->structure);
      m->structure = m->_structure;
      m->coeffs = _create_coeff_msr();
      cs_matrix_coeff_msr_t *mc = m->coeffs;
      cs_matrix_coeff_msr_t *mc_src = src->coeffs;
      const cs_matrix_struct_csr_t *ms = m->structure;
      const cs_matrix_struct_csr_t *ms_src = src->structure;
      /* Diagonal values are shared with the parent matrix, so src
         must not be destroyed before this restriction. */
      mc->d_val = mc_src->d_val;
      /* Extra-diagonal values are copied row by row;
         eb_size[3] is the line*column extent of one block entry. */
      BFT_MALLOC(mc->_x_val, src->eb_size[3]*ms->row_index[n_rows], cs_real_t);
      mc->x_val = mc->_x_val;
      for (cs_lnum_t ii = 0; ii < n_rows; ii++) {
        const cs_lnum_t n_cols = ms->row_index[ii+1] - ms->row_index[ii];
        const cs_real_t *s_row = mc_src->x_val
                                 + ms_src->row_index[ii]*eb_size[3];
        cs_real_t *m_row = mc->_x_val + ms->row_index[ii]*eb_size[3];
        memcpy(m_row, s_row, sizeof(cs_real_t)*eb_size[3]*n_cols);
      }
      mc->max_db_size = m->db_size[3];
      mc->max_eb_size = m->eb_size[3];
    }
    break;
  case CS_MATRIX_NATIVE:
  case CS_MATRIX_CSR:
  default:
    bft_error(__FILE__, __LINE__, 0,
              _("Handling of matrixes in %s format\n"
                "is not operational yet."),
              _(m->type_name));
    break;
  }

  return m;
}
5922
5923 /*----------------------------------------------------------------------------
5924 * Destroy a matrix structure.
5925 *
5926 * In the case of a compound matrix, sub-matrices are not destroyed.
5927 *
5928 * parameters:
5929 * matrix <-> pointer to matrix structure pointer
5930 *----------------------------------------------------------------------------*/
5931
5932 void
cs_matrix_destroy(cs_matrix_t ** matrix)5933 cs_matrix_destroy(cs_matrix_t **matrix)
5934 {
5935 if (matrix != NULL && *matrix != NULL) {
5936
5937 cs_matrix_t *m = *matrix;
5938
5939 m->destroy_coefficients(m);
5940
5941 if (m->_structure != NULL) {
5942 m->destroy_structure(&(m->_structure));
5943 m->structure = NULL;
5944 }
5945
5946 /* Now free main structure */
5947
5948 BFT_FREE(*matrix);
5949 }
5950 }
5951
5952 /*----------------------------------------------------------------------------*/
5953 /*!
5954 * \brief Return matrix type.
5955 *
5956 * \param[in] matrix pointer to matrix structure
5957 */
5958 /*----------------------------------------------------------------------------*/
5959
5960 cs_matrix_type_t
cs_matrix_get_type(const cs_matrix_t * matrix)5961 cs_matrix_get_type(const cs_matrix_t *matrix)
5962 {
5963 if (matrix == NULL)
5964 bft_error(__FILE__, __LINE__, 0,
5965 _("The matrix is not defined."));
5966 return matrix->type;
5967 }
5968
5969 /*----------------------------------------------------------------------------
5970 * Return matrix type name.
5971 *
5972 * parameters:
5973 * matrix --> pointer to matrix structure
5974 *----------------------------------------------------------------------------*/
5975
5976 const char *
cs_matrix_get_type_name(const cs_matrix_t * matrix)5977 cs_matrix_get_type_name(const cs_matrix_t *matrix)
5978 {
5979 if (matrix == NULL)
5980 bft_error(__FILE__, __LINE__, 0,
5981 _("%s: matrix not defined."), __func__);
5982
5983 return matrix->type_name;
5984 }
5985
5986 /*----------------------------------------------------------------------------
5987 * Return matrix type full name.
5988 *
5989 * parameters:
5990 * matrix --> pointer to matrix structure
5991 *----------------------------------------------------------------------------*/
5992
5993 const char *
cs_matrix_get_type_fullname(const cs_matrix_t * matrix)5994 cs_matrix_get_type_fullname(const cs_matrix_t *matrix)
5995 {
5996 if (matrix == NULL)
5997 bft_error(__FILE__, __LINE__, 0,
5998 _("%s: matrix not defined."), __func__);
5999
6000 return matrix->type_fname;
6001 }
6002
6003 /*----------------------------------------------------------------------------*/
6004 /*!
6005 * \brief Return number of columns in a matrix.
6006 *
6007 * \param[in] matrix pointer to matrix structure
6008 */
6009 /*----------------------------------------------------------------------------*/
6010
6011 cs_lnum_t
cs_matrix_get_n_columns(const cs_matrix_t * matrix)6012 cs_matrix_get_n_columns(const cs_matrix_t *matrix)
6013 {
6014 if (matrix == NULL)
6015 bft_error(__FILE__, __LINE__, 0,
6016 _("The matrix is not defined."));
6017 return matrix->n_cols_ext;
6018 }
6019
6020 /*----------------------------------------------------------------------------*/
6021 /*!
6022 * \brief Return number of rows in matrix.
6023 *
6024 * \param[in] matrix pointer to matrix structure
6025 */
6026 /*----------------------------------------------------------------------------*/
6027
6028 cs_lnum_t
cs_matrix_get_n_rows(const cs_matrix_t * matrix)6029 cs_matrix_get_n_rows(const cs_matrix_t *matrix)
6030 {
6031 if (matrix == NULL)
6032 bft_error(__FILE__, __LINE__, 0,
6033 _("The matrix is not defined."));
6034 return matrix->n_rows;
6035 }
6036
6037 /*----------------------------------------------------------------------------*/
6038 /*!
6039 * \brief Return number of entries in matrix.
6040 *
6041 * When the block size is > 1, the number reported is the number of
6042 * entry blocks, not individual entries.
6043 *
6044 * \param[in] matrix pointer to matrix structure
6045 */
6046 /*----------------------------------------------------------------------------*/
6047
6048 cs_lnum_t
cs_matrix_get_n_entries(const cs_matrix_t * matrix)6049 cs_matrix_get_n_entries(const cs_matrix_t *matrix)
6050 {
6051 cs_lnum_t retval = 0;
6052
6053 if (matrix == NULL)
6054 bft_error(__FILE__, __LINE__, 0,
6055 _("The matrix is not defined."));
6056
6057 switch(matrix->type) {
6058 case CS_MATRIX_NATIVE:
6059 {
6060 const cs_matrix_struct_native_t *ms = matrix->structure;
6061 retval = ms->n_edges*2 + ms->n_rows;
6062 }
6063 break;
6064 case CS_MATRIX_CSR:
6065 {
6066 const cs_matrix_struct_csr_t *ms = matrix->structure;
6067 retval = ms->row_index[ms->n_rows];
6068 }
6069 break;
6070 case CS_MATRIX_MSR:
6071 {
6072 const cs_matrix_struct_csr_t *ms = matrix->structure;
6073 retval = ms->row_index[ms->n_rows] + ms->n_rows;
6074 }
6075 break;
6076 default:
6077 break;
6078 }
6079
6080 return retval;
6081 }
6082
6083 /*----------------------------------------------------------------------------*/
6084 /*!
6085 * \brief Return matrix diagonal block sizes.
6086 *
6087 * Block sizes are defined by a array of 4 values:
6088 * 0: useful block size, 1: vector block extents,
6089 * 2: matrix line extents, 3: matrix line*column extents
6090 *
6091 * \param[in] matrix pointer to matrix structure
6092 *
6093 * \return pointer to block sizes
6094 */
6095 /*----------------------------------------------------------------------------*/
6096
6097 const cs_lnum_t *
cs_matrix_get_diag_block_size(const cs_matrix_t * matrix)6098 cs_matrix_get_diag_block_size(const cs_matrix_t *matrix)
6099 {
6100 if (matrix == NULL)
6101 bft_error(__FILE__, __LINE__, 0,
6102 _("The matrix is not defined."));
6103
6104 return matrix->db_size;
6105 }
6106
6107 /*----------------------------------------------------------------------------*/
6108 /*!
6109 * \brief Return matrix extra-diagonal block sizes.
6110 *
6111 * Block sizes are defined by a array of 4 values:
6112 * 0: useful block size, 1: vector block extents,
6113 * 2: matrix line extents, 3: matrix line*column extents
6114 *
6115 * \param[in] matrix pointer to matrix structure
6116 *
6117 * \return pointer to block sizes
6118 */
6119 /*----------------------------------------------------------------------------*/
6120
6121 const cs_lnum_t *
cs_matrix_get_extra_diag_block_size(const cs_matrix_t * matrix)6122 cs_matrix_get_extra_diag_block_size(const cs_matrix_t *matrix)
6123 {
6124 if (matrix == NULL)
6125 bft_error(__FILE__, __LINE__, 0,
6126 _("The matrix is not defined."));
6127
6128 return matrix->eb_size;
6129 }
6130
6131 /*----------------------------------------------------------------------------*/
6132 /*!
6133 * \brief Return pointer to matrix halo structure.
6134 *
6135 * \param[in] matrix pointer to matrix structure
6136 *
6137 * \return pointer to halo strucuture
6138 */
6139 /*----------------------------------------------------------------------------*/
6140
6141 const cs_halo_t *
cs_matrix_get_halo(const cs_matrix_t * matrix)6142 cs_matrix_get_halo(const cs_matrix_t *matrix)
6143 {
6144 if (matrix == NULL)
6145 bft_error(__FILE__, __LINE__, 0,
6146 _("The matrix is not defined."));
6147
6148 return matrix->halo;
6149 }
6150
6151 /*----------------------------------------------------------------------------*/
6152 /*!
6153 * \brief Get matrix fill type, depending on block sizes.
6154 *
6155 * Block sizes are defined by an optional array of 4 values:
6156 * 0: useful block size, 1: vector block extents,
6157 * 2: matrix line extents, 3: matrix line*column extents
6158 *
6159 * \param[in] symmetric indicates if matrix coefficients
6160 * are symmetric
6161 * \param[in] diag_block_size block sizes for diagonal, or NULL
6162 * \param[in] extra_diag_block_size block sizes for extra diagonal, or NULL
6163 *
6164 * \return matrix fill type
6165 */
6166 /*----------------------------------------------------------------------------*/
6167
6168 cs_matrix_fill_type_t
cs_matrix_get_fill_type(bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size)6169 cs_matrix_get_fill_type(bool symmetric,
6170 const cs_lnum_t *diag_block_size,
6171 const cs_lnum_t *extra_diag_block_size)
6172 {
6173 cs_matrix_fill_type_t fill_type = CS_MATRIX_N_FILL_TYPES;
6174
6175 cs_lnum_t _db_size = 1, _eb_size = 1;
6176 if (diag_block_size != NULL)
6177 _db_size = diag_block_size[0];
6178
6179 if (extra_diag_block_size != NULL)
6180 _eb_size = extra_diag_block_size[0];
6181
6182 /* Set fill type */
6183
6184 cs_base_check_bool(&symmetric);
6185
6186 if (_db_size == 1) {
6187 if (symmetric)
6188 fill_type = CS_MATRIX_SCALAR_SYM;
6189 else
6190 fill_type = CS_MATRIX_SCALAR;
6191 }
6192 else if (_eb_size == 1) {
6193 if (symmetric)
6194 fill_type = CS_MATRIX_BLOCK_D_SYM;
6195 else if (_db_size == 6)
6196 fill_type = CS_MATRIX_BLOCK_D_66;
6197 else
6198 fill_type = CS_MATRIX_BLOCK_D;
6199 }
6200 else
6201 fill_type = CS_MATRIX_BLOCK;
6202
6203 return fill_type;
6204 }
6205
6206 /*----------------------------------------------------------------------------*/
6207 /*!
6208 * \brief Set matrix coefficients defined relative to a "native" edge graph,
6209 * sharing arrays with the caller when possible.
6210 *
6211 * With shared arrays, the matrix becomes unusable if the arrays passed as
6212 * arguments are not be modified (its coefficients should be unset first
6213 * to mark this).
6214 *
6215 * Depending on current options and initialization, values will be copied
6216 * or simply mapped.
6217 *
6218 * Block sizes are defined by an optional array of 4 values:
6219 * 0: useful block size, 1: vector block extents,
6220 * 2: matrix line extents, 3: matrix line*column extents
6221 *
6222 * \param[in, out] matrix pointer to matrix structure
6223 * \param[in] symmetric indicates if matrix coefficients
6224 * are symmetric
6225 * \param[in] diag_block_size block sizes for diagonal, or NULL
6226 * \param[in] extra_diag_block_size block sizes for extra diagonal,
6227 * or NULL
6228 * \param[in] n_edges local number of graph edges
6229 * \param[in] edges edges (row <-> column) connectivity
6230 * \param[in] da diagonal values (NULL if zero)
6231 * \param[in] xa extradiagonal values (NULL if zero)
6232 * casts as:
6233 * xa[n_edges] if symmetric,
6234 * xa[n_edges][2] if non symmetric
6235 */
6236 /*----------------------------------------------------------------------------*/
6237
6238 void
cs_matrix_set_coefficients(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size,const cs_lnum_t n_edges,const cs_lnum_2_t edges[],const cs_real_t * da,const cs_real_t * xa)6239 cs_matrix_set_coefficients(cs_matrix_t *matrix,
6240 bool symmetric,
6241 const cs_lnum_t *diag_block_size,
6242 const cs_lnum_t *extra_diag_block_size,
6243 const cs_lnum_t n_edges,
6244 const cs_lnum_2_t edges[],
6245 const cs_real_t *da,
6246 const cs_real_t *xa)
6247 {
6248 if (matrix == NULL)
6249 bft_error(__FILE__, __LINE__, 0,
6250 _("The matrix is not defined."));
6251
6252 cs_base_check_bool(&symmetric);
6253
6254 /* Set fill type */
6255 _set_fill_info(matrix,
6256 symmetric,
6257 diag_block_size,
6258 extra_diag_block_size);
6259
6260 /* Set coefficients */
6261
6262 if (matrix->set_coefficients != NULL) {
6263 matrix->xa = xa;
6264 matrix->set_coefficients(matrix, symmetric, false, n_edges, edges, da, xa);
6265 }
6266 else
6267 bft_error
6268 (__FILE__, __LINE__, 0,
6269 "Matrix format %s with fill type %s does not handle\n"
6270 "coefficient assignment from native (graph-edge) coefficients.",
6271 matrix->type_name,
6272 cs_matrix_fill_type_name[matrix->fill_type]);
6273 }
6274
6275 /*----------------------------------------------------------------------------*/
6276 /*!
6277 * \brief Set matrix coefficients, copying values to private arrays.
6278 *
6279 * With private arrays, the matrix becomes independant from the
6280 * arrays passed as arguments.
6281 *
6282 * Block sizes are defined by an optional array of 4 values:
6283 * 0: useful block size, 1: vector block extents,
6284 * 2: matrix line extents, 3: matrix line*column extents
6285 *
6286 * \param[in, out] matrix pointer to matrix structure
6287 * \param[in] symmetric indicates if matrix coefficients
6288 * are symmetric
6289 * \param[in] diag_block_size block sizes for diagonal, or NULL
6290 * \param[in] extra_diag_block_size block sizes for extra diagonal,
6291 * or NULL
6292 * \param[in] n_edges local number of graph edges
6293 * \param[in] edges edges (row <-> column) connectivity
6294 * \param[in] da diagonal values (NULL if zero)
6295 * \param[in] xa extradiagonal values (NULL if zero)
6296 * casts as:
6297 * xa[n_edges] if symmetric,
6298 * xa[n_edges][2] if non symmetric
6299 */
6300 /*----------------------------------------------------------------------------*/
6301
6302 void
cs_matrix_copy_coefficients(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size,const cs_lnum_t n_edges,const cs_lnum_2_t edges[],const cs_real_t * da,const cs_real_t * xa)6303 cs_matrix_copy_coefficients(cs_matrix_t *matrix,
6304 bool symmetric,
6305 const cs_lnum_t *diag_block_size,
6306 const cs_lnum_t *extra_diag_block_size,
6307 const cs_lnum_t n_edges,
6308 const cs_lnum_2_t edges[],
6309 const cs_real_t *da,
6310 const cs_real_t *xa)
6311 {
6312 if (matrix == NULL)
6313 bft_error(__FILE__, __LINE__, 0,
6314 _("The matrix is not defined."));
6315
6316 cs_base_check_bool(&symmetric);
6317
6318 _set_fill_info(matrix,
6319 symmetric,
6320 diag_block_size,
6321 extra_diag_block_size);
6322
6323 if (matrix->set_coefficients != NULL)
6324 matrix->set_coefficients(matrix, symmetric, true, n_edges, edges, da, xa);
6325 else
6326 bft_error
6327 (__FILE__, __LINE__, 0,
6328 "Matrix format %s with fill type %s does not handle\n"
6329 "coefficient assignment from native (graph-edge) coefficients.",
6330 matrix->type_name,
6331 cs_matrix_fill_type_name[matrix->fill_type]);
6332 }
6333
6334 /*----------------------------------------------------------------------------*/
6335 /*!
6336 * \brief Set matrix coefficients in an MSR format, transfering the
6337 * property of those arrays to the matrix.
6338 *
6339 * If the matrix is also in MSR format, this avoids an extra copy.
6340 * If it is in a different format, values are copied to the structure,
6341 * and the original arrays freed. In any case, the arrays pointers passed as
6342 * arguments are set to NULL, to help ensure the caller does not use the
6343 * original arrays directly after this call.
6344 *
6345 * Block sizes are defined by an optional array of 4 values:
6346 * 0: useful block size, 1: vector block extents,
6347 * 2: matrix line extents, 3: matrix line*column extents
6348 *
6349 * \param[in, out] matrix pointer to matrix structure
6350 * \param[in] symmetric indicates if matrix coefficients
6351 * are symmetric
6352 * \param[in] diag_block_size block sizes for diagonal, or NULL
6353 * \param[in] extra_diag_block_size block sizes for extra diagonal,
6354 * or NULL
6355 * \param[in] row_index MSR row index (0 to n-1)
6356 * \param[in] col_id MSR column id (0 to n-1)
6357 * \param[in, out] d_val diagonal values (NULL if zero)
6358 * \param[in, out] x_val extradiagonal values (NULL if zero)
6359 */
6360 /*----------------------------------------------------------------------------*/
6361
6362 void
cs_matrix_transfer_coefficients_msr(cs_matrix_t * matrix,bool symmetric,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size,const cs_lnum_t row_index[],const cs_lnum_t col_id[],cs_real_t ** d_val,cs_real_t ** x_val)6363 cs_matrix_transfer_coefficients_msr(cs_matrix_t *matrix,
6364 bool symmetric,
6365 const cs_lnum_t *diag_block_size,
6366 const cs_lnum_t *extra_diag_block_size,
6367 const cs_lnum_t row_index[],
6368 const cs_lnum_t col_id[],
6369 cs_real_t **d_val,
6370 cs_real_t **x_val)
6371 {
6372 const cs_real_t *d_val_p = (d_val != NULL) ? *d_val : NULL;
6373 const cs_real_t *x_val_p = (x_val != NULL) ? *x_val : NULL;
6374
6375 if (matrix == NULL)
6376 bft_error(__FILE__, __LINE__, 0,
6377 _("The matrix is not defined."));
6378
6379 cs_base_check_bool(&symmetric);
6380
6381 _set_fill_info(matrix,
6382 symmetric,
6383 diag_block_size,
6384 extra_diag_block_size);
6385
6386 switch(matrix->type) {
6387
6388 case CS_MATRIX_CSR:
6389 _set_coeffs_csr_from_msr(matrix,
6390 row_index,
6391 col_id,
6392 d_val_p,
6393 d_val,
6394 x_val_p,
6395 x_val);
6396 break;
6397
6398 case CS_MATRIX_MSR:
6399 _set_coeffs_msr_from_msr(matrix,
6400 false, /* ignored in case of transfer */
6401 row_index,
6402 col_id,
6403 d_val_p,
6404 d_val,
6405 x_val_p,
6406 x_val);
6407 break;
6408
6409 default:
6410 bft_error
6411 (__FILE__, __LINE__, 0,
6412 "Matrix format %s with fill type %s does not handle\n"
6413 "coefficient assignment from native (graph-edge) coefficients.",
6414 matrix->type_name,
6415 cs_matrix_fill_type_name[matrix->fill_type]);
6416 }
6417 }
6418
6419 /*----------------------------------------------------------------------------*/
6420 /*!
6421 * \brief Release shared matrix coefficients.
6422 *
6423 * Pointers to mapped coefficients are set to NULL, while
6424 * coefficient copies owned by the matrix are not modified.
6425 *
6426 * This simply ensures the matrix does not maintain pointers
6427 * to nonexistant data.
6428 *
6429 * \param[in, out] matrix pointer to matrix structure
6430 */
6431 /*----------------------------------------------------------------------------*/
6432
6433 void
cs_matrix_release_coefficients(cs_matrix_t * matrix)6434 cs_matrix_release_coefficients(cs_matrix_t *matrix)
6435 {
6436 /* Check API state */
6437
6438 if (matrix == NULL)
6439 bft_error(__FILE__, __LINE__, 0,
6440 _("The matrix is not defined."));
6441
6442 if (matrix->release_coefficients != NULL) {
6443 matrix->xa = NULL;
6444 matrix->release_coefficients(matrix);
6445 }
6446 else {
6447 bft_error
6448 (__FILE__, __LINE__, 0,
6449 "Matrix format %s is missing a release_coefficients function.",
6450 matrix->type_name);
6451 }
6452
6453 /* Set fill type to impossible value */
6454
6455 _clear_fill_info(matrix);
6456 }
6457
6458 /*----------------------------------------------------------------------------*/
6459 /*!
6460 * \brief Create and initialize a CSR matrix assembler values structure.
6461 *
6462 * The associated matrix's structure must have been created using
6463 * \ref cs_matrix_structure_create_from_assembler.
6464 *
6465 * Block sizes are defined by an optional array of 4 values:
6466 * 0: useful block size, 1: vector block extents,
6467 * 2: matrix line extents, 3: matrix line*column extents
6468 *
6469 * \param[in, out] matrix pointer to matrix structure
6470 * \param[in] diag_block_size block sizes for diagonal, or NULL
6471 * \param[in] extra_diag_block_size block sizes for extra diagonal,
6472 * or NULL
6473 *
6474 * \return pointer to initialized matrix assembler values structure;
6475 */
6476 /*----------------------------------------------------------------------------*/
6477
6478 cs_matrix_assembler_values_t *
cs_matrix_assembler_values_init(cs_matrix_t * matrix,const cs_lnum_t * diag_block_size,const cs_lnum_t * extra_diag_block_size)6479 cs_matrix_assembler_values_init(cs_matrix_t *matrix,
6480 const cs_lnum_t *diag_block_size,
6481 const cs_lnum_t *extra_diag_block_size)
6482 {
6483 cs_matrix_assembler_values_t *mav = NULL;
6484
6485 /* Set fill type */
6486
6487 _set_fill_info(matrix,
6488 false, /* symmetric */
6489 diag_block_size,
6490 extra_diag_block_size);
6491
6492 /* Create values assembler */
6493
6494 if (matrix->assembler_values_create != NULL)
6495 mav = matrix->assembler_values_create(matrix,
6496 diag_block_size,
6497 extra_diag_block_size);
6498
6499 else
6500 bft_error(__FILE__, __LINE__, 0,
6501 _("%s: direct assembly handling of matrices of type %s\n"
6502 "is not available."),
6503 __func__, _(matrix->type_name));
6504
6505 return mav;
6506 }
6507
6508 /*----------------------------------------------------------------------------*/
6509 /*!
6510 * \brief Copy matrix diagonal values.
6511 *
6512 * In case of matrixes with block diagonal coefficients, only the true
6513 * diagonal values are copied.
6514 *
6515 * \param[in] matrix pointer to matrix structure
6516 * \param[out] da diagonal (pre-allocated, size: n_rows*block_size)
6517 */
6518 /*----------------------------------------------------------------------------*/
6519
6520 void
cs_matrix_copy_diagonal(const cs_matrix_t * matrix,cs_real_t * restrict da)6521 cs_matrix_copy_diagonal(const cs_matrix_t *matrix,
6522 cs_real_t *restrict da)
6523 {
6524 /* Check API state */
6525
6526 if (matrix == NULL)
6527 bft_error(__FILE__, __LINE__, 0,
6528 _("The matrix is not defined."));
6529
6530 if (matrix->copy_diagonal != NULL)
6531 matrix->copy_diagonal(matrix, da);
6532 }
6533
6534 /*----------------------------------------------------------------------------*/
6535 /*!
6536 * \brief Query matrix coefficients symmetry
6537 *
6538 * \param[in] matrix pointer to matrix structure
6539 *
6540 * \return true if coefficients are symmetric, false otherwise
6541 */
6542 /*----------------------------------------------------------------------------*/
6543
6544 bool
cs_matrix_is_symmetric(const cs_matrix_t * matrix)6545 cs_matrix_is_symmetric(const cs_matrix_t *matrix)
6546 {
6547 return matrix->symmetric;
6548 }
6549
6550 /*----------------------------------------------------------------------------*/
6551 /*!
6552 * \brief Indicate whether coefficients were mapped from native face-based
6553 * arrays.
6554 *
6555 * It is used in the current multgrid code, but should be removed as soon
6556 * as the dependency to the native format is removed.
6557 *
6558 * \param[in] matrix pointer to matrix structure
6559 *
6560 * \return true if coefficients were mapped from native face-based arrays,
6561 * false otherwise
6562 */
6563 /*----------------------------------------------------------------------------*/
6564
6565 bool
cs_matrix_is_mapped_from_native(const cs_matrix_t * matrix)6566 cs_matrix_is_mapped_from_native(const cs_matrix_t *matrix)
6567 {
6568 bool retval = false;
6569
6570 if (matrix->xa != NULL)
6571 retval = true;
6572
6573 return retval;
6574 }
6575
6576 /*----------------------------------------------------------------------------*/
6577 /*!
6578 * \brief Get matrix diagonal values.
6579 *
6580 * In case of matrixes with block diagonal coefficients, a pointer to
6581 * the complete block diagonal is returned.
6582 *
6583 * \param[in] matrix pointer to matrix structure
6584 *
6585 * \return pointer to matrix diagonal array
6586 */
6587 /*----------------------------------------------------------------------------*/
6588
6589 const cs_real_t *
cs_matrix_get_diagonal(const cs_matrix_t * matrix)6590 cs_matrix_get_diagonal(const cs_matrix_t *matrix)
6591 {
6592 const cs_real_t *diag = NULL;
6593
6594 if (matrix->get_diagonal != NULL)
6595 diag = matrix->get_diagonal(matrix);
6596
6597 else
6598 bft_error(__FILE__, __LINE__, 0,
6599 _("%s: not available for matrix type: %s."),
6600 __func__, cs_matrix_get_type_name(matrix));
6601
6602 return diag;
6603 }
6604
6605 /*----------------------------------------------------------------------------*/
6606 /*!
6607 * \brief Get pointer to matrix extra-diagonal values in "native" format
6608 *
6609 * \deprecated
6610 *
6611 * This function only functions if the coefficients were mapped from native
6612 * coefficients using cs_matrix_set_coefficients(), in which case the pointer
6613 * returned is the same as the one passed to that function.
6614 *
6615 * It is used in the current multgrid code, but should be removed as soon
6616 * as the dependency to the native format is removed.
6617 *
6618 * \param[in] matrix pointer to matrix structure
6619 *
6620 * \return pointer to matrix diagonal array
6621 */
6622 /*----------------------------------------------------------------------------*/
6623
6624 const cs_real_t *
cs_matrix_get_extra_diagonal(const cs_matrix_t * matrix)6625 cs_matrix_get_extra_diagonal(const cs_matrix_t *matrix)
6626 {
6627 const cs_real_t *exdiag = NULL;
6628
6629 if (matrix->xa == NULL)
6630 bft_error
6631 (__FILE__, __LINE__, 0,
6632 _("Matrix coefficients were not mapped from native face-based arrays,\n"
6633 "so the extra-diagonal coefficients are not available in that form."));
6634 else
6635 exdiag = matrix->xa;
6636
6637 return exdiag;
6638 }
6639
6640 /*----------------------------------------------------------------------------*/
6641 /*!
6642 * \brief Initialize row info for a given matrix.
6643 *
6644 * \param[out] r row info structure
6645 */
6646 /*----------------------------------------------------------------------------*/
6647
6648 void
cs_matrix_row_init(cs_matrix_row_info_t * r)6649 cs_matrix_row_init(cs_matrix_row_info_t *r)
6650 {
6651 r->row_size = 0;
6652 r->buffer_size = 0;
6653 r->col_id = NULL;
6654 r->_col_id = NULL;
6655 r->vals = NULL;
6656 r->_vals = NULL;
6657 }
6658
6659 /*----------------------------------------------------------------------------*/
6660 /*!
6661 * \brief Finalize row info for a given matrix.
6662 *
6663 * \param[in, out] r row info structure
6664 */
6665 /*----------------------------------------------------------------------------*/
6666
6667 void
cs_matrix_row_finalize(cs_matrix_row_info_t * r)6668 cs_matrix_row_finalize(cs_matrix_row_info_t *r)
6669 {
6670 r->row_size = 0;
6671 r->buffer_size = 0;
6672 r->col_id = NULL;
6673 BFT_FREE(r->_col_id);
6674 r->vals = NULL;
6675 BFT_FREE(r->_vals);
6676 }
6677
6678 /*----------------------------------------------------------------------------*/
6679 /*!
6680 * \brief Get row values for a given matrix.
6681 *
6682 * This function may not work for all matrix types.
6683 *
6684 * In the case of blocked matrixes, the true (non-blocked)
6685 * values are returned.
6686 *
6687 * The row information structure must have been previously initialized
6688 * using \ref cs_matrix_row_init, and should be finalized using
6689 * using \ref cs_matrix_row_finalize, so as to free buffers it may have
6690 * built for certain matrix formats.
6691 *
6692 * \param[in] matrix pointer to matrix structure
6693 * \param[in] row_id id of row to query
6694 * \param[in, out] r row info structure
6695 */
6696 /*----------------------------------------------------------------------------*/
6697
6698 void
cs_matrix_get_row(const cs_matrix_t * matrix,const cs_lnum_t row_id,cs_matrix_row_info_t * r)6699 cs_matrix_get_row(const cs_matrix_t *matrix,
6700 const cs_lnum_t row_id,
6701 cs_matrix_row_info_t *r)
6702 {
6703 cs_lnum_t b_size = matrix->db_size[0];
6704
6705 switch (matrix->type) {
6706
6707 case CS_MATRIX_CSR:
6708 {
6709 const cs_matrix_struct_csr_t *ms = matrix->structure;
6710 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
6711 r->row_size = (ms->row_index[row_id+1] - ms->row_index[row_id])*b_size;
6712 r->col_id = ms->col_id + ms->row_index[row_id]*b_size;
6713 if (mc->val != NULL)
6714 r->vals = mc->val + ms->row_index[row_id]*b_size;
6715 else
6716 r->vals = NULL;
6717 }
6718 break;
6719
6720 case CS_MATRIX_MSR:
6721 {
6722 const cs_lnum_t _row_id = row_id / b_size;
6723 const cs_matrix_struct_csr_t *ms = matrix->structure;
6724 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
6725 const cs_lnum_t n_ed_cols = ms->row_index[_row_id+1]
6726 - ms->row_index[_row_id];
6727 if (b_size == 1)
6728 r->row_size = n_ed_cols + 1;
6729 else if (matrix->eb_size[0] == 1)
6730 r->row_size = n_ed_cols*b_size;
6731 else
6732 r->row_size = (n_ed_cols+1)*b_size;
6733 if (r->buffer_size < r->row_size) {
6734 r->buffer_size = r->row_size*2;
6735 BFT_REALLOC(r->_col_id, r->buffer_size, cs_lnum_t);
6736 r->col_id = r->_col_id;
6737 BFT_REALLOC(r->_vals, r->buffer_size, cs_real_t);
6738 r->vals = r->_vals;
6739 }
6740 cs_lnum_t ii = 0, jj = 0;
6741 const cs_lnum_t *restrict c_id = ms->col_id + ms->row_index[_row_id];
6742 if (b_size == 1) {
6743 const cs_real_t *m_row = mc->val + ms->row_index[_row_id];
6744 for (jj = 0; jj < n_ed_cols && c_id[jj] < _row_id; jj++) {
6745 r->_col_id[ii] = c_id[jj];
6746 r->_vals[ii++] = m_row[jj];
6747 }
6748 r->_col_id[ii] = _row_id;
6749 r->_vals[ii++] = mc->d_val[_row_id];
6750 for (; jj < n_ed_cols; jj++) {
6751 r->_col_id[ii] = c_id[jj];
6752 r->_vals[ii++] = m_row[jj];
6753 }
6754 }
6755 else if (matrix->eb_size[0] == 1) {
6756 const cs_lnum_t _sub_id = row_id % b_size;
6757 const cs_lnum_t *db_size = matrix->db_size;
6758 const cs_real_t *m_row = mc->val + ms->row_index[_row_id];
6759 for (jj = 0; jj < n_ed_cols && c_id[jj] < _row_id; jj++) {
6760 r->_col_id[ii] = c_id[jj]*b_size + _sub_id;
6761 r->_vals[ii++] = m_row[jj];
6762 }
6763 for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6764 r->_col_id[ii] = _row_id*b_size + kk;
6765 r->_vals[ii++] = mc->d_val[ _row_id*db_size[3]
6766 + _sub_id*db_size[2] + kk];
6767 }
6768 for (; jj < n_ed_cols; jj++) {
6769 r->_col_id[ii] = c_id[jj]*b_size + _sub_id;
6770 r->_vals[ii++] = m_row[jj];
6771 }
6772 }
6773 else {
6774 const cs_lnum_t _sub_id = row_id % b_size;
6775 const cs_lnum_t *db_size = matrix->db_size;
6776 const cs_lnum_t *eb_size = matrix->db_size;
6777 const cs_real_t *m_row = mc->val + ms->row_index[_row_id]*eb_size[3];
6778 for (jj = 0; jj < n_ed_cols && c_id[jj] < _row_id; jj++) {
6779 for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6780 r->_col_id[ii] = c_id[jj]*b_size + kk;
6781 r->_vals[ii++] = m_row[_sub_id*eb_size[2] + kk];
6782 }
6783 }
6784 for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6785 r->_col_id[ii] = _row_id*b_size + kk;
6786 r->_vals[ii++] = mc->d_val[ _row_id*db_size[3]
6787 + _sub_id*db_size[2] + kk];
6788 }
6789 for (; jj < n_ed_cols; jj++) {
6790 for (cs_lnum_t kk = 0; kk < b_size; kk++) {
6791 r->_col_id[ii] = c_id[jj]*b_size + kk;
6792 r->_vals[ii++] = m_row[_sub_id*eb_size[2] + kk];
6793 }
6794 }
6795 }
6796 }
6797 break;
6798
6799 default:
6800 bft_error
6801 (__FILE__, __LINE__, 0,
6802 _("Matrix format %s with fill type %s does not handle %s operation."),
6803 matrix->type_name,
6804 cs_matrix_fill_type_name[matrix->fill_type],
6805 __func__);
6806 }
6807 }
6808
6809 /*----------------------------------------------------------------------------*/
6810 /*!
6811 * \brief Get arrays describing a matrix in native format.
6812 *
6813 * This function works for matrix in native format.
6814 *
6815 * Matrix block sizes can be obtained by cs_matrix_get_diag_block_size()
6816 * and cs_matrix_get_extra_diag_block_size().
6817 *
6818 * \param[in] matrix pointer to matrix structure
6819 * \param[out] symmetric true if symmetric
6820 * \param[out] n_edges number of associated faces
6821 * \param[out] edges edges (symmetric row <-> column) connectivity
6822 * \param[out] d_val diagonal values
6823 * \param[out] x_val extra-diagonal values
6824 */
6825 /*----------------------------------------------------------------------------*/
6826
6827 void
cs_matrix_get_native_arrays(const cs_matrix_t * matrix,bool * symmetric,cs_lnum_t * n_edges,const cs_lnum_2_t ** edges,const cs_real_t ** d_val,const cs_real_t ** x_val)6828 cs_matrix_get_native_arrays(const cs_matrix_t *matrix,
6829 bool *symmetric,
6830 cs_lnum_t *n_edges,
6831 const cs_lnum_2_t **edges,
6832 const cs_real_t **d_val,
6833 const cs_real_t **x_val)
6834 {
6835 if (symmetric != NULL)
6836 *symmetric = false;
6837 if (n_edges != NULL)
6838 *n_edges = 0;
6839 if (edges != NULL)
6840 *edges = NULL;
6841 if (d_val != NULL)
6842 *d_val = NULL;
6843 if (x_val != NULL)
6844 *x_val = NULL;
6845
6846 if (matrix->type == CS_MATRIX_NATIVE) {
6847 const cs_matrix_struct_native_t *ms = matrix->structure;
6848 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
6849 if (n_edges != NULL)
6850 *n_edges = ms->n_edges;
6851 if (edges != NULL)
6852 *edges = ms->edges;
6853 if (mc != NULL) {
6854 if (symmetric != NULL)
6855 *symmetric = mc->symmetric;
6856 if (d_val != NULL)
6857 *d_val = mc->da;
6858 if (x_val != NULL)
6859 *x_val = mc->xa;
6860 }
6861 }
6862 }
6863
6864 /*----------------------------------------------------------------------------*/
6865 /*!
6866 * \brief Get arrays describing a matrix in CSR format.
6867 *
6868 * This function only works for an CSR matrix (i.e. there is
6869 * no automatic conversion from another matrix type).
6870 *
6871 * Matrix block sizes can be obtained by cs_matrix_get_diag_block_size()
6872 * and cs_matrix_get_extra_diag_block_size().
6873 *
6874 * \param[in] matrix pointer to matrix structure
6875 * \param[out] row_index CSR row index
6876 * \param[out] col_id CSR column id
6877 * \param[out] val values
6878 */
6879 /*----------------------------------------------------------------------------*/
6880
6881 void
cs_matrix_get_csr_arrays(const cs_matrix_t * matrix,const cs_lnum_t ** row_index,const cs_lnum_t ** col_id,const cs_real_t ** val)6882 cs_matrix_get_csr_arrays(const cs_matrix_t *matrix,
6883 const cs_lnum_t **row_index,
6884 const cs_lnum_t **col_id,
6885 const cs_real_t **val)
6886 {
6887 if (row_index != NULL)
6888 *row_index = NULL;
6889 if (col_id != NULL)
6890 *col_id = NULL;
6891 if (val != NULL)
6892 *val = NULL;
6893
6894 if (matrix->type == CS_MATRIX_CSR) {
6895 const cs_matrix_struct_csr_t *ms = matrix->structure;
6896 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
6897 if (row_index != NULL)
6898 *row_index = ms->row_index;
6899 if (col_id != NULL)
6900 *col_id = ms->col_id;
6901 if (val != NULL && mc != NULL) {
6902 *val = mc->val;
6903 }
6904 }
6905 }
6906
6907 /*----------------------------------------------------------------------------*/
6908 /*!
6909 * \brief Get arrays describing a matrix in MSR format.
6910 *
6911 * This function only works for an MSR matrix (i.e. there is
6912 * no automatic conversion from another matrix type).
6913 *
6914 * Matrix block sizes can be obtained by cs_matrix_get_diag_block_size()
6915 * and cs_matrix_get_extra_diag_block_size().
6916 *
6917 * \param[in] matrix pointer to matrix structure
6918 * \param[out] row_index MSR row index
6919 * \param[out] col_id MSR column id
6920 * \param[out] d_val diagonal values
6921 * \param[out] x_val extra-diagonal values
6922 */
6923 /*----------------------------------------------------------------------------*/
6924
6925 void
cs_matrix_get_msr_arrays(const cs_matrix_t * matrix,const cs_lnum_t ** row_index,const cs_lnum_t ** col_id,const cs_real_t ** d_val,const cs_real_t ** x_val)6926 cs_matrix_get_msr_arrays(const cs_matrix_t *matrix,
6927 const cs_lnum_t **row_index,
6928 const cs_lnum_t **col_id,
6929 const cs_real_t **d_val,
6930 const cs_real_t **x_val)
6931 {
6932 if (row_index != NULL)
6933 *row_index = NULL;
6934 if (col_id != NULL)
6935 *col_id = NULL;
6936 if (d_val != NULL)
6937 *d_val = NULL;
6938 if (x_val != NULL)
6939 *x_val = NULL;
6940
6941 if (matrix->type == CS_MATRIX_MSR) {
6942 const cs_matrix_struct_csr_t *ms = matrix->structure;
6943 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
6944 if (row_index != NULL)
6945 *row_index = ms->row_index;
6946 if (col_id != NULL)
6947 *col_id = ms->col_id;
6948 if (mc != NULL) {
6949 if (d_val != NULL)
6950 *d_val = mc->d_val;
6951 if (x_val != NULL)
6952 *x_val = mc->x_val;
6953 }
6954 }
6955 else
6956 bft_error
6957 (__FILE__, __LINE__, 0,
6958 _("%s is not available for matrix using %s storage."),
6959 __func__,
6960 matrix->type_name);
6961 }
6962
6963 /*----------------------------------------------------------------------------*/
6964 /*!
6965 * \brief Matrix.vector product y = A.x
6966 *
6967 * This function includes a halo update of x prior to multiplication by A.
6968 *
6969 * \param[in] matrix pointer to matrix structure
6970 * \param[in, out] x multipliying vector values
6971 * (ghost values updated)
6972 * \param[out] y resulting vector
6973 */
6974 /*----------------------------------------------------------------------------*/
6975
6976 void
cs_matrix_vector_multiply(const cs_matrix_t * matrix,cs_real_t * restrict x,cs_real_t * restrict y)6977 cs_matrix_vector_multiply(const cs_matrix_t *matrix,
6978 cs_real_t *restrict x,
6979 cs_real_t *restrict y)
6980 {
6981 assert(matrix != NULL);
6982
6983 if (matrix->vector_multiply[matrix->fill_type][0] != NULL)
6984 matrix->vector_multiply[matrix->fill_type][0](matrix, false, true, x, y);
6985
6986 else
6987 bft_error(__FILE__, __LINE__, 0,
6988 _("%s: Matrix of type: %s is missing a vector multiply\n"
6989 "function for fill type %s."),
6990 __func__, cs_matrix_get_type_name(matrix),
6991 cs_matrix_fill_type_name[matrix->fill_type]);
6992 }
6993
6994 /*----------------------------------------------------------------------------*/
6995 /*!
6996 * \brief Matrix.vector product y = A.x with no prior halo update of x.
6997 *
6998 * This function does not include a halo update of x prior to multiplication
6999 * by A, so it should be called only when the halo of x is known to already
7000 * be up to date (in which case we avoid the performance penalty of a
7001 * redundant update by using this variant of the matrix.vector product).
7002 *
7003 * \param[in] matrix pointer to matrix structure
 * \param[in]       x       multiplying vector values
7005 * \param[out] y resulting vector
7006 */
7007 /*----------------------------------------------------------------------------*/
7008
7009 void
cs_matrix_vector_multiply_nosync(const cs_matrix_t * matrix,cs_real_t * restrict x,cs_real_t * restrict y)7010 cs_matrix_vector_multiply_nosync(const cs_matrix_t *matrix,
7011 cs_real_t *restrict x,
7012 cs_real_t *restrict y)
7013 {
7014 assert(matrix != NULL);
7015
7016 if (matrix->vector_multiply[matrix->fill_type][0] != NULL)
7017 matrix->vector_multiply[matrix->fill_type][0](matrix, false, false, x, y);
7018
7019 else
7020 bft_error(__FILE__, __LINE__, 0,
7021 _("%s: Matrix of type: %s is missing a vector multiply\n"
7022 "function for fill type %s."),
7023 __func__, cs_matrix_get_type_name(matrix),
7024 cs_matrix_fill_type_name[matrix->fill_type]);
7025 }
7026
7027 /*----------------------------------------------------------------------------*/
7028 /*!
7029 * \brief Matrix.vector product y = (A-D).x
7030 *
7031 * This function includes a halo update of x prior to multiplication by A.
7032 *
7033 * \param[in] matrix pointer to matrix structure
 * \param[in, out]  x       multiplying vector values
7035 * (ghost values updated)
7036 * \param[out] y resulting vector
7037 */
7038 /*----------------------------------------------------------------------------*/
7039
7040 void
cs_matrix_exdiag_vector_multiply(const cs_matrix_t * matrix,cs_real_t * restrict x,cs_real_t * restrict y)7041 cs_matrix_exdiag_vector_multiply(const cs_matrix_t *matrix,
7042 cs_real_t *restrict x,
7043 cs_real_t *restrict y)
7044 {
7045 assert(matrix != NULL);
7046
7047 if (matrix->vector_multiply[matrix->fill_type][1] != NULL)
7048 matrix->vector_multiply[matrix->fill_type][1](matrix, true, true, x, y);
7049
7050 else
7051 bft_error(__FILE__, __LINE__, 0,
7052 _("%s: Matrix of type: %s is missing a vector multiply\n"
7053 "vy extra-diagonal function for fill type %s."),
7054 __func__, cs_matrix_get_type_name(matrix),
7055 cs_matrix_fill_type_name[matrix->fill_type]);
7056 }
7057
7058 /*----------------------------------------------------------------------------*/
7059 /*!
7060 * \brief Build matrix variant
7061 *
7062 * The variant will initially use default matrix-vector functions,
7063 * which can be later modified using cs_matrix_variant_set_func().
7064 *
7065 * \param[in] m pointer to matrix
7066 */
7067 /*----------------------------------------------------------------------------*/
7068
7069 cs_matrix_variant_t *
cs_matrix_variant_create(cs_matrix_t * m)7070 cs_matrix_variant_create(cs_matrix_t *m)
7071 {
7072 cs_matrix_variant_t *mv;
7073
7074 BFT_MALLOC(mv, 1, cs_matrix_variant_t);
7075
7076 mv->type = m->type;
7077 mv->fill_type = m->fill_type;
7078
7079 for (int j = 0; j < 2; j++) {
7080 mv->vector_multiply[j] = NULL;
7081 strncpy(mv->name[j], "default", 31);
7082 mv->name[j][31] = '\0';
7083 }
7084
7085 (void) _set_spmv_func(m->type,
7086 m->numbering,
7087 m->fill_type,
7088 2,
7089 NULL, /* func_name */
7090 mv->vector_multiply);
7091
7092 return mv;
7093 }
7094
7095 /*----------------------------------------------------------------------------*/
7096 /*!
7097 * \brief Build list of variants for tuning or testing.
7098 *
7099 * The matrix coefficients should be assigned, so the fill type can
7100 * be determined.
7101 *
7102 * \param[in] m associated matrix
7103 * \param[out] n_variants number of variants
7104 * \param[out] m_variant array of matrix variants
7105 */
7106 /*----------------------------------------------------------------------------*/
7107
void
cs_matrix_variant_build_list(const cs_matrix_t       *m,
                             int                     *n_variants,
                             cs_matrix_variant_t    **m_variant)
{
  /* Capacity of the *m_variant array; grown as needed by _variant_add. */
  int n_variants_max = 0;

  *n_variants = 0;
  *m_variant = NULL;

  /* SpMV implementation matching the matrix fill type; left NULL when
     no implementation exists for a given storage/fill combination
     (_variant_add is still called in that case — presumably it skips
     or records NULL entries; TODO confirm against its definition). */
  cs_matrix_vector_product_t *vector_multiply = NULL;

  /* Native (edge-based) storage variants
     ------------------------------------ */

  if (m->type == CS_MATRIX_NATIVE) {

    /* Baseline (serial) implementation. */
    switch(m->fill_type) {
    case CS_MATRIX_SCALAR:
    case CS_MATRIX_SCALAR_SYM:
      vector_multiply = _mat_vec_p_l_native;
      break;
    case CS_MATRIX_BLOCK_D:
    case CS_MATRIX_BLOCK_D_66:
    case CS_MATRIX_BLOCK_D_SYM:
      vector_multiply = _b_mat_vec_p_l_native_fixed;
      break;
    case CS_MATRIX_BLOCK:
      vector_multiply = _bb_mat_vec_p_l_native;
      break;
    default:
      vector_multiply = NULL;
    }

    _variant_add(_("native, baseline"),
                 m->type,
                 m->fill_type,
                 2, /* ed_flag */
                 vector_multiply,
                 n_variants,
                 &n_variants_max,
                 m_variant);

    if (m->numbering != NULL) {

#if defined(HAVE_OPENMP)

      /* OpenMP variant using a thread-safe numbering (no atomics). */
      if (m->numbering->type == CS_NUMBERING_THREADS) {

        switch(m->fill_type) {
        case CS_MATRIX_SCALAR:
        case CS_MATRIX_SCALAR_SYM:
          vector_multiply = _mat_vec_p_l_native_omp;
          break;
        case CS_MATRIX_BLOCK_D:
        case CS_MATRIX_BLOCK_D_66:
        case CS_MATRIX_BLOCK_D_SYM:
          vector_multiply = _b_mat_vec_p_l_native_omp;
          break;
        default:
          vector_multiply = NULL;
        }

        _variant_add(_("native, OpenMP"),
                     m->type,
                     m->fill_type,
                     2, /* ed_flag */
                     vector_multiply,
                     n_variants,
                     &n_variants_max,
                     m_variant);
      }

      /* OpenMP variant using atomic updates (any numbering). */
      switch(m->fill_type) {
      case CS_MATRIX_SCALAR:
      case CS_MATRIX_SCALAR_SYM:
        vector_multiply = _mat_vec_p_l_native_omp_atomic;
        break;
      case CS_MATRIX_BLOCK_D:
      case CS_MATRIX_BLOCK_D_66:
      case CS_MATRIX_BLOCK_D_SYM:
        vector_multiply = _b_mat_vec_p_l_native_omp_atomic;
        break;
      default:
        vector_multiply = NULL;
      }

      _variant_add(_("native, OpenMP atomic"),
                   m->type,
                   m->fill_type,
                   2, /* ed_flag */
                   vector_multiply,
                   n_variants,
                   &n_variants_max,
                   m_variant);

#endif

      /* Vectorization-friendly variant (scalar fill types only). */
      if (m->numbering->type == CS_NUMBERING_VECTORIZE) {

        switch(m->fill_type) {
        case CS_MATRIX_SCALAR:
        case CS_MATRIX_SCALAR_SYM:
          vector_multiply = _mat_vec_p_l_native_vector;
          break;
        default:
          vector_multiply = NULL;
        }

        _variant_add(_("native, vectorized"),
                     m->type,
                     m->fill_type,
                     2, /* ed_flag */
                     vector_multiply,
                     n_variants,
                     &n_variants_max,
                     m_variant);

      }

    }

  }

  /* CSR storage variants
     -------------------- */

  if (m->type == CS_MATRIX_CSR) {

    switch(m->fill_type) {
    case CS_MATRIX_SCALAR:
    case CS_MATRIX_SCALAR_SYM:
      vector_multiply = _mat_vec_p_l_csr;
      break;
    default:
      vector_multiply = NULL;
    }

    _variant_add(_("CSR"),
                 m->type,
                 m->fill_type,
                 2, /* ed_flag */
                 vector_multiply,
                 n_variants,
                 &n_variants_max,
                 m_variant);

#if defined(HAVE_MKL)

    switch(m->fill_type) {
    case CS_MATRIX_SCALAR:
    case CS_MATRIX_SCALAR_SYM:
      vector_multiply = _mat_vec_p_l_csr_mkl;
      break;
    default:
      vector_multiply = NULL;
    }

    /* NOTE(review): ed_flag is 0 here (diagonal-included product only),
       unlike every other variant in this function which passes 2 —
       presumably because the MKL CSR kernel has no extra-diagonal-only
       form; confirm against _mat_vec_p_l_csr_mkl. */
    _variant_add(_("CSR, with MKL"),
                 m->type,
                 m->fill_type,
                 0, /* ed_flag */
                 vector_multiply,
                 n_variants,
                 &n_variants_max,
                 m_variant);

#endif /* defined(HAVE_MKL) */

  }

  /* MSR storage variants
     -------------------- */

  if (m->type == CS_MATRIX_MSR) {

    switch(m->fill_type) {
    case CS_MATRIX_SCALAR:
    case CS_MATRIX_SCALAR_SYM:
      vector_multiply = _mat_vec_p_l_msr;
      break;
    case CS_MATRIX_BLOCK_D:
    case CS_MATRIX_BLOCK_D_66:
    case CS_MATRIX_BLOCK_D_SYM:
      vector_multiply = _b_mat_vec_p_l_msr;
      break;
    case CS_MATRIX_BLOCK:
      vector_multiply = _bb_mat_vec_p_l_msr;
      break;
    default:
      vector_multiply = NULL;
    }

    _variant_add(_("MSR"),
                 m->type,
                 m->fill_type,
                 2, /* ed_flag */
                 vector_multiply,
                 n_variants,
                 &n_variants_max,
                 m_variant);

#if defined(HAVE_MKL)

    switch(m->fill_type) {
    case CS_MATRIX_SCALAR:
    case CS_MATRIX_SCALAR_SYM:
      vector_multiply = _mat_vec_p_l_msr_mkl;
      break;
    default:
      vector_multiply = NULL;
    }

    _variant_add(_("MSR, with MKL"),
                 m->type,
                 m->fill_type,
                 2, /* ed_flag */
                 vector_multiply,
                 n_variants,
                 &n_variants_max,
                 m_variant);

#endif /* defined(HAVE_MKL) */

#if defined(HAVE_OPENMP)

    /* NOTE(review): omp_get_num_threads() returns 1 outside a parallel
       region, so this branch is likely dead unless called from within
       one — omp_get_max_threads() may have been intended; confirm. */
    if (omp_get_num_threads() > 1) {

      switch(m->fill_type) {
      case CS_MATRIX_SCALAR:
      case CS_MATRIX_SCALAR_SYM:
        vector_multiply = _mat_vec_p_l_msr_omp_sched;
        break;
      default:
        vector_multiply = NULL;
      }

      _variant_add(_("MSR, OpenMP scheduling"),
                   m->type,
                   m->fill_type,
                   2, /* ed_flag */
                   vector_multiply,
                   n_variants,
                   &n_variants_max,
                   m_variant);

    }

#endif /* defined(HAVE_OPENMP) */

  }

  /* Shrink the array to its exact final size.
     NOTE(review): the assignment to n_variants_max below is a dead
     store (the variable is not read afterwards). */
  n_variants_max = *n_variants;
  BFT_REALLOC(*m_variant, *n_variants, cs_matrix_variant_t);
}
7354
7355 /*----------------------------------------------------------------------------*/
7356 /*!
7357 * \brief Destroy a matrix variant structure.
7358 *
7359 * \param[in, out] mv pointer to matrix variant pointer
7360 */
7361 /*----------------------------------------------------------------------------*/
7362
7363 void
cs_matrix_variant_destroy(cs_matrix_variant_t ** mv)7364 cs_matrix_variant_destroy(cs_matrix_variant_t **mv)
7365 {
7366 if (mv != NULL)
7367 BFT_FREE(*mv);
7368 }
7369
7370 /*----------------------------------------------------------------------------*/
7371 /*!
7372 * \brief Apply a variant to a given matrix
7373 *
7374 * \param[in, out] m pointer to matrix
7375 * \param[in] mv pointer to matrix variant pointer
7376 */
7377 /*----------------------------------------------------------------------------*/
7378
7379 void
cs_matrix_variant_apply(cs_matrix_t * m,cs_matrix_variant_t * mv)7380 cs_matrix_variant_apply(cs_matrix_t *m,
7381 cs_matrix_variant_t *mv)
7382 {
7383 if (m == NULL || mv == NULL)
7384 return;
7385
7386 if ( m->type < 0 || m->type > CS_MATRIX_N_BUILTIN_TYPES
7387 || m->fill_type < 0 || m->fill_type > CS_MATRIX_N_FILL_TYPES)
7388 return;
7389
7390 for (int i = 0; i < 2; i++)
7391 m->vector_multiply[m->fill_type][i] = mv->vector_multiply[i];
7392 }
7393
7394 /*----------------------------------------------------------------------------*/
7395 /*!
7396 * \brief Select the sparse matrix-vector product function to be used by a
7397 * matrix variant for a given fill type.
7398 *
7399 * Currently, possible variant functions are:
7400 *
7401 * CS_MATRIX_NATIVE (all fill types)
7402 * default
7403 * standard
7404 * omp (for OpenMP with compatible numbering)
7405 * omp_atomic (for OpenMP with atomics)
7406 * vector (For vector machine with compatible numbering)
7407 *
7408 * CS_MATRIX_CSR (for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
7409 * default
7410 * standard
7411 * mkl (with MKL)
7412 *
7413 * CS_MATRIX_MSR (all fill types except CS_MATRIX_33_BLOCK)
7414 * default
7415 * standard
7416 * mkl (with MKL, for CS_MATRIX_SCALAR or CS_MATRIX_SCALAR_SYM)
7417 * omp_sched (For OpenMP with scheduling)
7418 *
7419 * parameters:
7420 * mv <-> Pointer to matrix variant
7421 * numbering <-- mesh numbering info, or NULL
7422 * fill type <-- matrix fill type to merge from
 *   ed_flag    <-- 0: with diagonal only; 1: exclude diagonal only; 2: both
7424 * func_name <-- function type name
7425 */
7426 /*----------------------------------------------------------------------------*/
7427
7428 void
cs_matrix_variant_set_func(cs_matrix_variant_t * mv,const cs_numbering_t * numbering,cs_matrix_fill_type_t fill_type,int ed_flag,const char * func_name)7429 cs_matrix_variant_set_func(cs_matrix_variant_t *mv,
7430 const cs_numbering_t *numbering,
7431 cs_matrix_fill_type_t fill_type,
7432 int ed_flag,
7433 const char *func_name)
7434 {
7435 int s_id = (ed_flag != 1) ? 0 : 1;
7436 int e_id = (ed_flag != 0) ? 2 : 1;
7437
7438 for (int j = s_id; j < e_id; j++) {
7439
7440 int retcode = _set_spmv_func(mv->type,
7441 numbering,
7442 fill_type,
7443 j,
7444 func_name,
7445 mv->vector_multiply);
7446
7447 if (retcode == 1)
7448 bft_error
7449 (__FILE__, __LINE__, 0,
7450 _("Assignment of matrix.vector product \"%s\" to matrix variant \"%s\"\n"
7451 "of type \"%s\" for fill \"%s\" not allowed."),
7452 func_name, mv->name[j], _matrix_type_name[mv->type],
7453 cs_matrix_fill_type_name[fill_type]);
7454 else if (retcode == 2)
7455 bft_error
7456 (__FILE__, __LINE__, 0,
7457 _("Matrix.vector product function type \"%s\"\n"
7458 "is not available in this build."),
7459 func_name);
7460 }
7461 }
7462
7463 /*----------------------------------------------------------------------------*/
7464 /*!
7465 * \brief Get the type associated with a matrix variant.
7466 *
7467 * \param[in] mv pointer to matrix variant structure
7468 */
7469 /*----------------------------------------------------------------------------*/
7470
7471 cs_matrix_type_t
cs_matrix_variant_type(const cs_matrix_variant_t * mv)7472 cs_matrix_variant_type(const cs_matrix_variant_t *mv)
7473 {
7474 return mv->type;
7475 }
7476
7477 /*----------------------------------------------------------------------------*/
7478
7479 END_C_DECLS
7480