1 /*============================================================================
 * Utility functions for sparse matrices.
3 *============================================================================*/
4
5 /*
6 This file is part of Code_Saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2021 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 */
24
25 /*----------------------------------------------------------------------------*/
26
27 #include "cs_defs.h"
28
29 /*----------------------------------------------------------------------------
30 * Standard C library headers
31 *----------------------------------------------------------------------------*/
32
33 #include <stdarg.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <math.h>
39
40 #if defined(HAVE_MPI)
41 #include <mpi.h>
42 #endif
43
44 /*----------------------------------------------------------------------------
45 * Local headers
46 *----------------------------------------------------------------------------*/
47
48 #include "bft_mem.h"
49 #include "bft_error.h"
50 #include "bft_printf.h"
51
52 #include "fvm_io_num.h"
53
54 #include "cs_base.h"
55 #include "cs_blas.h"
56 #include "cs_file.h"
57 #include "cs_halo.h"
58 #include "cs_log.h"
59 #include "cs_numbering.h"
60 #include "cs_order.h"
61 #include "cs_parall.h"
62 #include "cs_part_to_block.h"
63 #include "cs_prototypes.h"
64 #include "cs_timer.h"
65
66 /*----------------------------------------------------------------------------
67 * Header for the current file
68 *----------------------------------------------------------------------------*/
69
70 #include "cs_matrix_util.h"
71
72 #include "cs_matrix_priv.h"
73
74 /*----------------------------------------------------------------------------*/
75
76 BEGIN_C_DECLS
77
78 /*! \cond DOXYGEN_SHOULD_SKIP_THIS */
79
80 /*=============================================================================
81 * Local Macro Definitions
82 *============================================================================*/
83
84 /*=============================================================================
85 * Local Type Definitions
86 *============================================================================*/
87
88 /*============================================================================
89 * Global variables
90 *============================================================================*/
91
92 /*============================================================================
93 * Private function definitions
94 *============================================================================*/
95
96 /*----------------------------------------------------------------------------
97 * y[i] = abs(da[i]), with da possibly NULL.
98 *
99 * parameters:
100 * da <-- Pointer to coefficients array (usually matrix diagonal)
101 * dd --> Resulting vector
102 * n_rows <-- Number of rows
103 * n_cols_ext <-- Number of columns + ghost columns
104 *----------------------------------------------------------------------------*/
105
106 static void
_diag_dom_diag_contrib(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows,cs_lnum_t n_cols_ext)107 _diag_dom_diag_contrib(const cs_real_t *restrict da,
108 cs_real_t *restrict dd,
109 cs_lnum_t n_rows,
110 cs_lnum_t n_cols_ext)
111 {
112 cs_lnum_t ii;
113
114 if (da != NULL) {
115 # pragma omp parallel for
116 for (ii = 0; ii < n_rows; ii++)
117 dd[ii] = fabs(da[ii]);
118 for (ii = n_rows; ii < n_cols_ext; ii++)
119 dd[ii] = 0.0;
120 }
121 else {
122 # pragma omp parallel for
123 for (ii = 0; ii < n_cols_ext; ii++)
124 dd[ii] = 0.0;
125 }
126
127 }
128
129 /*----------------------------------------------------------------------------
130 * Block diagonal contribution to diagonal dominance, with da possibly NULL.
131 *
132 * parameters:
133 * da <-- Pointer to coefficients array (usually matrix diagonal)
134 * dd --> Resulting vector
135 * n_rows <-- Number of rows
 *   n_cols_ext <-- Number of columns + ghost columns
137 * b_size <-- block size, including padding:
138 * b_size[0]: useful block size
139 * b_size[1]: vector block extents
140 * b_size[2]: matrix line extents
141 * b_size[3]: matrix line*column (block) extents
142 *----------------------------------------------------------------------------*/
143
144 static void
_b_diag_dom_diag_contrib(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows,cs_lnum_t n_cols_ext,const cs_lnum_t b_size[4])145 _b_diag_dom_diag_contrib(const cs_real_t *restrict da,
146 cs_real_t *restrict dd,
147 cs_lnum_t n_rows,
148 cs_lnum_t n_cols_ext,
149 const cs_lnum_t b_size[4])
150 {
151 cs_lnum_t ii, jj, kk;
152 double sign;
153 const cs_lnum_t dd_size = n_cols_ext*b_size[1];
154
155 # pragma omp parallel for
156 for (ii = 0; ii < dd_size; ii++)
157 dd[ii] = 0.0;
158
159 if (da != NULL) {
160 # pragma omp parallel for private(jj, kk, sign)
161 for (ii = 0; ii < n_rows; ii++) {
162 for (jj = 0; jj < b_size[1]; jj++)
163 dd[ii*b_size[1] + jj] = 0.0;
164 for (jj = 0; jj < b_size[0]; jj++) {
165 for (kk = 0; kk < b_size[0]; kk++) {
166 sign = (jj == kk) ? 1. : -1.;
167 dd[ii*b_size[1] + kk]
168 += sign*fabs(da[ii*b_size[3] + jj*b_size[2] + kk]);
169 }
170 }
171 }
172 }
173 }
174
175 /*----------------------------------------------------------------------------
176 * Normalize diagonal dominance, with da possibly NULL.
177 *
178 * parameters:
179 * da <-- Pointer to coefficients array (usually matrix diagonal)
180 * dd --> Resulting vector
181 * n_rows <-- Number of rows
182 *----------------------------------------------------------------------------*/
183
184 static void
_diag_dom_diag_normalize(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows)185 _diag_dom_diag_normalize(const cs_real_t *restrict da,
186 cs_real_t *restrict dd,
187 cs_lnum_t n_rows)
188 {
189 cs_lnum_t ii;
190
191 if (da != NULL) {
192 # pragma omp parallel for
193 for (ii = 0; ii < n_rows; ii++) {
194 if (fabs(da[ii]) > 1.e-18)
195 dd[ii] /= fabs(da[ii]);
196 else if (dd[ii] > -1.e-18)
197 dd[ii] = -1.e18;
198 else
199 dd[ii] = 0;
200 }
201 }
202 else {
203 # pragma omp parallel for
204 for (ii = 0; ii < n_rows; ii++) {
205 if (dd[ii] > -1.e-18)
206 dd[ii] = -1.e18;
207 else
208 dd[ii] = 0;
209 }
210 }
211 }
212
213 /*----------------------------------------------------------------------------
214 * Normalize block diagonal dominance, with da possibly NULL.
215 *
216 * parameters:
217 * da <-- Pointer to coefficients array (usually matrix diagonal)
218 * dd --> Resulting vector
219 * n_rows <-- Number of rows
220 * b_size <-- block size, including padding:
221 * b_size[0]: useful block size
222 * b_size[1]: vector block extents
223 * b_size[2]: matrix line extents
224 * b_size[3]: matrix line*column (block) extents
225 *----------------------------------------------------------------------------*/
226
227 static void
_b_diag_dom_diag_normalize(const cs_real_t * restrict da,cs_real_t * restrict dd,cs_lnum_t n_rows,const cs_lnum_t b_size[4])228 _b_diag_dom_diag_normalize(const cs_real_t *restrict da,
229 cs_real_t *restrict dd,
230 cs_lnum_t n_rows,
231 const cs_lnum_t b_size[4])
232 {
233 cs_lnum_t ii, jj;
234 double d_val;
235
236 if (da != NULL) {
237 # pragma omp parallel for private(jj, d_val)
238 for (ii = 0; ii < n_rows; ii++) {
239 for (jj = 0; jj < b_size[0]; jj++) {
240 d_val = fabs(da[ii*b_size[3] + jj*b_size[2] + jj]);
241 if (d_val > 1.e-18)
242 dd[ii*b_size[1] + jj] /= d_val;
243 else if (dd[ii*b_size[1] + jj] > -1.e-18)
244 dd[ii*b_size[1] + jj] = -1.e18;
245 else
246 dd[ii*b_size[1] + jj] = 0;
247 }
248 }
249 }
250 }
251
252 /*----------------------------------------------------------------------------
253 * Measure Diagonal dominance of native matrix.
254 *
255 * parameters:
256 * matrix <-- Pointer to matrix structure
257 * dd --> Resulting vector
258 *----------------------------------------------------------------------------*/
259
260 static void
_diag_dom_native(const cs_matrix_t * matrix,cs_real_t * restrict dd)261 _diag_dom_native(const cs_matrix_t *matrix,
262 cs_real_t *restrict dd)
263 {
264 cs_lnum_t ii, jj, edge_id;
265
266 const cs_matrix_struct_native_t *ms = matrix->structure;
267 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
268
269 const cs_real_t *restrict xa = mc->xa;
270
271 /* diagonal contribution */
272
273 _diag_dom_diag_contrib(mc->da, dd, ms->n_rows, ms->n_cols_ext);
274
275 /* non-diagonal terms */
276
277 if (mc->xa != NULL) {
278
279 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
280
281 if (mc->symmetric) {
282
283 for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
284 ii = face_cel_p[edge_id][0];
285 jj = face_cel_p[edge_id][1];
286 dd[ii] -= fabs(xa[edge_id]);
287 dd[jj] -= fabs(xa[edge_id]);
288 }
289
290 }
291 else {
292
293 for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
294 ii = face_cel_p[edge_id][0];
295 jj = face_cel_p[edge_id][1];
296 dd[ii] -= fabs(xa[2*edge_id]);
297 dd[jj] -= fabs(xa[2*edge_id + 1]);
298 }
299
300 }
301
302 }
303
304 _diag_dom_diag_normalize(mc->da, dd, ms->n_rows);
305 }
306
307 /*----------------------------------------------------------------------------
308 * Measure Diagonal dominance of native matrix with block diagonal.
309 *
310 * parameters:
311 * matrix <-- Pointer to matrix structure
312 * dd --> Resulting vector
313 *----------------------------------------------------------------------------*/
314
315 static void
_b_diag_dom_native(const cs_matrix_t * matrix,cs_real_t * restrict dd)316 _b_diag_dom_native(const cs_matrix_t *matrix,
317 cs_real_t *restrict dd)
318 {
319 cs_lnum_t ii, jj, kk, edge_id;
320
321 const cs_matrix_struct_native_t *ms = matrix->structure;
322 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
323
324 const cs_real_t *restrict xa = mc->xa;
325 const cs_lnum_t *db_size = matrix->db_size;
326
327 /* block diagonal contribution */
328
329 _b_diag_dom_diag_contrib(mc->da, dd, ms->n_rows, ms->n_cols_ext, db_size);
330
331 /* non-diagonal terms */
332
333 if (mc->xa != NULL) {
334
335 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
336
337 if (mc->symmetric) {
338
339 for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
340 ii = face_cel_p[edge_id][0];
341 jj = face_cel_p[edge_id][1];
342 for (kk = 0; kk < db_size[0]; kk++) {
343 dd[ii*db_size[1] + kk] -= fabs(xa[edge_id]);
344 dd[jj*db_size[1] + kk] -= fabs(xa[edge_id]);
345 }
346 }
347 }
348 else {
349
350 for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
351 ii = face_cel_p[edge_id][0];
352 jj = face_cel_p[edge_id][1];
353 for (kk = 0; kk < db_size[0]; kk++) {
354 dd[ii*db_size[1] + kk] -= fabs(xa[2*edge_id]);
355 dd[jj*db_size[1] + kk] -= fabs(xa[2*edge_id + 1]);
356 }
357 }
358
359 }
360
361 }
362
363 _b_diag_dom_diag_normalize(mc->da, dd, ms->n_rows, db_size);
364 }
365
366 /*----------------------------------------------------------------------------
367 * Measure Diagonal dominance of native block matrix.
368 *
369 * parameters:
370 * matrix <-- Pointer to matrix structure
371 * dd --> Resulting vector
372 *----------------------------------------------------------------------------*/
373
374 static void
_bb_diag_dom_native(const cs_matrix_t * matrix,cs_real_t * restrict dd)375 _bb_diag_dom_native(const cs_matrix_t *matrix,
376 cs_real_t *restrict dd)
377 {
378 cs_lnum_t ii, jj, kk, ll, edge_id;
379
380 const cs_matrix_struct_native_t *ms = matrix->structure;
381 const cs_matrix_coeff_native_t *mc = matrix->coeffs;
382
383 const cs_real_t *restrict xa = mc->xa;
384 const cs_lnum_t *db_size = matrix->db_size;
385 const cs_lnum_t *eb_size = matrix->eb_size;
386
387 /* block diagonal contribution */
388
389 _b_diag_dom_diag_contrib(mc->da, dd, ms->n_rows, ms->n_cols_ext, db_size);
390
391 /* non-diagonal terms */
392
393 if (mc->xa != NULL) {
394
395 const cs_lnum_2_t *restrict face_cel_p = ms->edges;
396
397 if (mc->symmetric) {
398
399 for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
400 ii = face_cel_p[edge_id][0];
401 jj = face_cel_p[edge_id][1];
402 for (kk = 0; kk < eb_size[0]; kk++) {
403 for (ll = 0; ll < eb_size[0]; ll++) {
404 cs_lnum_t si = edge_id*eb_size[3] + kk*eb_size[2] + ll;
405 dd[ii*db_size[1] + kk] -= fabs(xa[si]);
406 dd[jj*db_size[1] + kk] -= fabs(xa[si]);
407 }
408 }
409 }
410 }
411 else {
412
413 for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
414 ii = face_cel_p[edge_id][0];
415 jj = face_cel_p[edge_id][1];
416 for (kk = 0; kk < db_size[0]; kk++) {
417 for (ll = 0; ll < eb_size[0]; ll++) {
418 cs_lnum_t si0 = 2*edge_id*eb_size[3] + kk*eb_size[2] + ll;
419 cs_lnum_t si1 = (2*edge_id+1)*eb_size[3] + kk*eb_size[2] + ll;
420 dd[ii*db_size[1] + kk] -= fabs(xa[si0]);
421 dd[jj*db_size[1] + kk] -= fabs(xa[si1]);
422 }
423 }
424 }
425
426 }
427
428 }
429
430 _b_diag_dom_diag_normalize(mc->da, dd, ms->n_rows, db_size);
431 }
432
433 /*----------------------------------------------------------------------------
434 * Measure Diagonal dominance of CSR matrix.
435 *
436 * parameters:
437 * matrix <-- Pointer to matrix structure
438 * dd --> Resulting vector
439 *----------------------------------------------------------------------------*/
440
441 static void
_diag_dom_csr(const cs_matrix_t * matrix,cs_real_t * restrict dd)442 _diag_dom_csr(const cs_matrix_t *matrix,
443 cs_real_t *restrict dd)
444 {
445 cs_lnum_t ii, jj, n_cols;
446 double sii, sign, d_val;
447 const cs_lnum_t *restrict col_id;
448 const cs_real_t *restrict m_row;
449
450 const cs_matrix_struct_csr_t *ms = matrix->structure;
451 const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
452 cs_lnum_t n_rows = ms->n_rows;
453
454 /* Standard case */
455
456 # pragma omp parallel for private(jj, col_id, m_row, n_cols, sii, sign, d_val)
457 for (ii = 0; ii < n_rows; ii++) {
458 col_id = ms->col_id + ms->row_index[ii];
459 m_row = mc->val + ms->row_index[ii];
460 n_cols = ms->row_index[ii+1] - ms->row_index[ii];
461 sii = 0.0;
462 d_val = 0.0;
463 for (jj = 0; jj < n_cols; jj++) {
464 if (col_id[jj] == ii) {
465 sign = 1.;
466 d_val = fabs(m_row[jj]);
467 }
468 else
469 sign = -1.;
470 sii += sign * fabs(m_row[jj]);
471 }
472 if (d_val > 1.e-18)
473 sii /= d_val;
474 else if (sii > -1.e-18)
475 sii = -1.e18;
476 else
477 sii = 0;
478 dd[ii] = sii;
479 }
480 }
481
482 /*----------------------------------------------------------------------------
483 * Measure Diagonal dominance of MSR matrix.
484 *
485 * parameters:
486 * matrix <-- Pointer to matrix structure
487 * dd --> Resulting vector
488 *----------------------------------------------------------------------------*/
489
490 static void
_diag_dom_msr(const cs_matrix_t * matrix,cs_real_t * restrict dd)491 _diag_dom_msr(const cs_matrix_t *matrix,
492 cs_real_t *restrict dd)
493 {
494 cs_lnum_t ii, jj, n_cols;
495 cs_real_t sii;
496 const cs_real_t *restrict m_row;
497
498 const cs_matrix_struct_csr_t *ms = matrix->structure;
499 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
500 cs_lnum_t n_rows = ms->n_rows;
501
502 /* diagonal contribution */
503
504 _diag_dom_diag_contrib(mc->d_val, dd, ms->n_rows, ms->n_cols_ext);
505
506 /* extra-diagonal contribution */
507
508 if (mc->x_val != NULL) {
509
510 # pragma omp parallel for private(jj, m_row, n_cols, sii)
511 for (ii = 0; ii < n_rows; ii++) {
512 m_row = mc->x_val + ms->row_index[ii];
513 n_cols = ms->row_index[ii+1] - ms->row_index[ii];
514 sii = 0.0;
515 for (jj = 0; jj < n_cols; jj++)
516 sii -= fabs(m_row[jj]);
517 dd[ii] += sii;
518 }
519
520 }
521
522 _diag_dom_diag_normalize(mc->d_val, dd, n_rows);
523 }
524
525 /*----------------------------------------------------------------------------
526 * Measure Diagonal dominance of MSR matrix, blocked version.
527 *
528 * parameters:
529 * matrix <-- Pointer to matrix structure
530 * dd --> Resulting vector
531 *----------------------------------------------------------------------------*/
532
533 static void
_b_diag_dom_msr(const cs_matrix_t * matrix,cs_real_t * restrict dd)534 _b_diag_dom_msr(const cs_matrix_t *matrix,
535 cs_real_t *restrict dd)
536 {
537 cs_lnum_t ii, jj, kk, n_cols;
538 const cs_real_t *restrict m_row;
539
540 const cs_matrix_struct_csr_t *ms = matrix->structure;
541 const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
542 const cs_lnum_t *db_size = matrix->db_size;
543 const cs_lnum_t n_rows = ms->n_rows;
544
545 /* diagonal contribution */
546
547 _b_diag_dom_diag_contrib(mc->d_val, dd, ms->n_rows, ms->n_cols_ext, db_size);
548
549 /* extra-diagonal contribution */
550
551 if (mc->x_val != NULL) {
552
553 # pragma omp parallel for private(jj, kk, m_row, n_cols)
554 for (ii = 0; ii < n_rows; ii++) {
555 m_row = mc->x_val + ms->row_index[ii];
556 n_cols = ms->row_index[ii+1] - ms->row_index[ii];
557 for (jj = 0; jj < n_cols; jj++) {
558 for (kk = 0; kk < db_size[0]; kk++)
559 dd[ii*db_size[1] + kk] -= fabs(m_row[jj]);
560 }
561 }
562
563 }
564
565 _b_diag_dom_diag_normalize(mc->d_val, dd, ms->n_rows, db_size);
566 }
567
568 /*----------------------------------------------------------------------------
569 * Diagonal contribution to matrix dump.
570 *
571 * parameters:
572 * da <-- Pointer to coefficients array (usually matrix diagonal)
573 * m_coo <-- Matrix coefficient coordinates array
574 * m_val <-- Matrix coefficient values array
575 * g_coo_num <-- Global coordinate numbers
576 * n_rows <-- Number of rows
577 *----------------------------------------------------------------------------*/
578
579 static void
_pre_dump_diag_contrib(const cs_real_t * restrict da,cs_gnum_t * restrict m_coo,cs_real_t * restrict m_val,const cs_gnum_t * restrict g_coo_num,cs_lnum_t n_rows)580 _pre_dump_diag_contrib(const cs_real_t *restrict da,
581 cs_gnum_t *restrict m_coo,
582 cs_real_t *restrict m_val,
583 const cs_gnum_t *restrict g_coo_num,
584 cs_lnum_t n_rows)
585 {
586 cs_lnum_t ii;
587
588 if (da != NULL) {
589 # pragma omp parallel for
590 for (ii = 0; ii < n_rows; ii++) {
591 m_coo[ii*2] = g_coo_num[ii];
592 m_coo[ii*2 + 1] = g_coo_num[ii];
593 m_val[ii] = da[ii];
594 }
595 }
596 else {
597 # pragma omp parallel for
598 for (ii = 0; ii < n_rows; ii++) {
599 m_coo[ii*2] = g_coo_num[ii];
600 m_coo[ii*2 + 1] = g_coo_num[ii];
601 m_val[ii] = 0.0;
602 }
603 }
604 }
605
606 /*----------------------------------------------------------------------------
607 * Block diagonal contribution to matrix dump.
608 *
609 * parameters:
610 * da <-- Pointer to coefficients array (usually matrix diagonal)
611 * m_coo <-- Matrix coefficient coordinates array
612 * m_val <-- Matrix coefficient values array
613 * g_coo_num <-- Global coordinate numbers
614 * n_rows <-- Number of rows
615 * b_size <-- block size, including padding:
616 * b_size[0]: useful block size
617 * b_size[1]: vector block extents
618 * b_size[2]: matrix line extents
619 * b_size[3]: matrix line*column (block) extents
620 *----------------------------------------------------------------------------*/
621
622 static void
_b_pre_dump_diag_contrib(const cs_real_t * restrict da,cs_gnum_t * restrict m_coo,cs_real_t * restrict m_val,const cs_gnum_t * restrict g_coo_num,cs_lnum_t n_rows,const cs_lnum_t b_size[4])623 _b_pre_dump_diag_contrib(const cs_real_t *restrict da,
624 cs_gnum_t *restrict m_coo,
625 cs_real_t *restrict m_val,
626 const cs_gnum_t *restrict g_coo_num,
627 cs_lnum_t n_rows,
628 const cs_lnum_t b_size[4])
629 {
630 cs_lnum_t ii, jj, kk;
631 cs_lnum_t db_size[2] = {b_size[0], b_size[0]*b_size[0]};
632
633 if (da != NULL) {
634 # pragma omp parallel for private(jj, kk)
635 for (ii = 0; ii < n_rows; ii++) {
636 for (jj = 0; jj < b_size[0]; jj++) {
637 for (kk = 0; kk < b_size[0]; kk++) {
638 m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2]
639 = g_coo_num[ii]*b_size[0] + jj;
640 m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2 + 1]
641 = g_coo_num[ii]*b_size[0] + kk;
642 m_val[ii*db_size[1] + jj*db_size[0] + kk]
643 = da[ii*b_size[3] + jj*b_size[2] + kk];
644 }
645 }
646 }
647 }
648 else {
649 # pragma omp parallel for private(jj, kk)
650 for (ii = 0; ii < n_rows; ii++) {
651 for (jj = 0; jj < b_size[0]; jj++) {
652 for (kk = 0; kk < b_size[0]; kk++) {
653 m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2]
654 = g_coo_num[ii]*b_size[0] + jj;
655 m_coo[(ii*db_size[1] + jj*db_size[0] + kk)*2 + 1]
656 = g_coo_num[ii]*b_size[0] + kk;
657 m_val[ii*db_size[1] + jj*db_size[0] + kk] = 0.0;
658 }
659 }
660 }
661 }
662 }
663
664 /*----------------------------------------------------------------------------
665 * Prepare dump of native matrix.
666 *
667 * parameters:
668 * matrix <-- Pointer to matrix structure
669 * g_coo_num <-- Global coordinate numbers
670 * m_coo --> Matrix coefficient coordinates array
671 * m_val --> Matrix coefficient values array
672 *
673 * returns:
674 * number of matrix entries
675 *----------------------------------------------------------------------------*/
676
static cs_lnum_t
_pre_dump_native(const cs_matrix_t   *matrix,
                 const cs_gnum_t     *g_coo_num,
                 cs_gnum_t          **m_coo,
                 cs_real_t          **m_val)
{
  cs_lnum_t ii, jj, edge_id, dump_id;

  cs_gnum_t *restrict _m_coo;
  cs_real_t *restrict _m_val;

  const cs_matrix_struct_native_t *ms = matrix->structure;
  const cs_matrix_coeff_native_t *mc = matrix->coeffs;

  const cs_real_t *restrict xa = mc->xa;

  /* One diagonal entry per row, plus two entries (ij and ji) per edge. */

  cs_lnum_t n_entries = ms->n_rows + ms->n_edges*2;

  /* Allocate arrays; ownership passes to the caller via m_coo / m_val. */

  BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
  BFT_MALLOC(_m_val, n_entries, double);

  *m_coo = _m_coo;
  *m_val = _m_val;

  /* diagonal contribution (fills entries [0, n_rows)) */

  _pre_dump_diag_contrib(mc->da, _m_coo, _m_val, g_coo_num, ms->n_rows);

  /* non-diagonal terms, appended after the diagonal entries */

  dump_id = ms->n_rows;

  if (mc->xa != NULL) {

    const cs_lnum_2_t *restrict face_cel_p
      = (const cs_lnum_2_t *restrict)(ms->edges);

    if (mc->symmetric) {

      /* Symmetric storage: the single stored coefficient is dumped at
         both (ii, jj) and (jj, ii). */

      for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
        ii = face_cel_p[edge_id][0];
        jj = face_cel_p[edge_id][1];
        _m_coo[dump_id*2] = g_coo_num[ii];
        _m_coo[dump_id*2 + 1] = g_coo_num[jj];
        _m_val[dump_id] = xa[edge_id];
        _m_coo[dump_id*2 + 2] = g_coo_num[jj];
        _m_coo[dump_id*2 + 3] = g_coo_num[ii];
        _m_val[dump_id + 1] = xa[edge_id];
        dump_id += 2;
      }

    }
    else {

      /* Non-symmetric storage: coefficients are interleaved as
         (ij, ji) pairs in xa. */

      for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
        ii = face_cel_p[edge_id][0];
        jj = face_cel_p[edge_id][1];
        _m_coo[dump_id*2] = g_coo_num[ii];
        _m_coo[dump_id*2 + 1] = g_coo_num[jj];
        _m_val[dump_id] = xa[2*edge_id];
        _m_coo[dump_id*2 + 2] = g_coo_num[jj];
        _m_coo[dump_id*2 + 3] = g_coo_num[ii];
        _m_val[dump_id + 1] = xa[2*edge_id + 1];
        dump_id += 2;
      }

    }

  }

  return n_entries;
}
751
752 /*----------------------------------------------------------------------------
753 * Prepare dump of native matrix with block diagonal.
754 *
755 * parameters:
756 * matrix <-- Pointer to matrix structure
757 * g_coo_num <-- Global coordinate numbers
758 * m_coo --> Matrix coefficient coordinates array
759 * m_val --> Matrix coefficient values array
760 *
761 * returns:
762 * number of matrix entries
763 *----------------------------------------------------------------------------*/
764
static cs_lnum_t
_b_pre_dump_native(const cs_matrix_t   *matrix,
                   const cs_gnum_t     *g_coo_num,
                   cs_gnum_t          **m_coo,
                   cs_real_t          **m_val)
{
  cs_lnum_t ii, jj, kk, edge_id, dump_id;

  cs_gnum_t *restrict _m_coo;
  cs_real_t *restrict _m_val;

  const cs_matrix_struct_native_t *ms = matrix->structure;
  const cs_matrix_coeff_native_t *mc = matrix->coeffs;

  const cs_real_t *restrict xa = mc->xa;
  const cs_lnum_t *db_size = matrix->db_size;

  /* Full db x db block per diagonal row, plus 2 scalar entries per edge
     replicated on each of the db_size[0] block components:
     n_rows*db^2 + 2*n_edges*db entries total. */

  cs_lnum_t n_entries = (ms->n_rows*db_size[0] + ms->n_edges*2) * db_size[0];

  /* Allocate arrays; ownership passes to the caller via m_coo / m_val. */

  BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
  BFT_MALLOC(_m_val, n_entries, double);

  *m_coo = _m_coo;
  *m_val = _m_val;

  /* block diagonal contribution (fills the first n_rows*db^2 entries) */

  _b_pre_dump_diag_contrib(mc->da, _m_coo, _m_val,
                           g_coo_num, ms->n_rows, db_size);

  /* non-diagonal terms, appended after the diagonal blocks */

  dump_id = ms->n_rows*db_size[0]*db_size[0];

  if (mc->xa != NULL) {

    const cs_lnum_2_t *restrict face_cel_p
      = (const cs_lnum_2_t *restrict)(ms->edges);

    if (mc->symmetric) {

      /* Symmetric: the single scalar coefficient is dumped at (ii, jj)
         and (jj, ii) for each block component kk. */

      for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
        ii = face_cel_p[edge_id][0];
        jj = face_cel_p[edge_id][1];
        for (kk = 0; kk < db_size[0]; kk++) {
          _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
          _m_coo[dump_id*2 + 1] = g_coo_num[jj]*db_size[0] + kk;
          _m_val[dump_id] = xa[edge_id];
          _m_coo[dump_id*2 + 2] = g_coo_num[jj]*db_size[0] + kk;
          _m_coo[dump_id*2 + 3] = g_coo_num[ii]*db_size[0] + kk;
          _m_val[dump_id + 1] = xa[edge_id];
          dump_id += 2;
        }
      }
    }
    else {

      /* Non-symmetric: xa holds interleaved (ij, ji) pairs. */

      for (edge_id = 0; edge_id < ms->n_edges; edge_id++) {
        ii = face_cel_p[edge_id][0];
        jj = face_cel_p[edge_id][1];
        for (kk = 0; kk < db_size[0]; kk++) {
          _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
          _m_coo[dump_id*2 + 1] = g_coo_num[jj]*db_size[0] + kk;
          _m_val[dump_id] = xa[edge_id*2];
          _m_coo[dump_id*2 + 2] = g_coo_num[jj]*db_size[0] + kk;
          _m_coo[dump_id*2 + 3] = g_coo_num[ii]*db_size[0] + kk;
          _m_val[dump_id + 1] = xa[edge_id*2 + 1];
          dump_id += 2;
        }
      }

    }

  }

  return n_entries;
}
844
845 /*----------------------------------------------------------------------------
846 * Prepare dump of CSR matrix.
847 *
848 * parameters:
849 * matrix <-- Pointer to matrix structure
850 * g_coo_num <-- Global coordinate numbers
851 * m_coo --> Matrix coefficient coordinates array
852 * m_val --> Matrix coefficient values array
853 *
854 * returns:
855 * number of matrix entries
856 *----------------------------------------------------------------------------*/
857
static cs_lnum_t
_pre_dump_csr(const cs_matrix_t   *matrix,
              const cs_gnum_t     *g_coo_num,
              cs_gnum_t          **m_coo,
              cs_real_t          **m_val)
{
  cs_lnum_t ii, jj, n_cols, dump_id;
  const cs_lnum_t *restrict col_id;
  const cs_real_t *restrict m_row;

  cs_gnum_t *restrict _m_coo;
  cs_real_t *restrict _m_val;

  const cs_matrix_struct_csr_t *ms = matrix->structure;
  const cs_matrix_coeff_csr_t *mc = matrix->coeffs;
  cs_lnum_t dump_id_shift = 0;
  cs_lnum_t n_rows = ms->n_rows;

  /* In CSR the diagonal is stored in-line, so the entry count is
     simply the number of stored coefficients. */

  cs_lnum_t n_entries = ms->row_index[n_rows];

  /* Allocate arrays; ownership passes to the caller via m_coo / m_val. */

  BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
  BFT_MALLOC(_m_val, n_entries, double);

  *m_coo = _m_coo;
  *m_val = _m_val;

  /* Contribution */

  /* A diagonal-less CSR matrix is not expected here (hence assert(0));
     if it ever occurs, zero diagonal entries are emitted first.
     NOTE(review): n_entries above does not include these n_rows extra
     entries, so this fallback would overrun the allocation — confirm
     the branch is truly unreachable. */

  if (ms->have_diag == false) {
    assert(0);
#   pragma omp parallel for
    for (ii = 0; ii < n_rows; ii++) {
      _m_coo[ii*2] = g_coo_num[ii];
      _m_coo[ii*2+1] = g_coo_num[ii];
      _m_val[ii] = 0.0;
    }
    dump_id_shift = n_rows;
  }

  /* Dump stored coefficients row by row, mapping local column ids to
     global numbers; each row's destination range is disjoint, so the
     loop parallelizes safely. */

# pragma omp parallel for private(jj, dump_id, col_id, m_row, n_cols)
  for (ii = 0; ii < n_rows; ii++) {
    col_id = ms->col_id + ms->row_index[ii];
    m_row = mc->val + ms->row_index[ii];
    n_cols = ms->row_index[ii+1] - ms->row_index[ii];
    for (jj = 0; jj < n_cols; jj++) {
      dump_id = ms->row_index[ii] + jj + dump_id_shift;
      _m_coo[dump_id*2] = g_coo_num[ii];
      _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]];
      _m_val[dump_id] = m_row[jj];
    }
  }

  return n_entries;
}
914
915 /*----------------------------------------------------------------------------
916 * Prepare dump of MSR matrix.
917 *
918 * parameters:
919 * matrix <-- Pointer to matrix structure
920 * g_coo_num <-- Global coordinate numbers
921 * m_coo --> Matrix coefficient coordinates array
922 * m_val --> Matrix coefficient values array
923 *
924 * returns:
925 * number of matrix entries
926 *----------------------------------------------------------------------------*/
927
static cs_lnum_t
_pre_dump_msr(const cs_matrix_t   *matrix,
              const cs_gnum_t     *g_coo_num,
              cs_gnum_t          **m_coo,
              cs_real_t          **m_val)
{
  cs_lnum_t ii, jj, n_cols, dump_id;
  const cs_lnum_t *restrict col_id;
  const cs_real_t *restrict m_row;

  cs_gnum_t *restrict _m_coo;
  cs_real_t *restrict _m_val;

  const cs_matrix_struct_csr_t *ms = matrix->structure;
  const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
  const cs_lnum_t n_rows = ms->n_rows;

  /* MSR stores the diagonal separately: stored extra-diagonal
     coefficients plus one diagonal entry per row. */

  cs_lnum_t n_entries = ms->row_index[n_rows] + ms->n_rows;

  /* Allocate arrays; ownership passes to the caller via m_coo / m_val. */

  BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
  BFT_MALLOC(_m_val, n_entries, double);

  *m_coo = _m_coo;
  *m_val = _m_val;

  /* diagonal contribution (fills entries [0, n_rows)) */

  _pre_dump_diag_contrib(mc->d_val, _m_coo, _m_val, g_coo_num, ms->n_rows);

  /* extra-diagonal contribution, offset past the diagonal entries;
     if no extra-diagonal values are set, the structure is still dumped
     with zero values. */

  if (mc->x_val != NULL) {
#   pragma omp parallel for private(jj, dump_id, col_id, m_row, n_cols)
    for (ii = 0; ii < n_rows; ii++) {
      col_id = ms->col_id + ms->row_index[ii];
      m_row = mc->x_val + ms->row_index[ii];
      n_cols = ms->row_index[ii+1] - ms->row_index[ii];
      for (jj = 0; jj < n_cols; jj++) {
        dump_id = ms->row_index[ii] + jj + ms->n_rows;
        _m_coo[dump_id*2] = g_coo_num[ii];
        _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]];
        _m_val[dump_id] = m_row[jj];
      }
    }
  }
  else {
#   pragma omp parallel for private(jj, dump_id, col_id, n_cols)
    for (ii = 0; ii < n_rows; ii++) {
      col_id = ms->col_id + ms->row_index[ii];
      n_cols = ms->row_index[ii+1] - ms->row_index[ii];
      for (jj = 0; jj < n_cols; jj++) {
        dump_id = ms->row_index[ii] + jj + ms->n_rows;
        _m_coo[dump_id*2] = g_coo_num[ii];
        _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]];
        _m_val[dump_id] = 0.0;
      }
    }
  }

  return n_entries;
}
991
992 /*----------------------------------------------------------------------------
993 * Prepare dump of MSR matrix, blocked version.
994 *
995 * parameters:
996 * matrix <-- Pointer to matrix structure
997 * g_coo_num <-- Global coordinate numbers
998 * m_coo --> Matrix coefficient coordinates array
999 * m_val --> Matrix coefficient values array
1000 *
1001 * returns:
1002 * number of matrix entries
1003 *----------------------------------------------------------------------------*/
1004
static cs_lnum_t
_b_pre_dump_msr(const cs_matrix_t   *matrix,
                const cs_gnum_t     *g_coo_num,
                cs_gnum_t          **m_coo,
                cs_real_t          **m_val)
{
  cs_lnum_t ii, jj, kk, n_cols, dump_id;
  const cs_lnum_t *restrict col_id;
  const cs_real_t *restrict m_row;

  cs_gnum_t *restrict _m_coo;
  cs_real_t *restrict _m_val;

  const cs_matrix_struct_csr_t *ms = matrix->structure;
  const cs_matrix_coeff_msr_t *mc = matrix->coeffs;
  const cs_lnum_t *db_size = matrix->db_size;
  const cs_lnum_t n_rows = ms->n_rows;

  /* Extra-diagonal entries are written after the diagonal blocks. */
  const cs_lnum_t dump_id_shift = ms->n_rows*db_size[0]*db_size[0];

  /* db_size[0] entries per stored scalar coefficient (one per block
     component) plus a full db x db block per diagonal row. */

  cs_lnum_t n_entries = ms->row_index[n_rows]*db_size[0]
                      + ms->n_rows*db_size[0]*db_size[0];

  /* Allocate arrays; ownership passes to the caller via m_coo / m_val. */

  BFT_MALLOC(_m_coo, n_entries*2, cs_gnum_t);
  BFT_MALLOC(_m_val, n_entries, double);

  *m_coo = _m_coo;
  *m_val = _m_val;

  /* diagonal contribution (fills the first dump_id_shift entries) */

  _b_pre_dump_diag_contrib(mc->d_val, _m_coo, _m_val,
                           g_coo_num, ms->n_rows, db_size);


  /* extra-diagonal contribution: each scalar coefficient is replicated
     on every block component kk; structure is dumped with zero values
     when no coefficients are set. */

  if (mc->x_val != NULL) {
#   pragma omp parallel for private(jj, kk, dump_id, col_id, m_row, n_cols)
    for (ii = 0; ii < n_rows; ii++) {
      col_id = ms->col_id + ms->row_index[ii];
      m_row = mc->x_val + ms->row_index[ii];
      n_cols = ms->row_index[ii+1] - ms->row_index[ii];
      for (jj = 0; jj < n_cols; jj++) {
        for (kk = 0; kk < db_size[0]; kk++) {
          dump_id = (ms->row_index[ii] + jj)*db_size[0] + kk + dump_id_shift;
          _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
          _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]]*db_size[0] + kk;
          _m_val[dump_id] = m_row[jj];
        }
      }
    }
  }
  else {
#   pragma omp parallel for private(jj, kk, dump_id, col_id, n_cols)
    for (ii = 0; ii < n_rows; ii++) {
      col_id = ms->col_id + ms->row_index[ii];
      n_cols = ms->row_index[ii+1] - ms->row_index[ii];
      for (jj = 0; jj < n_cols; jj++) {
        for (kk = 0; kk < db_size[0]; kk++) {
          dump_id = (ms->row_index[ii] + jj)*db_size[0] + kk + dump_id_shift;
          _m_coo[dump_id*2] = g_coo_num[ii]*db_size[0] + kk;
          _m_coo[dump_id*2+1] = g_coo_num[col_id[jj]]*db_size[0] + kk;
          _m_val[dump_id] = 0.0;
        }
      }
    }
  }

  return n_entries;
}
1077
1078 /*----------------------------------------------------------------------------
1079 * Write header for dump of matrix to native file.
1080 *
1081 * 3 characters are used to define (in order):
1082 * - the size of cs_gnum_t (int, long, or long long)
1083 * - the size of floating point values (double, should be size _)
1084 * - 'l' for little-endian files, 'b' for big-endian files.
1085 *
1086 * parameters:
1087 * f <-- Pointer to file structure
1088 *----------------------------------------------------------------------------*/
1089
1090 static void
_write_header_simple(cs_file_t * f)1091 _write_header_simple(cs_file_t *f)
1092 {
1093 unsigned char flags[3] = {(unsigned char) sizeof(cs_gnum_t),
1094 (unsigned char) sizeof(double),
1095 'b'};
1096
1097 /* Check if system is "big-endian" or "little-endian" */
1098 {
1099 unsigned int_endian = 0;
1100 *((char *)(&int_endian)) = '\1';
1101 if (int_endian == 1)
1102 flags[2] = 'l';
1103 }
1104
1105 assert(sizeof(cs_gnum_t) < 255);
1106 assert(sizeof(double) < 255); /* should always be 8 */
1107
1108 cs_file_write_global(f, flags, 1, 3);
1109 }
1110
1111 /*----------------------------------------------------------------------------
1112 * Locally sort matrix dump data.
1113 *
1114 * parameters:
1115 * n_entries <-- Number of local matrix entries
1116 * m_coords <-> Matrix coefficient coordinates (interlaced)
1117 * m_vals <-> Matrix coefficient values
1118 *----------------------------------------------------------------------------*/
1119
1120 static void
_sort_matrix_dump_data(cs_lnum_t n_entries,cs_gnum_t * m_coords,double * m_vals)1121 _sort_matrix_dump_data(cs_lnum_t n_entries,
1122 cs_gnum_t *m_coords,
1123 double *m_vals)
1124 {
1125 cs_lnum_t ii, jj;
1126 cs_gnum_t *_m_coords;
1127 double *_m_vals;
1128
1129 cs_lnum_t *order = cs_order_gnum_s(NULL, m_coords, 2, n_entries);
1130
1131 BFT_MALLOC(_m_coords, n_entries*2, cs_gnum_t);
1132
1133 for (ii = 0; ii < n_entries; ii++) {
1134 jj = order[ii];
1135 _m_coords[ii*2] = m_coords[jj*2];
1136 _m_coords[ii*2+1] = m_coords[jj*2+1];
1137 }
1138 memcpy(m_coords, _m_coords, n_entries*2*sizeof(cs_gnum_t));
1139
1140 BFT_FREE(_m_coords);
1141
1142 BFT_MALLOC(_m_vals, n_entries, double);
1143
1144 for (ii = 0; ii < n_entries; ii++)
1145 _m_vals[ii] = m_vals[order[ii]];
1146
1147 memcpy(m_vals, _m_vals, n_entries*sizeof(double));
1148
1149 BFT_FREE(_m_vals);
1150
1151 BFT_FREE(order);
1152 }
1153
1154 /*----------------------------------------------------------------------------
1155 * Prepare data for matrix dump.
1156 *
1157 * Build local arrays with global matrix coordinates and coefficients.
1158 *
1159 * parameters:
1160 * m <-- Pointer to matrix structure
1161 * n_entries --> Number of local matrix entries
1162 * m_coords --> Matrix coefficient coordinates (interlaced)
1163 * m_vals --> Matrix coefficient values
1164 *----------------------------------------------------------------------------*/
1165
1166 static void
_prepare_matrix_dump_data(const cs_matrix_t * m,cs_lnum_t * n_entries,cs_gnum_t ** m_coords,double ** m_vals)1167 _prepare_matrix_dump_data(const cs_matrix_t *m,
1168 cs_lnum_t *n_entries,
1169 cs_gnum_t **m_coords,
1170 double **m_vals)
1171 {
1172 cs_lnum_t ii, jj;
1173 cs_lnum_t _n_entries = 0;
1174 cs_gnum_t coo_shift = 1, n_g_rows = 0;
1175
1176 cs_gnum_t *g_coo_num = NULL;
1177 cs_gnum_t *_m_coords = NULL;
1178 double *_m_vals = NULL;
1179
1180 BFT_MALLOC(g_coo_num, m->n_cols_ext, cs_gnum_t);
1181
1182 n_g_rows = m->n_rows;
1183
1184 #if defined(HAVE_MPI)
1185 if (cs_glob_n_ranks > 1) {
1186 cs_gnum_t loc_shift = m->n_rows;
1187 MPI_Scan(&loc_shift, &coo_shift, 1, CS_MPI_GNUM, MPI_SUM, cs_glob_mpi_comm);
1188 coo_shift = coo_shift + 1 - loc_shift;
1189 }
1190 #endif
1191
1192 for (ii = 0; ii < m->n_rows; ii++)
1193 g_coo_num[ii] = ii + coo_shift;
1194
1195 if (m->halo != NULL)
1196 cs_halo_sync_untyped(m->halo,
1197 CS_HALO_STANDARD,
1198 sizeof(cs_gnum_t),
1199 g_coo_num);
1200
1201 /* Populate arrays based on matrix type */
1202
1203 switch(m->type) {
1204 case CS_MATRIX_NATIVE:
1205 if (m->db_size[3] == 1)
1206 _n_entries = _pre_dump_native(m, g_coo_num, &_m_coords, &_m_vals);
1207 else
1208 _n_entries = _b_pre_dump_native(m, g_coo_num, &_m_coords, &_m_vals);
1209 break;
1210 case CS_MATRIX_CSR:
1211 assert(m->db_size[3] == 1);
1212 _n_entries = _pre_dump_csr(m, g_coo_num, &_m_coords, &_m_vals);
1213 break;
1214 case CS_MATRIX_MSR:
1215 if (m->db_size[3] == 1)
1216 _n_entries = _pre_dump_msr(m, g_coo_num, &_m_coords, &_m_vals);
1217 else
1218 _n_entries = _b_pre_dump_msr(m, g_coo_num, &_m_coords, &_m_vals);
1219 break;
1220 default:
1221 bft_error(__FILE__, __LINE__, 0,
1222 _("Dump of matrixes in %s format\n"
1223 "is not operational yet."),
1224 _(cs_matrix_get_type_name(m)));
1225 break;
1226 }
1227
1228 BFT_FREE(g_coo_num);
1229
1230 /* Sort values */
1231
1232 _sort_matrix_dump_data(_n_entries, _m_coords, _m_vals);
1233
1234 /* Remove references to extra periodicity ghost values if present */
1235
1236 for (ii = 0, jj = 0; ii < _n_entries; ii++) {
1237 if (_m_coords[ii*2] <= n_g_rows && _m_coords[ii*2+1] <= n_g_rows) {
1238 _m_coords[jj*2] = _m_coords[ii*2];
1239 _m_coords[jj*2+1] = _m_coords[ii*2+1];
1240 _m_vals[jj] = _m_vals[ii];
1241 jj += 1;
1242 }
1243 }
1244 _n_entries = jj;
1245
1246 /* Return arrays */
1247
1248 *n_entries = _n_entries;
1249 *m_coords = _m_coords;
1250 *m_vals = _m_vals;
1251 }
1252
1253 #if defined(HAVE_MPI)
1254
1255 /*----------------------------------------------------------------------------
1256 * Write matrix data to file in parallel mode.
1257 *
1258 * Arrays written are freed by this function.
1259 *
1260 * The full matrix is written, whether symmetric or not. Row and column
1261 * coordinats are 1-to-n based.
1262 *
1263 * File format:
1264 * - number of matrix entries (1 integer of type cs_gnum_t).
1265 * - array of row coordinates (n_entries integers of type cs_gnum_t)
1266 * - array of column coordinates (n_entries integers of type cs_gnum_t)
1267 * - array of matrix coefficients (n_entries values of type double)
1268 *
1269 * parameters:
1270 * m <-- Pointer to matrix structure
1271 * f <-> associated file pointer
1272 *----------------------------------------------------------------------------*/
1273
static void
_write_matrix_g(const cs_matrix_t  *m,
                cs_file_t          *f)
{
  int block_rank_step = 1, min_block_size = 0;
  cs_lnum_t block_size = 0;
  cs_gnum_t n_glob_ents = 0;

  cs_gnum_t *b_coords = NULL, *c_coords = NULL, *r_coords = NULL;
  double *b_vals = NULL;

  cs_block_dist_info_t bi;

  fvm_io_num_t *io_num = NULL;
  cs_part_to_block_t *d = NULL;

  cs_lnum_t n_entries = 0;
  cs_gnum_t *m_coords = NULL;
  double *m_vals = NULL;

  const cs_datatype_t gnum_type
    = (sizeof(cs_gnum_t) == 8) ? CS_UINT64 : CS_UINT32;

  /* Initialization for matrix coefficients:
     build local arrays of global (row, col) coordinates and values */

  _prepare_matrix_dump_data(m, &n_entries, &m_coords, &m_vals);

  /* Initialization for redistribution: assign a global I/O number
     to each (row, col) couple so entries can be distributed to
     contiguous blocks across ranks */

  io_num = fvm_io_num_create_from_adj_s(NULL, m_coords, n_entries, 2);

  n_glob_ents = fvm_io_num_get_global_count(io_num);

  cs_file_get_default_comm(&block_rank_step, NULL, NULL);

  bi = cs_block_dist_compute_sizes(cs_glob_rank_id,
                                   cs_glob_n_ranks,
                                   block_rank_step,
                                   min_block_size/2,
                                   n_glob_ents);

  d = cs_part_to_block_create_by_gnum(cs_glob_mpi_comm,
                                      bi,
                                      n_entries,
                                      fvm_io_num_get_global_num(io_num));

  /* Write number of entries */

  cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);

  /* Distribute to blocks */

  block_size = (bi.gnum_range[1] - bi.gnum_range[0]);

  /* Distribute coordinate blocks on ranks */

  if (block_size > 0)
    BFT_MALLOC(b_coords, block_size*2, cs_gnum_t);

  cs_part_to_block_copy_array(d, gnum_type, 2, m_coords, b_coords);

  BFT_FREE(m_coords);

  /* De-interlace coordinates: the file stores all row coordinates
     followed by all column coordinates, not (row, col) couples */

  if (block_size > 0) {
    cs_lnum_t ii;
    BFT_MALLOC(r_coords, block_size, cs_gnum_t);
    BFT_MALLOC(c_coords, block_size, cs_gnum_t);
    for (ii = 0; ii < block_size; ii++) {
      r_coords[ii] = b_coords[ii*2];
      c_coords[ii] = b_coords[ii*2+1];
    }
    BFT_FREE(b_coords);
  }

  /* Write coordinate blocks (collective calls, so also made when the
     local block is empty, with NULL buffers) */

  cs_file_write_block_buffer(f,
                             r_coords,
                             sizeof(cs_gnum_t),
                             1,
                             bi.gnum_range[0],
                             bi.gnum_range[1]);

  cs_file_write_block_buffer(f,
                             c_coords,
                             sizeof(cs_gnum_t),
                             1,
                             bi.gnum_range[0],
                             bi.gnum_range[1]);

  BFT_FREE(c_coords);
  BFT_FREE(r_coords);

  /* Distribute value blocks on ranks */

  if (block_size > 0)
    BFT_MALLOC(b_vals, block_size, double);

  cs_part_to_block_copy_array(d, CS_DOUBLE, 1, m_vals, b_vals);

  BFT_FREE(m_vals);

  /* Write value blocks */

  cs_file_write_block_buffer(f,
                             b_vals,
                             sizeof(double),
                             1,
                             bi.gnum_range[0],
                             bi.gnum_range[1]);

  BFT_FREE(b_vals);

  /* Free matrix coefficient distribution structures */

  cs_part_to_block_destroy(&d);
  io_num = fvm_io_num_destroy(io_num);
}
1394
1395 #endif /* #if defined(HAVE_MPI) */
1396
1397 /*----------------------------------------------------------------------------
1398 * Write matrix data to file in serial mode.
1399 *
1400 * Arrays written are freed by this function.
1401 *
1402 * The full matrix is written, whether symmetric or not. Row and column
1403 * coordinats are 1-to-n based.
1404 *
1405 * File format:
1406 * - number of matrix entries (1 integer of type cs_gnum_t).
1407 * - array of row coordinates (n_entries integers of type cs_gnum_t)
1408 * - array of column coordinates (n_entries integers of type cs_gnum_t)
1409 * - array of matrix coefficients (n_entries values of type double)
1410 *
1411 * parameters:
1412 * m <-- Pointer to matrix structure
1413 * f <-> associated file pointer
1414 *----------------------------------------------------------------------------*/
1415
1416 static void
_write_matrix_l(const cs_matrix_t * m,cs_file_t * f)1417 _write_matrix_l(const cs_matrix_t *m,
1418 cs_file_t *f)
1419 {
1420 cs_lnum_t ii;
1421 cs_gnum_t n_glob_ents = 0;
1422
1423 cs_gnum_t *c_coords = NULL, *r_coords = NULL;
1424
1425 cs_lnum_t n_entries = 0;
1426 cs_gnum_t *m_coords = NULL;
1427 double *m_vals = NULL;
1428
1429 /* Initialization */
1430
1431 _prepare_matrix_dump_data(m, &n_entries, &m_coords, &m_vals);
1432
1433 n_glob_ents = n_entries;
1434
1435 /* Write number of entries */
1436
1437 cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);
1438
1439 /* De-interlace coordinates */
1440
1441 BFT_MALLOC(r_coords, n_entries, cs_gnum_t);
1442 BFT_MALLOC(c_coords, n_entries, cs_gnum_t);
1443 for (ii = 0; ii < n_entries; ii++) {
1444 r_coords[ii] = m_coords[ii*2];
1445 c_coords[ii] = m_coords[ii*2+1];
1446 }
1447 BFT_FREE(m_coords);
1448
1449 /* Write coordinate blocks */
1450
1451 cs_file_write_global(f, r_coords, sizeof(cs_gnum_t), n_entries);
1452 cs_file_write_global(f, c_coords, sizeof(cs_gnum_t), n_entries);
1453
1454 BFT_FREE(r_coords);
1455 BFT_FREE(c_coords);
1456
1457 /* Write value blocks */
1458
1459 cs_file_write_global(f, m_vals, sizeof(double), n_entries);
1460
1461 BFT_FREE(m_vals);
1462 }
1463
1464 #if defined(HAVE_MPI)
1465
1466 /*----------------------------------------------------------------------------
1467 * Write vector data to file in parallel mode.
1468 *
1469 * File format:
1470 * - vector size matrix entries (1 integer of type cs_gnum_t).
1471 * - array of vector values (n_elts values of type double)
1472 *
1473 * parameters:
1474 * n_elts <-- local number of matrix entries
1475 * vals <-- vector element values
1476 * f <-> associated file pointer
1477 *----------------------------------------------------------------------------*/
1478
static void
_write_vector_g(cs_lnum_t         n_elts,
                const cs_real_t  *vals,
                cs_file_t        *f)
{
  cs_lnum_t ii;

  int block_rank_step = 1, min_block_size = 0;
  cs_lnum_t block_size = 0;
  cs_gnum_t coo_shift = 1;
  cs_gnum_t local_max = 0, n_glob_ents = 0;

  cs_gnum_t *g_elt_num = NULL;
  double *b_vals = NULL;

  cs_block_dist_info_t bi;

  cs_part_to_block_t *d = NULL;

  /* 0-based global number of this rank's first element */

  cs_gnum_t loc_shift = n_elts;
  MPI_Scan(&loc_shift, &coo_shift, 1, CS_MPI_GNUM, MPI_SUM, cs_glob_mpi_comm);
  coo_shift = coo_shift - loc_shift;

  /* Get maximum global number value (i.e. total number of elements) */

  if (n_elts > 0)
    local_max = n_elts + coo_shift;

  MPI_Allreduce(&local_max, &n_glob_ents, 1, CS_MPI_GNUM, MPI_MAX,
                cs_glob_mpi_comm);

  /* Build contiguous 1-based global element numbers */

  BFT_MALLOC(g_elt_num, n_elts, cs_gnum_t);

  for (ii = 0; ii < n_elts; ii++)
    g_elt_num[ii] = ii + coo_shift + 1;

  cs_file_get_default_comm(&block_rank_step, NULL, NULL);

  /* Redistribution structures */

  bi = cs_block_dist_compute_sizes(cs_glob_rank_id,
                                   cs_glob_n_ranks,
                                   block_rank_step,
                                   min_block_size/2,
                                   n_glob_ents);

  d = cs_part_to_block_create_by_gnum(cs_glob_mpi_comm,
                                      bi,
                                      n_elts,
                                      g_elt_num);

  /* Write number of entries */

  cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);

  /* Distribute to blocks */

  block_size = (bi.gnum_range[1] - bi.gnum_range[0]);

  if (block_size > 0)
    BFT_MALLOC(b_vals, block_size, double);

  /* Convert to double first if cs_real_t is a different type
     (file format always stores doubles) */

  if (sizeof(cs_real_t) != sizeof(double)) {
    double *p_vals = NULL;
    BFT_MALLOC(p_vals, n_elts, double);
    for (ii = 0; ii < n_elts; ii++)
      p_vals[ii] = vals[ii];
    cs_part_to_block_copy_array(d, CS_DOUBLE, 1, p_vals, b_vals);
    BFT_FREE(p_vals);
  }
  else
    cs_part_to_block_copy_array(d, CS_DOUBLE, 1, vals, b_vals);

  /* Write value blocks */

  cs_file_write_block_buffer(f,
                             b_vals,
                             sizeof(double),
                             1,
                             bi.gnum_range[0],
                             bi.gnum_range[1]);

  BFT_FREE(b_vals);

  /* Free distribution structures */

  cs_part_to_block_destroy(&d);

  BFT_FREE(g_elt_num);
}
1569
1570 #endif /* #if defined(HAVE_MPI) */
1571
1572 /*----------------------------------------------------------------------------
1573 * Write vector data to file in serial mode.
1574 *
1575 * File format:
1576 * - vector size matrix entries (1 integer of type cs_gnum_t).
1577 * - array of vector values (n_elts values of type double)
1578 *
1579 * parameters:
1580 * n_elts <-- local number of matrix entries
1581 * vals <-- vector element values
1582 * f <-> associated file pointer
1583 *----------------------------------------------------------------------------*/
1584
1585 static void
_write_vector_l(cs_lnum_t n_elts,const cs_real_t * vals,cs_file_t * f)1586 _write_vector_l(cs_lnum_t n_elts,
1587 const cs_real_t *vals,
1588 cs_file_t *f)
1589 {
1590 cs_gnum_t n_glob_ents = n_elts;
1591
1592 /* Write number of entries */
1593
1594 cs_file_write_global(f, &n_glob_ents, sizeof(cs_gnum_t), 1);
1595
1596 /* Write values */
1597
1598 if (sizeof(cs_real_t) != sizeof(double)) {
1599 cs_lnum_t ii;
1600 double *p_vals = NULL;
1601 BFT_MALLOC(p_vals, n_elts, double);
1602 for (ii = 0; ii < n_elts; ii++)
1603 p_vals[ii] = vals[ii];
1604 cs_file_write_global(f, p_vals, sizeof(double), n_elts);
1605 BFT_FREE(p_vals);
1606 }
1607 else
1608 cs_file_write_global(f, vals, sizeof(double), n_elts);
1609 }
1610
1611 /*----------------------------------------------------------------------------
1612 * Compute the Frobenius norm of a matrix.
1613 *
1614 * parameters:
1615 * m <-- pointer to matrix structure
1616 *
1617 * returns:
1618 * matrix Frobenius norm (or -1 if not computable)
1619 *----------------------------------------------------------------------------*/
1620
static double
_frobenius_norm(const cs_matrix_t  *m)
{
  double retval = -1.;

  if (m == NULL)
    return retval;

  cs_matrix_fill_type_t ft = m->fill_type;

  switch(m->type) {

  case CS_MATRIX_NATIVE:
    {
      /* Only handled when block arrays are dense (size = n*n) */
      if (   (m->eb_size[0]*m->eb_size[0] == m->eb_size[3])
          && (m->db_size[0]*m->db_size[0] == m->db_size[3])) {

        cs_lnum_t d_stride = m->db_size[3];
        cs_lnum_t e_stride = m->eb_size[3];
        const cs_matrix_struct_native_t *ms = m->structure;
        const cs_matrix_coeff_native_t *mc = m->coeffs;
        /* Scalar extra-diagonal terms of a block matrix count db_size[0]
           times (they appear on each diagonal position of the block);
           symmetric storage holds each edge once, so its contribution
           is doubled, while non-symmetric storage interlaces both
           directions (hence the doubled stride). */
        double e_mult = (m->eb_size[3] == 1) ? m->db_size[0] : 1;
        if (mc->symmetric)
          e_mult *= 2;
        else
          e_stride *= 2;

        retval = cs_dot_xx(d_stride*m->n_rows, mc->da);

        double ed_contrib = 0.;
        const cs_real_t *restrict xa = mc->xa;
#       pragma omp parallel reduction(+:ed_contrib) if (ms->n_edges > CS_THR_MIN)
        {
          double c = 0; /* Use Kahan compensated summation for
                           sum of block contributions (but not for local
                           block sums, for simplicity and performance) */
#         pragma omp for
          for (cs_lnum_t edge_id = 0; edge_id < ms->n_edges; edge_id++) {
            cs_lnum_t ii = ms->edges[edge_id][0];
            /* Only count edges whose first vertex is a local row,
               to avoid double-counting ghost contributions */
            if (ii < ms->n_rows) {
              double bsum = 0;
              for (cs_lnum_t kk = 0; kk < e_stride; kk++) {
                bsum += (  xa[edge_id*e_stride + kk]
                         * xa[edge_id*e_stride + kk]);
              }
              double z = bsum - c;
              double t = ed_contrib + z;
              c = (t - ed_contrib) - z;
              ed_contrib = t;
            }
          }
        }
        retval += ed_contrib*e_mult;

        cs_parall_sum(1, CS_DOUBLE, &retval);
      }
    }
    break;

  case CS_MATRIX_CSR:
    assert(   ft == CS_MATRIX_SCALAR
           || ft == CS_MATRIX_SCALAR_SYM
           || ft == CS_MATRIX_BLOCK);
    if (m->eb_size[0]*m->eb_size[0] == m->eb_size[3]) {
      cs_lnum_t stride = m->eb_size[3];
      const cs_matrix_struct_csr_t *ms = m->structure;
      const cs_matrix_coeff_csr_t *mc = m->coeffs;
      cs_lnum_t n_vals = ms->row_index[m->n_rows];
      /* CSR stores all coefficients (diagonal included) in val */
      retval = cs_dot_xx(stride*n_vals, mc->val);
      cs_parall_sum(1, CS_DOUBLE, &retval);
    }
    break;

  case CS_MATRIX_MSR:
    if (   (m->eb_size[0]*m->eb_size[0] == m->eb_size[3])
        && (m->db_size[0]*m->db_size[0] == m->db_size[3])) {
      cs_lnum_t d_stride = m->db_size[3];
      cs_lnum_t e_stride = m->eb_size[3];
      const cs_matrix_struct_csr_t *ms = m->structure;
      const cs_matrix_coeff_msr_t *mc = m->coeffs;
      cs_lnum_t n_vals = ms->row_index[m->n_rows];
      /* As in the native case, scalar extra-diagonal terms of a block
         matrix count db_size[0] times */
      double d_mult = (m->eb_size[3] == 1) ? m->db_size[0] : 1;
      retval = cs_dot_xx(d_stride*m->n_rows, mc->d_val);
      retval += d_mult * cs_dot_xx(e_stride*n_vals, mc->x_val);
      cs_parall_sum(1, CS_DOUBLE, &retval);
    }
    break;

  default:
    retval = -1;  /* norm not computable for this format */
  }

  /* Accumulated value is the squared norm; -1 is kept as the
     "not computable" marker */

  if (retval > 0)
    retval = sqrt(retval);

  return retval;
}
1718
1719 /*! (DOXYGEN_SHOULD_SKIP_THIS) \endcond */
1720
1721 /*============================================================================
1722 * Public function definitions
1723 *============================================================================*/
1724
1725 /*----------------------------------------------------------------------------
1726 * Compute diagonal dominance metric.
1727 *
1728 * parameters:
1729 * matrix <-- pointer to matrix structure
1730 * dd --> diagonal dominance (normalized)
1731 *----------------------------------------------------------------------------*/
1732
1733 void
cs_matrix_diag_dominance(const cs_matrix_t * matrix,cs_real_t dd[])1734 cs_matrix_diag_dominance(const cs_matrix_t *matrix,
1735 cs_real_t dd[])
1736 {
1737 const cs_halo_t *halo = matrix->halo;
1738
1739 switch(matrix->type) {
1740 case CS_MATRIX_NATIVE:
1741 if (matrix->db_size[3] == 1)
1742 _diag_dom_native(matrix, dd);
1743 else if (matrix->eb_size[3] == 1)
1744 _b_diag_dom_native(matrix, dd);
1745 else
1746 _bb_diag_dom_native(matrix, dd);
1747 break;
1748 case CS_MATRIX_CSR:
1749 assert(matrix->db_size[3] == 1);
1750 _diag_dom_csr(matrix, dd);
1751 break;
1752 case CS_MATRIX_MSR:
1753 if (matrix->db_size[3] == 1)
1754 _diag_dom_msr(matrix, dd);
1755 else
1756 _b_diag_dom_msr(matrix, dd);
1757 break;
1758 break;
1759 default:
1760 bft_error(__FILE__, __LINE__, 0,
1761 _("Extraction of diagonal dominance of matrixes in %s format\n"
1762 "is not operational yet."),
1763 _(cs_matrix_get_type_name(matrix)));
1764 break;
1765 }
1766
1767 /* Sync ghost rows as a precaution */
1768
1769 if (halo != NULL) {
1770 if (matrix->db_size[3] == 1)
1771 cs_halo_sync_var(halo, CS_HALO_STANDARD, dd);
1772 else {
1773 cs_halo_sync_var_strided(halo, CS_HALO_STANDARD, dd, matrix->db_size[1]);
1774 if (halo->n_transforms > 0 && matrix->db_size[0] == 3)
1775 cs_halo_perio_sync_var_vect(halo,
1776 CS_HALO_STANDARD,
1777 dd,
1778 matrix->db_size[1]);
1779 }
1780 }
1781 }
1782
1783 /*----------------------------------------------------------------------------
1784 * Dump a linear system matrix and right-hand side to file.
1785 *
1786 * parameters:
1787 * matrix <-- pointer to matrix structure
1788 * rhs <-- right hand side vector
1789 * name <-- identifier string used in file name
1790 *----------------------------------------------------------------------------*/
1791
1792 void
cs_matrix_dump_linear_system(const cs_matrix_t * matrix,const cs_real_t rhs[],const char * name)1793 cs_matrix_dump_linear_system(const cs_matrix_t *matrix,
1794 const cs_real_t rhs[],
1795 const char *name)
1796 {
1797 char filename[64];
1798 cs_gnum_t n_g_rows = matrix->n_rows;
1799
1800 cs_file_t *f = NULL;
1801
1802 #if defined(HAVE_MPI)
1803 if (cs_glob_n_ranks > 1) {
1804 cs_gnum_t n_l_rows = matrix->n_rows;
1805 MPI_Allreduce(&n_l_rows, &n_g_rows, 1, CS_MPI_GNUM, MPI_SUM,
1806 cs_glob_mpi_comm);
1807 }
1808 #endif
1809
1810 snprintf(filename, 63, "%s_%010llu", name, (unsigned long long)n_g_rows);
1811 filename[63] = '\0';
1812
1813 f = cs_file_open_default(filename, CS_FILE_MODE_WRITE);
1814
1815 _write_header_simple(f);
1816
1817 #if defined(HAVE_MPI)
1818 if (cs_glob_n_ranks > 1) {
1819 _write_matrix_g(matrix, f);
1820 _write_vector_g(matrix->n_rows, rhs, f);
1821 }
1822 #endif
1823 if (cs_glob_n_ranks == 1) {
1824 _write_matrix_l(matrix, f);
1825 _write_vector_l(matrix->n_rows, rhs, f);
1826 }
1827
1828 f = cs_file_free(f);
1829 }
1830
1831 /*----------------------------------------------------------------------------*/
1832 /*!
1833 * \brief Log general info relative to matrix.
1834 *
1835 * \param[in] matrix pointer to matrix structure
1836 * \param[in] verbosity verbosity level
1837 */
1838 /*----------------------------------------------------------------------------*/
1839
1840 void
cs_matrix_log_info(const cs_matrix_t * matrix,int verbosity)1841 cs_matrix_log_info(const cs_matrix_t *matrix,
1842 int verbosity)
1843 {
1844 cs_log_t l = CS_LOG_DEFAULT;
1845
1846 if (matrix == NULL)
1847 bft_error(__FILE__, __LINE__, 0,
1848 _("The matrix is not defined."));
1849
1850 cs_log_printf(l,
1851 _("\n"
1852 " Matrix info:\n"
1853 " type: %s\n"),
1854 cs_matrix_get_type_fullname(matrix));
1855
1856 if (matrix->fill_type == CS_MATRIX_N_FILL_TYPES)
1857 return;
1858
1859 cs_log_printf(l,
1860 _(" fill type: %s\n"),
1861 cs_matrix_fill_type_name[matrix->fill_type]);
1862
1863 if (verbosity > 1) {
1864 double fnorm = _frobenius_norm(matrix);
1865 if (fnorm > -1)
1866 cs_log_printf(l,
1867 _(" Frobenius norm: %11.4e\n"), fnorm);
1868 }
1869
1870 cs_log_printf(l, "\n");
1871 }
1872
1873 /*----------------------------------------------------------------------------
1874 * Test matrix dump operations.
1875 *
1876 * parameters:
1877 * n_rows <-- number of local rows
1878 * n_cols_ext <-- number of columns including ghost columns (array size)
1879 * n_edges <-- local number of graph edges
1880 * edges <-- graph edges connectivity
1881 * halo <-- cell halo structure
1882 * numbering <-- vectorization or thread-related numbering info, or NULL
1883 *----------------------------------------------------------------------------*/
1884
1885 void
cs_matrix_dump_test(cs_lnum_t n_rows,cs_lnum_t n_cols_ext,cs_lnum_t n_edges,const cs_lnum_2_t * edges,const cs_halo_t * halo,const cs_numbering_t * numbering)1886 cs_matrix_dump_test(cs_lnum_t n_rows,
1887 cs_lnum_t n_cols_ext,
1888 cs_lnum_t n_edges,
1889 const cs_lnum_2_t *edges,
1890 const cs_halo_t *halo,
1891 const cs_numbering_t *numbering)
1892 {
1893 cs_lnum_t ii;
1894 int test_id;
1895
1896 cs_real_t *da = NULL, *xa = NULL, *rhs = NULL;
1897 cs_lnum_t diag_block_size[4] = {3, 3, 3, 9};
1898 cs_lnum_t extra_diag_block_size[4] = {1, 1, 1, 1};
1899
1900 const int n_tests = 7;
1901 const char *name[] = {"matrix_native",
1902 "matrix_native_sym",
1903 "matrix_native_block",
1904 "matrix_csr",
1905 "matrix_msr",
1906 "matrix_msr_block"};
1907 const cs_matrix_type_t type[] = {CS_MATRIX_NATIVE,
1908 CS_MATRIX_NATIVE,
1909 CS_MATRIX_NATIVE,
1910 CS_MATRIX_CSR,
1911 CS_MATRIX_MSR,
1912 CS_MATRIX_MSR};
1913 const bool sym_flag[] = {false, true, false, false, false, false};
1914 const int block_flag[] = {0, 0, 1, 0, 0, 1};
1915
1916 /* Allocate and initialize working arrays */
1917 /*-----------------------------------------*/
1918
1919 BFT_MALLOC(rhs, n_cols_ext*diag_block_size[1], cs_real_t);
1920
1921 BFT_MALLOC(da, n_cols_ext*diag_block_size[3], cs_real_t);
1922 BFT_MALLOC(xa, n_edges*2, cs_real_t);
1923
1924 # pragma omp parallel for
1925 for (ii = 0; ii < n_cols_ext*diag_block_size[3]; ii++)
1926 da[ii] = 1.0 + ii*0.1/n_cols_ext;
1927 # pragma omp parallel for
1928 for (ii = 0; ii < n_cols_ext*diag_block_size[1]; ii++)
1929 rhs[ii] = ii*0.1/n_cols_ext;
1930
1931 # pragma omp parallel for
1932 for (ii = 0; ii < n_edges; ii++) {
1933 xa[ii*2] = 0.5*(1.0 + ii*1.0/n_edges);
1934 xa[ii*2 + 1] = -0.5*(1.0 + ii*1.0/n_edges);
1935 }
1936
1937 /* Loop on variant types */
1938 /*-----------------------*/
1939
1940 for (test_id = 0; test_id < n_tests; test_id++) {
1941
1942 cs_lnum_t *_diag_block_size = (block_flag[test_id]) ? diag_block_size : NULL;
1943 cs_lnum_t *_extra_diag_block_size = (block_flag[test_id]-1) ?
1944 extra_diag_block_size : NULL;
1945
1946 cs_matrix_structure_t
1947 *ms = cs_matrix_structure_create(type[test_id],
1948 true,
1949 n_rows,
1950 n_cols_ext,
1951 n_edges,
1952 edges,
1953 halo,
1954 numbering);
1955 cs_matrix_t *m = cs_matrix_create(ms);
1956
1957 cs_matrix_set_coefficients(m,
1958 sym_flag[test_id],
1959 _diag_block_size,
1960 _extra_diag_block_size,
1961 n_edges,
1962 edges,
1963 da,
1964 xa);
1965
1966 cs_matrix_dump_linear_system(m, rhs, name[test_id]);
1967
1968 cs_matrix_release_coefficients(m);
1969
1970 cs_matrix_destroy(&m);
1971 cs_matrix_structure_destroy(&ms);
1972 }
1973
1974 BFT_FREE(rhs);
1975
1976 BFT_FREE(da);
1977 BFT_FREE(xa);
1978 }
1979
1980 /*----------------------------------------------------------------------------*/
1981
1982 END_C_DECLS
1983